bio-gngm 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +20 -0
- data/Gemfile.lock +33 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +33 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/bio-gngm.gemspec +173 -0
- data/doc/Bio.html +129 -0
- data/doc/Bio/DB.html +128 -0
- data/doc/Bio/DB/Pileup.html +316 -0
- data/doc/Bio/DB/Vcf.html +683 -0
- data/doc/Bio/Util.html +135 -0
- data/doc/Bio/Util/Gngm.html +1655 -0
- data/doc/LICENSE_txt.html +111 -0
- data/doc/_index.html +169 -0
- data/doc/class_list.html +47 -0
- data/doc/created.rid +4 -0
- data/doc/css/common.css +1 -0
- data/doc/css/full_list.css +55 -0
- data/doc/css/style.css +322 -0
- data/doc/doc/created.rid +0 -0
- data/doc/file_list.html +52 -0
- data/doc/frames.html +13 -0
- data/doc/images/add.png +0 -0
- data/doc/images/bands.png +0 -0
- data/doc/images/brick.png +0 -0
- data/doc/images/brick_link.png +0 -0
- data/doc/images/bug.png +0 -0
- data/doc/images/bullet_black.png +0 -0
- data/doc/images/bullet_toggle_minus.png +0 -0
- data/doc/images/bullet_toggle_plus.png +0 -0
- data/doc/images/date.png +0 -0
- data/doc/images/delete.png +0 -0
- data/doc/images/find.png +0 -0
- data/doc/images/loadingAnimation.gif +0 -0
- data/doc/images/macFFBgHack.png +0 -0
- data/doc/images/package.png +0 -0
- data/doc/images/page_green.png +0 -0
- data/doc/images/page_white_text.png +0 -0
- data/doc/images/page_white_width.png +0 -0
- data/doc/images/plugin.png +0 -0
- data/doc/images/ruby.png +0 -0
- data/doc/images/signal.png +0 -0
- data/doc/images/tag_blue.png +0 -0
- data/doc/images/tag_green.png +0 -0
- data/doc/images/threads.png +0 -0
- data/doc/images/transparent.png +0 -0
- data/doc/images/wrench.png +0 -0
- data/doc/images/wrench_orange.png +0 -0
- data/doc/images/zoom.png +0 -0
- data/doc/index.html +88 -0
- data/doc/js/app.js +205 -0
- data/doc/js/darkfish.js +153 -0
- data/doc/js/full_list.js +167 -0
- data/doc/js/jquery.js +18 -0
- data/doc/js/navigation.js +142 -0
- data/doc/js/search.js +94 -0
- data/doc/js/search_index.js +1 -0
- data/doc/js/searcher.js +228 -0
- data/doc/lib/bio-gngm_rb.html +103 -0
- data/doc/lib/bio/util/bio-gngm_rb.html +96 -0
- data/doc/method_list.html +382 -0
- data/doc/rdoc.css +543 -0
- data/doc/table_of_contents.html +161 -0
- data/examples/.DS_Store +0 -0
- data/examples/make_histograms.rb +40 -0
- data/examples/make_threads.rb +42 -0
- data/examples/make_threads_isize.rb +41 -0
- data/examples/use_indels.rb +36 -0
- data/lib/bio-gngm.rb +12 -0
- data/lib/bio/util/bio-gngm.rb +1029 -0
- data/scripts/get_subseq.rb +16 -0
- data/scripts/make_histograms_laerfyve.rb +83 -0
- data/scripts/make_histograms_laerfyve_stitched.rb +59 -0
- data/scripts/make_threads_isize_laerfyfe.rb +52 -0
- data/scripts/make_threads_unmapped_laerfyfe.rb +72 -0
- data/scripts/make_threads_unmapped_laerfyfe_pseudo.rb +56 -0
- data/scripts/make_threads_unmapped_simulation.rb +54 -0
- data/scripts/make_threads_unmapped_simulation_immediate_region.rb +59 -0
- data/scripts/optimise_freq_window_size.rb +82 -0
- data/stitched_contigs.zip +0 -0
- data/test/data/ids2.txt +1 -0
- data/test/data/sorted.bam +0 -0
- data/test/data/test +0 -0
- data/test/data/test.bam +0 -0
- data/test/data/test.fa +20 -0
- data/test/data/test.fai +0 -0
- data/test/data/test.sai +0 -0
- data/test/data/test.tam +10 -0
- data/test/data/test_chr.fasta +1000 -0
- data/test/data/test_chr.fasta.amb +2 -0
- data/test/data/test_chr.fasta.ann +3 -0
- data/test/data/test_chr.fasta.bwt +0 -0
- data/test/data/test_chr.fasta.fai +1 -0
- data/test/data/test_chr.fasta.pac +0 -0
- data/test/data/test_chr.fasta.rbwt +0 -0
- data/test/data/test_chr.fasta.rpac +0 -0
- data/test/data/test_chr.fasta.rsa +0 -0
- data/test/data/test_chr.fasta.sa +0 -0
- data/test/data/testu.bam +0 -0
- data/test/data/testu.bam.bai +0 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-gngm.rb +126 -0
- metadata +276 -0
data/doc/Bio/Util.html
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
|
3
|
+
<html>
|
4
|
+
<head>
|
5
|
+
<meta content="text/html; charset=UTF-8" http-equiv="Content-Type">
|
6
|
+
|
7
|
+
<title>Class: Bio::Util</title>
|
8
|
+
|
9
|
+
<link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet">
|
10
|
+
|
11
|
+
<script type="text/javascript">
|
12
|
+
var rdoc_rel_prefix = "../";
|
13
|
+
</script>
|
14
|
+
|
15
|
+
<script type="text/javascript" charset="utf-8" src="../js/jquery.js"></script>
|
16
|
+
<script type="text/javascript" charset="utf-8" src="../js/navigation.js"></script>
|
17
|
+
<script type="text/javascript" charset="utf-8" src="../js/search_index.js"></script>
|
18
|
+
<script type="text/javascript" charset="utf-8" src="../js/search.js"></script>
|
19
|
+
<script type="text/javascript" charset="utf-8" src="../js/searcher.js"></script>
|
20
|
+
<script type="text/javascript" charset="utf-8" src="../js/darkfish.js"></script>
|
21
|
+
|
22
|
+
|
23
|
+
<body id="top" class="class">
|
24
|
+
<nav id="metadata">
|
25
|
+
<nav id="home-section" class="section">
|
26
|
+
<h3 class="section-header">
|
27
|
+
<a href="../index.html">Home</a>
|
28
|
+
<a href="../table_of_contents.html#classes">Classes</a>
|
29
|
+
<a href="../table_of_contents.html#methods">Methods</a>
|
30
|
+
</h3>
|
31
|
+
</nav>
|
32
|
+
|
33
|
+
|
34
|
+
<nav id="search-section" class="section project-section" class="initially-hidden">
|
35
|
+
<form action="#" method="get" accept-charset="utf-8">
|
36
|
+
<h3 class="section-header">
|
37
|
+
<input type="text" name="search" placeholder="Search" id="search-field"
|
38
|
+
title="Type to search, Up and Down to navigate, Enter to load">
|
39
|
+
</h3>
|
40
|
+
</form>
|
41
|
+
|
42
|
+
<ul id="search-results" class="initially-hidden"></ul>
|
43
|
+
</nav>
|
44
|
+
|
45
|
+
|
46
|
+
<div id="file-metadata">
|
47
|
+
<nav id="file-list-section" class="section">
|
48
|
+
<h3 class="section-header">Defined In</h3>
|
49
|
+
<ul>
|
50
|
+
<li>lib/bio/util/bio-gngm.rb
|
51
|
+
</ul>
|
52
|
+
</nav>
|
53
|
+
|
54
|
+
|
55
|
+
</div>
|
56
|
+
|
57
|
+
<div id="class-metadata">
|
58
|
+
|
59
|
+
<nav id="parent-class-section" class="section">
|
60
|
+
<h3 class="section-header">Parent</h3>
|
61
|
+
|
62
|
+
<p class="link">Object
|
63
|
+
|
64
|
+
</nav>
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
</div>
|
69
|
+
|
70
|
+
<div id="project-metadata">
|
71
|
+
<nav id="fileindex-section" class="section project-section">
|
72
|
+
<h3 class="section-header">Pages</h3>
|
73
|
+
|
74
|
+
<ul>
|
75
|
+
|
76
|
+
<li class="file"><a href="../LICENSE_txt.html">LICENSE</a>
|
77
|
+
|
78
|
+
</ul>
|
79
|
+
</nav>
|
80
|
+
|
81
|
+
<nav id="classindex-section" class="section project-section">
|
82
|
+
<h3 class="section-header">Class and Module Index</h3>
|
83
|
+
|
84
|
+
<ul class="link-list">
|
85
|
+
|
86
|
+
<li><a href="../Bio.html">Bio</a>
|
87
|
+
|
88
|
+
<li><a href="../Bio/DB.html">Bio::DB</a>
|
89
|
+
|
90
|
+
<li><a href="../Bio/DB/Pileup.html">Bio::DB::Pileup</a>
|
91
|
+
|
92
|
+
<li><a href="../Bio/DB/Vcf.html">Bio::DB::Vcf</a>
|
93
|
+
|
94
|
+
<li><a href="../Bio/Util.html">Bio::Util</a>
|
95
|
+
|
96
|
+
<li><a href="../Bio/Util/Gngm.html">Bio::Util::Gngm</a>
|
97
|
+
|
98
|
+
</ul>
|
99
|
+
</nav>
|
100
|
+
|
101
|
+
</div>
|
102
|
+
</nav>
|
103
|
+
|
104
|
+
<div id="documentation">
|
105
|
+
<h1 class="class">class Bio::Util</h1>
|
106
|
+
|
107
|
+
<div id="description" class="description">
|
108
|
+
|
109
|
+
</div><!-- description -->
|
110
|
+
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
<section id="5Buntitled-5D" class="documentation-section">
|
115
|
+
|
116
|
+
|
117
|
+
|
118
|
+
|
119
|
+
|
120
|
+
|
121
|
+
|
122
|
+
|
123
|
+
<!-- Methods -->
|
124
|
+
|
125
|
+
</section><!-- 5Buntitled-5D -->
|
126
|
+
|
127
|
+
</div><!-- documentation -->
|
128
|
+
|
129
|
+
|
130
|
+
<footer id="validator-badges">
|
131
|
+
<p><a href="http://validator.w3.org/check/referer">[Validate]</a>
|
132
|
+
<p>Generated by <a href="https://github.com/rdoc/rdoc">RDoc</a> 3.11.
|
133
|
+
<p>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish Rdoc Generator</a> 3.
|
134
|
+
</footer>
|
135
|
+
|
@@ -0,0 +1,1655 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
|
3
|
+
<html>
|
4
|
+
<head>
|
5
|
+
<meta content="text/html; charset=UTF-8" http-equiv="Content-Type">
|
6
|
+
|
7
|
+
<title>Class: Bio::Util::Gngm</title>
|
8
|
+
|
9
|
+
<link type="text/css" media="screen" href="../../rdoc.css" rel="stylesheet">
|
10
|
+
|
11
|
+
<script type="text/javascript">
|
12
|
+
var rdoc_rel_prefix = "../../";
|
13
|
+
</script>
|
14
|
+
|
15
|
+
<script type="text/javascript" charset="utf-8" src="../../js/jquery.js"></script>
|
16
|
+
<script type="text/javascript" charset="utf-8" src="../../js/navigation.js"></script>
|
17
|
+
<script type="text/javascript" charset="utf-8" src="../../js/search_index.js"></script>
|
18
|
+
<script type="text/javascript" charset="utf-8" src="../../js/search.js"></script>
|
19
|
+
<script type="text/javascript" charset="utf-8" src="../../js/searcher.js"></script>
|
20
|
+
<script type="text/javascript" charset="utf-8" src="../../js/darkfish.js"></script>
|
21
|
+
|
22
|
+
|
23
|
+
<body id="top" class="class">
|
24
|
+
<nav id="metadata">
|
25
|
+
<nav id="home-section" class="section">
|
26
|
+
<h3 class="section-header">
|
27
|
+
<a href="../../index.html">Home</a>
|
28
|
+
<a href="../../table_of_contents.html#classes">Classes</a>
|
29
|
+
<a href="../../table_of_contents.html#methods">Methods</a>
|
30
|
+
</h3>
|
31
|
+
</nav>
|
32
|
+
|
33
|
+
|
34
|
+
<nav id="search-section" class="section project-section" class="initially-hidden">
|
35
|
+
<form action="#" method="get" accept-charset="utf-8">
|
36
|
+
<h3 class="section-header">
|
37
|
+
<input type="text" name="search" placeholder="Search" id="search-field"
|
38
|
+
title="Type to search, Up and Down to navigate, Enter to load">
|
39
|
+
</h3>
|
40
|
+
</form>
|
41
|
+
|
42
|
+
<ul id="search-results" class="initially-hidden"></ul>
|
43
|
+
</nav>
|
44
|
+
|
45
|
+
|
46
|
+
<div id="file-metadata">
|
47
|
+
<nav id="file-list-section" class="section">
|
48
|
+
<h3 class="section-header">Defined In</h3>
|
49
|
+
<ul>
|
50
|
+
<li>lib/bio/util/bio-gngm.rb
|
51
|
+
</ul>
|
52
|
+
</nav>
|
53
|
+
|
54
|
+
|
55
|
+
</div>
|
56
|
+
|
57
|
+
<div id="class-metadata">
|
58
|
+
|
59
|
+
<nav id="parent-class-section" class="section">
|
60
|
+
<h3 class="section-header">Parent</h3>
|
61
|
+
|
62
|
+
<p class="link">Object
|
63
|
+
|
64
|
+
</nav>
|
65
|
+
|
66
|
+
|
67
|
+
<!-- Method Quickref -->
|
68
|
+
<nav id="method-list-section" class="section">
|
69
|
+
<h3 class="section-header">Methods</h3>
|
70
|
+
|
71
|
+
<ul class="link-list">
|
72
|
+
|
73
|
+
<li><a href="#method-c-new">::new</a>
|
74
|
+
|
75
|
+
<li><a href="#method-i-calculate_clusters">#calculate_clusters</a>
|
76
|
+
|
77
|
+
<li><a href="#method-i-calculate_densities">#calculate_densities</a>
|
78
|
+
|
79
|
+
<li><a href="#method-i-calculate_signal">#calculate_signal</a>
|
80
|
+
|
81
|
+
<li><a href="#method-i-close">#close</a>
|
82
|
+
|
83
|
+
<li><a href="#method-i-clusters">#clusters</a>
|
84
|
+
|
85
|
+
<li><a href="#method-i-collect_threads">#collect_threads</a>
|
86
|
+
|
87
|
+
<li><a href="#method-i-densities">#densities</a>
|
88
|
+
|
89
|
+
<li><a href="#method-i-draw_bands">#draw_bands</a>
|
90
|
+
|
91
|
+
<li><a href="#method-i-draw_hit_count">#draw_hit_count</a>
|
92
|
+
|
93
|
+
<li><a href="#method-i-draw_peaks">#draw_peaks</a>
|
94
|
+
|
95
|
+
<li><a href="#method-i-draw_signal">#draw_signal</a>
|
96
|
+
|
97
|
+
<li><a href="#method-i-draw_threads">#draw_threads</a>
|
98
|
+
|
99
|
+
<li><a href="#method-i-frequency_histogram">#frequency_histogram</a>
|
100
|
+
|
101
|
+
<li><a href="#method-i-get_band">#get_band</a>
|
102
|
+
|
103
|
+
<li><a href="#method-i-get_insert_size_frequency">#get_insert_size_frequency</a>
|
104
|
+
|
105
|
+
<li><a href="#method-i-get_unmapped_mate_frequency">#get_unmapped_mate_frequency</a>
|
106
|
+
|
107
|
+
<li><a href="#method-i-hit_count">#hit_count</a>
|
108
|
+
|
109
|
+
<li><a href="#method-i-peaks">#peaks</a>
|
110
|
+
|
111
|
+
<li><a href="#method-i-signal">#signal</a>
|
112
|
+
|
113
|
+
<li><a href="#method-i-snp_positions">#snp_positions</a>
|
114
|
+
|
115
|
+
<li><a href="#method-i-threads">#threads</a>
|
116
|
+
|
117
|
+
</ul>
|
118
|
+
</nav>
|
119
|
+
|
120
|
+
</div>
|
121
|
+
|
122
|
+
<div id="project-metadata">
|
123
|
+
<nav id="fileindex-section" class="section project-section">
|
124
|
+
<h3 class="section-header">Pages</h3>
|
125
|
+
|
126
|
+
<ul>
|
127
|
+
|
128
|
+
<li class="file"><a href="../../LICENSE_txt.html">LICENSE</a>
|
129
|
+
|
130
|
+
</ul>
|
131
|
+
</nav>
|
132
|
+
|
133
|
+
<nav id="classindex-section" class="section project-section">
|
134
|
+
<h3 class="section-header">Class and Module Index</h3>
|
135
|
+
|
136
|
+
<ul class="link-list">
|
137
|
+
|
138
|
+
<li><a href="../../Bio.html">Bio</a>
|
139
|
+
|
140
|
+
<li><a href="../../Bio/DB.html">Bio::DB</a>
|
141
|
+
|
142
|
+
<li><a href="../../Bio/DB/Pileup.html">Bio::DB::Pileup</a>
|
143
|
+
|
144
|
+
<li><a href="../../Bio/DB/Vcf.html">Bio::DB::Vcf</a>
|
145
|
+
|
146
|
+
<li><a href="../../Bio/Util.html">Bio::Util</a>
|
147
|
+
|
148
|
+
<li><a href="../../Bio/Util/Gngm.html">Bio::Util::Gngm</a>
|
149
|
+
|
150
|
+
</ul>
|
151
|
+
</nav>
|
152
|
+
|
153
|
+
</div>
|
154
|
+
</nav>
|
155
|
+
|
156
|
+
<div id="documentation">
|
157
|
+
<h1 class="class">class Bio::Util::Gngm</h1>
|
158
|
+
|
159
|
+
<div id="description" class="description">
|
160
|
+
|
161
|
+
<p>A <a href="Gngm.html">Bio::Util::Gngm</a> object represents a single region
|
162
|
+
on a reference genome that is to be examined using the NGM technique
|
163
|
+
described in Austin et al (2011) <a
|
164
|
+
href="http://bar.utoronto.ca/ngm/description.html">bar.utoronto.ca/ngm/description.html</a>
|
165
|
+
and <a
|
166
|
+
href="http://onlinelibrary.wiley.com/doi/10.1111/j.1365-313X.2011.04619.x/abstract;jsessionid=F73E2DA628523B26205297CEE95526DA.d02t04">onlinelibrary.wiley.com/doi/10.1111/j.1365-313X.2011.04619.x/abstract;jsessionid=F73E2DA628523B26205297CEE95526DA.d02t04</a>
|
167
|
+
Austin <em>et</em> <em>al</em> (2011) <strong>Next-generation</strong>
|
168
|
+
<strong>mapping</strong> <strong>of</strong> <strong>Arabidopsis</strong>
|
169
|
+
<strong>genes</strong> <em>Plant</em> <em>Journal</em>
|
170
|
+
<strong>67</strong>(4):715-725.</p>
|
171
|
+
|
172
|
+
<p><a href="Gngm.html">Bio::Util::Gngm</a> provides methods for finding SNPs,
|
173
|
+
small INDELS and larger INDELS, creating histograms of polymorphism
|
174
|
+
frequency, creating and clustering density curves, creating signal plots
|
175
|
+
and finding peaks. The ratio of reference-agreeing and reference-differing
|
176
|
+
reads can be specified.</p>
|
177
|
+
|
178
|
+
<h2 id="label-Background">Background</h2>
|
179
|
+
|
180
|
+
<p>The basic concept of the technique is that density curves of polymorphism
|
181
|
+
frequency across the region of interest are plotted and analysed. Each
|
182
|
+
curve is called a thread, as it represents a polymorphism that was called
|
183
|
+
with a statistic within a certain user-specified range, eg if a SNP was
|
184
|
+
called with 50% non-reference bases from sequence reads (say all A), and
|
185
|
+
50% reference reads (all T) then a discordant chastity statistic (ChD) of
|
186
|
+
0.5 would be calculated and assigned to that SNP. Depending on the width
|
187
|
+
and slide of the windows the user had specified, the frequency of SNPs with
|
188
|
+
ChD in the specified range would be drawn in the same density curve. In the
|
189
|
+
figure below each different coloured curve represents the frequency of SNPs
|
190
|
+
with similar ChD.</p>
|
191
|
+
|
192
|
+
<p><img src="../../images/threads.png" /></p>
|
193
|
+
|
194
|
+
<p>Each of these density curves is called a thread. Threads are clustered into
|
195
|
+
groups called bands and the bands containing the expected and control
|
196
|
+
polymorphisms extracted. In the figure below, the control band is 0.5, the
|
197
|
+
expected mutation is 1.0. Typically, and in the Austin et al (2011)
|
198
|
+
description of NGM the control band is the heterophasic band that
|
199
|
+
represents natural variation, the thing taken to be the baseline. For a
|
200
|
+
simple SNP, numerically the discordant chastity is expected to be 0.5.
|
201
|
+
Conversely the expected band is the homophasic band that represents the
|
202
|
+
selected for SNP region. Normally the discordant chastity is expected to be
|
203
|
+
1.0.</p>
|
204
|
+
|
205
|
+
<p><img src="../../images/bands.png" /></p>
|
206
|
+
|
207
|
+
<p>The point where the signal from the control and expected bands converge
|
208
|
+
most is a likely candidate region for the causative mutation, so here at
|
209
|
+
about the 1.6 millionth nucleotide.</p>
|
210
|
+
|
211
|
+
<p><img src="../../images/signal.png" /></p>
|
212
|
+
|
213
|
+
<h2 id="label-Example">Example</h2>
|
214
|
+
|
215
|
+
<pre class="ruby"><span class="ruby-identifier">require</span> <span class="ruby-string">'bio-gngm'</span>
|
216
|
+
|
217
|
+
<span class="ruby-identifier">g</span> = <span class="ruby-constant">Bio</span><span class="ruby-operator">::</span><span class="ruby-constant">Util</span><span class="ruby-operator">::</span><span class="ruby-constant">Gngm</span>.<span class="ruby-identifier">new</span>(:<span class="ruby-identifier">file</span> =<span class="ruby-operator">></span> <span class="ruby-string">"aln.sorted.bam"</span>,
|
218
|
+
:<span class="ruby-identifier">format</span> =<span class="ruby-operator">></span> :<span class="ruby-identifier">bam</span>,
|
219
|
+
:<span class="ruby-identifier">fasta</span> =<span class="ruby-operator">></span> <span class="ruby-string">"reference.fasta"</span>,
|
220
|
+
:<span class="ruby-identifier">samtools</span> =<span class="ruby-operator">></span> {:<span class="ruby-identifier">r</span> =<span class="ruby-operator">></span> <span class="ruby-string">"chr1:1-100000"</span>,
|
221
|
+
:<span class="ruby-identifier">q</span> =<span class="ruby-operator">></span> <span class="ruby-value">20</span>,
|
222
|
+
:<span class="ruby-constant">Q</span> =<span class="ruby-operator">></span> <span class="ruby-value">50</span>
|
223
|
+
},
|
224
|
+
:<span class="ruby-identifier">min_non_ref_freq</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.5</span>,
|
225
|
+
:<span class="ruby-identifier">min_non_ref</span> =<span class="ruby-operator">></span> <span class="ruby-value">3</span>
|
226
|
+
)
|
227
|
+
<span class="ruby-identifier">g</span>.<span class="ruby-identifier">snp_positions</span>
|
228
|
+
<span class="ruby-identifier">g</span>.<span class="ruby-identifier">collect_threads</span>(:<span class="ruby-identifier">start</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.2</span>, :<span class="ruby-identifier">stop</span> =<span class="ruby-operator">></span> <span class="ruby-value">1.0</span>, :<span class="ruby-identifier">slide</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.01</span>, :<span class="ruby-identifier">size</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.1</span> )
|
229
|
+
[<span class="ruby-value">0.25</span>, <span class="ruby-value">0.5</span>, <span class="ruby-value">1.0</span>].<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">kernel_adjust</span><span class="ruby-operator">|</span> <span class="ruby-comment"># loop through different kernel values</span>
|
230
|
+
[<span class="ruby-value">4</span>, <span class="ruby-value">9</span>, <span class="ruby-value">11</span>].<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span> <span class="ruby-identifier">k</span> <span class="ruby-operator">|</span> <span class="ruby-comment"># loop through different cluster numbers </span>
|
231
|
+
|
232
|
+
<span class="ruby-comment">#cluster</span>
|
233
|
+
<span class="ruby-identifier">g</span>.<span class="ruby-identifier">calculate_clusters</span>(:<span class="ruby-identifier">k</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">k</span>, :<span class="ruby-identifier">adjust</span> =<span class="ruby-operator">></span> <span class="ruby-identifier">kernel_adjust</span>, :<span class="ruby-identifier">control_chd</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.7</span>, :<span class="ruby-identifier">expected_chd</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.5</span>)
|
234
|
+
<span class="ruby-comment">#draw thread and bands</span>
|
235
|
+
<span class="ruby-identifier">filename</span> = <span class="ruby-node">"#{name}_#{k}_#{kernel_adjust}_all_threads.png"</span>
|
236
|
+
<span class="ruby-identifier">g</span>.<span class="ruby-identifier">draw_threads</span>(<span class="ruby-identifier">filename</span>)
|
237
|
+
|
238
|
+
<span class="ruby-identifier">filename</span> = <span class="ruby-node">"#{name}_#{k}_#{kernel_adjust}_clustered_bands.png"</span>
|
239
|
+
<span class="ruby-identifier">g</span>.<span class="ruby-identifier">draw_bands</span>(<span class="ruby-identifier">filename</span>, :<span class="ruby-identifier">add_lines</span> =<span class="ruby-operator">></span> [<span class="ruby-value">100</span>,<span class="ruby-value">30000</span>,<span class="ruby-value">675432</span>])
|
240
|
+
|
241
|
+
<span class="ruby-comment">#draw signal</span>
|
242
|
+
<span class="ruby-identifier">filename</span> = <span class="ruby-node">"#{name}_#{k}_#{kernel_adjust}_signal.png"</span>
|
243
|
+
<span class="ruby-identifier">g</span>.<span class="ruby-identifier">draw_signal</span>(<span class="ruby-identifier">filename</span>)
|
244
|
+
|
245
|
+
<span class="ruby-comment">#auto-guess peaks</span>
|
246
|
+
<span class="ruby-identifier">filename</span> = <span class="ruby-node">"#{name}_#{k}_#{kernel_adjust}_peaks.png"</span>
|
247
|
+
<span class="ruby-identifier">g</span>.<span class="ruby-identifier">draw_peaks</span>(<span class="ruby-identifier">filename</span>)
|
248
|
+
<span class="ruby-keyword">end</span>
|
249
|
+
<span class="ruby-keyword">end</span>
|
250
|
+
<span class="ruby-identifier">g</span>.<span class="ruby-identifier">close</span> <span class="ruby-comment">#close BAM file</span>
|
251
|
+
</pre>
|
252
|
+
|
253
|
+
<h2 id="label-Polymorphisms+and+statistics">Polymorphisms and statistics</h2>
|
254
|
+
|
255
|
+
<p><a href="Gngm.html">Bio::Util::Gngm</a> will allow you to look for
|
256
|
+
polymorphisms that are SNPs, INDELS (as insertions uniquely, deletions
|
257
|
+
uniquely or both) and longer insertions or deletions based on the insert
|
258
|
+
size on paired-end read alignments. Each has a different statistic attached
|
259
|
+
to it.</p>
|
260
|
+
|
261
|
+
<h3 id="label-SNPs">SNPs</h3>
|
262
|
+
|
263
|
+
<p>Simple Single Nucleotide Polymorphisms are called and their ChD statistic
|
264
|
+
calculated as described in Austin et al (2011).</p>
|
265
|
+
|
266
|
+
<h3 id="label-Short+INDELS">Short INDELS</h3>
|
267
|
+
|
268
|
+
<p>These are called via SAMtools/BCFtools so are limited to the INDELs that
|
269
|
+
can be called that way. The implementation at the moment only considers
|
270
|
+
positions with one INDEL, sites with more than one potential INDEL (ie
|
271
|
+
multiple alleles) are disregarded entirely. See the <a
|
272
|
+
href="../DB/Vcf.html">Bio::DB::Vcf</a> extensions in this package for a
|
273
|
+
description of what constitutes an INDEL. The Vcf attribute <a
|
274
|
+
href="../DB/Vcf.html#method-i-non_ref_allele_freq">Bio::DB::Vcf#non_ref_allele_freq</a>
|
275
|
+
is used as the statistic in this case.</p>
|
276
|
+
|
277
|
+
<h3 id="label-Insertion+Size">Insertion Size</h3>
|
278
|
+
|
279
|
+
<p>Paired-end alignments have an expected distance between the paired reads
|
280
|
+
(called insert size, or isize). Groups of reads in one position with larger
|
281
|
+
or smaller than expected isize can indicate large deletions or insertions.
|
282
|
+
Due to the details of read preparation the actual isize varies around a
|
283
|
+
mean value with an expected proportion of 50% of reads having isize above
|
284
|
+
the mean, and 50% below. To create density curves of insertion size
|
285
|
+
frequency a window of user-specified size is moved along
|
286
|
+
the reference genome in user-specified steps and all alignments in that
|
287
|
+
window are examined. The Bio::DB::Sam#isize attribute is inspected for all
|
288
|
+
alignments passing user-specified quality and the proportion of reads in
|
289
|
+
that window that have an insert size > the expected insert size is used
|
290
|
+
as the statistic in this case. Proportions approaching 1 indicate that the
|
291
|
+
sequenced organism has a deletion in that section relative to the
|
292
|
+
reference. Proportions approaching 0 indicate an insertion in that section
|
293
|
+
relative to the reference. Proportions around 0.5 indicate random variation
|
294
|
+
of insert size, i.e. no INDEL. It seems to be a good idea to keep the window
|
295
|
+
size similar to the read + isize. Useful in conjunction with assessing
|
296
|
+
unmapped mates.</p>
|
297
|
+
|
298
|
+
<h3 id="label-Unmapped+Mate+Pairs+%2F+Paired+Ends.">Unmapped Mate Pairs / Paired Ends.</h3>
|
299
|
+
|
300
|
+
<p>Paired-end alignments where one mate finds a mapping but the other doesn't,
|
301
|
+
can indicate an insertion/deletion larger than the insert size of the reads
|
302
|
+
used (IE one read disappeared into the deleted section). This method uses a
|
303
|
+
statistic based on proportion of mapped/unmapped reads in a window.
|
304
|
+
Proportions of reads that are mapped but the mate is unmapped should be
|
305
|
+
about 0.5 in a window over an insertion/deletion (since the reads can go
|
306
|
+
in either direction). With no insertion/deletion, the proportion should
|
307
|
+
be closer to 1.</p>
|
308
|
+
|
309
|
+
<h2 id="label-Input+types">Input types</h2>
|
310
|
+
|
311
|
+
<p>A sorted BAM file is used as the source of alignments. Pileup is not used
|
312
|
+
nor likely to be as it is a deprecated function within SAMtools. With the
|
313
|
+
BAM file you will need the reference FASTA and the BAM index (.bai).</p>
|
314
|
+
|
315
|
+
<h2 id="label-Workflow">Workflow</h2>
|
316
|
+
<ol><li>
|
317
|
+
<p>Create <a href="Gngm.html">Bio::Util::Gngm</a> object for a specific region
|
318
|
+
in the reference genome</p>
|
319
|
+
</li><li>
|
320
|
+
<p>Polymorphisms are found</p>
|
321
|
+
</li><li>
|
322
|
+
<p>Density curves (threads) are calculated</p>
|
323
|
+
</li><li>
|
324
|
+
<p>Clustering density threads into bands is done</p>
|
325
|
+
</li><li>
|
326
|
+
<p>Signal is compared between band of interest and control</p>
|
327
|
+
</li><li>
|
328
|
+
<p>Figures are printed</p>
|
329
|
+
</li></ol>
|
330
|
+
|
331
|
+
<h2 id="label-Prerequisites">Prerequisites</h2>
|
332
|
+
<ul><li>
|
333
|
+
<p>Ruby 1.9.3 or greater (if you have an earlier version, try RVM for
|
334
|
+
installing different versions of Ruby alongside your system install and
|
335
|
+
switching nicely between them)</p>
|
336
|
+
</li><li>
|
337
|
+
<p>R 2.11.1 or greater</p>
|
338
|
+
</li></ul>
|
339
|
+
|
340
|
+
<p>The following ruby-gems are required</p>
|
341
|
+
<ul><li>
|
342
|
+
<p>rinruby >= 2.0.2</p>
|
343
|
+
</li><li>
|
344
|
+
<p>bio-samtools >= 0.5.0</p>
|
345
|
+
</li></ul>
|
346
|
+
|
347
|
+
<p>The following R packages are required</p>
|
348
|
+
<ul><li>
|
349
|
+
<p>ggplot2</p>
|
350
|
+
</li><li>
|
351
|
+
<p>peaks</p>
|
352
|
+
</li></ul>
|
353
|
+
|
354
|
+
<h2 id="label-Acknowledgements">Acknowledgements</h2>
|
355
|
+
|
356
|
+
<h2 id="label-Using+bio-gngm">Using bio-gngm</h2>
|
357
|
+
|
358
|
+
<p>The package is not yet released, a gem will be prepared soon. Until then
|
359
|
+
scripts run fine when saved in the package's scripts folder within the package
|
360
|
+
directory with the below pre-amble at the top of the script. Run scripts
|
361
|
+
from the root of the package directory.</p>
|
362
|
+
|
363
|
+
<pre class="ruby"><span class="ruby-identifier">$LOAD_PATH</span>.<span class="ruby-identifier">unshift</span>(<span class="ruby-constant">File</span>.<span class="ruby-identifier">join</span>(<span class="ruby-constant">File</span>.<span class="ruby-identifier">dirname</span>(<span class="ruby-keyword">__FILE__</span>), <span class="ruby-string">'..'</span>, <span class="ruby-string">'lib'</span>))
|
364
|
+
<span class="ruby-identifier">$LOAD_PATH</span>.<span class="ruby-identifier">unshift</span>(<span class="ruby-constant">File</span>.<span class="ruby-identifier">dirname</span>(<span class="ruby-keyword">__FILE__</span>))
|
365
|
+
<span class="ruby-identifier">require</span> <span class="ruby-string">'bio-samtools'</span>
|
366
|
+
<span class="ruby-identifier">require</span> <span class="ruby-string">'bio-gngm'</span>
|
367
|
+
</pre>
|
368
|
+
|
369
|
+
<h2 id="label-API">API</h2>
|
370
|
+
|
371
|
+
</div><!-- description -->
|
372
|
+
|
373
|
+
|
374
|
+
|
375
|
+
|
376
|
+
<section id="5Buntitled-5D" class="documentation-section">
|
377
|
+
|
378
|
+
|
379
|
+
|
380
|
+
|
381
|
+
|
382
|
+
<!-- Constants -->
|
383
|
+
<section id="constants-list" class="section">
|
384
|
+
<h3 class="section-header">Constants</h3>
|
385
|
+
<dl>
|
386
|
+
|
387
|
+
<dt id="ERROR_MARGIN">ERROR_MARGIN
|
388
|
+
|
389
|
+
<dd class="description"><p>Ruby 1.9.3 has a rounding error in the Range#step function such that some
|
390
|
+
decimal places are rounded off to 0.00000000000000…1 above their place. So
|
391
|
+
this constant is used to identify windows within a short distance and
|
392
|
+
prevent any rounding errors. Hopefully I should be able to remove this in
|
393
|
+
later versions.</p>
|
394
|
+
|
395
|
+
|
396
|
+
</dl>
|
397
|
+
</section>
|
398
|
+
|
399
|
+
|
400
|
+
|
401
|
+
<!-- Attributes -->
|
402
|
+
<section id="attribute-method-details" class="method-section section">
|
403
|
+
<h3 class="section-header">Attributes</h3>
|
404
|
+
|
405
|
+
|
406
|
+
<div id="attribute-i-file" class="method-detail">
|
407
|
+
<div class="method-heading attribute-method-heading">
|
408
|
+
<span class="method-name">file</span><span
|
409
|
+
class="attribute-access-type">[RW]</span>
|
410
|
+
</div>
|
411
|
+
|
412
|
+
<div class="method-description">
|
413
|
+
|
414
|
+
|
415
|
+
|
416
|
+
</div>
|
417
|
+
</div>
|
418
|
+
|
419
|
+
</section><!-- attribute-method-details -->
|
420
|
+
|
421
|
+
|
422
|
+
<!-- Methods -->
|
423
|
+
|
424
|
+
<section id="public-class-5Buntitled-5D-method-details" class="method-section section">
|
425
|
+
<h3 class="section-header">Public Class Methods</h3>
|
426
|
+
|
427
|
+
|
428
|
+
<div id="method-c-new" class="method-detail ">
|
429
|
+
|
430
|
+
<div class="method-heading">
|
431
|
+
<span class="method-name">new</span><span
|
432
|
+
class="method-args">(options)</span>
|
433
|
+
<span class="method-click-advice">click to toggle source</span>
|
434
|
+
</div>
|
435
|
+
|
436
|
+
|
437
|
+
<div class="method-description">
|
438
|
+
|
439
|
+
<p>Returns a new <a href="Gngm.html">Bio::Util::Gngm</a> object.</p>
|
440
|
+
|
441
|
+
<pre class="ruby"><span class="ruby-identifier">g</span> = <span class="ruby-constant">Bio</span><span class="ruby-operator">::</span><span class="ruby-constant">Util</span><span class="ruby-operator">::</span><span class="ruby-constant">Gngm</span>.<span class="ruby-identifier">new</span>(:<span class="ruby-identifier">file</span> =<span class="ruby-operator">></span> <span class="ruby-string">"aln.sort.bam"</span>,
|
442
|
+
:<span class="ruby-identifier">format</span> =<span class="ruby-operator">></span> :<span class="ruby-identifier">bam</span>,
|
443
|
+
:<span class="ruby-identifier">samtools</span> =<span class="ruby-operator">></span> {:<span class="ruby-identifier">q</span> =<span class="ruby-operator">></span> <span class="ruby-value">20</span>, :<span class="ruby-constant">Q</span> =<span class="ruby-operator">></span> <span class="ruby-value">50</span>, :<span class="ruby-identifier">r</span> =<span class="ruby-operator">></span> <span class="ruby-string">"Chr1:1-100000"</span>},
|
444
|
+
:<span class="ruby-identifier">fasta</span> =<span class="ruby-operator">></span> <span class="ruby-string">"reference.fa"</span>
|
445
|
+
|
446
|
+
)
|
447
|
+
</pre>
|
448
|
+
|
449
|
+
<p>Required parameters and defaults:</p>
|
450
|
+
<ul><li>
|
451
|
+
<p><code>:file => nil</code> -the path to the bam file containing the
|
452
|
+
alignments, a .bai index must be present</p>
|
453
|
+
</li><li>
|
454
|
+
<p><code>:format => :bam</code> -always bam</p>
|
455
|
+
</li><li>
|
456
|
+
<p><code>:fasta => nil</code> -the path to the FASTA formatted reference
|
457
|
+
sequence</p>
|
458
|
+
</li><li>
|
459
|
+
<p><code>:samtools => {:q => 20, :Q => 50, :r =>
|
460
|
+
"Chr1:100-1100"}</code> -options for samtools, see bio-samtools
|
461
|
+
documentation for further details. The :r option is required to specify the
|
462
|
+
region of interest</p>
|
463
|
+
</li></ul>
|
464
|
+
|
465
|
+
<p>Optional parameters and defaults: Most of these are parameters for specific
|
466
|
+
methods and can be over-ridden when particular methods are called</p>
|
467
|
+
<ul><li>
|
468
|
+
<p><code>:variant_call => {:indels => false, :deletions_only =>
|
469
|
+
false, :insertions_only => false, :min_depth => 2, :max_depth =>
|
470
|
+
10000000, :mapping_quality => 10.0, :min_non_ref_count => 2,
|
471
|
+
:ignore_reference_n => true}</code> -for SNP/Indel calling only one of
|
472
|
+
<code>:indels, :deletions_only, :insertions_only</code> should be used.</p>
|
473
|
+
</li><li>
|
474
|
+
<p><code>:threads => {:start => 0.2, :stop => 1.0, :slide => 0.01,
|
475
|
+
:size => 0.1 }</code> -options for thread windows</p>
|
476
|
+
</li><li>
|
477
|
+
<p><code>:insert_size_opts => {:ref_window_size => 200,
|
478
|
+
:ref_window_slide => 50, :isize => 150}</code> -options for insert
|
479
|
+
size calculations</p>
|
480
|
+
</li><li>
|
481
|
+
<p><code>:histo_bin_width => 250000</code> -bin width for histograms of SNP
|
482
|
+
frequency</p>
|
483
|
+
</li><li>
|
484
|
+
<p><code>:graphics => {:width => 1000, :height => 500, :draw_legend
|
485
|
+
=> false, :add_boxes => nil}</code> -graphics output options,
|
486
|
+
<code>:draw_legend</code> draws a legend plot for band figures only</p>
|
487
|
+
</li><li>
|
488
|
+
<p><code>:peaks => {:sigma => 3.0, :threshold => 10.0, :background
|
489
|
+
=> false, :iterations => 13, :markov => false, :window => 3,
|
490
|
+
:range => 10000}</code> -parameters for automated peak calling,
|
491
|
+
parameters relate to R package Peaks. <code>:range</code> is the width of
|
492
|
+
the box to draw on the peak plot</p>
|
493
|
+
</li></ul>
|
494
|
+
|
495
|
+
|
496
|
+
|
497
|
+
<div class="method-source-code" id="new-source">
|
498
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 342</span>
|
499
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">options</span>)
|
500
|
+
<span class="ruby-ivar">@file</span> = <span class="ruby-keyword">nil</span>
|
501
|
+
<span class="ruby-ivar">@snp_positions</span> = <span class="ruby-keyword">nil</span>
|
502
|
+
<span class="ruby-ivar">@threads</span> = <span class="ruby-keyword">nil</span>
|
503
|
+
<span class="ruby-ivar">@densities</span> = <span class="ruby-keyword">nil</span>
|
504
|
+
<span class="ruby-ivar">@clusters</span> = <span class="ruby-keyword">nil</span>
|
505
|
+
<span class="ruby-ivar">@control_band</span> = <span class="ruby-keyword">nil</span>
|
506
|
+
<span class="ruby-ivar">@expected_band</span> = <span class="ruby-keyword">nil</span>
|
507
|
+
<span class="ruby-ivar">@signal</span> = <span class="ruby-keyword">nil</span>
|
508
|
+
<span class="ruby-ivar">@peak_indices</span> = <span class="ruby-keyword">nil</span>
|
509
|
+
<span class="ruby-ivar">@peak_y_values</span> = <span class="ruby-keyword">nil</span>
|
510
|
+
<span class="ruby-ivar">@density_max_y</span> = <span class="ruby-keyword">nil</span> <span class="ruby-comment">#the maximum y value needed to plot the entire set density plots of threads and maintain a consistent scale for plots</span>
|
511
|
+
<span class="ruby-ivar">@colours</span> = <span class="ruby-node">%w[#A6CEE3 #1F78B4 #B2DF8A #33A02C #FB9A99 #E31A1C #FDBF6F #FF7F00 #CAB2D6 #6A3D9A #FFFF99 #B15928]</span>
|
512
|
+
<span class="ruby-ivar">@thread_colours</span> = {}
|
513
|
+
<span class="ruby-ivar">@opts</span> = {
|
514
|
+
<span class="ruby-value">:file</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">nil</span>,
|
515
|
+
<span class="ruby-value">:format</span> =<span class="ruby-operator">></span> <span class="ruby-value">:bam</span>,
|
516
|
+
<span class="ruby-value">:fasta</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">nil</span>,
|
517
|
+
<span class="ruby-value">:samtools</span> =<span class="ruby-operator">></span> {<span class="ruby-value">:q</span> =<span class="ruby-operator">></span> <span class="ruby-value">20</span>, <span class="ruby-value">:Q</span> =<span class="ruby-operator">></span> <span class="ruby-value">50</span>},
|
518
|
+
<span class="ruby-comment">##indels = call any and only indels.. :deletions_only :insertions_only = only one type</span>
|
519
|
+
<span class="ruby-comment">## some options are designed to be equivalent to vcfutils.pl from bcftools options when using vcf</span>
|
520
|
+
<span class="ruby-comment">##:min_depth (-d)</span>
|
521
|
+
<span class="ruby-comment">##:max_depth (-D)</span>
|
522
|
+
<span class="ruby-comment">##:mapping_quality (-Q) minimum RMS mapping quality for SNPs (mq in info fields)</span>
|
523
|
+
<span class="ruby-comment">##:min_non_ref_count (-a) minimum num of alt bases ... the sum of the last two numbers in DP4 in info fields</span>
|
524
|
+
<span class="ruby-comment">##doesnt do anything with window filtering or pv values... </span>
|
525
|
+
<span class="ruby-value">:insert_size_opts</span> =<span class="ruby-operator">></span> {<span class="ruby-value">:ref_window_size</span> =<span class="ruby-operator">></span> <span class="ruby-value">200</span>, <span class="ruby-value">:ref_window_slide</span> =<span class="ruby-operator">></span> <span class="ruby-value">50</span>, <span class="ruby-value">:isize</span> =<span class="ruby-operator">></span> <span class="ruby-value">150</span>},
|
526
|
+
<span class="ruby-value">:variant_call</span> =<span class="ruby-operator">></span> {<span class="ruby-value">:indels</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">false</span>, <span class="ruby-value">:deletions_only</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">false</span>, <span class="ruby-value">:insertions_only</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">false</span>, <span class="ruby-value">:min_depth</span> =<span class="ruby-operator">></span> <span class="ruby-value">2</span>, <span class="ruby-value">:max_depth</span> =<span class="ruby-operator">></span> <span class="ruby-value">10000000</span>, <span class="ruby-value">:mapping_quality</span> =<span class="ruby-operator">></span> <span class="ruby-value">10.0</span>, <span class="ruby-value">:min_non_ref_count</span> =<span class="ruby-operator">></span> <span class="ruby-value">2</span>, <span class="ruby-value">:ignore_reference_n</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">true</span>},
|
527
|
+
<span class="ruby-value">:histo_bin_width</span> =<span class="ruby-operator">></span> <span class="ruby-value">250000</span>,
|
528
|
+
<span class="ruby-value">:graphics</span> =<span class="ruby-operator">></span> {<span class="ruby-value">:width</span> =<span class="ruby-operator">></span> <span class="ruby-value">1000</span>, <span class="ruby-value">:height</span> =<span class="ruby-operator">></span> <span class="ruby-value">500</span>, <span class="ruby-value">:draw_legend</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">false</span>, <span class="ruby-value">:add_boxes</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">nil</span>},
|
529
|
+
<span class="ruby-value">:adjust</span> =<span class="ruby-operator">></span> <span class="ruby-value">1</span>,
|
530
|
+
<span class="ruby-value">:control_chd</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.5</span>,
|
531
|
+
<span class="ruby-value">:expected_chd</span> =<span class="ruby-operator">></span> <span class="ruby-value">1.0</span>,
|
532
|
+
<span class="ruby-value">:threads</span> =<span class="ruby-operator">></span> {<span class="ruby-value">:start</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.2</span>, <span class="ruby-value">:stop</span> =<span class="ruby-operator">></span> <span class="ruby-value">1.0</span>, <span class="ruby-value">:slide</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.01</span>, <span class="ruby-value">:size</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.1</span> },
|
533
|
+
<span class="ruby-value">:peaks</span> =<span class="ruby-operator">></span> {<span class="ruby-value">:sigma</span> =<span class="ruby-operator">></span> <span class="ruby-value">3.0</span>, <span class="ruby-value">:threshold</span> =<span class="ruby-operator">></span> <span class="ruby-value">10.0</span>, <span class="ruby-value">:background</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">false</span>, <span class="ruby-value">:iterations</span> =<span class="ruby-operator">></span> <span class="ruby-value">13</span>, <span class="ruby-value">:markov</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">false</span>, <span class="ruby-value">:window</span> =<span class="ruby-operator">></span> <span class="ruby-value">3</span>, <span class="ruby-value">:range</span> =<span class="ruby-operator">></span> <span class="ruby-value">10000</span>} <span class="ruby-comment">##range is the width of the box to draw on the peak plot</span>
|
534
|
+
}
|
535
|
+
<span class="ruby-ivar">@opts</span>.<span class="ruby-identifier">merge!</span>(<span class="ruby-identifier">options</span>)
|
536
|
+
<span class="ruby-identifier">open_file</span>
|
537
|
+
<span class="ruby-keyword">end</span></pre>
|
538
|
+
</div><!-- new-source -->
|
539
|
+
|
540
|
+
</div>
|
541
|
+
|
542
|
+
|
543
|
+
|
544
|
+
|
545
|
+
</div><!-- new-method -->
|
546
|
+
|
547
|
+
|
548
|
+
</section><!-- public-class-method-details -->
|
549
|
+
|
550
|
+
<section id="public-instance-5Buntitled-5D-method-details" class="method-section section">
|
551
|
+
<h3 class="section-header">Public Instance Methods</h3>
|
552
|
+
|
553
|
+
|
554
|
+
<div id="method-i-calculate_clusters" class="method-detail ">
|
555
|
+
|
556
|
+
<div class="method-heading">
|
557
|
+
<span class="method-name">calculate_clusters</span><span
|
558
|
+
class="method-args">( opts={} )</span>
|
559
|
+
<span class="method-click-advice">click to toggle source</span>
|
560
|
+
</div>
|
561
|
+
|
562
|
+
|
563
|
+
<div class="method-description">
|
564
|
+
|
565
|
+
<p>Calculates the k-means clusters of density curves (groups threads into
|
566
|
+
bands). Calculates the clusters using
|
567
|
+
the R function <code>kmeans()</code>. Recalculates @densities as it does with <a
|
568
|
+
href="Gngm.html#method-i-calculate_densities">#calculate_densities</a>, so
|
569
|
+
clustering can be done without having to explicitly call <a
|
570
|
+
href="Gngm.html#method-i-calculate_densities">#calculate_densities</a>.
|
571
|
+
Clusters are recalculated every time regardless of whether it's been done
|
572
|
+
before or not, so this is useful for trying out different
|
573
|
+
values for the parameters. When clusters are calculated the expected and
|
574
|
+
control bands are compared with the <a
|
575
|
+
href="Gngm.html#method-i-calculate_signal">#calculate_signal</a> method and
|
576
|
+
the @signal array populated. Resets the instance variables @control_band,
|
577
|
+
@expected_band, @signal, @peak_indices, @peak_y_values and @clusters</p>
|
578
|
+
|
579
|
+
<p>Options and defaults</p>
|
580
|
+
<ul><li>
|
581
|
+
<p><code>:k => 9</code>, -the number of clusters for the R
|
582
|
+
<code>kmeans</code> function</p>
|
583
|
+
</li><li>
|
584
|
+
<p><code>:seed => false</code> -set this to a number to make the randomized
|
585
|
+
clustering reproducible</p>
|
586
|
+
</li><li>
|
587
|
+
<p><code>:control_chd => 0.5</code> -the value of the control thread/window</p>
|
588
|
+
</li><li>
|
589
|
+
<p><code>:expected_chd => 1.0</code> -the value of the expected
|
590
|
+
thread/window</p>
|
591
|
+
</li><li>
|
592
|
+
<p><code>:adjust => 1.0</code> -the kernel adjustment parameter for the R
|
593
|
+
<code>density</code> function</p>
|
594
|
+
</li><li>
|
595
|
+
<p><code>:pseudo => false</code> - force the densities into a single thread
|
596
|
+
cluster when the number of distinct threads with SNPs is < the value of
|
597
|
+
k. This is only useful in a situation where the spread of the statistic is
|
598
|
+
very limited, e.g. when using mapped/unmapped mate pairs, where almost all
|
599
|
+
windows will have proportion 1.0 but a tiny number will be close to 0.5
|
600
|
+
with few other values considered.</p>
|
601
|
+
</li></ul>
|
602
|
+
|
603
|
+
|
604
|
+
|
605
|
+
<div class="method-source-code" id="calculate_clusters-source">
|
606
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 764</span>
|
607
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">calculate_clusters</span>( <span class="ruby-identifier">opts</span>={} )
|
608
|
+
<span class="ruby-identifier">options</span> = {<span class="ruby-value">:k</span> =<span class="ruby-operator">></span> <span class="ruby-value">9</span>, <span class="ruby-value">:seed</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">false</span>, <span class="ruby-value">:adjust</span> =<span class="ruby-operator">></span> <span class="ruby-value">1</span>, <span class="ruby-value">:control_chd</span> =<span class="ruby-operator">></span> <span class="ruby-value">0.5</span>, <span class="ruby-value">:expected_chd</span> =<span class="ruby-operator">></span> <span class="ruby-value">1.0</span>, <span class="ruby-value">:pseudo</span> =<span class="ruby-operator">></span> <span class="ruby-keyword">false</span>}
|
609
|
+
<span class="ruby-identifier">options</span> = <span class="ruby-identifier">options</span>.<span class="ruby-identifier">merge</span>(<span class="ruby-identifier">opts</span>)
|
610
|
+
<span class="ruby-keyword">if</span> <span class="ruby-identifier">options</span>[<span class="ruby-value">:pseudo</span>]
|
611
|
+
<span class="ruby-identifier">put_threads_into_individual_clusters</span>(<span class="ruby-identifier">options</span>)
|
612
|
+
<span class="ruby-keyword">return</span>
|
613
|
+
<span class="ruby-keyword">end</span>
|
614
|
+
<span class="ruby-identifier">r</span> = <span class="ruby-identifier">new_r</span>
|
615
|
+
<span class="ruby-identifier">names</span> = []
|
616
|
+
<span class="ruby-identifier">name</span> = <span class="ruby-string">"a"</span>
|
617
|
+
<span class="ruby-ivar">@control_band</span> = <span class="ruby-keyword">nil</span> <span class="ruby-comment">#needs resetting as we are working with new clusters</span>
|
618
|
+
<span class="ruby-ivar">@expected_band</span> = <span class="ruby-keyword">nil</span> <span class="ruby-comment">#needs resetting as we are working with new clusters</span>
|
619
|
+
<span class="ruby-ivar">@signal</span> = <span class="ruby-keyword">nil</span> <span class="ruby-comment">#needs resetting as we are working with new clusters</span>
|
620
|
+
<span class="ruby-ivar">@peak_indices</span> = <span class="ruby-keyword">nil</span> <span class="ruby-comment">#needs resetting as we are working with new cluster</span>
|
621
|
+
<span class="ruby-ivar">@peak_y_values</span> = <span class="ruby-keyword">nil</span> <span class="ruby-comment">#needs resetting as we are working with new cluster</span>
|
622
|
+
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">calculate_densities</span>(<span class="ruby-identifier">options</span>[<span class="ruby-value">:adjust</span>]).<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">d</span><span class="ruby-operator">|</span>
|
623
|
+
<span class="ruby-identifier">density_array</span> = <span class="ruby-identifier">d</span>.<span class="ruby-identifier">last</span>
|
624
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">assign</span> <span class="ruby-identifier">name</span>, <span class="ruby-identifier">density_array</span> <span class="ruby-comment">##although windows go in in numeric order, r wont allow numbers as names in data frames so we need a proxy</span>
|
625
|
+
<span class="ruby-identifier">names</span> <span class="ruby-operator"><<</span> <span class="ruby-node">"#{name}=#{name}"</span>
|
626
|
+
<span class="ruby-identifier">name</span> = <span class="ruby-identifier">name</span>.<span class="ruby-identifier">next</span>
|
627
|
+
<span class="ruby-keyword">end</span>
|
628
|
+
<span class="ruby-identifier">data_frame_command</span> = <span class="ruby-string">"data = data.frame("</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">names</span>.<span class="ruby-identifier">join</span>(<span class="ruby-string">","</span>) <span class="ruby-operator">+</span> <span class="ruby-string">")"</span>
|
629
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-identifier">data_frame_command</span>
|
630
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"set.seed(#{options[:seed]})"</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">options</span>[<span class="ruby-value">:seed</span>]
|
631
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"k = kmeans(cor(data),#{options[:k]},nstart=1000)"</span>
|
632
|
+
<span class="ruby-ivar">@clusters</span> = <span class="ruby-identifier">r</span>.<span class="ruby-identifier">pull</span> <span class="ruby-string">"k$cluster"</span> <span class="ruby-comment">##clusters are returned in the order in densities</span>
|
633
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">quit</span>
|
634
|
+
<span class="ruby-comment">##now set the cluster colours.. </span>
|
635
|
+
<span class="ruby-identifier">colours</span> = <span class="ruby-node">%w[#A6CEE3 #1F78B4 #B2DF8A #33A02C #FB9A99 #E31A1C #FDBF6F #FF7F00 #CAB2D6 #6A3D9A #FFFF99 #B15928]</span>
|
636
|
+
<span class="ruby-identifier">ci</span> = <span class="ruby-value">0</span>
|
637
|
+
<span class="ruby-identifier">col_nums</span> = {} <span class="ruby-comment">##hash of cluster numbers and colours</span>
|
638
|
+
<span class="ruby-ivar">@clusters</span>.<span class="ruby-identifier">each_index</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">i</span><span class="ruby-operator">|</span>
|
639
|
+
<span class="ruby-keyword">if</span> <span class="ruby-keyword">not</span> <span class="ruby-identifier">col_nums</span>[<span class="ruby-ivar">@clusters</span>[<span class="ruby-identifier">i</span>]]
|
640
|
+
<span class="ruby-identifier">col_nums</span>[<span class="ruby-ivar">@clusters</span>[<span class="ruby-identifier">i</span>]] = <span class="ruby-identifier">colours</span>[<span class="ruby-identifier">ci</span>]
|
641
|
+
<span class="ruby-identifier">ci</span> <span class="ruby-operator">+=</span> <span class="ruby-value">1</span>
|
642
|
+
<span class="ruby-identifier">ci</span> = <span class="ruby-value">0</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">ci</span> <span class="ruby-operator">></span> <span class="ruby-value">11</span>
|
643
|
+
<span class="ruby-keyword">end</span>
|
644
|
+
<span class="ruby-ivar">@thread_colours</span>[<span class="ruby-keyword">self</span>.<span class="ruby-identifier">densities</span>[<span class="ruby-identifier">i</span>].<span class="ruby-identifier">first</span>] = <span class="ruby-identifier">col_nums</span>[<span class="ruby-ivar">@clusters</span>[<span class="ruby-identifier">i</span>]]
|
645
|
+
<span class="ruby-keyword">end</span>
|
646
|
+
<span class="ruby-ivar">@control_band</span> = <span class="ruby-identifier">get_band</span>(<span class="ruby-identifier">options</span>[<span class="ruby-value">:control_chd</span>])
|
647
|
+
<span class="ruby-ivar">@expected_band</span> = <span class="ruby-identifier">get_band</span>(<span class="ruby-identifier">options</span>[<span class="ruby-value">:expected_chd</span>])
|
648
|
+
<span class="ruby-identifier">calculate_signal</span>
|
649
|
+
<span class="ruby-keyword">end</span></pre>
|
650
|
+
</div><!-- calculate_clusters-source -->
|
651
|
+
|
652
|
+
</div>
|
653
|
+
|
654
|
+
|
655
|
+
|
656
|
+
|
657
|
+
</div><!-- calculate_clusters-method -->
|
658
|
+
|
659
|
+
|
660
|
+
<div id="method-i-calculate_densities" class="method-detail ">
|
661
|
+
|
662
|
+
<div class="method-heading">
|
663
|
+
<span class="method-name">calculate_densities</span><span
|
664
|
+
class="method-args">(adjust=1)</span>
|
665
|
+
<span class="method-click-advice">click to toggle source</span>
|
666
|
+
</div>
|
667
|
+
|
668
|
+
|
669
|
+
<div class="method-description">
|
670
|
+
|
671
|
+
<p>Sets and returns the array of arrays <code>[window, [density curve x
|
672
|
+
values], [density curve y values] ]</code>. Calculates the density curve
|
673
|
+
using the R function <code>density()</code>. Always sets @densities regardless of
|
674
|
+
whether it contains anything or not so is useful for trying out adjustment
|
675
|
+
values. Ignores threads with fewer than 2 polymorphisms since density can’t
|
676
|
+
be computed with so few polymorphisms.</p>
|
677
|
+
|
678
|
+
<p>Options and defaults</p>
|
679
|
+
<ul><li>
|
680
|
+
<p><code>adjust = 1</code> -the kernel adjustment parameter for the R
|
681
|
+
<code>density</code> function</p>
|
682
|
+
</li></ul>
|
683
|
+
|
684
|
+
|
685
|
+
|
686
|
+
<div class="method-source-code" id="calculate_densities-source">
|
687
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 679</span>
|
688
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">calculate_densities</span>(<span class="ruby-identifier">adjust</span>=<span class="ruby-value">1</span>)
|
689
|
+
<span class="ruby-identifier">r</span> = <span class="ruby-identifier">new_r</span>
|
690
|
+
<span class="ruby-identifier">densities</span> = []
|
691
|
+
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">threads</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">t</span><span class="ruby-operator">|</span>
|
692
|
+
<span class="ruby-keyword">next</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">t</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator"><</span> <span class="ruby-value">2</span> <span class="ruby-comment">##length of density array is smaller or == threads, since too small windows are ignored...</span>
|
693
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">curr_win</span> = <span class="ruby-identifier">t</span>.<span class="ruby-identifier">last</span>
|
694
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"d = density(curr_win,n=240,kernel=\"gaussian\", from=#{@snp_positions.first[0]}, to=#{@snp_positions.last[0]}, adjust=#{adjust})"</span>
|
695
|
+
<span class="ruby-identifier">densities</span> <span class="ruby-operator"><<</span> [<span class="ruby-identifier">t</span>.<span class="ruby-identifier">first</span>, <span class="ruby-identifier">r</span>.<span class="ruby-identifier">pull</span>(<span class="ruby-string">"d$x"</span>), <span class="ruby-identifier">r</span>.<span class="ruby-identifier">pull</span>(<span class="ruby-string">"d$y"</span>)]
|
696
|
+
<span class="ruby-keyword">end</span>
|
697
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">quit</span>
|
698
|
+
<span class="ruby-ivar">@densities</span> = <span class="ruby-identifier">densities</span>
|
699
|
+
<span class="ruby-identifier">calculate_density_max_y</span> <span class="ruby-comment">##need to re-do every time we get new densities</span>
|
700
|
+
<span class="ruby-identifier">densities</span>
|
701
|
+
<span class="ruby-keyword">end</span></pre>
|
702
|
+
</div><!-- calculate_densities-source -->
|
703
|
+
|
704
|
+
</div>
|
705
|
+
|
706
|
+
|
707
|
+
|
708
|
+
|
709
|
+
</div><!-- calculate_densities-method -->
|
710
|
+
|
711
|
+
|
712
|
+
<div id="method-i-calculate_signal" class="method-detail ">
|
713
|
+
|
714
|
+
<div class="method-heading">
|
715
|
+
<span class="method-name">calculate_signal</span><span
|
716
|
+
class="method-args">()</span>
|
717
|
+
<span class="method-click-advice">click to toggle source</span>
|
718
|
+
</div>
|
719
|
+
|
720
|
+
|
721
|
+
<div class="method-description">
|
722
|
+
|
723
|
+
<p>Returns an array of values representing the ratio of the average of the
|
724
|
+
expected threads/windows to the control threads/windows. Sets @signal, the
|
725
|
+
signal curve.</p>
|
726
|
+
|
727
|
+
|
728
|
+
|
729
|
+
<div class="method-source-code" id="calculate_signal-source">
|
730
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 969</span>
|
731
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">calculate_signal</span>
|
732
|
+
<span class="ruby-identifier">r</span> = <span class="ruby-identifier">new_r</span>
|
733
|
+
<span class="ruby-identifier">name</span> = <span class="ruby-string">"a"</span>
|
734
|
+
<span class="ruby-identifier">control_names</span> = []
|
735
|
+
<span class="ruby-identifier">expected_names</span> = []
|
736
|
+
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">densities</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">d</span><span class="ruby-operator">|</span>
|
737
|
+
<span class="ruby-keyword">if</span> <span class="ruby-ivar">@control_band</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-identifier">d</span>.<span class="ruby-identifier">first</span>)
|
738
|
+
<span class="ruby-identifier">density_array</span> = <span class="ruby-identifier">d</span>.<span class="ruby-identifier">last</span>
|
739
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">assign</span> <span class="ruby-identifier">name</span>, <span class="ruby-identifier">density_array</span> <span class="ruby-comment">##although windows go in in numeric order, r wont allow numbers as names in data frames so we need a proxy</span>
|
740
|
+
<span class="ruby-identifier">control_names</span> <span class="ruby-operator"><<</span> <span class="ruby-node">"#{name}=#{name}"</span>
|
741
|
+
<span class="ruby-keyword">elsif</span> <span class="ruby-ivar">@expected_band</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-identifier">d</span>.<span class="ruby-identifier">first</span>)
|
742
|
+
<span class="ruby-identifier">density_array</span> = <span class="ruby-identifier">d</span>.<span class="ruby-identifier">last</span>
|
743
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">assign</span> <span class="ruby-identifier">name</span>, <span class="ruby-identifier">density_array</span>
|
744
|
+
<span class="ruby-identifier">expected_names</span> <span class="ruby-operator"><<</span> <span class="ruby-node">"#{name}=#{name}"</span>
|
745
|
+
<span class="ruby-keyword">end</span>
|
746
|
+
<span class="ruby-identifier">name</span> = <span class="ruby-identifier">name</span>.<span class="ruby-identifier">next</span>
|
747
|
+
<span class="ruby-keyword">end</span>
|
748
|
+
<span class="ruby-identifier">data_frame_command</span> = <span class="ruby-string">"control = data.frame("</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">control_names</span>.<span class="ruby-identifier">join</span>(<span class="ruby-string">","</span>) <span class="ruby-operator">+</span> <span class="ruby-string">")"</span>
|
749
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-identifier">data_frame_command</span>
|
750
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"control_mean = apply(control, 1, function(ecks) mean((as.numeric(ecks))) )"</span>
|
751
|
+
<span class="ruby-identifier">data_frame_command</span> = <span class="ruby-string">"expected = data.frame("</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">expected_names</span>.<span class="ruby-identifier">join</span>(<span class="ruby-string">","</span>) <span class="ruby-operator">+</span> <span class="ruby-string">")"</span>
|
752
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-identifier">data_frame_command</span>
|
753
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"expected_mean = apply(expected, 1, function(ecks) mean((as.numeric(ecks))) )"</span>
|
754
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"signal = expected_mean / control_mean"</span>
|
755
|
+
<span class="ruby-identifier">signal</span> = <span class="ruby-identifier">r</span>.<span class="ruby-identifier">pull</span> <span class="ruby-string">"signal"</span>
|
756
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">quit</span>
|
757
|
+
<span class="ruby-ivar">@signal</span> = <span class="ruby-identifier">signal</span>
|
758
|
+
<span class="ruby-keyword">end</span></pre>
|
759
|
+
</div><!-- calculate_signal-source -->
|
760
|
+
|
761
|
+
</div>
|
762
|
+
|
763
|
+
|
764
|
+
|
765
|
+
|
766
|
+
</div><!-- calculate_signal-method -->
|
767
|
+
|
768
|
+
|
769
|
+
<div id="method-i-close" class="method-detail ">
|
770
|
+
|
771
|
+
<div class="method-heading">
|
772
|
+
<span class="method-name">close</span><span
|
773
|
+
class="method-args">()</span>
|
774
|
+
<span class="method-click-advice">click to toggle source</span>
|
775
|
+
</div>
|
776
|
+
|
777
|
+
|
778
|
+
<div class="method-description">
|
779
|
+
|
780
|
+
<p>For BAM files, calls Bio::DB::Sam#close to close the connections to input
|
781
|
+
files safely</p>
|
782
|
+
|
783
|
+
|
784
|
+
|
785
|
+
<div class="method-source-code" id="close-source">
|
786
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 399</span>
|
787
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">close</span>
|
788
|
+
<span class="ruby-keyword">case</span> <span class="ruby-ivar">@opts</span>[<span class="ruby-value">:format</span>]
|
789
|
+
<span class="ruby-keyword">when</span> <span class="ruby-value">:bam</span> <span class="ruby-keyword">then</span> <span class="ruby-ivar">@file</span>.<span class="ruby-identifier">close</span>
|
790
|
+
<span class="ruby-keyword">end</span>
|
791
|
+
<span class="ruby-keyword">end</span></pre>
|
792
|
+
</div><!-- close-source -->
|
793
|
+
|
794
|
+
</div>
|
795
|
+
|
796
|
+
|
797
|
+
|
798
|
+
|
799
|
+
</div><!-- close-method -->
|
800
|
+
|
801
|
+
|
802
|
+
<div id="method-i-clusters" class="method-detail ">
|
803
|
+
|
804
|
+
<div class="method-heading">
|
805
|
+
<span class="method-name">clusters</span><span
|
806
|
+
class="method-args">(opts={})</span>
|
807
|
+
<span class="method-click-advice">click to toggle source</span>
|
808
|
+
</div>
|
809
|
+
|
810
|
+
|
811
|
+
<div class="method-description">
|
812
|
+
|
813
|
+
<p>Returns the array instance variable @clusters. The R function <code>kmeans()</code> is
|
814
|
+
used to calculate the clusters based on a correlation matrix of the density
|
815
|
+
curves. If @clusters is nil when called this method will run the <a
|
816
|
+
href="Gngm.html#method-i-calculate_clusters">#calculate_clusters</a> method
|
817
|
+
and set @clusters. With this method you cannot recalculate the clusters
|
818
|
+
after they have been done once.</p>
|
819
|
+
|
820
|
+
<p>Options and defaults</p>
|
821
|
+
<ul><li>
|
822
|
+
<p><code>:k => 9</code> -the number of clusters for the R
|
823
|
+
<code>kmeans</code> function</p>
|
824
|
+
</li><li>
|
825
|
+
<p><code>:seed => false</code> -set this to a number to make the randomized
|
826
|
+
clustering reproducible</p>
|
827
|
+
</li><li>
|
828
|
+
<p><code>:control_chd => 0.5</code> -the value of the control thread/window</p>
|
829
|
+
</li><li>
|
830
|
+
<p><code>:expected_chd => 1.0</code> -the value of the expected
|
831
|
+
thread/window</p>
|
832
|
+
</li><li>
|
833
|
+
<p><code>:adjust => 1.0</code> -the kernel adjustment parameter for the R
|
834
|
+
<code>density</code> function</p>
|
835
|
+
</li></ul>
|
836
|
+
|
837
|
+
|
838
|
+
|
839
|
+
<div class="method-source-code" id="clusters-source">
|
840
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 748</span>
|
841
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">clusters</span>(<span class="ruby-identifier">opts</span>={})
|
842
|
+
<span class="ruby-ivar">@clusters</span> <span class="ruby-operator">||=</span> <span class="ruby-identifier">calculate_clusters</span>(<span class="ruby-identifier">opts</span>={})
|
843
|
+
<span class="ruby-keyword">end</span></pre>
|
844
|
+
</div><!-- clusters-source -->
|
845
|
+
|
846
|
+
</div>
|
847
|
+
|
848
|
+
|
849
|
+
|
850
|
+
|
851
|
+
</div><!-- clusters-method -->
|
852
|
+
|
853
|
+
|
854
|
+
<div id="method-i-collect_threads" class="method-detail ">
|
855
|
+
|
856
|
+
<div class="method-heading">
|
857
|
+
<span class="method-name">collect_threads</span><span
|
858
|
+
class="method-args">(opts=@opts[:threads])</span>
|
859
|
+
<span class="method-click-advice">click to toggle source</span>
|
860
|
+
</div>
|
861
|
+
|
862
|
+
|
863
|
+
<div class="method-description">
|
864
|
+
|
865
|
+
<p>Resets contents of instance variable @threads and returns an array of
|
866
|
+
arrays <code>[[window 1, snp position 1, snp position 2 ... snp position
|
867
|
+
n],[window 2, snp position 1, snp position 2 ... snp position n] ]</code>.
|
868
|
+
Always sets @threads regardless of whether it contains anything or not so
|
869
|
+
is useful for trying out different window sizes etc.</p>
|
870
|
+
|
871
|
+
<p>Options and defaults:</p>
|
872
|
+
<ul><li>
|
873
|
+
<p><code>:start => 0.2</code> -first window</p>
|
874
|
+
</li><li>
|
875
|
+
<p><code>:stop => 1.0</code> -last window</p>
|
876
|
+
</li><li>
|
877
|
+
<p><code>:slide => 0.01</code> -distance between windows</p>
|
878
|
+
</li><li>
|
879
|
+
<p><code>:size => 0.1</code> -window width</p>
|
880
|
+
</li></ul>
|
881
|
+
|
882
|
+
|
883
|
+
|
884
|
+
<div class="method-source-code" id="collect_threads-source">
|
885
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 597</span>
|
886
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">collect_threads</span>(<span class="ruby-identifier">opts</span>=<span class="ruby-ivar">@opts</span>[<span class="ruby-value">:threads</span>])
|
887
|
+
<span class="ruby-identifier">opts</span>[<span class="ruby-value">:slide</span>] = <span class="ruby-value">0.000001</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">opts</span>[<span class="ruby-value">:slide</span>] <span class="ruby-operator"><</span> <span class="ruby-value">0.000001</span> <span class="ruby-comment">##to allow for the rounding error in the step function... </span>
|
888
|
+
<span class="ruby-identifier">raise</span> <span class="ruby-constant">RuntimeError</span>, <span class="ruby-string">"snp positions have not been calculated yet"</span> <span class="ruby-keyword">if</span> <span class="ruby-keyword">not</span> <span class="ruby-ivar">@snp_positions</span>
|
889
|
+
<span class="ruby-identifier">start</span>,<span class="ruby-identifier">stop</span>,<span class="ruby-identifier">slide</span>,<span class="ruby-identifier">size</span> = <span class="ruby-identifier">opts</span>[<span class="ruby-value">:start</span>].<span class="ruby-identifier">to_f</span>, <span class="ruby-identifier">opts</span>[<span class="ruby-value">:stop</span>].<span class="ruby-identifier">to_f</span>, <span class="ruby-identifier">opts</span>[<span class="ruby-value">:slide</span>].<span class="ruby-identifier">to_f</span>, <span class="ruby-identifier">opts</span>[<span class="ruby-value">:size</span>].<span class="ruby-identifier">to_f</span>
|
890
|
+
<span class="ruby-identifier">arr</span> = []
|
891
|
+
(<span class="ruby-identifier">start</span><span class="ruby-operator">..</span><span class="ruby-identifier">stop</span>).<span class="ruby-identifier">step</span>(<span class="ruby-identifier">slide</span>) <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">win</span><span class="ruby-operator">|</span>
|
892
|
+
<span class="ruby-identifier">arr</span> <span class="ruby-operator"><<</span> [<span class="ruby-identifier">win</span>, <span class="ruby-ivar">@snp_positions</span>.<span class="ruby-identifier">select</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">x</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span>.<span class="ruby-identifier">last</span> <span class="ruby-operator">>=</span> <span class="ruby-identifier">win</span> <span class="ruby-keyword">and</span> <span class="ruby-identifier">x</span>.<span class="ruby-identifier">last</span> <span class="ruby-operator"><</span> <span class="ruby-identifier">win</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">size</span> }.<span class="ruby-identifier">collect</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">y</span><span class="ruby-operator">|</span> <span class="ruby-identifier">y</span>.<span class="ruby-identifier">first</span>} ]
|
893
|
+
<span class="ruby-keyword">end</span>
|
894
|
+
<span class="ruby-ivar">@threads</span> = <span class="ruby-identifier">arr</span>
|
895
|
+
<span class="ruby-keyword">end</span></pre>
|
896
|
+
</div><!-- collect_threads-source -->
|
897
|
+
|
898
|
+
</div>
|
899
|
+
|
900
|
+
|
901
|
+
|
902
|
+
|
903
|
+
</div><!-- collect_threads-method -->
|
904
|
+
|
905
|
+
|
906
|
+
<div id="method-i-densities" class="method-detail ">
|
907
|
+
|
908
|
+
<div class="method-heading">
|
909
|
+
<span class="method-name">densities</span><span
|
910
|
+
class="method-args">(adjust=1)</span>
|
911
|
+
<span class="method-click-advice">click to toggle source</span>
|
912
|
+
</div>
|
913
|
+
|
914
|
+
|
915
|
+
<div class="method-description">
|
916
|
+
|
917
|
+
<p>Returns the instance variable @densities array of arrays <code>[window,
|
918
|
+
[density curve x values], [density curve y values] ]</code>. The R function
|
919
|
+
<code>density()</code> is used to calculate the values. If @densities is nil when
|
920
|
+
called this method will run the <a
|
921
|
+
href="Gngm.html#method-i-calculate_densities">#calculate_densities</a>
|
922
|
+
method and set @densities. With this method you cannot recalculate the
|
923
|
+
densities after they have been done once.</p>
|
924
|
+
|
925
|
+
<p>Options and defaults</p>
|
926
|
+
<ul><li>
|
927
|
+
<p><code>adjust = 1</code> -the kernel adjustment parameter for the R
|
928
|
+
<code>density</code> function</p>
|
929
|
+
</li></ul>
|
930
|
+
|
931
|
+
|
932
|
+
|
933
|
+
<div class="method-source-code" id="densities-source">
|
934
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 669</span>
|
935
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">densities</span>(<span class="ruby-identifier">adjust</span>=<span class="ruby-value">1</span>)
|
936
|
+
<span class="ruby-ivar">@densities</span> <span class="ruby-operator">||=</span> <span class="ruby-identifier">calculate_densities</span>(<span class="ruby-identifier">adjust</span>)
|
937
|
+
<span class="ruby-keyword">end</span></pre>
|
938
|
+
</div><!-- densities-source -->
|
939
|
+
|
940
|
+
</div>
|
941
|
+
|
942
|
+
|
943
|
+
|
944
|
+
|
945
|
+
</div><!-- densities-method -->
|
946
|
+
|
947
|
+
|
948
|
+
<div id="method-i-draw_bands" class="method-detail ">
|
949
|
+
|
950
|
+
<div class="method-heading">
|
951
|
+
<span class="method-name">draw_bands</span><span
|
952
|
+
class="method-args">(file="myfile.png", optsa={})</span>
|
953
|
+
<span class="method-click-advice">click to toggle source</span>
|
954
|
+
</div>
|
955
|
+
|
956
|
+
|
957
|
+
<div class="method-description">
|
958
|
+
|
959
|
+
<p>Draws the clustered bands that correspond to the expected and control
|
960
|
+
window in a single PNG file <code>file</code></p>
|
961
|
+
|
962
|
+
<p>Options and defaults</p>
|
963
|
+
<ul><li>
|
964
|
+
<p><code>:add_lines => nil</code> -if an array of positions is provided, e.g.
|
965
|
+
<code>[100,345]</code>, vertical lines will be drawn at these positions. Useful for
|
966
|
+
indicating feature positions on the plot</p>
|
967
|
+
</li><li>
|
968
|
+
<p><code>:width => 1000</code> -width of the PNG in pixels</p>
|
969
|
+
</li><li>
|
970
|
+
<p><code>:height => 500</code> -height of the PNG in pixels</p>
|
971
|
+
</li></ul>
|
972
|
+
|
973
|
+
|
974
|
+
|
975
|
+
<div class="method-source-code" id="draw_bands-source">
|
976
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 702</span>
|
977
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">draw_bands</span>(<span class="ruby-identifier">file</span>=<span class="ruby-string">"myfile.png"</span>, <span class="ruby-identifier">optsa</span>={})
|
978
|
+
<span class="ruby-identifier">opts</span> = <span class="ruby-ivar">@opts</span>[<span class="ruby-value">:graphics</span>].<span class="ruby-identifier">merge</span>(<span class="ruby-identifier">optsa</span>)
|
979
|
+
<span class="ruby-identifier">pp</span> <span class="ruby-identifier">optsa</span>
|
980
|
+
<span class="ruby-identifier">raise</span> <span class="ruby-constant">RuntimeError</span>, <span class="ruby-string">"Can't draw threads until clustering is done"</span> <span class="ruby-keyword">unless</span> <span class="ruby-ivar">@clusters</span>
|
981
|
+
<span class="ruby-comment">#uses R's standard plot functions.</span>
|
982
|
+
<span class="ruby-comment">##same as draw_threads, but skips threads that aren't on the bands lists</span>
|
983
|
+
<span class="ruby-comment">## </span>
|
984
|
+
<span class="ruby-identifier">r</span> = <span class="ruby-identifier">new_r</span>
|
985
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"png('#{file}', width=#{opts[:width]}, height=#{opts[:height]})"</span>
|
986
|
+
<span class="ruby-identifier">plot_open</span> = <span class="ruby-keyword">false</span>
|
987
|
+
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">densities</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">t</span><span class="ruby-operator">|</span>
|
988
|
+
<span class="ruby-keyword">if</span> <span class="ruby-ivar">@control_band</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-identifier">t</span>[<span class="ruby-value">0</span>]) <span class="ruby-keyword">or</span> <span class="ruby-ivar">@expected_band</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-identifier">t</span>[<span class="ruby-value">0</span>])
|
989
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">dx</span> = <span class="ruby-identifier">t</span>[<span class="ruby-value">1</span>]
|
990
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">dy</span> = <span class="ruby-identifier">t</span>[<span class="ruby-value">2</span>]
|
991
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">curr_win</span> = <span class="ruby-identifier">t</span>.<span class="ruby-identifier">last</span>
|
992
|
+
<span class="ruby-comment">#r.eval "d = density(curr_win,n=240,kernel=\"gaussian\", from=#{@snp_positions.first[0]}, to=#{@snp_positions.last[0]})"</span>
|
993
|
+
<span class="ruby-keyword">if</span> <span class="ruby-identifier">plot_open</span>
|
994
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"lines(dx, dy, col=\"#{@thread_colours[t.first]}\")"</span>
|
995
|
+
<span class="ruby-keyword">else</span>
|
996
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"plot(dx, dy, type=\"l\", col=\"#{@thread_colours[t.first]}\",ylim=c(0,#{density_max_y}), main='#{file}',xlab='position', ylab='density')"</span>
|
997
|
+
<span class="ruby-identifier">plot_open</span> = <span class="ruby-keyword">true</span>
|
998
|
+
<span class="ruby-keyword">end</span>
|
999
|
+
<span class="ruby-keyword">end</span>
|
1000
|
+
<span class="ruby-keyword">end</span>
|
1001
|
+
<span class="ruby-identifier">label1</span> = <span class="ruby-string">"Control band: "</span> <span class="ruby-operator">+</span> <span class="ruby-ivar">@control_band</span>.<span class="ruby-identifier">min</span>.<span class="ruby-identifier">to_s</span> <span class="ruby-operator">+</span> <span class="ruby-string">" < ChD < "</span> <span class="ruby-operator">+</span> <span class="ruby-ivar">@control_band</span>.<span class="ruby-identifier">max</span>.<span class="ruby-identifier">to_s</span>
|
1002
|
+
<span class="ruby-identifier">label2</span> = <span class="ruby-string">"Expected band: "</span> <span class="ruby-operator">+</span> <span class="ruby-ivar">@expected_band</span>.<span class="ruby-identifier">min</span>.<span class="ruby-identifier">to_s</span> <span class="ruby-operator">+</span> <span class="ruby-string">" < ChD < "</span> <span class="ruby-operator">+</span> <span class="ruby-ivar">@expected_band</span>.<span class="ruby-identifier">max</span>.<span class="ruby-identifier">to_s</span>
|
1003
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"legend('top', c('#{label1}','#{label2}'), lty=c(1,1),lwd=c(2.5,2.5),col=c('#{@thread_colours[@control_band.first]}','#{@thread_colours[@expected_band.first]}'))"</span>
|
1004
|
+
<span class="ruby-keyword">if</span> <span class="ruby-identifier">opts</span>[<span class="ruby-value">:add_lines</span>] <span class="ruby-keyword">and</span> <span class="ruby-identifier">opts</span>[<span class="ruby-value">:add_lines</span>].<span class="ruby-identifier">instance_of?</span>(<span class="ruby-constant">Array</span>)
|
1005
|
+
<span class="ruby-identifier">opts</span>[<span class="ruby-value">:add_lines</span>].<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">pos</span><span class="ruby-operator">|</span>
|
1006
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"abline(v=#{pos})"</span>
|
1007
|
+
<span class="ruby-keyword">end</span>
|
1008
|
+
<span class="ruby-keyword">end</span>
|
1009
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"dev.off()"</span>
|
1010
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">quit</span>
|
1011
|
+
<span class="ruby-keyword">end</span></pre>
|
1012
|
+
</div><!-- draw_bands-source -->
|
1013
|
+
|
1014
|
+
</div>
|
1015
|
+
|
1016
|
+
|
1017
|
+
|
1018
|
+
|
1019
|
+
</div><!-- draw_bands-method -->
|
1020
|
+
|
1021
|
+
|
1022
|
+
<div id="method-i-draw_hit_count" class="method-detail ">
|
1023
|
+
|
1024
|
+
<div class="method-heading">
|
1025
|
+
<span class="method-name">draw_hit_count</span><span
|
1026
|
+
class="method-args">(file="myfile.png",opts=@opts[:graphics])</span>
|
1027
|
+
<span class="method-click-advice">click to toggle source</span>
|
1028
|
+
</div>
|
1029
|
+
|
1030
|
+
|
1031
|
+
<div class="method-description">
|
1032
|
+
|
1033
|
+
<p>Draws a barplot of the number of polymorphisms in each thread/window in a
|
1034
|
+
single PNG file <code>file</code></p>
|
1035
|
+
|
1036
|
+
|
1037
|
+
|
1038
|
+
<div class="method-source-code" id="draw_hit_count-source">
|
1039
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 948</span>
|
1040
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">draw_hit_count</span>(<span class="ruby-identifier">file</span>=<span class="ruby-string">"myfile.png"</span>,<span class="ruby-identifier">opts</span>=<span class="ruby-ivar">@opts</span>[<span class="ruby-value">:graphics</span>])
|
1041
|
+
<span class="ruby-identifier">r</span> = <span class="ruby-identifier">new_r</span>
|
1042
|
+
<span class="ruby-identifier">wins</span> = []
|
1043
|
+
<span class="ruby-identifier">hits</span> = []
|
1044
|
+
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">threads</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">thread</span><span class="ruby-operator">|</span>
|
1045
|
+
<span class="ruby-identifier">wins</span> <span class="ruby-operator"><<</span> <span class="ruby-identifier">thread</span>.<span class="ruby-identifier">first</span>
|
1046
|
+
<span class="ruby-keyword">if</span> <span class="ruby-identifier">thread</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">empty?</span>
|
1047
|
+
<span class="ruby-identifier">hits</span> <span class="ruby-operator"><<</span> <span class="ruby-value">0.01</span> <span class="ruby-comment">##pseudovalue gets around the case where a thread has no hits... which messes up barplot in R</span>
|
1048
|
+
<span class="ruby-keyword">else</span>
|
1049
|
+
<span class="ruby-identifier">hits</span> <span class="ruby-operator"><<</span> <span class="ruby-identifier">thread</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">length</span>
|
1050
|
+
<span class="ruby-keyword">end</span>
|
1051
|
+
<span class="ruby-keyword">end</span>
|
1052
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">wins</span> = <span class="ruby-identifier">wins</span>
|
1053
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">hits</span> = <span class="ruby-identifier">hits</span>
|
1054
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"png('#{file}', width=#{opts[:width]}, height=#{opts[:height]})"</span>
|
1055
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"barplot(hits, names.arg=c(wins), xlab='window', log='y', ylab='number of hits', main='Number of Polymorphisms #{file}', col=rgb(r=0,g=1,b=1, alpha=0.3), na.rm = TRUE)"</span>
|
1056
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"dev.off()"</span>
|
1057
|
+
<span class="ruby-keyword">end</span></pre>
|
1058
|
+
</div><!-- draw_hit_count-source -->
|
1059
|
+
|
1060
|
+
</div>
|
1061
|
+
|
1062
|
+
|
1063
|
+
|
1064
|
+
|
1065
|
+
</div><!-- draw_hit_count-method -->
|
1066
|
+
|
1067
|
+
|
1068
|
+
<div id="method-i-draw_peaks" class="method-detail ">
|
1069
|
+
|
1070
|
+
<div class="method-heading">
|
1071
|
+
<span class="method-name">draw_peaks</span><span
|
1072
|
+
class="method-args">(file="myfile.png",opts=@opts[:graphics])</span>
|
1073
|
+
<span class="method-click-advice">click to toggle source</span>
|
1074
|
+
</div>
|
1075
|
+
|
1076
|
+
|
1077
|
+
<div class="method-description">
|
1078
|
+
|
1079
|
+
<p>Draws the peaks calculated from the signal curve by the R function
|
1080
|
+
<code>Peaks</code> in Bio::Util::Gngm#calculate_peaks. Adds boxes of width
|
1081
|
+
<code>:range</code> to each peak and annotates the limits. Options are set
|
1082
|
+
in the global options hash <code>:peaks</code> and relate to the Peaks
|
1083
|
+
function in R</p>
|
1084
|
+
|
1085
|
+
|
1086
|
+
|
1087
|
+
<div class="method-source-code" id="draw_peaks-source">
|
1088
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 891</span>
|
1089
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">draw_peaks</span>(<span class="ruby-identifier">file</span>=<span class="ruby-string">"myfile.png"</span>,<span class="ruby-identifier">opts</span>=<span class="ruby-ivar">@opts</span>[<span class="ruby-value">:graphics</span>])
|
1090
|
+
<span class="ruby-identifier">opts_a</span> = <span class="ruby-ivar">@opts</span>[<span class="ruby-value">:peaks</span>]
|
1091
|
+
<span class="ruby-identifier">opts_a</span>.<span class="ruby-identifier">merge!</span>(<span class="ruby-identifier">opts</span>)
|
1092
|
+
<span class="ruby-identifier">opts</span> = <span class="ruby-identifier">opts_a</span> <span class="ruby-comment">##sigh ... </span>
|
1093
|
+
<span class="ruby-comment">#opts[:background] = opts[:background].to_s.upcase</span>
|
1094
|
+
<span class="ruby-comment">#opts[:markov] = opts[:markov].to_s.upcase </span>
|
1095
|
+
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">get_peaks</span>(<span class="ruby-identifier">opts</span>)
|
1096
|
+
<span class="ruby-identifier">r</span> = <span class="ruby-identifier">new_r</span>
|
1097
|
+
<span class="ruby-comment">#r.eval "suppressMessages ( library('Peaks') )"</span>
|
1098
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">signal</span> = <span class="ruby-keyword">self</span>.<span class="ruby-identifier">signal</span>
|
1099
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">x_vals</span> = <span class="ruby-keyword">self</span>.<span class="ruby-identifier">densities</span>[<span class="ruby-value">0</span>][<span class="ruby-value">1</span>]
|
1100
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"png('#{file}', width=#{opts[:width]}, height=#{opts[:height]})"</span>
|
1101
|
+
<span class="ruby-comment">#r.eval "spec = SpectrumSearch(signal,#{opts[:sigma]},threshold=#{opts[:threshold]},background=#{opts[:background]},iterations=#{opts[:iterations]},markov=#{opts[:markov]},window=#{opts[:window]})"</span>
|
1102
|
+
<span class="ruby-comment">#peak_positions = r.pull "spec$pos"</span>
|
1103
|
+
<span class="ruby-comment">#y = r.pull "spec$y"</span>
|
1104
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">y</span> = <span class="ruby-ivar">@peak_y_values</span>
|
1105
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">pos</span> = <span class="ruby-ivar">@peak_indices</span>
|
1106
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"plot(x_vals,y, type=\"l\", xlab='position', ylab='Peaks', main='#{file}' )"</span>
|
1107
|
+
<span class="ruby-ivar">@peak_indices</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">peak</span><span class="ruby-operator">|</span>
|
1108
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"rect(x_vals[#{peak}]-(#{opts[:range]/2}), 0, x_vals[#{peak}]+#{opts[:range]/2}, max(y), col=rgb(r=0,g=1,b=0, alpha=0.3) )"</span>
|
1109
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"text(x_vals[#{peak}]-(#{opts[:range]/2}),max(y) + 0.05, floor(x_vals[#{peak}]-(#{opts[:range]/2})) )"</span>
|
1110
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"text(x_vals[#{peak}]+(#{opts[:range]/2}), max(y) + 0.05, floor(x_vals[#{peak}]+(#{opts[:range]/2})) )"</span>
|
1111
|
+
<span class="ruby-keyword">end</span>
|
1112
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"dev.off()"</span>
|
1113
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">quit</span>
|
1114
|
+
<span class="ruby-keyword">end</span></pre>
|
1115
|
+
</div><!-- draw_peaks-source -->
|
1116
|
+
|
1117
|
+
</div>
|
1118
|
+
|
1119
|
+
|
1120
|
+
|
1121
|
+
|
1122
|
+
</div><!-- draw_peaks-method -->
|
1123
|
+
|
1124
|
+
|
1125
|
+
<div id="method-i-draw_signal" class="method-detail ">
|
1126
|
+
|
1127
|
+
<div class="method-heading">
|
1128
|
+
<span class="method-name">draw_signal</span><span
|
1129
|
+
class="method-args">(file="myfile.png", opts=@opts[:graphics])</span>
|
1130
|
+
<span class="method-click-advice">click to toggle source</span>
|
1131
|
+
</div>
|
1132
|
+
|
1133
|
+
|
1134
|
+
<div class="method-description">
|
1135
|
+
|
1136
|
+
<p>Draws the contents of the @signal instance variable in a single PNG file
|
1137
|
+
<code>file</code></p>
|
1138
|
+
|
1139
|
+
|
1140
|
+
|
1141
|
+
<div class="method-source-code" id="draw_signal-source">
|
1142
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 868</span>
|
1143
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">draw_signal</span>(<span class="ruby-identifier">file</span>=<span class="ruby-string">"myfile.png"</span>, <span class="ruby-identifier">opts</span>=<span class="ruby-ivar">@opts</span>[<span class="ruby-value">:graphics</span>]) <span class="ruby-comment">#data.frame(bubs=data$bubbles_found,conf=data$bubbles_confirmed)</span>
|
1144
|
+
<span class="ruby-identifier">r</span> = <span class="ruby-identifier">new_r</span>
|
1145
|
+
<span class="ruby-identifier">x_vals</span> = <span class="ruby-keyword">self</span>.<span class="ruby-identifier">densities</span>[<span class="ruby-value">0</span>][<span class="ruby-value">1</span>]
|
1146
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"png('#{file}', width=#{opts[:width]}, height=#{opts[:height]})"</span>
|
1147
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">x_vals</span> = <span class="ruby-identifier">x_vals</span>
|
1148
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">signal</span> = <span class="ruby-keyword">self</span>.<span class="ruby-identifier">signal</span>
|
1149
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"plot(x_vals,signal, type=\"l\", xlab='position', ylab='ratio of signals (expected / control ~ homo / hetero)', main='#{file}' )"</span>
|
1150
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"dev.off()"</span>
|
1151
|
+
<span class="ruby-keyword">end</span></pre>
|
1152
|
+
</div><!-- draw_signal-source -->
|
1153
|
+
|
1154
|
+
</div>
|
1155
|
+
|
1156
|
+
|
1157
|
+
|
1158
|
+
|
1159
|
+
</div><!-- draw_signal-method -->
|
1160
|
+
|
1161
|
+
|
1162
|
+
<div id="method-i-draw_threads" class="method-detail ">
|
1163
|
+
|
1164
|
+
<div class="method-heading">
|
1165
|
+
<span class="method-name">draw_threads</span><span
|
1166
|
+
class="method-args">(file="myfile.png", options={})</span>
|
1167
|
+
<span class="method-click-advice">click to toggle source</span>
|
1168
|
+
</div>
|
1169
|
+
|
1170
|
+
|
1171
|
+
<div class="method-description">
|
1172
|
+
|
1173
|
+
<p>Draws the threads in a single PNG file <code>file</code></p>
|
1174
|
+
|
1175
|
+
<p>Options and defaults</p>
|
1176
|
+
<ul><li>
|
1177
|
+
<p><code>:draw_legend => nil</code> -if a filename is provided a legend
|
1178
|
+
will be drawn in a second plot</p>
|
1179
|
+
</li><li>
|
1180
|
+
<p><code>:width => 1000</code> -width of the PNG in pixels</p>
|
1181
|
+
</li><li>
|
1182
|
+
<p><code>:height => 500</code> -height of the PNG in pixels</p>
|
1183
|
+
</li></ul>
|
1184
|
+
|
1185
|
+
|
1186
|
+
|
1187
|
+
<div class="method-source-code" id="draw_threads-source">
|
1188
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 632</span>
|
1189
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">draw_threads</span>(<span class="ruby-identifier">file</span>=<span class="ruby-string">"myfile.png"</span>, <span class="ruby-identifier">options</span>={})
|
1190
|
+
<span class="ruby-identifier">opts</span> = <span class="ruby-ivar">@opts</span>[<span class="ruby-value">:graphics</span>].<span class="ruby-identifier">merge</span>(<span class="ruby-identifier">options</span>)
|
1191
|
+
<span class="ruby-comment">#uses R's standard plot functions.. needed because ggplot can die unexpectedly...</span>
|
1192
|
+
<span class="ruby-identifier">raise</span> <span class="ruby-constant">RuntimeError</span>, <span class="ruby-string">"Can't draw threads until clustering is done"</span> <span class="ruby-keyword">unless</span> <span class="ruby-ivar">@clusters</span>
|
1193
|
+
<span class="ruby-identifier">r</span> = <span class="ruby-identifier">new_r</span>
|
1194
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"png('#{file}', width=#{opts[:width]}, height=#{opts[:height]})"</span>
|
1195
|
+
<span class="ruby-identifier">plot_open</span> = <span class="ruby-keyword">false</span>
|
1196
|
+
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">densities</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">t</span><span class="ruby-operator">|</span>
|
1197
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">curr_win</span> = <span class="ruby-identifier">t</span>.<span class="ruby-identifier">last</span>
|
1198
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">dx</span> = <span class="ruby-identifier">t</span>[<span class="ruby-value">1</span>]
|
1199
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">dy</span> = <span class="ruby-identifier">t</span>[<span class="ruby-value">2</span>]
|
1200
|
+
<span class="ruby-keyword">if</span> <span class="ruby-identifier">plot_open</span>
|
1201
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"lines(dx,dy, col=\"#{@thread_colours[t.first]}\", xlab='position', ylab='density')"</span>
|
1202
|
+
<span class="ruby-keyword">else</span>
|
1203
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"plot(dx,dy, type=\"l\", col=\"#{@thread_colours[t.first]}\",ylim=c(0,#{density_max_y}), main='#{file}',xlab='position', ylab='density')"</span>
|
1204
|
+
<span class="ruby-identifier">plot_open</span> = <span class="ruby-keyword">true</span>
|
1205
|
+
<span class="ruby-keyword">end</span>
|
1206
|
+
<span class="ruby-keyword">end</span>
|
1207
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"dev.off()"</span>
|
1208
|
+
<span class="ruby-keyword">if</span> <span class="ruby-identifier">opts</span>[<span class="ruby-value">:draw_legend</span>]
|
1209
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"png('#{opts[:draw_legend]}', width=#{opts[:width]}, height=#{opts[:height]})"</span>
|
1210
|
+
<span class="ruby-identifier">colours</span> = <span class="ruby-ivar">@thread_colours</span>.<span class="ruby-identifier">each</span>.<span class="ruby-identifier">sort</span>.<span class="ruby-identifier">collect</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">x</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span>.<span class="ruby-identifier">last</span>}.<span class="ruby-identifier">join</span>(<span class="ruby-string">"','"</span>)
|
1211
|
+
<span class="ruby-identifier">names</span> = <span class="ruby-ivar">@thread_colours</span>.<span class="ruby-identifier">each</span>.<span class="ruby-identifier">sort</span>.<span class="ruby-identifier">collect</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">x</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span>.<span class="ruby-identifier">first</span>}.<span class="ruby-identifier">join</span>(<span class="ruby-string">"','"</span>)
|
1212
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"plot(1,xlab="</span><span class="ruby-string">",ylab="</span><span class="ruby-string">",axes=FALSE)"</span>
|
1213
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"legend('top', c('#{names}'), lty=c(1),lwd=c(1),col=c('#{colours}'), ncol=4)"</span>
|
1214
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"dev.off()"</span>
|
1215
|
+
<span class="ruby-keyword">end</span>
|
1216
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">quit</span>
|
1217
|
+
<span class="ruby-keyword">end</span></pre>
|
1218
|
+
</div><!-- draw_threads-source -->
|
1219
|
+
|
1220
|
+
</div>
|
1221
|
+
|
1222
|
+
|
1223
|
+
|
1224
|
+
|
1225
|
+
</div><!-- draw_threads-method -->
|
1226
|
+
|
1227
|
+
|
1228
|
+
<div id="method-i-frequency_histogram" class="method-detail ">
|
1229
|
+
|
1230
|
+
<div class="method-heading">
|
1231
|
+
<span class="method-name">frequency_histogram</span><span
|
1232
|
+
class="method-args">(file="myfile.png", bin_width=@opts[:histo_bin_width], opts=@opts[:graphics])</span>
|
1233
|
+
<span class="method-click-advice">click to toggle source</span>
|
1234
|
+
</div>
|
1235
|
+
|
1236
|
+
|
1237
|
+
<div class="method-description">
|
1238
|
+
|
1239
|
+
<p>Draws a histogram of polymorphism frequencies across the reference genome
|
1240
|
+
section defined in Bio::Util::Gngm#initialize with bin width
|
1241
|
+
<code>bin_width</code> and writes it to a PNG file <code>file</code></p>
|
1242
|
+
|
1243
|
+
|
1244
|
+
|
1245
|
+
<div class="method-source-code" id="frequency_histogram-source">
|
1246
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 562</span>
|
1247
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">frequency_histogram</span>(<span class="ruby-identifier">file</span>=<span class="ruby-string">"myfile.png"</span>, <span class="ruby-identifier">bin_width</span>=<span class="ruby-ivar">@opts</span>[<span class="ruby-value">:histo_bin_width</span>], <span class="ruby-identifier">opts</span>=<span class="ruby-ivar">@opts</span>[<span class="ruby-value">:graphics</span>])
|
1248
|
+
<span class="ruby-identifier">posns</span> = <span class="ruby-keyword">self</span>.<span class="ruby-identifier">snp_positions</span>.<span class="ruby-identifier">collect</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">a</span><span class="ruby-operator">|</span> <span class="ruby-identifier">a</span>.<span class="ruby-identifier">first</span>}
|
1249
|
+
<span class="ruby-identifier">r</span> = <span class="ruby-identifier">new_r</span>
|
1250
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"suppressMessages ( library(ggplot2) )"</span> <span class="ruby-comment">#setup R environment... </span>
|
1251
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">posns</span> = <span class="ruby-identifier">posns</span>
|
1252
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"data = data.frame(position=posns)"</span>
|
1253
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-node">"png('#{file}', width=#{opts[:width]}, height=#{opts[:height]})"</span>
|
1254
|
+
<span class="ruby-identifier">graph_cmd</span> = <span class="ruby-node">"qplot(position,data=data, geom='histogram', binwidth = #{bin_width}, alpha=I(1/3), main='#{file}', color='red')"</span>
|
1255
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span>(<span class="ruby-identifier">graph_cmd</span>)
|
1256
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">eval</span> <span class="ruby-string">"dev.off()"</span>
|
1257
|
+
<span class="ruby-identifier">r</span>.<span class="ruby-identifier">quit</span>
|
1258
|
+
<span class="ruby-keyword">end</span></pre>
|
1259
|
+
</div><!-- frequency_histogram-source -->
|
1260
|
+
|
1261
|
+
</div>
|
1262
|
+
|
1263
|
+
|
1264
|
+
|
1265
|
+
|
1266
|
+
</div><!-- frequency_histogram-method -->
|
1267
|
+
|
1268
|
+
|
1269
|
+
<div id="method-i-get_band" class="method-detail ">
|
1270
|
+
|
1271
|
+
<div class="method-heading">
|
1272
|
+
<span class="method-name">get_band</span><span
|
1273
|
+
class="method-args">(window=1.0)</span>
|
1274
|
+
<span class="method-click-advice">click to toggle source</span>
|
1275
|
+
</div>
|
1276
|
+
|
1277
|
+
|
1278
|
+
<div class="method-description">
|
1279
|
+
|
1280
|
+
<p>gets an array of windows that cluster with a given window</p>
|
1281
|
+
|
1282
|
+
|
1283
|
+
|
1284
|
+
<div class="method-source-code" id="get_band-source">
|
1285
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 848</span>
|
1286
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">get_band</span>(<span class="ruby-identifier">window</span>=<span class="ruby-value">1.0</span>)
|
1287
|
+
<span class="ruby-comment">##because of the weird step rounding error we need to find the internal name of the window.. so find it from the list from the name the user</span>
|
1288
|
+
<span class="ruby-comment">##expects it to be, may give more than one passing window so keep only first one..</span>
|
1289
|
+
<span class="ruby-identifier">windows</span> = <span class="ruby-identifier">find_window</span>(<span class="ruby-identifier">window</span>)
|
1290
|
+
<span class="ruby-identifier">raise</span> <span class="ruby-constant">RuntimeError</span>, <span class="ruby-node">"Couldnt find window #{window}, or window has no data to calculate: \n windows are #{self.densities.collect {|d| d.first} }"</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">windows</span>.<span class="ruby-identifier">empty?</span> <span class="ruby-comment">##if we have a window that is close enough to the specified window</span>
|
1291
|
+
<span class="ruby-identifier">idx</span> = <span class="ruby-identifier">find_index</span>(<span class="ruby-identifier">windows</span>.<span class="ruby-identifier">first</span>)
|
1292
|
+
<span class="ruby-comment">#find out which cluster the window is in</span>
|
1293
|
+
<span class="ruby-identifier">cluster</span> = <span class="ruby-keyword">self</span>.<span class="ruby-identifier">clusters</span>[<span class="ruby-identifier">idx</span>]
|
1294
|
+
<span class="ruby-comment">##get the other windows in the same cluster, ie the band...</span>
|
1295
|
+
<span class="ruby-identifier">band</span> = []
|
1296
|
+
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">clusters</span>.<span class="ruby-identifier">each_index</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">i</span><span class="ruby-operator">|</span>
|
1297
|
+
<span class="ruby-keyword">if</span> <span class="ruby-keyword">self</span>.<span class="ruby-identifier">clusters</span>[<span class="ruby-identifier">i</span>] <span class="ruby-operator">==</span> <span class="ruby-identifier">cluster</span>
|
1298
|
+
<span class="ruby-identifier">band</span> <span class="ruby-operator"><<</span> <span class="ruby-keyword">self</span>.<span class="ruby-identifier">densities</span>[<span class="ruby-identifier">i</span>].<span class="ruby-identifier">first</span>
|
1299
|
+
<span class="ruby-keyword">end</span>
|
1300
|
+
<span class="ruby-keyword">end</span>
|
1301
|
+
<span class="ruby-identifier">band</span>
|
1302
|
+
<span class="ruby-keyword">end</span></pre>
|
1303
|
+
</div><!-- get_band-source -->
|
1304
|
+
|
1305
|
+
</div>
|
1306
|
+
|
1307
|
+
|
1308
|
+
|
1309
|
+
|
1310
|
+
</div><!-- get_band-method -->
|
1311
|
+
|
1312
|
+
|
1313
|
+
<div id="method-i-get_insert_size_frequency" class="method-detail ">
|
1314
|
+
|
1315
|
+
<div class="method-heading">
|
1316
|
+
<span class="method-name">get_insert_size_frequency</span><span
|
1317
|
+
class="method-args">(options={})</span>
|
1318
|
+
<span class="method-click-advice">click to toggle source</span>
|
1319
|
+
</div>
|
1320
|
+
|
1321
|
+
|
1322
|
+
<div class="method-description">
|
1323
|
+
|
1324
|
+
<p>Returns array of arrays <code>[[window start position, proportion of
|
1325
|
+
alignments > insert size]]</code>. Does this by taking successive
|
1326
|
+
windows across reference and collects the proportion of the reads in that
|
1327
|
+
window that have an insert size > the expected insert size. Proportions
|
1328
|
+
approaching 1 indicate that the sequenced organism has a deletion in that
|
1329
|
+
section, proportions approaching 0 indicate an insertion in that section,
|
1330
|
+
proportions around 0.5 indicate random variation of insert size, IE no
|
1331
|
+
indel.</p>
|
1332
|
+
|
1333
|
+
<p>Each section should be approximately the size of the insertion you expect
|
1334
|
+
to find and should increment in as small steps as possible.</p>
|
1335
|
+
|
1336
|
+
<p>Options and defaults:</p>
|
1337
|
+
<ul><li>
|
1338
|
+
<p><code>:ref_window_size => 200</code> width of window in which to
|
1339
|
+
calculate proportions</p>
|
1340
|
+
</li><li>
|
1341
|
+
<p><code>:ref_window_slide => 50</code> number of bases to move window in
|
1342
|
+
each step</p>
|
1343
|
+
</li><li>
|
1344
|
+
<p><code>:isize => 150</code> expected insert size</p>
|
1345
|
+
</li></ul>
|
1346
|
+
|
1347
|
+
<p>Sets the instance variable @snp_positions. Only gets positions the first
|
1348
|
+
time it is called, in subsequent calls pre-computed positions and
|
1349
|
+
statistics are returned, so changing parameters has no effect</p>
|
1350
|
+
|
1351
|
+
|
1352
|
+
|
1353
|
+
<div class="method-source-code" id="get_insert_size_frequency-source">
|
1354
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 531</span>
|
1355
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">get_insert_size_frequency</span>(<span class="ruby-identifier">options</span>={})
|
1356
|
+
<span class="ruby-identifier">opts</span> = <span class="ruby-ivar">@opts</span>[<span class="ruby-value">:insert_size_opts</span>].<span class="ruby-identifier">merge</span>(<span class="ruby-identifier">options</span>)
|
1357
|
+
<span class="ruby-keyword">return</span> <span class="ruby-ivar">@snp_positions</span> <span class="ruby-keyword">if</span> <span class="ruby-ivar">@snp_positions</span>
|
1358
|
+
<span class="ruby-keyword">case</span>
|
1359
|
+
<span class="ruby-keyword">when</span> <span class="ruby-ivar">@file</span>.<span class="ruby-identifier">instance_of?</span>(<span class="ruby-constant">Bio</span><span class="ruby-operator">::</span><span class="ruby-constant">DB</span><span class="ruby-operator">::</span><span class="ruby-constant">Sam</span>) <span class="ruby-keyword">then</span> <span class="ruby-identifier">get_insert_size_frequency_from_bam</span>(<span class="ruby-identifier">opts</span>)
|
1360
|
+
<span class="ruby-keyword">end</span>
|
1361
|
+
<span class="ruby-keyword">end</span></pre>
|
1362
|
+
</div><!-- get_insert_size_frequency-source -->
|
1363
|
+
|
1364
|
+
</div>
|
1365
|
+
|
1366
|
+
|
1367
|
+
|
1368
|
+
|
1369
|
+
</div><!-- get_insert_size_frequency-method -->
|
1370
|
+
|
1371
|
+
|
1372
|
+
<div id="method-i-get_unmapped_mate_frequency" class="method-detail ">
|
1373
|
+
|
1374
|
+
<div class="method-heading">
|
1375
|
+
<span class="method-name">get_unmapped_mate_frequency</span><span
|
1376
|
+
class="method-args">(options={})</span>
|
1377
|
+
<span class="method-click-advice">click to toggle source</span>
|
1378
|
+
</div>
|
1379
|
+
|
1380
|
+
|
1381
|
+
<div class="method-description">
|
1382
|
+
|
1383
|
+
<p>Returns array of arrays <code>[[window start position, proportion of reads
|
1384
|
+
with unmapped mates]]</code>. Does this by taking successive windows across
|
1385
|
+
reference and counting the reads with unmapped mates. Proportions
|
1386
|
+
approaching 0.5 indicate that the sequenced organism has an insertion in
|
1387
|
+
that section, proportions approaching 0 indicate nothing different in that
|
1388
|
+
section.</p>
|
1389
|
+
|
1390
|
+
<p>Each section should be approximately the size of the insertion you expect
|
1391
|
+
to find and should increment in as small steps as possible.</p>
|
1392
|
+
|
1393
|
+
<p>Options and defaults:</p>
|
1394
|
+
<ul><li>
|
1395
|
+
<p><code>:ref_window_size => 200</code> width of window in which to
|
1396
|
+
calculate proportions</p>
|
1397
|
+
</li><li>
|
1398
|
+
<p><code>:ref_window_slide => 50</code> number of bases to move window in
|
1399
|
+
each step</p>
|
1400
|
+
</li></ul>
|
1401
|
+
|
1402
|
+
<p>Sets the instance variable @snp_positions. Only gets positions the first
|
1403
|
+
time it is called, in subsequent calls pre-computed positions and
|
1404
|
+
statistics are returned, so changing parameters has no effect</p>
|
1405
|
+
|
1406
|
+
|
1407
|
+
|
1408
|
+
<div class="method-source-code" id="get_unmapped_mate_frequency-source">
|
1409
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 550</span>
|
1410
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">get_unmapped_mate_frequency</span>(<span class="ruby-identifier">options</span>={})
|
1411
|
+
<span class="ruby-identifier">opts</span> = <span class="ruby-ivar">@opts</span>[<span class="ruby-value">:insert_size_opts</span>].<span class="ruby-identifier">merge</span>(<span class="ruby-identifier">options</span>)
|
1412
|
+
<span class="ruby-keyword">return</span> <span class="ruby-ivar">@snp_positions</span> <span class="ruby-keyword">if</span> <span class="ruby-ivar">@snp_positions</span>
|
1413
|
+
<span class="ruby-keyword">case</span>
|
1414
|
+
<span class="ruby-keyword">when</span> <span class="ruby-ivar">@file</span>.<span class="ruby-identifier">instance_of?</span>(<span class="ruby-constant">Bio</span><span class="ruby-operator">::</span><span class="ruby-constant">DB</span><span class="ruby-operator">::</span><span class="ruby-constant">Sam</span>) <span class="ruby-keyword">then</span> <span class="ruby-identifier">get_unmapped_mate_frequency_from_bam</span>(<span class="ruby-identifier">opts</span>)
|
1415
|
+
<span class="ruby-keyword">end</span>
|
1416
|
+
<span class="ruby-keyword">end</span></pre>
|
1417
|
+
</div><!-- get_unmapped_mate_frequency-source -->
|
1418
|
+
|
1419
|
+
</div>
|
1420
|
+
|
1421
|
+
|
1422
|
+
|
1423
|
+
|
1424
|
+
</div><!-- get_unmapped_mate_frequency-method -->
|
1425
|
+
|
1426
|
+
|
1427
|
+
<div id="method-i-hit_count" class="method-detail ">
|
1428
|
+
|
1429
|
+
<div class="method-heading">
|
1430
|
+
<span class="method-name">hit_count</span><span
|
1431
|
+
class="method-args">()</span>
|
1432
|
+
<span class="method-click-advice">click to toggle source</span>
|
1433
|
+
</div>
|
1434
|
+
|
1435
|
+
|
1436
|
+
<div class="method-description">
|
1437
|
+
|
1438
|
+
<p>Returns an array of polymorphisms in each thread/window <tt>[[window,
|
1439
|
+
polymorphism count]]</tt>. Useful for sparse polymorphism counts or over small
|
1440
|
+
regions where small polymorphism counts can cause artificially large peaks
|
1441
|
+
in density curves.</p>
|
1442
|
+
|
1443
|
+
|
1444
|
+
|
1445
|
+
<div class="method-source-code" id="hit_count-source">
|
1446
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 938</span>
|
1447
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">hit_count</span>
|
1448
|
+
<span class="ruby-identifier">arr</span> = []
|
1449
|
+
<span class="ruby-keyword">self</span>.<span class="ruby-identifier">threads</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">thread</span><span class="ruby-operator">|</span>
|
1450
|
+
<span class="ruby-identifier">arr</span> <span class="ruby-operator"><<</span> [<span class="ruby-identifier">thread</span>.<span class="ruby-identifier">first</span>, <span class="ruby-identifier">thread</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">length</span>]
|
1451
|
+
<span class="ruby-keyword">end</span>
|
1452
|
+
<span class="ruby-identifier">arr</span>
|
1453
|
+
<span class="ruby-keyword">end</span></pre>
|
1454
|
+
</div><!-- hit_count-source -->
|
1455
|
+
|
1456
|
+
</div>
|
1457
|
+
|
1458
|
+
|
1459
|
+
|
1460
|
+
|
1461
|
+
</div><!-- hit_count-method -->
|
1462
|
+
|
1463
|
+
|
1464
|
+
<div id="method-i-peaks" class="method-detail ">
|
1465
|
+
|
1466
|
+
<div class="method-heading">
|
1467
|
+
<span class="method-name">peaks</span><span
|
1468
|
+
class="method-args">()</span>
|
1469
|
+
<span class="method-click-advice">click to toggle source</span>
|
1470
|
+
</div>
|
1471
|
+
|
1472
|
+
|
1473
|
+
<div class="method-description">
|
1474
|
+
|
1475
|
+
<p>Returns the positions of the peaks in the signal curve calculated by
|
1476
|
+
Bio::Util::Gngm#get_peaks as an array</p>
|
1477
|
+
|
1478
|
+
|
1479
|
+
|
1480
|
+
<div class="method-source-code" id="peaks-source">
|
1481
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 884</span>
|
1482
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">peaks</span>
|
1483
|
+
<span class="ruby-ivar">@peak_indices</span>.<span class="ruby-identifier">collect</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">x</span><span class="ruby-operator">|</span> <span class="ruby-keyword">self</span>.<span class="ruby-identifier">densities</span>[<span class="ruby-value">0</span>][<span class="ruby-value">1</span>][<span class="ruby-identifier">x</span>].<span class="ruby-identifier">to_f</span>.<span class="ruby-identifier">floor</span>}
|
1484
|
+
<span class="ruby-keyword">end</span></pre>
|
1485
|
+
</div><!-- peaks-source -->
|
1486
|
+
|
1487
|
+
</div>
|
1488
|
+
|
1489
|
+
|
1490
|
+
|
1491
|
+
|
1492
|
+
</div><!-- peaks-method -->
|
1493
|
+
|
1494
|
+
|
1495
|
+
<div id="method-i-signal" class="method-detail ">
|
1496
|
+
|
1497
|
+
<div class="method-heading">
|
1498
|
+
<span class="method-name">signal</span><span
|
1499
|
+
class="method-args">()</span>
|
1500
|
+
<span class="method-click-advice">click to toggle source</span>
|
1501
|
+
</div>
|
1502
|
+
|
1503
|
+
|
1504
|
+
<div class="method-description">
|
1505
|
+
|
1506
|
+
|
1507
|
+
|
1508
|
+
|
1509
|
+
|
1510
|
+
<div class="method-source-code" id="signal-source">
|
1511
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 999</span>
|
1512
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">signal</span>
|
1513
|
+
<span class="ruby-ivar">@signal</span> <span class="ruby-operator">||=</span> <span class="ruby-identifier">calculate_signal</span>
|
1514
|
+
<span class="ruby-keyword">end</span></pre>
|
1515
|
+
</div><!-- signal-source -->
|
1516
|
+
|
1517
|
+
</div>
|
1518
|
+
|
1519
|
+
|
1520
|
+
|
1521
|
+
|
1522
|
+
</div><!-- signal-method -->
|
1523
|
+
|
1524
|
+
|
1525
|
+
<div id="method-i-snp_positions" class="method-detail ">
|
1526
|
+
|
1527
|
+
<div class="method-heading">
|
1528
|
+
<span class="method-name">snp_positions</span><span
|
1529
|
+
class="method-args">(optsa={})</span>
|
1530
|
+
<span class="method-click-advice">click to toggle source</span>
|
1531
|
+
</div>
|
1532
|
+
|
1533
|
+
|
1534
|
+
<div class="method-description">
|
1535
|
+
|
1536
|
+
<p>Returns array of arrays <code>[[position, statistic]]</code> for
|
1537
|
+
polymorphisms passing filters in <code>optsa</code>. Default options are
|
1538
|
+
those in the <code>:variant_call</code> global options hash which can be
|
1539
|
+
overridden in the method call.</p>
|
1540
|
+
|
1541
|
+
<p>Options and defaults:</p>
|
1542
|
+
<ul><li>
|
1543
|
+
<p><code>:indels => false</code> -call small insertions AND deletions
|
1544
|
+
instead of simple SNPs</p>
|
1545
|
+
</li><li>
|
1546
|
+
<p><code>:deletions_only => false</code> -call just deletions instead of
|
1547
|
+
simple SNPs</p>
|
1548
|
+
</li><li>
|
1549
|
+
<p><code>:insertions_only => false</code> -call small insertions instead of
|
1550
|
+
simple SNPs</p>
|
1551
|
+
</li><li>
|
1552
|
+
<p><code>:min_depth => 2</code> -minimum quality passing depth of coverage
|
1553
|
+
at a position for a SNP call</p>
|
1554
|
+
</li><li>
|
1555
|
+
<p><code>:max_depth => 10000000</code> -maximum quality passing depth of
|
1556
|
+
coverage at a position for a SNP call</p>
|
1557
|
+
</li><li>
|
1558
|
+
<p><code>:mapping_quality => 10.0</code> -minimum mapping quality required
|
1559
|
+
for a read to be used in depth calculation</p>
|
1560
|
+
</li><li>
|
1561
|
+
<p><code>:min_non_ref_count => 2</code> -minimum number of reads not
|
1562
|
+
matching the reference for SNP to be called</p>
|
1563
|
+
</li><li>
|
1564
|
+
<p><code>:ignore_reference_n => true</code> -ignore positions where the
|
1565
|
+
reference is N or n</p>
|
1566
|
+
</li></ul>
|
1567
|
+
|
1568
|
+
<p>When INDEL calling only one of <code>:indels, :deletions_only,
|
1569
|
+
:insertions_only</code> should be used. If all are <code>false</code>, SNPs
|
1570
|
+
are called.</p>
|
1571
|
+
|
1572
|
+
<p>Sets the instance variable @snp_positions. Only gets positions the first
|
1573
|
+
time it is called; in subsequent calls the pre-computed positions and
|
1574
|
+
statistics are returned, so changing parameters has no effect.</p>
|
1575
|
+
|
1576
|
+
|
1577
|
+
|
1578
|
+
<div class="method-source-code" id="snp_positions-source">
|
1579
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 422</span>
|
1580
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">snp_positions</span>(<span class="ruby-identifier">optsa</span>={})
|
1581
|
+
<span class="ruby-identifier">opts</span> = <span class="ruby-ivar">@opts</span>[<span class="ruby-value">:variant_call</span>].<span class="ruby-identifier">merge</span>(<span class="ruby-identifier">optsa</span>)
|
1582
|
+
<span class="ruby-keyword">return</span> <span class="ruby-ivar">@snp_positions</span> <span class="ruby-keyword">if</span> <span class="ruby-ivar">@snp_positions</span>
|
1583
|
+
<span class="ruby-keyword">case</span>
|
1584
|
+
<span class="ruby-keyword">when</span> <span class="ruby-ivar">@file</span>.<span class="ruby-identifier">instance_of?</span>(<span class="ruby-constant">Bio</span><span class="ruby-operator">::</span><span class="ruby-constant">DB</span><span class="ruby-operator">::</span><span class="ruby-constant">Sam</span>) <span class="ruby-keyword">then</span> <span class="ruby-identifier">get_snp_positions_from_bam</span>(<span class="ruby-identifier">opts</span>)
|
1585
|
+
<span class="ruby-keyword">end</span>
|
1586
|
+
<span class="ruby-keyword">end</span></pre>
|
1587
|
+
</div><!-- snp_positions-source -->
|
1588
|
+
|
1589
|
+
</div>
|
1590
|
+
|
1591
|
+
|
1592
|
+
|
1593
|
+
|
1594
|
+
</div><!-- snp_positions-method -->
|
1595
|
+
|
1596
|
+
|
1597
|
+
<div id="method-i-threads" class="method-detail ">
|
1598
|
+
|
1599
|
+
<div class="method-heading">
|
1600
|
+
<span class="method-name">threads</span><span
|
1601
|
+
class="method-args">(opts=@opts[:threads])</span>
|
1602
|
+
<span class="method-click-advice">click to toggle source</span>
|
1603
|
+
</div>
|
1604
|
+
|
1605
|
+
|
1606
|
+
<div class="method-description">
|
1607
|
+
|
1608
|
+
<p>Returns contents of @threads, an array of arrays <code>[[window 1, snp
|
1609
|
+
position 1, snp position 2 ... snp position n],[window 2, snp position 1,
|
1610
|
+
snp position 2 ... snp position n] ]</code>. If @threads is nil (because
|
1611
|
+
snps have not yet been gathered into threads), the <a
|
1612
|
+
href="Gngm.html#method-i-collect_threads">#collect_threads</a> method is
|
1613
|
+
called and @threads is set before returning.</p>
|
1614
|
+
|
1615
|
+
<p>Options and defaults:</p>
|
1616
|
+
<ul><li>
|
1617
|
+
<p><code>:start => 0.2</code> -first window</p>
|
1618
|
+
</li><li>
|
1619
|
+
<p><code>:stop => 1.0</code> -last window</p>
|
1620
|
+
</li><li>
|
1621
|
+
<p><code>:slide => 0.01</code> -distance between windows</p>
|
1622
|
+
</li><li>
|
1623
|
+
<p><code>:size => 0.1</code> -window width</p>
|
1624
|
+
</li></ul>
|
1625
|
+
|
1626
|
+
|
1627
|
+
|
1628
|
+
<div class="method-source-code" id="threads-source">
|
1629
|
+
<pre><span class="ruby-comment"># File lib/bio/util/bio-gngm.rb, line 584</span>
|
1630
|
+
<span class="ruby-keyword">def</span> <span class="ruby-identifier">threads</span>(<span class="ruby-identifier">opts</span>=<span class="ruby-ivar">@opts</span>[<span class="ruby-value">:threads</span>])
|
1631
|
+
<span class="ruby-ivar">@threads</span> <span class="ruby-operator">||=</span> <span class="ruby-identifier">collect_threads</span>(<span class="ruby-identifier">opts</span>)
|
1632
|
+
<span class="ruby-keyword">end</span></pre>
|
1633
|
+
</div><!-- threads-source -->
|
1634
|
+
|
1635
|
+
</div>
|
1636
|
+
|
1637
|
+
|
1638
|
+
|
1639
|
+
|
1640
|
+
</div><!-- threads-method -->
|
1641
|
+
|
1642
|
+
|
1643
|
+
</section><!-- public-instance-method-details -->
|
1644
|
+
|
1645
|
+
</section><!-- 5Buntitled-5D -->
|
1646
|
+
|
1647
|
+
</div><!-- documentation -->
|
1648
|
+
|
1649
|
+
|
1650
|
+
<footer id="validator-badges">
|
1651
|
+
<p><a href="http://validator.w3.org/check/referer">[Validate]</a></p>
|
1652
|
+
<p>Generated by <a href="https://github.com/rdoc/rdoc">RDoc</a> 3.11.</p>
|
1653
|
+
<p>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish Rdoc Generator</a> 3.</p>
|
1654
|
+
</footer>
|
1655
|
+
|