BioDSL 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/BioDSL.gemspec +64 -0
- data/LICENSE +339 -0
- data/README.md +205 -0
- data/Rakefile +94 -0
- data/examples/fastq_to_fasta.rb +8 -0
- data/lib/BioDSL/cary.rb +242 -0
- data/lib/BioDSL/command.rb +133 -0
- data/lib/BioDSL/commands/add_key.rb +110 -0
- data/lib/BioDSL/commands/align_seq_mothur.rb +194 -0
- data/lib/BioDSL/commands/analyze_residue_distribution.rb +222 -0
- data/lib/BioDSL/commands/assemble_pairs.rb +336 -0
- data/lib/BioDSL/commands/assemble_seq_idba.rb +230 -0
- data/lib/BioDSL/commands/assemble_seq_ray.rb +345 -0
- data/lib/BioDSL/commands/assemble_seq_spades.rb +252 -0
- data/lib/BioDSL/commands/classify_seq.rb +217 -0
- data/lib/BioDSL/commands/classify_seq_mothur.rb +226 -0
- data/lib/BioDSL/commands/clip_primer.rb +318 -0
- data/lib/BioDSL/commands/cluster_otus.rb +181 -0
- data/lib/BioDSL/commands/collapse_otus.rb +170 -0
- data/lib/BioDSL/commands/collect_otus.rb +150 -0
- data/lib/BioDSL/commands/complement_seq.rb +117 -0
- data/lib/BioDSL/commands/count.rb +135 -0
- data/lib/BioDSL/commands/count_values.rb +149 -0
- data/lib/BioDSL/commands/degap_seq.rb +253 -0
- data/lib/BioDSL/commands/dereplicate_seq.rb +168 -0
- data/lib/BioDSL/commands/dump.rb +157 -0
- data/lib/BioDSL/commands/filter_rrna.rb +239 -0
- data/lib/BioDSL/commands/genecall.rb +237 -0
- data/lib/BioDSL/commands/grab.rb +535 -0
- data/lib/BioDSL/commands/index_taxonomy.rb +226 -0
- data/lib/BioDSL/commands/mask_seq.rb +175 -0
- data/lib/BioDSL/commands/mean_scores.rb +168 -0
- data/lib/BioDSL/commands/merge_pair_seq.rb +175 -0
- data/lib/BioDSL/commands/merge_table.rb +225 -0
- data/lib/BioDSL/commands/merge_values.rb +113 -0
- data/lib/BioDSL/commands/plot_heatmap.rb +233 -0
- data/lib/BioDSL/commands/plot_histogram.rb +306 -0
- data/lib/BioDSL/commands/plot_matches.rb +282 -0
- data/lib/BioDSL/commands/plot_residue_distribution.rb +278 -0
- data/lib/BioDSL/commands/plot_scores.rb +285 -0
- data/lib/BioDSL/commands/random.rb +153 -0
- data/lib/BioDSL/commands/read_fasta.rb +222 -0
- data/lib/BioDSL/commands/read_fastq.rb +414 -0
- data/lib/BioDSL/commands/read_table.rb +329 -0
- data/lib/BioDSL/commands/reverse_seq.rb +113 -0
- data/lib/BioDSL/commands/slice_align.rb +400 -0
- data/lib/BioDSL/commands/slice_seq.rb +151 -0
- data/lib/BioDSL/commands/sort.rb +223 -0
- data/lib/BioDSL/commands/split_pair_seq.rb +220 -0
- data/lib/BioDSL/commands/split_values.rb +165 -0
- data/lib/BioDSL/commands/trim_primer.rb +314 -0
- data/lib/BioDSL/commands/trim_seq.rb +192 -0
- data/lib/BioDSL/commands/uchime_ref.rb +170 -0
- data/lib/BioDSL/commands/uclust.rb +286 -0
- data/lib/BioDSL/commands/unique_values.rb +145 -0
- data/lib/BioDSL/commands/usearch_global.rb +171 -0
- data/lib/BioDSL/commands/usearch_local.rb +171 -0
- data/lib/BioDSL/commands/write_fasta.rb +207 -0
- data/lib/BioDSL/commands/write_fastq.rb +191 -0
- data/lib/BioDSL/commands/write_table.rb +419 -0
- data/lib/BioDSL/commands/write_tree.rb +167 -0
- data/lib/BioDSL/commands.rb +31 -0
- data/lib/BioDSL/config.rb +55 -0
- data/lib/BioDSL/csv.rb +307 -0
- data/lib/BioDSL/debug.rb +42 -0
- data/lib/BioDSL/fasta.rb +133 -0
- data/lib/BioDSL/fastq.rb +77 -0
- data/lib/BioDSL/filesys.rb +137 -0
- data/lib/BioDSL/fork.rb +145 -0
- data/lib/BioDSL/hamming.rb +128 -0
- data/lib/BioDSL/helpers/aux_helper.rb +44 -0
- data/lib/BioDSL/helpers/email_helper.rb +66 -0
- data/lib/BioDSL/helpers/history_helper.rb +40 -0
- data/lib/BioDSL/helpers/log_helper.rb +55 -0
- data/lib/BioDSL/helpers/options_helper.rb +405 -0
- data/lib/BioDSL/helpers/status_helper.rb +132 -0
- data/lib/BioDSL/helpers.rb +35 -0
- data/lib/BioDSL/html_report.rb +200 -0
- data/lib/BioDSL/math.rb +55 -0
- data/lib/BioDSL/mummer.rb +216 -0
- data/lib/BioDSL/pipeline.rb +354 -0
- data/lib/BioDSL/seq/ambiguity.rb +66 -0
- data/lib/BioDSL/seq/assemble.rb +240 -0
- data/lib/BioDSL/seq/backtrack.rb +252 -0
- data/lib/BioDSL/seq/digest.rb +99 -0
- data/lib/BioDSL/seq/dynamic.rb +263 -0
- data/lib/BioDSL/seq/homopolymer.rb +59 -0
- data/lib/BioDSL/seq/kmer.rb +293 -0
- data/lib/BioDSL/seq/levenshtein.rb +113 -0
- data/lib/BioDSL/seq/translate.rb +109 -0
- data/lib/BioDSL/seq/trim.rb +188 -0
- data/lib/BioDSL/seq.rb +742 -0
- data/lib/BioDSL/serializer.rb +98 -0
- data/lib/BioDSL/stream.rb +113 -0
- data/lib/BioDSL/taxonomy.rb +691 -0
- data/lib/BioDSL/test.rb +42 -0
- data/lib/BioDSL/tmp_dir.rb +68 -0
- data/lib/BioDSL/usearch.rb +301 -0
- data/lib/BioDSL/verbose.rb +42 -0
- data/lib/BioDSL/version.rb +31 -0
- data/lib/BioDSL.rb +81 -0
- data/test/BioDSL/commands/test_add_key.rb +105 -0
- data/test/BioDSL/commands/test_align_seq_mothur.rb +99 -0
- data/test/BioDSL/commands/test_analyze_residue_distribution.rb +134 -0
- data/test/BioDSL/commands/test_assemble_pairs.rb +459 -0
- data/test/BioDSL/commands/test_assemble_seq_idba.rb +50 -0
- data/test/BioDSL/commands/test_assemble_seq_ray.rb +51 -0
- data/test/BioDSL/commands/test_assemble_seq_spades.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq.rb +50 -0
- data/test/BioDSL/commands/test_classify_seq_mothur.rb +59 -0
- data/test/BioDSL/commands/test_clip_primer.rb +377 -0
- data/test/BioDSL/commands/test_cluster_otus.rb +128 -0
- data/test/BioDSL/commands/test_collapse_otus.rb +81 -0
- data/test/BioDSL/commands/test_collect_otus.rb +82 -0
- data/test/BioDSL/commands/test_complement_seq.rb +78 -0
- data/test/BioDSL/commands/test_count.rb +103 -0
- data/test/BioDSL/commands/test_count_values.rb +85 -0
- data/test/BioDSL/commands/test_degap_seq.rb +96 -0
- data/test/BioDSL/commands/test_dereplicate_seq.rb +92 -0
- data/test/BioDSL/commands/test_dump.rb +109 -0
- data/test/BioDSL/commands/test_filter_rrna.rb +128 -0
- data/test/BioDSL/commands/test_genecall.rb +50 -0
- data/test/BioDSL/commands/test_grab.rb +398 -0
- data/test/BioDSL/commands/test_index_taxonomy.rb +62 -0
- data/test/BioDSL/commands/test_mask_seq.rb +98 -0
- data/test/BioDSL/commands/test_mean_scores.rb +111 -0
- data/test/BioDSL/commands/test_merge_pair_seq.rb +115 -0
- data/test/BioDSL/commands/test_merge_table.rb +131 -0
- data/test/BioDSL/commands/test_merge_values.rb +83 -0
- data/test/BioDSL/commands/test_plot_heatmap.rb +185 -0
- data/test/BioDSL/commands/test_plot_histogram.rb +194 -0
- data/test/BioDSL/commands/test_plot_matches.rb +157 -0
- data/test/BioDSL/commands/test_plot_residue_distribution.rb +309 -0
- data/test/BioDSL/commands/test_plot_scores.rb +308 -0
- data/test/BioDSL/commands/test_random.rb +88 -0
- data/test/BioDSL/commands/test_read_fasta.rb +229 -0
- data/test/BioDSL/commands/test_read_fastq.rb +552 -0
- data/test/BioDSL/commands/test_read_table.rb +327 -0
- data/test/BioDSL/commands/test_reverse_seq.rb +79 -0
- data/test/BioDSL/commands/test_slice_align.rb +218 -0
- data/test/BioDSL/commands/test_slice_seq.rb +131 -0
- data/test/BioDSL/commands/test_sort.rb +128 -0
- data/test/BioDSL/commands/test_split_pair_seq.rb +164 -0
- data/test/BioDSL/commands/test_split_values.rb +95 -0
- data/test/BioDSL/commands/test_trim_primer.rb +329 -0
- data/test/BioDSL/commands/test_trim_seq.rb +150 -0
- data/test/BioDSL/commands/test_uchime_ref.rb +113 -0
- data/test/BioDSL/commands/test_uclust.rb +139 -0
- data/test/BioDSL/commands/test_unique_values.rb +98 -0
- data/test/BioDSL/commands/test_usearch_global.rb +123 -0
- data/test/BioDSL/commands/test_usearch_local.rb +125 -0
- data/test/BioDSL/commands/test_write_fasta.rb +159 -0
- data/test/BioDSL/commands/test_write_fastq.rb +166 -0
- data/test/BioDSL/commands/test_write_table.rb +411 -0
- data/test/BioDSL/commands/test_write_tree.rb +122 -0
- data/test/BioDSL/helpers/test_options_helper.rb +272 -0
- data/test/BioDSL/seq/test_assemble.rb +98 -0
- data/test/BioDSL/seq/test_backtrack.rb +176 -0
- data/test/BioDSL/seq/test_digest.rb +71 -0
- data/test/BioDSL/seq/test_dynamic.rb +133 -0
- data/test/BioDSL/seq/test_homopolymer.rb +58 -0
- data/test/BioDSL/seq/test_kmer.rb +134 -0
- data/test/BioDSL/seq/test_translate.rb +75 -0
- data/test/BioDSL/seq/test_trim.rb +101 -0
- data/test/BioDSL/test_cary.rb +176 -0
- data/test/BioDSL/test_command.rb +45 -0
- data/test/BioDSL/test_csv.rb +514 -0
- data/test/BioDSL/test_debug.rb +42 -0
- data/test/BioDSL/test_fasta.rb +154 -0
- data/test/BioDSL/test_fastq.rb +46 -0
- data/test/BioDSL/test_filesys.rb +145 -0
- data/test/BioDSL/test_fork.rb +85 -0
- data/test/BioDSL/test_math.rb +41 -0
- data/test/BioDSL/test_mummer.rb +79 -0
- data/test/BioDSL/test_pipeline.rb +187 -0
- data/test/BioDSL/test_seq.rb +790 -0
- data/test/BioDSL/test_serializer.rb +72 -0
- data/test/BioDSL/test_stream.rb +55 -0
- data/test/BioDSL/test_taxonomy.rb +336 -0
- data/test/BioDSL/test_test.rb +42 -0
- data/test/BioDSL/test_tmp_dir.rb +58 -0
- data/test/BioDSL/test_usearch.rb +33 -0
- data/test/BioDSL/test_verbose.rb +42 -0
- data/test/helper.rb +82 -0
- data/www/command.html.haml +14 -0
- data/www/css.html.haml +55 -0
- data/www/input_files.html.haml +3 -0
- data/www/layout.html.haml +12 -0
- data/www/output_files.html.haml +3 -0
- data/www/overview.html.haml +15 -0
- data/www/pipeline.html.haml +4 -0
- data/www/png.html.haml +2 -0
- data/www/status.html.haml +9 -0
- data/www/time.html.haml +11 -0
- metadata +503 -0
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
2
|
+
# #
|
|
3
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
4
|
+
# #
|
|
5
|
+
# This program is free software; you can redistribute it and/or #
|
|
6
|
+
# modify it under the terms of the GNU General Public License #
|
|
7
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
8
|
+
# of the License, or (at your option) any later version. #
|
|
9
|
+
# #
|
|
10
|
+
# This program is distributed in the hope that it will be useful, #
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
13
|
+
# GNU General Public License for more details. #
|
|
14
|
+
# #
|
|
15
|
+
# You should have received a copy of the GNU General Public License #
|
|
16
|
+
# along with this program; if not, write to the Free Software #
|
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #
|
|
18
|
+
# #
|
|
19
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
20
|
+
# #
|
|
21
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
22
|
+
# #
|
|
23
|
+
# This software is part of BioDSL (www.BioDSL.org). #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
|
|
27
|
+
module BioDSL
|
|
28
|
+
# Error class for all exceptions to do with BackTrack.
|
|
29
|
+
class BackTrackError < StandardError
end
|
|
30
|
+
|
|
31
|
+
# Module containing code to locate nucleotide patterns in sequences allowing for
|
|
32
|
+
# ambiguity codes and a given maximum mismatches, insertions, and deletions. The
|
|
33
|
+
# pattern match engine is based on a backtrack algorithm.
|
|
34
|
+
# Insertions are nucleotides found in the pattern but not in the sequence.
|
|
35
|
+
# Deletions are nucleotides found in the sequence but not in the pattern.
|
|
36
|
+
# Algorithm based on code kindly provided by j_random_hacker @ Stackoverflow:
|
|
37
|
+
# http://stackoverflow.com/questions/7557017/approximate-string-matching-using-backtracking/
|
|
38
|
+
module BackTrack
|
|
39
|
+
extend BioDSL::Ambiguity
|
|
40
|
+
|
|
41
|
+
# Residue codes accepted in a (downcased) search pattern. Anchored with \A and
# \z rather than ^ and $, which match per line and would let a pattern such as
# "atcg\n!!" pass validation.
OK_PATTERN = /\A[bflsycwphqrimtnkvadegu]+\z/
MAX_MIS = 5 # Maximum number of mismatches allowed
MAX_INS = 5 # Maximum number of insertions allowed
MAX_DEL = 5 # Maximum number of deletions allowed
|
|
45
|
+
|
|
46
|
+
# ------------------------------------------------------------------------------
|
|
47
|
+
# str.patmatch(pattern[, options])
|
|
48
|
+
# -> Match
|
|
49
|
+
# str.patmatch(pattern[, options]) { |match|
|
|
50
|
+
# block
|
|
51
|
+
# }
|
|
52
|
+
# -> Match
|
|
53
|
+
#
|
|
54
|
+
# options:
|
|
55
|
+
# :start
|
|
56
|
+
# :stop
|
|
57
|
+
# :max_mismatches
|
|
58
|
+
# :max_insertions
|
|
59
|
+
# :max_deletions
|
|
60
|
+
#
|
|
61
|
+
# ------------------------------------------------------------------------------
|
|
62
|
+
# Method to iterate through a sequence from a given start position to the end of
|
|
63
|
+
# the sequence or to a given stop position to locate a pattern allowing for a
|
|
64
|
+
# maximum number of mismatches, insertions, and deletions. Insertions are
|
|
65
|
+
# nucleotides found in the pattern but not in the sequence. Deletions are
|
|
66
|
+
# nucleotides found in the sequence but not in the pattern.
|
|
67
|
+
# Locate +pattern+ in the sequence via patscan.
#
# pattern - String with the pattern to search for.
# options - Hash forwarded to patscan (:start, :stop, :max_mismatches,
#           :max_insertions, :max_deletions). Missing keys are defaulted by
#           patscan. The caller's hash is never modified.
#
# Without a block the first match is returned, or nil when nothing matches.
# With a block every match is yielded in turn and patscan's return value is
# passed through.
def patmatch(pattern, options = {})
  # Hand patscan a private copy so that neither the defaults nor the advancing
  # :start position can leak into the caller's hash.
  patscan(pattern, options.dup) do |match|
    return match unless block_given?

    yield match
  end
end
|
|
82
|
+
|
|
83
|
+
# ------------------------------------------------------------------------------
|
|
84
|
+
# str.patscan(pattern[, options])
|
|
85
|
+
# -> Array
|
|
86
|
+
# str.patscan(pattern[, options]) { |match|
|
|
87
|
+
# block
|
|
88
|
+
# }
|
|
89
|
+
# -> Match
|
|
90
|
+
#
|
|
91
|
+
# options:
|
|
92
|
+
# :start
|
|
93
|
+
# :stop
|
|
94
|
+
# :max_mismatches
|
|
95
|
+
# :max_insertions
|
|
96
|
+
# :max_deletions
|
|
97
|
+
#
|
|
98
|
+
# ------------------------------------------------------------------------------
|
|
99
|
+
# Method to iterate through a sequence from a given start position to the end of
|
|
100
|
+
# the sequence or to a given stop position to locate a pattern allowing for a
|
|
101
|
+
# maximum number of mismatches, insertions, and deletions. Insertions are
|
|
102
|
+
# nucleotides found in the pattern but not in the sequence. Deletions are
|
|
103
|
+
# nucleotides found in the sequence but not in the pattern. Matches found in
|
|
104
|
+
# block context return the Match object. Otherwise matches are returned in an
|
|
105
|
+
# Array of Match objects.
|
|
106
|
+
# Scan the sequence for every occurrence of +pattern+.
#
# pattern - String with the pattern; after downcasing it must match OK_PATTERN.
# options - Hash with optional keys (the caller's hash is left untouched):
#           :start          - first start position to try (default 0)
#           :stop           - last start position to try (default length - 1)
#           :max_mismatches - 0 .. MAX_MIS (default 0)
#           :max_insertions - 0 .. MAX_INS (default 0)
#           :max_deletions  - 0 .. MAX_DEL (default 0)
#
# With a block each Match is yielded and nil is returned; without a block an
# Array of Match objects is returned (empty when nothing matches).
#
# Raises BackTrackError on a bad pattern or an out-of-range option.
def patscan(pattern, options = {})
  # Work on a copy: the defaults and the moving search offset below must not
  # be written back into the hash the caller passed in.
  opts = options.dup
  opts[:start]          ||= 0
  opts[:stop]           ||= length - 1
  opts[:max_mismatches] ||= 0
  opts[:max_insertions] ||= 0
  opts[:max_deletions]  ||= 0

  raise BackTrackError, "Bad pattern: #{pattern}" unless pattern.downcase =~ OK_PATTERN

  [:start, :stop].each do |key|
    next if (0...length).include?(opts[key])

    raise BackTrackError, "#{key}: #{opts[key]} out of range (0 .. #{length - 1})"
  end

  { max_mismatches: MAX_MIS, max_insertions: MAX_INS, max_deletions: MAX_DEL }.each do |key, max|
    next if (0..max).include?(opts[key])

    raise BackTrackError, "#{key}: #{opts[key]} out of range (0 .. #{max})"
  end

  matches = []
  start   = opts[:start]

  # scan_C returns a [position, length] pair, or nil once no further match
  # starts in start .. stop.
  while (hit = scan_C(seq, pattern, start, opts[:stop],
                      opts[:max_mismatches], opts[:max_insertions], opts[:max_deletions]))
    pos   = hit.first
    len   = hit.last
    match = Match.new(pos, len, seq[pos...pos + len])

    if block_given?
      yield match
    else
      matches << match
    end

    # Resume one position past the start of this match, so matches may overlap.
    start = pos + 1
  end

  block_given? ? nil : matches
end
|
|
143
|
+
|
|
144
|
+
private
|
|
145
|
+
|
|
146
|
+
inline do |builder|
  # The C code below relies on a MATCH(a, b) macro; presumably
  # add_ambiguity_macro (from BioDSL::Ambiguity) is what defines it -- confirm
  # in ambiguity.rb.
  add_ambiguity_macro(builder)

  # Backtrack algorithm for matching a pattern (p) starting in a sequence (s)
  # allowing for mis mismatches, ins insertions and del deletions. ss is the
  # start of the sequence, used only for reporting the match end point.
  #
  # The function first consumes the longest run of matching residues, then:
  #   * returns the offset of the match end relative to ss once the pattern is
  #     exhausted,
  #   * otherwise tries, in this order, a mismatch, an insertion and a
  #     deletion, each with one less of the respective budget,
  #   * returns 0 when every alternative fails. 0 therefore doubles as the
  #     "no match" value.
  #
  # state records the previous edit (1 = insertion, -1 = deletion, 0 = other)
  # and is used to avoid an insertion followed by a deletion and vice versa,
  # which are nonsense.
  builder.prefix %{
    unsigned int backtrack(
      char *ss, // Sequence start
      char *s, // Sequence
      char *p, // Pattern
      unsigned int mis, // Max mismatches
      unsigned int ins, // Max insertions
      unsigned int del, // Max deletions
      int state // Last event: mis, ins or del
    )
    {
      unsigned int r = 0;

      while (*s && MATCH(*s, *p)) ++s, ++p; // OK to always match longest segment

      if (!*p)
        return (unsigned int) (s - ss);
      else
      {
        if (mis && *s && *p && (r = backtrack(ss, s + 1, p + 1, mis - 1, ins, del, 0))) return r;
        if (ins && *p && (state != -1) && (r = backtrack(ss, s, p + 1, mis, ins - 1, del, 1))) return r;
        if (del && *s && (state != 1) && (r = backtrack(ss, s + 1, p, mis, ins, del - 1, -1))) return r;
      }

      return 0;
    }
  }

  # Find pattern (p) in a sequence (s) with at most mis mismatches, ins
  # insertions and del deletions. Every start position from start to stop
  # (inclusive) is tried in order; stop limits where a match may begin, not
  # where it may end. Returns a two element Array [match position, match
  # length] for the first hit, or nil when no position matches.
  builder.c %{
    VALUE scan_C(
      VALUE _s, // Sequence
      VALUE _p, // Pattern
      VALUE _start, // Search postition start
      VALUE _stop, // Search position stop
      VALUE _mis, // Maximum mismatches
      VALUE _ins, // Maximum insertions
      VALUE _del // Maximum deletions
    )
    {
      char *s = StringValuePtr(_s);
      char *p = StringValuePtr(_p);
      unsigned int start = FIX2UINT(_start);
      unsigned int stop = FIX2UINT(_stop);
      unsigned int mis = FIX2UINT(_mis);
      unsigned int ins = FIX2UINT(_ins);
      unsigned int del = FIX2UINT(_del);

      char *ss = s;
      int state = 0;
      unsigned int i = 0;
      unsigned int e = 0;
      VALUE tuple;

      s += start;

      for (i = start; i <= stop; i++, s++)
      {
        if ((e = backtrack(ss, s, p, mis, ins, del, state)))
        {
          tuple = rb_ary_new();
          rb_ary_push(tuple, INT2FIX((int) (s - ss)));
          rb_ary_push(tuple, INT2FIX((int) e - (s - ss)));
          return tuple;
        }
      }

      return Qnil;
    }
  }
end
|
|
225
|
+
|
|
226
|
+
# Class containing match information.
|
|
227
|
+
class Match
  attr_reader :pos, :length, :match

  # pos    - position in the sequence where the match begins.
  # length - number of sequence residues covered by the match.
  # match  - the matched stretch of sequence.
  def initialize(pos, length, match)
    @pos, @length, @match = pos, length, match
  end

  # First sequence position covered by the match (same value as pos).
  alias start pos

  # Last sequence position covered by the match.
  def stop
    pos + length - 1
  end

  # Colon separated "pos:length:match" representation.
  def to_s
    [pos, length, match].map(&:to_s).join(':')
  end
end
|
|
248
|
+
end
|
|
249
|
+
end
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
__END__
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
2
|
+
# #
|
|
3
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
4
|
+
# #
|
|
5
|
+
# This program is free software; you can redistribute it and/or #
|
|
6
|
+
# modify it under the terms of the GNU General Public License #
|
|
7
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
8
|
+
# of the License, or (at your option) any later version. #
|
|
9
|
+
# #
|
|
10
|
+
# This program is distributed in the hope that it will be useful, #
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
13
|
+
# GNU General Public License for more details. #
|
|
14
|
+
# #
|
|
15
|
+
# You should have received a copy of the GNU General Public License #
|
|
16
|
+
# along with this program; if not, write to the Free Software #
|
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, #
|
|
18
|
+
# USA. #
|
|
19
|
+
# #
|
|
20
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
21
|
+
# #
|
|
22
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
23
|
+
# #
|
|
24
|
+
# This software is part of BioDSL (www.github.com/maasha/BioDSL). #
|
|
25
|
+
# #
|
|
26
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
27
|
+
|
|
28
|
+
# Namespace for BioDSL.
|
|
29
|
+
module BioDSL
  # Error class for all exceptions to do with Digest.
  DigestError = Class.new(StandardError)

  # Namespace for Digest. The including class is expected to provide +seq+,
  # +seq_name+, +length+ and range based +[]+ returning an object that
  # responds to +seq_name=+.
  module Digest
    # Regexp fragment for every residue code accepted in a restriction
    # pattern. Built once instead of on every call.
    DISAMBIGUATION = {
      'A' => 'A',
      'T' => 'T',
      'U' => 'T',
      'C' => 'C',
      'G' => 'G',
      'M' => '[AC]',
      'R' => '[AG]',
      'W' => '[AT]',
      'S' => '[CG]',
      'Y' => '[CT]',
      'K' => '[GT]',
      'V' => '[ACG]',
      'H' => '[ACT]',
      'D' => '[AGT]',
      'B' => '[CGT]',
      'N' => '[GATC]'
    }.freeze
    private_constant :DISAMBIGUATION

    # Method to get the next digestion product from a sequence.
    #
    # pattern - String with the restriction pattern; ambiguity codes allowed.
    # cut_pos - Integer offset of the cut relative to the start of each
    #           pattern match.
    #
    # Yields each fragment, named "<seq_name>[<first>-<last>]" where first and
    # last are the inclusive positions of the fragment in the parent sequence.
    # Returns an Enumerator when no block is given.
    #
    # Raises DigestError if the pattern holds a residue that cannot be
    # disambiguated.
    def each_digest(pattern, cut_pos)
      return to_enum(:each_digest, pattern, cut_pos) unless block_given?

      regex  = disambiguate(pattern)
      offset = 0

      seq.upcase.scan(regex) do
        pos = Regexp.last_match.begin(0) + cut_pos

        # NOTE(review): cuts at or beyond length - 2, and negative cuts, are
        # not emitted, yet offset is still advanced below. Kept as is; confirm
        # that this bound is intended.
        if pos >= 0 && pos < length - 2
          subseq = self[offset...pos]
          # The fragment covers offset .. pos - 1, the same inclusive
          # convention as the trailing fragment below.
          subseq.seq_name = "#{seq_name}[#{offset}-#{pos - 1}]"

          yield subseq
        end

        offset = pos
      end

      offset = 0 if offset < 0 || offset > length
      subseq = self[offset..-1]
      subseq.seq_name = "#{seq_name}[#{offset}-#{length - 1}]"

      yield subseq
    end

    private

    # Method that returns a regexp object with a restriction
    # enzyme pattern with ambiguity codes substituted to the
    # appropriate regexp.
    #
    # Raises DigestError on a residue missing from DISAMBIGUATION.
    def disambiguate(pattern)
      fragments = pattern.upcase.each_char.map do |char|
        DISAMBIGUATION.fetch(char) do
          raise DigestError, "Could not disambiguate residue: #{char}"
        end
      end

      Regexp.new(fragments.join)
    end
  end
end
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
2
|
+
# #
|
|
3
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
4
|
+
# #
|
|
5
|
+
# This program is free software; you can redistribute it and/or #
|
|
6
|
+
# modify it under the terms of the GNU General Public License #
|
|
7
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
8
|
+
# of the License, or (at your option) any later version. #
|
|
9
|
+
# #
|
|
10
|
+
# This program is distributed in the hope that it will be useful, #
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
13
|
+
# GNU General Public License for more details. #
|
|
14
|
+
# #
|
|
15
|
+
# You should have received a copy of the GNU General Public License #
|
|
16
|
+
# along with this program; if not, write to the Free Software #
|
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #
|
|
18
|
+
# #
|
|
19
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
20
|
+
# #
|
|
21
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
22
|
+
# #
|
|
23
|
+
# This software is part of BioDSL (www.BioDSL.org). #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
|
|
27
|
+
module BioDSL
|
|
28
|
+
# Error class for Dynamic.
|
|
29
|
+
class DynamicError < StandardError
end
|
|
30
|
+
|
|
31
|
+
# Module containing code to locate nucleotide patterns in sequences allowing for
|
|
32
|
+
# ambiguity codes and a given maximum edit distance.
|
|
33
|
+
# Insertions are nucleotides found in the pattern but not in the sequence.
|
|
34
|
+
# Deletions are nucleotides found in the sequence but not in the pattern.
|
|
35
|
+
#
|
|
36
|
+
# Inspired by the paper by Bruno Woltzenlogel Paleo (page 197):
|
|
37
|
+
# http://www.logic.at/people/bruno/Papers/2007-GATE-ESSLLI.pdf
|
|
38
|
+
module Dynamic
|
|
39
|
+
extend BioDSL::Ambiguity
|
|
40
|
+
|
|
41
|
+
# ------------------------------------------------------------------------------
|
|
42
|
+
# str.patmatch(pattern[, pos[, max_edit_distance]])
|
|
43
|
+
# -> Match or nil
|
|
44
|
+
# str.patscan(pattern[, pos[, max_edit_distance]]) { |match|
|
|
45
|
+
# block
|
|
46
|
+
# }
|
|
47
|
+
# -> Match
|
|
48
|
+
#
|
|
49
|
+
# ------------------------------------------------------------------------------
|
|
50
|
+
# Method to iterate through a sequence to locate the first pattern match
|
|
51
|
+
# starting from a given position and allowing for a maximum edit distance.
|
|
52
|
+
# Return the first match patscan yields for +pattern+, searching from +pos+
# with at most +max_edit_distance+ edits; nil when patscan yields nothing.
def patmatch(pattern, pos = 0, max_edit_distance = 0)
  # The return inside the block leaves patmatch as soon as the first match
  # arrives; otherwise patscan's own (block form) result is passed through.
  patscan(pattern, pos, max_edit_distance) { |first_hit| return first_hit }
end
|
|
57
|
+
|
|
58
|
+
# ------------------------------------------------------------------------------
|
|
59
|
+
# str.patscan(pattern[, pos[, max_edit_distance]])
|
|
60
|
+
# -> Array or nil
|
|
61
|
+
# str.patscan(pattern[, pos[, max_edit_distance]]) { |match|
|
|
62
|
+
# block
|
|
63
|
+
# }
|
|
64
|
+
# -> Match
|
|
65
|
+
#
|
|
66
|
+
# ------------------------------------------------------------------------------
|
|
67
|
+
# Method to iterate through a sequence to locate pattern matches starting from a
|
|
68
|
+
# given position and allowing for a maximum edit distance. Matches found in
|
|
69
|
+
# block context return the Match object. Otherwise matches are returned in an
|
|
70
|
+
# Array.
|
|
71
|
+
# Locate every match of +pattern+ in the sequence, starting the search at
# +pos+ and allowing at most +max_edit_distance+ edits.
#
# With a block each Match is yielded and nil is returned; without a block an
# Array of Match objects is returned (empty when nothing matches).
#
# Raises DynamicError if the pattern is too long for the C matcher.
def patscan(pattern, pos = 0, max_edit_distance = 0)
  # match_C keeps its score vector in a fixed C array of MAX_PAT (1024)
  # entries and touches index pattern.length + 1, so a longer pattern would
  # run past the end of that array.
  max_pattern_length = 1022

  if pattern.length > max_pattern_length
    raise DynamicError, "Pattern too long: #{pattern.length} > #{max_pattern_length}"
  end

  matches = []

  # match_C returns [beg, length, mis, ins, del], or false when nothing matches.
  while (hit = match_C(seq, length, pattern, pattern.length, pos, max_edit_distance))
    match = Match.new(*hit, seq[hit[0]...hit[0] + hit[1]])

    if block_given?
      yield match
    else
      matches << match
    end

    # Resume one position past the start of this match.
    pos = match.beg + 1
  end

  block_given? ? nil : matches
end
|
|
88
|
+
|
|
89
|
+
private
|
|
90
|
+
|
|
91
|
+
inline do |builder|
  # The C code below relies on a MATCH(a, b) macro for comparing nucleotides
  # including ambiguity codes; presumably add_ambiguity_macro (from
  # BioDSL::Ambiguity) is what defines it -- confirm in ambiguity.rb.
  add_ambiguity_macro(builder)

  # Capacity of the score vector that match_C allocates on the C stack.
  builder.prefix %{
    #define MAX_PAT 1024
  }

  # One cell of the score vector: number of mismatches, insertions and
  # deletions plus the total edit distance (ed).
  builder.prefix %{
    typedef struct
    {
      unsigned int mis;
      unsigned int ins;
      unsigned int del;
      unsigned int ed;
    } score;
  }

  # Initialise the score vector: cell i (for i >= 1) starts out as i
  # insertions with edit distance i. Cell 0 is not written here.
  builder.prefix %{
    void vector_init(score *vec, unsigned int vec_len)
    {
      unsigned int i = 0;

      for (i = 1; i < vec_len; i++)
      {
        vec[i].ins = i;
        vec[i].ed = i;
      }
    }
  }

  # Debugging aid that prints every cell of the score vector to stdout. It is
  # not called from any code in this inline block.
  builder.prefix %{
    void vector_print(score *vec, unsigned int vec_len)
    {
      unsigned int i = 0;

      for (i = 0; i < vec_len; i++)
      {
        printf("i: %d mis: %d ins: %d del: %d ed: %d\\n", i, vec[i].mis, vec[i].ins, vec[i].del, vec[i].ed);
      }

      printf("---\\n");
    }
  }

  # A match is found when the edit distance in the last cell (index pat_len)
  # is within the allowed maximum.
  builder.prefix %{
    int match_found(score *vec, unsigned int pat_len, unsigned int max_ed)
    {
      return (vec[pat_len].ed <= max_ed);
    }
  }

  # Update the score vector in place for the sequence residue at pos. For each
  # pattern residue the new cell is the diagonal cell on a match; otherwise it
  # is the neighbour with the lowest edit distance -- ties resolved in the
  # order deletion, mismatch, insertion -- with the corresponding counter and
  # ed incremented. Since one of the three neighbours always has the lowest
  # ed, the final else branch (which prints and calls exit) is a guard that
  # should never run.
  # NOTE(review): the loop reads vec[i + 2], i.e. up to vec[pat_len + 1], one
  # cell beyond the vec_len cells set up by vector_init.
  builder.prefix %{
    void vector_update(score *vec, char *seq, char *pat, unsigned int pat_len, unsigned int pos)
    {
      score diag = vec[0];
      score up = {0, 0, 0, 0}; // insertion
      score left = vec[1]; // deletion
      score new = {0, 0, 0, 0};

      unsigned int i = 0;

      for (i = 0; i < pat_len; i++)
      {
        if (MATCH(seq[pos], pat[i])) // match
        {
          new = diag;
        }
        else
        {
          if (left.ed <= diag.ed && left.ed <= up.ed) // deletion
          {
            new = left;
            new.del++;
          }
          else if (diag.ed <= up.ed && diag.ed <= left.ed) // mismatch
          {
            new = diag;
            new.mis++;
          }
          else if (up.ed <= diag.ed && up.ed <= left.ed) // insertion
          {
            new = up;
            new.ins++;
          }
          else
          {
            printf("This should not happen\\n");
            exit(1);
          }

          new.ed++;
        }

        diag = vec[i + 1];
        up = new;
        left = vec[i + 2];

        vec[i + 1] = new;
      }
    }
  }

  # Walk the sequence from pos to the end, updating the score vector for each
  # residue, and stop at the first position where match_found holds. Returns
  # the Array [match_beg, match_len, mis, ins, del] for that position, or
  # false when the end of the sequence is reached without a match.
  # NOTE(review): vec has MAX_PAT cells and nothing here checks pat_len
  # against that capacity.
  builder.c %{
    VALUE match_C(
      VALUE _seq, // Sequence
      VALUE _seq_len, // Sequence length
      VALUE _pat, // Pattern
      VALUE _pat_len, // Pattern length
      VALUE _pos, // Offset position
      VALUE _max_ed // Maximum edit distance
    )
    {
      char *seq = (char *) StringValuePtr(_seq);
      char *pat = (char *) StringValuePtr(_pat);
      unsigned int seq_len = FIX2UINT(_seq_len);
      unsigned int pat_len = FIX2UINT(_pat_len);
      unsigned int pos = FIX2UINT(_pos);
      unsigned int max_ed = FIX2UINT(_max_ed);

      score vec[MAX_PAT] = {0};
      unsigned int vec_len = pat_len + 1;
      unsigned int match_beg = 0;
      unsigned int match_len = 0;

      VALUE match_ary;

      vector_init(vec, vec_len);

      while (pos < seq_len)
      {
        vector_update(vec, seq, pat, pat_len, pos);

        if (match_found(vec, pat_len, max_ed))
        {
          match_len = pat_len - vec[pat_len].ins + vec[pat_len].del;
          match_beg = pos - match_len + 1;

          match_ary = rb_ary_new();
          rb_ary_push(match_ary, INT2FIX(match_beg));
          rb_ary_push(match_ary, INT2FIX(match_len));
          rb_ary_push(match_ary, INT2FIX(vec[pat_len].mis));
          rb_ary_push(match_ary, INT2FIX(vec[pat_len].ins));
          rb_ary_push(match_ary, INT2FIX(vec[pat_len].del));

          return match_ary;
        }

        pos++;
      }

      return Qfalse; // no match
    }
  }
end
|
|
246
|
+
|
|
247
|
+
# Value holder for one pattern match.
class Match
  attr_accessor :beg, :length, :mis, :ins, :del, :match

  # beg    - position in the sequence where the match begins.
  # length - number of sequence residues covered by the match.
  # mis    - number of mismatches in the match.
  # ins    - number of insertions in the match.
  # del    - number of deletions in the match.
  # match  - the matched stretch of sequence.
  def initialize(beg, length, mis, ins, del, match)
    @beg, @length = beg, length
    @mis, @ins, @del = mis, ins, del
    @match = match
  end
end
|
|
259
|
+
end
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
__END__
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
2
|
+
# #
|
|
3
|
+
# Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). #
|
|
4
|
+
# #
|
|
5
|
+
# This program is free software; you can redistribute it and/or #
|
|
6
|
+
# modify it under the terms of the GNU General Public License #
|
|
7
|
+
# as published by the Free Software Foundation; either version 2 #
|
|
8
|
+
# of the License, or (at your option) any later version. #
|
|
9
|
+
# #
|
|
10
|
+
# This program is distributed in the hope that it will be useful, #
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
13
|
+
# GNU General Public License for more details. #
|
|
14
|
+
# #
|
|
15
|
+
# You should have received a copy of the GNU General Public License #
|
|
16
|
+
# along with this program; if not, write to the Free Software #
|
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #
|
|
18
|
+
# #
|
|
19
|
+
# http://www.gnu.org/copyleft/gpl.html #
|
|
20
|
+
# #
|
|
21
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
22
|
+
# #
|
|
23
|
+
# This software is part of BioDSL (www.BioDSL.org). #
|
|
24
|
+
# #
|
|
25
|
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< #
|
|
26
|
+
|
|
27
|
+
module BioDSL
  # Error class for all exceptions to do with Homopolymer.
  class HomopolymerError < StandardError
  end

  # Mixin for locating homopolymers (runs of a single residue) in the
  # including object's +seq+.
  module Homopolymer
    # Find every run of A, T, G, C or N that is at least +min+ residues long in
    # the upcased sequence.
    #
    # With a block each Homopolymer is yielded and self is returned; without a
    # block an Array of Homopolymer objects is returned.
    #
    # Raises HomopolymerError if +min+ is zero or negative.
    def each_homopolymer(min = 1)
      raise HomopolymerError, "Bad min value: #{min}" if min <= 0

      # One "<residue>{min,}" alternative per residue, tried in this order.
      runs  = Regexp.new(%w[A T G C N].map { |residue| "#{residue}{#{min},}" }.join('|'))
      found = []

      seq.upcase.scan(runs) do |run|
        # The start offset of the current match is the position of the run.
        hit = Homopolymer.new(run, run.length, Regexp.last_match.begin(0))

        block_given? ? yield(hit) : found << hit
      end

      return self if block_given?

      found
    end

    # Value holder for one homopolymer: the matched residues, their number and
    # the position of the run in the sequence.
    class Homopolymer
      attr_reader :pattern, :length, :pos

      def initialize(pattern, length, pos)
        @pattern, @length, @pos = pattern, length, pos
      end
    end
  end
end
|