genfrag 0.0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.bnsignore +16 -0
- data/History.txt +4 -0
- data/LICENSE.txt +58 -0
- data/README.rdoc +40 -0
- data/Rakefile +53 -0
- data/bin/genfrag +8 -0
- data/lib/genfrag.rb +129 -0
- data/lib/genfrag/app.rb +105 -0
- data/lib/genfrag/app/command.rb +145 -0
- data/lib/genfrag/app/index_command.rb +227 -0
- data/lib/genfrag/app/index_command/db.rb +105 -0
- data/lib/genfrag/app/search_command.rb +298 -0
- data/lib/genfrag/app/search_command/match.rb +165 -0
- data/lib/genfrag/app/search_command/process_file.rb +125 -0
- data/lib/genfrag/app/search_command/trim.rb +121 -0
- data/lib/genfrag/debug.rb +0 -0
- data/spec/data/index_command/in/a.fasta +109 -0
- data/spec/data/index_command/out/1-a_lookup.tdf +4 -0
- data/spec/data/index_command/out/2-a_lookup.db +0 -0
- data/spec/data/index_command/out/3-a_lookup.tdf +2 -0
- data/spec/data/index_command/out/4-a_lookup.db +0 -0
- data/spec/data/index_command/out/5-a_lookup.tdf +4 -0
- data/spec/data/index_command/out/6-a_lookup.db +0 -0
- data/spec/data/index_command/out/a.fasta.db +0 -0
- data/spec/data/index_command/out/a.fasta.tdf +6 -0
- data/spec/genfrag/app/command_spec.rb +55 -0
- data/spec/genfrag/app/index_command_spec.rb +258 -0
- data/spec/genfrag/app/search_command/match_spec.rb +77 -0
- data/spec/genfrag/app/search_command/process_file_spec.rb +185 -0
- data/spec/genfrag/app/search_command/trim_spec.rb +75 -0
- data/spec/genfrag/app/search_command_spec.rb +260 -0
- data/spec/genfrag/app_spec.rb +77 -0
- data/spec/genfrag_spec.rb +87 -0
- data/spec/spec_helper.rb +56 -0
- data/tasks/ann.rake +80 -0
- data/tasks/bones.rake +20 -0
- data/tasks/gem.rake +201 -0
- data/tasks/git.rake +40 -0
- data/tasks/notes.rake +27 -0
- data/tasks/post_load.rake +34 -0
- data/tasks/rdoc.rake +50 -0
- data/tasks/rubyforge.rake +55 -0
- data/tasks/setup.rb +300 -0
- data/tasks/spec.rake +54 -0
- data/tasks/svn.rake +47 -0
- data/tasks/test.rake +40 -0
- metadata +136 -0
@@ -0,0 +1,165 @@
|
|
1
|
+
|
2
|
+
module Genfrag
|
3
|
+
class App
|
4
|
+
|
5
|
+
class SearchCommand < Command
|
6
|
+
|
7
|
+
# Does the sequence match the adapter?
#
# Tests one end of a fragment against the adapter specificity and, on a
# match, returns the strands with the restriction-site tail masked out.
#
# five_or_three   - 5 to test the leading end using @ops.re5 and the
#                   adapter5_* entries of @adapters; 3 to test the trailing
#                   end using @ops.re3 and the adapter3_* entries.
# primary_frag    - primary strand String. Its leading '.' characters
#                   (trailing ones for the 3' case) are counted as padding.
# complement_frag - complement strand String; handed back unchanged.
# raw_frag        - not used by the active code path.
# trim            - not used by the active code path (only the disabled
#                   draft code below referred to it).
#
# None of the String arguments is modified.
#
# Returns false when the bases that follow the padding and the enzyme tail
# do not start with the adapter specificity (case-insensitive), or when the
# fragment is too short to contain any such bases. Otherwise returns
# [new_primary_frag, new_complement_frag], where the tail positions of the
# primary strand are replaced by '+'.
#
# Raises RuntimeError when five_or_three is neither 5 nor 3, and
# RuntimeError ('FIXME - not functional yet') when an adapter sequence or an
# adapter size is configured, because those modes are not implemented.
def matches_adapter(five_or_three, primary_frag, complement_frag, raw_frag, trim)
  adapter_sequence = nil

  if five_or_three == 5
    tail = right_tail_of(Bio::RestrictionEnzyme::DoubleStranded.new(@ops.re5).aligned_strands_with_cuts.primary)

    adapter_specificity = @adapters[:adapter5_specificity].upcase
    adapter_sequence = @adapters[:adapter5_sequence].upcase if @adapters[:adapter5_sequence]
    adapter_size = @adapters[:adapter5_size]
    oriented_primary = primary_frag

  elsif five_or_three == 3
    tail = left_tail_of(Bio::RestrictionEnzyme::DoubleStranded.new(@ops.re3).aligned_strands_with_cuts.primary)

    # A leading '_' means: drop the underscore and reverse the remainder.
    # Without it the specificity is forward-complemented instead.
    if @adapters[:adapter3_specificity][0].chr == '_'
      adapter_specificity = @adapters[:adapter3_specificity][1..-1].reverse.upcase
    else
      adapter_specificity = Bio::Sequence::NA.new(@adapters[:adapter3_specificity]).forward_complement.to_s.upcase
    end
    adapter_sequence = Bio::Sequence::NA.new(@adapters[:adapter3_sequence]).forward_complement.to_s.upcase if @adapters[:adapter3_sequence]
    adapter_size = @adapters[:adapter3_size]

    # Work on a reversed COPY so the 3' end can be handled exactly like the
    # 5' end without leaving the caller's strings reversed.
    oriented_primary = primary_frag.reverse

  else
    raise "First argument to matches_adapter must be a '5' or a '3'. Received: #{five_or_three.inspect}"
  end

  # TEMP Check for match
  dots_on_primary = oriented_primary[/\A\.*/].size
  lead_in = tail.size + dots_on_primary
  # Slicing past the end yields nil; treat a too-short fragment as "no bases".
  remainder = oriented_primary[lead_in..-1].to_s
  return false if remainder.delete('.') !~ /^#{adapter_specificity}/i

  # Disabled draft (trim_primary / trim_complement were the trim[:from_*_primary]
  # and trim[:from_*_complement] values for the tested end):
  #return false if raw_frag[ [trim_primary, trim_complement].max .. -1 ] !~ /^#{adapter_specificity}/i
  #overhang = [trim_primary, trim_complement].max - [trim_primary, trim_complement].min
  #lead_in = overhang

  if adapter_sequence
    raise 'FIXME - not functional yet'

    # if lead_in >= adapter_sequence.size
    #   # need to preserve dots on primary string
    #   new_primary_frag = ('.' * (lead_in - adapter_sequence.size)) + adapter_sequence + primary_frag[ lead_in .. -1 ]
    #   new_complement_frag = complement_frag
    # else
    #   # need to add dots to beginning of complement string
    #   new_primary_frag = adapter_sequence + primary_frag[ lead_in .. -1 ]
    #   new_complement_frag = ('.' * (adapter_sequence.size - lead_in) ) + complement_frag
    # end

  elsif adapter_size
    raise 'FIXME - not functional yet'

    # # only the size and the specificity of the adapter has been provided
    # size_of_specificity = adapter_specificity.size
    # size_of_sequence = adapter_size - size_of_specificity
    # if lead_in >= size_of_sequence
    #   # need to preserve dots on primary string
    #   new_primary_frag = primary_frag[ 0 .. (lead_in - 1) ].upcase + primary_frag[ lead_in .. -1 ]
    #   new_complement_frag = complement_frag
    # else
    #   # need to add dots to beginning of complement string
    #   new_primary_frag = ('+' * (size_of_sequence - lead_in) ) + primary_frag[ 0 .. (lead_in - 1) ].upcase + primary_frag[ lead_in .. -1 ]
    #   new_complement_frag = ('.' * (size_of_sequence - lead_in) ) + complement_frag
    # end

  else
    # only the specificity has been provided
    new_primary_frag = ('.' * dots_on_primary) + ('+' * tail.size) + remainder
    new_complement_frag = complement_frag
  end

  # new_primary_frag is a freshly built String, so flipping it back in place
  # cannot affect the caller's data.
  new_primary_frag.reverse! if five_or_three == 3

  return [new_primary_frag, new_complement_frag]
end
|
106
|
+
|
107
|
+
|
108
|
+
# Find the fragments that match the search parameters.
#
# sizes - collection yielding (raw_size, info) pairs, e.g. a Hash keyed by
#         raw fragment size.
# left  - Hash whose :trim_from_both is subtracted from each raw size.
# right - Hash whose :trim_from_both is subtracted from each raw size.
#
# Reads @ops.size (a single value or an Array of values) and the
# :adapter5_size / :adapter3_size entries of @adapters.
#
# When the requested size is 0, nil or "0", every info entry is returned.
# Otherwise an entry is returned when its trimmed size lies within
# (adapter5_size + adapter3_size) of any requested size. Entries are listed
# in the order they are first matched, and each entry appears only once even
# if several requested sizes accept it.
#
# Returns an Array of the matching info values.
def find_matching_fragments(sizes, left, right)
  hits = []
  tolerance = (@adapters[:adapter5_size] || 0) + (@adapters[:adapter3_size] || 0)
  seek_sizes = [@ops.size].flatten

  if seek_sizes == [0] || seek_sizes == [nil] || seek_sizes == ["0"]
    sizes.each { |_raw_size, info| hits << info }
    return hits
  end

  # Identity-keyed so an entry is only skipped when that very object was
  # already collected for an earlier seek size.
  collected = {}.compare_by_identity

  seek_sizes.each do |seek_size|
    seek_size = seek_size.to_i
    sizes.each do |raw_size, info|
      next if collected.key?(info)
      frag_size = raw_size - left[:trim_from_both] - right[:trim_from_both]
      next unless frag_size >= seek_size - tolerance && frag_size <= seek_size + tolerance
      collected[info] = true
      hits << info
    end
  end

  return hits
end
|
133
|
+
|
134
|
+
# Return the bases that follow the LAST cut symbol ('^') in s, with the
# separating spaces removed.
#
# 'PpiI'  => "n n n n n n^n n n n n n n g a a c n n n n n c t c n n n n n n n n n n n n n^n"
#         => 'n'
# 'BstYI' => "r^g a t c y"
#         => 'gatcy'
#
# Raises RuntimeError when s contains no '^'.
def right_tail_of(s)
  # The greedy '.*' pushes the match to the final '^' on the line.
  tail = s[/.*\^(.*)/, 1]
  raise "Sequence #{s} has no cuts (defined by symbol '^')" if tail.nil?
  tail.delete(' ')
end
|
146
|
+
|
147
|
+
# Return the bases that precede the FIRST cut symbol ('^') in s, with the
# separating spaces removed.
#
# 'PpiI'  => "n n n n n n^n n n n n n n g a a c n n n n n c t c n n n n n n n n n n n n n^n"
#         => 'nnnnnn'
# 'BstYI' => "r^g a t c y"
#         => 'r'
#
# Raises RuntimeError when s contains no '^'.
def left_tail_of(s)
  head = s[/([^\^]*)\^/, 1]
  raise "Sequence #{s} has no cuts (defined by symbol '^')" if head.nil?
  head.delete(' ')
end
|
160
|
+
|
161
|
+
end # class SearchCommand
|
162
|
+
end # class App
|
163
|
+
end # module Genfrag
|
164
|
+
|
165
|
+
# EOF
|
@@ -0,0 +1,125 @@
|
|
1
|
+
|
2
|
+
module Genfrag
|
3
|
+
class App
|
4
|
+
|
5
|
+
class SearchCommand < Command
|
6
|
+
|
7
|
+
class ProcessFile
|
8
|
+
class << self
|
9
|
+
# Process the standardized Fasta file (tdf format).
#
# f_normalized_fasta - Array of tab-delimited lines. Element 0 is skipped
#                      (presumably a column-header row -- confirm against
#                      the code that writes the file).
#
# Every other non-blank line is split on tabs into an id, a CSV-encoded list
# of definitions and a sequence. Blank lines are ignored and an empty Array
# yields an empty result.
#
# Returns a Hash of Integer id => {:definitions => Array, :sequence => String}.
def process_tdf_fasta_file(f_normalized_fasta)
  sequences = {}
  # Array#[] with a range returns nil for an empty Array.
  (f_normalized_fasta[1..-1] || []).each do |line|
    fields = line.chomp.split("\t")
    next if fields.empty?
    sequences[fields[0].to_i] = {
      :definitions => CSV.parse_line(fields[1]),
      :sequence => fields[2]
    }
  end
  return sequences
end
|
20
|
+
|
21
|
+
# Process the standardized Fasta file (sqlite3 format).
#
# db_normalized_fasta - object whose #execute(sql) yields one row per record
#                       of the db_normalized_fasta table. Column 0 is read
#                       as the id, column 1 as a CSV-encoded list of
#                       definitions and column 2 as the sequence.
#
# Returns a Hash of Integer id => {:definitions => Array, :sequence => String}.
def process_db_fasta_file(db_normalized_fasta)
  sequences = {}
  db_normalized_fasta.execute("select * from db_normalized_fasta") do |row|
    sequences[row[0].to_i] = {
      :definitions => CSV.parse_line(row[1]),
      :sequence => row[2]
    }
  end
  return sequences
end
|
30
|
+
|
31
|
+
# Process the fragment frequency file (tdf format).
#
# f_freq_lookup - Array of tab-delimited lines. Element 0 is skipped
#                 (presumably a column-header row -- confirm against the
#                 code that writes the file). In every other non-blank line
#                 column 1 is the fragment size and column 2 is a ', '
#                 separated list of "offset fasta_id" pairs; column 0 is
#                 not used.
#
# Blank lines are ignored and an empty Array yields an empty result.
#
# Returns a Hash of Integer size => Array of
# {:offset => Integer, :fasta_id => Integer, :raw_size => Integer}.
def process_tdf_freq_lookup(f_freq_lookup)
  sizes = {}
  # Array#[] with a range returns nil for an empty Array.
  (f_freq_lookup[1..-1] || []).each do |line|
    fields = line.chomp.split("\t")
    next if fields.empty?
    size = fields[1].to_i
    sizes[size] = fields[2].split(', ').map do |entry|
      offset, fasta_id = entry.split(' ')
      { :offset => offset.to_i, :fasta_id => fasta_id.to_i, :raw_size => size }
    end
  end
  return sizes
end
|
52
|
+
|
53
|
+
# Process the fragment frequency file (sqlite3 format).
#
# db_freq_lookup - object whose #execute(sql) yields one row per record of
#                  the db_freq_lookup table. Column 1 is read as the
#                  fragment size and column 2 as a ', ' separated list of
#                  "offset fasta_id" pairs; column 0 is not used.
#
# Returns a Hash of Integer size => Array of
# {:offset => Integer, :fasta_id => Integer, :raw_size => Integer}.
def process_db_freq_lookup(db_freq_lookup)
  sizes = {}
  db_freq_lookup.execute("select * from db_freq_lookup") do |row|
    size = row[1].to_i
    sizes[size] = row[2].split(', ').map do |entry|
      offset, fasta_id = entry.split(' ')
      { :offset => offset.to_i, :fasta_id => fasta_id.to_i, :raw_size => size }
    end
  end
  return sizes
end
|
73
|
+
|
74
|
+
# Process the adapter file (tdf format).
#
# f_adapters    - Array of tab-delimited lines. Element 0 is skipped
#                 (presumably a column-header row -- confirm against the
#                 adapter file format). Columns of the remaining lines:
#                 name, worksense (first character must be '5' or '3'),
#                 sequence, specificity.
# adapter5_name - pattern for the 5' adapter name, or nil to skip the lookup.
# adapter3_name - pattern for the 3' adapter name, or nil to skip the lookup.
#
# The names are interpolated into case-insensitive regular expressions, so a
# partial name matches. The FIRST row with the right worksense and a matching
# name wins. Reading stops once every requested adapter has been found. A
# trailing "|" followed by any number of N characters is stripped from the
# sequence.
#
# Returns a Hash with :adapter5_sequence, :adapter5_specificity,
# :adapter3_sequence and :adapter3_specificity (nil for anything not
# requested).
#
# Raises RuntimeError when a row that is read has a worksense other than 5
# or 3, or when a requested adapter is not found (including when the file
# is empty).
def process_tdf_adapters(f_adapters, adapter5_name=nil, adapter3_name=nil)
  adapter5_sequence = nil
  adapter3_sequence = nil
  adapter5_specificity = nil
  adapter3_specificity = nil
  adapter5_needs_to_be_found = !adapter5_name.nil?
  adapter3_needs_to_be_found = !adapter3_name.nil?

  # Array#[] with a range returns nil for an empty Array; fall through to the
  # "not found" checks below instead of failing on nil.
  (f_adapters[1..-1] || []).each do |line|
    break unless adapter5_needs_to_be_found || adapter3_needs_to_be_found
    fields = line.chomp.split("\t")
    next if fields.empty?

    name = fields[0]
    worksense = fields[1][0].chr.to_i
    sequence = fields[2].gsub(/\|N*$/i, '')
    specificity = fields[3] # what it's supposed to match
    if worksense != 3 && worksense != 5
      raise "Unknown worksense value \"#{fields[1]}\". First character of column must be a '5' or a '3'."
    end

    # The needs_to_be_found guards keep a later row from replacing an
    # adapter that was already located while the other one is still sought.
    if adapter5_name && adapter5_needs_to_be_found && worksense == 5 && name =~ /#{adapter5_name}/i
      adapter5_sequence = sequence
      adapter5_specificity = specificity
      adapter5_needs_to_be_found = false
    elsif adapter3_name && adapter3_needs_to_be_found && worksense == 3 && name =~ /#{adapter3_name}/i
      adapter3_sequence = sequence
      adapter3_specificity = specificity
      adapter3_needs_to_be_found = false
    end
  end

  if adapter5_name && adapter5_needs_to_be_found
    raise "named-adapter5 ('#{adapter5_name}') with the worksense '5' not found."
  elsif adapter3_name && adapter3_needs_to_be_found
    raise "named-adapter3 ('#{adapter3_name}') with the worksense '3' not found."
  end

  return {
    :adapter5_sequence => adapter5_sequence,
    :adapter5_specificity => adapter5_specificity,
    :adapter3_sequence => adapter3_sequence,
    :adapter3_specificity => adapter3_specificity
  }
end
|
117
|
+
|
118
|
+
end
|
119
|
+
end # class ProcessFile
|
120
|
+
|
121
|
+
end # class SearchCommand
|
122
|
+
end # class App
|
123
|
+
end # module Genfrag
|
124
|
+
|
125
|
+
# EOF
|
@@ -0,0 +1,121 @@
|
|
1
|
+
|
2
|
+
module Genfrag
|
3
|
+
class App
|
4
|
+
|
5
|
+
class SearchCommand < Command
|
6
|
+
|
7
|
+
# Keep track of extraneous nucleotides that should be removed from the final fragment
|
8
|
+
#
|
9
|
+
# Example BstYI used as RE5
|
10
|
+
# BstYI -
|
11
|
+
# 5' - r^g a t c y - 3'
|
12
|
+
# 3' - y c t a g^r - 5'
|
13
|
+
#
|
14
|
+
# re5_ds.cut_locations.primary # => [0]
|
15
|
+
# re5_ds.cut_locations.complement # => [4]
|
16
|
+
# re5_ds.aligned_strands.primary.size # => 6
|
17
|
+
#
|
18
|
+
# # number of nucleotides to trim from the left side on the primary strand
|
19
|
+
# re5_ds.cut_locations.primary.max + 1 # => 1
|
20
|
+
#
|
21
|
+
# # number of nucleotides to trim from the left side on the complement strand
|
22
|
+
# re5_ds.cut_locations.complement.max + 1 # => 5
|
23
|
+
#
|
24
|
+
#
|
25
|
+
# Example BstYI used as RE3
|
26
|
+
# BstYI -
|
27
|
+
# 5' - r^g a t c y - 3'
|
28
|
+
# 3' - y c t a g^r - 5'
|
29
|
+
#
|
30
|
+
# re3_ds.cut_locations.primary # => [0]
|
31
|
+
# re3_ds.cut_locations.complement # => [4]
|
32
|
+
# re3_ds.aligned_strands.primary.size # => 6
|
33
|
+
#
|
34
|
+
# # number of nucleotides to trim from the right side on the primary strand
|
35
|
+
# re3_ds.aligned_strands.primary.size - (re3_ds.cut_locations.primary.min + 1) # => 5
|
36
|
+
#
|
37
|
+
# # number of nucleotides to trim from the right side on the complement strand
|
38
|
+
# re3_ds.aligned_strands.primary.size - (re3_ds.cut_locations.complement.min + 1) # => 1
|
39
|
+
#
|
40
|
+
#
|
41
|
+
# Example PpiI used as RE5
|
42
|
+
# PpiI -
|
43
|
+
# 5' - n n n n n n^n n n n n n n g a a c n n n n n c t c n n n n n n n n n n n n n^n - 3'
|
44
|
+
# 3' - n^n n n n n n n n n n n n c t t g n n n n n g a g n n n n n n n n^n n n n n n - 5'
|
45
|
+
#
|
46
|
+
# re5_ds.cut_locations.primary # => [5, 37]
|
47
|
+
# re5_ds.cut_locations.complement # => [0, 32]
|
48
|
+
# re5_ds.aligned_strands.primary.size # => 39
|
49
|
+
#
|
50
|
+
# # number of nucleotides to trim from the left side on the primary strand
|
51
|
+
# re5_ds.cut_locations.primary.max + 1 # => 38
|
52
|
+
#
|
53
|
+
# # number of nucleotides to trim from the left side on the complement strand
|
54
|
+
# re5_ds.cut_locations.complement.max + 1 # => 33
|
55
|
+
#
|
56
|
+
#
|
57
|
+
# Example PpiI used as RE3
|
58
|
+
# PpiI -
|
59
|
+
# 5' - n n n n n n^n n n n n n n g a a c n n n n n c t c n n n n n n n n n n n n n^n - 3'
|
60
|
+
# 3' - n^n n n n n n n n n n n n c t t g n n n n n g a g n n n n n n n n^n n n n n n - 5'
|
61
|
+
#
|
62
|
+
# re3_ds.cut_locations.primary # => [5, 37]
|
63
|
+
# re3_ds.cut_locations.complement # => [0, 32]
|
64
|
+
# re3_ds.aligned_strands.primary.size # => 39
|
65
|
+
#
|
66
|
+
# # number of nucleotides to trim from the right side on the primary strand
|
67
|
+
# re3_ds.aligned_strands.primary.size - (re3_ds.cut_locations.primary.min + 1) # => 33
|
68
|
+
#
|
69
|
+
# # number of nucleotides to trim from the right side on the complement strand
|
70
|
+
# re3_ds.aligned_strands.primary.size - (re3_ds.cut_locations.complement.min + 1) # => 38
|
71
|
+
#
|
72
|
+
# Compute how many nucleotides each enzyme leaves to be trimmed from each
# strand (the BstYI and PpiI walk-throughs in the comment preceding this
# method show the arithmetic).
#
# re5_ds - enzyme for the left side; must respond to
#          cut_locations.primary and cut_locations.complement.
# re3_ds - enzyme for the right side; must additionally respond to
#          aligned_strands.primary.size.
#
# Returns a Hash with :from_left_primary, :from_left_complement,
# :from_right_primary and :from_right_complement.
def calculate_trim_for_nucleotides(re5_ds, re3_ds)
  re5_cuts = re5_ds.cut_locations
  re3_cuts = re3_ds.cut_locations
  re3_width = re3_ds.aligned_strands.primary.size

  trim = {}
  # Left side: everything up to and including the right-most cut.
  trim[:from_left_primary] = re5_cuts.primary.max + 1
  trim[:from_left_complement] = re5_cuts.complement.max + 1
  # Right side: everything after the left-most cut.
  trim[:from_right_primary] = re3_width - (re3_cuts.primary.min + 1)
  trim[:from_right_complement] = re3_width - (re3_cuts.complement.min + 1)
  return trim
end
|
80
|
+
|
81
|
+
# Calculate left and right trims.
#
# trim - Hash with :from_left_primary, :from_left_complement,
#        :from_right_primary and :from_right_complement counts.
#
# Returns [left, right]. Each is a Hash with
#   :dot_out_from_primary - true when the primary strand has the larger trim
#                           count on that side, i.e. the primary strand is
#                           the one to pad with dots. When false the
#                           complement is assumed to need the padding.
#   :trim_from_both       - the smaller of the two counts, i.e. how much
#                           gets cut off both strands.
def calculate_left_and_right_trims(trim)
  left = {
    :dot_out_from_primary => (trim[:from_left_primary] > trim[:from_left_complement]),
    :trim_from_both => [trim[:from_left_primary], trim[:from_left_complement]].min
  }
  right = {
    :dot_out_from_primary => (trim[:from_right_primary] > trim[:from_right_complement]),
    :trim_from_both => [trim[:from_right_primary], trim[:from_right_complement]].min
  }
  return [left, right]
end
|
95
|
+
|
96
|
+
# Do the trimming.
#
# primary_frag    - primary strand String.
# complement_frag - complement strand String, aligned with the primary.
# left, right     - Hashes with :dot_out_from_primary and :trim_from_both
#                   for the corresponding side.
# trim            - Hash with the :from_left_* / :from_right_* counts.
#
# On each side the strand with the larger trim count has that many
# characters replaced by '.', then :trim_from_both characters are removed
# from both strands, so the dots that remain mark the single-stranded
# overhang. The arguments themselves are not modified.
#
# Returns [primary_frag, complement_frag].
def trim_sequences(primary_frag, complement_frag, left, right, trim)
  if left[:dot_out_from_primary]
    count = trim[:from_left_primary]
    primary_frag = "." * count + primary_frag[count..-1]
  else
    count = trim[:from_left_complement]
    complement_frag = "." * count + complement_frag[count..-1]
  end

  if right[:dot_out_from_primary]
    count = trim[:from_right_primary]
    primary_frag = primary_frag[0..(-1 - count)] + "." * count
  else
    # The complement's own count applies here, mirroring the left side.
    count = trim[:from_right_complement]
    complement_frag = complement_frag[0..(-1 - count)] + "." * count
  end

  shared = left[:trim_from_both]..(-1 - right[:trim_from_both])
  primary_frag = primary_frag[shared]
  complement_frag = complement_frag[shared]

  return [primary_frag, complement_frag]
end
|
116
|
+
|
117
|
+
end # class SearchCommand
|
118
|
+
end # class App
|
119
|
+
end # module Genfrag
|
120
|
+
|
121
|
+
# EOF
|
File without changes
|
@@ -0,0 +1,109 @@
|
|
1
|
+
|
2
|
+
>At1g02580 mRNA (2291 bp) UTR's and CDS
|
3
|
+
aggcgagtggttaatggagaaggaaaaccatgaggacgatggtgagggtttgccacccgaactaaatcagataaaa
|
4
|
+
gagcaaatcgaaaaggagagatttctgcatatcaagagaaaattcgagctgagatacattccaagtgtggctactc
|
5
|
+
atgcttcacaccatcaatcgtttgacttaaaccagcccgctgcagaggatgataatggaggagacaacaaatcact
|
6
|
+
tttgtcgagaatgcaaaacccacttcgtcatttcagtgcctcatctgattataattcttacgaagatcaaggttat
|
7
|
+
gttcttgatgaggatcaagattatgctcttgaagaagatgtaccattatttcttgatgaagatgtaccattattac
|
8
|
+
caagtgtcaagcttccaattgttgagaagctaccacgatccattacatgggtcttcaccaaaagtagccagctgat
|
9
|
+
ggctgaaagtgattctgtgattggtaagagacaaatctattatttgaatggtgaggcactagaattgagcagtgaa
|
10
|
+
gaagatgaggaagatgaagaagaagatgaggaagaaatcaagaaagaaaaatgcgaattttctgaagatgtagacc
|
11
|
+
gatttatatggacggttgggcaggactatggtttggatgatctggtcgtgcggcgtgctctcgccaagtacctcga
|
12
|
+
agtggatgtttcggacatattggaaagatacaatgaactcaagcttaagaatgatggaactgctggtgaggcttct
|
13
|
+
gatttgacatccaagacaataactactgctttccaggattttgctgatagacgtcattgccgtcgttgcatgatat
|
14
|
+
tcgattgtcatatgcatgagaagtatgagcccgagtctagatccagcgaagacaaatctagtttgtttgaggatga
|
15
|
+
agatagacaaccatgcagtgagcattgttacctcaaggtgaggagtgtgacagaagctgatcatgtgatggataat
|
16
|
+
gataactctatatcaaacaagattgtggtctcagatccaaacaacactatgtggacgcctgtagagaaggatcttt
|
17
|
+
acttgaaaggaattgagatatttgggagaaacagttgtgatgttgcattaaacatacttcgggggcttaagacgtg
|
18
|
+
cctagagatttacaattacatgcgcgaacaagatcaatgtactatgtcattagaccttaacaaaactacacaaaga
|
19
|
+
cacaatcaggttaccaaaaaagtatctcgaaaaagtagtaggtcggtccgcaaaaaatcgagactccgaaaatatg
|
20
|
+
ctcgttatccgcctgctttaaagaaaacaactagtggagaagctaagttttataagcactacacaccatgcacttg
|
21
|
+
caagtcaaaatgtggacagcaatgcccttgtttaactcacgaaaattgctgcgagaaatattgcgggtgctcaaag
|
22
|
+
gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
|
23
|
+
ctaatcgtgaatgcgatcca gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
|
24
|
+
agtgcaaatccaatgcaagaacatgcaattcctccttcaaaccaataaaaagattctcattggaaagtctgatgtt
|
25
|
+
catggatggggtgcatttacatgggactctct taaaaagaatgagtatctcggagaatatactggagaactgatca
|
26
|
+
ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
|
27
|
+
gctcgaaatcgatgctcgccgtaaaggaaacgagttcaaatttctcaatcactcagcaagacctaactgctacgcc
|
28
|
+
aagttgatgattgtgagaggagatcagaggattggtctatttgcggagagagcaatcgaagaaggtgaggagcttt
|
29
|
+
tcttcgactactgctatggaccagaacatgcggattggtcgcgtggtcgagaacctagaaagactggtgcttctaa
|
30
|
+
aaggtctaaggaagcccgtccagctcgttagtttttgatctgaggagaagcagcaattcaagcagtccttttttta
|
31
|
+
tgttatggtatatcaattaataatgtaatgctattttgtgttactaaaccaaaacttaagtttctgttttatttgt
|
32
|
+
tttagggtgttttgtttgtatcatatgtgtcttaactttcaaagttttctttttgtatttcaatttaaaaacaatg
|
33
|
+
tttatgttgtt
|
34
|
+
|
35
|
+
>At1g02580 mRNA (2291 bp) UTR's and CDS (duplicate)
|
36
|
+
aggcgagtggttaatggagaaggaaaaccatgaggacgatggtgagggtttgccacccgaactaaatcagataaaa
|
37
|
+
gagcaaatcgaaaaggagagatttctgcatatcaagagaaaattcgagctgagatacattccaagtgtggctactc
|
38
|
+
atgcttcacaccatcaatcgtttgacttaaaccagcccgctgcagaggatgataatggaggagacaacaaatcact
|
39
|
+
tttgtcgagaatgcaaaacccacttcgtcatttcagtgcctcatctgattataattcttacgaagatcaaggttat
|
40
|
+
gttcttgatgaggatcaagattatgctcttgaagaagatgtaccattatttcttgatgaagatgtaccattattac
|
41
|
+
caagtgtcaagcttccaattgttgagaagctaccacgatccattacatgggtcttcaccaaaagtagccagctgat
|
42
|
+
ggctgaaagtgattctgtgattggtaagagacaaatctattatttgaatggtgaggcactagaattgagcagtgaa
|
43
|
+
gaagatgaggaagatgaagaagaagatgaggaagaaatcaagaaagaaaaatgcgaattttctgaagatgtagacc
|
44
|
+
gatttatatggacggttgggcaggactatggtttggatgatctggtcgtgcggcgtgctctcgccaagtacctcga
|
45
|
+
agtggatgtttcggacatattggaaagatacaatgaactcaagcttaagaatgatggaactgctggtgaggcttct
|
46
|
+
gatttgacatccaagacaataactactgctttccaggattttgctgatagacgtcattgccgtcgttgcatgatat
|
47
|
+
tcgattgtcatatgcatgagaagtatgagcccgagtctagatccagcgaagacaaatctagtttgtttgaggatga
|
48
|
+
agatagacaaccatgcagtgagcattgttacctcaaggtgaggagtgtgacagaagctgatcatgtgatggataat
|
49
|
+
gataactctatatcaaacaagattgtggtctcagatccaaacaacactatgtggacgcctgtagagaaggatcttt
|
50
|
+
acttgaaaggaattgagatatttgggagaaacagttgtgatgttgcattaaacatacttcgggggcttaagacgtg
|
51
|
+
cctagagatttacaattacatgcgcgaacaagatcaatgtactatgtcattagaccttaacaaaactacacaaaga
|
52
|
+
cacaatcaggttaccaaaaaagtatctcgaaaaagtagtaggtcggtccgcaaaaaatcgagactccgaaaatatg
|
53
|
+
ctcgttatccgcctgctttaaagaaaacaactagtggagaagctaagttttataagcactacacaccatgcacttg
|
54
|
+
caagtcaaaatgtggacagcaatgcccttgtttaactcacgaaaattgctgcgagaaatattgcgggtgctcaaag
|
55
|
+
gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
|
56
|
+
ctaatcgtgaatgcgatcca gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
|
57
|
+
agtgcaaatccaatgcaagaacatgcaattcctccttcaaaccaataaaaagattctcattggaaagtctgatgtt
|
58
|
+
catggatggggtgcatttacatgggactctct taaaaagaatgagtatctcggagaatatactggagaactgatca
|
59
|
+
ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
|
60
|
+
gctcgaaatcgatgctcgccgtaaaggaaacgagttcaaatttctcaatcactcagcaagacctaactgctacgcc
|
61
|
+
aagttgatgattgtgagaggagatcagaggattggtctatttgcggagagagcaatcgaagaaggtgaggagcttt
|
62
|
+
tcttcgactactgctatggaccagaacatgcggattggtcgcgtggtcgagaacctagaaagactggtgcttctaa
|
63
|
+
aaggtctaaggaagcccgtccagctcgttagtttttgatctgaggagaagcagcaattcaagcagtccttttttta
|
64
|
+
tgttatggtatatcaattaataatgtaatgctattttgtgttactaaaccaaaacttaagtttctgttttatttgt
|
65
|
+
tttagggtgttttgtttgtatcatatgtgtcttaactttcaaagttttctttttgtatttcaatttaaaaacaatg
|
66
|
+
tttatgttgtt
|
67
|
+
|
68
|
+
>At1g65300: mRNA 837bp
|
69
|
+
atgaagagaaagatgaagttatcgttaatagaaaacagtgtatcgaggaaaacaacattcaccaaaaggaagaaag
|
70
|
+
ggatgacgaagaaactaaccgagctagtcactctatgtggtgttgaagcatgtgcggtcgtctatagtccgttcaa
|
71
|
+
ctcgatcccggaggcttggccgtcaagggaaggcgttgaagacgtggtgtcgaaatttatggagttgtcggtgttg
|
72
|
+
gaccggaccaagaagatggtggatcaagagacttttataagtcaaaggatcgccaaagaaaaagagcagctgcaga
|
73
|
+
agctacgtgatgagaaccataattctcagattcgggagttaatgtttggttgtctcaaaggggagacgaatgtgta
|
74
|
+
taatcttgatggaagggatcttcaagatttgagtttatatattgataagtatcttaatggtcttactcgcaggatt
|
75
|
+
ga gatcctTAttgagaacggtgagtcttcttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
|
76
|
+
gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
|
77
|
+
tgatttttatgatcagattccaaagaaaattcatggttt taatatgaatatgaataaggattcgaatcaaagtatg
|
78
|
+
gttttggatttgaatcaaaatcttaatgatggagaggacgagggcattccttgcatggacaacaacaactaccacc
|
79
|
+
ccgaaatcgattgtctcgctaccgtcaccactgcccccactgatgtttgtgctcctaacatcaccaatgatctcta
|
80
|
+
g
|
81
|
+
|
82
|
+
>At1g65300: mRNA 837bp (shortened at end)
|
83
|
+
atgaagagaaagatgaagttatcgttaatagaaaacagtgtatcgaggaaaacaacattcaccaaaaggaagaaag
|
84
|
+
ggatgacgaagaaactaaccgagctagtcactctatgtggtgttgaagcatgtgcggtcgtctatagtccgttcaa
|
85
|
+
ctcgatcccggaggcttggccgtcaagggaaggcgttgaagacgtggtgtcgaaatttatggagttgtcggtgttg
|
86
|
+
gaccggaccaagaagatggtggatcaagagacttttataagtcaaaggatcgccaaagaaaaagagcagctgcaga
|
87
|
+
agctacgtgatgagaaccataattctcagattcgggagttaatgtttggttgtctcaaaggggagacgaatgtgta
|
88
|
+
taatcttgatggaagggatcttcaagatttgagtttatatattgataagtatcttaatggtcttactcgcaggatt
|
89
|
+
gagatcctTAttgagaacggtgagtcttcttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
|
90
|
+
gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
|
91
|
+
tgatttttatgatcag
|
92
|
+
|
93
|
+
|
94
|
+
>At1g65300: mRNA 837bp (shortened from start)
|
95
|
+
ttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
|
96
|
+
gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
|
97
|
+
tgatttttatgatcagattccaaagaaaattcatggttttaatatgaatatgaataaggattcgaatcaaagtatg
|
98
|
+
gttttggatttgaatcaaaatcttaatgatggagaggacgagggcattccttgcatggacaacaacaactaccacc
|
99
|
+
ccgaaatcgattgtctcgctaccgtcaccactgcccccactgatgtttgtgctcctaacatcaccaatgatctcta
|
100
|
+
g
|
101
|
+
|
102
|
+
|
103
|
+
>At1g02580 - shortened for test - inserted cutpoint
|
104
|
+
gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
|
105
|
+
ctaatcgtgaatgcgatcca gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
|
106
|
+
agtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggt
|
107
|
+
tttaattggggtgcatttacatgggactctct taaaaagaatgagtatctcggagaatatactggagaactgatca
|
108
|
+
ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
|
109
|
+
|