genfrag 0.0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.bnsignore +16 -0
- data/History.txt +4 -0
- data/LICENSE.txt +58 -0
- data/README.rdoc +40 -0
- data/Rakefile +53 -0
- data/bin/genfrag +8 -0
- data/lib/genfrag.rb +129 -0
- data/lib/genfrag/app.rb +105 -0
- data/lib/genfrag/app/command.rb +145 -0
- data/lib/genfrag/app/index_command.rb +227 -0
- data/lib/genfrag/app/index_command/db.rb +105 -0
- data/lib/genfrag/app/search_command.rb +298 -0
- data/lib/genfrag/app/search_command/match.rb +165 -0
- data/lib/genfrag/app/search_command/process_file.rb +125 -0
- data/lib/genfrag/app/search_command/trim.rb +121 -0
- data/lib/genfrag/debug.rb +0 -0
- data/spec/data/index_command/in/a.fasta +109 -0
- data/spec/data/index_command/out/1-a_lookup.tdf +4 -0
- data/spec/data/index_command/out/2-a_lookup.db +0 -0
- data/spec/data/index_command/out/3-a_lookup.tdf +2 -0
- data/spec/data/index_command/out/4-a_lookup.db +0 -0
- data/spec/data/index_command/out/5-a_lookup.tdf +4 -0
- data/spec/data/index_command/out/6-a_lookup.db +0 -0
- data/spec/data/index_command/out/a.fasta.db +0 -0
- data/spec/data/index_command/out/a.fasta.tdf +6 -0
- data/spec/genfrag/app/command_spec.rb +55 -0
- data/spec/genfrag/app/index_command_spec.rb +258 -0
- data/spec/genfrag/app/search_command/match_spec.rb +77 -0
- data/spec/genfrag/app/search_command/process_file_spec.rb +185 -0
- data/spec/genfrag/app/search_command/trim_spec.rb +75 -0
- data/spec/genfrag/app/search_command_spec.rb +260 -0
- data/spec/genfrag/app_spec.rb +77 -0
- data/spec/genfrag_spec.rb +87 -0
- data/spec/spec_helper.rb +56 -0
- data/tasks/ann.rake +80 -0
- data/tasks/bones.rake +20 -0
- data/tasks/gem.rake +201 -0
- data/tasks/git.rake +40 -0
- data/tasks/notes.rake +27 -0
- data/tasks/post_load.rake +34 -0
- data/tasks/rdoc.rake +50 -0
- data/tasks/rubyforge.rake +55 -0
- data/tasks/setup.rb +300 -0
- data/tasks/spec.rake +54 -0
- data/tasks/svn.rake +47 -0
- data/tasks/test.rake +40 -0
- metadata +136 -0
@@ -0,0 +1,165 @@
|
|
1
|
+
|
2
|
+
module Genfrag
|
3
|
+
class App
|
4
|
+
|
5
|
+
class SearchCommand < Command
|
6
|
+
|
7
|
+
# Does the sequence match the adapter
|
8
|
+
#
|
9
|
+
# Tests one end of a fragment against the configured adapter specificity and,
# on a match, returns the fragment with the enzyme tail masked by '+'.
#
# five_or_three::   5 to test the left end against adapter5 (enzyme @ops.re5),
#                   3 to test the right end against adapter3 (enzyme @ops.re3).
# primary_frag::    primary strand; leading (5') or trailing (3') '.' characters
#                   are counted as padding and skipped.
# complement_frag:: complement strand; returned as-is.
# raw_frag::        kept for interface compatibility; not used.
# trim::            kept for interface compatibility; not used.
#
# Returns +false+ when the nucleotides following the padding and the enzyme
# tail do not start with the adapter specificity (case-insensitive; the
# specificity is interpolated into a regular expression). Otherwise returns
# [new_primary_frag, complement_frag].
#
# Raises RuntimeError when +five_or_three+ is neither 5 nor 3, and
# 'FIXME - not functional yet' when an adapter sequence or adapter size is
# configured for the requested end.
#
# The string arguments are never modified. For the 3' end the check runs on a
# reversed *copy* of the primary strand; reversing the caller's strings in
# place left them reversed whenever the method returned +false+.
def matches_adapter(five_or_three, primary_frag, complement_frag, raw_frag, trim)
  adapter_sequence = nil

  if five_or_three == 5
    tail = right_tail_of(Bio::RestrictionEnzyme::DoubleStranded.new(@ops.re5).aligned_strands_with_cuts.primary)

    adapter_specificity = @adapters[:adapter5_specificity].upcase
    adapter_sequence = @adapters[:adapter5_sequence].upcase if @adapters[:adapter5_sequence]
    adapter_size = @adapters[:adapter5_size]
    oriented_primary = primary_frag
  elsif five_or_three == 3
    tail = left_tail_of(Bio::RestrictionEnzyme::DoubleStranded.new(@ops.re3).aligned_strands_with_cuts.primary)

    # A leading '_' means the specificity is only reversed;
    # otherwise Bio::Sequence::NA#forward_complement is applied to it.
    if @adapters[:adapter3_specificity][0].chr == '_'
      adapter_specificity = @adapters[:adapter3_specificity][1..-1].reverse.upcase
    else
      adapter_specificity = Bio::Sequence::NA.new(@adapters[:adapter3_specificity]).forward_complement.to_s.upcase
    end
    adapter_sequence = Bio::Sequence::NA.new(@adapters[:adapter3_sequence]).forward_complement.to_s.upcase if @adapters[:adapter3_sequence]
    adapter_size = @adapters[:adapter3_size]
    # Work right-to-left on a copy so the 3' end can be handled like a 5' end.
    oriented_primary = primary_frag.reverse
  else
    raise "First argument to matches_adapter must be a '5' or a '3'. Received: #{five_or_three.inspect}"
  end

  # TEMP Check for match
  dots_on_primary = oriented_primary[/\A\.*/].size
  lead_in = tail.size + dots_on_primary
  return false if oriented_primary[lead_in..-1].tr('.', '') !~ /^#{adapter_specificity}/i

  #return false if raw_frag[ [trim_primary, trim_complement].max .. -1 ] !~ /^#{adapter_specificity}/i

  #overhang = [trim_primary, trim_complement].max - [trim_primary, trim_complement].min

  #lead_in = overhang

  if adapter_sequence
    raise 'FIXME - not functional yet'

    # if lead_in >= adapter_sequence.size
    #   # need to preserve dots on primary string
    #   new_primary_frag = ('.' * (lead_in - adapter_sequence.size)) + adapter_sequence + primary_frag[ lead_in .. -1 ]
    #   new_complement_frag = complement_frag
    # else
    #   # need to add dots to beginning of complement string
    #   new_primary_frag = adapter_sequence + primary_frag[ lead_in .. -1 ]
    #   new_complement_frag = ('.' * (adapter_sequence.size - lead_in) ) + complement_frag
    # end

  elsif adapter_size
    raise 'FIXME - not functional yet'

    # # only the size and the specificity of the adapter has been provided
    # size_of_specificity = adapter_specificity.size
    # size_of_sequence = adapter_size - size_of_specificity
    # if lead_in >= size_of_sequence
    #   # need to preserve dots on primary string
    #   new_primary_frag = primary_frag[ 0 .. (lead_in - 1) ].upcase + primary_frag[ lead_in .. -1 ]
    #   new_complement_frag = complement_frag
    # else
    #   # need to add dots to beginning of complement string
    #   new_primary_frag = ('+' * (size_of_sequence - lead_in) ) + primary_frag[ 0 .. (lead_in - 1) ].upcase + primary_frag[ lead_in .. -1 ]
    #   new_complement_frag = ('.' * (size_of_sequence - lead_in) ) + complement_frag
    # end

  end

  # only the specificity has been provided
  new_primary_frag = ('.' * dots_on_primary) + ('+' * tail.size) + oriented_primary[lead_in..-1]
  # Restore left-to-right orientation for the 3' end.
  new_primary_frag.reverse! if five_or_three == 3

  [new_primary_frag, complement_frag]
end
|
106
|
+
|
107
|
+
|
108
|
+
# Find the fragments that match the search parameters
|
109
|
+
#
|
110
|
+
# Collects the +info+ entries of +sizes+ whose trimmed fragment size falls
# within the requested size(s) in @ops.size.
#
# sizes:: pairs of raw fragment size and its info (iterated as
#         |raw_size, info|) -- presumably the hash built by the
#         ProcessFile freq-lookup readers; verify against the caller.
# left::  hash providing :trim_from_both for the left end.
# right:: hash providing :trim_from_both for the right end.
#
# When @ops.size is 0, nil or "0" (bare or as a one-element array) every
# info is returned. Otherwise a fragment is a hit when
# raw_size - left[:trim_from_both] - right[:trim_from_both] lies within
# +/- (adapter5_size + adapter3_size) of a requested size; missing adapter
# sizes count as 0.
#
# Each entry is reported at most once, in the order it first matches, even
# when the tolerance windows of several requested sizes overlap.
def find_matching_fragments(sizes, left, right)
  requested = [@ops.size].flatten
  tolerance = (@adapters[:adapter5_size] || 0) + (@adapters[:adapter3_size] || 0)

  if requested == [0] || requested == [nil] || requested == ["0"]
    return sizes.map { |_raw_size, info| info }
  end

  hits = []
  already_hit = {}
  requested.each do |seek_size|
    seek_size = seek_size.to_i
    sizes.each do |raw_size, info|
      next if already_hit[raw_size]
      frag_size = raw_size - left[:trim_from_both] - right[:trim_from_both]
      next unless frag_size >= seek_size - tolerance && frag_size <= seek_size + tolerance
      already_hit[raw_size] = true
      hits << info
    end
  end

  hits
end
|
133
|
+
|
134
|
+
# Returns the part of +s+ that follows its last cut symbol ('^'), with all
# spaces removed.
#
#   'PpiI'  => "n n n n n n^n n n n n n n g a a c n n n n n c t c n n n n n n n n n n n n n^n"
#           => 'n'
#   'BstYI' => "r^g a t c y"
#           => 'gatcy'
#
# Raises RuntimeError when +s+ contains no '^'.
def right_tail_of(s)
  # The greedy leading '.*' pushes the match to the last '^'.
  after_last_cut = /.*\^(.*)/.match(s)
  raise "Sequence #{s} has no cuts (defined by symbol '^')" unless after_last_cut

  after_last_cut[1].tr(' ', '')
end
|
146
|
+
|
147
|
+
# Returns the part of +s+ that precedes its first cut symbol ('^'), with all
# spaces removed.
#
#   'PpiI'  => "n n n n n n^n n n n n n n g a a c n n n n n c t c n n n n n n n n n n n n n^n"
#           => 'nnnnnn'
#   'BstYI' => "r^g a t c y"
#           => 'r'
#
# Raises RuntimeError when +s+ contains no '^'.
def left_tail_of(s)
  before_first_cut = /([^\^]*)\^/.match(s)
  if before_first_cut.nil?
    raise "Sequence #{s} has no cuts (defined by symbol '^')"
  end

  before_first_cut[1].tr(' ', '')
end
|
160
|
+
|
161
|
+
end # class SearchCommand
|
162
|
+
end # class App
|
163
|
+
end # module Genfrag
|
164
|
+
|
165
|
+
# EOF
|
@@ -0,0 +1,125 @@
|
|
1
|
+
|
2
|
+
module Genfrag
|
3
|
+
class App
|
4
|
+
|
5
|
+
class SearchCommand < Command
|
6
|
+
|
7
|
+
class ProcessFile
|
8
|
+
class << self
|
9
|
+
# Process the standardized Fasta file (tdf format)
|
10
|
+
#
|
11
|
+
# Builds a lookup of sequences from the lines of a tab-delimited file.
#
# f_normalized_fasta:: the lines of the file, header line first (assumed to
#                      be an Array of Strings, e.g. from IO#readlines --
#                      confirm against the caller).
#
# The first line is skipped as the header. Each remaining line is split on
# tabs into id, a CSV-encoded list of definitions, and the sequence.
# Blank lines are ignored and an input without any lines yields {}.
#
# Returns { id (Integer) => { :definitions => Array, :sequence => String } }.
def process_tdf_fasta_file(f_normalized_fasta)
  sequences = {}
  # drop(1) copes with an empty input, where [1..-1] would yield nil.
  f_normalized_fasta.drop(1).each do |line|
    fields = line.chomp.split("\t")
    next if fields.empty? # blank line, e.g. a trailing newline at end of file
    id = fields[0].to_i
    sequences[id] = { :definitions => CSV.parse_line(fields[1]), :sequence => fields[2] }
  end
  sequences
end
|
20
|
+
|
21
|
+
# Process the standardized Fasta file (sqlite3 format)
|
22
|
+
# Builds the same lookup as the tdf reader, but from a database handle.
#
# db_normalized_fasta:: object responding to #execute(sql) that yields one
#                       row per record of table db_normalized_fasta; columns
#                       are read by position (0 = id, 1 = CSV-encoded
#                       definitions, 2 = sequence).
#
# Returns { id (Integer) => { :definitions => Array, :sequence => ... } }.
def process_db_fasta_file(db_normalized_fasta)
  by_id = {}
  db_normalized_fasta.execute("select * from db_normalized_fasta") do |record|
    fasta_id = record[0].to_i
    by_id[fasta_id] = {
      :definitions => CSV.parse_line(record[1]),
      :sequence => record[2]
    }
  end
  by_id
end
|
30
|
+
|
31
|
+
# Process the fragment frequency file (tdf format)
|
32
|
+
#
|
33
|
+
# Builds the fragment-size lookup from the lines of a tab-delimited file.
#
# f_freq_lookup:: the lines of the file, header line first (assumed to be an
#                 Array of Strings -- confirm against the caller).
#
# The first line is skipped as the header. Each remaining line is split on
# tabs; column 1 is the fragment size and column 2 a ', '-separated list of
# "offset fasta_id" pairs (column 0 is not used). Blank lines are ignored
# and an input without any lines yields {}.
#
# Returns { size (Integer) => [ { :offset, :fasta_id, :raw_size }, ... ] }
# with all three values as Integers. A size that occurs on more than one
# line keeps only the positions of its last line.
def process_tdf_freq_lookup(f_freq_lookup)
  sizes = {}
  # drop(1) copes with an empty input, where [1..-1] would yield nil.
  f_freq_lookup.drop(1).each do |line|
    fields = line.chomp.split("\t")
    next if fields.empty? # blank line, e.g. a trailing newline at end of file
    size = fields[1].to_i
    sizes[size] = fields[2].split(', ').map do |position|
      offset, fasta_id = position.split(' ')
      { :offset => offset.to_i, :fasta_id => fasta_id.to_i, :raw_size => size }
    end
  end
  sizes
end
|
52
|
+
|
53
|
+
# Process the fragment frequency file (sqlite3 format)
|
54
|
+
#
|
55
|
+
# Builds the fragment-size lookup from a database handle.
#
# db_freq_lookup:: object responding to #execute(sql) that yields one row per
#                  record of table db_freq_lookup; columns are read by
#                  position (1 = fragment size, 2 = ', '-separated list of
#                  "offset fasta_id" pairs; column 0 is not used).
#
# Returns { size (Integer) => [ { :offset, :fasta_id, :raw_size }, ... ] }
# with all three values as Integers.
def process_db_freq_lookup(db_freq_lookup)
  by_size = {}
  db_freq_lookup.execute("select * from db_freq_lookup") do |record|
    raw_size = record[1].to_i
    by_size[raw_size] = record[2].split(', ').map do |entry|
      offset, fasta_id = entry.split(' ')
      { :offset => offset.to_i, :fasta_id => fasta_id.to_i, :raw_size => raw_size }
    end
  end
  by_size
end
|
73
|
+
|
74
|
+
# Process the adapter file (tdf format)
|
75
|
+
#
|
76
|
+
# Looks up the requested 5' and/or 3' adapter in the lines of a tab-delimited
# adapter file.
#
# f_adapters::    the lines of the file, header line first (assumed to be an
#                 Array of Strings -- confirm against the caller).
# adapter5_name:: name of the adapter to find among rows with worksense 5,
#                 or nil to skip the 5' lookup.
# adapter3_name:: name of the adapter to find among rows with worksense 3,
#                 or nil to skip the 3' lookup.
#
# Row columns: 0 = name, 1 = worksense (first character must be '5' or '3'),
# 2 = sequence (a trailing "|" followed by any number of N's is removed),
# 3 = specificity. Names are matched case-insensitively and are interpolated
# into a regular expression, so a requested name also matches any row name
# that contains it.
#
# The first matching row wins for each adapter; scanning stops as soon as
# everything requested has been found. Blank lines are ignored.
#
# Returns a hash with :adapter5_sequence, :adapter5_specificity,
# :adapter3_sequence and :adapter3_specificity (nil where not requested).
#
# Raises RuntimeError when a scanned row has an unknown worksense or when a
# requested adapter is not found.
def process_tdf_adapters(f_adapters, adapter5_name=nil, adapter3_name=nil)
  adapter5_sequence = nil
  adapter3_sequence = nil
  adapter5_specificity = nil
  adapter3_specificity = nil
  adapter5_needs_to_be_found = !adapter5_name.nil?
  adapter3_needs_to_be_found = !adapter3_name.nil?

  # drop(1) copes with an empty input, where [1..-1] would yield nil.
  f_adapters.drop(1).each do |line|
    break unless adapter5_needs_to_be_found || adapter3_needs_to_be_found
    fields = line.chomp.split("\t")
    next if fields.empty?

    name = fields[0]
    # to_s lets a row without a worksense column reach the error below.
    worksense = fields[1].to_s[0, 1].to_i
    if worksense != 3 && worksense != 5
      raise "Unknown worksense value \"#{fields[1]}\". First character of column must be a '5' or a '3'."
    end
    sequence = fields[2].gsub(/\|N*$/i, '')
    specificity = fields[3] # what it's supposed to match

    # Only accept a row while that adapter is still wanted, so a later row
    # that also matches cannot overwrite the first hit.
    if adapter5_name && adapter5_needs_to_be_found && (worksense == 5) && (name =~ /#{adapter5_name}/i)
      adapter5_sequence = sequence
      adapter5_specificity = specificity
      adapter5_needs_to_be_found = false
    elsif adapter3_name && adapter3_needs_to_be_found && (worksense == 3) && (name =~ /#{adapter3_name}/i)
      adapter3_sequence = sequence
      adapter3_specificity = specificity
      adapter3_needs_to_be_found = false
    end
  end

  if adapter5_name && adapter5_needs_to_be_found
    raise "named-adapter5 ('#{adapter5_name}') with the worksense '5' not found."
  elsif adapter3_name && adapter3_needs_to_be_found
    raise "named-adapter3 ('#{adapter3_name}') with the worksense '3' not found."
  end

  {
    :adapter5_sequence => adapter5_sequence,
    :adapter5_specificity => adapter5_specificity,
    :adapter3_sequence => adapter3_sequence,
    :adapter3_specificity => adapter3_specificity
  }
end
|
117
|
+
|
118
|
+
end
|
119
|
+
end # class ProcessFile
|
120
|
+
|
121
|
+
end # class SearchCommand
|
122
|
+
end # class App
|
123
|
+
end # module Genfrag
|
124
|
+
|
125
|
+
# EOF
|
@@ -0,0 +1,121 @@
|
|
1
|
+
|
2
|
+
module Genfrag
|
3
|
+
class App
|
4
|
+
|
5
|
+
class SearchCommand < Command
|
6
|
+
|
7
|
+
# Keep track of extraneous nucleotides that should be removed from the final fragment
|
8
|
+
#
|
9
|
+
# Example BstYI used as RE5
|
10
|
+
# BstYI -
|
11
|
+
# 5' - r^g a t c y - 3'
|
12
|
+
# 3' - y c t a g^r - 5'
|
13
|
+
#
|
14
|
+
# re5_ds.cut_locations.primary # => [0]
|
15
|
+
# re5_ds.cut_locations.complement # => [4]
|
16
|
+
# re5_ds.aligned_strands.primary.size # => 6
|
17
|
+
#
|
18
|
+
# # number of nucleotides to trim from the left side on the primary strand
|
19
|
+
# re5_ds.cut_locations.primary.max + 1 # => 1
|
20
|
+
#
|
21
|
+
# # number of nucleotides to trim from the left side on the complement strand
|
22
|
+
# re5_ds.cut_locations.complement.max + 1 # => 5
|
23
|
+
#
|
24
|
+
#
|
25
|
+
# Example BstYI used as RE3
|
26
|
+
# BstYI -
|
27
|
+
# 5' - r^g a t c y - 3'
|
28
|
+
# 3' - y c t a g^r - 5'
|
29
|
+
#
|
30
|
+
# re3_ds.cut_locations.primary # => [0]
|
31
|
+
# re3_ds.cut_locations.complement # => [4]
|
32
|
+
# re3_ds.aligned_strands.primary.size # => 6
|
33
|
+
#
|
34
|
+
# # number of nucleotides to trim from the right side on the primary strand
|
35
|
+
# re3_ds.aligned_strands.primary.size - (re3_ds.cut_locations.primary.min + 1) # => 5
|
36
|
+
#
|
37
|
+
# # number of nucleotides to trim from the right side on the complement strand
|
38
|
+
# re3_ds.aligned_strands.primary.size - (re3_ds.cut_locations.complement.min + 1) # => 1
|
39
|
+
#
|
40
|
+
#
|
41
|
+
# Example PpiI used as RE5
|
42
|
+
# PpiI -
|
43
|
+
# 5' - n n n n n n^n n n n n n n g a a c n n n n n c t c n n n n n n n n n n n n n^n - 3'
|
44
|
+
# 3' - n^n n n n n n n n n n n n c t t g n n n n n g a g n n n n n n n n^n n n n n n - 5'
|
45
|
+
#
|
46
|
+
# re5_ds.cut_locations.primary # => [5, 37]
|
47
|
+
# re5_ds.cut_locations.complement # => [0, 32]
|
48
|
+
# re5_ds.aligned_strands.primary.size # => 39
|
49
|
+
#
|
50
|
+
# # number of nucleotides to trim from the left side on the primary strand
|
51
|
+
# re5_ds.cut_locations.primary.max + 1 # => 38
|
52
|
+
#
|
53
|
+
# # number of nucleotides to trim from the left side on the complement strand
|
54
|
+
# re5_ds.cut_locations.complement.max + 1 # => 33
|
55
|
+
#
|
56
|
+
#
|
57
|
+
# Example PpiI used as RE3
|
58
|
+
# PpiI -
|
59
|
+
# 5' - n n n n n n^n n n n n n n g a a c n n n n n c t c n n n n n n n n n n n n n^n - 3'
|
60
|
+
# 3' - n^n n n n n n n n n n n n c t t g n n n n n g a g n n n n n n n n^n n n n n n - 5'
|
61
|
+
#
|
62
|
+
# re3_ds.cut_locations.primary # => [5, 37]
|
63
|
+
# re3_ds.cut_locations.complement # => [0, 32]
|
64
|
+
# re3_ds.aligned_strands.primary.size # => 39
|
65
|
+
#
|
66
|
+
# # number of nucleotides to trim from the right side on the primary strand
|
67
|
+
# re3_ds.aligned_strands.primary.size - (re3_ds.cut_locations.primary.min + 1) # => 33
|
68
|
+
#
|
69
|
+
# # number of nucleotides to trim from the right side on the complement strand
|
70
|
+
# re3_ds.aligned_strands.primary.size - (re3_ds.cut_locations.complement.min + 1) # => 38
|
71
|
+
#
|
72
|
+
# Computes, per strand, how many nucleotides each enzyme removes.
#
# re5_ds:: enzyme for the left end; must respond to cut_locations.primary and
#          cut_locations.complement.
# re3_ds:: enzyme for the right end; must additionally respond to
#          aligned_strands.primary.
#
# Returns a hash with :from_left_primary, :from_left_complement,
# :from_right_primary and :from_right_complement.
def calculate_trim_for_nucleotides(re5_ds, re3_ds)
  left_cuts = re5_ds.cut_locations
  right_cuts = re3_ds.cut_locations
  # Both right-hand counts are measured against the primary strand's length.
  right_width = re3_ds.aligned_strands.primary.size

  {
    :from_left_primary => left_cuts.primary.max + 1,
    :from_left_complement => left_cuts.complement.max + 1,
    :from_right_primary => right_width - (right_cuts.primary.min + 1),
    :from_right_complement => right_width - (right_cuts.complement.min + 1)
  }
end
|
80
|
+
|
81
|
+
# Calculate left and right trims
|
82
|
+
#
|
83
|
+
# Derives, for each end, which strand needs padding and how much is cut from
# both strands.
#
# trim:: hash with :from_left_primary, :from_left_complement,
#        :from_right_primary and :from_right_complement.
#
# Returns [left, right]; each is a hash with
# :dot_out_from_primary:: true when the primary strand loses more than the
#                         complement at that end (otherwise the complement is
#                         the one assumed to need padding),
# :trim_from_both::       the smaller of the two counts, i.e. what comes off
#                         both strands.
def calculate_left_and_right_trims(trim)
  [:left, :right].map do |side|
    from_primary = trim[:"from_#{side}_primary"]
    from_complement = trim[:"from_#{side}_complement"]
    {
      :dot_out_from_primary => (from_primary > from_complement),
      :trim_from_both => [from_primary, from_complement].min
    }
  end
end
|
95
|
+
|
96
|
+
# Do the trimming
|
97
|
+
#
|
98
|
+
def trim_sequences(primary_frag, complement_frag, left, right, trim)
|
99
|
+
if left[:dot_out_from_primary]
|
100
|
+
primary_frag = "." * trim[:from_left_primary] + primary_frag[trim[:from_left_primary]..-1]
|
101
|
+
else
|
102
|
+
complement_frag = "." * trim[:from_left_complement] + complement_frag[trim[:from_left_complement]..-1]
|
103
|
+
end
|
104
|
+
|
105
|
+
if right[:dot_out_from_primary]
|
106
|
+
primary_frag = primary_frag[0..(-1 - trim[:from_right_primary])] + "." * trim[:from_right_primary]
|
107
|
+
else
|
108
|
+
complement_frag = complement_frag[0..(-1 - trim[:from_right_primary])] + "." * trim[:from_right_primary]
|
109
|
+
end
|
110
|
+
|
111
|
+
primary_frag = primary_frag[left[:trim_from_both]..(-1-right[:trim_from_both])]
|
112
|
+
complement_frag = complement_frag[left[:trim_from_both]..(-1-right[:trim_from_both])]
|
113
|
+
|
114
|
+
return [primary_frag, complement_frag]
|
115
|
+
end
|
116
|
+
|
117
|
+
end # class SearchCommand
|
118
|
+
end # class App
|
119
|
+
end # module Genfrag
|
120
|
+
|
121
|
+
# EOF
|
File without changes
|
@@ -0,0 +1,109 @@
|
|
1
|
+
|
2
|
+
>At1g02580 mRNA (2291 bp) UTR's and CDS
|
3
|
+
aggcgagtggttaatggagaaggaaaaccatgaggacgatggtgagggtttgccacccgaactaaatcagataaaa
|
4
|
+
gagcaaatcgaaaaggagagatttctgcatatcaagagaaaattcgagctgagatacattccaagtgtggctactc
|
5
|
+
atgcttcacaccatcaatcgtttgacttaaaccagcccgctgcagaggatgataatggaggagacaacaaatcact
|
6
|
+
tttgtcgagaatgcaaaacccacttcgtcatttcagtgcctcatctgattataattcttacgaagatcaaggttat
|
7
|
+
gttcttgatgaggatcaagattatgctcttgaagaagatgtaccattatttcttgatgaagatgtaccattattac
|
8
|
+
caagtgtcaagcttccaattgttgagaagctaccacgatccattacatgggtcttcaccaaaagtagccagctgat
|
9
|
+
ggctgaaagtgattctgtgattggtaagagacaaatctattatttgaatggtgaggcactagaattgagcagtgaa
|
10
|
+
gaagatgaggaagatgaagaagaagatgaggaagaaatcaagaaagaaaaatgcgaattttctgaagatgtagacc
|
11
|
+
gatttatatggacggttgggcaggactatggtttggatgatctggtcgtgcggcgtgctctcgccaagtacctcga
|
12
|
+
agtggatgtttcggacatattggaaagatacaatgaactcaagcttaagaatgatggaactgctggtgaggcttct
|
13
|
+
gatttgacatccaagacaataactactgctttccaggattttgctgatagacgtcattgccgtcgttgcatgatat
|
14
|
+
tcgattgtcatatgcatgagaagtatgagcccgagtctagatccagcgaagacaaatctagtttgtttgaggatga
|
15
|
+
agatagacaaccatgcagtgagcattgttacctcaaggtgaggagtgtgacagaagctgatcatgtgatggataat
|
16
|
+
gataactctatatcaaacaagattgtggtctcagatccaaacaacactatgtggacgcctgtagagaaggatcttt
|
17
|
+
acttgaaaggaattgagatatttgggagaaacagttgtgatgttgcattaaacatacttcgggggcttaagacgtg
|
18
|
+
cctagagatttacaattacatgcgcgaacaagatcaatgtactatgtcattagaccttaacaaaactacacaaaga
|
19
|
+
cacaatcaggttaccaaaaaagtatctcgaaaaagtagtaggtcggtccgcaaaaaatcgagactccgaaaatatg
|
20
|
+
ctcgttatccgcctgctttaaagaaaacaactagtggagaagctaagttttataagcactacacaccatgcacttg
|
21
|
+
caagtcaaaatgtggacagcaatgcccttgtttaactcacgaaaattgctgcgagaaatattgcgggtgctcaaag
|
22
|
+
gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
|
23
|
+
ctaatcgtgaatgcgatcca gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
|
24
|
+
agtgcaaatccaatgcaagaacatgcaattcctccttcaaaccaataaaaagattctcattggaaagtctgatgtt
|
25
|
+
catggatggggtgcatttacatgggactctct taaaaagaatgagtatctcggagaatatactggagaactgatca
|
26
|
+
ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
|
27
|
+
gctcgaaatcgatgctcgccgtaaaggaaacgagttcaaatttctcaatcactcagcaagacctaactgctacgcc
|
28
|
+
aagttgatgattgtgagaggagatcagaggattggtctatttgcggagagagcaatcgaagaaggtgaggagcttt
|
29
|
+
tcttcgactactgctatggaccagaacatgcggattggtcgcgtggtcgagaacctagaaagactggtgcttctaa
|
30
|
+
aaggtctaaggaagcccgtccagctcgttagtttttgatctgaggagaagcagcaattcaagcagtccttttttta
|
31
|
+
tgttatggtatatcaattaataatgtaatgctattttgtgttactaaaccaaaacttaagtttctgttttatttgt
|
32
|
+
tttagggtgttttgtttgtatcatatgtgtcttaactttcaaagttttctttttgtatttcaatttaaaaacaatg
|
33
|
+
tttatgttgtt
|
34
|
+
|
35
|
+
>At1g02580 mRNA (2291 bp) UTR's and CDS (duplicate)
|
36
|
+
aggcgagtggttaatggagaaggaaaaccatgaggacgatggtgagggtttgccacccgaactaaatcagataaaa
|
37
|
+
gagcaaatcgaaaaggagagatttctgcatatcaagagaaaattcgagctgagatacattccaagtgtggctactc
|
38
|
+
atgcttcacaccatcaatcgtttgacttaaaccagcccgctgcagaggatgataatggaggagacaacaaatcact
|
39
|
+
tttgtcgagaatgcaaaacccacttcgtcatttcagtgcctcatctgattataattcttacgaagatcaaggttat
|
40
|
+
gttcttgatgaggatcaagattatgctcttgaagaagatgtaccattatttcttgatgaagatgtaccattattac
|
41
|
+
caagtgtcaagcttccaattgttgagaagctaccacgatccattacatgggtcttcaccaaaagtagccagctgat
|
42
|
+
ggctgaaagtgattctgtgattggtaagagacaaatctattatttgaatggtgaggcactagaattgagcagtgaa
|
43
|
+
gaagatgaggaagatgaagaagaagatgaggaagaaatcaagaaagaaaaatgcgaattttctgaagatgtagacc
|
44
|
+
gatttatatggacggttgggcaggactatggtttggatgatctggtcgtgcggcgtgctctcgccaagtacctcga
|
45
|
+
agtggatgtttcggacatattggaaagatacaatgaactcaagcttaagaatgatggaactgctggtgaggcttct
|
46
|
+
gatttgacatccaagacaataactactgctttccaggattttgctgatagacgtcattgccgtcgttgcatgatat
|
47
|
+
tcgattgtcatatgcatgagaagtatgagcccgagtctagatccagcgaagacaaatctagtttgtttgaggatga
|
48
|
+
agatagacaaccatgcagtgagcattgttacctcaaggtgaggagtgtgacagaagctgatcatgtgatggataat
|
49
|
+
gataactctatatcaaacaagattgtggtctcagatccaaacaacactatgtggacgcctgtagagaaggatcttt
|
50
|
+
acttgaaaggaattgagatatttgggagaaacagttgtgatgttgcattaaacatacttcgggggcttaagacgtg
|
51
|
+
cctagagatttacaattacatgcgcgaacaagatcaatgtactatgtcattagaccttaacaaaactacacaaaga
|
52
|
+
cacaatcaggttaccaaaaaagtatctcgaaaaagtagtaggtcggtccgcaaaaaatcgagactccgaaaatatg
|
53
|
+
ctcgttatccgcctgctttaaagaaaacaactagtggagaagctaagttttataagcactacacaccatgcacttg
|
54
|
+
caagtcaaaatgtggacagcaatgcccttgtttaactcacgaaaattgctgcgagaaatattgcgggtgctcaaag
|
55
|
+
gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
|
56
|
+
ctaatcgtgaatgcgatcca gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
|
57
|
+
agtgcaaatccaatgcaagaacatgcaattcctccttcaaaccaataaaaagattctcattggaaagtctgatgtt
|
58
|
+
catggatggggtgcatttacatgggactctct taaaaagaatgagtatctcggagaatatactggagaactgatca
|
59
|
+
ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
|
60
|
+
gctcgaaatcgatgctcgccgtaaaggaaacgagttcaaatttctcaatcactcagcaagacctaactgctacgcc
|
61
|
+
aagttgatgattgtgagaggagatcagaggattggtctatttgcggagagagcaatcgaagaaggtgaggagcttt
|
62
|
+
tcttcgactactgctatggaccagaacatgcggattggtcgcgtggtcgagaacctagaaagactggtgcttctaa
|
63
|
+
aaggtctaaggaagcccgtccagctcgttagtttttgatctgaggagaagcagcaattcaagcagtccttttttta
|
64
|
+
tgttatggtatatcaattaataatgtaatgctattttgtgttactaaaccaaaacttaagtttctgttttatttgt
|
65
|
+
tttagggtgttttgtttgtatcatatgtgtcttaactttcaaagttttctttttgtatttcaatttaaaaacaatg
|
66
|
+
tttatgttgtt
|
67
|
+
|
68
|
+
>At1g65300: mRNA 837bp
|
69
|
+
atgaagagaaagatgaagttatcgttaatagaaaacagtgtatcgaggaaaacaacattcaccaaaaggaagaaag
|
70
|
+
ggatgacgaagaaactaaccgagctagtcactctatgtggtgttgaagcatgtgcggtcgtctatagtccgttcaa
|
71
|
+
ctcgatcccggaggcttggccgtcaagggaaggcgttgaagacgtggtgtcgaaatttatggagttgtcggtgttg
|
72
|
+
gaccggaccaagaagatggtggatcaagagacttttataagtcaaaggatcgccaaagaaaaagagcagctgcaga
|
73
|
+
agctacgtgatgagaaccataattctcagattcgggagttaatgtttggttgtctcaaaggggagacgaatgtgta
|
74
|
+
taatcttgatggaagggatcttcaagatttgagtttatatattgataagtatcttaatggtcttactcgcaggatt
|
75
|
+
ga gatcctTAttgagaacggtgagtcttcttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
|
76
|
+
gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
|
77
|
+
tgatttttatgatcagattccaaagaaaattcatggttt taatatgaatatgaataaggattcgaatcaaagtatg
|
78
|
+
gttttggatttgaatcaaaatcttaatgatggagaggacgagggcattccttgcatggacaacaacaactaccacc
|
79
|
+
ccgaaatcgattgtctcgctaccgtcaccactgcccccactgatgtttgtgctcctaacatcaccaatgatctcta
|
80
|
+
g
|
81
|
+
|
82
|
+
>At1g65300: mRNA 837bp (shortened at end)
|
83
|
+
atgaagagaaagatgaagttatcgttaatagaaaacagtgtatcgaggaaaacaacattcaccaaaaggaagaaag
|
84
|
+
ggatgacgaagaaactaaccgagctagtcactctatgtggtgttgaagcatgtgcggtcgtctatagtccgttcaa
|
85
|
+
ctcgatcccggaggcttggccgtcaagggaaggcgttgaagacgtggtgtcgaaatttatggagttgtcggtgttg
|
86
|
+
gaccggaccaagaagatggtggatcaagagacttttataagtcaaaggatcgccaaagaaaaagagcagctgcaga
|
87
|
+
agctacgtgatgagaaccataattctcagattcgggagttaatgtttggttgtctcaaaggggagacgaatgtgta
|
88
|
+
taatcttgatggaagggatcttcaagatttgagtttatatattgataagtatcttaatggtcttactcgcaggatt
|
89
|
+
gagatcctTAttgagaacggtgagtcttcttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
|
90
|
+
gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
|
91
|
+
tgatttttatgatcag
|
92
|
+
|
93
|
+
|
94
|
+
>At1g65300: mRNA 837bp (shortened from start)
|
95
|
+
ttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
|
96
|
+
gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
|
97
|
+
tgatttttatgatcagattccaaagaaaattcatggttttaatatgaatatgaataaggattcgaatcaaagtatg
|
98
|
+
gttttggatttgaatcaaaatcttaatgatggagaggacgagggcattccttgcatggacaacaacaactaccacc
|
99
|
+
ccgaaatcgattgtctcgctaccgtcaccactgcccccactgatgtttgtgctcctaacatcaccaatgatctcta
|
100
|
+
g
|
101
|
+
|
102
|
+
|
103
|
+
>At1g02580 - shortened for test - inserted cutpoint
|
104
|
+
gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
|
105
|
+
ctaatcgtgaatgcgatcca gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
|
106
|
+
agtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggt
|
107
|
+
tttaattggggtgcatttacatgggactctct taaaaagaatgagtatctcggagaatatactggagaactgatca
|
108
|
+
ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
|
109
|
+
|