genfrag 0.0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. data/.bnsignore +16 -0
  2. data/History.txt +4 -0
  3. data/LICENSE.txt +58 -0
  4. data/README.rdoc +40 -0
  5. data/Rakefile +53 -0
  6. data/bin/genfrag +8 -0
  7. data/lib/genfrag.rb +129 -0
  8. data/lib/genfrag/app.rb +105 -0
  9. data/lib/genfrag/app/command.rb +145 -0
  10. data/lib/genfrag/app/index_command.rb +227 -0
  11. data/lib/genfrag/app/index_command/db.rb +105 -0
  12. data/lib/genfrag/app/search_command.rb +298 -0
  13. data/lib/genfrag/app/search_command/match.rb +165 -0
  14. data/lib/genfrag/app/search_command/process_file.rb +125 -0
  15. data/lib/genfrag/app/search_command/trim.rb +121 -0
  16. data/lib/genfrag/debug.rb +0 -0
  17. data/spec/data/index_command/in/a.fasta +109 -0
  18. data/spec/data/index_command/out/1-a_lookup.tdf +4 -0
  19. data/spec/data/index_command/out/2-a_lookup.db +0 -0
  20. data/spec/data/index_command/out/3-a_lookup.tdf +2 -0
  21. data/spec/data/index_command/out/4-a_lookup.db +0 -0
  22. data/spec/data/index_command/out/5-a_lookup.tdf +4 -0
  23. data/spec/data/index_command/out/6-a_lookup.db +0 -0
  24. data/spec/data/index_command/out/a.fasta.db +0 -0
  25. data/spec/data/index_command/out/a.fasta.tdf +6 -0
  26. data/spec/genfrag/app/command_spec.rb +55 -0
  27. data/spec/genfrag/app/index_command_spec.rb +258 -0
  28. data/spec/genfrag/app/search_command/match_spec.rb +77 -0
  29. data/spec/genfrag/app/search_command/process_file_spec.rb +185 -0
  30. data/spec/genfrag/app/search_command/trim_spec.rb +75 -0
  31. data/spec/genfrag/app/search_command_spec.rb +260 -0
  32. data/spec/genfrag/app_spec.rb +77 -0
  33. data/spec/genfrag_spec.rb +87 -0
  34. data/spec/spec_helper.rb +56 -0
  35. data/tasks/ann.rake +80 -0
  36. data/tasks/bones.rake +20 -0
  37. data/tasks/gem.rake +201 -0
  38. data/tasks/git.rake +40 -0
  39. data/tasks/notes.rake +27 -0
  40. data/tasks/post_load.rake +34 -0
  41. data/tasks/rdoc.rake +50 -0
  42. data/tasks/rubyforge.rake +55 -0
  43. data/tasks/setup.rb +300 -0
  44. data/tasks/spec.rake +54 -0
  45. data/tasks/svn.rake +47 -0
  46. data/tasks/test.rake +40 -0
  47. metadata +136 -0
@@ -0,0 +1,165 @@
1
+
2
+ module Genfrag
3
+ class App
4
+
5
+ class SearchCommand < Command
6
+
7
# Does the sequence match the adapter
#
# Tests one end of a fragment against the adapter configured for that end
# and, on a match, masks the restriction-site tail on the primary strand.
#
# five_or_three   - 5 to test the left end (uses @ops.re5 and the
#                   :adapter5_* entries of @adapters), 3 to test the right
#                   end (uses @ops.re3 and the :adapter3_* entries).
# primary_frag    - primary strand String; leading '.' characters (trailing
#                   ones for the 3 case) are preserved in the result.
# complement_frag - complement strand String; returned as-is.
# raw_frag        - kept for interface compatibility; not consulted.
# trim            - kept for interface compatibility; not consulted.
#
# None of the arguments are modified. (Earlier revisions reversed the
# strings in place for the 3 case and left them reversed for the caller.)
#
# Returns false when the end does not start with the adapter specificity
# (or the fragment is too short to contain the tail), otherwise
# [new_primary_frag, complement_frag] where the tail positions of the
# primary strand are replaced by '+'.
# Raises RuntimeError when five_or_three is neither 5 nor 3, or when an
# adapter sequence/size is configured (not implemented yet).
#
def matches_adapter(five_or_three, primary_frag, complement_frag, raw_frag, trim)
  case five_or_three
  when 5
    tail = right_tail_of(Bio::RestrictionEnzyme::DoubleStranded.new(@ops.re5).aligned_strands_with_cuts.primary)

    adapter_specificity = @adapters[:adapter5_specificity].upcase
    adapter_sequence = @adapters[:adapter5_sequence]
    adapter_size = @adapters[:adapter5_size]
    oriented_primary = primary_frag

  when 3
    tail = left_tail_of(Bio::RestrictionEnzyme::DoubleStranded.new(@ops.re3).aligned_strands_with_cuts.primary)

    specificity = @adapters[:adapter3_specificity]
    if specificity[0].chr == '_'
      # a leading underscore means: use the text as given, only reversed
      adapter_specificity = specificity[1..-1].reverse.upcase
    else
      adapter_specificity = Bio::Sequence::NA.new(specificity).forward_complement.to_s.upcase
    end
    adapter_sequence = @adapters[:adapter3_sequence]
    adapter_size = @adapters[:adapter3_size]
    # Work on a reversed copy so the right end can be handled exactly like
    # the left end without touching the caller's string.
    oriented_primary = primary_frag.reverse

  else
    raise "First argument to matches_adapter must be a '5' or a '3'. Received: #{five_or_three.inspect}"
  end

  # TEMP Check for match
  dots_on_primary = oriented_primary[/\A\.*/].size
  lead_in = tail.size + dots_on_primary
  remainder = oriented_primary[ lead_in .. -1 ]
  # A fragment shorter than the lead-in cannot carry the adapter site.
  return false if remainder.nil?
  return false if remainder.delete('.') !~ /^#{adapter_specificity}/i

  if adapter_sequence
    raise 'FIXME - not functional yet'

    # if lead_in >= adapter_sequence.size
    #   # need to preserve dots on primary string
    #   new_primary_frag = ('.' * (lead_in - adapter_sequence.size)) + adapter_sequence + primary_frag[ lead_in .. -1 ]
    #   new_complement_frag = complement_frag
    # else
    #   # need to add dots to beginning of complement string
    #   new_primary_frag = adapter_sequence + primary_frag[ lead_in .. -1 ]
    #   new_complement_frag = ('.' * (adapter_sequence.size - lead_in) ) + complement_frag
    # end

  elsif adapter_size
    raise 'FIXME - not functional yet'

    # # only the size and the specificity of the adapter has been provided
    # size_of_specificity = adapter_specificity.size
    # size_of_sequence = adapter_size - size_of_specificity
    # if lead_in >= size_of_sequence
    #   # need to preserve dots on primary string
    #   new_primary_frag = primary_frag[ 0 .. (lead_in - 1) ].upcase + primary_frag[ lead_in .. -1 ]
    #   new_complement_frag = complement_frag
    # else
    #   # need to add dots to beginning of complement string
    #   new_primary_frag = ('+' * (size_of_sequence - lead_in) ) + primary_frag[ 0 .. (lead_in - 1) ].upcase + primary_frag[ lead_in .. -1 ]
    #   new_complement_frag = ('.' * (size_of_sequence - lead_in) ) + complement_frag
    # end

  else
    # only the specificity has been provided
    new_primary_frag = ('.' * dots_on_primary) + ('+' * tail.size) + remainder
  end

  # Undo the working orientation for the right end.
  new_primary_frag = new_primary_frag.reverse if five_or_three == 3

  [new_primary_frag, complement_frag]
end
106
+
107
+
108
# Find the fragments that match the search parameters
#
# sizes - collection yielding [raw_size, info] pairs (e.g. a Hash keyed by
#         raw fragment size).
# left  - Hash whose :trim_from_both entry is subtracted from each raw size.
# right - Hash whose :trim_from_both entry is subtracted from each raw size.
#
# The wanted sizes come from @ops.size (a single value or a list). A size
# of 0, "0" or nil disables filtering and every info is returned. Otherwise
# an info is collected once for every wanted size whose window
# (wanted size +/- the summed :adapter5_size and :adapter3_size of
# @adapters) contains the trimmed fragment size.
#
# Returns an Array of the matching info values.
#
def find_matching_fragments(sizes, left, right)
  tolerance = (@adapters[:adapter5_size] || 0) + (@adapters[:adapter3_size] || 0)
  wanted_sizes = [@ops.size].flatten

  # No size filter requested: every fragment is a hit.
  if [[0], [nil], ['0']].include?(wanted_sizes)
    return sizes.map { |_raw_size, info| info }
  end

  # The amount cut from both strands is the same for every fragment.
  trimmed_off = left[:trim_from_both] + right[:trim_from_both]

  hits = []
  wanted_sizes.each do |seek_size|
    seek_size = seek_size.to_i
    smallest = seek_size - tolerance
    largest = seek_size + tolerance
    sizes.each do |raw_size, info|
      frag_size = raw_size - trimmed_off
      hits << info if frag_size >= smallest && frag_size <= largest
    end
  end
  hits
end
133
+
134
# Return the bases that follow the last cut symbol ('^') of a pattern,
# with the separating spaces removed.
#
# s - recognition pattern with cut symbols, for example:
#   'PpiI'  => "n n n n n n^n n n n n n n g a a c n n n n n c t c n n n n n n n n n n n n n^n"
#           => 'n'
#   'BstYI' => "r^g a t c y"
#           => 'gatcy'
#
# Raises RuntimeError when the pattern contains no cut symbol.
#
def right_tail_of(s)
  # The greedy leading '.*' makes the capture start after the LAST '^'.
  cut = /.*\^(.*)/.match(s)
  raise "Sequence #{s} has no cuts (defined by symbol '^')" unless cut

  cut[1].delete(' ')
end
146
+
147
# Return the bases that precede the first cut symbol ('^') of a pattern,
# with the separating spaces removed.
#
# s - recognition pattern with cut symbols, for example:
#   'PpiI'  => "n n n n n n^n n n n n n n g a a c n n n n n c t c n n n n n n n n n n n n n^n"
#           => 'nnnnnn'
#   'BstYI' => "r^g a t c y"
#           => 'r'
#
# Raises RuntimeError when the pattern contains no cut symbol.
#
def left_tail_of(s)
  # The capture cannot contain '^', so it ends at the FIRST cut symbol.
  cut = /([^\^]*)\^/.match(s)
  raise "Sequence #{s} has no cuts (defined by symbol '^')" unless cut

  cut[1].delete(' ')
end
160
+
161
+ end # class SearchCommand
162
+ end # class App
163
+ end # module Genfrag
164
+
165
+ # EOF
@@ -0,0 +1,125 @@
1
+
2
+ module Genfrag
3
+ class App
4
+
5
+ class SearchCommand < Command
6
+
7
# Parsers for the lookup inputs used by the search command. Every input
# exists in two flavours: a tab-delimited file ("tdf", passed in as a
# list of lines whose first line is a header) and an sqlite3-style
# database handle that responds to #execute(sql) and yields rows.
class ProcessFile
  class << self
    # Process the standardized Fasta file (tdf format)
    #
    # f_normalized_fasta - list of lines; the first (header) line is skipped
    #                      and blank lines are ignored. Columns: id,
    #                      CSV-encoded definitions, sequence.
    #
    # Returns a Hash of Integer id => {:definitions => Array, :sequence => String}.
    def process_tdf_fasta_file(f_normalized_fasta)
      sequences = {}
      data_rows(f_normalized_fasta).each do |fields|
        sequences[fields[0].to_i] = {:definitions => CSV.parse_line(fields[1]), :sequence => fields[2]}
      end
      sequences
    end

    # Process the standardized Fasta file (sqlite3 format)
    #
    # db_normalized_fasta - handle responding to #execute; rows are read
    #                       from the db_normalized_fasta table.
    #
    # Returns the same structure as process_tdf_fasta_file.
    def process_db_fasta_file(db_normalized_fasta)
      sequences = {}
      db_normalized_fasta.execute( "select * from db_normalized_fasta" ) do |row|
        sequences[row[0].to_i] = {:definitions => CSV.parse_line(row[1]), :sequence => row[2]}
      end
      sequences
    end

    # Process the fragment frequency file (tdf format)
    #
    # f_freq_lookup - list of lines; the first (header) line is skipped and
    #                 blank lines are ignored. Columns: id, size, positions
    #                 ("offset fasta_id" entries separated by ", ").
    #
    # Returns a Hash of Integer size => Array of
    # {:offset, :raw_size, :fasta_id} Hashes (all Integer values).
    def process_tdf_freq_lookup(f_freq_lookup)
      sizes = {}
      data_rows(f_freq_lookup).each do |fields|
        size = fields[1].to_i
        sizes[size] = parse_positions(fields[2], size)
      end
      sizes
    end

    # Process the fragment frequency file (sqlite3 format)
    #
    # db_freq_lookup - handle responding to #execute; rows are read from
    #                  the db_freq_lookup table.
    #
    # Returns the same structure as process_tdf_freq_lookup.
    def process_db_freq_lookup(db_freq_lookup)
      sizes = {}
      db_freq_lookup.execute( "select * from db_freq_lookup" ) do |row|
        size = row[1].to_i
        sizes[size] = parse_positions(row[2], size)
      end
      sizes
    end

    # Process the adapter file (tdf format)
    #
    # f_adapters    - list of lines; the first (header) line is skipped and
    #                 blank lines are ignored. Columns: name, worksense
    #                 (first character '5' or '3'), sequence, specificity.
    # adapter5_name - pattern matched (case-insensitively, as a regular
    #                 expression) against the names of worksense-5 rows.
    # adapter3_name - same, for worksense-3 rows.
    #
    # The first matching row wins for each adapter, and scanning stops as
    # soon as every requested adapter has been found.
    #
    # Returns a Hash with :adapter5_sequence, :adapter5_specificity,
    # :adapter3_sequence and :adapter3_specificity (nil when not requested).
    # Raises RuntimeError for an unknown worksense value or when a
    # requested adapter is not present.
    def process_tdf_adapters(f_adapters, adapter5_name=nil, adapter3_name=nil)
      adapter5_sequence = nil
      adapter3_sequence = nil
      adapter5_specificity = nil
      adapter3_specificity = nil
      adapter5_pending = !adapter5_name.nil?
      adapter3_pending = !adapter3_name.nil?

      data_rows(f_adapters).each do |fields|
        break unless adapter5_pending || adapter3_pending
        name = fields[0]
        worksense = fields[1][0].chr.to_i
        # drop a trailing "|NNN..." placeholder from the sequence
        sequence = fields[2].gsub(/\|N*$/i,'')
        specificity = fields[3] # what it's supposed to match
        if worksense != 3 && worksense != 5
          raise "Unknown worksense value \"#{fields[1]}\". First character of column must be a '5' or a '3'."
        end

        # Only adapters still being looked for may be assigned; otherwise a
        # later row could replace an adapter that was already found.
        if adapter5_pending && adapter5_name && (worksense == 5) && ( name =~ /#{adapter5_name}/i )
          adapter5_sequence = sequence
          adapter5_specificity = specificity
          adapter5_pending = false
        elsif adapter3_pending && adapter3_name && (worksense == 3) && ( name =~ /#{adapter3_name}/i )
          adapter3_sequence = sequence
          adapter3_specificity = specificity
          adapter3_pending = false
        end
      end

      if adapter5_name && adapter5_pending
        raise "named-adapter5 ('#{adapter5_name}') with the worksense '5' not found."
      elsif adapter3_name && adapter3_pending
        raise "named-adapter3 ('#{adapter3_name}') with the worksense '3' not found."
      end

      {
        :adapter5_sequence => adapter5_sequence,
        :adapter5_specificity => adapter5_specificity,
        :adapter3_sequence => adapter3_sequence,
        :adapter3_specificity => adapter3_specificity
      }
    end

    private

    # Split the data lines of a tdf input into tab-separated fields,
    # skipping the header line and any blank lines. An input without any
    # lines yields no rows.
    def data_rows(lines)
      rows = (lines[1..-1] || []).map { |line| line.chomp.split("\t") }
      rows.reject { |fields| fields.empty? }
    end

    # Turn a "offset fasta_id, offset fasta_id, ..." list into position
    # Hashes tagged with the raw fragment size they belong to.
    def parse_positions(positions, size)
      positions.split(', ').map do |entry|
        offset, fasta_id = entry.split(' ')
        {:offset => offset.to_i, :raw_size => size, :fasta_id => fasta_id.to_i}
      end
    end

  end
end # class ProcessFile
120
+
121
+ end # class SearchCommand
122
+ end # class App
123
+ end # module Genfrag
124
+
125
+ # EOF
@@ -0,0 +1,121 @@
1
+
2
+ module Genfrag
3
+ class App
4
+
5
+ class SearchCommand < Command
6
+
7
+ # Keep track of extraneous nucleotides that should be removed from the final fragment
8
+ #
9
+ # Example BstYI used as RE5
10
+ # BstYI -
11
+ # 5' - r^g a t c y - 3'
12
+ # 3' - y c t a g^r - 5'
13
+ #
14
+ # re5_ds.cut_locations.primary # => [0]
15
+ # re5_ds.cut_locations.complement # => [4]
16
+ # re5_ds.aligned_strands.primary.size # => 6
17
+ #
18
+ # # number of nucleotides to trim from the left side on the primary strand
19
+ # re5_ds.cut_locations.primary.max + 1 # => 1
20
+ #
21
+ # # number of nucleotides to trim from the left side on the complement strand
22
+ # re5_ds.cut_locations.complement.max + 1 # => 5
23
+ #
24
+ #
25
+ # Example BstYI used as RE3
26
+ # BstYI -
27
+ # 5' - r^g a t c y - 3'
28
+ # 3' - y c t a g^r - 5'
29
+ #
30
+ # re3_ds.cut_locations.primary # => [0]
31
+ # re3_ds.cut_locations.complement # => [4]
32
+ # re3_ds.aligned_strands.primary.size # => 6
33
+ #
34
+ # # number of nucleotides to trim from the right side on the primary strand
35
+ # re3_ds.aligned_strands.primary.size - (re3_ds.cut_locations.primary.min + 1) # => 5
36
+ #
37
+ # # number of nucleotides to trim from the right side on the complement strand
38
+ # re3_ds.aligned_strands.primary.size - (re3_ds.cut_locations.complement.min + 1) # => 1
39
+ #
40
+ #
41
+ # Example PpiI used as RE5
42
+ # PpiI -
43
+ # 5' - n n n n n n^n n n n n n n g a a c n n n n n c t c n n n n n n n n n n n n n^n - 3'
44
+ # 3' - n^n n n n n n n n n n n n c t t g n n n n n g a g n n n n n n n n^n n n n n n - 5'
45
+ #
46
+ # re5_ds.cut_locations.primary # => [5, 37]
47
+ # re5_ds.cut_locations.complement # => [0, 32]
48
+ # re5_ds.aligned_strands.primary.size # => 39
49
+ #
50
+ # # number of nucleotides to trim from the left side on the primary strand
51
+ # re5_ds.cut_locations.primary.max + 1 # => 38
52
+ #
53
+ # # number of nucleotides to trim from the left side on the complement strand
54
+ # re5_ds.cut_locations.complement.max + 1 # => 33
55
+ #
56
+ #
57
+ # Example PpiI used as RE3
58
+ # PpiI -
59
+ # 5' - n n n n n n^n n n n n n n g a a c n n n n n c t c n n n n n n n n n n n n n^n - 3'
60
+ # 3' - n^n n n n n n n n n n n n c t t g n n n n n g a g n n n n n n n n^n n n n n n - 5'
61
+ #
62
+ # re3_ds.cut_locations.primary # => [5, 37]
63
+ # re3_ds.cut_locations.complement # => [0, 32]
64
+ # re3_ds.aligned_strands.primary.size # => 39
65
+ #
66
+ # # number of nucleotides to trim from the right side on the primary strand
67
+ # re3_ds.aligned_strands.primary.size - (re3_ds.cut_locations.primary.min + 1) # => 33
68
+ #
69
+ # # number of nucleotides to trim from the right side on the complement strand
70
+ # re3_ds.aligned_strands.primary.size - (re3_ds.cut_locations.complement.min + 1) # => 38
71
+ #
72
# Compute, per strand, how many nucleotides have to be removed from each
# end of a fragment.
#
# re5_ds - enzyme used on the left end; its largest cut location on each
#          strand (plus one) gives the left trims.
# re3_ds - enzyme used on the right end; the length of its aligned primary
#          strand minus (smallest cut location plus one) gives the right
#          trims.
#
# Both arguments must respond to cut_locations.primary/complement (lists
# of cut indices); re3_ds must also respond to aligned_strands.primary.
#
# Returns a Hash with :from_left_primary, :from_left_complement,
# :from_right_primary and :from_right_complement.
def calculate_trim_for_nucleotides(re5_ds, re3_ds)
  re3_site_size = re3_ds.aligned_strands.primary.size
  {
    :from_left_primary     => re5_ds.cut_locations.primary.max + 1,
    :from_left_complement  => re5_ds.cut_locations.complement.max + 1,
    :from_right_primary    => re3_site_size - (re3_ds.cut_locations.primary.min + 1),
    :from_right_complement => re3_site_size - (re3_ds.cut_locations.complement.min + 1)
  }
end
80
+
81
# Calculate left and right trims
#
# trim - Hash with :from_left_primary, :from_left_complement,
#        :from_right_primary and :from_right_complement counts.
#
# Returns [left, right]. Each is a Hash with
#   :dot_out_from_primary - true when the primary strand has the larger
#                           trim on that side and therefore gets the "dot
#                           out" (nucleotide padding); when false the
#                           complement is assumed to need the padding.
#   :trim_from_both       - how much gets cut off on both the primary and
#                           the complement strand (the smaller trim).
def calculate_left_and_right_trims(trim)
  sides = [
    [:from_left_primary, :from_left_complement],
    [:from_right_primary, :from_right_complement]
  ]
  # Both sides follow the same rule, so derive them through one code path.
  sides.map do |primary_key, complement_key|
    primary_trim = trim[primary_key]
    complement_trim = trim[complement_key]
    {
      :dot_out_from_primary => (primary_trim > complement_trim),
      :trim_from_both => [primary_trim, complement_trim].min
    }
  end
end
95
+
96
# Do the trimming
#
# primary_frag    - primary strand String (not modified).
# complement_frag - complement strand String (not modified).
# left            - Hash with :dot_out_from_primary and :trim_from_both for
#                   the left end.
# right           - the same for the right end.
# trim            - Hash with the per-strand counts :from_left_primary,
#                   :from_left_complement, :from_right_primary and
#                   :from_right_complement.
#
# On each side the strand selected by :dot_out_from_primary (primary when
# true, complement otherwise) has its own trim count of characters replaced
# by '.'; afterwards :trim_from_both characters are removed from that side
# of both strands.
#
# Returns [primary_frag, complement_frag] as new Strings.
def trim_sequences(primary_frag, complement_frag, left, right, trim)
  if left[:dot_out_from_primary]
    count = trim[:from_left_primary]
    primary_frag = "." * count + primary_frag[count..-1]
  else
    count = trim[:from_left_complement]
    complement_frag = "." * count + complement_frag[count..-1]
  end

  if right[:dot_out_from_primary]
    count = trim[:from_right_primary]
    primary_frag = primary_frag[0..(-1 - count)] + "." * count
  else
    # The complement must be padded by its OWN trim count; using the
    # primary's (smaller) count would leave the overhang undotted.
    count = trim[:from_right_complement]
    complement_frag = complement_frag[0..(-1 - count)] + "." * count
  end

  keep = left[:trim_from_both]..(-1 - right[:trim_from_both])
  [primary_frag[keep], complement_frag[keep]]
end
116
+
117
+ end # class SearchCommand
118
+ end # class App
119
+ end # module Genfrag
120
+
121
+ # EOF
File without changes
@@ -0,0 +1,109 @@
1
+
2
+ >At1g02580 mRNA (2291 bp) UTR's and CDS
3
+ aggcgagtggttaatggagaaggaaaaccatgaggacgatggtgagggtttgccacccgaactaaatcagataaaa
4
+ gagcaaatcgaaaaggagagatttctgcatatcaagagaaaattcgagctgagatacattccaagtgtggctactc
5
+ atgcttcacaccatcaatcgtttgacttaaaccagcccgctgcagaggatgataatggaggagacaacaaatcact
6
+ tttgtcgagaatgcaaaacccacttcgtcatttcagtgcctcatctgattataattcttacgaagatcaaggttat
7
+ gttcttgatgaggatcaagattatgctcttgaagaagatgtaccattatttcttgatgaagatgtaccattattac
8
+ caagtgtcaagcttccaattgttgagaagctaccacgatccattacatgggtcttcaccaaaagtagccagctgat
9
+ ggctgaaagtgattctgtgattggtaagagacaaatctattatttgaatggtgaggcactagaattgagcagtgaa
10
+ gaagatgaggaagatgaagaagaagatgaggaagaaatcaagaaagaaaaatgcgaattttctgaagatgtagacc
11
+ gatttatatggacggttgggcaggactatggtttggatgatctggtcgtgcggcgtgctctcgccaagtacctcga
12
+ agtggatgtttcggacatattggaaagatacaatgaactcaagcttaagaatgatggaactgctggtgaggcttct
13
+ gatttgacatccaagacaataactactgctttccaggattttgctgatagacgtcattgccgtcgttgcatgatat
14
+ tcgattgtcatatgcatgagaagtatgagcccgagtctagatccagcgaagacaaatctagtttgtttgaggatga
15
+ agatagacaaccatgcagtgagcattgttacctcaaggtgaggagtgtgacagaagctgatcatgtgatggataat
16
+ gataactctatatcaaacaagattgtggtctcagatccaaacaacactatgtggacgcctgtagagaaggatcttt
17
+ acttgaaaggaattgagatatttgggagaaacagttgtgatgttgcattaaacatacttcgggggcttaagacgtg
18
+ cctagagatttacaattacatgcgcgaacaagatcaatgtactatgtcattagaccttaacaaaactacacaaaga
19
+ cacaatcaggttaccaaaaaagtatctcgaaaaagtagtaggtcggtccgcaaaaaatcgagactccgaaaatatg
20
+ ctcgttatccgcctgctttaaagaaaacaactagtggagaagctaagttttataagcactacacaccatgcacttg
21
+ caagtcaaaatgtggacagcaatgcccttgtttaactcacgaaaattgctgcgagaaatattgcgggtgctcaaag
22
+ gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
23
+ ctaatcgtgaatgcgatcca gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
24
+ agtgcaaatccaatgcaagaacatgcaattcctccttcaaaccaataaaaagattctcattggaaagtctgatgtt
25
+ catggatggggtgcatttacatgggactctct taaaaagaatgagtatctcggagaatatactggagaactgatca
26
+ ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
27
+ gctcgaaatcgatgctcgccgtaaaggaaacgagttcaaatttctcaatcactcagcaagacctaactgctacgcc
28
+ aagttgatgattgtgagaggagatcagaggattggtctatttgcggagagagcaatcgaagaaggtgaggagcttt
29
+ tcttcgactactgctatggaccagaacatgcggattggtcgcgtggtcgagaacctagaaagactggtgcttctaa
30
+ aaggtctaaggaagcccgtccagctcgttagtttttgatctgaggagaagcagcaattcaagcagtccttttttta
31
+ tgttatggtatatcaattaataatgtaatgctattttgtgttactaaaccaaaacttaagtttctgttttatttgt
32
+ tttagggtgttttgtttgtatcatatgtgtcttaactttcaaagttttctttttgtatttcaatttaaaaacaatg
33
+ tttatgttgtt
34
+
35
+ >At1g02580 mRNA (2291 bp) UTR's and CDS (duplicate)
36
+ aggcgagtggttaatggagaaggaaaaccatgaggacgatggtgagggtttgccacccgaactaaatcagataaaa
37
+ gagcaaatcgaaaaggagagatttctgcatatcaagagaaaattcgagctgagatacattccaagtgtggctactc
38
+ atgcttcacaccatcaatcgtttgacttaaaccagcccgctgcagaggatgataatggaggagacaacaaatcact
39
+ tttgtcgagaatgcaaaacccacttcgtcatttcagtgcctcatctgattataattcttacgaagatcaaggttat
40
+ gttcttgatgaggatcaagattatgctcttgaagaagatgtaccattatttcttgatgaagatgtaccattattac
41
+ caagtgtcaagcttccaattgttgagaagctaccacgatccattacatgggtcttcaccaaaagtagccagctgat
42
+ ggctgaaagtgattctgtgattggtaagagacaaatctattatttgaatggtgaggcactagaattgagcagtgaa
43
+ gaagatgaggaagatgaagaagaagatgaggaagaaatcaagaaagaaaaatgcgaattttctgaagatgtagacc
44
+ gatttatatggacggttgggcaggactatggtttggatgatctggtcgtgcggcgtgctctcgccaagtacctcga
45
+ agtggatgtttcggacatattggaaagatacaatgaactcaagcttaagaatgatggaactgctggtgaggcttct
46
+ gatttgacatccaagacaataactactgctttccaggattttgctgatagacgtcattgccgtcgttgcatgatat
47
+ tcgattgtcatatgcatgagaagtatgagcccgagtctagatccagcgaagacaaatctagtttgtttgaggatga
48
+ agatagacaaccatgcagtgagcattgttacctcaaggtgaggagtgtgacagaagctgatcatgtgatggataat
49
+ gataactctatatcaaacaagattgtggtctcagatccaaacaacactatgtggacgcctgtagagaaggatcttt
50
+ acttgaaaggaattgagatatttgggagaaacagttgtgatgttgcattaaacatacttcgggggcttaagacgtg
51
+ cctagagatttacaattacatgcgcgaacaagatcaatgtactatgtcattagaccttaacaaaactacacaaaga
52
+ cacaatcaggttaccaaaaaagtatctcgaaaaagtagtaggtcggtccgcaaaaaatcgagactccgaaaatatg
53
+ ctcgttatccgcctgctttaaagaaaacaactagtggagaagctaagttttataagcactacacaccatgcacttg
54
+ caagtcaaaatgtggacagcaatgcccttgtttaactcacgaaaattgctgcgagaaatattgcgggtgctcaaag
55
+ gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
56
+ ctaatcgtgaatgcgatcca gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
57
+ agtgcaaatccaatgcaagaacatgcaattcctccttcaaaccaataaaaagattctcattggaaagtctgatgtt
58
+ catggatggggtgcatttacatgggactctct taaaaagaatgagtatctcggagaatatactggagaactgatca
59
+ ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
60
+ gctcgaaatcgatgctcgccgtaaaggaaacgagttcaaatttctcaatcactcagcaagacctaactgctacgcc
61
+ aagttgatgattgtgagaggagatcagaggattggtctatttgcggagagagcaatcgaagaaggtgaggagcttt
62
+ tcttcgactactgctatggaccagaacatgcggattggtcgcgtggtcgagaacctagaaagactggtgcttctaa
63
+ aaggtctaaggaagcccgtccagctcgttagtttttgatctgaggagaagcagcaattcaagcagtccttttttta
64
+ tgttatggtatatcaattaataatgtaatgctattttgtgttactaaaccaaaacttaagtttctgttttatttgt
65
+ tttagggtgttttgtttgtatcatatgtgtcttaactttcaaagttttctttttgtatttcaatttaaaaacaatg
66
+ tttatgttgtt
67
+
68
+ >At1g65300: mRNA 837bp
69
+ atgaagagaaagatgaagttatcgttaatagaaaacagtgtatcgaggaaaacaacattcaccaaaaggaagaaag
70
+ ggatgacgaagaaactaaccgagctagtcactctatgtggtgttgaagcatgtgcggtcgtctatagtccgttcaa
71
+ ctcgatcccggaggcttggccgtcaagggaaggcgttgaagacgtggtgtcgaaatttatggagttgtcggtgttg
72
+ gaccggaccaagaagatggtggatcaagagacttttataagtcaaaggatcgccaaagaaaaagagcagctgcaga
73
+ agctacgtgatgagaaccataattctcagattcgggagttaatgtttggttgtctcaaaggggagacgaatgtgta
74
+ taatcttgatggaagggatcttcaagatttgagtttatatattgataagtatcttaatggtcttactcgcaggatt
75
+ ga gatcctTAttgagaacggtgagtcttcttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
76
+ gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
77
+ tgatttttatgatcagattccaaagaaaattcatggttt taatatgaatatgaataaggattcgaatcaaagtatg
78
+ gttttggatttgaatcaaaatcttaatgatggagaggacgagggcattccttgcatggacaacaacaactaccacc
79
+ ccgaaatcgattgtctcgctaccgtcaccactgcccccactgatgtttgtgctcctaacatcaccaatgatctcta
80
+ g
81
+
82
+ >At1g65300: mRNA 837bp (shortened at end)
83
+ atgaagagaaagatgaagttatcgttaatagaaaacagtgtatcgaggaaaacaacattcaccaaaaggaagaaag
84
+ ggatgacgaagaaactaaccgagctagtcactctatgtggtgttgaagcatgtgcggtcgtctatagtccgttcaa
85
+ ctcgatcccggaggcttggccgtcaagggaaggcgttgaagacgtggtgtcgaaatttatggagttgtcggtgttg
86
+ gaccggaccaagaagatggtggatcaagagacttttataagtcaaaggatcgccaaagaaaaagagcagctgcaga
87
+ agctacgtgatgagaaccataattctcagattcgggagttaatgtttggttgtctcaaaggggagacgaatgtgta
88
+ taatcttgatggaagggatcttcaagatttgagtttatatattgataagtatcttaatggtcttactcgcaggatt
89
+ gagatcctTAttgagaacggtgagtcttcttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
90
+ gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
91
+ tgatttttatgatcag
92
+
93
+
94
+ >At1g65300: mRNA 837bp (shortened from start)
95
+ ttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
96
+ gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
97
+ tgatttttatgatcagattccaaagaaaattcatggttttaatatgaatatgaataaggattcgaatcaaagtatg
98
+ gttttggatttgaatcaaaatcttaatgatggagaggacgagggcattccttgcatggacaacaacaactaccacc
99
+ ccgaaatcgattgtctcgctaccgtcaccactgcccccactgatgtttgtgctcctaacatcaccaatgatctcta
100
+ g
101
+
102
+
103
+ >At1g02580 - shortened for test - inserted cutpoint
104
+ gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
105
+ ctaatcgtgaatgcgatcca gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
106
+ agtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggt
107
+ tttaattggggtgcatttacatgggactctct taaaaagaatgagtatctcggagaatatactggagaactgatca
108
+ ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
109
+