genfrag 0.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. data/.bnsignore +16 -0
  2. data/History.txt +4 -0
  3. data/LICENSE.txt +58 -0
  4. data/README.rdoc +40 -0
  5. data/Rakefile +53 -0
  6. data/bin/genfrag +8 -0
  7. data/lib/genfrag.rb +129 -0
  8. data/lib/genfrag/app.rb +105 -0
  9. data/lib/genfrag/app/command.rb +145 -0
  10. data/lib/genfrag/app/index_command.rb +227 -0
  11. data/lib/genfrag/app/index_command/db.rb +105 -0
  12. data/lib/genfrag/app/search_command.rb +298 -0
  13. data/lib/genfrag/app/search_command/match.rb +165 -0
  14. data/lib/genfrag/app/search_command/process_file.rb +125 -0
  15. data/lib/genfrag/app/search_command/trim.rb +121 -0
  16. data/lib/genfrag/debug.rb +0 -0
  17. data/spec/data/index_command/in/a.fasta +109 -0
  18. data/spec/data/index_command/out/1-a_lookup.tdf +4 -0
  19. data/spec/data/index_command/out/2-a_lookup.db +0 -0
  20. data/spec/data/index_command/out/3-a_lookup.tdf +2 -0
  21. data/spec/data/index_command/out/4-a_lookup.db +0 -0
  22. data/spec/data/index_command/out/5-a_lookup.tdf +4 -0
  23. data/spec/data/index_command/out/6-a_lookup.db +0 -0
  24. data/spec/data/index_command/out/a.fasta.db +0 -0
  25. data/spec/data/index_command/out/a.fasta.tdf +6 -0
  26. data/spec/genfrag/app/command_spec.rb +55 -0
  27. data/spec/genfrag/app/index_command_spec.rb +258 -0
  28. data/spec/genfrag/app/search_command/match_spec.rb +77 -0
  29. data/spec/genfrag/app/search_command/process_file_spec.rb +185 -0
  30. data/spec/genfrag/app/search_command/trim_spec.rb +75 -0
  31. data/spec/genfrag/app/search_command_spec.rb +260 -0
  32. data/spec/genfrag/app_spec.rb +77 -0
  33. data/spec/genfrag_spec.rb +87 -0
  34. data/spec/spec_helper.rb +56 -0
  35. data/tasks/ann.rake +80 -0
  36. data/tasks/bones.rake +20 -0
  37. data/tasks/gem.rake +201 -0
  38. data/tasks/git.rake +40 -0
  39. data/tasks/notes.rake +27 -0
  40. data/tasks/post_load.rake +34 -0
  41. data/tasks/rdoc.rake +50 -0
  42. data/tasks/rubyforge.rake +55 -0
  43. data/tasks/setup.rb +300 -0
  44. data/tasks/spec.rake +54 -0
  45. data/tasks/svn.rake +47 -0
  46. data/tasks/test.rake +40 -0
  47. metadata +136 -0
@@ -0,0 +1,165 @@
1
+
2
+ module Genfrag
3
+ class App
4
+
5
+ class SearchCommand < Command
6
+
7
# Does the sequence match the adapter?
#
# Tests one end of a fragment against the configured adapter specificity and,
# on success, rewrites the primary strand so the enzyme tail is shown as '+'.
#
# five_or_three   - 5 checks against the re5/adapter5 settings, 3 against the
#                   re3/adapter3 settings (the strands are reversed first).
# primary_frag    - primary strand String; leading '.' characters are counted
#                   and skipped together with the enzyme tail.
# complement_frag - complement strand String; handed back as the second
#                   element of the result.
# raw_frag        - raw fragment String; only reversed in place for 3.
# trim            - Hash with :from_left_* / :from_right_* entries. The values
#                   are read but not used by the active code path.
#
# Returns false when the strand does not start with the adapter specificity,
# otherwise [new_primary_frag, new_complement_frag].
# Raises RuntimeError when five_or_three is neither 5 nor 3, or when an
# adapter sequence or adapter size is configured (not implemented yet).
#
# NOTE: for five_or_three == 3 the three fragment arguments are reversed in
# place. complement_frag is reversed back before a successful return, but
# primary_frag and raw_frag stay reversed, and all three stay reversed when
# false is returned.
def matches_adapter(five_or_three, primary_frag, complement_frag, raw_frag, trim)
  if five_or_three == 5
    tail = right_tail_of(Bio::RestrictionEnzyme::DoubleStranded.new(@ops.re5).aligned_strands_with_cuts.primary)

    adapter_specificity = @adapters[:adapter5_specificity].upcase
    adapter_sequence = @adapters[:adapter5_sequence] && @adapters[:adapter5_sequence].upcase
    adapter_size = @adapters[:adapter5_size]
    # Only needed by the disabled logic below; still read as in the original.
    _trim_primary = trim[:from_left_primary]
    _trim_complement = trim[:from_left_complement]
  elsif five_or_three == 3
    tail = left_tail_of(Bio::RestrictionEnzyme::DoubleStranded.new(@ops.re3).aligned_strands_with_cuts.primary)

    specificity = @adapters[:adapter3_specificity]
    adapter_specificity =
      if specificity[0].chr == '_'
        # A leading underscore means: use the rest as given, just reversed.
        specificity[1..-1].reverse.upcase
      else
        Bio::Sequence::NA.new(specificity).forward_complement.to_s.upcase
      end
    adapter_sequence = @adapters[:adapter3_sequence] &&
                       Bio::Sequence::NA.new(@adapters[:adapter3_sequence]).forward_complement.to_s.upcase
    adapter_size = @adapters[:adapter3_size]
    # Only needed by the disabled logic below; still read as in the original.
    _trim_primary = trim[:from_right_primary]
    _trim_complement = trim[:from_right_complement]

    # Work from the right-hand end by flipping every strand in place.
    [primary_frag, complement_frag, raw_frag].each(&:reverse!)
  else
    raise "First argument to matches_adapter must be a '5' or a '3'. Received: #{five_or_three.inspect}"
  end

  # TEMP Check for match
  primary_frag =~ /(\.*)/
  dots_on_primary = Regexp.last_match(1).size
  lead_in = tail.size + dots_on_primary
  remainder = primary_frag[lead_in..-1]
  return false unless remainder.tr('.', '') =~ /^#{adapter_specificity}/i

  #return false if raw_frag[ [trim_primary, trim_complement].max .. -1 ] !~ /^#{adapter_specificity}/i
  #overhang = [trim_primary, trim_complement].max - [trim_primary, trim_complement].min
  #lead_in = overhang

  # Neither a full adapter sequence nor an adapter size is supported yet.
  # Draft logic for the adapter_sequence case:
  #   if lead_in >= adapter_sequence.size
  #     # need to preserve dots on primary string
  #     new_primary_frag = ('.' * (lead_in - adapter_sequence.size)) + adapter_sequence + primary_frag[ lead_in .. -1 ]
  #     new_complement_frag = complement_frag
  #   else
  #     # need to add dots to beginning of complement string
  #     new_primary_frag = adapter_sequence + primary_frag[ lead_in .. -1 ]
  #     new_complement_frag = ('.' * (adapter_sequence.size - lead_in) ) + complement_frag
  #   end
  # Draft logic for the adapter_size case
  # (only the size and the specificity of the adapter has been provided):
  #   size_of_specificity = adapter_specificity.size
  #   size_of_sequence = adapter_size - size_of_specificity
  #   if lead_in >= size_of_sequence
  #     # need to preserve dots on primary string
  #     new_primary_frag = primary_frag[ 0 .. (lead_in - 1) ].upcase + primary_frag[ lead_in .. -1 ]
  #     new_complement_frag = complement_frag
  #   else
  #     # need to add dots to beginning of complement string
  #     new_primary_frag = ('+' * (size_of_sequence - lead_in) ) + primary_frag[ 0 .. (lead_in - 1) ].upcase + primary_frag[ lead_in .. -1 ]
  #     new_complement_frag = ('.' * (size_of_sequence - lead_in) ) + complement_frag
  #   end
  raise 'FIXME - not functional yet' if adapter_sequence || adapter_size

  # only the specificity has been provided
  new_primary_frag = ('.' * dots_on_primary) + ('+' * tail.size) + remainder
  new_complement_frag = complement_frag

  if five_or_three == 3
    # Restore left-to-right orientation for the returned strands.
    new_primary_frag.reverse!
    new_complement_frag.reverse!
  end

  [new_primary_frag, new_complement_frag]
end
106
+
107
+
108
# Find the fragments that match the search parameters
#
# sizes - enumerable of (raw_size, info) pairs, e.g. a Hash keyed by raw size.
# left  - Hash providing :trim_from_both for the left end.
# right - Hash providing :trim_from_both for the right end.
#
# Reads the requested size(s) from @ops.size and the adapter sizes from
# @adapters. A requested size of 0, "0" or nil selects every entry. Otherwise
# an entry is selected when its trimmed size lies within the combined adapter
# size of a requested size. One info is appended per requested size that
# accepts it.
#
# Returns an Array of the selected info values.
def find_matching_fragments(sizes, left, right)
  tolerance = (@adapters[:adapter5_size] || 0) + (@adapters[:adapter3_size] || 0)
  wanted = [@ops.size].flatten

  # No usable size filter: every fragment is a hit.
  return sizes.map { |_raw_size, info| info } if [[0], [nil], ['0']].include?(wanted)

  wanted.each_with_object([]) do |seek_size, hits|
    target = seek_size.to_i
    lower = target - tolerance
    upper = target + tolerance
    sizes.each do |raw_size, info|
      frag_size = raw_size - left[:trim_from_both] - right[:trim_from_both]
      hits << info if frag_size.between?(lower, upper)
    end
  end
end
133
+
134
# Return everything after the last cut marker ('^') of a cut-annotated
# sequence, with the spaces removed.
#
#   'PpiI'  => "n n n n n n^n n n n n n n g a a c n n n n n c t c n n n n n n n n n n n n n^n"
#           => 'n'
#   'BstYI' => "r^g a t c y"
#           => 'gatcy'
#
# Raises RuntimeError when the sequence contains no '^'.
def right_tail_of(s)
  raise "Sequence #{s} has no cuts (defined by symbol '^')" unless s =~ /.*\^(.*)/

  Regexp.last_match(1).delete(' ')
end
146
+
147
# Return everything before the first cut marker ('^') of a cut-annotated
# sequence, with the spaces removed.
#
#   'PpiI'  => "n n n n n n^n n n n n n n g a a c n n n n n c t c n n n n n n n n n n n n n^n"
#           => 'nnnnnn'
#   'BstYI' => "r^g a t c y"
#           => 'r'
#
# Raises RuntimeError when the sequence contains no '^'.
def left_tail_of(s)
  raise "Sequence #{s} has no cuts (defined by symbol '^')" unless s =~ /([^\^]*)\^/

  Regexp.last_match(1).delete(' ')
end
160
+
161
+ end # class SearchCommand
162
+ end # class App
163
+ end # module Genfrag
164
+
165
+ # EOF
@@ -0,0 +1,125 @@
1
+
2
+ module Genfrag
3
+ class App
4
+
5
+ class SearchCommand < Command
6
+
7
+ class ProcessFile
8
+ class << self
9
# Process the standardized Fasta file (tdf format)
#
# f_normalized_fasta - indexable collection of tab-delimited lines. The first
#                      entry is skipped; each remaining line carries an id,
#                      a CSV-encoded list of definitions and a sequence.
#
# Returns a Hash mapping the Integer id to
# { :definitions => Array, :sequence => String }.
def process_tdf_fasta_file(f_normalized_fasta)
  f_normalized_fasta[1..-1].each_with_object({}) do |line, sequences|
    id, definitions, sequence = line.chomp.split("\t")
    sequences[id.to_i] = { :definitions => CSV.parse_line(definitions), :sequence => sequence }
  end
end
20
+
21
# Process the standardized Fasta file (sqlite3 format)
#
# db_normalized_fasta - object whose #execute(sql) yields one row per record
#                       of the db_normalized_fasta table; columns 0..2 are
#                       read as id, CSV-encoded definitions and sequence.
#
# Returns a Hash mapping the Integer id to
# { :definitions => Array, :sequence => <column 2> }.
def process_db_fasta_file(db_normalized_fasta)
  {}.tap do |sequences|
    db_normalized_fasta.execute("select * from db_normalized_fasta") do |row|
      record = { :definitions => CSV.parse_line(row[1]), :sequence => row[2] }
      sequences[row[0].to_i] = record
    end
  end
end
30
+
31
# Process the fragment frequency file (tdf format)
#
# f_freq_lookup - indexable collection of tab-delimited lines. The first
#                 entry is skipped; each remaining line carries an id
#                 (ignored), a fragment size and a ", "-separated list of
#                 "offset fasta_id" pairs.
#
# Returns a Hash mapping the Integer size to an Array of
# { :offset, :fasta_id, :raw_size } Hashes (all Integer values). A later
# line with the same size replaces the earlier entry.
def process_tdf_freq_lookup(f_freq_lookup)
  f_freq_lookup[1..-1].each_with_object({}) do |line, sizes|
    _id, raw_size, positions = line.chomp.split("\t")
    size = raw_size.to_i
    sizes[size] = positions.split(', ').map { |entry|
      offset, fasta_id = entry.split(' ')
      { :offset => offset.to_i, :fasta_id => fasta_id.to_i, :raw_size => size }
    }
  end
end
52
+
53
# Process the fragment frequency file (sqlite3 format)
#
# db_freq_lookup - object whose #execute(sql) yields one row per record of
#                  the db_freq_lookup table; column 1 is the fragment size
#                  and column 2 a ", "-separated list of "offset fasta_id"
#                  pairs (column 0 is not used).
#
# Returns a Hash mapping the Integer size to an Array of
# { :offset, :fasta_id, :raw_size } Hashes (all Integer values).
def process_db_freq_lookup(db_freq_lookup)
  {}.tap do |sizes|
    db_freq_lookup.execute("select * from db_freq_lookup") do |row|
      size = row[1].to_i
      sizes[size] = row[2].split(', ').map { |entry|
        offset, fasta_id = entry.split(' ')
        { :offset => offset.to_i, :fasta_id => fasta_id.to_i, :raw_size => size }
      }
    end
  end
end
73
+
74
# Process the adapter file (tdf format)
#
# f_adapters    - indexable collection of tab-delimited lines. The first
#                 entry is skipped; remaining lines carry name, worksense,
#                 sequence and specificity columns. Blank lines are ignored.
# adapter5_name - pattern for the name of the adapter with worksense 5, or nil.
# adapter3_name - pattern for the name of the adapter with worksense 3, or nil.
#
# Names are matched case-insensitively as regular expressions. Scanning stops
# as soon as every requested adapter has been seen. A trailing "|" followed
# only by N's is stripped from the sequence column.
#
# Returns a Hash with :adapter5_sequence, :adapter5_specificity,
# :adapter3_sequence and :adapter3_specificity (nil when not requested).
# Raises RuntimeError for a worksense that does not start with 5 or 3, or
# when a requested adapter is not found.
def process_tdf_adapters(f_adapters, adapter5_name=nil, adapter3_name=nil)
  wanted  = { 5 => adapter5_name, 3 => adapter3_name }
  pending = { 5 => !adapter5_name.nil?, 3 => !adapter3_name.nil? }
  found   = { 5 => {}, 3 => {} }

  f_adapters[1..-1].each do |line|
    break unless pending[5] || pending[3]

    fields = line.chomp.split("\t")
    next if fields.empty?

    name = fields[0]
    worksense = fields[1][0].chr.to_i
    sequence = fields[2].gsub(/\|N*$/i, '')
    specificity = fields[3] # what it's supposed to match
    unless worksense == 3 || worksense == 5
      raise "Unknown worksense value \"#{fields[1]}\". First character of column must be a '5' or a '3'."
    end

    # Only the adapter requested for this line's worksense can match.
    next unless wanted[worksense] && name =~ /#{wanted[worksense]}/i

    found[worksense] = { :sequence => sequence, :specificity => specificity }
    pending[worksense] = false
  end

  if adapter5_name && pending[5]
    raise "named-adapter5 ('#{adapter5_name}') with the worksense '5' not found."
  elsif adapter3_name && pending[3]
    raise "named-adapter3 ('#{adapter3_name}') with the worksense '3' not found."
  end

  {
    :adapter5_sequence => found[5][:sequence],
    :adapter5_specificity => found[5][:specificity],
    :adapter3_sequence => found[3][:sequence],
    :adapter3_specificity => found[3][:specificity]
  }
end
117
+
118
+ end
119
+ end # class ProcessFile
120
+
121
+ end # class SearchCommand
122
+ end # class App
123
+ end # module Genfrag
124
+
125
+ # EOF
@@ -0,0 +1,121 @@
1
+
2
+ module Genfrag
3
+ class App
4
+
5
+ class SearchCommand < Command
6
+
7
+ # Keep track of extraneous nucleotides that should be removed from the final fragment
8
+ #
9
+ # Example BstYI used as RE5
10
+ # BstYI -
11
+ # 5' - r^g a t c y - 3'
12
+ # 3' - y c t a g^r - 5'
13
+ #
14
+ # re5_ds.cut_locations.primary # => [0]
15
+ # re5_ds.cut_locations.complement # => [4]
16
+ # re5_ds.aligned_strands.primary.size # => 6
17
+ #
18
+ # # number of nucleotides to trim from the left side on the primary strand
19
+ # re5_ds.cut_locations.primary.max + 1 # => 1
20
+ #
21
+ # # number of nucleotides to trim from the left side on the complement strand
22
+ # re5_ds.cut_locations.complement.max + 1 # => 5
23
+ #
24
+ #
25
+ # Example BstYI used as RE3
26
+ # BstYI -
27
+ # 5' - r^g a t c y - 3'
28
+ # 3' - y c t a g^r - 5'
29
+ #
30
+ # re3_ds.cut_locations.primary # => [0]
31
+ # re3_ds.cut_locations.complement # => [4]
32
+ # re3_ds.aligned_strands.primary.size # => 6
33
+ #
34
+ # # number of nucleotides to trim from the right side on the primary strand
35
+ # re3_ds.aligned_strands.primary.size - (re3_ds.cut_locations.primary.min + 1) # => 5
36
+ #
37
+ # # number of nucleotides to trim from the right side on the complement strand
38
+ # re3_ds.aligned_strands.primary.size - (re3_ds.cut_locations.complement.min + 1) # => 1
39
+ #
40
+ #
41
+ # Example PpiI used as RE5
42
+ # PpiI -
43
+ # 5' - n n n n n n^n n n n n n n g a a c n n n n n c t c n n n n n n n n n n n n n^n - 3'
44
+ # 3' - n^n n n n n n n n n n n n c t t g n n n n n g a g n n n n n n n n^n n n n n n - 5'
45
+ #
46
+ # re5_ds.cut_locations.primary # => [5, 37]
47
+ # re5_ds.cut_locations.complement # => [0, 32]
48
+ # re5_ds.aligned_strands.primary.size # => 39
49
+ #
50
+ # # number of nucleotides to trim from the left side on the primary strand
51
+ # re5_ds.cut_locations.primary.max + 1 # => 38
52
+ #
53
+ # # number of nucleotides to trim from the left side on the complement strand
54
+ # re5_ds.cut_locations.complement.max + 1 # => 33
55
+ #
56
+ #
57
+ # Example PpiI used as RE3
58
+ # PpiI -
59
+ # 5' - n n n n n n^n n n n n n n g a a c n n n n n c t c n n n n n n n n n n n n n^n - 3'
60
+ # 3' - n^n n n n n n n n n n n n c t t g n n n n n g a g n n n n n n n n^n n n n n n - 5'
61
+ #
62
+ # re3_ds.cut_locations.primary # => [5, 37]
63
+ # re3_ds.cut_locations.complement # => [0, 32]
64
+ # re3_ds.aligned_strands.primary.size # => 39
65
+ #
66
+ # # number of nucleotides to trim from the right side on the primary strand
67
+ # re3_ds.aligned_strands.primary.size - (re3_ds.cut_locations.primary.min + 1) # => 33
68
+ #
69
+ # # number of nucleotides to trim from the right side on the complement strand
70
+ # re3_ds.aligned_strands.primary.size - (re3_ds.cut_locations.complement.min + 1) # => 38
71
+ #
72
# Derive, from the cut locations of the 5' and 3' enzymes, how many
# nucleotides have to be removed on each side of each strand.
#
# re5_ds - object exposing cut_locations.primary / cut_locations.complement.
# re3_ds - same, and additionally aligned_strands.primary.
#
# Returns a Hash with :from_left_primary, :from_left_complement,
# :from_right_primary and :from_right_complement.
def calculate_trim_for_nucleotides(re5_ds, re3_ds)
  left_cuts = re5_ds.cut_locations
  from_left_primary = left_cuts.primary.max + 1
  from_left_complement = left_cuts.complement.max + 1

  # Right-hand trims are measured back from the end of the recognition site.
  site_width = re3_ds.aligned_strands.primary.size
  right_cuts = re3_ds.cut_locations
  from_right_primary = site_width - (right_cuts.primary.min + 1)
  from_right_complement = site_width - (right_cuts.complement.min + 1)

  {
    :from_left_primary => from_left_primary,
    :from_left_complement => from_left_complement,
    :from_right_primary => from_right_primary,
    :from_right_complement => from_right_complement
  }
end
80
+
81
# Calculate left and right trims
#
# trim - Hash with :from_left_primary, :from_left_complement,
#        :from_right_primary and :from_right_complement.
#
# Returns [left, right]. Each is a Hash with
#   :dot_out_from_primary - true when the primary strand loses more than the
#                           complement on that side (so the primary gets the
#                           "dot" padding); otherwise the complement is
#                           assumed to need it.
#   :trim_from_both       - how much gets cut off both strands on that side.
def calculate_left_and_right_trims(trim)
  [:left, :right].map do |side|
    primary = trim[:"from_#{side}_primary"]
    complement = trim[:"from_#{side}_complement"]
    {
      :dot_out_from_primary => primary > complement,
      :trim_from_both => [primary, complement].min
    }
  end
end
95
+
96
# Do the trimming
#
# On each side, the strand that loses more nucleotides has that stretch
# replaced by '.' padding; then the amount common to both strands is cut off
# both of them.
#
# primary_frag    - primary strand String.
# complement_frag - complement strand String.
# left, right     - Hashes with :dot_out_from_primary and :trim_from_both.
# trim            - Hash with :from_left_primary, :from_left_complement,
#                   :from_right_primary and :from_right_complement.
#
# Returns [primary_frag, complement_frag] as new Strings; the arguments are
# not modified.
def trim_sequences(primary_frag, complement_frag, left, right, trim)
  if left[:dot_out_from_primary]
    count = trim[:from_left_primary]
    primary_frag = "." * count + primary_frag[count..-1]
  else
    count = trim[:from_left_complement]
    complement_frag = "." * count + complement_frag[count..-1]
  end

  if right[:dot_out_from_primary]
    count = trim[:from_right_primary]
    primary_frag = primary_frag[0..(-1 - count)] + "." * count
  else
    # The complement is padded by its own right-hand trim. The earlier code
    # used the primary's (smaller) value here, which padded only the part
    # that is cut off below and left the complement overhang untouched.
    count = trim[:from_right_complement]
    complement_frag = complement_frag[0..(-1 - count)] + "." * count
  end

  # Remove what both strands lose on either side.
  keep = left[:trim_from_both]..(-1 - right[:trim_from_both])
  [primary_frag[keep], complement_frag[keep]]
end
116
+
117
+ end # class SearchCommand
118
+ end # class App
119
+ end # module Genfrag
120
+
121
+ # EOF
File without changes
@@ -0,0 +1,109 @@
1
+
2
+ >At1g02580 mRNA (2291 bp) UTR's and CDS
3
+ aggcgagtggttaatggagaaggaaaaccatgaggacgatggtgagggtttgccacccgaactaaatcagataaaa
4
+ gagcaaatcgaaaaggagagatttctgcatatcaagagaaaattcgagctgagatacattccaagtgtggctactc
5
+ atgcttcacaccatcaatcgtttgacttaaaccagcccgctgcagaggatgataatggaggagacaacaaatcact
6
+ tttgtcgagaatgcaaaacccacttcgtcatttcagtgcctcatctgattataattcttacgaagatcaaggttat
7
+ gttcttgatgaggatcaagattatgctcttgaagaagatgtaccattatttcttgatgaagatgtaccattattac
8
+ caagtgtcaagcttccaattgttgagaagctaccacgatccattacatgggtcttcaccaaaagtagccagctgat
9
+ ggctgaaagtgattctgtgattggtaagagacaaatctattatttgaatggtgaggcactagaattgagcagtgaa
10
+ gaagatgaggaagatgaagaagaagatgaggaagaaatcaagaaagaaaaatgcgaattttctgaagatgtagacc
11
+ gatttatatggacggttgggcaggactatggtttggatgatctggtcgtgcggcgtgctctcgccaagtacctcga
12
+ agtggatgtttcggacatattggaaagatacaatgaactcaagcttaagaatgatggaactgctggtgaggcttct
13
+ gatttgacatccaagacaataactactgctttccaggattttgctgatagacgtcattgccgtcgttgcatgatat
14
+ tcgattgtcatatgcatgagaagtatgagcccgagtctagatccagcgaagacaaatctagtttgtttgaggatga
15
+ agatagacaaccatgcagtgagcattgttacctcaaggtgaggagtgtgacagaagctgatcatgtgatggataat
16
+ gataactctatatcaaacaagattgtggtctcagatccaaacaacactatgtggacgcctgtagagaaggatcttt
17
+ acttgaaaggaattgagatatttgggagaaacagttgtgatgttgcattaaacatacttcgggggcttaagacgtg
18
+ cctagagatttacaattacatgcgcgaacaagatcaatgtactatgtcattagaccttaacaaaactacacaaaga
19
+ cacaatcaggttaccaaaaaagtatctcgaaaaagtagtaggtcggtccgcaaaaaatcgagactccgaaaatatg
20
+ ctcgttatccgcctgctttaaagaaaacaactagtggagaagctaagttttataagcactacacaccatgcacttg
21
+ caagtcaaaatgtggacagcaatgcccttgtttaactcacgaaaattgctgcgagaaatattgcgggtgctcaaag
22
+ gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
23
+ ctaatcgtgaatgcgatcca gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
24
+ agtgcaaatccaatgcaagaacatgcaattcctccttcaaaccaataaaaagattctcattggaaagtctgatgtt
25
+ catggatggggtgcatttacatgggactctct taaaaagaatgagtatctcggagaatatactggagaactgatca
26
+ ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
27
+ gctcgaaatcgatgctcgccgtaaaggaaacgagttcaaatttctcaatcactcagcaagacctaactgctacgcc
28
+ aagttgatgattgtgagaggagatcagaggattggtctatttgcggagagagcaatcgaagaaggtgaggagcttt
29
+ tcttcgactactgctatggaccagaacatgcggattggtcgcgtggtcgagaacctagaaagactggtgcttctaa
30
+ aaggtctaaggaagcccgtccagctcgttagtttttgatctgaggagaagcagcaattcaagcagtccttttttta
31
+ tgttatggtatatcaattaataatgtaatgctattttgtgttactaaaccaaaacttaagtttctgttttatttgt
32
+ tttagggtgttttgtttgtatcatatgtgtcttaactttcaaagttttctttttgtatttcaatttaaaaacaatg
33
+ tttatgttgtt
34
+
35
+ >At1g02580 mRNA (2291 bp) UTR's and CDS (duplicate)
36
+ aggcgagtggttaatggagaaggaaaaccatgaggacgatggtgagggtttgccacccgaactaaatcagataaaa
37
+ gagcaaatcgaaaaggagagatttctgcatatcaagagaaaattcgagctgagatacattccaagtgtggctactc
38
+ atgcttcacaccatcaatcgtttgacttaaaccagcccgctgcagaggatgataatggaggagacaacaaatcact
39
+ tttgtcgagaatgcaaaacccacttcgtcatttcagtgcctcatctgattataattcttacgaagatcaaggttat
40
+ gttcttgatgaggatcaagattatgctcttgaagaagatgtaccattatttcttgatgaagatgtaccattattac
41
+ caagtgtcaagcttccaattgttgagaagctaccacgatccattacatgggtcttcaccaaaagtagccagctgat
42
+ ggctgaaagtgattctgtgattggtaagagacaaatctattatttgaatggtgaggcactagaattgagcagtgaa
43
+ gaagatgaggaagatgaagaagaagatgaggaagaaatcaagaaagaaaaatgcgaattttctgaagatgtagacc
44
+ gatttatatggacggttgggcaggactatggtttggatgatctggtcgtgcggcgtgctctcgccaagtacctcga
45
+ agtggatgtttcggacatattggaaagatacaatgaactcaagcttaagaatgatggaactgctggtgaggcttct
46
+ gatttgacatccaagacaataactactgctttccaggattttgctgatagacgtcattgccgtcgttgcatgatat
47
+ tcgattgtcatatgcatgagaagtatgagcccgagtctagatccagcgaagacaaatctagtttgtttgaggatga
48
+ agatagacaaccatgcagtgagcattgttacctcaaggtgaggagtgtgacagaagctgatcatgtgatggataat
49
+ gataactctatatcaaacaagattgtggtctcagatccaaacaacactatgtggacgcctgtagagaaggatcttt
50
+ acttgaaaggaattgagatatttgggagaaacagttgtgatgttgcattaaacatacttcgggggcttaagacgtg
51
+ cctagagatttacaattacatgcgcgaacaagatcaatgtactatgtcattagaccttaacaaaactacacaaaga
52
+ cacaatcaggttaccaaaaaagtatctcgaaaaagtagtaggtcggtccgcaaaaaatcgagactccgaaaatatg
53
+ ctcgttatccgcctgctttaaagaaaacaactagtggagaagctaagttttataagcactacacaccatgcacttg
54
+ caagtcaaaatgtggacagcaatgcccttgtttaactcacgaaaattgctgcgagaaatattgcgggtgctcaaag
55
+ gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
56
+ ctaatcgtgaatgcgatcca gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
57
+ agtgcaaatccaatgcaagaacatgcaattcctccttcaaaccaataaaaagattctcattggaaagtctgatgtt
58
+ catggatggggtgcatttacatgggactctct taaaaagaatgagtatctcggagaatatactggagaactgatca
59
+ ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
60
+ gctcgaaatcgatgctcgccgtaaaggaaacgagttcaaatttctcaatcactcagcaagacctaactgctacgcc
61
+ aagttgatgattgtgagaggagatcagaggattggtctatttgcggagagagcaatcgaagaaggtgaggagcttt
62
+ tcttcgactactgctatggaccagaacatgcggattggtcgcgtggtcgagaacctagaaagactggtgcttctaa
63
+ aaggtctaaggaagcccgtccagctcgttagtttttgatctgaggagaagcagcaattcaagcagtccttttttta
64
+ tgttatggtatatcaattaataatgtaatgctattttgtgttactaaaccaaaacttaagtttctgttttatttgt
65
+ tttagggtgttttgtttgtatcatatgtgtcttaactttcaaagttttctttttgtatttcaatttaaaaacaatg
66
+ tttatgttgtt
67
+
68
+ >At1g65300: mRNA 837bp
69
+ atgaagagaaagatgaagttatcgttaatagaaaacagtgtatcgaggaaaacaacattcaccaaaaggaagaaag
70
+ ggatgacgaagaaactaaccgagctagtcactctatgtggtgttgaagcatgtgcggtcgtctatagtccgttcaa
71
+ ctcgatcccggaggcttggccgtcaagggaaggcgttgaagacgtggtgtcgaaatttatggagttgtcggtgttg
72
+ gaccggaccaagaagatggtggatcaagagacttttataagtcaaaggatcgccaaagaaaaagagcagctgcaga
73
+ agctacgtgatgagaaccataattctcagattcgggagttaatgtttggttgtctcaaaggggagacgaatgtgta
74
+ taatcttgatggaagggatcttcaagatttgagtttatatattgataagtatcttaatggtcttactcgcaggatt
75
+ ga gatcctTAttgagaacggtgagtcttcttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
76
+ gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
77
+ tgatttttatgatcagattccaaagaaaattcatggttt taatatgaatatgaataaggattcgaatcaaagtatg
78
+ gttttggatttgaatcaaaatcttaatgatggagaggacgagggcattccttgcatggacaacaacaactaccacc
79
+ ccgaaatcgattgtctcgctaccgtcaccactgcccccactgatgtttgtgctcctaacatcaccaatgatctcta
80
+ g
81
+
82
+ >At1g65300: mRNA 837bp (shortened at end)
83
+ atgaagagaaagatgaagttatcgttaatagaaaacagtgtatcgaggaaaacaacattcaccaaaaggaagaaag
84
+ ggatgacgaagaaactaaccgagctagtcactctatgtggtgttgaagcatgtgcggtcgtctatagtccgttcaa
85
+ ctcgatcccggaggcttggccgtcaagggaaggcgttgaagacgtggtgtcgaaatttatggagttgtcggtgttg
86
+ gaccggaccaagaagatggtggatcaagagacttttataagtcaaaggatcgccaaagaaaaagagcagctgcaga
87
+ agctacgtgatgagaaccataattctcagattcgggagttaatgtttggttgtctcaaaggggagacgaatgtgta
88
+ taatcttgatggaagggatcttcaagatttgagtttatatattgataagtatcttaatggtcttactcgcaggatt
89
+ gagatcctTAttgagaacggtgagtcttcttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
90
+ gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
91
+ tgatttttatgatcag
92
+
93
+
94
+ >At1g65300: mRNA 837bp (shortened from start)
95
+ ttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
96
+ gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
97
+ tgatttttatgatcagattccaaagaaaattcatggttttaatatgaatatgaataaggattcgaatcaaagtatg
98
+ gttttggatttgaatcaaaatcttaatgatggagaggacgagggcattccttgcatggacaacaacaactaccacc
99
+ ccgaaatcgattgtctcgctaccgtcaccactgcccccactgatgtttgtgctcctaacatcaccaatgatctcta
100
+ g
101
+
102
+
103
+ >At1g02580 - shortened for test - inserted cutpoint
104
+ gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
105
+ ctaatcgtgaatgcgatcca gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
106
+ agtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggt
107
+ tttaattggggtgcatttacatgggactctct taaaaagaatgagtatctcggagaatatactggagaactgatca
108
+ ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
109
+