protk 1.2.6.pre5 → 1.3.0.pre1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +84 -45
  3. data/bin/add_retention_times.rb +9 -5
  4. data/bin/augustus_to_proteindb.rb +7 -11
  5. data/bin/interprophet.rb +28 -46
  6. data/bin/make_decoy.rb +16 -48
  7. data/bin/mascot_search.rb +57 -71
  8. data/bin/mascot_to_pepxml.rb +13 -26
  9. data/bin/msgfplus_search.rb +70 -107
  10. data/bin/omssa_search.rb +52 -109
  11. data/bin/peptide_prophet.rb +44 -119
  12. data/bin/pepxml_to_table.rb +24 -27
  13. data/bin/protein_prophet.rb +22 -82
  14. data/bin/protxml_to_gff.rb +22 -519
  15. data/bin/protxml_to_table.rb +2 -16
  16. data/bin/sixframe.rb +10 -32
  17. data/bin/tandem_search.rb +30 -403
  18. data/bin/tandem_to_pepxml.rb +43 -0
  19. data/bin/unimod_to_loc.rb +1 -1
  20. data/ext/{protk/decoymaker → decoymaker}/decoymaker.c +74 -21
  21. data/ext/decoymaker/extconf.rb +3 -0
  22. data/lib/protk/constants.rb +16 -2
  23. data/lib/protk/data/default_config.yml +2 -1
  24. data/lib/protk/data/tandem_gpm_defaults.xml +175 -0
  25. data/lib/protk/data/tandem_isb_kscore_defaults.xml +123 -0
  26. data/lib/protk/data/tandem_isb_native_defaults.xml +123 -0
  27. data/lib/protk/data/tandem_params.xml +17 -54
  28. data/lib/protk/fastadb.rb +2 -2
  29. data/lib/protk/prophet_tool.rb +1 -1
  30. data/lib/protk/protxml_to_gff_tool.rb +474 -0
  31. data/lib/protk/search_tool.rb +58 -103
  32. data/lib/protk/setup_rakefile.rake +9 -5
  33. data/lib/protk/tandem_search_tool.rb +256 -0
  34. data/lib/protk/tool.rb +85 -104
  35. data/lib/protk.rb +1 -6
  36. metadata +24 -103
  37. data/bin/annotate_ids.rb +0 -59
  38. data/bin/asapratio.rb +0 -27
  39. data/bin/blastxml_to_table.rb +0 -119
  40. data/bin/correct_omssa_retention_times.rb +0 -27
  41. data/bin/feature_finder.rb +0 -95
  42. data/bin/file_convert.rb +0 -164
  43. data/bin/generate_omssa_loc.rb +0 -42
  44. data/bin/gffmerge.rb +0 -208
  45. data/bin/libra.rb +0 -70
  46. data/bin/toppas_pipeline.rb +0 -84
  47. data/bin/uniprot_annotation.rb +0 -141
  48. data/bin/xls_to_table.rb +0 -52
  49. data/bin/xpress.rb +0 -27
  50. data/ext/protk/decoymaker/extconf.rb +0 -3
  51. data/ext/protk/simplealign/extconf.rb +0 -3
  52. data/lib/protk/biotools_excel_converter.rb +0 -60
  53. data/lib/protk/eupathdb_gene_information_table.rb +0 -158
  54. data/lib/protk/gapped_aligner.rb +0 -264
  55. data/lib/protk/protein_annotator.rb +0 -646
  56. data/lib/protk/spreadsheet_extensions.rb +0 -79
  57. data/lib/protk/xtandem_defaults.rb +0 -11
@@ -0,0 +1,474 @@
1
+ require 'protk/tool'
2
+
3
+ class ProtXMLToGFFTool < Tool
4
+
5
# Sets up the command line interface of the protXML to GFF converter.
#
# Hands [:explicit_output] to the parent constructor, sets the usage banner
# and then declares every option together with its default value. Options are
# declared through add_value_option / add_boolean_option (presumably provided
# by the Tool superclass - confirm there) in the same order as listed below.
def initialize
  super([:explicit_output])

  @option_parser.banner = "Create a gff containing peptide Observations.\n\nUsage: protxml_to_gff.rb "

  # Input files and filters
  [
    [:database, nil, ['-d filename', '--database filename', 'Database used for ms/ms searches (Fasta Format)']],
    [:genome, nil, ['-g filename', '--genome filename', 'Nucleotide sequences for scaffolds (Fasta Format)']],
    [:protein_find, nil, ['-f term', '--find term', 'Restrict output to proteins whose name matches the specified string']],
    [:nterm_minlen, 7, ['-n len', '--nterm-min-len len', 'Only include inferred N-terminal sequences if longer than len']]
  ].each { |key, default, switches| add_value_option(key, default, switches) }

  # On/off switches (all off by default)
  [
    [:skip_fasta_indexing, false, ['--skip-index', 'Don\'t index database (Index should already exist)']],
    [:stack_charge_states, false, ['--stack-charge-states', 'Different peptide charge states get separate gff entries']],
    [:collapse_redundant_proteins, false, ['--collapse-redundant-proteins', 'Proteins that cover genomic regions already covered will be skipped']]
  ].each { |key, default, switches| add_boolean_option(key, default, switches) }

  # Probability cut-offs
  [
    [:peptide_probability_threshold, 0.95, ['--threshold prob', 'Peptide Probability Threshold (Default 0.95)']],
    [:protein_probability_threshold, 0.99, ['--prot-threshold prob', 'Protein Probability Threshold (Default 0.99)']]
  ].each { |key, default, switches| add_value_option(key, default, switches) }
end
22
+
23
+
24
# Lists the name of a protein together with the names of all proteins that
# are reported as indistinguishable from it.
#
# protein_node - object responding to #find(xpath, namespace) and #[](attribute)
#                (presumably a protXML <protein> element - confirm with caller).
#
# Returns an Array: the node's own 'protein_name' first, followed by the
# 'protein_name' of every element matched by the indistinguishable_protein query.
def protein_names(protein_node)
  indistinguishable = protein_node.find('protxml:indistinguishable_protein','protxml:http://regis-web.systemsbiology.net/protXML')
  names = [protein_node['protein_name']]
  indistinguishable.each { |other| names << other['protein_name'] }
  names
end
32
+
33
# Runs the 'protxml:peptide' query (protXML namespace) against protein_node
# and returns whatever protein_node.find yields for it.
def peptide_nodes(protein_node)
  peptide_query = 'protxml:peptide'
  protxml_namespace = 'protxml:http://regis-web.systemsbiology.net/protXML'
  protein_node.find(peptide_query, protxml_namespace)
end
36
+
37
+
38
# Fetches one record from an indexed fasta database.
#
# protein_name - identifier passed to fastadb.get_by_id.
# fastadb      - object responding to #get_by_id.
#
# Returns the record found.
# Raises KeyError (after printing a "Failed lookup" line to stdout) when
# get_by_id returns nil.
def get_fasta_record(protein_name,fastadb)
  record = fastadb.get_by_id(protein_name)
  return record unless record.nil?

  puts "Failed lookup for #{protein_name}"
  raise KeyError
end
47
+
48
# Plain value holder describing where a protein database entry sits on the
# genome. All fields are read/write and start out as nil.
class CDSInfo
  attr_accessor :fasta_id          # fasta entry this info was built from
  attr_accessor :strand            # '+' or '-'
  attr_accessor :frame             # reading frame taken from the entry name (six-frame entries)
  attr_accessor :name              # entry name
  attr_accessor :scaffold          # name of the scaffold the entry lies on
  attr_accessor :start             # genomic start coordinate
  attr_accessor :end               # genomic end coordinate
  attr_accessor :coding_sequences  # list of [start,end] pairs
  attr_accessor :is_sixframe       # true for six-frame translation entries
  attr_accessor :gene_id           # gene identifier (nil when not known)

  # Tells whether candidate_entry occupies genomic territory shared with
  # this entry.
  #
  # Two entries overlap only when they are on the same scaffold and the same
  # strand and their [start,end] intervals intersect. Intervals that merely
  # touch (one starts where the other ends) are not counted as overlapping.
  #
  # candidate_entry - object responding to #scaffold, #strand, #start and #end.
  #
  # Returns true or false.
  def overlap(candidate_entry)
    return false if candidate_entry.scaffold != scaffold
    # Compare against the candidate's strand (comparing our own strand with
    # itself can never differ).
    return false if candidate_entry.strand != strand
    # Candidate lies entirely downstream of this entry
    return false if candidate_entry.start >= self.end
    # Candidate lies entirely upstream of this entry
    return false if candidate_entry.end <= start

    true
  end
end
69
+
70
# Builds a CDSInfo from the header of a fasta entry.
#
# The entry description is read as whitespace separated "start|end" pairs:
# the first pair becomes the start/end of the entry, any further pairs become
# its coding_sequences. Scaffold and name are extracted from the entry id
# (which must contain "scaffold<number>_" or "scaffold_<number>_" and "lcl|").
#
# Entries whose id contains "frame" are treated as six-frame translations:
# the frame digit is taken from the name and frames above 3 are placed on the
# '-' strand. All other entries are placed on the '-' strand when their name
# contains "rev", and their gene id is parsed from the name.
#
# fasta_entry - object responding to #entry_id and #identifiers.description.
#
# Returns the populated CDSInfo.
# Raises EncodingError when the description holds no coordinate pair, and
# NoMethodError when one of the expected patterns is missing from the id/name.
def cds_info_from_fasta(fasta_entry)
  info = CDSInfo.new
  info.fasta_id = fasta_entry

  coordinate_pairs = fasta_entry.identifiers.description.split(' ').map do |pair|
    pair.split('|').map { |number| number.to_i }
  end

  info.coding_sequences = []
  raise EncodingError if coordinate_pairs.length < 1
  info.coding_sequences = coordinate_pairs[1..-1] if coordinate_pairs.length > 1

  entry_span = coordinate_pairs[0]
  info.start = entry_span[0]
  info.end = entry_span[1]

  entry_id = fasta_entry.entry_id
  info.scaffold = entry_id.scan(/(scaffold_?\d+)_/).first.first
  info.name = entry_id.scan(/lcl\|(.*)/).first.first

  if entry_id =~ /frame/
    info.frame = info.name.scan(/frame_(\d)/).first.first
    info.strand = info.frame.to_i > 3 ? '-' : '+'
    info.is_sixframe = true
  else
    info.strand = info.name =~ /rev/ ? '-' : '+'
    info.gene_id = info.name.scan(/_\w{3}_(.*)\.t/).first.first
    info.is_sixframe = false
  end

  info
end
99
+
100
+
101
# Decides whether candidate_entry covers a genomic location that none of the
# existing_entries covers yet.
#
# A candidate is NOT new when an existing entry
#   * carries the same (non-nil) gene id, or
#   * reports an overlap with the candidate (existing.overlap(candidate_entry)).
#
# Entries without a gene id are compared by overlap only. Previously two nil
# gene ids were considered equal, so every entry lacking a gene id was
# rejected as soon as one such entry had been recorded.
#
# candidate_entry  - object responding to #gene_id.
# existing_entries - Array of objects responding to #gene_id and #overlap.
#
# Returns true when the location is new, false otherwise.
def is_new_genome_location(candidate_entry,existing_entries)
  candidate_gene = candidate_entry.gene_id

  existing_entries.none? do |existing|
    same_gene = !candidate_gene.nil? && existing.gene_id == candidate_gene
    same_gene || existing.overlap(candidate_entry)
  end
end
118
+
119
+
120
# Builds the GFF3 record describing a whole protein.
#
# protein_name - not used by this method.
# entry_info   - object responding to #scaffold, #start, #end, #strand, #name.
# prot_prob    - value stored as the record's score.
# prot_id      - value stored in the record's "ID" attribute.
#
# Returns a Bio::GFF::GFF3::Record with source "MSMS", feature type "protein",
# no frame, and the attributes ID and Name.
def generate_protein_gff(protein_name,entry_info,prot_prob,prot_id)
  attributes = [["ID",prot_id],["Name",entry_info.name]]

  Bio::GFF::GFF3::Record.new(
    entry_info.scaffold, # seqid
    "MSMS",              # source
    "protein",           # feature type
    entry_info.start,    # start position
    entry_info.end,      # end position
    prot_prob,           # score
    entry_info.strand,   # strand
    nil,                 # frame
    attributes
  )
end
127
+
128
# Cuts the nucleotide sequence of a protein entry out of its scaffold.
#
# protein_info - object responding to #scaffold, #start, #end and #strand.
# genomedb     - database handed to get_fasta_record to look the scaffold up.
#
# The slice runs from string index (start-1) up to and including index `end`.
# NOTE(review): if start/end are 1-based inclusive this takes one base more
# than the entry spans - confirm against the callers before relying on it.
#
# Returns a String for entries that are not on the '-' strand; for '-' strand
# entries returns the reverse complement produced by Bio::Sequence::NA.
def get_dna_sequence(protein_info,genomedb)
  scaffold_record = get_fasta_record(protein_info.scaffold,genomedb)
  first_index = protein_info.start-1
  gene_region = scaffold_record.naseq.to_s[first_index..protein_info.end]
  return gene_region unless protein_info.strand == "-"

  Bio::Sequence::NA.new(gene_region).reverse_complement
end
139
+
140
# Checks whether pep_seq occurs in any of the six translations (frames 1..6)
# of gene_seq. Frames are tried in ascending order and the search stops at
# the first hit.
#
# Returns true or false.
def peptide_is_in_sixframe(pep_seq,gene_seq)
  nucleotides = Bio::Sequence::NA.new(gene_seq)
  (1..6).any? { |reading_frame| nucleotides.translate(reading_frame).index(pep_seq) }
end
149
+
150
# Converts a peptide position inside a protein into genomic fragments, taking
# the introns between coding sequences into account.
#
# pepstart         - zero-based index of the first peptide residue in the protein.
# pepend           - index one past the last peptide residue.
# gene_start       - genomic coordinate at which the gene starts.
# gene_end         - not used by this method.
# coding_sequences - list of [start,end] genomic pairs; sorted here by start.
#
# The residue indices are first projected onto the genome as if the gene had
# no introns (positive strand is assumed). While walking the coding sequences
# in order, the length of every intron that lies before the position being
# looked for is added to it. A peptide contained in one coding sequence gives
# a single fragment; a peptide spanning several gives one fragment per coding
# sequence touched.
#
# Returns a one-element Array holding the list of [start,end] fragments
# (the list is empty when coding_sequences is empty).
# Raises IndexError when the peptide extends past the last coding sequence.
# (This situation used to hit a leftover debugger breakpoint and then fail
# on a nil coding sequence.)
def fragment_coords_from_protein_coords(pepstart,pepend,gene_start,gene_end,coding_sequences)
  sorted_cds = coding_sequences.sort { |a, b| a[0] <=> b[0] }

  # Assume positive strand
  p_i = pepstart*3+gene_start-1 # Initially we are looking for the first fragment
  pi_end = pepend*3+gene_start-1

  fragments = []
  finding_start = true

  sorted_cds.each_with_index do |(cds_start, cds_end), i|
    next_coords = sorted_cds[i+1]

    if cds_end < p_i
      # Exon is before index in sequence and doesn't contain p_i
      if next_coords.nil?
        raise IndexError, "Peptide position #{p_i} lies beyond the last coding sequence (ends at #{cds_end})"
      end

      intron_offset = next_coords[0]-cds_end-1
      p_i += intron_offset
      pi_end += intron_offset
      # Once the first fragment is known, a skipped exon is a middle exon
      fragments << [cds_start,cds_end] unless finding_start
    elsif finding_start
      if pi_end <= cds_end
        # Whole peptide contained in a single exon
        fragments << [p_i+1,pi_end]
        break
      end

      if next_coords.nil?
        raise IndexError, "Peptide end #{pi_end} lies beyond the last coding sequence (ends at #{cds_end})"
      end

      # Peptide starts here and continues in a later exon
      fragments << [p_i+1,cds_end]
      pi_end += next_coords[0]-cds_end-1
      p_i = pi_end # From now on we are looking for the end of the peptide
      finding_start = false
    else
      # A terminal exon
      fragments << [cds_start,p_i]
      break
    end
  end

  [fragments]
end
204
+
205
+ # gene_seq should already have been reverse_complemented if on reverse strand
206
# Locates a peptide inside a protein that comes from a predicted transcript
# and converts the position into genomic fragments.
#
# prot_seq     - protein sequence (String).
# pep_seq      - peptide sequence (String).
# protein_info - object responding to #strand, #start, #end, #coding_sequences.
# gene_seq     - not used by this method.
#
# For '-' strand entries the search is done on the reversed sequences and the
# resulting index is shifted by one: on the reverse strand the stop codon is at
# the beginning of the sequence (when read forwards) and has to be skipped.
#
# Returns the result of fragment_coords_from_protein_coords, or nil (after
# printing a warning to stdout) when the peptide is not part of the protein.
def get_peptide_coordinates_from_transcript_info(prot_seq,pep_seq,protein_info,gene_seq)
  on_reverse_strand = protein_info.strand=='-'
  match_index = on_reverse_strand ? prot_seq.reverse.index(pep_seq.reverse) : prot_seq.index(pep_seq)

  if match_index.nil?
    puts "Warning: Unable to find peptide #{pep_seq} in this protein! #{protein_info}"
    return nil
  end

  pep_start_i = on_reverse_strand ? match_index+1 : match_index
  pep_end_i = pep_start_i+pep_seq.length

  fragment_coords_from_protein_coords(pep_start_i,pep_end_i,protein_info.start,protein_info.end,protein_info.coding_sequences)
end
235
+
236
# Finds every (non-overlapping) occurrence of a peptide in a six-frame
# translation entry and converts each to genomic coordinates.
#
# prot_seq     - protein sequence (String).
# pep_seq      - peptide sequence (String), matched literally.
# protein_info - object responding to #strand and #start.
#
# For '-' strand entries both sequences are reversed before searching, so the
# residue index counts from the end of the protein.
#
# Each occurrence at residue index i maps to
#   start = protein_info.start + 3*i
#   end   = start + 3*peptide length - 1
#
# The search resumes after the end of the previous hit, so a peptide that
# occurs several times yields one location per occurrence. (The search used
# to restart at the previous hit, reporting the first location repeatedly.)
#
# Returns an Array with one [[start,end]] element per occurrence; empty when
# the peptide is absent or empty.
def get_peptide_coordinates_sixframe(prot_seq,pep_seq,protein_info)
  return [] if pep_seq.empty?

  if protein_info.strand == '-'
    prot_seq = prot_seq.reverse
    pep_seq = pep_seq.reverse
  end

  start_indexes = []
  search_from = 0
  while (found_at = prot_seq.index(pep_seq, search_from))
    start_indexes << found_at
    search_from = found_at + pep_seq.length
  end

  start_indexes.map do |si|
    pep_genomic_start = protein_info.start + 3*si
    pep_genomic_end = pep_genomic_start + 3*pep_seq.length - 1
    [[pep_genomic_start,pep_genomic_end]]
  end
end
257
+
258
+ # Returns an array of peptide locations, each being a list of [genomic_start,genomic_end] fragments; nil when the peptide cannot be found in a predicted transcript
259
# Dispatches to the coordinate lookup matching the kind of database entry:
# six-frame entries (protein_info.is_sixframe truthy) go to
# get_peptide_coordinates_sixframe, everything else goes to
# get_peptide_coordinates_from_transcript_info.
#
# Returns whatever the selected helper returns.
def get_peptide_coordinates(prot_seq,pep_seq,protein_info,gene_seq)
  unless protein_info.is_sixframe
    return get_peptide_coordinates_from_transcript_info(prot_seq,pep_seq,protein_info,gene_seq)
  end

  get_peptide_coordinates_sixframe(prot_seq,pep_seq,protein_info)
end
266
+
267
+
268
# Creates one GFF3 record per genomic fragment of a peptide.
#
# coords       - list of [start,end] genomic fragments (1-based, inclusive).
# protein_info - object responding to #scaffold and #strand.
# pep_id       - id of the parent peptide record; also the prefix of the
#                fragment id (".fg" suffix when name is "CDS", ".sp" otherwise).
# peptide_seq  - expected translation of all fragments joined together.
# genomedb     - database handed to get_fasta_record to fetch the scaffold.
# name         - feature type written to the records (default "fragment").
#
# Fragments are emitted in the given order for '+' strand entries and in
# reverse order otherwise. Each record carries the phase of its fragment in
# the frame column and the translated fragment as its Name attribute ("*"
# when the fragment cannot be translated).
#
# As a sanity check the joined fragments are translated and compared with
# peptide_seq; a mismatch is reported on stdout but does not abort the run.
# (A debugger breakpoint used to be triggered on this path.)
#
# Returns the Array of Bio::GFF::GFF3::Record objects (empty for empty coords).
def generate_fragment_gffs_for_coords(coords,protein_info,pep_id,peptide_seq,genomedb,name="fragment")
  scaff = get_fasta_record(protein_info.scaffold,genomedb)
  scaffold_seq = Bio::Sequence::NA.new(scaff.seq)
  on_reverse_strand = protein_info.strand=='-'

  ordered_coords = protein_info.strand=='+' ? coords : coords.reverse
  frag_id = name=="CDS" ? "#{pep_id}.fg" : "#{pep_id}.sp"

  fragment_phase = 0
  gff_lines = ordered_coords.collect do |frag_start,frag_end|
    frag_naseq = scaffold_seq[frag_start-1..frag_end-1]

    begin
      coding_strand_seq = on_reverse_strand ? frag_naseq.reverse_complement : frag_naseq
      frag_seq = coding_strand_seq.translate(fragment_phase+1)
    rescue
      # Best effort: an untranslatable fragment is still reported, named "*"
      puts "Unable to translate #{frag_naseq}" if frag_naseq.length > 1
      frag_seq = "*"
    end

    # Positional arguments: seqid, source, feature type, start, end, score,
    # strand, frame, attributes
    fragment_record = Bio::GFF::GFF3::Record.new(protein_info.scaffold,"MSMS",name,frag_start,frag_end,'',
      protein_info.strand,fragment_phase,[["Parent",pep_id],["ID",frag_id],["Name",frag_seq]])

    # Bases left over at the end of this fragment determine how many bases
    # of the next fragment complete the split codon
    remainder = (frag_naseq.length-fragment_phase) % 3
    fragment_phase = (3-remainder) % 3

    fragment_record
  end

  concat_seq = coords.map { |frag_start,frag_end| scaffold_seq[frag_start-1..frag_end-1] }.reduce { |joined,frag| joined + frag }
  return gff_lines if concat_seq.nil? # nothing to verify without fragments

  check_seq = on_reverse_strand ? concat_seq.reverse_complement.translate : concat_seq.translate
  puts "Fragment seqs not equal to peptide seqs" if check_seq != peptide_seq

  gff_lines
end
327
+
328
# Works out the genomic position of a start codon implied by a peptide.
#
# A start codon is reported when the peptide begins with 'M' and either sits
# at the very beginning of the protein or is preceded by a residue other than
# 'K' or 'R'.
#
# peptide_genomic_start, peptide_genomic_end - genomic extent of the peptide.
# peptide_seq - peptide sequence; its first occurrence in protein_seq is used.
# protein_seq - protein sequence.
# strand      - '+' places the codon at the peptide's genomic start, any other
#               value places it on the last three bases before its genomic end.
#
# Returns [codon_start, codon_start+2], or nil when no start codon is implied.
def get_start_codon_coords_for_peptide(peptide_genomic_start,peptide_genomic_end,peptide_seq,protein_seq,strand)
  offset = protein_seq.index(peptide_seq)
  return nil unless protein_seq[offset]=='M'

  unless offset==0
    preceding_residue = protein_seq[offset-1]
    return nil if preceding_residue=='K' || preceding_residue=='R'
  end

  codon_start = strand=='+' ? peptide_genomic_start : peptide_genomic_end-2
  [codon_start,codon_start+2]
end
346
+
347
# Works out the genomic position of a C-terminus marker for a peptide whose
# last residue is neither 'K' nor 'R'.
#
# peptide_genomic_start, peptide_genomic_end - genomic extent of the peptide.
# peptide_seq - peptide sequence; only its last residue is inspected.
# protein_seq - not used by this method.
# strand      - '+' anchors the marker at peptide_genomic_end-3, any other
#               value at peptide_genomic_start+1.
#               NOTE(review): both anchors sit one base inside the terminal
#               codon of the peptide - confirm this offset is intended.
#
# Returns [coord, coord+2], or nil when the peptide ends in 'K' or 'R'.
def get_cterm_coords_for_peptide(peptide_genomic_start,peptide_genomic_end,peptide_seq,protein_seq,strand)
  last_residue = peptide_seq[-1]
  return nil if last_residue=='K' || last_residue=='R'

  codon_coord = strand=='+' ? peptide_genomic_end-3 : peptide_genomic_start+1
  [codon_coord,codon_coord+2]
end
358
+
359
+
360
# Infers the N-terminal sequence that would have been cleaved off in front of
# a peptide whose N-terminus is not tryptic.
#
# The peptide qualifies when it does not sit at the start of the protein, is
# not preceded by 'K' or 'R' and does not itself start with 'M'. Since trypsin
# sometimes cleaves before P (breaking the rule), P is not checked and such
# cases are assumed to be real tryptic termini.
#
# The cleaved sequence runs from the closest 'M' in front of the peptide up to
# the residue just before the peptide.
#
# peptide_seq - peptide sequence; its first occurrence in protein_seq is used.
# protein_seq - protein sequence.
#
# Returns "<leader>(cleaved)<peptide>", or nil when the peptide does not
# qualify or no 'M' precedes it (the latter is reported on stdout).
def get_nterm_peptide_for_peptide(peptide_seq,protein_seq)
  pep_offset = protein_seq.index(peptide_seq)
  return nil unless pep_offset>0

  preceding_residue = protein_seq[pep_offset-1]
  return nil if preceding_residue=='K' || preceding_residue=='R' || protein_seq[pep_offset]=='M'

  # The residue at pep_offset is known not to be 'M', so searching backwards
  # from there finds the closest methionine in front of the peptide
  met_offset = protein_seq.rindex('M', pep_offset)
  if met_offset.nil?
    puts "No methionine found ahead of peptide sequence. Unable to determine n-term sequence"
    return nil
  end

  leader = protein_seq[met_offset..(pep_offset-1)]
  observed = protein_seq[pep_offset..(pep_offset+peptide_seq.length-1)]
  "#{leader}(cleaved)#{observed}"
end
390
+
391
# Creates all GFF3 records for one peptide identified on one protein.
#
# protein_seq   - protein sequence.
# peptide_seq   - peptide sequence.
# protein_info  - object responding to #scaffold and #strand (and whatever
#                 get_peptide_coordinates needs).
# prot_id       - id of the parent protein record.
# peptide_prob  - stored as the score of the peptide record.
# peptide_count - used to build the peptide id "<prot_id>.p<peptide_count>".
# dna_sequence  - passed through to get_peptide_coordinates.
# genomedb      - passed through to generate_fragment_gffs_for_coords.
#
# For every genomic location of the peptide the result contains, in order:
#   * a "peptide" record spanning the first fragment's start to the last
#     fragment's end,
#   * the "CDS" fragment records,
#   * a "start_codon" record when get_start_codon_coords_for_peptide finds one,
#   * a "cterm" record when get_cterm_coords_for_peptide finds one.
#
# The cterm branch now appends the cterm record itself (it used to append the
# start codon record, or nil when there was none), and the start_codon/cterm
# attributes are a list of [key,value] pairs like those of every other record.
#
# Returns the Array of records; empty when the peptide cannot be located.
def generate_gff_for_peptide_mapped_to_protein(protein_seq,peptide_seq,protein_info,prot_id,peptide_prob,peptide_count,dna_sequence,genomedb=nil)
  peptide_coords = get_peptide_coordinates(protein_seq,peptide_seq,protein_info,dna_sequence)
  return [] if peptide_coords.nil?

  pep_id = "#{prot_id}.p#{peptide_count}"
  gff_records = []

  # Builds a score-less, frame-less child record of the peptide.
  # Positional arguments: seqid, source, feature type, start, end, score,
  # strand, frame, attributes
  child_record = lambda do |feature_type, span|
    Bio::GFF::GFF3::Record.new(protein_info.scaffold,"MSMS",feature_type,span[0],span[1],'',
      protein_info.strand,nil,[["Parent",pep_id]])
  end

  # Convert peptide coordinates to genome coordinates and create gff lines
  # for each match
  peptide_coords.each do |coords|
    pep_genomic_start = coords.first[0]
    pep_genomic_end = coords.last[1]

    pep_attributes = [["ID",pep_id],["Parent",prot_id],["Name",peptide_seq]]
    pep_gff_line = Bio::GFF::GFF3::Record.new(protein_info.scaffold,"MSMS","peptide",
      pep_genomic_start,pep_genomic_end,peptide_prob,protein_info.strand,nil,pep_attributes)

    # For standard peptides
    frag_gffs = generate_fragment_gffs_for_coords(coords,protein_info,pep_id,peptide_seq,genomedb,"CDS")
    gff_records << pep_gff_line
    gff_records.concat(frag_gffs)

    # For peptides with only 1 tryptic terminus
    start_codon_coords = get_start_codon_coords_for_peptide(pep_genomic_start,pep_genomic_end,peptide_seq,protein_seq,protein_info.strand)
    gff_records << child_record.call("start_codon",start_codon_coords) if start_codon_coords

    cterm_coords = get_cterm_coords_for_peptide(pep_genomic_start,pep_genomic_end,peptide_seq,protein_seq,protein_info.strand)
    gff_records << child_record.call("cterm",cterm_coords) if cterm_coords
  end

  gff_records
end
446
+
447
# Appends "signalpeptide" records for the inferred, cleaved N-terminal
# sequence of a peptide to gff_records.
#
# gff_records   - Array of records; extended IN PLACE so that the caller sees
#                 the added records. (Previously a new array was assigned to
#                 the local variable only and the additions were lost.)
# peptide_seq, protein_seq - passed to get_nterm_peptide_for_peptide.
# protein_info  - object responding to #name (plus whatever the coordinate
#                 helpers need).
# prot_id, peptide_count - used to build the parent id "<prot_id>.p<peptide_count>".
# dna_sequence  - passed through to get_peptide_coordinates.
# genomedb      - passed through to generate_fragment_gffs_for_coords.
#
# When an N-terminal sequence is inferred, a tab separated "Nterm" line is
# written to stdout before the records are generated.
#
# Returns gff_records.
def add_putative_nterm_to_gff(gff_records,peptide_seq,protein_seq,protein_info,prot_id,peptide_count,dna_sequence,genomedb)
  signal_peptide = get_nterm_peptide_for_peptide(peptide_seq,protein_seq)
  return gff_records unless signal_peptide

  pep_id = "#{prot_id}.p#{peptide_count}"
  $stdout.write "Nterm\t#{signal_peptide}\t#{protein_info.name}\t#{protein_seq}\n"

  # Get raw signal_peptide sequence
  raw_signal_peptide = signal_peptide.sub(/\(cleaved\)/,"")

  signal_peptide_coords = get_peptide_coordinates(protein_seq,raw_signal_peptide,protein_info,dna_sequence)
  (signal_peptide_coords || []).each do |spcoords|
    signal_peptide_gff = generate_fragment_gffs_for_coords(spcoords,protein_info,pep_id,raw_signal_peptide,genomedb,"signalpeptide")
    gff_records.concat(signal_peptide_gff)
  end

  gff_records
end
464
+
465
# Tells whether a peptide record repeats a peptide already placed at the same
# genomic start.
#
# peptide_gff             - record responding to #attributes (list of
#                           [key,value] pairs containing a "Name" entry) and #start.
# peptides_covered_genome - Hash mapping a peptide sequence to a start position.
#
# Returns true when the start stored for the record's Name equals the
# record's own start, false otherwise.
def peptide_gff_is_duplicate(peptide_gff,peptides_covered_genome)
  attributes = peptide_gff.attributes
  name_position = attributes.index { |attribute| attribute[0]=="Name" }
  sequence = attributes[name_position][1]

  peptides_covered_genome[sequence]==peptide_gff.start ? true : false
end
473
+
474
+ end