protk 1.2.6.pre5 → 1.3.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +84 -45
- data/bin/add_retention_times.rb +9 -5
- data/bin/augustus_to_proteindb.rb +7 -11
- data/bin/interprophet.rb +28 -46
- data/bin/make_decoy.rb +16 -48
- data/bin/mascot_search.rb +57 -71
- data/bin/mascot_to_pepxml.rb +13 -26
- data/bin/msgfplus_search.rb +70 -107
- data/bin/omssa_search.rb +52 -109
- data/bin/peptide_prophet.rb +44 -119
- data/bin/pepxml_to_table.rb +24 -27
- data/bin/protein_prophet.rb +22 -82
- data/bin/protxml_to_gff.rb +22 -519
- data/bin/protxml_to_table.rb +2 -16
- data/bin/sixframe.rb +10 -32
- data/bin/tandem_search.rb +30 -403
- data/bin/tandem_to_pepxml.rb +43 -0
- data/bin/unimod_to_loc.rb +1 -1
- data/ext/{protk/decoymaker → decoymaker}/decoymaker.c +74 -21
- data/ext/decoymaker/extconf.rb +3 -0
- data/lib/protk/constants.rb +16 -2
- data/lib/protk/data/default_config.yml +2 -1
- data/lib/protk/data/tandem_gpm_defaults.xml +175 -0
- data/lib/protk/data/tandem_isb_kscore_defaults.xml +123 -0
- data/lib/protk/data/tandem_isb_native_defaults.xml +123 -0
- data/lib/protk/data/tandem_params.xml +17 -54
- data/lib/protk/fastadb.rb +2 -2
- data/lib/protk/prophet_tool.rb +1 -1
- data/lib/protk/protxml_to_gff_tool.rb +474 -0
- data/lib/protk/search_tool.rb +58 -103
- data/lib/protk/setup_rakefile.rake +9 -5
- data/lib/protk/tandem_search_tool.rb +256 -0
- data/lib/protk/tool.rb +85 -104
- data/lib/protk.rb +1 -6
- metadata +24 -103
- data/bin/annotate_ids.rb +0 -59
- data/bin/asapratio.rb +0 -27
- data/bin/blastxml_to_table.rb +0 -119
- data/bin/correct_omssa_retention_times.rb +0 -27
- data/bin/feature_finder.rb +0 -95
- data/bin/file_convert.rb +0 -164
- data/bin/generate_omssa_loc.rb +0 -42
- data/bin/gffmerge.rb +0 -208
- data/bin/libra.rb +0 -70
- data/bin/toppas_pipeline.rb +0 -84
- data/bin/uniprot_annotation.rb +0 -141
- data/bin/xls_to_table.rb +0 -52
- data/bin/xpress.rb +0 -27
- data/ext/protk/decoymaker/extconf.rb +0 -3
- data/ext/protk/simplealign/extconf.rb +0 -3
- data/lib/protk/biotools_excel_converter.rb +0 -60
- data/lib/protk/eupathdb_gene_information_table.rb +0 -158
- data/lib/protk/gapped_aligner.rb +0 -264
- data/lib/protk/protein_annotator.rb +0 -646
- data/lib/protk/spreadsheet_extensions.rb +0 -79
- data/lib/protk/xtandem_defaults.rb +0 -11
data/lib/protk/gapped_aligner.rb
DELETED
@@ -1,264 +0,0 @@
|
|
1
|
-
require 'bio'
|
2
|
-
require 'matrix'
|
3
|
-
|
4
|
-
# Simple mutable record exposing three read/write attributes:
# +start+, +end+ and +seq+. All three are nil until assigned.
class PeptideFragment
  attr_accessor :start, :end, :seq
end
|
9
|
-
|
10
|
-
# Result of aligning a peptide against a nucleotide sequence.
#
# +trace+ is an array of integer move codes with one entry per alignment
# step. The codes interpreted here are:
#   0 - match (a peptide position is consumed)
#   1 - nucleotide skipped (rendered as "-" by #inspect)
# Any other value is treated as "not a match" by #fragments and #gaps and is
# not rendered by #inspect.
class PeptideToGeneAlignment
  attr_accessor :gene_seq, :pep_seq, :trace

  # gene    - the nucleotide sequence that was aligned against
  # peptide - the peptide sequence (one character per residue)
  # trace   - array of move codes (see class comment)
  def initialize(gene, peptide, trace)
    @gene_seq = gene
    @pep_seq = peptide
    @trace = trace
  end

  # Returns a two-line, newline-terminated String: the gene sequence on the
  # first line and, on the second, the peptide rendered along the trace with
  # every residue written three times and "-" for each skipped nucleotide.
  #
  # The string is returned rather than printed so that Kernel#p, irb and
  # string formatting get a real String back; use `puts alignment.inspect`
  # to print it.
  def inspect
    pep_triples = @pep_seq.each_char.map { |c| c * 3 }.join

    pepi = 0
    aligned = @trace.map do |move|
      if move == 1
        "-"
      elsif move == 0
        residue = pep_triples[pepi]
        pepi += 1
        residue.to_s # nil (trace longer than the peptide) renders as ""
      else
        ""
      end
    end.join

    "#{@gene_seq}\n#{aligned}\n"
  end

  # Returns the maximal runs of match moves as [first_index, last_index]
  # pairs (both inclusive, indexes into +trace+). Returns [] when the trace
  # contains no match moves.
  def fragments
    frags = []
    run_start = nil # index where the currently open run began, if any

    @trace.each_with_index do |move, i|
      if move == 0
        run_start = i if run_start.nil?
      elsif run_start
        frags << [run_start, i - 1]
        run_start = nil
      end
    end

    # A run that is still open when the trace ends extends to the last index.
    frags << [run_start, @trace.length - 1] if run_start
    frags
  end

  # Returns the internal gaps as [first_gap_index, index_of_next_match]
  # pairs. Unlike #fragments the second element is exclusive: it is the
  # index of the match move that closes the gap. Non-match moves before the
  # first match or after the last match are not reported.
  def gaps
    gps = []
    gap_start = nil    # index where the currently open gap began, if any
    seen_match = false # leading non-match moves are ignored until this is set

    @trace.each_with_index do |move, i|
      if move == 0
        gps << [gap_start, i] if gap_start
        gap_start = nil
        seen_match = true
      elsif seen_match && gap_start.nil?
        gap_start = i
      end
    end

    # A gap still open here is trailing and is deliberately dropped.
    gps
  end

end
|
96
|
-
|
97
|
-
# Uses a dynamic programming algorithm (Smith-Waterman like) to align a
# peptide sequence to a nucleotide sequence.
# This aligner assumes you are doing proteogenomics and just want to assume
# that
# (a) The entire peptide sequence matches (with gaps) to the DNA sequence
#
# Every peptide residue is expanded to three identical rows so that one row
# of the DP matrix corresponds to one nucleotide column. Moves are encoded as
# 0 (match), 1 (nucleotide skipped) and -1 (amino acid skipped).
class GappedAligner

  # When true, #calculate_dp writes the score, move and frame matrices to
  # dpmatrix.csv, moves.csv and frames.csv in the current directory.
  # Defaults to false so that aligning does not litter the working directory.
  attr_accessor :dump_matrices

  def initialize
    @big_penalty = -1000000000
    @gap_open_penalty = -10000
    @gap_extend_penalty = -1
    @end_gap_penalty = 0
    @match_bonus = 400

    @match_move = 0
    @aadel_move = -1
    @nadel_move = 1
    # Indexed as [column % 3][frame]; gives the offset from a column back to
    # the first nucleotide of the codon read for that frame.
    @triplet_offsets = [[0, -2, -1], [-1, 0, -2], [-2, -1, 0]]

    @dump_matrices = false
  end

  # Score for skipping an amino acid (always the prohibitive penalty).
  def aa_deletion()
    @big_penalty
  end

  # Score for skipping a nucleotide. +move_type+ is the move stored in the
  # cell to the left: continuing an existing nucleotide gap costs the extend
  # penalty, anything else costs the open penalty.
  def score_na_deletion(move_type)
    return @gap_extend_penalty if move_type == @nadel_move

    @gap_open_penalty
  end

  # Score for pairing amino acid +aa+ with the translated codon +na+:
  # the match bonus when they are equal, the prohibitive penalty otherwise.
  def score_match(aa, na)
    return @match_bonus if aa == na

    @big_penalty
  end

  # Walks +dpmoves+ back from cell (from_row, from_col) to the origin and
  # returns the moves in origin-to-end order.
  #
  # Implemented as a loop rather than recursion so that long sequences cannot
  # exhaust the call stack.
  #
  # Raises ArgumentError ("Beyond end of array") if the walk leaves the
  # matrix before reaching the origin.
  def traceback(from_row, from_col, dpmoves)
    moves = []
    row = from_row
    col = from_col

    loop do
      move = dpmoves[row][col]

      # A match steps diagonally; the two gap moves step along one axis only.
      prev_row = row - 1
      prev_col = col - 1
      if move == @aadel_move
        prev_col += 1
      elsif move == @nadel_move
        prev_row += 1
      end

      moves << move
      break if prev_row.zero? && prev_col.zero?
      raise ArgumentError, "Beyond end of array" if prev_col < 0 || prev_row < 0

      row = prev_row
      col = prev_col
    end

    moves.reverse
  end

  # Reading frame that follows +previous_frame+ (cycles 0, 1, 2, 0, ...).
  def next_frame(previous_frame)
    (previous_frame + 1) % 3
  end

  # Translates the codon associated with nucleotide index +j+ when read in
  # +frame+. Returns '-' when the codon would start before the beginning of
  # +gene_seq+.
  def translate_na_at(j, frame, gene_seq)
    start_pos = j + @triplet_offsets[j % 3][frame]
    return '-' if start_pos < 0

    # Re-wrap the slice: since Ruby 3.0 String#[] called on a String subclass
    # returns a plain String, which has no #translate.
    Bio::Sequence::NA.new(gene_seq[start_pos, 3]).translate
  end

  # Writes +dpmatrix+ to "<name>.csv". The header row lists the nucleotides
  # of +gene_seq+; every following row is prefixed with the matching
  # character of +pep_triples+ (the first row has an empty prefix).
  def save_matrix(dpmatrix, pep_triples, gene_seq, name)
    # Block form closes the file even if a write raises.
    File.open("#{name}.csv", "w") do |matfile|
      matfile.write(",,")
      gene_seq.each_char { |na| matfile.write("#{na},") }
      matfile.write("\n")

      dpmatrix.each_with_index do |row, ri|
        matfile.write(ri > 0 ? "#{pep_triples[ri - 1]}," : ",")
        row.each { |cell| matfile.write("#{cell},") }
        matfile.write("\n")
      end
    end
  end

  # Fills the DP matrices for +pep_seq+ against +gene_seq+ and returns the
  # move trace of the best alignment (see #traceback).
  #
  # Raises ArgumentError when the peptide needs more nucleotides
  # (3 per residue) than +gene_seq+ provides.
  def calculate_dp(pep_seq, gene_seq)
    gene_seq = Bio::Sequence::NA.new(gene_seq)
    nrow = pep_seq.length * 3 + 1
    ncol = gene_seq.length + 1

    raise ArgumentError, "Peptide sequence is longer than gene" if nrow > ncol

    pep_triples = pep_seq.each_char.map { |c| c * 3 }.join

    dpmoves = Array.new(nrow) { Array.new(ncol, 0) }
    dpmatrix = Array.new(nrow) { Array.new(ncol, 0) }
    dpframes = Array.new(nrow) { Array.new(ncol, 0) }

    # Boundary conditions: skipping residues before the gene starts is
    # prohibitive, skipping leading nucleotides costs the end-gap penalty.
    nrow.times do |i|
      dpmatrix[i][0] = aa_deletion * i
      dpmoves[i][0] = @aadel_move
    end
    ncol.times do |j|
      dpmatrix[0][j] = @end_gap_penalty * j
      dpmoves[0][j] = @nadel_move
      dpframes[0][j] = j % 3
    end
    dpmoves[0][0] = 0
    dpframes[0][0] = 0

    (1...nrow).each do |i|
      aa = pep_triples[i - 1]

      (1...ncol).each do |j|
        translated_na = translate_na_at(j - 1, dpframes[i - 1][j - 1], gene_seq)

        match = score_match(aa, translated_na) + dpmatrix[i - 1][j - 1]
        nadel = score_na_deletion(dpmoves[i][j - 1]) + dpmatrix[i][j - 1]

        if match >= nadel
          dpmatrix[i][j] = match
          dpmoves[i][j] = @match_move
          dpframes[i][j] = dpframes[i - 1][j - 1]
        else
          # Skipping a nucleotide shifts the reading frame by one.
          dpmatrix[i][j] = nadel
          dpmoves[i][j] = @nadel_move
          dpframes[i][j] = next_frame(dpframes[i][j - 1])
        end
      end
    end

    # Best end-point: the highest score in the last row (first one on ties).
    end_score = dpmatrix[nrow - 1].max
    end_j = dpmatrix[nrow - 1].index(end_score)

    if @dump_matrices
      save_matrix(dpmatrix, pep_triples, gene_seq, "dpmatrix")
      save_matrix(dpmoves, pep_triples, gene_seq, "moves")
      save_matrix(dpframes, pep_triples, gene_seq, "frames")
    end

    traceback(nrow - 1, end_j, dpmoves)
  end

  # Aligns +pep_seq+ to +gene_seq+ and returns a PeptideToGeneAlignment
  # holding both sequences and the move trace.
  def align pep_seq, gene_seq
    trace = calculate_dp(pep_seq, gene_seq)
    PeptideToGeneAlignment.new(gene_seq, pep_seq, trace)
  end

end
|