mspire 0.1.7 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +41 -14
- data/bin/bioworks2excel.rb +1 -1
- data/bin/bioworks_to_pepxml.rb +46 -59
- data/bin/fasta_shaker.rb +1 -1
- data/bin/filter.rb +6 -0
- data/bin/find_aa_freq.rb +23 -0
- data/bin/id_precision.rb +3 -2
- data/bin/mzxml_to_lmat.rb +2 -1
- data/bin/pepproph_filter.rb +1 -1
- data/bin/precision.rb +1 -1
- data/bin/protein_summary.rb +2 -451
- data/bin/raw_to_mzXML.rb +55 -0
- data/bin/srf_group.rb +26 -0
- data/changelog.txt +7 -0
- data/lib/align.rb +3 -3
- data/lib/fasta.rb +6 -1
- data/lib/gi.rb +9 -4
- data/lib/roc.rb +2 -0
- data/lib/sample_enzyme.rb +2 -1
- data/lib/spec/mzxml/parser.rb +2 -43
- data/lib/spec/mzxml.rb +65 -2
- data/lib/spec_id/aa_freqs.rb +10 -7
- data/lib/spec_id/bioworks.rb +67 -87
- data/lib/spec_id/filter.rb +794 -0
- data/lib/spec_id/precision.rb +29 -36
- data/lib/spec_id/proph.rb +5 -3
- data/lib/spec_id/protein_summary.rb +459 -0
- data/lib/spec_id/sequest.rb +323 -271
- data/lib/spec_id/srf.rb +189 -135
- data/lib/spec_id.rb +276 -227
- data/lib/spec_id_xml.rb +101 -0
- data/lib/toppred.rb +18 -0
- data/script/degenerate_peptides.rb +47 -0
- data/script/filter-peps.rb +5 -1
- data/test/tc_align.rb +1 -1
- data/test/tc_bioworks.rb +25 -22
- data/test/tc_bioworks_to_pepxml.rb +37 -4
- data/test/tc_fasta.rb +3 -1
- data/test/tc_fasta_shaker.rb +8 -6
- data/test/tc_filter.rb +203 -0
- data/test/tc_gi.rb +6 -9
- data/test/tc_id_precision.rb +31 -0
- data/test/tc_mzxml.rb +8 -6
- data/test/tc_peptide_parent_times.rb +2 -1
- data/test/tc_precision.rb +1 -1
- data/test/tc_proph.rb +5 -5
- data/test/tc_protein_summary.rb +36 -13
- data/test/tc_sequest.rb +78 -33
- data/test/tc_spec_id.rb +128 -6
- data/test/tc_srf.rb +84 -38
- metadata +67 -62
- data/bin/fasta_cat.rb +0 -39
- data/bin/fasta_cat_mod.rb +0 -59
- data/bin/fasta_mod.rb +0 -57
- data/bin/filter_spec_id.rb +0 -365
- data/bin/raw2mzXML.rb +0 -21
- data/script/gen_database_searching.rb +0 -258
data/test/tc_spec_id.rb
CHANGED
@@ -8,11 +8,14 @@ class SpecIDTest < Test::Unit::TestCase
|
|
8
8
|
def initialize(arg)
|
9
9
|
super(arg)
|
10
10
|
@tfiles = File.dirname(__FILE__) + '/tfiles/'
|
11
|
+
@tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
|
11
12
|
@bw = @tfiles + "bioworks_small.xml"
|
13
|
+
@old_prot_proph = @tfiles + 'yeast_gly_small-prot.xml'
|
14
|
+
@prot_proph = @tfiles + 'opd1/000_020_3prots-prot.xml'
|
15
|
+
@srf = @tfiles_l + '7MIX_STD_110802_1.srf'
|
12
16
|
end
|
13
17
|
|
14
18
|
def test_spec_id_creation
|
15
|
-
sp = SpecID.new
|
16
19
|
sp = SpecID.new(@bw)
|
17
20
|
assert_equal(106, sp.prots.size)
|
18
21
|
end
|
@@ -45,20 +48,21 @@ class SpecIDTest < Test::Unit::TestCase
|
|
45
48
|
end
|
46
49
|
[write_index, bo]
|
47
50
|
end
|
51
|
+
roc = ROC.new
|
48
52
|
tp, fp = ROC.new.prep_list(answ)
|
49
|
-
(exp_tp, exp_fp) =
|
53
|
+
(exp_tp, exp_fp) = roc.tps_and_ppv(tp, fp)
|
50
54
|
|
51
55
|
sp = SpecID.new(file)
|
52
56
|
assert_equal(19, sp.prots.size)
|
53
57
|
tp, fp = sp.rank_and_classify(:prots, proc {|prt| prt.probability }, proc {|prt| if prt.reference =~ /^INV_/ ; false; else; true; end })
|
54
|
-
tps, ys =
|
58
|
+
(tps, ys) = roc.tps_and_ppv(tp, fp)
|
55
59
|
assert_equal(exp_tp, tps)
|
56
60
|
assert_equal(exp_fp, ys)
|
57
|
-
|
61
|
+
(num_hits, prec) = sp.num_hits_and_ppv_for_prob("INV_")
|
58
62
|
# @TODO: assert these guys for consistencies sake:
|
59
63
|
assert_in_delta_arrays([1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14, 15], tps, 0.0000001)
|
60
|
-
|
61
|
-
assert_in_delta_arrays([
|
64
|
+
# Consistency check only:
|
65
|
+
assert_in_delta_arrays([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.909090909090909, 0.916666666666667, 0.923076923076923, 0.928571428571429, 0.866666666666667], prec, 0.0000001)
|
62
66
|
end
|
63
67
|
|
64
68
|
def assert_in_delta_arrays(one, two, delta, message=nil)
|
@@ -66,5 +70,123 @@ class SpecIDTest < Test::Unit::TestCase
|
|
66
70
|
assert_in_delta(v, two[i], delta, message)
|
67
71
|
end
|
68
72
|
end
|
73
|
+
|
74
|
+
def test_file_type
|
75
|
+
assert_equal('bioworks', SpecID.file_type(@bw))
|
76
|
+
assert_equal('protproph', SpecID.file_type(@prot_proph))
|
77
|
+
assert_equal('srg', SpecID.file_type('whatever.srg'))
|
78
|
+
## WOULD BE NICE TO GET THIS WORKING, TOO
|
79
|
+
# assert_equal('protproph', SpecID.file_type(@old_prot_proph))
|
80
|
+
if File.exist? @tfiles_l
|
81
|
+
assert File.exist?(@srf), "file #{@srf} is there"
|
82
|
+
assert_equal('srf', SpecID.file_type(@srf))
|
83
|
+
else
|
84
|
+
assert_nil( puts("\n--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
def test_non_standard_aa_removal
|
89
|
+
hash = {"K.PEPTIDE.Z" => "K.PEPTIDE.Z", "K.*M" => "K.M", "aI" => 'I', "YI.&" => "YI.", "EI.!@#\$%^&*(){}[]|\\;:'\"<>,?/EI" => 'EI.EI'}
|
90
|
+
cl = proc {|v| SpecID::Pep.remove_non_amino_acids(v) }
|
91
|
+
hash.each do |k,v|
|
92
|
+
assert_equal(v, cl.call(k))
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
|
97
|
+
|
98
|
+
end
|
99
|
+
|
100
|
+
class MyProt ; include SpecID::Prot ; end
|
101
|
+
class MyPep ; include SpecID::Pep ; attr_accessor :xcorr end
|
102
|
+
|
103
|
+
|
104
|
+
class TestOccamsRazor < Test::Unit::TestCase
|
105
|
+
|
106
|
+
def test_small
|
107
|
+
|
108
|
+
prots = (0..6).to_a.map do |n|
|
109
|
+
prot = MyProt.new
|
110
|
+
prot.reference = "ref_#{n}"
|
111
|
+
prot
|
112
|
+
end
|
113
|
+
|
114
|
+
peps = (0..12).to_a.map {|v| MyPep.new }
|
115
|
+
|
116
|
+
# 0 1 2 3 4 5 6 7 8 9 10 11 12
|
117
|
+
aaseqs = %w(AAA BBB CCC ABC AAA BBB CCC ABC DDD EEE FFF EEEEE DDD)
|
118
|
+
xcorrs = [1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 0.5, 0.6, 0.7, 0.8, 0.5]
|
119
|
+
|
120
|
+
peps.zip(aaseqs, xcorrs) do |pep,aaseq,xcorr|
|
121
|
+
pep.aaseq = aaseq
|
122
|
+
pep.xcorr = xcorr
|
123
|
+
end
|
124
|
+
|
125
|
+
prots[0].peps = peps[0,4]
|
126
|
+
prots[1].peps = [peps[2]] ## should be missing
|
127
|
+
|
128
|
+
test_prots = prots[0,2]
|
129
|
+
require 'pp'
|
130
|
+
answ = SpecID.occams_razor(test_prots)
|
131
|
+
answ.each do |an|
|
132
|
+
assert( an[0].is_a?(SpecID::Prot), "prots are there")
|
133
|
+
end
|
134
|
+
first = answ.first
|
135
|
+
assert_equal( prots[0], first[0])
|
136
|
+
assert_equal_array_content( prots[0].peps, first[1])
|
137
|
+
|
138
|
+
|
139
|
+
|
140
|
+
#prots[2].peps = [peps[2]]
|
141
|
+
#prots[2].peps.push( peps[3] ) ## should be there since it has 2
|
142
|
+
#prots[3].peps = [peps[3]] ## should be missing
|
143
|
+
end
|
144
|
+
|
145
|
+
def assert_equal_array_content(exp1, ans, message='')
|
146
|
+
exp1.each do |item|
|
147
|
+
assert(ans.include?(item), "finding #{item}: #{message}")
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
|
153
|
+
require 'fasta'
|
154
|
+
|
155
|
+
class TestProteinGroups < Test::Unit::TestCase
|
156
|
+
|
157
|
+
def test_small
|
158
|
+
prots = []
|
159
|
+
|
160
|
+
aaseq = ('A'..'Z').to_a.join('')
|
161
|
+
header = "prot1"
|
162
|
+
prots << Fasta::Prot.new(header, aaseq)
|
163
|
+
|
164
|
+
aaseq = ('A'..'Z').to_a.reverse.join('')
|
165
|
+
header = "prot1_reverse"
|
166
|
+
prots << Fasta::Prot.new(header, aaseq)
|
167
|
+
|
168
|
+
aaseq = ('A'..'Z').to_a.join('')
|
169
|
+
header = "prot1_identical"
|
170
|
+
prots << Fasta::Prot.new(header, aaseq)
|
171
|
+
|
172
|
+
aaseq = ('A'..'E').to_a.join('')
|
173
|
+
header = "prot1_short"
|
174
|
+
prots << Fasta::Prot.new(header, aaseq)
|
175
|
+
|
176
|
+
aaseq = ('A'..'E').to_a.reverse.join('')
|
177
|
+
header = "prot1_reverse_short"
|
178
|
+
prots << Fasta::Prot.new(header, aaseq)
|
179
|
+
|
180
|
+
fasta = Fasta.new(prots)
|
181
|
+
|
182
|
+
pep_seqs = %w(ABCD DEFG ABCD DEFG EDCB FEDCB XYZ RANDOM AEABA)
|
183
|
+
|
184
|
+
arr = SpecID::Pep.protein_groups_by_sequence(pep_seqs, fasta)
|
185
|
+
|
186
|
+
exp = [[prots[0], prots[2], prots[3]], [prots[0], prots[2]], [prots[0], prots[2], prots[3]], [prots[0],prots[2]], [prots[1], prots[4]], [prots[1]], [prots[0], prots[2]], [], []]
|
187
|
+
|
188
|
+
assert_equal(exp, arr)
|
189
|
+
end
|
190
|
+
|
69
191
|
end
|
70
192
|
|
data/test/tc_srf.rb
CHANGED
@@ -50,53 +50,70 @@ module ToMatch
|
|
50
50
|
|
51
51
|
Out_files_first = {
|
52
52
|
:num_hits => 10,
|
53
|
-
:charge => 1,
|
54
53
|
:computer => "VELA",
|
55
54
|
:date_time => "11/17/2006, 04:13 PM,",
|
56
55
|
}
|
57
56
|
|
58
57
|
Out_files_first_hit = {
|
59
58
|
:mh => 1220.5128044522,
|
60
|
-
:deltacn => 0.
|
59
|
+
:deltacn => 0.071944423019886, ## this is the modified version
|
61
60
|
:sp => 96.5815887451172,
|
62
61
|
:xcorr => 1.08377742767334,
|
63
62
|
:id => 224,
|
64
63
|
:rsp => 13,
|
65
64
|
:ions_matched => 8,
|
66
65
|
:ions_total => 20,
|
67
|
-
:
|
66
|
+
:sequence => "K.LCPHLTLLPGR.F",
|
67
|
+
:aaseq => "LCPHLTLLPGR",
|
68
68
|
:reference => "gi|1786425|gb|AAC73335.1| damage-inducible protein P; putative tRNA synthetase",
|
69
|
+
:first_scan => 2,
|
70
|
+
:last_scan => 2,
|
71
|
+
:base_name => '7MIX_STD_110802_1',
|
72
|
+
:charge => 1,
|
69
73
|
}
|
70
74
|
|
71
75
|
Out_files_last = {
|
72
76
|
:num_hits => 10,
|
73
|
-
:charge => 1,
|
74
77
|
:computer => "VELA",
|
75
78
|
:date_time => "11/17/2006, 04:25 PM," ,
|
76
79
|
}
|
77
80
|
Out_files_last_first_hit = {
|
78
81
|
:mh => 2605.9368784522,
|
79
|
-
:deltacn => 0.
|
82
|
+
:deltacn => 0.03921128064394,
|
80
83
|
:sp => 76.7447052001953,
|
81
84
|
:xcorr => 0.915680646896362,
|
82
85
|
:id => 13562,
|
83
86
|
:rsp => 4,
|
84
87
|
:ions_matched => 10,
|
85
88
|
:ions_total => 84,
|
86
|
-
:
|
89
|
+
:sequence => "K.HLEINPNHPIVETLRQKAETHK.N",
|
90
|
+
:aaseq => "HLEINPNHPIVETLRQKAETHK",
|
87
91
|
:reference => "gi|30149327|ref|XP_293672.2| similar to ebiP7687 [Homo sapiens]",
|
92
|
+
:first_scan => 7161,
|
93
|
+
:last_scan => 7161,
|
94
|
+
:base_name => '7MIX_STD_110802_1',
|
95
|
+
:deltamass => 2605.9368784522 - 2604.8360326775,
|
96
|
+
:ppm => ((1.0e6 * (2605.9368784522 - 2604.8360326775)) / 2604.8360326775).abs,
|
97
|
+
:charge => 3,
|
88
98
|
}
|
89
99
|
Out_files_last_last_hit = {
|
90
100
|
:mh => 2604.9025174522,
|
91
|
-
:deltacn =>
|
101
|
+
:deltacn => 1.1,
|
92
102
|
:sp => 26.1511478424072,
|
93
103
|
:xcorr => 0.634012818336487,
|
94
104
|
:id => 8105,
|
95
105
|
:rsp => 165,
|
96
106
|
:ions_matched => 6,
|
97
107
|
:ions_total => 84,
|
98
|
-
:
|
99
|
-
:
|
108
|
+
:sequence => "R.EAFLVNSDLTLRAQLTEFRDHK.L",
|
109
|
+
:aaseq => "EAFLVNSDLTLRAQLTEFRDHK",
|
110
|
+
:reference => "gi|5453830|ref|NP_006181.1| origin recognition complex, subunit 2-like; origin",
|
111
|
+
:first_scan => 7161,
|
112
|
+
:last_scan => 7161,
|
113
|
+
:base_name => '7MIX_STD_110802_1',
|
114
|
+
:deltamass => 2604.9025174522 - 2604.8360326775,
|
115
|
+
:ppm => ((1.0e6 * (2604.9025174522 - 2604.8360326775)) / 2604.8360326775).abs,
|
116
|
+
:charge => 3,
|
100
117
|
}
|
101
118
|
Sequest_params = {
|
102
119
|
"add_F_Phenylalanine"=>"0.0000",
|
@@ -163,50 +180,79 @@ module ToMatch
|
|
163
180
|
|
164
181
|
end
|
165
182
|
|
183
|
+
tfiles = File.dirname(__FILE__) + '/tfiles/'
|
184
|
+
tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
|
185
|
+
tf_srf = tfiles_l + "7MIX_STD_110802_1.srf"
|
186
|
+
tf_srf_inv = tfiles_l + "7MIX_STD_110802_1_INV.srf"
|
187
|
+
if File.exist? tfiles_l
|
188
|
+
start = Time.now
|
189
|
+
$group = SRFGroup.new([tf_srf, tf_srf_inv])
|
190
|
+
$srf = $group.srfs.first
|
191
|
+
puts "Time to read and compile two SRF: #{Time.now - start} secs"
|
192
|
+
end
|
193
|
+
|
194
|
+
|
166
195
|
class TestSRF < Test::Unit::TestCase
|
167
196
|
include ToMatch
|
168
197
|
def initialize(arg)
|
169
198
|
super(arg)
|
170
199
|
@tfiles = File.dirname(__FILE__) + '/tfiles/'
|
171
200
|
@tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
|
172
|
-
@
|
201
|
+
@srg_file = @tfiles + "tmp_bioworks.srg"
|
202
|
+
|
203
|
+
@srf = $srf
|
204
|
+
@group = $group
|
205
|
+
|
173
206
|
end
|
174
207
|
|
175
208
|
def test_basic
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
209
|
+
if File.exist? @tfiles_l
|
210
|
+
## Verify that we have everything and it is as we expect (not exhaustive)
|
211
|
+
head = @srf.header
|
212
|
+
dtgen = head.dta_gen
|
213
|
+
## HEADER
|
214
|
+
hash_match(Header, head)
|
215
|
+
hash_match(Dta_gen, dtgen)
|
216
|
+
## DTA_FILES
|
217
|
+
hash_match(Dta_files_first, @srf.dta_files.first)
|
218
|
+
hash_match(Dta_files_last, @srf.dta_files.last)
|
219
|
+
## OUT_FILES
|
220
|
+
hash_match(Out_files_first, @srf.out_files.first)
|
221
|
+
hash_match(Out_files_first_hit, @srf.out_files.first.hits.first)
|
222
|
+
hash_match(Out_files_last_first_hit, @srf.out_files.last.hits.first)
|
223
|
+
hash_match(Out_files_last_last_hit, @srf.out_files.last.hits.last)
|
224
|
+
## SEQUEST_PARAMS
|
225
|
+
hash_match(Sequest_params, @srf.params)
|
226
|
+
## INDEX
|
227
|
+
assert_equal([7161, 7161, 3], @srf.index.last)
|
228
|
+
assert_equal([2, 2, 1], @srf.index.first)
|
229
|
+
|
230
|
+
assert_equal(@srf.index.size, @srf.dta_files.size)
|
231
|
+
assert_equal(@srf.out_files.size, @srf.dta_files.size)
|
232
|
+
else
|
233
|
+
assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
|
234
|
+
end
|
235
|
+
end
|
236
|
+
|
237
|
+
def test_srg
|
238
|
+
if File.exist? @tfiles_l
|
239
|
+
@group.to_srg(@srg_file)
|
240
|
+
assert(File.exist?(@srg_file), "file exists: " + @srg_file )
|
241
|
+
else
|
242
|
+
assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
|
243
|
+
end
|
201
244
|
end
|
202
245
|
|
203
|
-
|
246
|
+
## treats reference special
|
247
|
+
def hash_match(hash, srf)
|
204
248
|
hash.each do |k,v|
|
205
249
|
if v.is_a? Float
|
206
250
|
delta = v/100000
|
207
|
-
assert_in_delta(
|
251
|
+
assert_in_delta(v, srf.send(k.to_sym), delta, "param: #{k}")
|
252
|
+
elsif k == :reference
|
253
|
+
assert_equal(v[0,38], srf.prots.first.reference)
|
208
254
|
else
|
209
|
-
assert_equal(
|
255
|
+
assert_equal(v, srf.send(k.to_sym), "param: #{k}")
|
210
256
|
end
|
211
257
|
end
|
212
258
|
end
|
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.
|
2
|
+
rubygems_version: 0.9.2
|
3
3
|
specification_version: 1
|
4
4
|
name: mspire
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.
|
7
|
-
date: 2007-
|
6
|
+
version: 0.2.0
|
7
|
+
date: 2007-04-25 00:00:00 -05:00
|
8
8
|
summary: Mass Spectrometry Proteomics Objects, Scripts, and Executables
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -29,28 +29,32 @@ post_install_message:
|
|
29
29
|
authors:
|
30
30
|
- John Prince
|
31
31
|
files:
|
32
|
-
- lib/spec_id
|
32
|
+
- lib/spec_id.rb
|
33
33
|
- lib/align
|
34
|
+
- lib/spec_id_xml.rb
|
35
|
+
- lib/spec_id
|
36
|
+
- lib/toppred.rb
|
37
|
+
- lib/align.rb
|
34
38
|
- lib/spec
|
35
|
-
- lib/sample_enzyme.rb
|
36
39
|
- lib/fasta.rb
|
37
|
-
- lib/roc.rb
|
38
|
-
- lib/spec_id.rb
|
39
40
|
- lib/gi.rb
|
40
|
-
- lib/
|
41
|
-
- lib/
|
42
|
-
- lib/
|
43
|
-
- lib/spec_id/proph.rb
|
41
|
+
- lib/roc.rb
|
42
|
+
- lib/sample_enzyme.rb
|
43
|
+
- lib/align/chams.rb
|
44
44
|
- lib/spec_id/sequest.rb
|
45
|
+
- lib/spec_id/filter.rb
|
46
|
+
- lib/spec_id/bioworks.rb
|
47
|
+
- lib/spec_id/srf.rb
|
45
48
|
- lib/spec_id/precision.rb
|
49
|
+
- lib/spec_id/protein_summary.rb
|
46
50
|
- lib/spec_id/aa_freqs.rb
|
47
|
-
- lib/
|
48
|
-
- lib/spec/mzxml
|
49
|
-
- lib/spec/mzdata
|
51
|
+
- lib/spec_id/proph.rb
|
50
52
|
- lib/spec/msrun.rb
|
51
53
|
- lib/spec/scan.rb
|
52
|
-
- lib/spec/mzxml
|
54
|
+
- lib/spec/mzxml
|
53
55
|
- lib/spec/mzdata.rb
|
56
|
+
- lib/spec/mzdata
|
57
|
+
- lib/spec/mzxml.rb
|
54
58
|
- lib/spec/mzxml/parser.rb
|
55
59
|
- lib/spec/mzdata/parser.rb
|
56
60
|
- INSTALL
|
@@ -59,62 +63,62 @@ files:
|
|
59
63
|
- LICENSE
|
60
64
|
- changelog.txt
|
61
65
|
- release_notes.txt
|
62
|
-
- bin/fasta_cat_mod.rb
|
63
|
-
- bin/fasta_mod.rb
|
64
|
-
- bin/gi2annot.rb
|
65
|
-
- bin/protein_summary.rb
|
66
|
-
- bin/raw2mzXML.rb
|
67
|
-
- bin/fasta_cat.rb
|
68
66
|
- bin/bioworks2sequestXML_gui.rb
|
69
|
-
- bin/
|
67
|
+
- bin/srf_group.rb
|
70
68
|
- bin/pepproph_filter.rb
|
71
|
-
- bin/
|
72
|
-
- bin/bioworks_to_pepxml.rb
|
73
|
-
- bin/mzxml_to_lmat.rb
|
69
|
+
- bin/filter.rb
|
74
70
|
- bin/protxml2prots_peps.rb
|
75
|
-
- bin/
|
71
|
+
- bin/raw_to_mzXML.rb
|
72
|
+
- bin/gi2annot.rb
|
76
73
|
- bin/id_class_anal.rb
|
77
74
|
- bin/precision.rb
|
75
|
+
- bin/id_precision.rb
|
76
|
+
- bin/protein_summary.rb
|
77
|
+
- bin/bioworks_to_pepxml.rb
|
78
|
+
- bin/bioworks2excel.rb
|
79
|
+
- bin/mzxml_to_lmat.rb
|
78
80
|
- bin/fasta_shaker.rb
|
81
|
+
- bin/find_aa_freq.rb
|
79
82
|
- script/prep_dir.rb
|
83
|
+
- script/degenerate_peptides.rb
|
84
|
+
- script/histogram_probs.rb
|
85
|
+
- script/simple_protein_digestion.rb
|
86
|
+
- script/top_hit_per_scan.rb
|
80
87
|
- script/msvis.rb
|
81
|
-
- script/gen_database_searching.rb
|
82
88
|
- script/mzXML2timeIndex.rb
|
83
89
|
- script/tpp_installer.rb
|
84
|
-
- script/create_little_pepxml.rb
|
85
|
-
- script/histogram_probs.rb
|
86
|
-
- script/top_hit_per_scan.rb
|
87
90
|
- script/filter-peps.rb
|
88
|
-
- script/simple_protein_digestion.rb
|
89
|
-
- script/genuine_tps_and_probs.rb
|
90
91
|
- script/estimate_fpr_by_cysteine.rb
|
92
|
+
- script/genuine_tps_and_probs.rb
|
93
|
+
- script/create_little_pepxml.rb
|
91
94
|
- script/find_cysteine_background.rb
|
92
95
|
test_files:
|
93
|
-
- test/
|
94
|
-
- test/
|
95
|
-
- test/
|
96
|
-
- test/
|
97
|
-
- test/tc_spec.rb
|
98
|
-
- test/tc_aa_freqs.rb
|
99
|
-
- test/tc_protein_summary.rb
|
96
|
+
- test/tc_spec_id_xml.rb
|
97
|
+
- test/tc_mzxml_to_lmat.rb
|
98
|
+
- test/tc_id_class_anal.rb
|
99
|
+
- test/tc_gi.rb
|
100
100
|
- test/tc_fasta.rb
|
101
|
-
- test/tc_bioworks.rb
|
102
101
|
- test/tc_peptide_parent_times.rb
|
103
|
-
- test/tc_msrun.rb
|
104
102
|
- test/tc_spec_id.rb
|
103
|
+
- test/tc_roc.rb
|
105
104
|
- test/tc_mzxml.rb
|
105
|
+
- test/tc_sample_enzyme.rb
|
106
|
+
- test/tc_srf.rb
|
107
|
+
- test/tc_bioworks.rb
|
108
|
+
- test/tc_spec.rb
|
109
|
+
- test/tc_bioworks_to_pepxml.rb
|
110
|
+
- test/tc_scan.rb
|
111
|
+
- test/tc_sequest.rb
|
112
|
+
- test/tc_fasta_shaker.rb
|
106
113
|
- test/tc_id_precision.rb
|
107
|
-
- test/
|
114
|
+
- test/tc_msrun.rb
|
115
|
+
- test/tc_protein_summary.rb
|
108
116
|
- test/tc_filter_peps.rb
|
117
|
+
- test/tc_filter.rb
|
118
|
+
- test/tc_aa_freqs.rb
|
119
|
+
- test/tc_proph.rb
|
120
|
+
- test/tc_align.rb
|
109
121
|
- test/tc_precision.rb
|
110
|
-
- test/tc_roc.rb
|
111
|
-
- test/tc_scan.rb
|
112
|
-
- test/tc_mzxml_to_lmat.rb
|
113
|
-
- test/tc_bioworks_to_pepxml.rb
|
114
|
-
- test/tc_sample_enzyme.rb
|
115
|
-
- test/tc_fasta_shaker.rb
|
116
|
-
- test/tc_gi.rb
|
117
|
-
- test/tc_spec_id_xml.rb
|
118
122
|
rdoc_options:
|
119
123
|
- --main
|
120
124
|
- README
|
@@ -125,29 +129,30 @@ extra_rdoc_files:
|
|
125
129
|
- INSTALL
|
126
130
|
- LICENSE
|
127
131
|
executables:
|
128
|
-
- fasta_cat_mod.rb
|
129
|
-
- fasta_mod.rb
|
130
|
-
- gi2annot.rb
|
131
|
-
- protein_summary.rb
|
132
|
-
- raw2mzXML.rb
|
133
|
-
- fasta_cat.rb
|
134
132
|
- bioworks2sequestXML_gui.rb
|
135
|
-
-
|
133
|
+
- srf_group.rb
|
136
134
|
- pepproph_filter.rb
|
137
|
-
-
|
138
|
-
- bioworks_to_pepxml.rb
|
139
|
-
- mzxml_to_lmat.rb
|
135
|
+
- filter.rb
|
140
136
|
- protxml2prots_peps.rb
|
141
|
-
-
|
137
|
+
- raw_to_mzXML.rb
|
138
|
+
- gi2annot.rb
|
142
139
|
- id_class_anal.rb
|
143
140
|
- precision.rb
|
141
|
+
- id_precision.rb
|
142
|
+
- protein_summary.rb
|
143
|
+
- bioworks_to_pepxml.rb
|
144
|
+
- bioworks2excel.rb
|
145
|
+
- mzxml_to_lmat.rb
|
144
146
|
- fasta_shaker.rb
|
147
|
+
- find_aa_freq.rb
|
145
148
|
extensions: []
|
146
149
|
|
147
150
|
requirements:
|
148
151
|
- "\"xmlparser\" is the prefered xml parser right now. REXML and regular expressions are used as fallback in some routines."
|
149
152
|
- some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)
|
150
|
-
- the "t2x" binary to convert .RAW files to mzXML
|
153
|
+
- the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications
|
154
|
+
- "\"rake\" is useful for development"
|
155
|
+
- "\"webgen (with gems redcloth and bluecloth) is necessary to build web pages"
|
151
156
|
dependencies:
|
152
157
|
- !ruby/object:Gem::Dependency
|
153
158
|
name: libjtp
|
data/bin/fasta_cat.rb
DELETED
@@ -1,39 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby
|
2
|
-
|
3
|
-
require 'fasta'
|
4
|
-
require 'getoptlong'
|
5
|
-
|
6
|
-
connector = Fasta::FILE_CONNECTOR
|
7
|
-
|
8
|
-
# Get the prefix option:
|
9
|
-
opts = GetoptLong.new(
|
10
|
-
[ "-p", "--prefixes", GetoptLong::REQUIRED_ARGUMENT]
|
11
|
-
)
|
12
|
-
|
13
|
-
opt_hash = {}
|
14
|
-
opts.each do |opt, arg|
|
15
|
-
opt_hash[opt] = arg
|
16
|
-
end
|
17
|
-
|
18
|
-
prefix_array = nil
|
19
|
-
if opt_hash.key?('-p')
|
20
|
-
prefix_array = opt_hash['-p'].split(',')
|
21
|
-
end
|
22
|
-
|
23
|
-
# Usage info:
|
24
|
-
if ARGV.size < 2
|
25
|
-
puts "
|
26
|
-
usage: #{File.basename(__FILE__)} [-p=prefix1,prefix2,...] <file1>.fasta <file2>.fasta ...
|
27
|
-
|
28
|
-
Concatenates the files together with '#{connector}' (the file extension will
|
29
|
-
be the extension of the first file).
|
30
|
-
|
31
|
-
-p prefixes protein headers with the corresponding value in the comma
|
32
|
-
separated list.
|
33
|
-
"
|
34
|
-
exit
|
35
|
-
end
|
36
|
-
|
37
|
-
files = ARGV.to_a
|
38
|
-
outfile = Fasta.cat_and_prefix(files, prefix_array, connector)
|
39
|
-
puts "OUTFILE: #{outfile}"
|
data/bin/fasta_cat_mod.rb
DELETED
@@ -1,59 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby
|
2
|
-
|
3
|
-
require 'fasta'
|
4
|
-
require 'optparse'
|
5
|
-
|
6
|
-
hash = {
|
7
|
-
'shuffle' => {
|
8
|
-
'method' => :aaseq_shuffle!,
|
9
|
-
'protein_header_prefix' => Fasta::SHUFF_PREFIX,
|
10
|
-
'file_postfix' => Fasta::CAT_SHUFF_FILE_POSTFIX,
|
11
|
-
},
|
12
|
-
'invert' => {
|
13
|
-
'method' => :aaseq_invert!,
|
14
|
-
'protein_header_prefix' => Fasta::INV_PREFIX,
|
15
|
-
'file_postfix' => Fasta::CAT_INV_FILE_POSTFIX,
|
16
|
-
},
|
17
|
-
}
|
18
|
-
|
19
|
-
opt = {}
|
20
|
-
OptionParser.new do |opts|
|
21
|
-
opts.on("-f", "--fraction FLOAT", "fraction") {|v| opt['f'] = v }
|
22
|
-
end.parse!
|
23
|
-
|
24
|
-
|
25
|
-
if ARGV.size < 2
|
26
|
-
puts "
|
27
|
-
usage: #{File.basename(__FILE__)} [-f <fraction>] <method> <file>.fasta ...
|
28
|
-
|
29
|
-
The AA seq's of (a fraction of) proteins will be modified according to
|
30
|
-
<method> and concatenated to the end of the normal proteins. Each modified
|
31
|
-
protein's header takes on a header prefix after the '>'. Each file takes on
|
32
|
-
a postfix (before the extension).
|
33
|
-
|
34
|
-
METHOD PROT_PREFIX FILE_POSTFIX
|
35
|
-
shuffle #{hash['shuffle']['protein_header_prefix']} #{hash['shuffle']['file_postfix']}
|
36
|
-
invert #{hash['invert']['protein_header_prefix']} #{hash['invert']['file_postfix']}
|
37
|
-
"
|
38
|
-
exit
|
39
|
-
end
|
40
|
-
|
41
|
-
method = ARGV.shift
|
42
|
-
opt_h = nil
|
43
|
-
if hash.key? method
|
44
|
-
opth = hash[method]
|
45
|
-
else
|
46
|
-
abort "invalid method! choose: #{hash.keys.join(", ")}"
|
47
|
-
end
|
48
|
-
|
49
|
-
fraction = 1; if opt.key?('f') then fraction = opt['f'] end
|
50
|
-
|
51
|
-
specific_method = opth['method']
|
52
|
-
file_postfix = opth['file_postfix']
|
53
|
-
protein_header_prefix = opth['protein_header_prefix']
|
54
|
-
#puts [file, specific_method, fraction, file_postfix, protein_header_prefix].join("*")
|
55
|
-
|
56
|
-
ARGV.each do |file|
|
57
|
-
outfile = Fasta.modify_fraction_and_cat_to_file(file, specific_method, fraction, file_postfix, protein_header_prefix)
|
58
|
-
puts "OUTPUT: #{outfile}"
|
59
|
-
end
|