mspire 0.1.7 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +41 -14
- data/bin/bioworks2excel.rb +1 -1
- data/bin/bioworks_to_pepxml.rb +46 -59
- data/bin/fasta_shaker.rb +1 -1
- data/bin/filter.rb +6 -0
- data/bin/find_aa_freq.rb +23 -0
- data/bin/id_precision.rb +3 -2
- data/bin/mzxml_to_lmat.rb +2 -1
- data/bin/pepproph_filter.rb +1 -1
- data/bin/precision.rb +1 -1
- data/bin/protein_summary.rb +2 -451
- data/bin/raw_to_mzXML.rb +55 -0
- data/bin/srf_group.rb +26 -0
- data/changelog.txt +7 -0
- data/lib/align.rb +3 -3
- data/lib/fasta.rb +6 -1
- data/lib/gi.rb +9 -4
- data/lib/roc.rb +2 -0
- data/lib/sample_enzyme.rb +2 -1
- data/lib/spec/mzxml/parser.rb +2 -43
- data/lib/spec/mzxml.rb +65 -2
- data/lib/spec_id/aa_freqs.rb +10 -7
- data/lib/spec_id/bioworks.rb +67 -87
- data/lib/spec_id/filter.rb +794 -0
- data/lib/spec_id/precision.rb +29 -36
- data/lib/spec_id/proph.rb +5 -3
- data/lib/spec_id/protein_summary.rb +459 -0
- data/lib/spec_id/sequest.rb +323 -271
- data/lib/spec_id/srf.rb +189 -135
- data/lib/spec_id.rb +276 -227
- data/lib/spec_id_xml.rb +101 -0
- data/lib/toppred.rb +18 -0
- data/script/degenerate_peptides.rb +47 -0
- data/script/filter-peps.rb +5 -1
- data/test/tc_align.rb +1 -1
- data/test/tc_bioworks.rb +25 -22
- data/test/tc_bioworks_to_pepxml.rb +37 -4
- data/test/tc_fasta.rb +3 -1
- data/test/tc_fasta_shaker.rb +8 -6
- data/test/tc_filter.rb +203 -0
- data/test/tc_gi.rb +6 -9
- data/test/tc_id_precision.rb +31 -0
- data/test/tc_mzxml.rb +8 -6
- data/test/tc_peptide_parent_times.rb +2 -1
- data/test/tc_precision.rb +1 -1
- data/test/tc_proph.rb +5 -5
- data/test/tc_protein_summary.rb +36 -13
- data/test/tc_sequest.rb +78 -33
- data/test/tc_spec_id.rb +128 -6
- data/test/tc_srf.rb +84 -38
- metadata +67 -62
- data/bin/fasta_cat.rb +0 -39
- data/bin/fasta_cat_mod.rb +0 -59
- data/bin/fasta_mod.rb +0 -57
- data/bin/filter_spec_id.rb +0 -365
- data/bin/raw2mzXML.rb +0 -21
- data/script/gen_database_searching.rb +0 -258
data/test/tc_spec_id.rb
CHANGED
@@ -8,11 +8,14 @@ class SpecIDTest < Test::Unit::TestCase
|
|
8
8
|
def initialize(arg)
|
9
9
|
super(arg)
|
10
10
|
@tfiles = File.dirname(__FILE__) + '/tfiles/'
|
11
|
+
@tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
|
11
12
|
@bw = @tfiles + "bioworks_small.xml"
|
13
|
+
@old_prot_proph = @tfiles + 'yeast_gly_small-prot.xml'
|
14
|
+
@prot_proph = @tfiles + 'opd1/000_020_3prots-prot.xml'
|
15
|
+
@srf = @tfiles_l + '7MIX_STD_110802_1.srf'
|
12
16
|
end
|
13
17
|
|
14
18
|
def test_spec_id_creation
|
15
|
-
sp = SpecID.new
|
16
19
|
sp = SpecID.new(@bw)
|
17
20
|
assert_equal(106, sp.prots.size)
|
18
21
|
end
|
@@ -45,20 +48,21 @@ class SpecIDTest < Test::Unit::TestCase
|
|
45
48
|
end
|
46
49
|
[write_index, bo]
|
47
50
|
end
|
51
|
+
roc = ROC.new
|
48
52
|
tp, fp = ROC.new.prep_list(answ)
|
49
|
-
(exp_tp, exp_fp) =
|
53
|
+
(exp_tp, exp_fp) = roc.tps_and_ppv(tp, fp)
|
50
54
|
|
51
55
|
sp = SpecID.new(file)
|
52
56
|
assert_equal(19, sp.prots.size)
|
53
57
|
tp, fp = sp.rank_and_classify(:prots, proc {|prt| prt.probability }, proc {|prt| if prt.reference =~ /^INV_/ ; false; else; true; end })
|
54
|
-
tps, ys =
|
58
|
+
(tps, ys) = roc.tps_and_ppv(tp, fp)
|
55
59
|
assert_equal(exp_tp, tps)
|
56
60
|
assert_equal(exp_fp, ys)
|
57
|
-
|
61
|
+
(num_hits, prec) = sp.num_hits_and_ppv_for_prob("INV_")
|
58
62
|
# @TODO: assert these guys for consistency's sake:
|
59
63
|
assert_in_delta_arrays([1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14, 15], tps, 0.0000001)
|
60
|
-
|
61
|
-
assert_in_delta_arrays([
|
64
|
+
# Consistency check only:
|
65
|
+
assert_in_delta_arrays([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.909090909090909, 0.916666666666667, 0.923076923076923, 0.928571428571429, 0.866666666666667], prec, 0.0000001)
|
62
66
|
end
|
63
67
|
|
64
68
|
def assert_in_delta_arrays(one, two, delta, message=nil)
|
@@ -66,5 +70,123 @@ class SpecIDTest < Test::Unit::TestCase
|
|
66
70
|
assert_in_delta(v, two[i], delta, message)
|
67
71
|
end
|
68
72
|
end
|
73
|
+
|
74
|
+
def test_file_type
|
75
|
+
assert_equal('bioworks', SpecID.file_type(@bw))
|
76
|
+
assert_equal('protproph', SpecID.file_type(@prot_proph))
|
77
|
+
assert_equal('srg', SpecID.file_type('whatever.srg'))
|
78
|
+
## WOULD BE NICE TO GET THIS WORKING, TOO
|
79
|
+
# assert_equal('protproph', SpecID.file_type(@old_prot_proph))
|
80
|
+
if File.exist? @tfiles_l
|
81
|
+
assert File.exist?(@srf), "file #{@srf} is there"
|
82
|
+
assert_equal('srf', SpecID.file_type(@srf))
|
83
|
+
else
|
84
|
+
assert_nil( puts("\n--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
def test_non_standard_aa_removal
|
89
|
+
hash = {"K.PEPTIDE.Z" => "K.PEPTIDE.Z", "K.*M" => "K.M", "aI" => 'I', "YI.&" => "YI.", "EI.!@#\$%^&*(){}[]|\\;:'\"<>,?/EI" => 'EI.EI'}
|
90
|
+
cl = proc {|v| SpecID::Pep.remove_non_amino_acids(v) }
|
91
|
+
hash.each do |k,v|
|
92
|
+
assert_equal(v, cl.call(k))
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
|
97
|
+
|
98
|
+
end
|
99
|
+
|
100
|
+
class MyProt ; include SpecID::Prot ; end
|
101
|
+
class MyPep ; include SpecID::Pep ; attr_accessor :xcorr end
|
102
|
+
|
103
|
+
|
104
|
+
class TestOccamsRazor < Test::Unit::TestCase
|
105
|
+
|
106
|
+
def test_small
|
107
|
+
|
108
|
+
prots = (0..6).to_a.map do |n|
|
109
|
+
prot = MyProt.new
|
110
|
+
prot.reference = "ref_#{n}"
|
111
|
+
prot
|
112
|
+
end
|
113
|
+
|
114
|
+
peps = (0..12).to_a.map {|v| MyPep.new }
|
115
|
+
|
116
|
+
# 0 1 2 3 4 5 6 7 8 9 10 11 12
|
117
|
+
aaseqs = %w(AAA BBB CCC ABC AAA BBB CCC ABC DDD EEE FFF EEEEE DDD)
|
118
|
+
xcorrs = [1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 0.5, 0.6, 0.7, 0.8, 0.5]
|
119
|
+
|
120
|
+
peps.zip(aaseqs, xcorrs) do |pep,aaseq,xcorr|
|
121
|
+
pep.aaseq = aaseq
|
122
|
+
pep.xcorr = xcorr
|
123
|
+
end
|
124
|
+
|
125
|
+
prots[0].peps = peps[0,4]
|
126
|
+
prots[1].peps = [peps[2]] ## should be missing
|
127
|
+
|
128
|
+
test_prots = prots[0,2]
|
129
|
+
require 'pp'
|
130
|
+
answ = SpecID.occams_razor(test_prots)
|
131
|
+
answ.each do |an|
|
132
|
+
assert( an[0].is_a?(SpecID::Prot), "prots are there")
|
133
|
+
end
|
134
|
+
first = answ.first
|
135
|
+
assert_equal( prots[0], first[0])
|
136
|
+
assert_equal_array_content( prots[0].peps, first[1])
|
137
|
+
|
138
|
+
|
139
|
+
|
140
|
+
#prots[2].peps = [peps[2]]
|
141
|
+
#prots[2].peps.push( peps[3] ) ## should be there since it has 2
|
142
|
+
#prots[3].peps = [peps[3]] ## should be missing
|
143
|
+
end
|
144
|
+
|
145
|
+
def assert_equal_array_content(exp1, ans, message='')
|
146
|
+
exp1.each do |item|
|
147
|
+
assert(ans.include?(item), "finding #{item}: #{message}")
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
|
153
|
+
require 'fasta'
|
154
|
+
|
155
|
+
class TestProteinGroups < Test::Unit::TestCase
|
156
|
+
|
157
|
+
def test_small
|
158
|
+
prots = []
|
159
|
+
|
160
|
+
aaseq = ('A'..'Z').to_a.join('')
|
161
|
+
header = "prot1"
|
162
|
+
prots << Fasta::Prot.new(header, aaseq)
|
163
|
+
|
164
|
+
aaseq = ('A'..'Z').to_a.reverse.join('')
|
165
|
+
header = "prot1_reverse"
|
166
|
+
prots << Fasta::Prot.new(header, aaseq)
|
167
|
+
|
168
|
+
aaseq = ('A'..'Z').to_a.join('')
|
169
|
+
header = "prot1_identical"
|
170
|
+
prots << Fasta::Prot.new(header, aaseq)
|
171
|
+
|
172
|
+
aaseq = ('A'..'E').to_a.join('')
|
173
|
+
header = "prot1_short"
|
174
|
+
prots << Fasta::Prot.new(header, aaseq)
|
175
|
+
|
176
|
+
aaseq = ('A'..'E').to_a.reverse.join('')
|
177
|
+
header = "prot1_reverse_short"
|
178
|
+
prots << Fasta::Prot.new(header, aaseq)
|
179
|
+
|
180
|
+
fasta = Fasta.new(prots)
|
181
|
+
|
182
|
+
pep_seqs = %w(ABCD DEFG ABCD DEFG EDCB FEDCB XYZ RANDOM AEABA)
|
183
|
+
|
184
|
+
arr = SpecID::Pep.protein_groups_by_sequence(pep_seqs, fasta)
|
185
|
+
|
186
|
+
exp = [[prots[0], prots[2], prots[3]], [prots[0], prots[2]], [prots[0], prots[2], prots[3]], [prots[0],prots[2]], [prots[1], prots[4]], [prots[1]], [prots[0], prots[2]], [], []]
|
187
|
+
|
188
|
+
assert_equal(exp, arr)
|
189
|
+
end
|
190
|
+
|
69
191
|
end
|
70
192
|
|
data/test/tc_srf.rb
CHANGED
@@ -50,53 +50,70 @@ module ToMatch
|
|
50
50
|
|
51
51
|
Out_files_first = {
|
52
52
|
:num_hits => 10,
|
53
|
-
:charge => 1,
|
54
53
|
:computer => "VELA",
|
55
54
|
:date_time => "11/17/2006, 04:13 PM,",
|
56
55
|
}
|
57
56
|
|
58
57
|
Out_files_first_hit = {
|
59
58
|
:mh => 1220.5128044522,
|
60
|
-
:deltacn => 0.
|
59
|
+
:deltacn => 0.071944423019886, ## this is the modified version
|
61
60
|
:sp => 96.5815887451172,
|
62
61
|
:xcorr => 1.08377742767334,
|
63
62
|
:id => 224,
|
64
63
|
:rsp => 13,
|
65
64
|
:ions_matched => 8,
|
66
65
|
:ions_total => 20,
|
67
|
-
:
|
66
|
+
:sequence => "K.LCPHLTLLPGR.F",
|
67
|
+
:aaseq => "LCPHLTLLPGR",
|
68
68
|
:reference => "gi|1786425|gb|AAC73335.1| damage-inducible protein P; putative tRNA synthetase",
|
69
|
+
:first_scan => 2,
|
70
|
+
:last_scan => 2,
|
71
|
+
:base_name => '7MIX_STD_110802_1',
|
72
|
+
:charge => 1,
|
69
73
|
}
|
70
74
|
|
71
75
|
Out_files_last = {
|
72
76
|
:num_hits => 10,
|
73
|
-
:charge => 1,
|
74
77
|
:computer => "VELA",
|
75
78
|
:date_time => "11/17/2006, 04:25 PM," ,
|
76
79
|
}
|
77
80
|
Out_files_last_first_hit = {
|
78
81
|
:mh => 2605.9368784522,
|
79
|
-
:deltacn => 0.
|
82
|
+
:deltacn => 0.03921128064394,
|
80
83
|
:sp => 76.7447052001953,
|
81
84
|
:xcorr => 0.915680646896362,
|
82
85
|
:id => 13562,
|
83
86
|
:rsp => 4,
|
84
87
|
:ions_matched => 10,
|
85
88
|
:ions_total => 84,
|
86
|
-
:
|
89
|
+
:sequence => "K.HLEINPNHPIVETLRQKAETHK.N",
|
90
|
+
:aaseq => "HLEINPNHPIVETLRQKAETHK",
|
87
91
|
:reference => "gi|30149327|ref|XP_293672.2| similar to ebiP7687 [Homo sapiens]",
|
92
|
+
:first_scan => 7161,
|
93
|
+
:last_scan => 7161,
|
94
|
+
:base_name => '7MIX_STD_110802_1',
|
95
|
+
:deltamass => 2605.9368784522 - 2604.8360326775,
|
96
|
+
:ppm => ((1.0e6 * (2605.9368784522 - 2604.8360326775)) / 2604.8360326775).abs,
|
97
|
+
:charge => 3,
|
88
98
|
}
|
89
99
|
Out_files_last_last_hit = {
|
90
100
|
:mh => 2604.9025174522,
|
91
|
-
:deltacn =>
|
101
|
+
:deltacn => 1.1,
|
92
102
|
:sp => 26.1511478424072,
|
93
103
|
:xcorr => 0.634012818336487,
|
94
104
|
:id => 8105,
|
95
105
|
:rsp => 165,
|
96
106
|
:ions_matched => 6,
|
97
107
|
:ions_total => 84,
|
98
|
-
:
|
99
|
-
:
|
108
|
+
:sequence => "R.EAFLVNSDLTLRAQLTEFRDHK.L",
|
109
|
+
:aaseq => "EAFLVNSDLTLRAQLTEFRDHK",
|
110
|
+
:reference => "gi|5453830|ref|NP_006181.1| origin recognition complex, subunit 2-like; origin",
|
111
|
+
:first_scan => 7161,
|
112
|
+
:last_scan => 7161,
|
113
|
+
:base_name => '7MIX_STD_110802_1',
|
114
|
+
:deltamass => 2604.9025174522 - 2604.8360326775,
|
115
|
+
:ppm => ((1.0e6 * (2604.9025174522 - 2604.8360326775)) / 2604.8360326775).abs,
|
116
|
+
:charge => 3,
|
100
117
|
}
|
101
118
|
Sequest_params = {
|
102
119
|
"add_F_Phenylalanine"=>"0.0000",
|
@@ -163,50 +180,79 @@ module ToMatch
|
|
163
180
|
|
164
181
|
end
|
165
182
|
|
183
|
+
tfiles = File.dirname(__FILE__) + '/tfiles/'
|
184
|
+
tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
|
185
|
+
tf_srf = tfiles_l + "7MIX_STD_110802_1.srf"
|
186
|
+
tf_srf_inv = tfiles_l + "7MIX_STD_110802_1_INV.srf"
|
187
|
+
if File.exist? tfiles_l
|
188
|
+
start = Time.now
|
189
|
+
$group = SRFGroup.new([tf_srf, tf_srf_inv])
|
190
|
+
$srf = $group.srfs.first
|
191
|
+
puts "Time to read and compile two SRF: #{Time.now - start} secs"
|
192
|
+
end
|
193
|
+
|
194
|
+
|
166
195
|
class TestSRF < Test::Unit::TestCase
|
167
196
|
include ToMatch
|
168
197
|
def initialize(arg)
|
169
198
|
super(arg)
|
170
199
|
@tfiles = File.dirname(__FILE__) + '/tfiles/'
|
171
200
|
@tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
|
172
|
-
@
|
201
|
+
@srg_file = @tfiles + "tmp_bioworks.srg"
|
202
|
+
|
203
|
+
@srf = $srf
|
204
|
+
@group = $group
|
205
|
+
|
173
206
|
end
|
174
207
|
|
175
208
|
def test_basic
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
209
|
+
if File.exist? @tfiles_l
|
210
|
+
## Verify that we have everything and it is as we expect (not exhaustive)
|
211
|
+
head = @srf.header
|
212
|
+
dtgen = head.dta_gen
|
213
|
+
## HEADER
|
214
|
+
hash_match(Header, head)
|
215
|
+
hash_match(Dta_gen, dtgen)
|
216
|
+
## DTA_FILES
|
217
|
+
hash_match(Dta_files_first, @srf.dta_files.first)
|
218
|
+
hash_match(Dta_files_last, @srf.dta_files.last)
|
219
|
+
## OUT_FILES
|
220
|
+
hash_match(Out_files_first, @srf.out_files.first)
|
221
|
+
hash_match(Out_files_first_hit, @srf.out_files.first.hits.first)
|
222
|
+
hash_match(Out_files_last_first_hit, @srf.out_files.last.hits.first)
|
223
|
+
hash_match(Out_files_last_last_hit, @srf.out_files.last.hits.last)
|
224
|
+
## SEQUEST_PARAMS
|
225
|
+
hash_match(Sequest_params, @srf.params)
|
226
|
+
## INDEX
|
227
|
+
assert_equal([7161, 7161, 3], @srf.index.last)
|
228
|
+
assert_equal([2, 2, 1], @srf.index.first)
|
229
|
+
|
230
|
+
assert_equal(@srf.index.size, @srf.dta_files.size)
|
231
|
+
assert_equal(@srf.out_files.size, @srf.dta_files.size)
|
232
|
+
else
|
233
|
+
assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
|
234
|
+
end
|
235
|
+
end
|
236
|
+
|
237
|
+
def test_srg
|
238
|
+
if File.exist? @tfiles_l
|
239
|
+
@group.to_srg(@srg_file)
|
240
|
+
assert(File.exist?(@srg_file), "file exists: " + @srg_file )
|
241
|
+
else
|
242
|
+
assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
|
243
|
+
end
|
201
244
|
end
|
202
245
|
|
203
|
-
|
246
|
+
## treats :reference specially
|
247
|
+
def hash_match(hash, srf)
|
204
248
|
hash.each do |k,v|
|
205
249
|
if v.is_a? Float
|
206
250
|
delta = v/100000
|
207
|
-
assert_in_delta(
|
251
|
+
assert_in_delta(v, srf.send(k.to_sym), delta, "param: #{k}")
|
252
|
+
elsif k == :reference
|
253
|
+
assert_equal(v[0,38], srf.prots.first.reference)
|
208
254
|
else
|
209
|
-
assert_equal(
|
255
|
+
assert_equal(v, srf.send(k.to_sym), "param: #{k}")
|
210
256
|
end
|
211
257
|
end
|
212
258
|
end
|
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.
|
2
|
+
rubygems_version: 0.9.2
|
3
3
|
specification_version: 1
|
4
4
|
name: mspire
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.
|
7
|
-
date: 2007-
|
6
|
+
version: 0.2.0
|
7
|
+
date: 2007-04-25 00:00:00 -05:00
|
8
8
|
summary: Mass Spectrometry Proteomics Objects, Scripts, and Executables
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -29,28 +29,32 @@ post_install_message:
|
|
29
29
|
authors:
|
30
30
|
- John Prince
|
31
31
|
files:
|
32
|
-
- lib/spec_id
|
32
|
+
- lib/spec_id.rb
|
33
33
|
- lib/align
|
34
|
+
- lib/spec_id_xml.rb
|
35
|
+
- lib/spec_id
|
36
|
+
- lib/toppred.rb
|
37
|
+
- lib/align.rb
|
34
38
|
- lib/spec
|
35
|
-
- lib/sample_enzyme.rb
|
36
39
|
- lib/fasta.rb
|
37
|
-
- lib/roc.rb
|
38
|
-
- lib/spec_id.rb
|
39
40
|
- lib/gi.rb
|
40
|
-
- lib/
|
41
|
-
- lib/
|
42
|
-
- lib/
|
43
|
-
- lib/spec_id/proph.rb
|
41
|
+
- lib/roc.rb
|
42
|
+
- lib/sample_enzyme.rb
|
43
|
+
- lib/align/chams.rb
|
44
44
|
- lib/spec_id/sequest.rb
|
45
|
+
- lib/spec_id/filter.rb
|
46
|
+
- lib/spec_id/bioworks.rb
|
47
|
+
- lib/spec_id/srf.rb
|
45
48
|
- lib/spec_id/precision.rb
|
49
|
+
- lib/spec_id/protein_summary.rb
|
46
50
|
- lib/spec_id/aa_freqs.rb
|
47
|
-
- lib/
|
48
|
-
- lib/spec/mzxml
|
49
|
-
- lib/spec/mzdata
|
51
|
+
- lib/spec_id/proph.rb
|
50
52
|
- lib/spec/msrun.rb
|
51
53
|
- lib/spec/scan.rb
|
52
|
-
- lib/spec/mzxml
|
54
|
+
- lib/spec/mzxml
|
53
55
|
- lib/spec/mzdata.rb
|
56
|
+
- lib/spec/mzdata
|
57
|
+
- lib/spec/mzxml.rb
|
54
58
|
- lib/spec/mzxml/parser.rb
|
55
59
|
- lib/spec/mzdata/parser.rb
|
56
60
|
- INSTALL
|
@@ -59,62 +63,62 @@ files:
|
|
59
63
|
- LICENSE
|
60
64
|
- changelog.txt
|
61
65
|
- release_notes.txt
|
62
|
-
- bin/fasta_cat_mod.rb
|
63
|
-
- bin/fasta_mod.rb
|
64
|
-
- bin/gi2annot.rb
|
65
|
-
- bin/protein_summary.rb
|
66
|
-
- bin/raw2mzXML.rb
|
67
|
-
- bin/fasta_cat.rb
|
68
66
|
- bin/bioworks2sequestXML_gui.rb
|
69
|
-
- bin/
|
67
|
+
- bin/srf_group.rb
|
70
68
|
- bin/pepproph_filter.rb
|
71
|
-
- bin/
|
72
|
-
- bin/bioworks_to_pepxml.rb
|
73
|
-
- bin/mzxml_to_lmat.rb
|
69
|
+
- bin/filter.rb
|
74
70
|
- bin/protxml2prots_peps.rb
|
75
|
-
- bin/
|
71
|
+
- bin/raw_to_mzXML.rb
|
72
|
+
- bin/gi2annot.rb
|
76
73
|
- bin/id_class_anal.rb
|
77
74
|
- bin/precision.rb
|
75
|
+
- bin/id_precision.rb
|
76
|
+
- bin/protein_summary.rb
|
77
|
+
- bin/bioworks_to_pepxml.rb
|
78
|
+
- bin/bioworks2excel.rb
|
79
|
+
- bin/mzxml_to_lmat.rb
|
78
80
|
- bin/fasta_shaker.rb
|
81
|
+
- bin/find_aa_freq.rb
|
79
82
|
- script/prep_dir.rb
|
83
|
+
- script/degenerate_peptides.rb
|
84
|
+
- script/histogram_probs.rb
|
85
|
+
- script/simple_protein_digestion.rb
|
86
|
+
- script/top_hit_per_scan.rb
|
80
87
|
- script/msvis.rb
|
81
|
-
- script/gen_database_searching.rb
|
82
88
|
- script/mzXML2timeIndex.rb
|
83
89
|
- script/tpp_installer.rb
|
84
|
-
- script/create_little_pepxml.rb
|
85
|
-
- script/histogram_probs.rb
|
86
|
-
- script/top_hit_per_scan.rb
|
87
90
|
- script/filter-peps.rb
|
88
|
-
- script/simple_protein_digestion.rb
|
89
|
-
- script/genuine_tps_and_probs.rb
|
90
91
|
- script/estimate_fpr_by_cysteine.rb
|
92
|
+
- script/genuine_tps_and_probs.rb
|
93
|
+
- script/create_little_pepxml.rb
|
91
94
|
- script/find_cysteine_background.rb
|
92
95
|
test_files:
|
93
|
-
- test/
|
94
|
-
- test/
|
95
|
-
- test/
|
96
|
-
- test/
|
97
|
-
- test/tc_spec.rb
|
98
|
-
- test/tc_aa_freqs.rb
|
99
|
-
- test/tc_protein_summary.rb
|
96
|
+
- test/tc_spec_id_xml.rb
|
97
|
+
- test/tc_mzxml_to_lmat.rb
|
98
|
+
- test/tc_id_class_anal.rb
|
99
|
+
- test/tc_gi.rb
|
100
100
|
- test/tc_fasta.rb
|
101
|
-
- test/tc_bioworks.rb
|
102
101
|
- test/tc_peptide_parent_times.rb
|
103
|
-
- test/tc_msrun.rb
|
104
102
|
- test/tc_spec_id.rb
|
103
|
+
- test/tc_roc.rb
|
105
104
|
- test/tc_mzxml.rb
|
105
|
+
- test/tc_sample_enzyme.rb
|
106
|
+
- test/tc_srf.rb
|
107
|
+
- test/tc_bioworks.rb
|
108
|
+
- test/tc_spec.rb
|
109
|
+
- test/tc_bioworks_to_pepxml.rb
|
110
|
+
- test/tc_scan.rb
|
111
|
+
- test/tc_sequest.rb
|
112
|
+
- test/tc_fasta_shaker.rb
|
106
113
|
- test/tc_id_precision.rb
|
107
|
-
- test/
|
114
|
+
- test/tc_msrun.rb
|
115
|
+
- test/tc_protein_summary.rb
|
108
116
|
- test/tc_filter_peps.rb
|
117
|
+
- test/tc_filter.rb
|
118
|
+
- test/tc_aa_freqs.rb
|
119
|
+
- test/tc_proph.rb
|
120
|
+
- test/tc_align.rb
|
109
121
|
- test/tc_precision.rb
|
110
|
-
- test/tc_roc.rb
|
111
|
-
- test/tc_scan.rb
|
112
|
-
- test/tc_mzxml_to_lmat.rb
|
113
|
-
- test/tc_bioworks_to_pepxml.rb
|
114
|
-
- test/tc_sample_enzyme.rb
|
115
|
-
- test/tc_fasta_shaker.rb
|
116
|
-
- test/tc_gi.rb
|
117
|
-
- test/tc_spec_id_xml.rb
|
118
122
|
rdoc_options:
|
119
123
|
- --main
|
120
124
|
- README
|
@@ -125,29 +129,30 @@ extra_rdoc_files:
|
|
125
129
|
- INSTALL
|
126
130
|
- LICENSE
|
127
131
|
executables:
|
128
|
-
- fasta_cat_mod.rb
|
129
|
-
- fasta_mod.rb
|
130
|
-
- gi2annot.rb
|
131
|
-
- protein_summary.rb
|
132
|
-
- raw2mzXML.rb
|
133
|
-
- fasta_cat.rb
|
134
132
|
- bioworks2sequestXML_gui.rb
|
135
|
-
-
|
133
|
+
- srf_group.rb
|
136
134
|
- pepproph_filter.rb
|
137
|
-
-
|
138
|
-
- bioworks_to_pepxml.rb
|
139
|
-
- mzxml_to_lmat.rb
|
135
|
+
- filter.rb
|
140
136
|
- protxml2prots_peps.rb
|
141
|
-
-
|
137
|
+
- raw_to_mzXML.rb
|
138
|
+
- gi2annot.rb
|
142
139
|
- id_class_anal.rb
|
143
140
|
- precision.rb
|
141
|
+
- id_precision.rb
|
142
|
+
- protein_summary.rb
|
143
|
+
- bioworks_to_pepxml.rb
|
144
|
+
- bioworks2excel.rb
|
145
|
+
- mzxml_to_lmat.rb
|
144
146
|
- fasta_shaker.rb
|
147
|
+
- find_aa_freq.rb
|
145
148
|
extensions: []
|
146
149
|
|
147
150
|
requirements:
|
148
151
|
- "\"xmlparser\" is the preferred xml parser right now. REXML and regular expressions are used as fallback in some routines."
|
149
152
|
- some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)
|
150
|
-
- the "t2x" binary to convert .RAW files to mzXML
|
153
|
+
- the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications
|
154
|
+
- "\"rake\" is useful for development"
|
155
|
+
- "\"webgen\" (with gems redcloth and bluecloth) is necessary to build web pages"
|
151
156
|
dependencies:
|
152
157
|
- !ruby/object:Gem::Dependency
|
153
158
|
name: libjtp
|
data/bin/fasta_cat.rb
DELETED
@@ -1,39 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby
|
2
|
-
|
3
|
-
require 'fasta'
|
4
|
-
require 'getoptlong'
|
5
|
-
|
6
|
-
connector = Fasta::FILE_CONNECTOR
|
7
|
-
|
8
|
-
# Get the prefix option:
|
9
|
-
opts = GetoptLong.new(
|
10
|
-
[ "-p", "--prefixes", GetoptLong::REQUIRED_ARGUMENT]
|
11
|
-
)
|
12
|
-
|
13
|
-
opt_hash = {}
|
14
|
-
opts.each do |opt, arg|
|
15
|
-
opt_hash[opt] = arg
|
16
|
-
end
|
17
|
-
|
18
|
-
prefix_array = nil
|
19
|
-
if opt_hash.key?('-p')
|
20
|
-
prefix_array = opt_hash['-p'].split(',')
|
21
|
-
end
|
22
|
-
|
23
|
-
# Usage info:
|
24
|
-
if ARGV.size < 2
|
25
|
-
puts "
|
26
|
-
usage: #{File.basename(__FILE__)} [-p=prefix1,prefix2,...] <file1>.fasta <file2>.fasta ...
|
27
|
-
|
28
|
-
Concatenates the files together with '#{connector}' (the file extension will
|
29
|
-
be the extension of the first file).
|
30
|
-
|
31
|
-
-p prefixes protein headers with the corresponding value in the comma
|
32
|
-
separated list.
|
33
|
-
"
|
34
|
-
exit
|
35
|
-
end
|
36
|
-
|
37
|
-
files = ARGV.to_a
|
38
|
-
outfile = Fasta.cat_and_prefix(files, prefix_array, connector)
|
39
|
-
puts "OUTFILE: #{outfile}"
|
data/bin/fasta_cat_mod.rb
DELETED
@@ -1,59 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby
|
2
|
-
|
3
|
-
require 'fasta'
|
4
|
-
require 'optparse'
|
5
|
-
|
6
|
-
hash = {
|
7
|
-
'shuffle' => {
|
8
|
-
'method' => :aaseq_shuffle!,
|
9
|
-
'protein_header_prefix' => Fasta::SHUFF_PREFIX,
|
10
|
-
'file_postfix' => Fasta::CAT_SHUFF_FILE_POSTFIX,
|
11
|
-
},
|
12
|
-
'invert' => {
|
13
|
-
'method' => :aaseq_invert!,
|
14
|
-
'protein_header_prefix' => Fasta::INV_PREFIX,
|
15
|
-
'file_postfix' => Fasta::CAT_INV_FILE_POSTFIX,
|
16
|
-
},
|
17
|
-
}
|
18
|
-
|
19
|
-
opt = {}
|
20
|
-
OptionParser.new do |opts|
|
21
|
-
opts.on("-f", "--fraction FLOAT", "fraction") {|v| opt['f'] = v }
|
22
|
-
end.parse!
|
23
|
-
|
24
|
-
|
25
|
-
if ARGV.size < 2
|
26
|
-
puts "
|
27
|
-
usage: #{File.basename(__FILE__)} [-f <fraction>] <method> <file>.fasta ...
|
28
|
-
|
29
|
-
The AA seq's of (a fraction of) proteins will be modified according to
|
30
|
-
<method> and concatenated to the end of the normal proteins. Each modified
|
31
|
-
protein's header takes on a header prefix after the '>'. Each file takes on
|
32
|
-
a postfix (before the extension).
|
33
|
-
|
34
|
-
METHOD PROT_PREFIX FILE_POSTFIX
|
35
|
-
shuffle #{hash['shuffle']['protein_header_prefix']} #{hash['shuffle']['file_postfix']}
|
36
|
-
invert #{hash['invert']['protein_header_prefix']} #{hash['invert']['file_postfix']}
|
37
|
-
"
|
38
|
-
exit
|
39
|
-
end
|
40
|
-
|
41
|
-
method = ARGV.shift
|
42
|
-
opt_h = nil
|
43
|
-
if hash.key? method
|
44
|
-
opth = hash[method]
|
45
|
-
else
|
46
|
-
abort "invalid method! choose: #{hash.keys.join(", ")}"
|
47
|
-
end
|
48
|
-
|
49
|
-
fraction = 1; if opt.key?('f') then fraction = opt['f'] end
|
50
|
-
|
51
|
-
specific_method = opth['method']
|
52
|
-
file_postfix = opth['file_postfix']
|
53
|
-
protein_header_prefix = opth['protein_header_prefix']
|
54
|
-
#puts [file, specific_method, fraction, file_postfix, protein_header_prefix].join("*")
|
55
|
-
|
56
|
-
ARGV.each do |file|
|
57
|
-
outfile = Fasta.modify_fraction_and_cat_to_file(file, specific_method, fraction, file_postfix, protein_header_prefix)
|
58
|
-
puts "OUTPUT: #{outfile}"
|
59
|
-
end
|