mspire 0.1.7 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. data/Rakefile +41 -14
  2. data/bin/bioworks2excel.rb +1 -1
  3. data/bin/bioworks_to_pepxml.rb +46 -59
  4. data/bin/fasta_shaker.rb +1 -1
  5. data/bin/filter.rb +6 -0
  6. data/bin/find_aa_freq.rb +23 -0
  7. data/bin/id_precision.rb +3 -2
  8. data/bin/mzxml_to_lmat.rb +2 -1
  9. data/bin/pepproph_filter.rb +1 -1
  10. data/bin/precision.rb +1 -1
  11. data/bin/protein_summary.rb +2 -451
  12. data/bin/raw_to_mzXML.rb +55 -0
  13. data/bin/srf_group.rb +26 -0
  14. data/changelog.txt +7 -0
  15. data/lib/align.rb +3 -3
  16. data/lib/fasta.rb +6 -1
  17. data/lib/gi.rb +9 -4
  18. data/lib/roc.rb +2 -0
  19. data/lib/sample_enzyme.rb +2 -1
  20. data/lib/spec/mzxml/parser.rb +2 -43
  21. data/lib/spec/mzxml.rb +65 -2
  22. data/lib/spec_id/aa_freqs.rb +10 -7
  23. data/lib/spec_id/bioworks.rb +67 -87
  24. data/lib/spec_id/filter.rb +794 -0
  25. data/lib/spec_id/precision.rb +29 -36
  26. data/lib/spec_id/proph.rb +5 -3
  27. data/lib/spec_id/protein_summary.rb +459 -0
  28. data/lib/spec_id/sequest.rb +323 -271
  29. data/lib/spec_id/srf.rb +189 -135
  30. data/lib/spec_id.rb +276 -227
  31. data/lib/spec_id_xml.rb +101 -0
  32. data/lib/toppred.rb +18 -0
  33. data/script/degenerate_peptides.rb +47 -0
  34. data/script/filter-peps.rb +5 -1
  35. data/test/tc_align.rb +1 -1
  36. data/test/tc_bioworks.rb +25 -22
  37. data/test/tc_bioworks_to_pepxml.rb +37 -4
  38. data/test/tc_fasta.rb +3 -1
  39. data/test/tc_fasta_shaker.rb +8 -6
  40. data/test/tc_filter.rb +203 -0
  41. data/test/tc_gi.rb +6 -9
  42. data/test/tc_id_precision.rb +31 -0
  43. data/test/tc_mzxml.rb +8 -6
  44. data/test/tc_peptide_parent_times.rb +2 -1
  45. data/test/tc_precision.rb +1 -1
  46. data/test/tc_proph.rb +5 -5
  47. data/test/tc_protein_summary.rb +36 -13
  48. data/test/tc_sequest.rb +78 -33
  49. data/test/tc_spec_id.rb +128 -6
  50. data/test/tc_srf.rb +84 -38
  51. metadata +67 -62
  52. data/bin/fasta_cat.rb +0 -39
  53. data/bin/fasta_cat_mod.rb +0 -59
  54. data/bin/fasta_mod.rb +0 -57
  55. data/bin/filter_spec_id.rb +0 -365
  56. data/bin/raw2mzXML.rb +0 -21
  57. data/script/gen_database_searching.rb +0 -258
data/test/tc_spec_id.rb CHANGED
@@ -8,11 +8,14 @@ class SpecIDTest < Test::Unit::TestCase
8
8
  def initialize(arg)
9
9
  super(arg)
10
10
  @tfiles = File.dirname(__FILE__) + '/tfiles/'
11
+ @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
11
12
  @bw = @tfiles + "bioworks_small.xml"
13
+ @old_prot_proph = @tfiles + 'yeast_gly_small-prot.xml'
14
+ @prot_proph = @tfiles + 'opd1/000_020_3prots-prot.xml'
15
+ @srf = @tfiles_l + '7MIX_STD_110802_1.srf'
12
16
  end
13
17
 
14
18
  def test_spec_id_creation
15
- sp = SpecID.new
16
19
  sp = SpecID.new(@bw)
17
20
  assert_equal(106, sp.prots.size)
18
21
  end
@@ -45,20 +48,21 @@ class SpecIDTest < Test::Unit::TestCase
45
48
  end
46
49
  [write_index, bo]
47
50
  end
51
+ roc = ROC.new
48
52
  tp, fp = ROC.new.prep_list(answ)
49
- (exp_tp, exp_fp) = ROC.new.by_tps(:fpr2, tp, fp)
53
+ (exp_tp, exp_fp) = roc.tps_and_ppv(tp, fp)
50
54
 
51
55
  sp = SpecID.new(file)
52
56
  assert_equal(19, sp.prots.size)
53
57
  tp, fp = sp.rank_and_classify(:prots, proc {|prt| prt.probability }, proc {|prt| if prt.reference =~ /^INV_/ ; false; else; true; end })
54
- tps, ys = sp.by_tps(:fpr2, tp, fp)
58
+ (tps, ys) = roc.tps_and_ppv(tp, fp)
55
59
  assert_equal(exp_tp, tps)
56
60
  assert_equal(exp_fp, ys)
57
- tps, prec, fpr = sp.tps_and_precision_and_fpr2_times2_for_prob("INV_")
61
+ (num_hits, prec) = sp.num_hits_and_ppv_for_prob("INV_")
58
62
  # @TODO: assert these guys for consistency's sake:
59
63
  assert_in_delta_arrays([1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14, 15], tps, 0.0000001)
60
- assert_in_delta_arrays([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.916666666666667, 0.923076923076923, 0.928571428571429, 0.933333333333333, 0.882352941176471], prec, 0.0000001)
61
- assert_in_delta_arrays([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.166666666666667, 0.153846153846154, 0.142857142857143, 0.133333333333333, 0.235294117647059], fpr, 0.0000001)
64
+ # Consistency check only:
65
+ assert_in_delta_arrays([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.909090909090909, 0.916666666666667, 0.923076923076923, 0.928571428571429, 0.866666666666667], prec, 0.0000001)
62
66
  end
63
67
 
64
68
  def assert_in_delta_arrays(one, two, delta, message=nil)
@@ -66,5 +70,123 @@ class SpecIDTest < Test::Unit::TestCase
66
70
  assert_in_delta(v, two[i], delta, message)
67
71
  end
68
72
  end
73
+
74
+ def test_file_type
75
+ assert_equal('bioworks', SpecID.file_type(@bw))
76
+ assert_equal('protproph', SpecID.file_type(@prot_proph))
77
+ assert_equal('srg', SpecID.file_type('whatever.srg'))
78
+ ## WOULD BE NICE TO GET THIS WORKING, TOO
79
+ # assert_equal('protproph', SpecID.file_type(@old_prot_proph))
80
+ if File.exist? @tfiles_l
81
+ assert File.exist?(@srf), "file #{@srf} is there"
82
+ assert_equal('srf', SpecID.file_type(@srf))
83
+ else
84
+ assert_nil( puts("\n--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
85
+ end
86
+ end
87
+
88
+ def test_non_standard_aa_removal
89
+ hash = {"K.PEPTIDE.Z" => "K.PEPTIDE.Z", "K.*M" => "K.M", "aI" => 'I', "YI.&" => "YI.", "EI.!@#\$%^&*(){}[]|\\;:'\"<>,?/EI" => 'EI.EI'}
90
+ cl = proc {|v| SpecID::Pep.remove_non_amino_acids(v) }
91
+ hash.each do |k,v|
92
+ assert_equal(v, cl.call(k))
93
+ end
94
+ end
95
+
96
+
97
+
98
+ end
99
+
100
+ class MyProt ; include SpecID::Prot ; end
101
+ class MyPep ; include SpecID::Pep ; attr_accessor :xcorr end
102
+
103
+
104
+ class TestOccamsRazor < Test::Unit::TestCase
105
+
106
+ def test_small
107
+
108
+ prots = (0..6).to_a.map do |n|
109
+ prot = MyProt.new
110
+ prot.reference = "ref_#{n}"
111
+ prot
112
+ end
113
+
114
+ peps = (0..12).to_a.map {|v| MyPep.new }
115
+
116
+ # 0 1 2 3 4 5 6 7 8 9 10 11 12
117
+ aaseqs = %w(AAA BBB CCC ABC AAA BBB CCC ABC DDD EEE FFF EEEEE DDD)
118
+ xcorrs = [1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 0.5, 0.6, 0.7, 0.8, 0.5]
119
+
120
+ peps.zip(aaseqs, xcorrs) do |pep,aaseq,xcorr|
121
+ pep.aaseq = aaseq
122
+ pep.xcorr = xcorr
123
+ end
124
+
125
+ prots[0].peps = peps[0,4]
126
+ prots[1].peps = [peps[2]] ## should be missing
127
+
128
+ test_prots = prots[0,2]
129
+ require 'pp'
130
+ answ = SpecID.occams_razor(test_prots)
131
+ answ.each do |an|
132
+ assert( an[0].is_a?(SpecID::Prot), "prots are there")
133
+ end
134
+ first = answ.first
135
+ assert_equal( prots[0], first[0])
136
+ assert_equal_array_content( prots[0].peps, first[1])
137
+
138
+
139
+
140
+ #prots[2].peps = [peps[2]]
141
+ #prots[2].peps.push( peps[3] ) ## should be there since it has 2
142
+ #prots[3].peps = [peps[3]] ## should be missing
143
+ end
144
+
145
+ def assert_equal_array_content(exp1, ans, message='')
146
+ exp1.each do |item|
147
+ assert(ans.include?(item), "finding #{item}: #{message}")
148
+ end
149
+ end
150
+ end
151
+
152
+
153
+ require 'fasta'
154
+
155
+ class TestProteinGroups < Test::Unit::TestCase
156
+
157
+ def test_small
158
+ prots = []
159
+
160
+ aaseq = ('A'..'Z').to_a.join('')
161
+ header = "prot1"
162
+ prots << Fasta::Prot.new(header, aaseq)
163
+
164
+ aaseq = ('A'..'Z').to_a.reverse.join('')
165
+ header = "prot1_reverse"
166
+ prots << Fasta::Prot.new(header, aaseq)
167
+
168
+ aaseq = ('A'..'Z').to_a.join('')
169
+ header = "prot1_identical"
170
+ prots << Fasta::Prot.new(header, aaseq)
171
+
172
+ aaseq = ('A'..'E').to_a.join('')
173
+ header = "prot1_short"
174
+ prots << Fasta::Prot.new(header, aaseq)
175
+
176
+ aaseq = ('A'..'E').to_a.reverse.join('')
177
+ header = "prot1_reverse_short"
178
+ prots << Fasta::Prot.new(header, aaseq)
179
+
180
+ fasta = Fasta.new(prots)
181
+
182
+ pep_seqs = %w(ABCD DEFG ABCD DEFG EDCB FEDCB XYZ RANDOM AEABA)
183
+
184
+ arr = SpecID::Pep.protein_groups_by_sequence(pep_seqs, fasta)
185
+
186
+ exp = [[prots[0], prots[2], prots[3]], [prots[0], prots[2]], [prots[0], prots[2], prots[3]], [prots[0],prots[2]], [prots[1], prots[4]], [prots[1]], [prots[0], prots[2]], [], []]
187
+
188
+ assert_equal(exp, arr)
189
+ end
190
+
69
191
  end
70
192
 
data/test/tc_srf.rb CHANGED
@@ -50,53 +50,70 @@ module ToMatch
50
50
 
51
51
  Out_files_first = {
52
52
  :num_hits => 10,
53
- :charge => 1,
54
53
  :computer => "VELA",
55
54
  :date_time => "11/17/2006, 04:13 PM,",
56
55
  }
57
56
 
58
57
  Out_files_first_hit = {
59
58
  :mh => 1220.5128044522,
60
- :deltacn => 0.0,
59
+ :deltacn => 0.071944423019886, ## this is the modified version
61
60
  :sp => 96.5815887451172,
62
61
  :xcorr => 1.08377742767334,
63
62
  :id => 224,
64
63
  :rsp => 13,
65
64
  :ions_matched => 8,
66
65
  :ions_total => 20,
67
- :peptide => "K.LCPHLTLLPGR.F",
66
+ :sequence => "K.LCPHLTLLPGR.F",
67
+ :aaseq => "LCPHLTLLPGR",
68
68
  :reference => "gi|1786425|gb|AAC73335.1| damage-inducible protein P; putative tRNA synthetase",
69
+ :first_scan => 2,
70
+ :last_scan => 2,
71
+ :base_name => '7MIX_STD_110802_1',
72
+ :charge => 1,
69
73
  }
70
74
 
71
75
  Out_files_last = {
72
76
  :num_hits => 10,
73
- :charge => 1,
74
77
  :computer => "VELA",
75
78
  :date_time => "11/17/2006, 04:25 PM," ,
76
79
  }
77
80
  Out_files_last_first_hit = {
78
81
  :mh => 2605.9368784522,
79
- :deltacn => 0.0,
82
+ :deltacn => 0.03921128064394,
80
83
  :sp => 76.7447052001953,
81
84
  :xcorr => 0.915680646896362,
82
85
  :id => 13562,
83
86
  :rsp => 4,
84
87
  :ions_matched => 10,
85
88
  :ions_total => 84,
86
- :peptide => "K.HLEINPNHPIVETLRQKAETHK.N",
89
+ :sequence => "K.HLEINPNHPIVETLRQKAETHK.N",
90
+ :aaseq => "HLEINPNHPIVETLRQKAETHK",
87
91
  :reference => "gi|30149327|ref|XP_293672.2| similar to ebiP7687 [Homo sapiens]",
92
+ :first_scan => 7161,
93
+ :last_scan => 7161,
94
+ :base_name => '7MIX_STD_110802_1',
95
+ :deltamass => 2605.9368784522 - 2604.8360326775,
96
+ :ppm => ((1.0e6 * (2605.9368784522 - 2604.8360326775)) / 2604.8360326775).abs,
97
+ :charge => 3,
88
98
  }
89
99
  Out_files_last_last_hit = {
90
100
  :mh => 2604.9025174522,
91
- :deltacn => 0.307604849338531,
101
+ :deltacn => 1.1,
92
102
  :sp => 26.1511478424072,
93
103
  :xcorr => 0.634012818336487,
94
104
  :id => 8105,
95
105
  :rsp => 165,
96
106
  :ions_matched => 6,
97
107
  :ions_total => 84,
98
- :peptide => "R.EAFLVNSDLTLRAQLTEFRDHK.L",
99
- :reference => "gi|5453830|ref|NP_006181.1| origin recognition complex, subunit 2-like; origin"
108
+ :sequence => "R.EAFLVNSDLTLRAQLTEFRDHK.L",
109
+ :aaseq => "EAFLVNSDLTLRAQLTEFRDHK",
110
+ :reference => "gi|5453830|ref|NP_006181.1| origin recognition complex, subunit 2-like; origin",
111
+ :first_scan => 7161,
112
+ :last_scan => 7161,
113
+ :base_name => '7MIX_STD_110802_1',
114
+ :deltamass => 2604.9025174522 - 2604.8360326775,
115
+ :ppm => ((1.0e6 * (2604.9025174522 - 2604.8360326775)) / 2604.8360326775).abs,
116
+ :charge => 3,
100
117
  }
101
118
  Sequest_params = {
102
119
  "add_F_Phenylalanine"=>"0.0000",
@@ -163,50 +180,79 @@ module ToMatch
163
180
 
164
181
  end
165
182
 
183
+ tfiles = File.dirname(__FILE__) + '/tfiles/'
184
+ tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
185
+ tf_srf = tfiles_l + "7MIX_STD_110802_1.srf"
186
+ tf_srf_inv = tfiles_l + "7MIX_STD_110802_1_INV.srf"
187
+ if File.exist? tfiles_l
188
+ start = Time.now
189
+ $group = SRFGroup.new([tf_srf, tf_srf_inv])
190
+ $srf = $group.srfs.first
191
+ puts "Time to read and compile two SRF: #{Time.now - start} secs"
192
+ end
193
+
194
+
166
195
  class TestSRF < Test::Unit::TestCase
167
196
  include ToMatch
168
197
  def initialize(arg)
169
198
  super(arg)
170
199
  @tfiles = File.dirname(__FILE__) + '/tfiles/'
171
200
  @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
172
- @tf_srf = @tfiles_l + "7MIX_STD_110802_1.srf"
201
+ @srg_file = @tfiles + "tmp_bioworks.srg"
202
+
203
+ @srf = $srf
204
+ @group = $group
205
+
173
206
  end
174
207
 
175
208
  def test_basic
176
- start = Time.now
177
- obj = SRF.new(@tf_srf)
178
- puts "TOOK: #{Time.now - start} secs"
179
- ## Verify that we have everything and it is as we expect (not exhaustive)
180
- head = obj.header
181
- dtgen = head.dta_gen
182
- ## HEADER
183
- hash_match(Header, head)
184
- hash_match(Dta_gen, dtgen)
185
- ## DTA_FILES
186
- hash_match(Dta_files_first, obj.dta_files.first)
187
- hash_match(Dta_files_last, obj.dta_files.last)
188
- ## OUT_FILES
189
- hash_match(Out_files_first, obj.out_files.first)
190
- hash_match(Out_files_first_hit, obj.out_files.first.hits.first)
191
- hash_match(Out_files_last_first_hit, obj.out_files.last.hits.first)
192
- hash_match(Out_files_last_last_hit, obj.out_files.last.hits.last)
193
- ## SEQUEST_PARAMS
194
- hash_match(Sequest_params, obj.params)
195
- ## INDEX
196
- assert_equal([7161, 7161, 3], obj.index.last)
197
- assert_equal([2, 2, 1], obj.index.first)
198
-
199
- assert_equal(obj.index.size, obj.dta_files.size)
200
- assert_equal(obj.out_files.size, obj.dta_files.size)
209
+ if File.exist? @tfiles_l
210
+ ## Verify that we have everything and it is as we expect (not exhaustive)
211
+ head = @srf.header
212
+ dtgen = head.dta_gen
213
+ ## HEADER
214
+ hash_match(Header, head)
215
+ hash_match(Dta_gen, dtgen)
216
+ ## DTA_FILES
217
+ hash_match(Dta_files_first, @srf.dta_files.first)
218
+ hash_match(Dta_files_last, @srf.dta_files.last)
219
+ ## OUT_FILES
220
+ hash_match(Out_files_first, @srf.out_files.first)
221
+ hash_match(Out_files_first_hit, @srf.out_files.first.hits.first)
222
+ hash_match(Out_files_last_first_hit, @srf.out_files.last.hits.first)
223
+ hash_match(Out_files_last_last_hit, @srf.out_files.last.hits.last)
224
+ ## SEQUEST_PARAMS
225
+ hash_match(Sequest_params, @srf.params)
226
+ ## INDEX
227
+ assert_equal([7161, 7161, 3], @srf.index.last)
228
+ assert_equal([2, 2, 1], @srf.index.first)
229
+
230
+ assert_equal(@srf.index.size, @srf.dta_files.size)
231
+ assert_equal(@srf.out_files.size, @srf.dta_files.size)
232
+ else
233
+ assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
234
+ end
235
+ end
236
+
237
+ def test_srg
238
+ if File.exist? @tfiles_l
239
+ @group.to_srg(@srg_file)
240
+ assert(File.exist?(@srg_file), "file exists: " + @srg_file )
241
+ else
242
+ assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
243
+ end
201
244
  end
202
245
 
203
- def hash_match(hash, obj)
246
+ ## treats reference special
247
+ def hash_match(hash, srf)
204
248
  hash.each do |k,v|
205
249
  if v.is_a? Float
206
250
  delta = v/100000
207
- assert_in_delta( obj.send(k.to_sym), v, delta, "param: #{k}")
251
+ assert_in_delta(v, srf.send(k.to_sym), delta, "param: #{k}")
252
+ elsif k == :reference
253
+ assert_equal(v[0,38], srf.prots.first.reference)
208
254
  else
209
- assert_equal(obj.send(k.to_sym), v, "param: #{k}")
255
+ assert_equal(v, srf.send(k.to_sym), "param: #{k}")
210
256
  end
211
257
  end
212
258
  end
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.0
2
+ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: mspire
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.7
7
- date: 2007-03-27 00:00:00 -05:00
6
+ version: 0.2.0
7
+ date: 2007-04-25 00:00:00 -05:00
8
8
  summary: Mass Spectrometry Proteomics Objects, Scripts, and Executables
9
9
  require_paths:
10
10
  - lib
@@ -29,28 +29,32 @@ post_install_message:
29
29
  authors:
30
30
  - John Prince
31
31
  files:
32
- - lib/spec_id
32
+ - lib/spec_id.rb
33
33
  - lib/align
34
+ - lib/spec_id_xml.rb
35
+ - lib/spec_id
36
+ - lib/toppred.rb
37
+ - lib/align.rb
34
38
  - lib/spec
35
- - lib/sample_enzyme.rb
36
39
  - lib/fasta.rb
37
- - lib/roc.rb
38
- - lib/spec_id.rb
39
40
  - lib/gi.rb
40
- - lib/align.rb
41
- - lib/spec_id/srf.rb
42
- - lib/spec_id/bioworks.rb
43
- - lib/spec_id/proph.rb
41
+ - lib/roc.rb
42
+ - lib/sample_enzyme.rb
43
+ - lib/align/chams.rb
44
44
  - lib/spec_id/sequest.rb
45
+ - lib/spec_id/filter.rb
46
+ - lib/spec_id/bioworks.rb
47
+ - lib/spec_id/srf.rb
45
48
  - lib/spec_id/precision.rb
49
+ - lib/spec_id/protein_summary.rb
46
50
  - lib/spec_id/aa_freqs.rb
47
- - lib/align/chams.rb
48
- - lib/spec/mzxml
49
- - lib/spec/mzdata
51
+ - lib/spec_id/proph.rb
50
52
  - lib/spec/msrun.rb
51
53
  - lib/spec/scan.rb
52
- - lib/spec/mzxml.rb
54
+ - lib/spec/mzxml
53
55
  - lib/spec/mzdata.rb
56
+ - lib/spec/mzdata
57
+ - lib/spec/mzxml.rb
54
58
  - lib/spec/mzxml/parser.rb
55
59
  - lib/spec/mzdata/parser.rb
56
60
  - INSTALL
@@ -59,62 +63,62 @@ files:
59
63
  - LICENSE
60
64
  - changelog.txt
61
65
  - release_notes.txt
62
- - bin/fasta_cat_mod.rb
63
- - bin/fasta_mod.rb
64
- - bin/gi2annot.rb
65
- - bin/protein_summary.rb
66
- - bin/raw2mzXML.rb
67
- - bin/fasta_cat.rb
68
66
  - bin/bioworks2sequestXML_gui.rb
69
- - bin/bioworks2excel.rb
67
+ - bin/srf_group.rb
70
68
  - bin/pepproph_filter.rb
71
- - bin/filter_spec_id.rb
72
- - bin/bioworks_to_pepxml.rb
73
- - bin/mzxml_to_lmat.rb
69
+ - bin/filter.rb
74
70
  - bin/protxml2prots_peps.rb
75
- - bin/id_precision.rb
71
+ - bin/raw_to_mzXML.rb
72
+ - bin/gi2annot.rb
76
73
  - bin/id_class_anal.rb
77
74
  - bin/precision.rb
75
+ - bin/id_precision.rb
76
+ - bin/protein_summary.rb
77
+ - bin/bioworks_to_pepxml.rb
78
+ - bin/bioworks2excel.rb
79
+ - bin/mzxml_to_lmat.rb
78
80
  - bin/fasta_shaker.rb
81
+ - bin/find_aa_freq.rb
79
82
  - script/prep_dir.rb
83
+ - script/degenerate_peptides.rb
84
+ - script/histogram_probs.rb
85
+ - script/simple_protein_digestion.rb
86
+ - script/top_hit_per_scan.rb
80
87
  - script/msvis.rb
81
- - script/gen_database_searching.rb
82
88
  - script/mzXML2timeIndex.rb
83
89
  - script/tpp_installer.rb
84
- - script/create_little_pepxml.rb
85
- - script/histogram_probs.rb
86
- - script/top_hit_per_scan.rb
87
90
  - script/filter-peps.rb
88
- - script/simple_protein_digestion.rb
89
- - script/genuine_tps_and_probs.rb
90
91
  - script/estimate_fpr_by_cysteine.rb
92
+ - script/genuine_tps_and_probs.rb
93
+ - script/create_little_pepxml.rb
91
94
  - script/find_cysteine_background.rb
92
95
  test_files:
93
- - test/tc_srf.rb
94
- - test/tc_proph.rb
95
- - test/tc_sequest.rb
96
- - test/tc_align.rb
97
- - test/tc_spec.rb
98
- - test/tc_aa_freqs.rb
99
- - test/tc_protein_summary.rb
96
+ - test/tc_spec_id_xml.rb
97
+ - test/tc_mzxml_to_lmat.rb
98
+ - test/tc_id_class_anal.rb
99
+ - test/tc_gi.rb
100
100
  - test/tc_fasta.rb
101
- - test/tc_bioworks.rb
102
101
  - test/tc_peptide_parent_times.rb
103
- - test/tc_msrun.rb
104
102
  - test/tc_spec_id.rb
103
+ - test/tc_roc.rb
105
104
  - test/tc_mzxml.rb
105
+ - test/tc_sample_enzyme.rb
106
+ - test/tc_srf.rb
107
+ - test/tc_bioworks.rb
108
+ - test/tc_spec.rb
109
+ - test/tc_bioworks_to_pepxml.rb
110
+ - test/tc_scan.rb
111
+ - test/tc_sequest.rb
112
+ - test/tc_fasta_shaker.rb
106
113
  - test/tc_id_precision.rb
107
- - test/tc_id_class_anal.rb
114
+ - test/tc_msrun.rb
115
+ - test/tc_protein_summary.rb
108
116
  - test/tc_filter_peps.rb
117
+ - test/tc_filter.rb
118
+ - test/tc_aa_freqs.rb
119
+ - test/tc_proph.rb
120
+ - test/tc_align.rb
109
121
  - test/tc_precision.rb
110
- - test/tc_roc.rb
111
- - test/tc_scan.rb
112
- - test/tc_mzxml_to_lmat.rb
113
- - test/tc_bioworks_to_pepxml.rb
114
- - test/tc_sample_enzyme.rb
115
- - test/tc_fasta_shaker.rb
116
- - test/tc_gi.rb
117
- - test/tc_spec_id_xml.rb
118
122
  rdoc_options:
119
123
  - --main
120
124
  - README
@@ -125,29 +129,30 @@ extra_rdoc_files:
125
129
  - INSTALL
126
130
  - LICENSE
127
131
  executables:
128
- - fasta_cat_mod.rb
129
- - fasta_mod.rb
130
- - gi2annot.rb
131
- - protein_summary.rb
132
- - raw2mzXML.rb
133
- - fasta_cat.rb
134
132
  - bioworks2sequestXML_gui.rb
135
- - bioworks2excel.rb
133
+ - srf_group.rb
136
134
  - pepproph_filter.rb
137
- - filter_spec_id.rb
138
- - bioworks_to_pepxml.rb
139
- - mzxml_to_lmat.rb
135
+ - filter.rb
140
136
  - protxml2prots_peps.rb
141
- - id_precision.rb
137
+ - raw_to_mzXML.rb
138
+ - gi2annot.rb
142
139
  - id_class_anal.rb
143
140
  - precision.rb
141
+ - id_precision.rb
142
+ - protein_summary.rb
143
+ - bioworks_to_pepxml.rb
144
+ - bioworks2excel.rb
145
+ - mzxml_to_lmat.rb
144
146
  - fasta_shaker.rb
147
+ - find_aa_freq.rb
145
148
  extensions: []
146
149
 
147
150
  requirements:
148
151
  - "\"xmlparser\" is the preferred xml parser right now. REXML and regular expressions are used as fallback in some routines."
149
152
  - some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)
150
- - the "t2x" binary to convert .RAW files to mzXML is expected in some applications
153
+ - the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications
154
+ - "\"rake\" is useful for development"
155
+ - "\"webgen\" (with gems redcloth and bluecloth) is necessary to build web pages"
151
156
  dependencies:
152
157
  - !ruby/object:Gem::Dependency
153
158
  name: libjtp
data/bin/fasta_cat.rb DELETED
@@ -1,39 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- require 'fasta'
4
- require 'getoptlong'
5
-
6
- connector = Fasta::FILE_CONNECTOR
7
-
8
- # Get the prefix option:
9
- opts = GetoptLong.new(
10
- [ "-p", "--prefixes", GetoptLong::REQUIRED_ARGUMENT]
11
- )
12
-
13
- opt_hash = {}
14
- opts.each do |opt, arg|
15
- opt_hash[opt] = arg
16
- end
17
-
18
- prefix_array = nil
19
- if opt_hash.key?('-p')
20
- prefix_array = opt_hash['-p'].split(',')
21
- end
22
-
23
- # Usage info:
24
- if ARGV.size < 2
25
- puts "
26
- usage: #{File.basename(__FILE__)} [-p=prefix1,prefix2,...] <file1>.fasta <file2>.fasta ...
27
-
28
- Concatenates the files together with '#{connector}' (the file extension will
29
- be the extension of the first file).
30
-
31
- -p prefixes protein headers with the corresponding value in the comma
32
- separated list.
33
- "
34
- exit
35
- end
36
-
37
- files = ARGV.to_a
38
- outfile = Fasta.cat_and_prefix(files, prefix_array, connector)
39
- puts "OUTFILE: #{outfile}"
data/bin/fasta_cat_mod.rb DELETED
@@ -1,59 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- require 'fasta'
4
- require 'optparse'
5
-
6
- hash = {
7
- 'shuffle' => {
8
- 'method' => :aaseq_shuffle!,
9
- 'protein_header_prefix' => Fasta::SHUFF_PREFIX,
10
- 'file_postfix' => Fasta::CAT_SHUFF_FILE_POSTFIX,
11
- },
12
- 'invert' => {
13
- 'method' => :aaseq_invert!,
14
- 'protein_header_prefix' => Fasta::INV_PREFIX,
15
- 'file_postfix' => Fasta::CAT_INV_FILE_POSTFIX,
16
- },
17
- }
18
-
19
- opt = {}
20
- OptionParser.new do |opts|
21
- opts.on("-f", "--fraction FLOAT", "fraction") {|v| opt['f'] = v }
22
- end.parse!
23
-
24
-
25
- if ARGV.size < 2
26
- puts "
27
- usage: #{File.basename(__FILE__)} [-f <fraction>] <method> <file>.fasta ...
28
-
29
- The AA seq's of (a fraction of) proteins will be modified according to
30
- <method> and concatenated to the end of the normal proteins. Each modified
31
- protein's header takes on a header prefix after the '>'. Each file takes on
32
- a postfix (before the extension).
33
-
34
- METHOD PROT_PREFIX FILE_POSTFIX
35
- shuffle #{hash['shuffle']['protein_header_prefix']} #{hash['shuffle']['file_postfix']}
36
- invert #{hash['invert']['protein_header_prefix']} #{hash['invert']['file_postfix']}
37
- "
38
- exit
39
- end
40
-
41
- method = ARGV.shift
42
- opt_h = nil
43
- if hash.key? method
44
- opth = hash[method]
45
- else
46
- abort "invalid method! choose: #{hash.keys.join(", ")}"
47
- end
48
-
49
- fraction = 1; if opt.key?('f') then fraction = opt['f'] end
50
-
51
- specific_method = opth['method']
52
- file_postfix = opth['file_postfix']
53
- protein_header_prefix = opth['protein_header_prefix']
54
- #puts [file, specific_method, fraction, file_postfix, protein_header_prefix].join("*")
55
-
56
- ARGV.each do |file|
57
- outfile = Fasta.modify_fraction_and_cat_to_file(file, specific_method, fraction, file_postfix, protein_header_prefix)
58
- puts "OUTPUT: #{outfile}"
59
- end