mspire 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. data/Rakefile +41 -14
  2. data/bin/bioworks2excel.rb +1 -1
  3. data/bin/bioworks_to_pepxml.rb +46 -59
  4. data/bin/fasta_shaker.rb +1 -1
  5. data/bin/filter.rb +6 -0
  6. data/bin/find_aa_freq.rb +23 -0
  7. data/bin/id_precision.rb +3 -2
  8. data/bin/mzxml_to_lmat.rb +2 -1
  9. data/bin/pepproph_filter.rb +1 -1
  10. data/bin/precision.rb +1 -1
  11. data/bin/protein_summary.rb +2 -451
  12. data/bin/raw_to_mzXML.rb +55 -0
  13. data/bin/srf_group.rb +26 -0
  14. data/changelog.txt +7 -0
  15. data/lib/align.rb +3 -3
  16. data/lib/fasta.rb +6 -1
  17. data/lib/gi.rb +9 -4
  18. data/lib/roc.rb +2 -0
  19. data/lib/sample_enzyme.rb +2 -1
  20. data/lib/spec/mzxml/parser.rb +2 -43
  21. data/lib/spec/mzxml.rb +65 -2
  22. data/lib/spec_id/aa_freqs.rb +10 -7
  23. data/lib/spec_id/bioworks.rb +67 -87
  24. data/lib/spec_id/filter.rb +794 -0
  25. data/lib/spec_id/precision.rb +29 -36
  26. data/lib/spec_id/proph.rb +5 -3
  27. data/lib/spec_id/protein_summary.rb +459 -0
  28. data/lib/spec_id/sequest.rb +323 -271
  29. data/lib/spec_id/srf.rb +189 -135
  30. data/lib/spec_id.rb +276 -227
  31. data/lib/spec_id_xml.rb +101 -0
  32. data/lib/toppred.rb +18 -0
  33. data/script/degenerate_peptides.rb +47 -0
  34. data/script/filter-peps.rb +5 -1
  35. data/test/tc_align.rb +1 -1
  36. data/test/tc_bioworks.rb +25 -22
  37. data/test/tc_bioworks_to_pepxml.rb +37 -4
  38. data/test/tc_fasta.rb +3 -1
  39. data/test/tc_fasta_shaker.rb +8 -6
  40. data/test/tc_filter.rb +203 -0
  41. data/test/tc_gi.rb +6 -9
  42. data/test/tc_id_precision.rb +31 -0
  43. data/test/tc_mzxml.rb +8 -6
  44. data/test/tc_peptide_parent_times.rb +2 -1
  45. data/test/tc_precision.rb +1 -1
  46. data/test/tc_proph.rb +5 -5
  47. data/test/tc_protein_summary.rb +36 -13
  48. data/test/tc_sequest.rb +78 -33
  49. data/test/tc_spec_id.rb +128 -6
  50. data/test/tc_srf.rb +84 -38
  51. metadata +67 -62
  52. data/bin/fasta_cat.rb +0 -39
  53. data/bin/fasta_cat_mod.rb +0 -59
  54. data/bin/fasta_mod.rb +0 -57
  55. data/bin/filter_spec_id.rb +0 -365
  56. data/bin/raw2mzXML.rb +0 -21
  57. data/script/gen_database_searching.rb +0 -258
data/test/tc_spec_id.rb CHANGED
@@ -8,11 +8,14 @@ class SpecIDTest < Test::Unit::TestCase
8
8
  def initialize(arg)
9
9
  super(arg)
10
10
  @tfiles = File.dirname(__FILE__) + '/tfiles/'
11
+ @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
11
12
  @bw = @tfiles + "bioworks_small.xml"
13
+ @old_prot_proph = @tfiles + 'yeast_gly_small-prot.xml'
14
+ @prot_proph = @tfiles + 'opd1/000_020_3prots-prot.xml'
15
+ @srf = @tfiles_l + '7MIX_STD_110802_1.srf'
12
16
  end
13
17
 
14
18
  def test_spec_id_creation
15
- sp = SpecID.new
16
19
  sp = SpecID.new(@bw)
17
20
  assert_equal(106, sp.prots.size)
18
21
  end
@@ -45,20 +48,21 @@ class SpecIDTest < Test::Unit::TestCase
45
48
  end
46
49
  [write_index, bo]
47
50
  end
51
+ roc = ROC.new
48
52
  tp, fp = ROC.new.prep_list(answ)
49
- (exp_tp, exp_fp) = ROC.new.by_tps(:fpr2, tp, fp)
53
+ (exp_tp, exp_fp) = roc.tps_and_ppv(tp, fp)
50
54
 
51
55
  sp = SpecID.new(file)
52
56
  assert_equal(19, sp.prots.size)
53
57
  tp, fp = sp.rank_and_classify(:prots, proc {|prt| prt.probability }, proc {|prt| if prt.reference =~ /^INV_/ ; false; else; true; end })
54
- tps, ys = sp.by_tps(:fpr2, tp, fp)
58
+ (tps, ys) = roc.tps_and_ppv(tp, fp)
55
59
  assert_equal(exp_tp, tps)
56
60
  assert_equal(exp_fp, ys)
57
- tps, prec, fpr = sp.tps_and_precision_and_fpr2_times2_for_prob("INV_")
61
+ (num_hits, prec) = sp.num_hits_and_ppv_for_prob("INV_")
58
62
  # @TODO: assert these guys for consistency's sake:
59
63
  assert_in_delta_arrays([1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14, 15], tps, 0.0000001)
60
- assert_in_delta_arrays([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.916666666666667, 0.923076923076923, 0.928571428571429, 0.933333333333333, 0.882352941176471], prec, 0.0000001)
61
- assert_in_delta_arrays([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.166666666666667, 0.153846153846154, 0.142857142857143, 0.133333333333333, 0.235294117647059], fpr, 0.0000001)
64
+ # Consistency check only:
65
+ assert_in_delta_arrays([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.909090909090909, 0.916666666666667, 0.923076923076923, 0.928571428571429, 0.866666666666667], prec, 0.0000001)
62
66
  end
63
67
 
64
68
  def assert_in_delta_arrays(one, two, delta, message=nil)
@@ -66,5 +70,123 @@ class SpecIDTest < Test::Unit::TestCase
66
70
  assert_in_delta(v, two[i], delta, message)
67
71
  end
68
72
  end
73
+
74
+ def test_file_type
75
+ assert_equal('bioworks', SpecID.file_type(@bw))
76
+ assert_equal('protproph', SpecID.file_type(@prot_proph))
77
+ assert_equal('srg', SpecID.file_type('whatever.srg'))
78
+ ## WOULD BE NICE TO GET THIS WORKING, TOO
79
+ # assert_equal('protproph', SpecID.file_type(@old_prot_proph))
80
+ if File.exist? @tfiles_l
81
+ assert File.exist?(@srf), "file #{@srf} is there"
82
+ assert_equal('srf', SpecID.file_type(@srf))
83
+ else
84
+ assert_nil( puts("\n--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
85
+ end
86
+ end
87
+
88
+ def test_non_standard_aa_removal
89
+ hash = {"K.PEPTIDE.Z" => "K.PEPTIDE.Z", "K.*M" => "K.M", "aI" => 'I', "YI.&" => "YI.", "EI.!@#\$%^&*(){}[]|\\;:'\"<>,?/EI" => 'EI.EI'}
90
+ cl = proc {|v| SpecID::Pep.remove_non_amino_acids(v) }
91
+ hash.each do |k,v|
92
+ assert_equal(v, cl.call(k))
93
+ end
94
+ end
95
+
96
+
97
+
98
+ end
99
+
100
+ class MyProt ; include SpecID::Prot ; end
101
+ class MyPep ; include SpecID::Pep ; attr_accessor :xcorr end
102
+
103
+
104
+ class TestOccamsRazor < Test::Unit::TestCase
105
+
106
+ def test_small
107
+
108
+ prots = (0..6).to_a.map do |n|
109
+ prot = MyProt.new
110
+ prot.reference = "ref_#{n}"
111
+ prot
112
+ end
113
+
114
+ peps = (0..12).to_a.map {|v| MyPep.new }
115
+
116
+ # 0 1 2 3 4 5 6 7 8 9 10 11 12
117
+ aaseqs = %w(AAA BBB CCC ABC AAA BBB CCC ABC DDD EEE FFF EEEEE DDD)
118
+ xcorrs = [1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 0.5, 0.6, 0.7, 0.8, 0.5]
119
+
120
+ peps.zip(aaseqs, xcorrs) do |pep,aaseq,xcorr|
121
+ pep.aaseq = aaseq
122
+ pep.xcorr = xcorr
123
+ end
124
+
125
+ prots[0].peps = peps[0,4]
126
+ prots[1].peps = [peps[2]] ## should be missing
127
+
128
+ test_prots = prots[0,2]
129
+ require 'pp'
130
+ answ = SpecID.occams_razor(test_prots)
131
+ answ.each do |an|
132
+ assert( an[0].is_a?(SpecID::Prot), "prots are there")
133
+ end
134
+ first = answ.first
135
+ assert_equal( prots[0], first[0])
136
+ assert_equal_array_content( prots[0].peps, first[1])
137
+
138
+
139
+
140
+ #prots[2].peps = [peps[2]]
141
+ #prots[2].peps.push( peps[3] ) ## should be there since it has 2
142
+ #prots[3].peps = [peps[3]] ## should be missing
143
+ end
144
+
145
+ def assert_equal_array_content(exp1, ans, message='')
146
+ exp1.each do |item|
147
+ assert(ans.include?(item), "finding #{item}: #{message}")
148
+ end
149
+ end
150
+ end
151
+
152
+
153
+ require 'fasta'
154
+
155
+ class TestProteinGroups < Test::Unit::TestCase
156
+
157
+ def test_small
158
+ prots = []
159
+
160
+ aaseq = ('A'..'Z').to_a.join('')
161
+ header = "prot1"
162
+ prots << Fasta::Prot.new(header, aaseq)
163
+
164
+ aaseq = ('A'..'Z').to_a.reverse.join('')
165
+ header = "prot1_reverse"
166
+ prots << Fasta::Prot.new(header, aaseq)
167
+
168
+ aaseq = ('A'..'Z').to_a.join('')
169
+ header = "prot1_identical"
170
+ prots << Fasta::Prot.new(header, aaseq)
171
+
172
+ aaseq = ('A'..'E').to_a.join('')
173
+ header = "prot1_short"
174
+ prots << Fasta::Prot.new(header, aaseq)
175
+
176
+ aaseq = ('A'..'E').to_a.reverse.join('')
177
+ header = "prot1_reverse_short"
178
+ prots << Fasta::Prot.new(header, aaseq)
179
+
180
+ fasta = Fasta.new(prots)
181
+
182
+ pep_seqs = %w(ABCD DEFG ABCD DEFG EDCB FEDCB XYZ RANDOM AEABA)
183
+
184
+ arr = SpecID::Pep.protein_groups_by_sequence(pep_seqs, fasta)
185
+
186
+ exp = [[prots[0], prots[2], prots[3]], [prots[0], prots[2]], [prots[0], prots[2], prots[3]], [prots[0],prots[2]], [prots[1], prots[4]], [prots[1]], [prots[0], prots[2]], [], []]
187
+
188
+ assert_equal(exp, arr)
189
+ end
190
+
69
191
  end
70
192
 
data/test/tc_srf.rb CHANGED
@@ -50,53 +50,70 @@ module ToMatch
50
50
 
51
51
  Out_files_first = {
52
52
  :num_hits => 10,
53
- :charge => 1,
54
53
  :computer => "VELA",
55
54
  :date_time => "11/17/2006, 04:13 PM,",
56
55
  }
57
56
 
58
57
  Out_files_first_hit = {
59
58
  :mh => 1220.5128044522,
60
- :deltacn => 0.0,
59
+ :deltacn => 0.071944423019886, ## this is the modified version
61
60
  :sp => 96.5815887451172,
62
61
  :xcorr => 1.08377742767334,
63
62
  :id => 224,
64
63
  :rsp => 13,
65
64
  :ions_matched => 8,
66
65
  :ions_total => 20,
67
- :peptide => "K.LCPHLTLLPGR.F",
66
+ :sequence => "K.LCPHLTLLPGR.F",
67
+ :aaseq => "LCPHLTLLPGR",
68
68
  :reference => "gi|1786425|gb|AAC73335.1| damage-inducible protein P; putative tRNA synthetase",
69
+ :first_scan => 2,
70
+ :last_scan => 2,
71
+ :base_name => '7MIX_STD_110802_1',
72
+ :charge => 1,
69
73
  }
70
74
 
71
75
  Out_files_last = {
72
76
  :num_hits => 10,
73
- :charge => 1,
74
77
  :computer => "VELA",
75
78
  :date_time => "11/17/2006, 04:25 PM," ,
76
79
  }
77
80
  Out_files_last_first_hit = {
78
81
  :mh => 2605.9368784522,
79
- :deltacn => 0.0,
82
+ :deltacn => 0.03921128064394,
80
83
  :sp => 76.7447052001953,
81
84
  :xcorr => 0.915680646896362,
82
85
  :id => 13562,
83
86
  :rsp => 4,
84
87
  :ions_matched => 10,
85
88
  :ions_total => 84,
86
- :peptide => "K.HLEINPNHPIVETLRQKAETHK.N",
89
+ :sequence => "K.HLEINPNHPIVETLRQKAETHK.N",
90
+ :aaseq => "HLEINPNHPIVETLRQKAETHK",
87
91
  :reference => "gi|30149327|ref|XP_293672.2| similar to ebiP7687 [Homo sapiens]",
92
+ :first_scan => 7161,
93
+ :last_scan => 7161,
94
+ :base_name => '7MIX_STD_110802_1',
95
+ :deltamass => 2605.9368784522 - 2604.8360326775,
96
+ :ppm => ((1.0e6 * (2605.9368784522 - 2604.8360326775)) / 2604.8360326775).abs,
97
+ :charge => 3,
88
98
  }
89
99
  Out_files_last_last_hit = {
90
100
  :mh => 2604.9025174522,
91
- :deltacn => 0.307604849338531,
101
+ :deltacn => 1.1,
92
102
  :sp => 26.1511478424072,
93
103
  :xcorr => 0.634012818336487,
94
104
  :id => 8105,
95
105
  :rsp => 165,
96
106
  :ions_matched => 6,
97
107
  :ions_total => 84,
98
- :peptide => "R.EAFLVNSDLTLRAQLTEFRDHK.L",
99
- :reference => "gi|5453830|ref|NP_006181.1| origin recognition complex, subunit 2-like; origin"
108
+ :sequence => "R.EAFLVNSDLTLRAQLTEFRDHK.L",
109
+ :aaseq => "EAFLVNSDLTLRAQLTEFRDHK",
110
+ :reference => "gi|5453830|ref|NP_006181.1| origin recognition complex, subunit 2-like; origin",
111
+ :first_scan => 7161,
112
+ :last_scan => 7161,
113
+ :base_name => '7MIX_STD_110802_1',
114
+ :deltamass => 2604.9025174522 - 2604.8360326775,
115
+ :ppm => ((1.0e6 * (2604.9025174522 - 2604.8360326775)) / 2604.8360326775).abs,
116
+ :charge => 3,
100
117
  }
101
118
  Sequest_params = {
102
119
  "add_F_Phenylalanine"=>"0.0000",
@@ -163,50 +180,79 @@ module ToMatch
163
180
 
164
181
  end
165
182
 
183
+ tfiles = File.dirname(__FILE__) + '/tfiles/'
184
+ tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
185
+ tf_srf = tfiles_l + "7MIX_STD_110802_1.srf"
186
+ tf_srf_inv = tfiles_l + "7MIX_STD_110802_1_INV.srf"
187
+ if File.exist? tfiles_l
188
+ start = Time.now
189
+ $group = SRFGroup.new([tf_srf, tf_srf_inv])
190
+ $srf = $group.srfs.first
191
+ puts "Time to read and compile two SRF: #{Time.now - start} secs"
192
+ end
193
+
194
+
166
195
  class TestSRF < Test::Unit::TestCase
167
196
  include ToMatch
168
197
  def initialize(arg)
169
198
  super(arg)
170
199
  @tfiles = File.dirname(__FILE__) + '/tfiles/'
171
200
  @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
172
- @tf_srf = @tfiles_l + "7MIX_STD_110802_1.srf"
201
+ @srg_file = @tfiles + "tmp_bioworks.srg"
202
+
203
+ @srf = $srf
204
+ @group = $group
205
+
173
206
  end
174
207
 
175
208
  def test_basic
176
- start = Time.now
177
- obj = SRF.new(@tf_srf)
178
- puts "TOOK: #{Time.now - start} secs"
179
- ## Verify that we have everything and it is as we expect (not exhaustive)
180
- head = obj.header
181
- dtgen = head.dta_gen
182
- ## HEADER
183
- hash_match(Header, head)
184
- hash_match(Dta_gen, dtgen)
185
- ## DTA_FILES
186
- hash_match(Dta_files_first, obj.dta_files.first)
187
- hash_match(Dta_files_last, obj.dta_files.last)
188
- ## OUT_FILES
189
- hash_match(Out_files_first, obj.out_files.first)
190
- hash_match(Out_files_first_hit, obj.out_files.first.hits.first)
191
- hash_match(Out_files_last_first_hit, obj.out_files.last.hits.first)
192
- hash_match(Out_files_last_last_hit, obj.out_files.last.hits.last)
193
- ## SEQUEST_PARAMS
194
- hash_match(Sequest_params, obj.params)
195
- ## INDEX
196
- assert_equal([7161, 7161, 3], obj.index.last)
197
- assert_equal([2, 2, 1], obj.index.first)
198
-
199
- assert_equal(obj.index.size, obj.dta_files.size)
200
- assert_equal(obj.out_files.size, obj.dta_files.size)
209
+ if File.exist? @tfiles_l
210
+ ## Verify that we have everything and it is as we expect (not exhaustive)
211
+ head = @srf.header
212
+ dtgen = head.dta_gen
213
+ ## HEADER
214
+ hash_match(Header, head)
215
+ hash_match(Dta_gen, dtgen)
216
+ ## DTA_FILES
217
+ hash_match(Dta_files_first, @srf.dta_files.first)
218
+ hash_match(Dta_files_last, @srf.dta_files.last)
219
+ ## OUT_FILES
220
+ hash_match(Out_files_first, @srf.out_files.first)
221
+ hash_match(Out_files_first_hit, @srf.out_files.first.hits.first)
222
+ hash_match(Out_files_last_first_hit, @srf.out_files.last.hits.first)
223
+ hash_match(Out_files_last_last_hit, @srf.out_files.last.hits.last)
224
+ ## SEQUEST_PARAMS
225
+ hash_match(Sequest_params, @srf.params)
226
+ ## INDEX
227
+ assert_equal([7161, 7161, 3], @srf.index.last)
228
+ assert_equal([2, 2, 1], @srf.index.first)
229
+
230
+ assert_equal(@srf.index.size, @srf.dta_files.size)
231
+ assert_equal(@srf.out_files.size, @srf.dta_files.size)
232
+ else
233
+ assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
234
+ end
235
+ end
236
+
237
+ def test_srg
238
+ if File.exist? @tfiles_l
239
+ @group.to_srg(@srg_file)
240
+ assert(File.exist?(@srg_file), "file exists: " + @srg_file )
241
+ else
242
+ assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
243
+ end
201
244
  end
202
245
 
203
- def hash_match(hash, obj)
246
+ ## treats reference special
247
+ def hash_match(hash, srf)
204
248
  hash.each do |k,v|
205
249
  if v.is_a? Float
206
250
  delta = v/100000
207
- assert_in_delta( obj.send(k.to_sym), v, delta, "param: #{k}")
251
+ assert_in_delta(v, srf.send(k.to_sym), delta, "param: #{k}")
252
+ elsif k == :reference
253
+ assert_equal(v[0,38], srf.prots.first.reference)
208
254
  else
209
- assert_equal(obj.send(k.to_sym), v, "param: #{k}")
255
+ assert_equal(v, srf.send(k.to_sym), "param: #{k}")
210
256
  end
211
257
  end
212
258
  end
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.0
2
+ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: mspire
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.7
7
- date: 2007-03-27 00:00:00 -05:00
6
+ version: 0.2.0
7
+ date: 2007-04-25 00:00:00 -05:00
8
8
  summary: Mass Spectrometry Proteomics Objects, Scripts, and Executables
9
9
  require_paths:
10
10
  - lib
@@ -29,28 +29,32 @@ post_install_message:
29
29
  authors:
30
30
  - John Prince
31
31
  files:
32
- - lib/spec_id
32
+ - lib/spec_id.rb
33
33
  - lib/align
34
+ - lib/spec_id_xml.rb
35
+ - lib/spec_id
36
+ - lib/toppred.rb
37
+ - lib/align.rb
34
38
  - lib/spec
35
- - lib/sample_enzyme.rb
36
39
  - lib/fasta.rb
37
- - lib/roc.rb
38
- - lib/spec_id.rb
39
40
  - lib/gi.rb
40
- - lib/align.rb
41
- - lib/spec_id/srf.rb
42
- - lib/spec_id/bioworks.rb
43
- - lib/spec_id/proph.rb
41
+ - lib/roc.rb
42
+ - lib/sample_enzyme.rb
43
+ - lib/align/chams.rb
44
44
  - lib/spec_id/sequest.rb
45
+ - lib/spec_id/filter.rb
46
+ - lib/spec_id/bioworks.rb
47
+ - lib/spec_id/srf.rb
45
48
  - lib/spec_id/precision.rb
49
+ - lib/spec_id/protein_summary.rb
46
50
  - lib/spec_id/aa_freqs.rb
47
- - lib/align/chams.rb
48
- - lib/spec/mzxml
49
- - lib/spec/mzdata
51
+ - lib/spec_id/proph.rb
50
52
  - lib/spec/msrun.rb
51
53
  - lib/spec/scan.rb
52
- - lib/spec/mzxml.rb
54
+ - lib/spec/mzxml
53
55
  - lib/spec/mzdata.rb
56
+ - lib/spec/mzdata
57
+ - lib/spec/mzxml.rb
54
58
  - lib/spec/mzxml/parser.rb
55
59
  - lib/spec/mzdata/parser.rb
56
60
  - INSTALL
@@ -59,62 +63,62 @@ files:
59
63
  - LICENSE
60
64
  - changelog.txt
61
65
  - release_notes.txt
62
- - bin/fasta_cat_mod.rb
63
- - bin/fasta_mod.rb
64
- - bin/gi2annot.rb
65
- - bin/protein_summary.rb
66
- - bin/raw2mzXML.rb
67
- - bin/fasta_cat.rb
68
66
  - bin/bioworks2sequestXML_gui.rb
69
- - bin/bioworks2excel.rb
67
+ - bin/srf_group.rb
70
68
  - bin/pepproph_filter.rb
71
- - bin/filter_spec_id.rb
72
- - bin/bioworks_to_pepxml.rb
73
- - bin/mzxml_to_lmat.rb
69
+ - bin/filter.rb
74
70
  - bin/protxml2prots_peps.rb
75
- - bin/id_precision.rb
71
+ - bin/raw_to_mzXML.rb
72
+ - bin/gi2annot.rb
76
73
  - bin/id_class_anal.rb
77
74
  - bin/precision.rb
75
+ - bin/id_precision.rb
76
+ - bin/protein_summary.rb
77
+ - bin/bioworks_to_pepxml.rb
78
+ - bin/bioworks2excel.rb
79
+ - bin/mzxml_to_lmat.rb
78
80
  - bin/fasta_shaker.rb
81
+ - bin/find_aa_freq.rb
79
82
  - script/prep_dir.rb
83
+ - script/degenerate_peptides.rb
84
+ - script/histogram_probs.rb
85
+ - script/simple_protein_digestion.rb
86
+ - script/top_hit_per_scan.rb
80
87
  - script/msvis.rb
81
- - script/gen_database_searching.rb
82
88
  - script/mzXML2timeIndex.rb
83
89
  - script/tpp_installer.rb
84
- - script/create_little_pepxml.rb
85
- - script/histogram_probs.rb
86
- - script/top_hit_per_scan.rb
87
90
  - script/filter-peps.rb
88
- - script/simple_protein_digestion.rb
89
- - script/genuine_tps_and_probs.rb
90
91
  - script/estimate_fpr_by_cysteine.rb
92
+ - script/genuine_tps_and_probs.rb
93
+ - script/create_little_pepxml.rb
91
94
  - script/find_cysteine_background.rb
92
95
  test_files:
93
- - test/tc_srf.rb
94
- - test/tc_proph.rb
95
- - test/tc_sequest.rb
96
- - test/tc_align.rb
97
- - test/tc_spec.rb
98
- - test/tc_aa_freqs.rb
99
- - test/tc_protein_summary.rb
96
+ - test/tc_spec_id_xml.rb
97
+ - test/tc_mzxml_to_lmat.rb
98
+ - test/tc_id_class_anal.rb
99
+ - test/tc_gi.rb
100
100
  - test/tc_fasta.rb
101
- - test/tc_bioworks.rb
102
101
  - test/tc_peptide_parent_times.rb
103
- - test/tc_msrun.rb
104
102
  - test/tc_spec_id.rb
103
+ - test/tc_roc.rb
105
104
  - test/tc_mzxml.rb
105
+ - test/tc_sample_enzyme.rb
106
+ - test/tc_srf.rb
107
+ - test/tc_bioworks.rb
108
+ - test/tc_spec.rb
109
+ - test/tc_bioworks_to_pepxml.rb
110
+ - test/tc_scan.rb
111
+ - test/tc_sequest.rb
112
+ - test/tc_fasta_shaker.rb
106
113
  - test/tc_id_precision.rb
107
- - test/tc_id_class_anal.rb
114
+ - test/tc_msrun.rb
115
+ - test/tc_protein_summary.rb
108
116
  - test/tc_filter_peps.rb
117
+ - test/tc_filter.rb
118
+ - test/tc_aa_freqs.rb
119
+ - test/tc_proph.rb
120
+ - test/tc_align.rb
109
121
  - test/tc_precision.rb
110
- - test/tc_roc.rb
111
- - test/tc_scan.rb
112
- - test/tc_mzxml_to_lmat.rb
113
- - test/tc_bioworks_to_pepxml.rb
114
- - test/tc_sample_enzyme.rb
115
- - test/tc_fasta_shaker.rb
116
- - test/tc_gi.rb
117
- - test/tc_spec_id_xml.rb
118
122
  rdoc_options:
119
123
  - --main
120
124
  - README
@@ -125,29 +129,30 @@ extra_rdoc_files:
125
129
  - INSTALL
126
130
  - LICENSE
127
131
  executables:
128
- - fasta_cat_mod.rb
129
- - fasta_mod.rb
130
- - gi2annot.rb
131
- - protein_summary.rb
132
- - raw2mzXML.rb
133
- - fasta_cat.rb
134
132
  - bioworks2sequestXML_gui.rb
135
- - bioworks2excel.rb
133
+ - srf_group.rb
136
134
  - pepproph_filter.rb
137
- - filter_spec_id.rb
138
- - bioworks_to_pepxml.rb
139
- - mzxml_to_lmat.rb
135
+ - filter.rb
140
136
  - protxml2prots_peps.rb
141
- - id_precision.rb
137
+ - raw_to_mzXML.rb
138
+ - gi2annot.rb
142
139
  - id_class_anal.rb
143
140
  - precision.rb
141
+ - id_precision.rb
142
+ - protein_summary.rb
143
+ - bioworks_to_pepxml.rb
144
+ - bioworks2excel.rb
145
+ - mzxml_to_lmat.rb
144
146
  - fasta_shaker.rb
147
+ - find_aa_freq.rb
145
148
  extensions: []
146
149
 
147
150
  requirements:
148
151
  - "\"xmlparser\" is the preferred xml parser right now. REXML and regular expressions are used as fallback in some routines."
149
152
  - some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)
150
- - the "t2x" binary to convert .RAW files to mzXML is expected in some applications
153
+ - the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications
154
+ - "\"rake\" is useful for development"
155
+ - "\"webgen\" (with gems redcloth and bluecloth) is necessary to build web pages"
151
156
  dependencies:
152
157
  - !ruby/object:Gem::Dependency
153
158
  name: libjtp
data/bin/fasta_cat.rb DELETED
@@ -1,39 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- require 'fasta'
4
- require 'getoptlong'
5
-
6
- connector = Fasta::FILE_CONNECTOR
7
-
8
- # Get the prefix option:
9
- opts = GetoptLong.new(
10
- [ "-p", "--prefixes", GetoptLong::REQUIRED_ARGUMENT]
11
- )
12
-
13
- opt_hash = {}
14
- opts.each do |opt, arg|
15
- opt_hash[opt] = arg
16
- end
17
-
18
- prefix_array = nil
19
- if opt_hash.key?('-p')
20
- prefix_array = opt_hash['-p'].split(',')
21
- end
22
-
23
- # Usage info:
24
- if ARGV.size < 2
25
- puts "
26
- usage: #{File.basename(__FILE__)} [-p=prefix1,prefix2,...] <file1>.fasta <file2>.fasta ...
27
-
28
- Concatenates the files together with '#{connector}' (the file extension will
29
- be the extension of the first file).
30
-
31
- -p prefixes protein headers with the corresponding value in the comma
32
- separated list.
33
- "
34
- exit
35
- end
36
-
37
- files = ARGV.to_a
38
- outfile = Fasta.cat_and_prefix(files, prefix_array, connector)
39
- puts "OUTFILE: #{outfile}"
data/bin/fasta_cat_mod.rb DELETED
@@ -1,59 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- require 'fasta'
4
- require 'optparse'
5
-
6
- hash = {
7
- 'shuffle' => {
8
- 'method' => :aaseq_shuffle!,
9
- 'protein_header_prefix' => Fasta::SHUFF_PREFIX,
10
- 'file_postfix' => Fasta::CAT_SHUFF_FILE_POSTFIX,
11
- },
12
- 'invert' => {
13
- 'method' => :aaseq_invert!,
14
- 'protein_header_prefix' => Fasta::INV_PREFIX,
15
- 'file_postfix' => Fasta::CAT_INV_FILE_POSTFIX,
16
- },
17
- }
18
-
19
- opt = {}
20
- OptionParser.new do |opts|
21
- opts.on("-f", "--fraction FLOAT", "fraction") {|v| opt['f'] = v }
22
- end.parse!
23
-
24
-
25
- if ARGV.size < 2
26
- puts "
27
- usage: #{File.basename(__FILE__)} [-f <fraction>] <method> <file>.fasta ...
28
-
29
- The AA seq's of (a fraction of) proteins will be modified according to
30
- <method> and concatenated to the end of the normal proteins. Each modified
31
- protein's header takes on a header prefix after the '>'. Each file takes on
32
- a postfix (before the extension).
33
-
34
- METHOD PROT_PREFIX FILE_POSTFIX
35
- shuffle #{hash['shuffle']['protein_header_prefix']} #{hash['shuffle']['file_postfix']}
36
- invert #{hash['invert']['protein_header_prefix']} #{hash['invert']['file_postfix']}
37
- "
38
- exit
39
- end
40
-
41
- method = ARGV.shift
42
- opt_h = nil
43
- if hash.key? method
44
- opth = hash[method]
45
- else
46
- abort "invalid method! choose: #{hash.keys.join(", ")}"
47
- end
48
-
49
- fraction = 1; if opt.key?('f') then fraction = opt['f'] end
50
-
51
- specific_method = opth['method']
52
- file_postfix = opth['file_postfix']
53
- protein_header_prefix = opth['protein_header_prefix']
54
- #puts [file, specific_method, fraction, file_postfix, protein_header_prefix].join("*")
55
-
56
- ARGV.each do |file|
57
- outfile = Fasta.modify_fraction_and_cat_to_file(file, specific_method, fraction, file_postfix, protein_header_prefix)
58
- puts "OUTPUT: #{outfile}"
59
- end