mspire 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
data/INSTALL CHANGED
@@ -5,6 +5,7 @@ Prerequisites
5
5
  Much of the package will work without any prerequisites at all. Some functionality may require additional Ruby packages or other converters. These are listed in current order of importance:
6
6
 
7
7
  * [xmlparser](http://www.yoshidam.net/Ruby.html) (comes with one-click Windows; on Ubuntu: 'sudo apt-get install libxml-parser-ruby1.8')
8
+ * [libxml](http://libxml.rubyforge.org/) in Ubuntu: sudo apt-get install libxml2 libxml2-dev ; sudo gem install libxml-ruby --remote
8
9
  * ['t2x'](http://sashimi.sourceforge.net/software_glossolalia.html#ReAdW) to convert .RAW files to version 1 mzXML files
9
10
  * [gnuplot](http://rgplot.rubyforge.org/) ('gem install gnuplot'). Of course, you'll need [gnuplot](http://www.gnuplot.info/) before this package will work. Under one-click installer for windows this package requires a little configuration. It works with no configuration on cygwin (or linux).
10
11
 
data/README CHANGED
@@ -18,6 +18,31 @@ The project is currently focusing on the following:
18
18
  * ProteinProphet
19
19
  * Preparation of files for [obiwarp](http://obi-warp.sourceforge.net/)
20
20
 
21
+ Features
22
+ --------
23
+
24
+ * mzXML (version 1 & 2) parsing
25
+ * mzData parsing
26
+ * bioworks .srf (binary files) reader
27
+ * bioworks to PeptideProphet input (pepXML files)
28
+ * lightweight APEX values parser
29
+ * histogram protein probabilities
30
+ * developed for Linux, should port easily to Windows or others
31
+ * protein summary views with custom false ID cutoff values
32
+ * conversion to OBI-Warp input files
33
+
34
+ Validation by:
35
+ * Various Decoy Database search options: Reverse/Shuffle, concatenated/separate, with various hashing options (e.g., by amino acid sequence + charge)
36
+ * Amino acid (e.g., search for unblocked cysteines)
37
+ * Transmembrane prediction (Phobius or TopPred)
38
+ * Generic sample bias (e.g., low abundance/high abundance proteins)
39
+ * Defined sample
40
+
41
+ Working with:
42
+ * Bioworks (3.2-3.3.1)
43
+ * Peptide/Protein Prophet
44
+ * Easily extensible to others
45
+
21
46
  Tutorials
22
47
  ---------
23
48
 
data/Rakefile CHANGED
@@ -2,9 +2,9 @@ require 'rake'
2
2
  require 'rubygems'
3
3
  require 'rake/rdoctask'
4
4
  require 'rake/gempackagetask'
5
- require 'rake/testtask'
6
5
  require 'rake/clean'
7
6
  require 'fileutils'
7
+ require 'spec/rake/spectask'
8
8
 
9
9
  ###############################################
10
10
  # GLOBAL
@@ -13,23 +13,25 @@ FL = FileList
13
13
 
14
14
  NAME = "mspire"
15
15
 
16
- lib_files = FL["lib/**/*"]
17
- test_dir_too = FL["test/**/*"]
16
+ $dependencies = %w(libjtp)
17
+ $tfiles_large = 'test_files_large'
18
+ changelog = "changelog.txt"
18
19
 
19
- little_dist_files = lib_files + FL["INSTALL", "README", "Rakefile", "LICENSE", "changelog.txt", "release_notes.txt", "{bin,script,tutorial}/**/*"]
20
- dist_files = lib_files + FL["INSTALL", "README", "Rakefile", "LICENSE", "{bin,script,tutorial}/**/*", test_dir_too]
20
+ core_files = FL["INSTALL", "README", "Rakefile", "LICENSE", changelog, "release_notes.txt", "{lib,bin,script,specs,tutorial,test_files}/**/*"]
21
+ big_dist_files = core_files + FL["test_files_large/**/*"]
21
22
 
22
- dist_files = little_dist_files # comment out to include test files
23
+ dist_files = core_files
24
+ # dist_files = big_dist_files
23
25
 
24
26
  ###############################################
25
27
  # ENVIRONMENT
26
28
  ###############################################
27
29
 
28
30
  ENV["OS"] == "Windows_NT" ? WIN32 = true : WIN32 = false
29
- gemcmd = "gem"
31
+ $gemcmd = "gem"
30
32
  if WIN32
31
33
  unless ENV["TERM"] == "cygwin"
32
- gemcmd << ".cmd"
34
+ $gemcmd << ".cmd"
33
35
  end
34
36
  end
35
37
 
@@ -81,40 +83,123 @@ end
81
83
  # TESTS
82
84
  ###############################################
83
85
 
84
- desc "Run unit tests."
85
- Rake::TestTask.new do |t|
86
- reply = `#{gemcmd} list -l #{NAME}`
86
+ namespace :spec do
87
+ task :autotest do
88
+ require './specs/rspec_autotest'
89
+ RspecAutotest.run
90
+ end
91
+ end
92
+
93
+
94
+ task :ensure_dependencies do
95
+ $dependencies.each do |dep|
96
+ unless `#{$gemcmd} list -l #{dep}`.include?(dep)
97
+ abort "ABORTING: install #{dep} before testing!"
98
+ end
99
+ end
100
+ end
101
+
102
+ task :ensure_large_testfiles do
103
+ if !File.exist?($tfiles_large) and !ENV['SPEC_LARGE'].nil?
104
+ warn "Not running with large files since #{$tfiles_large} does not exist!"
105
+ warn "Removing SPEC_LARGE from ENV!"
106
+ ENV.delete('SPEC_LARGE')
107
+ end
108
+ end
109
+
110
+ task :ensure_gem_is_uninstalled do
111
+ reply = `#{$gemcmd} list -l #{NAME}`
87
112
  if reply.include? NAME + " ("
88
113
  puts "GOING to uninstall gem '#{NAME}' for testing"
89
114
  if WIN32
90
- %x( #{gemcmd} uninstall -x #{NAME} )
115
+ %x( #{$gemcmd} uninstall -x #{NAME} )
91
116
  else
92
- %x( sudo #{gemcmd} uninstall -x #{NAME} )
117
+ %x( sudo #{$gemcmd} uninstall -x #{NAME} )
93
118
  end
94
119
  end
95
- # t.libs << "lib" ## done by default
96
- t.test_files = FL["test/tc_*.rb"]
97
- #t.verbose = true
98
120
  end
99
121
 
122
+ desc "Run all specs"
123
+ Spec::Rake::SpecTask.new('spec') do |t|
124
+ Rake::Task[:ensure_gem_is_uninstalled].invoke
125
+ Rake::Task[:ensure_dependencies].invoke
126
+ Rake::Task[:ensure_large_testfiles].invoke
127
+ t.libs = ['lib']
128
+ #t.ruby_opts = ['-I', 'lib']
129
+ t.spec_files = FileList['specs/**/*_spec.rb']
130
+ end
100
131
 
132
+ desc "Run all specs"
133
+ Spec::Rake::SpecTask.new('specl') do |t|
134
+ Rake::Task[:ensure_gem_is_uninstalled].invoke
135
+ Rake::Task[:ensure_dependencies].invoke
136
+ Rake::Task[:ensure_large_testfiles].invoke
137
+ t.spec_files = FileList['specs/**/*_spec.rb']
138
+ t.libs = ['lib']
139
+ #t.ruby_opts = ['-I', 'lib']
140
+ t.spec_opts = ['--format', 'specdoc' ]
141
+ end
101
142
 
102
- desc "Run unit tests individual on each test"
103
- task :test_ind do |t|
104
- reply = `#{gemcmd} list -l #{NAME}`
105
- if reply.include? NAME + " ("
106
- %x( sudo #{gemcmd} uninstall -x #{NAME} )
107
- end
143
+ desc "Run all specs with RCov"
144
+ Spec::Rake::SpecTask.new('rcov') do |t|
145
+ Rake::Task[:ensure_gem_is_uninstalled].invoke
146
+ Rake::Task[:ensure_dependencies].invoke
147
+ Rake::Task[:ensure_large_testfiles].invoke
148
+ t.spec_files = FileList['specs/**/*_spec.rb']
149
+ t.rcov = true
150
+ t.libs = ['lib']
151
+ #t.ruby_opts = ['-I', 'lib']
152
+ t.rcov_opts = ['--exclude', 'specs']
153
+ end
108
154
 
109
- # t.libs << "lib" ## done by default
110
- test_files = FL["test/tc_*.rb"]
111
- test_files.each do |file|
112
- puts "TESTING: #{file.sub(/test\//,'')}"
113
- puts `ruby -I lib #{file}`
155
+ task :speci => [:ensure_gem_is_uninstalled, :ensure_dependencies, :ensure_large_testfiles] do
156
+ # files that match a key word
157
+ files_to_run = ENV['SPEC'] || FileList['specs/**/*_spec.rb']
158
+ if ENV['SPECM']
159
+ files_to_run = files_to_run.select do |file|
160
+ file.include?(ENV['SPECM'])
161
+ end
162
+ end
163
+ files_to_run.each do |spc|
164
+ puts "------ SPEC=#{spc} ------"
165
+ system "ruby -I lib -S spec #{spc} --format specdoc"
114
166
  end
115
- #t.verbose = true
116
167
  end
117
168
 
169
+ #Spec::Rake::SpecTask.new(:spec) do |t|
170
+ # uninstall_gem
171
+ # t.spec_files = FileList['spec/**/spec_*.rb']
172
+ # t.libs = FileList['lib']
173
+ # t.spec_opts = ['--format', 'specdoc']
174
+ #end
175
+
176
+
177
+ #desc "Run unit tests."
178
+ #Rake::TestTask.new do |t|
179
+ # uninstall_gem
180
+ # # t.libs << "lib" ## done by default
181
+ # t.test_files = FL["test/tc_*.rb"]
182
+ # #t.verbose = true
183
+ #end
184
+
185
+
186
+
187
+ #desc "Run unit tests individual on each test"
188
+ #task :test_ind do |t|
189
+ # reply = `#{$gemcmd} list -l #{NAME}`
190
+ # if reply.include? NAME + " ("
191
+ # %x( sudo #{$gemcmd} uninstall -x #{NAME} )
192
+ # end
193
+ #
194
+ # # t.libs << "lib" ## done by default
195
+ # test_files = FL["test/tc_*.rb"]
196
+ # test_files.each do |file|
197
+ # puts "TESTING: #{file.sub(/test\//,'')}"
198
+ # puts `ruby -I lib #{file}`
199
+ # end
200
+ # #t.verbose = true
201
+ #end
202
+
118
203
 
119
204
 
120
205
 
@@ -140,7 +225,7 @@ tm = Time.now
140
225
  spec = Gem::Specification.new do |s|
141
226
  s.platform = Gem::Platform::RUBY
142
227
  s.name = NAME
143
- s.version = "0.2.4"
228
+ s.version = IO.readlines(changelog).grep(/##.*version/).pop.split(/\s+/).last.chomp
144
229
  s.summary = "Mass Spectrometry Proteomics Objects, Scripts, and Executables"
145
230
  s.date = "#{tm.year}-#{tm.month}-#{tm.day}"
146
231
  s.email = "jprince@icmb.utexas.edu"
@@ -149,17 +234,19 @@ spec = Gem::Specification.new do |s|
149
234
  s.description = "mspire is for working with mass spectrometry proteomics data"
150
235
  s.has_rdoc = true
151
236
  s.authors = ["John Prince"]
152
- s.files = little_dist_files
237
+ s.files = dist_files
153
238
  s.rdoc_options = rdoc_options
154
239
  s.extra_rdoc_files = rdoc_extra_includes
155
240
  s.executables = FL["bin/*"].map {|file| File.basename(file) }
156
- s.add_dependency('libjtp', '~> 0.1.4')
157
- s.requirements << '"xmlparser" is the prefered xml parser right now. REXML and regular expressions are used as fallback in some routines.'
241
+ s.add_dependency('libjtp', '~> 0.2.5')
242
+ s.add_dependency('axml')
243
+ s.requirements << '"libxml" is the prefered xml parser right now. libxml, xmlparser, REXML and regular expressions are used as fallback in some routines.'
158
244
  s.requirements << 'some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)'
159
245
  s.requirements << 'the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications'
160
246
  s.requirements << '"rake" is useful for development'
161
247
  s.requirements << '"webgen (with gems redcloth and bluecloth) is necessary to build web pages'
162
- s.test_files = FL["test/tc_*.rb"]
248
+ #s.test_files = FL["test/tc_*.rb"]
249
+ s.test_files = FL["specs/**/*_spec.rb"]
163
250
  end
164
251
 
165
252
  desc "Create packages."
@@ -180,20 +267,22 @@ end
180
267
  # t.package_task
181
268
  #end
182
269
 
183
-
184
-
270
+ task :remove_pkg do
271
+ FileUtils.rm_rf "pkg"
272
+ end
185
273
 
186
274
  task :install => [:reinstall]
187
275
 
188
276
  desc "uninstalls the package, packages a fresh one, and installs"
189
- task :reinstall => [:clean, :package] do
190
- reply = `#{gemcmd} list -l #{NAME}`
191
- if reply.include? NAME + " ("
192
- %x( #{gemcmd} uninstall -x #{NAME} )
277
+ task :reinstall => [:remove_pkg, :clean, :package] do
278
+ reply = `#{$gemcmd} list -l #{NAME}`
279
+ if reply.include?(NAME + " (")
280
+ %x( #{$gemcmd} uninstall -x #{NAME} )
193
281
  end
194
282
  FileUtils.cd("pkg") do
195
- %x( #{gemcmd} install #{NAME} )
283
+ %x( #{$gemcmd} install #{NAME}*.gem )
196
284
  end
285
+
197
286
  end
198
287
 
199
288
  ###############################################
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/ruby -w
2
2
 
3
-
3
+ require 'fasta'
4
4
  require 'spec_id/aa_freqs'
5
5
 
6
6
  if ARGV.size < 1
@@ -10,7 +10,7 @@ if ARGV.size < 1
10
10
  end
11
11
 
12
12
  ARGV.each do |file|
13
- obj = SpecID::AAFreqs.new(file)
13
+ obj = SpecID::AAFreqs.new(Fasta.new(file))
14
14
  puts file
15
15
  obj.aafreqs.sort_by{|v| v.to_s }.each do |k,v|
16
16
  puts "#{k}: #{v}"
@@ -12,6 +12,7 @@ DEFAULT_MS_MODEL = 'LCQ'
12
12
  DEFAULT_MASS_ANALYZER = 'Ion Trap'
13
13
  ##############################################################
14
14
 
15
+ require 'spec_id/sequest/pepxml'
15
16
  require 'spec_id'
16
17
  require 'optparse'
17
18
  require 'ostruct'
data/bin/fasta_shaker.rb CHANGED
@@ -1,100 +1,5 @@
1
1
  #!/usr/bin/ruby
2
2
 
3
- # This is my second attempt at writing a simple interface for messing with
4
- # fasta files. Acheiving simplicity (and power) is challenging. It usually
5
- # only happens on the second (or sometimes more) try. Of course, in
6
- # retrospect the simple solution seems sooo obvious. But its deceptive.
7
- # It takes work to acheive simplicity for complex tasks. That's my thought
8
- # for the day.
9
-
10
- # fasta_shaker as in a salt shaker. Shake up your fasta proteins and let them
11
- # season your dinner (hopefully a protein dinner). Mmmm. Don't they taste
12
- # good all mixed up? If you want, you can think of it as a pepper shaker.
13
- # I don't usually comment on my scripts (in my script, anyway), but this one
14
- # came out so nice and clean that I feel like I have room to spare.
15
3
 
16
4
  require 'fasta'
17
- require 'optparse'
18
-
19
- opt = {}
20
-
21
- opts = OptionParser.new do |op|
22
- prog = File.basename(__FILE__)
23
- op.banner = "usage: #{prog} <method> [OPTIONS] <file>.fasta"
24
- op.separator " <method> = reverse | shuffle"
25
- op.on("-c", "--cat", "catenates the output to copy of original") {|v| opt[:cat] = v }
26
- op.on("-o", "--out <string>", "name of output file (default is descriptive)") {|v| opt[:out] = v }
27
- op.on("-p", "--prefix <string>", "give a header prefix to modified prots") {|v| opt[:prefix] = v }
28
- op.on("-f", "--fraction <float>", "creates some fraction of proteins") {|v| opt[:fraction] = v }
29
- op.separator " [if fraction > 1 then the tag 'f<frac#>_' prefixed to proteins"
30
- op.separator " (after any given prefix) so that proteins are unique]"
31
- op.on("--tryptic_peptides", "applies method to [KR][^P] peptides") {|v| opt[:tryptic_peptides] = v }
32
-
33
- op.separator "EXAMPLES: "
34
- op.separator " #{prog} reverse file.fasta -o protein_aa_sequence_reversed.fasta"
35
- op.separator " #{prog} shuffle file.fasta -o protein_aa_sequence_shuffled.fasta"
36
- op.separator " #{prog} shuffle file.fasta -c -p SH_ -o normal_cat_shuffled_with_prefix.fasta"
37
- op.separator " #{prog} reverse file.fasta --tryptic_peptides tryptic_peptides_reversed.fasta"
38
- end
39
-
40
- opts.parse!
41
-
42
- if ARGV.size < 2
43
- puts opts
44
- exit
45
- end
46
-
47
- (method, file) = ARGV
48
-
49
- if opt[:cat] && !opt[:prefix]
50
- puts "WARNING: concatenated proteins don't have unique headers"
51
- puts "[you probably wanted to use the '--prefix' option!]"
52
- end
53
-
54
- # OUT filename:
55
- unless opt[:out]
56
- filebase = file.sub(/\..*$/,'')
57
- parts = [filebase]
58
- parts << 'cat' if opt[:cat]
59
- parts << method
60
- parts << 'prefix' << opt[:prefix] if opt[:prefix]
61
- parts << 'fraction' << opt[:fraction] if opt[:fraction]
62
- parts << 'tryptic_peptides' if opt[:tryptic_peptides]
63
- opt[:out] = parts.join("_") << ".fasta"
64
- end
65
-
66
- ## READ the file
67
- fasta = Fasta.new.read_file(file)
68
-
69
- ## CAT (save an original copy)
70
- fasta_orig = fasta.dup if opt[:cat]
71
-
72
- ## FRACTION the proteins
73
- if f = opt[:fraction]
74
- prefix = nil
75
- f = f.to_f
76
- if f > 1.0
77
- prefix = proc {|cnt| "f#{cnt}_" }
78
- end
79
- fasta = fasta.fraction_of_prots(f, prefix)
80
- end
81
-
82
- ## PREFIX the proteins
83
- if pre = opt[:prefix]
84
- fasta.header_prefix!(pre)
85
- end
86
-
87
- ## MODIFY the proteins
88
- fasta.aaseq!((method + '!').to_sym, opt[:tryptic_peptides])
89
-
90
- ## CAT (finish it up)
91
- if opt[:cat]
92
- fasta_orig << fasta
93
- fasta = fasta_orig
94
- end
95
-
96
- ## WRITE out the file
97
- fasta.write_file(opt[:out])
98
-
99
-
100
-
5
+ FastaShaker.shake_from_argv(ARGV)
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'spec_id/precision/filter'
4
+
5
+ SpecID::Precision::Filter.new.filter_and_validate_cmdline(ARGV)
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/ruby
2
2
 
3
- require 'spec/mzxml/parser'
3
+ require 'ms/msrun'
4
4
  require 'optparse'
5
5
  require 'ostruct'
6
6
  require 'lmat'
@@ -14,7 +14,8 @@ opt[:inc_mz] = 1.0
14
14
 
15
15
  # get options:
16
16
  opts = OptionParser.new do |op|
17
- op.banner = "usage: #{File.basename(__FILE__)} [options] file.mzXML ..."
17
+ op.banner = "usage: #{File.basename(__FILE__)} [options] <msfile> ..."
18
+ op.separator "input: .mzdata or .mzXML (versions 1.x and 2.x)"
18
19
  op.separator ""
19
20
  op.separator "(sums m/z values that round to the same bin)"
20
21
  op.separator ""
@@ -32,10 +33,10 @@ if ARGV.size < 1
32
33
  end
33
34
 
34
35
  ARGV.each do |file|
35
- parser = Spec::MzXML::Parser.new
36
- (start_mz, end_mz) = parser.start_and_end_mz(file)
37
- (times, spectra) = parser.times_and_spectra(file)
38
- times.map! do |tm| tm.to_f end
36
+ msrun = MS::MSRun.new(file)
37
+ mslevel = 1
38
+ (start_mz, end_mz) = msrun.start_and_end_mz(mslevel)
39
+ (times, spectra) = msrun.times_and_spectra(mslevel)
39
40
  args = {
40
41
  :start_mz => start_mz,
41
42
  :end_mz => end_mz,
@@ -45,7 +46,7 @@ ARGV.each do |file|
45
46
  :inc_tm => nil,
46
47
  }
47
48
  args.merge!(opt)
48
- lmat = LMat.new.from_raw_spectra(times, spectra, args)
49
+ lmat = LMat.new.from_times_and_spectra(times, spectra, args)
49
50
  outfile = file.sub(/\.mzXML$/, opt[:newext])
50
51
  if args[:ascii]
51
52
  outfile << "a"
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'spec_id/precision/prob'
4
+
5
+ SpecID::Precision::Prob.new.precision_vs_num_hits_cmdline(ARGV)
6
+
data/bin/raw_to_mzXML.rb CHANGED
@@ -21,11 +21,11 @@ if ARGV.size == 0
21
21
  exit
22
22
  end
23
23
 
24
- converter = Spec::MzXML.find_mzxml_converter
24
+ converter = MS::MzXML.find_mzxml_converter
25
25
  if converter
26
26
  $stderr.puts "using #{converter} to convert files"
27
27
  else
28
- puts "cannot find [#{Spec::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
28
+ puts "cannot find [#{MS::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
29
29
  puts ENV['PATH'].split(/[:;]/).join(", ")
30
30
  abort
31
31
  end
data/bin/srf_group.rb CHANGED
@@ -18,6 +18,7 @@ end
18
18
 
19
19
  if ARGV.size == 0
20
20
  puts opts
21
+ exit
21
22
  end
22
23
 
23
24
  obj = SRFGroup.new
data/bin/srf_to_sqt.rb ADDED
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'spec_id/srf'
4
+ require 'optparse'
5
+
6
+
7
+ opt = {}
8
+ opt['db-info'] = false
9
+ opt['db-path'] = nil
10
+ opt['filter'] = true
11
+ opts = OptionParser.new do |op|
12
+ op.banner = "usage: #{File.basename(__FILE__)} [OPTIONS] <file>.srf ..."
13
+ op.separator "outputs: <file>.sqt ..."
14
+ op.separator ""
15
+ op.separator "OPTIONS"
16
+ op.on("-d", "--db-info", "calculates num aa's and md5sum on db") {|v| opt['db-info'] = v }
17
+ op.on("-p", "--db-path <path_to_dir>", "if your database path has changed",
18
+ "and you want db-info, then give the",
19
+ "path to the new *directory*",
20
+ "e.g. /my/new/path") {|v| opt['db-path'] = v }
21
+ op.on("-u", "--db-update", "update the sqt file to reflect --db-path") {|v| opt['db-update'] = v }
22
+ op.on("-n", "--no-filter", "by default, pephit must be within",
23
+ "peptide_mass_tolerance (defined in params)",
24
+ "to be displayed. Turns this off.") {|v| opt['filter'] = false}
25
+ op.on("-r", "--round", "round floating point values reasonably") {|v| opt['round'] = v }
26
+ end
27
+
28
+ opts.parse!
29
+
30
+ if ARGV.size == 0
31
+ puts opts.to_s
32
+ exit
33
+ end
34
+
35
+ ARGV.each do |file|
36
+ abort "file #{file} must be named .srf" if file !~ /\.srf$/i
37
+ new_filename = file.sub(/\.srf$/i, '.sqt')
38
+ SRFGroup.new([file], opt['filter']).srfs.first.to_sqt(new_filename, :db_info => opt['db-info'], :new_db_path => opt['db-path'], :update_db_path => opt['db-update'], :round => opt['round'])
39
+ end
40
+
data/changelog.txt CHANGED
@@ -54,3 +54,71 @@ a prefix option
54
54
 
55
55
  in protein_summary.rb added handling for proteins with no annotation. (either
56
56
  display NA or use gi2annnot to grab them from NCBI)
57
+
58
+ ## version 0.2.5
59
+
60
+ renamed prep_list in roc (potential breaks in code)
61
+
62
+ ## version 0.2.6
63
+
64
+ 1. Massive refactorization of filtering and validation. Validation objects are
65
+ created and then can be used to validate just about anything.
66
+ 2. Massive redo of the parsing of MS runs. Can parse mzXML v1, v2.X
67
+ (including readw broken output), and mzData (even Thermo's broken output).
68
+ 4. Moved all tests to specs (rspec).
69
+ 5. Can read gradient programs off of .meth or .RAW files (both Xcal 1.X and
70
+ 2.X)
71
+
72
+ Bugfixes:
73
+ 1. The search_summary 'base_name' in pepxml output was incorrect (this did not
74
+ appear to influence our analyses, however). Fixed.
75
+ 2. Enzymes with no exceptions (e.g., cuts at KR) would report one too many
76
+ missed cleavages if the last amino acid was a cut point. Fixed.
77
+
78
+ ## version 0.2.7
79
+
80
+ 1. In conversion from bioworks to pepxml, the default was trypsin (KR/P).
81
+ Now, the sample enzyme is set explicitly from the params file and the option
82
+ is not available. This can give more accurate pepxml files than from
83
+ previous depending on your enzyme.
84
+
85
+ ## version 0.2.9
86
+
87
+ 1. Added support for phobius transmembrane predictions
88
+ 2. have filter_and_validate.rb working well (multiple validators allowed).
89
+ 3. Can read bioworks 3.3.1 .srf files (.srf version 3.5 files)
90
+ 4. Added a bias validator
91
+
92
+ ## version 0.2.10
93
+
94
+ 1. Fixed --hits_separate flag in spec_id/filter
95
+
96
+ ## version 0.2.11
97
+
98
+ 1. Added prob precision support and reorganized filter_and_validate libs
99
+
100
+ ## version 0.2.12
101
+
102
+ 1. Fixed bug in transmem for prob and others.
103
+ 2. Can use axml (XMLParser based) or libxml depending on availability
104
+
105
+ ## version 0.2.13
106
+
107
+ 1. Fixed issue with --hits_separate
108
+ 2. filter_and_validate.rb requires decoy validator if decoy proteins
109
+ (refactored code)
110
+
111
+ ## version 0.2.14
112
+
113
+ 1. Can read PeptideProphet files (should be able to read pepxml files, too)
114
+ 2. API change: Some slight modifications to the Sequest::PepXML object
115
+ interfaces and implementations (using ArrayClass)
116
+
117
+ ## version 0.2.15
118
+
119
+ 1. can convert srf files to sqt files
120
+
121
+ ## version 0.3.0
122
+
123
+ 1. IMPORTANT BUG FIX: protein reporting in srf files is correct now (proteins after the first protein were being assigned to the last hit in an out file).
124
+ 2. SQT export is correct and works at least on 3.2 and 3.3.1.
data/lib/align/chams.rb CHANGED
@@ -1,5 +1,5 @@
1
1
 
2
- require 'spec/msrun'
2
+ require 'ms/msrun'
3
3
 
4
4
  module Align; end
5
5
  class Align::CHAMS
@@ -8,7 +8,9 @@ class Align::CHAMS
8
8
  # Scan1 Scan2 Edge_cost Path_cost Edge_direction
9
9
  attr_accessor :avg_score, :time_mscans, :time_nscans, :mscans, :nscans, :edge_costs, :path_costs, :directions
10
10
 
11
- def initialize(chams_file, timeIndex_file1, timeIndex_file2)
11
+ # requires an object that will respond to [<scan_num>] to give time
12
+ # (seconds) for each file
13
+ def initialize(chams_file, time_by_scan_num1, time_by_scan_num2)
12
14
  @time_mscans = []
13
15
  @time_nscans = []
14
16
  @mscans = []
@@ -17,13 +19,11 @@ class Align::CHAMS
17
19
  @path_costs = []
18
20
  @directions = []
19
21
  read_chams_file(chams_file)
20
- scans_by_num1 = Spec::MSRunIndex.new(timeIndex_file1).scans_by_num
21
- scans_by_num2 = Spec::MSRunIndex.new(timeIndex_file2).scans_by_num
22
22
  @mscans.each_with_index do |scan,i|
23
- @time_mscans[i] = scans_by_num1[scan].time
23
+ @time_mscans[i] = time_by_scan_num1[scan]
24
24
  end
25
25
  @nscans.each_with_index do |scan,i|
26
- @time_nscans[i] = scans_by_num2[scan].time
26
+ @time_nscans[i] = time_by_scan_num2[scan]
27
27
  end
28
28
  end
29
29
 
data/lib/align.rb CHANGED
@@ -1,6 +1,7 @@
1
1
 
2
- require 'spec/mzxml/parser'
3
- require 'spec/msrun'
2
+ #require 'ms/parser'
3
+ #require 'ms/parser/mzxml'
4
+ require 'ms/msrun'
4
5
  require 'spec_id/proph'
5
6
  require 'vec'
6
7
 
@@ -18,7 +19,7 @@ class Align
18
19
 
19
20
  ## Create scan indices on msrun name
20
21
  if mztimes.class != Array ; mztimes = [mztimes] end
21
- msrun_indices = mztimes.collect do |file| Spec::MSRunIndex.new(file) end
22
+ msrun_indices = mztimes.collect do |file| MS::MSRunIndex.new(file) end
22
23
  scanindex_by_basename_noext = {}
23
24
  msrun_indices.each do |runindex|
24
25
  scanindex_by_basename_noext[runindex.basename_noext] = runindex.scans_by_num