mspire 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
data/INSTALL CHANGED
@@ -5,6 +5,7 @@ Prerequisites
5
5
  Much of the package will work without any prerequisites at all. Some functionality may require additional ruby packages or other converters. These are listed in current order of importance:
6
6
 
7
7
  * [xmlparser](http://www.yoshidam.net/Ruby.html) (comes with one-click Windows; on Ubuntu: 'sudo apt-get install libxml-parser-ruby1.8')
8
+ * [libxml](http://libxml.rubyforge.org/) in Ubuntu: sudo apt-get install libxml2 libxml2-dev ; sudo gem install libxml-ruby --remote
8
9
  * ['t2x'](http://sashimi.sourceforge.net/software_glossolalia.html#ReAdW) to convert .RAW files to version 1 mzXML files
9
10
  * [gnuplot](http://rgplot.rubyforge.org/) ('gem install gnuplot'). Of course, you'll need [gnuplot](http://www.gnuplot.info/) before this package will work. Under one-click installer for windows this package requires a little configuration. It works with no configuration on cygwin (or linux).
10
11
 
data/README CHANGED
@@ -18,6 +18,31 @@ The project is currently focusing on the following:
18
18
  * ProteinProphet
19
19
  * Preparation of files for [obiwarp](http://obi-warp.sourceforge.net/)
20
20
 
21
+ Features
22
+ --------
23
+
24
+ * mzXML (version 1 & 2) parsing
25
+ * mzData parsing
26
+ * bioworks .srf (binary files) reader
27
+ * bioworks to PeptideProphet input (pepXML files)
28
+ * lightweight APEX values parser
29
+ * histogram protein probabilities
30
+ * developed for Linux, should port easily to Windows or others
31
+ * protein summary views with custom false ID cutoff values
32
+ * conversion to OBI-Warp input files
33
+
34
+ Validation by:
35
+ * Various Decoy Database search options: Reverse/Shuffle, concatenated/separate, with various hashing options (e.g., by amino acid sequence + charge)
36
+ * Amino acid (e.g., search for unblocked cysteines)
37
+ * Transmembrane prediction (Phobius or TopPred)
38
+ * Generic sample bias (e.g., low abundance/high abundance proteins)
39
+ * Defined sample
40
+
41
+ Working with:
42
+ * Bioworks (3.2-3.3.1)
43
+ * Peptide/Protein Prophet
44
+ * Easily extensible to others
45
+
21
46
  Tutorials
22
47
  ---------
23
48
 
data/Rakefile CHANGED
@@ -2,9 +2,9 @@ require 'rake'
2
2
  require 'rubygems'
3
3
  require 'rake/rdoctask'
4
4
  require 'rake/gempackagetask'
5
- require 'rake/testtask'
6
5
  require 'rake/clean'
7
6
  require 'fileutils'
7
+ require 'spec/rake/spectask'
8
8
 
9
9
  ###############################################
10
10
  # GLOBAL
@@ -13,23 +13,25 @@ FL = FileList
13
13
 
14
14
  NAME = "mspire"
15
15
 
16
- lib_files = FL["lib/**/*"]
17
- test_dir_too = FL["test/**/*"]
16
+ $dependencies = %w(libjtp)
17
+ $tfiles_large = 'test_files_large'
18
+ changelog = "changelog.txt"
18
19
 
19
- little_dist_files = lib_files + FL["INSTALL", "README", "Rakefile", "LICENSE", "changelog.txt", "release_notes.txt", "{bin,script,tutorial}/**/*"]
20
- dist_files = lib_files + FL["INSTALL", "README", "Rakefile", "LICENSE", "{bin,script,tutorial}/**/*", test_dir_too]
20
+ core_files = FL["INSTALL", "README", "Rakefile", "LICENSE", changelog, "release_notes.txt", "{lib,bin,script,specs,tutorial,test_files}/**/*"]
21
+ big_dist_files = core_files + FL["test_files_large/**/*"]
21
22
 
22
- dist_files = little_dist_files # comment out to include test files
23
+ dist_files = core_files
24
+ # dist_files = big_dist_files
23
25
 
24
26
  ###############################################
25
27
  # ENVIRONMENT
26
28
  ###############################################
27
29
 
28
30
  ENV["OS"] == "Windows_NT" ? WIN32 = true : WIN32 = false
29
- gemcmd = "gem"
31
+ $gemcmd = "gem"
30
32
  if WIN32
31
33
  unless ENV["TERM"] == "cygwin"
32
- gemcmd << ".cmd"
34
+ $gemcmd << ".cmd"
33
35
  end
34
36
  end
35
37
 
@@ -81,40 +83,123 @@ end
81
83
  # TESTS
82
84
  ###############################################
83
85
 
84
- desc "Run unit tests."
85
- Rake::TestTask.new do |t|
86
- reply = `#{gemcmd} list -l #{NAME}`
86
+ namespace :spec do
87
+ task :autotest do
88
+ require './specs/rspec_autotest'
89
+ RspecAutotest.run
90
+ end
91
+ end
92
+
93
+
94
+ task :ensure_dependencies do
95
+ $dependencies.each do |dep|
96
+ unless `#{$gemcmd} list -l #{dep}`.include?(dep)
97
+ abort "ABORTING: install #{dep} before testing!"
98
+ end
99
+ end
100
+ end
101
+
102
+ task :ensure_large_testfiles do
103
+ if !File.exist?($tfiles_large) and !ENV['SPEC_LARGE'].nil?
104
+ warn "Not running with large files since #{$tfiles_large} does not exist!"
105
+ warn "Removing SPEC_LARGE from ENV!"
106
+ ENV.delete('SPEC_LARGE')
107
+ end
108
+ end
109
+
110
+ task :ensure_gem_is_uninstalled do
111
+ reply = `#{$gemcmd} list -l #{NAME}`
87
112
  if reply.include? NAME + " ("
88
113
  puts "GOING to uninstall gem '#{NAME}' for testing"
89
114
  if WIN32
90
- %x( #{gemcmd} uninstall -x #{NAME} )
115
+ %x( #{$gemcmd} uninstall -x #{NAME} )
91
116
  else
92
- %x( sudo #{gemcmd} uninstall -x #{NAME} )
117
+ %x( sudo #{$gemcmd} uninstall -x #{NAME} )
93
118
  end
94
119
  end
95
- # t.libs << "lib" ## done by default
96
- t.test_files = FL["test/tc_*.rb"]
97
- #t.verbose = true
98
120
  end
99
121
 
122
+ desc "Run all specs"
123
+ Spec::Rake::SpecTask.new('spec') do |t|
124
+ Rake::Task[:ensure_gem_is_uninstalled].invoke
125
+ Rake::Task[:ensure_dependencies].invoke
126
+ Rake::Task[:ensure_large_testfiles].invoke
127
+ t.libs = ['lib']
128
+ #t.ruby_opts = ['-I', 'lib']
129
+ t.spec_files = FileList['specs/**/*_spec.rb']
130
+ end
100
131
 
132
+ desc "Run all specs"
133
+ Spec::Rake::SpecTask.new('specl') do |t|
134
+ Rake::Task[:ensure_gem_is_uninstalled].invoke
135
+ Rake::Task[:ensure_dependencies].invoke
136
+ Rake::Task[:ensure_large_testfiles].invoke
137
+ t.spec_files = FileList['specs/**/*_spec.rb']
138
+ t.libs = ['lib']
139
+ #t.ruby_opts = ['-I', 'lib']
140
+ t.spec_opts = ['--format', 'specdoc' ]
141
+ end
101
142
 
102
- desc "Run unit tests individual on each test"
103
- task :test_ind do |t|
104
- reply = `#{gemcmd} list -l #{NAME}`
105
- if reply.include? NAME + " ("
106
- %x( sudo #{gemcmd} uninstall -x #{NAME} )
107
- end
143
+ desc "Run all specs with RCov"
144
+ Spec::Rake::SpecTask.new('rcov') do |t|
145
+ Rake::Task[:ensure_gem_is_uninstalled].invoke
146
+ Rake::Task[:ensure_dependencies].invoke
147
+ Rake::Task[:ensure_large_testfiles].invoke
148
+ t.spec_files = FileList['specs/**/*_spec.rb']
149
+ t.rcov = true
150
+ t.libs = ['lib']
151
+ #t.ruby_opts = ['-I', 'lib']
152
+ t.rcov_opts = ['--exclude', 'specs']
153
+ end
108
154
 
109
- # t.libs << "lib" ## done by default
110
- test_files = FL["test/tc_*.rb"]
111
- test_files.each do |file|
112
- puts "TESTING: #{file.sub(/test\//,'')}"
113
- puts `ruby -I lib #{file}`
155
+ task :speci => [:ensure_gem_is_uninstalled, :ensure_dependencies, :ensure_large_testfiles] do
156
+ # files that match a key word
157
+ files_to_run = ENV['SPEC'] || FileList['specs/**/*_spec.rb']
158
+ if ENV['SPECM']
159
+ files_to_run = files_to_run.select do |file|
160
+ file.include?(ENV['SPECM'])
161
+ end
162
+ end
163
+ files_to_run.each do |spc|
164
+ puts "------ SPEC=#{spc} ------"
165
+ system "ruby -I lib -S spec #{spc} --format specdoc"
114
166
  end
115
- #t.verbose = true
116
167
  end
117
168
 
169
+ #Spec::Rake::SpecTask.new(:spec) do |t|
170
+ # uninstall_gem
171
+ # t.spec_files = FileList['spec/**/spec_*.rb']
172
+ # t.libs = FileList['lib']
173
+ # t.spec_opts = ['--format', 'specdoc']
174
+ #end
175
+
176
+
177
+ #desc "Run unit tests."
178
+ #Rake::TestTask.new do |t|
179
+ # uninstall_gem
180
+ # # t.libs << "lib" ## done by default
181
+ # t.test_files = FL["test/tc_*.rb"]
182
+ # #t.verbose = true
183
+ #end
184
+
185
+
186
+
187
+ #desc "Run unit tests individual on each test"
188
+ #task :test_ind do |t|
189
+ # reply = `#{$gemcmd} list -l #{NAME}`
190
+ # if reply.include? NAME + " ("
191
+ # %x( sudo #{$gemcmd} uninstall -x #{NAME} )
192
+ # end
193
+ #
194
+ # # t.libs << "lib" ## done by default
195
+ # test_files = FL["test/tc_*.rb"]
196
+ # test_files.each do |file|
197
+ # puts "TESTING: #{file.sub(/test\//,'')}"
198
+ # puts `ruby -I lib #{file}`
199
+ # end
200
+ # #t.verbose = true
201
+ #end
202
+
118
203
 
119
204
 
120
205
 
@@ -140,7 +225,7 @@ tm = Time.now
140
225
  spec = Gem::Specification.new do |s|
141
226
  s.platform = Gem::Platform::RUBY
142
227
  s.name = NAME
143
- s.version = "0.2.4"
228
+ s.version = IO.readlines(changelog).grep(/##.*version/).pop.split(/\s+/).last.chomp
144
229
  s.summary = "Mass Spectrometry Proteomics Objects, Scripts, and Executables"
145
230
  s.date = "#{tm.year}-#{tm.month}-#{tm.day}"
146
231
  s.email = "jprince@icmb.utexas.edu"
@@ -149,17 +234,19 @@ spec = Gem::Specification.new do |s|
149
234
  s.description = "mspire is for working with mass spectrometry proteomics data"
150
235
  s.has_rdoc = true
151
236
  s.authors = ["John Prince"]
152
- s.files = little_dist_files
237
+ s.files = dist_files
153
238
  s.rdoc_options = rdoc_options
154
239
  s.extra_rdoc_files = rdoc_extra_includes
155
240
  s.executables = FL["bin/*"].map {|file| File.basename(file) }
156
- s.add_dependency('libjtp', '~> 0.1.4')
157
- s.requirements << '"xmlparser" is the prefered xml parser right now. REXML and regular expressions are used as fallback in some routines.'
241
+ s.add_dependency('libjtp', '~> 0.2.5')
242
+ s.add_dependency('axml')
243
+ s.requirements << '"libxml" is the prefered xml parser right now. libxml, xmlparser, REXML and regular expressions are used as fallback in some routines.'
158
244
  s.requirements << 'some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)'
159
245
  s.requirements << 'the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications'
160
246
  s.requirements << '"rake" is useful for development'
161
247
  s.requirements << '"webgen (with gems redcloth and bluecloth) is necessary to build web pages'
162
- s.test_files = FL["test/tc_*.rb"]
248
+ #s.test_files = FL["test/tc_*.rb"]
249
+ s.test_files = FL["specs/**/*_spec.rb"]
163
250
  end
164
251
 
165
252
  desc "Create packages."
@@ -180,20 +267,22 @@ end
180
267
  # t.package_task
181
268
  #end
182
269
 
183
-
184
-
270
+ task :remove_pkg do
271
+ FileUtils.rm_rf "pkg"
272
+ end
185
273
 
186
274
  task :install => [:reinstall]
187
275
 
188
276
  desc "uninstalls the package, packages a fresh one, and installs"
189
- task :reinstall => [:clean, :package] do
190
- reply = `#{gemcmd} list -l #{NAME}`
191
- if reply.include? NAME + " ("
192
- %x( #{gemcmd} uninstall -x #{NAME} )
277
+ task :reinstall => [:remove_pkg, :clean, :package] do
278
+ reply = `#{$gemcmd} list -l #{NAME}`
279
+ if reply.include?(NAME + " (")
280
+ %x( #{$gemcmd} uninstall -x #{NAME} )
193
281
  end
194
282
  FileUtils.cd("pkg") do
195
- %x( #{gemcmd} install #{NAME} )
283
+ %x( #{$gemcmd} install #{NAME}*.gem )
196
284
  end
285
+
197
286
  end
198
287
 
199
288
  ###############################################
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/ruby -w
2
2
 
3
-
3
+ require 'fasta'
4
4
  require 'spec_id/aa_freqs'
5
5
 
6
6
  if ARGV.size < 1
@@ -10,7 +10,7 @@ if ARGV.size < 1
10
10
  end
11
11
 
12
12
  ARGV.each do |file|
13
- obj = SpecID::AAFreqs.new(file)
13
+ obj = SpecID::AAFreqs.new(Fasta.new(file))
14
14
  puts file
15
15
  obj.aafreqs.sort_by{|v| v.to_s }.each do |k,v|
16
16
  puts "#{k}: #{v}"
@@ -12,6 +12,7 @@ DEFAULT_MS_MODEL = 'LCQ'
12
12
  DEFAULT_MASS_ANALYZER = 'Ion Trap'
13
13
  ##############################################################
14
14
 
15
+ require 'spec_id/sequest/pepxml'
15
16
  require 'spec_id'
16
17
  require 'optparse'
17
18
  require 'ostruct'
data/bin/fasta_shaker.rb CHANGED
@@ -1,100 +1,5 @@
1
1
  #!/usr/bin/ruby
2
2
 
3
- # This is my second attempt at writing a simple interface for messing with
4
- # fasta files. Acheiving simplicity (and power) is challenging. It usually
5
- # only happens on the second (or sometimes more) try. Of course, in
6
- # retrospect the simple solution seems sooo obvious. But its deceptive.
7
- # It takes work to acheive simplicity for complex tasks. That's my thought
8
- # for the day.
9
-
10
- # fasta_shaker as in a salt shaker. Shake up your fasta proteins and let them
11
- # season your dinner (hopefully a protein dinner). Mmmm. Don't they taste
12
- # good all mixed up? If you want, you can think of it as a pepper shaker.
13
- # I don't usually comment on my scripts (in my script, anyway), but this one
14
- # came out so nice and clean that I feel like I have room to spare.
15
3
 
16
4
  require 'fasta'
17
- require 'optparse'
18
-
19
- opt = {}
20
-
21
- opts = OptionParser.new do |op|
22
- prog = File.basename(__FILE__)
23
- op.banner = "usage: #{prog} <method> [OPTIONS] <file>.fasta"
24
- op.separator " <method> = reverse | shuffle"
25
- op.on("-c", "--cat", "catenates the output to copy of original") {|v| opt[:cat] = v }
26
- op.on("-o", "--out <string>", "name of output file (default is descriptive)") {|v| opt[:out] = v }
27
- op.on("-p", "--prefix <string>", "give a header prefix to modified prots") {|v| opt[:prefix] = v }
28
- op.on("-f", "--fraction <float>", "creates some fraction of proteins") {|v| opt[:fraction] = v }
29
- op.separator " [if fraction > 1 then the tag 'f<frac#>_' prefixed to proteins"
30
- op.separator " (after any given prefix) so that proteins are unique]"
31
- op.on("--tryptic_peptides", "applies method to [KR][^P] peptides") {|v| opt[:tryptic_peptides] = v }
32
-
33
- op.separator "EXAMPLES: "
34
- op.separator " #{prog} reverse file.fasta -o protein_aa_sequence_reversed.fasta"
35
- op.separator " #{prog} shuffle file.fasta -o protein_aa_sequence_shuffled.fasta"
36
- op.separator " #{prog} shuffle file.fasta -c -p SH_ -o normal_cat_shuffled_with_prefix.fasta"
37
- op.separator " #{prog} reverse file.fasta --tryptic_peptides tryptic_peptides_reversed.fasta"
38
- end
39
-
40
- opts.parse!
41
-
42
- if ARGV.size < 2
43
- puts opts
44
- exit
45
- end
46
-
47
- (method, file) = ARGV
48
-
49
- if opt[:cat] && !opt[:prefix]
50
- puts "WARNING: concatenated proteins don't have unique headers"
51
- puts "[you probably wanted to use the '--prefix' option!]"
52
- end
53
-
54
- # OUT filename:
55
- unless opt[:out]
56
- filebase = file.sub(/\..*$/,'')
57
- parts = [filebase]
58
- parts << 'cat' if opt[:cat]
59
- parts << method
60
- parts << 'prefix' << opt[:prefix] if opt[:prefix]
61
- parts << 'fraction' << opt[:fraction] if opt[:fraction]
62
- parts << 'tryptic_peptides' if opt[:tryptic_peptides]
63
- opt[:out] = parts.join("_") << ".fasta"
64
- end
65
-
66
- ## READ the file
67
- fasta = Fasta.new.read_file(file)
68
-
69
- ## CAT (save an original copy)
70
- fasta_orig = fasta.dup if opt[:cat]
71
-
72
- ## FRACTION the proteins
73
- if f = opt[:fraction]
74
- prefix = nil
75
- f = f.to_f
76
- if f > 1.0
77
- prefix = proc {|cnt| "f#{cnt}_" }
78
- end
79
- fasta = fasta.fraction_of_prots(f, prefix)
80
- end
81
-
82
- ## PREFIX the proteins
83
- if pre = opt[:prefix]
84
- fasta.header_prefix!(pre)
85
- end
86
-
87
- ## MODIFY the proteins
88
- fasta.aaseq!((method + '!').to_sym, opt[:tryptic_peptides])
89
-
90
- ## CAT (finish it up)
91
- if opt[:cat]
92
- fasta_orig << fasta
93
- fasta = fasta_orig
94
- end
95
-
96
- ## WRITE out the file
97
- fasta.write_file(opt[:out])
98
-
99
-
100
-
5
+ FastaShaker.shake_from_argv(ARGV)
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'spec_id/precision/filter'
4
+
5
+ SpecID::Precision::Filter.new.filter_and_validate_cmdline(ARGV)
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/ruby
2
2
 
3
- require 'spec/mzxml/parser'
3
+ require 'ms/msrun'
4
4
  require 'optparse'
5
5
  require 'ostruct'
6
6
  require 'lmat'
@@ -14,7 +14,8 @@ opt[:inc_mz] = 1.0
14
14
 
15
15
  # get options:
16
16
  opts = OptionParser.new do |op|
17
- op.banner = "usage: #{File.basename(__FILE__)} [options] file.mzXML ..."
17
+ op.banner = "usage: #{File.basename(__FILE__)} [options] <msfile> ..."
18
+ op.separator "input: .mzdata or .mzXML (versions 1.x and 2.x)"
18
19
  op.separator ""
19
20
  op.separator "(sums m/z values that round to the same bin)"
20
21
  op.separator ""
@@ -32,10 +33,10 @@ if ARGV.size < 1
32
33
  end
33
34
 
34
35
  ARGV.each do |file|
35
- parser = Spec::MzXML::Parser.new
36
- (start_mz, end_mz) = parser.start_and_end_mz(file)
37
- (times, spectra) = parser.times_and_spectra(file)
38
- times.map! do |tm| tm.to_f end
36
+ msrun = MS::MSRun.new(file)
37
+ mslevel = 1
38
+ (start_mz, end_mz) = msrun.start_and_end_mz(mslevel)
39
+ (times, spectra) = msrun.times_and_spectra(mslevel)
39
40
  args = {
40
41
  :start_mz => start_mz,
41
42
  :end_mz => end_mz,
@@ -45,7 +46,7 @@ ARGV.each do |file|
45
46
  :inc_tm => nil,
46
47
  }
47
48
  args.merge!(opt)
48
- lmat = LMat.new.from_raw_spectra(times, spectra, args)
49
+ lmat = LMat.new.from_times_and_spectra(times, spectra, args)
49
50
  outfile = file.sub(/\.mzXML$/, opt[:newext])
50
51
  if args[:ascii]
51
52
  outfile << "a"
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'spec_id/precision/prob'
4
+
5
+ SpecID::Precision::Prob.new.precision_vs_num_hits_cmdline(ARGV)
6
+
data/bin/raw_to_mzXML.rb CHANGED
@@ -21,11 +21,11 @@ if ARGV.size == 0
21
21
  exit
22
22
  end
23
23
 
24
- converter = Spec::MzXML.find_mzxml_converter
24
+ converter = MS::MzXML.find_mzxml_converter
25
25
  if converter
26
26
  $stderr.puts "using #{converter} to convert files"
27
27
  else
28
- puts "cannot find [#{Spec::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
28
+ puts "cannot find [#{MS::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
29
29
  puts ENV['PATH'].split(/[:;]/).join(", ")
30
30
  abort
31
31
  end
data/bin/srf_group.rb CHANGED
@@ -18,6 +18,7 @@ end
18
18
 
19
19
  if ARGV.size == 0
20
20
  puts opts
21
+ exit
21
22
  end
22
23
 
23
24
  obj = SRFGroup.new
data/bin/srf_to_sqt.rb ADDED
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'spec_id/srf'
4
+ require 'optparse'
5
+
6
+
7
+ opt = {}
8
+ opt['db-info'] = false
9
+ opt['db-path'] = nil
10
+ opt['filter'] = true
11
+ opts = OptionParser.new do |op|
12
+ op.banner = "usage: #{File.basename(__FILE__)} [OPTIONS] <file>.srf ..."
13
+ op.separator "outputs: <file>.sqt ..."
14
+ op.separator ""
15
+ op.separator "OPTIONS"
16
+ op.on("-d", "--db-info", "calculates num aa's and md5sum on db") {|v| opt['db-info'] = v }
17
+ op.on("-p", "--db-path <path_to_dir>", "if your database path has changed",
18
+ "and you want db-info, then give the",
19
+ "path to the new *directory*",
20
+ "e.g. /my/new/path") {|v| opt['db-path'] = v }
21
+ op.on("-u", "--db-update", "update the sqt file to reflect --db-path") {|v| opt['db-update'] = v }
22
+ op.on("-n", "--no-filter", "by default, pephit must be within",
23
+ "peptide_mass_tolerance (defined in params)",
24
+ "to be displayed. Turns this off.") {|v| opt['filter'] = false}
25
+ op.on("-r", "--round", "round floating point values reasonably") {|v| opt['round'] = v }
26
+ end
27
+
28
+ opts.parse!
29
+
30
+ if ARGV.size == 0
31
+ puts opts.to_s
32
+ exit
33
+ end
34
+
35
+ ARGV.each do |file|
36
+ abort "file #{file} must be named .srf" if file !~ /\.srf$/i
37
+ new_filename = file.sub(/\.srf$/i, '.sqt')
38
+ SRFGroup.new([file], opt['filter']).srfs.first.to_sqt(new_filename, :db_info => opt['db-info'], :new_db_path => opt['db-path'], :update_db_path => opt['db-update'], :round => opt['round'])
39
+ end
40
+
data/changelog.txt CHANGED
@@ -54,3 +54,71 @@ a prefix option
54
54
 
55
55
  in protein_summary.rb added handling for proteins with no annotation. (either
56
56
  display NA or use gi2annnot to grab them from NCBI)
57
+
58
+ ## version 0.2.5
59
+
60
+ renamed prep_list in roc (potential breaks in code)
61
+
62
+ ## version 0.2.6
63
+
64
+ 1. Massive refactorization of filtering and validation. Validation objects are
65
+ created and then can be used to validate just about anything.
66
+ 2. Massive redo of the parsing of MS runs. Can parse mzXML v1, v2.X
67
+ (including readw broken output), and mzData (even Thermo's broken output).
68
+ 4. Moved all tests to specs (rspec).
69
+ 5. Can read gradient programs off of .meth or .RAW files (both Xcal 1.X and
70
+ 2.X)
71
+
72
+ Bugfixes:
73
+ 1. The search_summary 'base_name' in pepxml output was incorrect (this did not
74
+ appear to influence our analyses, however). Fixed.
75
+ 2. Enzymes with no exceptions (e.g., cuts at KR) would report one too many
76
+ missed cleavages if the last amino acid was a cut point. Fixed.
77
+
78
+ ## version 0.2.7
79
+
80
+ 1. In conversion from bioworks to pepxml, the default was trypsin (KR/P).
81
+ Now, the sample enzyme is set explicitly from the params file and the option
82
+ is not available. This can give more accurate pepxml files than from
83
+ previous depending on your enzyme.
84
+
85
+ ## version 0.2.9
86
+
87
+ 1. Added support for phobius transmembrane predictions
88
+ 2. have filter_and_validate.rb working well (multiple validators allowed).
89
+ 3. Can read bioworks 3.3.1 .srf files (.srf version 3.5 files)
90
+ 4. Added a bias validator
91
+
92
+ ## version 0.2.10
93
+
94
+ 1. Fixed --hits_separate flag in spec_id/filter
95
+
96
+ ## version 0.2.11
97
+
98
+ 1. Added prob precision support and reorganized filter_and_validate libs
99
+
100
+ ## version 0.2.12
101
+
102
+ 1. Fixed bug in transmem for prob and others.
103
+ 2. Can use axml (XMLParser based) or libxml depending on availability
104
+
105
+ ## version 0.2.13
106
+
107
+ 1. Fixed issue with --hits_separate
108
+ 2. filter_and_validate.rb requires decoy validator if decoy proteins
109
+ (refactored code)
110
+
111
+ ## version 0.2.14
112
+
113
+ 1. Can read PeptideProphet files (should be able to read pepxml files, too)
114
+ 2. API change: Some slight modifications to the Sequest::PepXML object
115
+ interfaces and implementations (using ArrayClass)
116
+
117
+ ## version 0.2.15
118
+
119
+ 1. can convert srf files to sqt files
120
+
121
+ ## version 0.3.0
122
+
123
+ 1. IMPORTANT BUG FIX: protein reporting in srf files is correct now (proteins after the first protein were being assigned to the last hit in an out file).
124
+ 2. SQT export is correct and works at least on 3.2 and 3.3.1.
data/lib/align/chams.rb CHANGED
@@ -1,5 +1,5 @@
1
1
 
2
- require 'spec/msrun'
2
+ require 'ms/msrun'
3
3
 
4
4
  module Align; end
5
5
  class Align::CHAMS
@@ -8,7 +8,9 @@ class Align::CHAMS
8
8
  # Scan1 Scan2 Edge_cost Path_cost Edge_direction
9
9
  attr_accessor :avg_score, :time_mscans, :time_nscans, :mscans, :nscans, :edge_costs, :path_costs, :directions
10
10
 
11
- def initialize(chams_file, timeIndex_file1, timeIndex_file2)
11
+ # requires an object that will respond to [<scan_num>] to give time
12
+ # (seconds) for each file
13
+ def initialize(chams_file, time_by_scan_num1, time_by_scan_num2)
12
14
  @time_mscans = []
13
15
  @time_nscans = []
14
16
  @mscans = []
@@ -17,13 +19,11 @@ class Align::CHAMS
17
19
  @path_costs = []
18
20
  @directions = []
19
21
  read_chams_file(chams_file)
20
- scans_by_num1 = Spec::MSRunIndex.new(timeIndex_file1).scans_by_num
21
- scans_by_num2 = Spec::MSRunIndex.new(timeIndex_file2).scans_by_num
22
22
  @mscans.each_with_index do |scan,i|
23
- @time_mscans[i] = scans_by_num1[scan].time
23
+ @time_mscans[i] = time_by_scan_num1[scan]
24
24
  end
25
25
  @nscans.each_with_index do |scan,i|
26
- @time_nscans[i] = scans_by_num2[scan].time
26
+ @time_nscans[i] = time_by_scan_num2[scan]
27
27
  end
28
28
  end
29
29
 
data/lib/align.rb CHANGED
@@ -1,6 +1,7 @@
1
1
 
2
- require 'spec/mzxml/parser'
3
- require 'spec/msrun'
2
+ #require 'ms/parser'
3
+ #require 'ms/parser/mzxml'
4
+ require 'ms/msrun'
4
5
  require 'spec_id/proph'
5
6
  require 'vec'
6
7
 
@@ -18,7 +19,7 @@ class Align
18
19
 
19
20
  ## Create scan indices on msrun name
20
21
  if mztimes.class != Array ; mztimes = [mztimes] end
21
- msrun_indices = mztimes.collect do |file| Spec::MSRunIndex.new(file) end
22
+ msrun_indices = mztimes.collect do |file| MS::MSRunIndex.new(file) end
22
23
  scanindex_by_basename_noext = {}
23
24
  msrun_indices.each do |runindex|
24
25
  scanindex_by_basename_noext[runindex.basename_noext] = runindex.scans_by_num