mspire 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
data/lib/ms/parser.rb ADDED
@@ -0,0 +1,108 @@
1
+ require 'xml_style_parser'
2
+
3
+ module MS; end
4
+
5
# Factory and file-type sniffing for mass spec file parsers.
#
# MS::Parser.new(file_or_type, parse_type) determines the file type and
# version, requires the matching 'ms/parser/<type>' file and returns an
# instance of the corresponding MS::Parser::<Type> class.
module MS::Parser
  # inherits attr_accessor :method, :default_parser, and parse (which should
  # be overridden)
  include XMLStyleParser

  # Matches the sashimi schema URL in an mzXML header; capture 2 is the
  # schema name:
  #   'http://sashimi.sourceforge.net/schema/MsXML.xsd'           # version 1
  #   'http://sashimi.sourceforge.net/schema_revision/mzXML_X.X'  # others
  Mzxml_regexp = /http:\/\/sashimi.sourceforge.net\/schema(_revision)?\/([\w\d_\.]+)/o
  # Matches an opening mzData tag; capture 1 is its version attribute.
  Mzdata_regexp = /<mzData.*version="([\d\.]+)"/m

  attr_accessor :version

  ############################################
  # POINTERS (to create META MAGIC)
  ############################################

  # filetype symbol => name of the parser constant under MS::Parser
  @@filetypes_to_upcase = {
    :mzxml => 'MzXML',
    :mzdata => 'MzData',
    :mzml => 'MzML',
    :raw => 'Raw',
  }

  # filetype symbol => path handed to require (e.g. 'ms/parser/mzxml')
  @@filetypes_to_require = {}
  # filetype symbol => fully qualified constant name as a string
  @@filetypes_to_constant = {}

  # every *.rb file sitting in the parser/ directory next to this file is
  # treated as a parser for the filetype of the same name
  abbrevs = Dir.chdir(File.dirname(__FILE__) + "/parser") do
    Dir["*.rb"].map {|f| f.sub(/\.rb$/,'') }
  end
  abbrevs.each do |abbr|
    abb = abbr.to_sym
    req = ['ms', 'parser', abbr].join("/")
    @@filetypes_to_require[abb] = req
    @@filetypes_to_constant[abb] = ['MS', 'Parser', @@filetypes_to_upcase[abb]].join("::")
  end

  ############################################
  # END POINTERS
  ############################################

  # finds the filetype of a file (expects to be at the beginning) and rewinds
  # the filehandle to the beginning.  returns [filetype, version], or nil if
  # filetype and version could not be determined.
  #
  # fh_or_filename is either an IO object or something File.open accepts.
  # Aborts if an mzXML schema URL is found but its version is unrecognized.
  def self.filetype_and_version(fh_or_filename)
    unless fh_or_filename.is_a? IO
      return File.open(fh_or_filename) {|fh| filetype_and_version(fh) }
    end

    fh = fh_or_filename

    # Test for RAW file: the unpack template skips two bytes and then takes
    # every other byte (8 of them), which needs at least 17 bytes.  Shorter
    # (or empty) files cannot be RAW and must not be unpacked, since read
    # returns nil at EOF and unpack raises when the template overruns.
    header_bytes = fh.read(18)
    fh.rewind  # always rewind, also before the early RAW return
    if header_bytes && (header_bytes.size >= 17)
      if header_bytes.unpack('@2axaxaxaxaxaxaxa').join == 'Finnigan'
        return [:raw, nil]
      end
    end

    found = nil
    while (line = fh.gets)
      found =
        case line
        when Mzxml_regexp
          mtch = $2.dup
          case mtch
          when /mzXML_([\d\.]+)/
            [:mzxml, $1.dup]
          when /MsXML/
            [:mzxml, '1.0']
          else
            abort "Cannot determine mzXML version!"
          end
        when Mzdata_regexp
          [:mzdata, $1.dup]
        end
      break if found
    end
    fh.rewind
    found
  end

  # filetype_version is an example file to parse, or it is an array: [type, version].
  # parse_type is the information to be gleaned (as symbol).
  def self.new(filetype_version, parse_type)
    unless filetype_version.is_a? Array
      filetype_version = filetype_and_version(filetype_version)
    end
    require_and_create_parser(filetype_version, parse_type)
  end

  # Internal helper (a bare `private` does not apply to `def self.` methods,
  # so it stays callable to remain backward compatible).
  #
  # returns a working parser.
  # Raises ArgumentError if no parser file is known for the filetype (this
  # includes a nil filetype_version from an undetermined file).
  def self.require_and_create_parser(filetype_version, parse_type)
    (filetype, version) = filetype_version
    to_require = @@filetypes_to_require[filetype]
    unless to_require
      raise ArgumentError, "no parser available for filetype: #{filetype.inspect}"
    end
    require to_require
    parser_class = MS::Parser.const_get(@@filetypes_to_upcase[filetype])
    parser_class.new(parse_type, version)
  end

end
data/lib/ms/precursor.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'array_class'
2
+
3
+ module MS; end
4
+
5
+ # parent == spectrumRef references a scan
6
+ # 0 1 2 3 4
7
# Array-backed precursor record; the field order below is the index order.
MS::Precursor = ArrayClass.new(
  %w(mz intensity parent ms_level charge_states)
)

# reopened so that methods can be added to the generated class
class MS::Precursor; end
data/lib/ms/scan.rb ADDED
@@ -0,0 +1,81 @@
1
+ require 'array_class'
2
+ require 'ms/precursor'
3
+
4
+ module MS ; end
5
+
6
+ # 0 1 2 3 4 5 6
7
MS::Scan = ArrayClass.new( %w(num ms_level time start_mz end_mz precursors spectrum) )

# time in seconds
# everything else in float/int or as array (precursors)

# A single scan of a mass spec run, stored as an array-backed record.
class MS::Scan

  # short human readable summary
  def to_s
    "<Scan num=#{num} ms_level=#{ms_level} time=#{time}>"
  end

  undef_method :inspect
  # Compact inspect: shows only the scalar attributes that are set, the
  # precursors, and the number of m/z values of the spectrum (instead of
  # dumping the whole spectrum).
  def inspect
    atts = %w(num ms_level time start_mz end_mz)
    display = atts.map do |att|
      if val = send(att.to_sym)
        "@#{att}=#{val}"
      else
        nil
      end
    end
    display.compact!
    spec_display = spectrum ? spectrum.mz.size : nil
    display << "@precursors=#{precursors.inspect}"
    display << "@spectrum=size:#{spec_display}"
    # every field is separated by ", " so that they do not run together
    "<MS::Scan:#{__id__} " + display.join(", ") + ">"
  end

  # returns the string (space delimited): "ms_level num time [prec_mz prec_inten]"
  # The precursor values are those of the first precursor; they are left off
  # when the scan has no precursor (or the intensity is not set).
  def to_index_file_string
    arr = [ms_level, num, time]
    first_precursor = precursors && precursors.first
    if first_precursor
      arr << first_precursor.mz
      # the precursor field is named 'intensity'
      if x = first_precursor.intensity then arr << x end
    end
    arr.join(" ")
  end

  # adds the attribute parent to each scan with a parent
  # (level 1 = no parent; level 2 = prev level 1, etc.)
  # nil entries in scans are skipped.
  # NOTE(review): 'parent' is not among the fields this class is created
  # with above -- confirm that parent= is provided elsewhere.
  def self.add_parent_scan(scans)
    prev_scan = nil
    parent_stack = [nil]
    ## we want to set the level to be the first mslevel we come to
    prev_level = 1
    scans.each do |scan|
      if scan then prev_level = scan.ms_level; break; end
    end
    scans.each do |scan|
      next unless scan  ## the first one is nil, (others?)
      level = scan.ms_level
      if prev_level < level
        # went one level deeper: the previous scan becomes the parent
        parent_stack.unshift prev_scan
      end
      if prev_level > level
        # came back up: drop one parent per level climbed
        (prev_level - level).times do parent_stack.shift end
      end
      scan.parent = parent_stack.first
      prev_level = level
      prev_scan = scan
    end
  end

end
80
+
81
+
data/lib/ms/spectrum.rb ADDED
@@ -0,0 +1,193 @@
1
+ require 'base64'
2
+ require 'bsearch'
3
+ require 'ms'
4
+
5
# A spectrum: parallel arrays of m/z values and intensities.
class MS::Spectrum

  # String#unpack directives for the supported byte order/precision combos
  Unpack_network_float = 'g*'
  Unpack_network_double = 'G*'
  Unpack_little_endian_float = 'e*'
  Unpack_little_endian_double = 'E*'

  # m/z's
  attr_accessor :mz
  # intensities
  attr_accessor :intensity

  def initialize(mz=[], intensity=[])
    @mz = mz
    @intensity = intensity
  end

  # true if there is at least one m/z value and the first one is Numeric
  def has_mz_data?
    (@mz.size > 0) && (@mz.first.is_a?(Numeric))
  end

  # true if there is at least one intensity and the first one is Numeric
  def has_intensity_data?
    (@intensity.size > 0) && (@intensity.first.is_a?(Numeric))
  end

  # takes a base64 string and returns an array
  # precision is 32 or 64 (bits per value); network_order selects network
  # byte order (true) or little endian (false).
  # Raises ArgumentError for any other precision.
  def self.base64_to_array(string, precision=32, network_order=true)
    unpack_code =
      case precision
      when 32
        network_order ? Unpack_network_float : Unpack_little_endian_float
      when 64
        network_order ? Unpack_network_double : Unpack_little_endian_double
      else
        raise ArgumentError, "precision must be 32 or 64 (was #{precision.inspect})"
      end
    Base64.decode64(string).unpack(unpack_code)
  end

  # creates a spectrum from separately encoded m/z and intensity strings
  def self.from_base64_pair(mz_string, mz_precision, mz_network_order, inten_string, inten_precision, inten_network_order)
    mz = base64_to_array(mz_string, mz_precision, mz_network_order)
    inten = base64_to_array(inten_string, inten_precision, inten_network_order)
    self.new(mz, inten)
  end

  # takes a base64 peaks string (alternating m/z and intensity values) and
  # returns a new spectrum
  def self.from_base64_peaks(string, precision=32, network_order=true)
    data = base64_to_array(string, precision, network_order)
    sz = data.size/2
    mz = Array.new(sz)
    intensity = Array.new(sz)
    my_ind = 0
    data.each_with_index do |dat,ind|
      if (ind % 2) == 0  # even index: an m/z value
        mz[my_ind] = dat
      else               # odd index: the intensity completing the pair
        intensity[my_ind] = dat
        my_ind += 1
      end
    end
    self.new(mz, intensity)
  end

  ######
  # NOT REALLY USING RIGHT NOW:
  ######

  # takes a base64 peaks string and returns an array of [m/z,intens] doublets
  # mzXML as network ordered
  def base64_peaks_to_pairs(string, precision=32)
    data = self.class.base64_to_array(string, precision)
    ndata = []
    data.each_with_index do |dat,ind|
      if (ind % 2) == 0  # even
        arr = Array.new(2)
        arr[0] = dat
        ndata.push( arr )
      else
        ndata.last[1] = dat
      end
    end
    ndata
  end

  # returns the index of the first value matching that m/z.  the argument m/z
  # may be less precise than the actual m/z (rounding to the same precision
  # given) but must be at least integer precision (after rounding)
  # implemented as binary search (bsearch from the web)
  # returns nil if nothing matches
  def index(mz)
    ind = @mz.bsearch_lower_boundary{|x| x <=> mz }
    return ind if @mz[ind] == mz

    # do a rounding game to see which one is it, or nil
    # find all the values rounding to the same integer in the locale
    # test each one fully in turn
    mz = mz.to_f
    mz_size = @mz.size
    if (ind < mz_size) && equal_after_rounding?(@mz[ind], mz)
      return ind
    end

    # look at the values above the boundary first ...
    up = ind
    loop do
      up += 1
      break if up >= mz_size
      mz_up = @mz[up]
      break if (mz_up.ceil - mz.ceil >= 2)
      return up if equal_after_rounding?(mz_up, mz)
    end

    # ... then at the values below it
    dn = ind
    loop do
      dn -= 1
      break if dn < 0
      mz_dn = @mz[dn]
      break if (mz.floor - mz_dn.floor >= 2)
      return dn if equal_after_rounding?(mz_dn, mz)
    end

    nil
  end

  # uses index function and returns the intensity at that value
  # (nil if the m/z is not found)
  def intensity_at_mz(mz)
    if x = index(mz)
      @intensity[x]
    else
      nil
    end
  end

  # less_precise should be a float
  # precise should be a float
  # true if both are the same after rounding to the precision of less_precise
  def equal_after_rounding?(precise, less_precise)
    # determine the precision of less_precise
    exp10 = precision_as_neg_int(less_precise)
    (precise*exp10).round == (less_precise*exp10).round
  end

  # returns 1 for ones place, 10 for tenths, 100 for hundredths
  # to a precision exceeding 1e-6
  def precision_as_neg_int(float)
    neg_exp10 = 1
    loop do
      over = float * neg_exp10
      rounded = over.round
      # done once the scaled value is (within 1e-6) a whole number
      break if (over - rounded).abs <= 1e-6
      neg_exp10 *= 10
    end
    neg_exp10
  end

end
193
+
data/lib/ms.rb ADDED
@@ -0,0 +1,10 @@
1
+
2
+
3
# Top level namespace for the mass spec classes.
module MS
  # placeholder: takes an optional file and currently does nothing
  def new(file=nil); end

  attr_accessor :spectra
end
data/lib/mspire.rb ADDED
@@ -0,0 +1,4 @@
1
+
2
# Namespace holding the release version of the mspire package.
module Mspire; Version = '0.3.0'; end
data/lib/roc.rb CHANGED
@@ -42,10 +42,19 @@ class ROC
42
42
  area
43
43
  end
44
44
 
45
+ # takes two lists of values and makes doublets [[val, boolean],...]
46
def separate_to_doublets(tps, fps)
  # Merges true positive values (tps) and false positive values (fps) into
  # one list of [value, boolean] doublets sorted by value.
  # Tagging trues with 0 and falses with 1 makes a true sort ahead of a
  # false that has the same value.
  tagged = tps.map {|val| [val, 0] }
  fps.each {|val| tagged << [val, 1] }
  tagged.sort.map {|pair| [pair[0], pair[1] == 0] }
end
53
+
45
54
  # given an array of doublets where each doublet is a value and a boolean,
46
55
  # sorts the list and divides it into two arrays (tps, fps) of the values.
47
56
  # The output can then be fed into many of the other routines.
48
- def prep_list(list)
57
+ def doublets_to_separate(list)
49
58
  tp = []; fp = []
50
59
  list.each do |dbl|
51
60
  if dbl[1]
@@ -85,6 +94,27 @@ class ROC
85
94
  end
86
95
  return x, y
87
96
  end
97
+
98
+ # takes previously sorted doublets [value, boolean]
99
def numhits_and_ppv(doublets)
  # Walks previously sorted [value, boolean] doublets and returns
  # [num_hits_array, ppv_array].  A point is emitted at the last doublet of
  # each run of equal values: the number of doublets seen so far and the
  # fraction of those whose boolean is true.
  hit_counts = []
  ppvs = []
  true_count = 0
  last_index = doublets.size - 1
  doublets.each_with_index do |doublet, idx|
    true_count += 1 if doublet[1]
    # still inside a run of identical values: wait for its last member
    next unless (idx == last_index) || (doublet[0] != doublets[idx + 1][0])
    seen = idx + 1
    hit_counts << seen
    ppvs << true_count.to_f/seen
  end
  [hit_counts, ppvs]
end
116
+
117
+
88
118
  end
89
119
 
90
120
  # For calculating precision given lists of hits and decoy hits. The hits are
@@ -124,4 +154,34 @@ class DecoyROC < ROC
124
154
  [num_hits_ar, num_tps_ar, ppv_ar]
125
155
  end
126
156
 
157
+ # returns [num_hits, precision] as a function of num hits. decoy hits are
158
+ # seen merely as indicators of the number of false hits in the dataset.
159
+ # This is the same algorithm as pred_and_tps_and_ppv, just eliminates
160
+ # uneeded calcs
161
def pred_and_ppv(hits, decoy_hits)
  # Returns [num_hits_array, ppv_array].  For each hit that is the last of a
  # run of equal values, records how many hits were seen so far and the
  # fraction of them remaining after subtracting the number of decoy hits
  # that the hit is >= to.
  num_hits_ar = []
  ppv_ar = []
  decoys_passed = 0
  hits.each_with_index do |hit, idx|
    # advance past every decoy that this hit is >= to
    while (decoys_passed < decoy_hits.size) && (hit >= decoy_hits[decoys_passed])
      decoys_passed += 1
    end
    # only report once per run of identical hit values (at its last member)
    next if hit == hits[idx + 1]
    total = idx + 1
    num_hits_ar << total
    ppv_ar << ( (total - decoys_passed).to_f/total )
  end
  [num_hits_ar, ppv_ar]
end
186
+
127
187
  end
data/lib/sample_enzyme.rb CHANGED
@@ -1,9 +1,12 @@
1
1
 
2
2
  module SpecIDXML; end
3
3
 
4
- require 'spec_id_xml'
5
4
  require 'strscan'
6
5
 
6
+ require 'spec_id_xml'
7
+ require 'spec_id'
8
+
9
+
7
10
  class SampleEnzyme
8
11
  include SpecIDXML
9
12
 
@@ -18,6 +21,7 @@ class SampleEnzyme
18
21
  # Currently, recognize:
19
22
  # trypsin
20
23
  # For other enzymes, you must set :cut, :no_cut, :name, and :sense
24
+ # will yield the object if you want to set the values that way
21
25
  def initialize(name=nil)
22
26
  @sense = nil
23
27
  @cut = nil
@@ -25,11 +29,14 @@ class SampleEnzyme
25
29
  @name = name
26
30
  if @name
27
31
  # set the values if we recognize this name
28
- send(@name.to_sym)
32
+ send("set_#{@name}".to_sym)
33
+ end
34
+ if block_given?
35
+ yield(self)
29
36
  end
30
37
  end
31
38
 
32
- def trypsin
39
+ def set_trypsin
33
40
  @sense = 'C'
34
41
  @cut = 'KR'
35
42
  @no_cut = 'P'
@@ -41,10 +48,26 @@ class SampleEnzyme
41
48
  end
42
49
  end
43
50
 
51
+ # returns self
52
def from_pepxml_node(node)
  # Sets name from the node's 'name' attribute and cut, no_cut and sense
  # from the same-named attributes of node.child.
  # returns self
  self.name = node['name']
  specificity = node.child
  %w(cut no_cut sense).each do |att|
    send("#{att}=", specificity[att])
  end
  self
end

# creates a new object and fills it in from the node (see the instance
# method of the same name)
def self.from_pepxml_node(node)
  new.from_pepxml_node(node)
end
64
+
44
65
  # returns all peptides of missed cleavages <= 'missed_cleavages'
45
66
  # so 2 missed cleavages will return all no missed cleavage peptides
46
67
  # all 1 missed cleavages and all 2 missed cleavages.
47
- def digest(string, missed_cleavages=0)
68
+ # options:
69
+ def digest(string, missed_cleavages=0, options={})
70
+ raise NotImplementedError if @sense == 'N'
48
71
  s = StringScanner.new(string)
49
72
  no_cut_regex = Regexp.new("[#{@no_cut}]")
50
73
  regex = Regexp.new("[#{@cut}]")
@@ -75,7 +98,7 @@ class SampleEnzyme
75
98
  end
76
99
  ## LOOP through and grab each set of missed cleavages from num down to 0
77
100
  all_sets_of_peps = []
78
- (0..missed_cleavages).to_a.reverse.map do |num_mc|
101
+ (0..missed_cleavages).to_a.reverse.each do |num_mc|
79
102
  all_sets_of_peps.push( *(get_missed_cleavages(peps, num_mc)) )
80
103
  end
81
104
  all_sets_of_peps
@@ -85,9 +108,9 @@ class SampleEnzyme
85
108
  # cleavages
86
109
  # DOES NOT contain peptides that contain < num of missed cleavages
87
110
  # (i.e., will not return missed cleaveages of 1 or 2 if num == 3
88
- def get_missed_cleavages(tryptic_peps, num)
89
- (0...(tryptic_peps.size - num)).to_a.map do |i|
90
- tryptic_peps[i,num+1].join
111
def get_missed_cleavages(ar_of_peptide_seqs, num)
  # Joins every run of (num + 1) consecutive sequences, i.e. returns the
  # peptides with exactly num missed cleavages (empty if there are not
  # enough sequences).
  window = num + 1
  last_start = ar_of_peptide_seqs.size - num - 1
  joined = []
  0.upto(last_start) do |start|
    joined << ar_of_peptide_seqs[start, window].join
  end
  joined
end
92
115
  end
93
116
 
data/lib/scan_i.rb ADDED
@@ -0,0 +1,21 @@
1
+
2
+ # http://groups.google.com/group/comp.lang.ruby/browse_thread/thread/7370f94e852c0fae/4068c8c1c1c158ee
3
class String
  # Returns the start indices of all non-overlapping occurrences of the
  # string seq, searching left to right.
  #
  #   "abcabc".scan_i("bc")  # => [1, 4]
  #   "aaaa".scan_i("aa")    # => [0, 2]
  #
  # An empty seq matches at every position (including the end of the
  # string), like String#scan does with an empty pattern.
  def scan_i seq
    # always move forward at least one character: with an empty seq the
    # search position would otherwise never advance and the loop never end
    step = seq.length
    step = 1 if step == 0
    pos = 0
    ndx = []
    while (i = index(seq, pos))
      ndx << i
      pos = i + step
    end
    ndx
  end

  #def scan_enum seq
  #  self.enum_for(:scan, seq).map do
  #    $~.offset(0)[0]
  #  end
  #end
end
21
+
data/lib/spec_id/aa_freqs.rb CHANGED
@@ -9,9 +9,10 @@ class SpecID::AAFreqs
9
9
  # seeing that amino acid. Frequencies should add to 1.
10
10
  attr_accessor :aafreqs
11
11
 
12
- def initialize(fasta_file=nil)
13
- if fasta_file
14
- @fasta = Fasta.new.read_file(fasta_file)
12
# fasta is a fasta object (not a filename).  When it is given, the amino
# acid frequencies are calculated from it right away.
def initialize(fasta=nil)
  @fasta = fasta
  @aafreqs = calculate_frequencies(@fasta) if @fasta
end
@@ -64,6 +65,9 @@ class SpecID::AAFreqs
64
65
  # returns two numbers in array [actual, expected]
65
66
  # expected is a Float!!!
66
67
  def actual_and_expected_number(peptide_aaseqs, amino_acid=:C, at_least=1)
68
+ if at_least > 1
69
+ raise NotImplementedError, "can only do at_least=1 right now!"
70
+ end
67
71
  one_minus_freq = 1.0 - @aafreqs[amino_acid.to_sym]
68
72
  amino_acid_as_st = amino_acid.to_s
69
73
  probs = []