mspire 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/INSTALL +1 -0
- data/README +25 -0
- data/Rakefile +129 -40
- data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
- data/bin/bioworks_to_pepxml.rb +1 -0
- data/bin/fasta_shaker.rb +1 -96
- data/bin/filter_and_validate.rb +5 -0
- data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
- data/bin/prob_validate.rb +6 -0
- data/bin/raw_to_mzXML.rb +2 -2
- data/bin/srf_group.rb +1 -0
- data/bin/srf_to_sqt.rb +40 -0
- data/changelog.txt +68 -0
- data/lib/align/chams.rb +6 -6
- data/lib/align.rb +4 -3
- data/lib/bsearch.rb +120 -0
- data/lib/fasta.rb +318 -86
- data/lib/group_by.rb +10 -0
- data/lib/index_by.rb +11 -0
- data/lib/merge_deep.rb +21 -0
- data/lib/{spec → ms/converter}/mzxml.rb +77 -109
- data/lib/ms/gradient_program.rb +171 -0
- data/lib/ms/msrun.rb +209 -0
- data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
- data/lib/ms/parser/mzdata/axml.rb +12 -0
- data/lib/ms/parser/mzdata/dom.rb +160 -0
- data/lib/ms/parser/mzdata/libxml.rb +7 -0
- data/lib/ms/parser/mzdata.rb +25 -0
- data/lib/ms/parser/mzxml/axml.rb +11 -0
- data/lib/ms/parser/mzxml/dom.rb +159 -0
- data/lib/ms/parser/mzxml/hpricot.rb +253 -0
- data/lib/ms/parser/mzxml/libxml.rb +15 -0
- data/lib/ms/parser/mzxml/regexp.rb +122 -0
- data/lib/ms/parser/mzxml/rexml.rb +72 -0
- data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
- data/lib/ms/parser/mzxml.rb +175 -0
- data/lib/ms/parser.rb +108 -0
- data/lib/ms/precursor.rb +10 -0
- data/lib/ms/scan.rb +81 -0
- data/lib/ms/spectrum.rb +193 -0
- data/lib/ms.rb +10 -0
- data/lib/mspire.rb +4 -0
- data/lib/roc.rb +61 -1
- data/lib/sample_enzyme.rb +31 -8
- data/lib/scan_i.rb +21 -0
- data/lib/spec_id/aa_freqs.rb +7 -3
- data/lib/spec_id/bioworks.rb +20 -14
- data/lib/spec_id/digestor.rb +139 -0
- data/lib/spec_id/mass.rb +116 -0
- data/lib/spec_id/parser/proph.rb +236 -0
- data/lib/spec_id/precision/filter/cmdline.rb +209 -0
- data/lib/spec_id/precision/filter/interactive.rb +134 -0
- data/lib/spec_id/precision/filter/output.rb +147 -0
- data/lib/spec_id/precision/filter.rb +623 -0
- data/lib/spec_id/precision/output.rb +60 -0
- data/lib/spec_id/precision/prob/cmdline.rb +139 -0
- data/lib/spec_id/precision/prob/output.rb +88 -0
- data/lib/spec_id/precision/prob.rb +171 -0
- data/lib/spec_id/proph/pep_summary.rb +92 -0
- data/lib/spec_id/proph/prot_summary.rb +484 -0
- data/lib/spec_id/proph.rb +2 -466
- data/lib/spec_id/protein_summary.rb +2 -2
- data/lib/spec_id/sequest/params.rb +316 -0
- data/lib/spec_id/sequest/pepxml.rb +1513 -0
- data/lib/spec_id/sequest.rb +2 -1672
- data/lib/spec_id/srf.rb +445 -177
- data/lib/spec_id.rb +183 -95
- data/lib/spec_id_xml.rb +8 -10
- data/lib/transmem/phobius.rb +147 -0
- data/lib/transmem/toppred.rb +368 -0
- data/lib/transmem.rb +157 -0
- data/lib/validator/aa.rb +135 -0
- data/lib/validator/background.rb +73 -0
- data/lib/validator/bias.rb +95 -0
- data/lib/validator/cmdline.rb +260 -0
- data/lib/validator/decoy.rb +94 -0
- data/lib/validator/digestion_based.rb +69 -0
- data/lib/validator/probability.rb +48 -0
- data/lib/validator/prot_from_pep.rb +234 -0
- data/lib/validator/transmem.rb +272 -0
- data/lib/validator/true_pos.rb +46 -0
- data/lib/validator.rb +214 -0
- data/lib/xml.rb +38 -0
- data/lib/xml_style_parser.rb +105 -0
- data/lib/xmlparser_wrapper.rb +19 -0
- data/script/compile_and_plot_smriti_final.rb +97 -0
- data/script/extract_gradient_programs.rb +56 -0
- data/script/get_apex_values_rexml.rb +44 -0
- data/script/mzXML2timeIndex.rb +1 -1
- data/script/smriti_final_analysis.rb +103 -0
- data/script/toppred_to_yaml.rb +47 -0
- data/script/tpp_installer.rb +1 -1
- data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
- data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
- data/specs/bin/fasta_shaker_spec.rb +259 -0
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
- data/specs/bin/filter_and_validate_spec.rb +124 -0
- data/specs/bin/ms_to_lmat_spec.rb +34 -0
- data/specs/bin/prob_validate_spec.rb +62 -0
- data/specs/bin/protein_summary_spec.rb +10 -0
- data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
- data/specs/gi_spec.rb +22 -0
- data/specs/load_bin_path.rb +7 -0
- data/specs/merge_deep_spec.rb +13 -0
- data/specs/ms/gradient_program_spec.rb +77 -0
- data/specs/ms/msrun_spec.rb +455 -0
- data/specs/ms/parser_spec.rb +92 -0
- data/specs/ms/spectrum_spec.rb +89 -0
- data/specs/roc_spec.rb +251 -0
- data/specs/rspec_autotest.rb +149 -0
- data/specs/sample_enzyme_spec.rb +41 -0
- data/specs/spec_helper.rb +133 -0
- data/specs/spec_id/aa_freqs_spec.rb +52 -0
- data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
- data/specs/spec_id/digestor_spec.rb +75 -0
- data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
- data/specs/spec_id/precision/filter/output_spec.rb +31 -0
- data/specs/spec_id/precision/filter_spec.rb +243 -0
- data/specs/spec_id/precision/prob_spec.rb +111 -0
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
- data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
- data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
- data/specs/spec_id/sequest/params_spec.rb +68 -0
- data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
- data/specs/spec_id/sqt_spec.rb +138 -0
- data/specs/spec_id/srf_spec.rb +209 -0
- data/specs/spec_id/srf_spec_helper.rb +302 -0
- data/specs/spec_id_helper.rb +33 -0
- data/specs/spec_id_spec.rb +361 -0
- data/specs/spec_id_xml_spec.rb +33 -0
- data/specs/transmem/phobius_spec.rb +423 -0
- data/specs/transmem/toppred_spec.rb +297 -0
- data/specs/transmem_spec.rb +60 -0
- data/specs/transmem_spec_shared.rb +64 -0
- data/specs/validator/aa_spec.rb +107 -0
- data/specs/validator/background_spec.rb +51 -0
- data/specs/validator/bias_spec.rb +146 -0
- data/specs/validator/decoy_spec.rb +51 -0
- data/specs/validator/fasta_helper.rb +26 -0
- data/specs/validator/prot_from_pep_spec.rb +141 -0
- data/specs/validator/transmem_spec.rb +145 -0
- data/specs/validator/true_pos_spec.rb +58 -0
- data/specs/validator_helper.rb +33 -0
- data/specs/xml_spec.rb +12 -0
- data/test_files/000_pepxml18_small.xml +206 -0
- data/test_files/020a.mzXML.timeIndex +4710 -0
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
- data/test_files/4-03-03_small-prot.xml +321 -0
- data/test_files/4-03-03_small.xml +3876 -0
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +5999 -0
- data/test_files/bioworks31.params +77 -0
- data/test_files/bioworks32.params +62 -0
- data/test_files/bioworks33.params +63 -0
- data/test_files/bioworks_single_run_small.xml +7237 -0
- data/test_files/bioworks_small.fasta +212 -0
- data/test_files/bioworks_small.params +63 -0
- data/test_files/bioworks_small.phobius +109 -0
- data/test_files/bioworks_small.toppred.out +2847 -0
- data/test_files/bioworks_small.xml +5610 -0
- data/test_files/bioworks_with_INV_small.xml +3753 -0
- data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +304 -0
- data/test_files/messups.fasta +297 -0
- data/test_files/opd1/000.my_answer.100lines.xml +101 -0
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
- data/test_files/opd1/000_020_3prots-prot.xml +62 -0
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
- data/test_files/opd1/sequest.3.1.params +77 -0
- data/test_files/opd1/sequest.3.2.params +62 -0
- data/test_files/opd1/twenty_scans.mzXML +418 -0
- data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +9 -0
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
- data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
- data/test_files/pepproph_small.xml +4691 -0
- data/test_files/phobius.small.noheader.txt +50 -0
- data/test_files/phobius.small.small.txt +53 -0
- data/test_files/s01_anC1_ld020mM.key.txt +25 -0
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +297 -0
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +14340 -0
- data/test_files/tf_bioworks2excel.txt.actual +1035 -0
- data/test_files/toppred.small.out +416 -0
- data/test_files/toppred.xml.out +318 -0
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
- data/test_files/yeast_gly_small-prot.xml +265 -0
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
- data/test_files/yeast_gly_small.xml +3807 -0
- data/test_files/yeast_gly_small2.parentTimes +6 -0
- metadata +273 -57
- data/bin/filter.rb +0 -6
- data/bin/precision.rb +0 -5
- data/lib/spec/mzdata/parser.rb +0 -108
- data/lib/spec/mzdata.rb +0 -48
- data/lib/spec/mzxml/parser.rb +0 -449
- data/lib/spec/scan.rb +0 -55
- data/lib/spec_id/filter.rb +0 -797
- data/lib/spec_id/precision.rb +0 -421
- data/lib/toppred.rb +0 -18
- data/script/filter-peps.rb +0 -164
- data/test/tc_aa_freqs.rb +0 -59
- data/test/tc_fasta_shaker.rb +0 -149
- data/test/tc_filter.rb +0 -203
- data/test/tc_filter_peps.rb +0 -46
- data/test/tc_gi.rb +0 -17
- data/test/tc_id_class_anal.rb +0 -70
- data/test/tc_id_precision.rb +0 -89
- data/test/tc_msrun.rb +0 -88
- data/test/tc_mzxml.rb +0 -88
- data/test/tc_mzxml_to_lmat.rb +0 -36
- data/test/tc_peptide_parent_times.rb +0 -27
- data/test/tc_precision.rb +0 -60
- data/test/tc_roc.rb +0 -166
- data/test/tc_sample_enzyme.rb +0 -32
- data/test/tc_scan.rb +0 -26
- data/test/tc_sequest.rb +0 -336
- data/test/tc_spec.rb +0 -78
- data/test/tc_spec_id.rb +0 -201
- data/test/tc_spec_id_xml.rb +0 -36
- data/test/tc_srf.rb +0 -262
data/lib/spec_id/sequest.rb
CHANGED
|
@@ -1,1675 +1,5 @@
|
|
|
1
|
+
require 'spec_id/sequest/params'
|
|
1
2
|
|
|
2
|
-
|
|
3
|
-
require 'spec/mzxml/parser'
|
|
4
|
-
require 'hash_by'
|
|
5
|
-
require 'set_from_hash'
|
|
6
|
-
require 'spec_id/bioworks'
|
|
7
|
-
require 'instance_var_set_from_hash'
|
|
8
|
-
require 'spec/msrun'
|
|
9
|
-
require 'spec_id/srf'
|
|
10
|
-
require 'fileutils'
|
|
11
|
-
|
|
12
|
-
class Numeric
|
|
13
|
-
# returns a string with a + or - on the front
|
|
14
|
-
def to_plus_minus_string
|
|
15
|
-
if self >= 0
|
|
16
|
-
'+' << self.to_s
|
|
17
|
-
else
|
|
18
|
-
'-' << self.to_s
|
|
19
|
-
end
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
##########################################
|
|
24
|
-
# NEED TO ADD MODIFICATIONS and generally verify pepxml creation!!! :
|
|
25
|
-
# HERE's an excerpt from an example file from tpp 2.9.2 that I'm going to follow:
|
|
26
|
-
=begin
|
|
27
|
-
<search_summary base_name="/regis/data3/search/akeller/LCQ/COMET/LIGHT/haloICAT2_41" search_engine="COMET" precursor_mass_type="average" fragment_mass_type="average">
|
|
28
|
-
<sequence_search_constraint sequence="C"/>
|
|
29
|
-
<aminoacid_modification aminoacid="C" massdiff="8.049" mass="553.765" variable="Y" binary="N"/>
|
|
30
|
-
<aminoacid_modification aminoacid="C" massdiff="442.5772" mass="545.7160" variable="N"/>
|
|
31
|
-
<aminoacid_modification aminoacid="M" massdiff="16.0000" mass="147.1926" variable="Y" binary="N" symbol="1"/>
|
|
32
|
-
<parameter name="peptide_mass_tol" value="3.0000"/>
|
|
33
|
-
<parameter name="peptide_mass_tol_units" value="DA"/>
|
|
34
|
-
<parameter name="num_output_lines" value="10"/>
|
|
35
|
-
<parameter name="remove_precursor_peak" value="0"/>
|
|
36
|
-
<parameter name="num_dup_headers" value="1"/>
|
|
37
|
-
<parameter name="email_address" value=""/>
|
|
38
|
-
<parameter name="ion_series" value="010000010"/>
|
|
39
|
-
<parameter name="max_num_var_mod_residues" value="3"/>
|
|
40
|
-
<parameter name="md5_check_sum" value="2547286a77a35abe2af3f2e9825ab814"/>
|
|
41
|
-
</search_summary>
|
|
42
|
-
=end
|
|
43
|
-
|
|
44
|
-
# and a guy with modifications:
|
|
45
|
-
=begin
|
|
46
|
-
<search_result spectrum="haloICAT2_41.1110.1110.2" start_scan="1110" end_scan="1110" precursor_neutral_mass="2000.6641" assumed_charge="2" index="28">
|
|
47
|
-
<search_hit hit_rank="1" peptide="GCMPSKEVLSAGAHR" peptide_prev_aa="R" peptide_next_aa="Y" protein="Chr_ORF0132" num_tot_proteins="1" num_matched_ions="19" tot_num_ions="30" calc_neutral_pep_mass="2001.3685" massdiff="-0.704" num_tol_term="2" num_missed_cleavages="1" is_rejected="0">
|
|
48
|
-
<modification_info modified_peptide="GC[546]M[147]PSKEVLSAGAHR">
|
|
49
|
-
<mod_aminoacid_mass position="2" mass="545.7160"/>
|
|
50
|
-
<mod_aminoacid_mass position="3" mass="147.1926"/>
|
|
51
|
-
</modification_info>
|
|
52
|
-
<search_score name="dotproduct" value="359"/>
|
|
53
|
-
<search_score name="delta" value="0.296"/>
|
|
54
|
-
<search_score name="deltastar" value="0"/>
|
|
55
|
-
<search_score name="zscore" value="5.290"/>
|
|
56
|
-
<search_score name="expect" value="0.000E+00"/>
|
|
57
|
-
<peptideprophet_result probability="0.9994" all_ntt_prob="(0.3713,0.4360,0.9994)">
|
|
58
|
-
<search_score_summary>
|
|
59
|
-
<parameter name="fval" value="3.4002"/>
|
|
60
|
-
<parameter name="ntt" value="2"/>
|
|
61
|
-
<parameter name="nmc" value="1"/>
|
|
62
|
-
<parameter name="massd" value="-0.704"/>
|
|
63
|
-
</search_score_summary>
|
|
64
|
-
</peptideprophet_result>
|
|
65
|
-
=end
|
|
66
|
-
|
|
67
|
-
# sequest.params option:
|
|
68
|
-
# diff_search_options = 15.994910 M 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y
|
|
69
|
-
# permanent mods are at the bottom: ...
|
|
70
|
-
# add_A_Alanine = 0.0000 ; added to A
|
|
71
|
-
# add_S_Serine = 0.0000 ; added to S
|
|
72
|
-
# add_P_Proline = 0.0000 ; added to P
|
|
73
|
-
# add_V_Valine = 0.0000 ; added to V
|
|
74
|
-
# add_T_Threonine = 0.0000 ; added to T
|
|
75
|
-
# ...
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
module Sequest; end
|
|
80
|
-
class Sequest::PepXML; end
|
|
81
|
-
|
|
82
|
-
class Sequest::PepXML::MSMSPipelineAnalysis
|
|
83
|
-
include SpecIDXML
|
|
84
|
-
# Version 1.2.3
|
|
85
|
-
attr_writer :date
|
|
86
|
-
attr_writer :xmlns, :xmlns_xsi, :xsi_schemaLocation
|
|
87
|
-
attr_accessor :summary_xml
|
|
88
|
-
# Version 2.3.4
|
|
89
|
-
attr_writer :xmlns, :xmlns_xsi, :xsi_schema_location
|
|
90
|
-
attr_accessor :pepxml_version
|
|
91
|
-
attr_accessor :msms_run_summary
|
|
92
|
-
|
|
93
|
-
# if block given, sets msms_run_summary to block
|
|
94
|
-
def initialize(hash=nil)
|
|
95
|
-
@xmlns = nil
|
|
96
|
-
@xmlns_xsi = nil
|
|
97
|
-
@xsi_schema_location = nil
|
|
98
|
-
if hash
|
|
99
|
-
self.set_from_hash(hash)
|
|
100
|
-
end
|
|
101
|
-
if block_given?
|
|
102
|
-
@msms_run_summary = yield
|
|
103
|
-
end
|
|
104
|
-
end
|
|
105
|
-
|
|
106
|
-
# if no date string given, then it will set to Time.now
|
|
107
|
-
def date
|
|
108
|
-
if @date ; @date
|
|
109
|
-
else
|
|
110
|
-
case Sequest::PepXML.pepxml_version
|
|
111
|
-
when 18 ; tarr = Time.now.to_a ; tarr[3..5].reverse.join('-') + "T#{tarr[0..2].reverse.join(':')}"
|
|
112
|
-
when 0 ; Time.new.to_s
|
|
113
|
-
end
|
|
114
|
-
end
|
|
115
|
-
end
|
|
116
|
-
|
|
117
|
-
def xmlns
|
|
118
|
-
if @xmlns ; @xmlns
|
|
119
|
-
else ; "http://regis-web.systemsbiology.net/pepXML"
|
|
120
|
-
end
|
|
121
|
-
end
|
|
122
|
-
|
|
123
|
-
def xmlns_xsi
|
|
124
|
-
if @xmlns_xsi ; @xmlns_xsi
|
|
125
|
-
else ; "http://www.w3.org/2001/XMLSchema-instance"
|
|
126
|
-
end
|
|
127
|
-
end
|
|
128
|
-
|
|
129
|
-
def xsi_schema_location
|
|
130
|
-
if @xsi_schema_location ; @xsi_schema_location
|
|
131
|
-
else ; "http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd"
|
|
132
|
-
end
|
|
133
|
-
end
|
|
134
|
-
|
|
135
|
-
def to_pepxml
|
|
136
|
-
case Sequest::PepXML.pepxml_version
|
|
137
|
-
when 0
|
|
138
|
-
element_xml(:msms_pipeline_analysis, [:date, :summary_xml]) do
|
|
139
|
-
@msms_run_summary.to_pepxml
|
|
140
|
-
end
|
|
141
|
-
when 18
|
|
142
|
-
element_xml_and_att_string(:msms_pipeline_analysis, "date=\"#{date}\" xmlns=\"#{xmlns}\" xmlns:xsi=\"#{xmlns_xsi}\" xsi:schemaLocation=\"#{xsi_schema_location}\" summary_xml=\"#{summary_xml}\"") do
|
|
143
|
-
@msms_run_summary.to_pepxml
|
|
144
|
-
end
|
|
145
|
-
else
|
|
146
|
-
abort "Don't know how to deal with version: #{Sequest::PepXML.pepxml_version}"
|
|
147
|
-
end
|
|
148
|
-
end
|
|
149
|
-
|
|
150
|
-
end
|
|
151
|
-
|
|
152
|
-
class Sequest::PepXML::MSMSRunSummary
|
|
153
|
-
include SpecIDXML
|
|
154
|
-
|
|
155
|
-
# the version of TPP you are using (determines xml output)
|
|
156
|
-
# The name of the pep xml file (without extension) (but this is a long
|
|
157
|
-
# filename!!!)
|
|
158
|
-
attr_accessor :base_name
|
|
159
|
-
# The name of the mass spec manufacturer
|
|
160
|
-
attr_accessor :ms_manufacturer
|
|
161
|
-
attr_accessor :ms_model
|
|
162
|
-
attr_accessor :ms_mass_analyzer
|
|
163
|
-
attr_accessor :ms_detector
|
|
164
|
-
attr_accessor :raw_data_type
|
|
165
|
-
attr_accessor :raw_data
|
|
166
|
-
attr_accessor :ms_ionization
|
|
167
|
-
attr_accessor :pepxml_version
|
|
168
|
-
|
|
169
|
-
# A SampleEnzyme object (responds to: name, cut, no_cut, sense)
|
|
170
|
-
attr_accessor :sample_enzyme
|
|
171
|
-
# A SearchSummary object
|
|
172
|
-
attr_accessor :search_summary
|
|
173
|
-
# An array of spectrum_queries
|
|
174
|
-
attr_accessor :spectrum_queries
|
|
175
|
-
|
|
176
|
-
# takes a hash of name, value pairs
|
|
177
|
-
# if block given, spectrum_queries (should be array of spectrum queries) is
|
|
178
|
-
# set to the return value of the block
|
|
179
|
-
def initialize(hash=nil)
|
|
180
|
-
@spectrum_queries = []
|
|
181
|
-
if hash
|
|
182
|
-
instance_var_set_from_hash(hash)
|
|
183
|
-
end
|
|
184
|
-
if block_given? ; @spectrum_queries = yield end
|
|
185
|
-
end
|
|
186
|
-
|
|
187
|
-
def to_pepxml
|
|
188
|
-
case Sequest::PepXML.pepxml_version
|
|
189
|
-
when 18
|
|
190
|
-
element_xml_and_att_string(:msms_run_summary, "base_name=\"#{base_name}\" msManufacturer=\"#{ms_manufacturer}\" msModel=\"#{ms_model}\" msIonization=\"#{ms_ionization}\" msMassAnalyzer=\"#{ms_mass_analyzer}\" msDetector=\"#{ms_detector}\" raw_data_type=\"#{raw_data_type}\" raw_data=\"#{raw_data}\"") do
|
|
191
|
-
sample_enzyme.to_pepxml +
|
|
192
|
-
search_summary.to_pepxml +
|
|
193
|
-
spectrum_queries.map {|sq| sq.to_pepxml }.join
|
|
194
|
-
end
|
|
195
|
-
when 0
|
|
196
|
-
# element_xml(:msms_run_summary, [:base_name, :search_engine, :database, :raw_data_type, :raw_data, :out_data_type, :out_data, :sample_enzyme]) do
|
|
197
|
-
# element_xml(:search_summary, [:base_name, :search_engine, :precursor_mass_type, :fragment_mass_type]) do
|
|
198
|
-
# [
|
|
199
|
-
# @params.short_element_xml(:enzymatic_search_constraint, [:enzyme, :max_num_internal_cleavages, :min_number_termini]),
|
|
200
|
-
# @params.short_element_xml(:sequence_search_constraint, [:sequence]),
|
|
201
|
-
# @params.short_element_xml(:sequence_search_constraint, [:sequence]),
|
|
202
|
-
# @params.pepxml_parameters(:peptide_mass_tol, :fragment_ion_tol, :ion_series, :max_num_differential_AA_per_mod, :nucleotide_reading_frame, :num_output_lines, :remove_precursor_peak, :ion_cutoff_percentage, :match_peak_count, :match_peak_allowed_error, :match_peak_tolerance, :protein_mass_filter, :sequence_header_filter)
|
|
203
|
-
# ].join("\n")
|
|
204
|
-
# end + "\n" +
|
|
205
|
-
# @spectrum_queries.collect {|result| result.to_pepxml }.join("\n")
|
|
206
|
-
# end
|
|
207
|
-
end
|
|
208
|
-
end
|
|
209
|
-
|
|
210
|
-
end
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
class Sequest::PepXML
|
|
215
|
-
include SpecIDXML
|
|
216
|
-
|
|
217
|
-
## CREATE a default version for the entire class
|
|
218
|
-
class << self
|
|
219
|
-
attr_accessor :pepxml_version
|
|
220
|
-
end
|
|
221
|
-
DEF_VERSION = 18
|
|
222
|
-
self.pepxml_version = DEF_VERSION # default version
|
|
223
|
-
|
|
224
|
-
attr_accessor :pepxml_version, :msms_pipeline_analysis
|
|
225
|
-
## the full path name (no extension)
|
|
226
|
-
attr_accessor :base_name
|
|
227
|
-
attr_accessor :h_plus
|
|
228
|
-
attr_accessor :avg_parent
|
|
229
|
-
|
|
230
|
-
#attr_accessor :spectrum_queries, :params, :base_name, :search_engine, :database, :raw_data_type, :raw_data, :out_data_type, :out_data, :sample_enzyme, :pepxml_version
|
|
231
|
-
|
|
232
|
-
# returns an array of spectrum queries
|
|
233
|
-
def spectrum_queries
|
|
234
|
-
msms_pipeline_analysis.msms_run_summary.spectrum_queries
|
|
235
|
-
end
|
|
236
|
-
|
|
237
|
-
# msms_pipeline_analysis is set to the result of the yielded block
|
|
238
|
-
# and set_mono_or_avg is called with params if given
|
|
239
|
-
def initialize(pepxml_version=DEF_VERSION, sequest_params_obj=nil)
|
|
240
|
-
self.class.pepxml_version = pepxml_version
|
|
241
|
-
if sequest_params_obj
|
|
242
|
-
set_mono_or_avg(sequest_params_obj)
|
|
243
|
-
end
|
|
244
|
-
if block_given?
|
|
245
|
-
@msms_pipeline_analysis = yield
|
|
246
|
-
@base_name = @msms_pipeline_analysis.msms_run_summary.base_name
|
|
247
|
-
end
|
|
248
|
-
end
|
|
249
|
-
|
|
250
|
-
# sets @h_plus and @avg_parent from the sequest params object
|
|
251
|
-
def set_mono_or_avg(sequest_params_obj)
|
|
252
|
-
case sequest_params_obj.precursor_mass_type
|
|
253
|
-
when "monoisotopic" ; @avg_parent = false
|
|
254
|
-
else ; @avg_parent = true
|
|
255
|
-
end
|
|
256
|
-
|
|
257
|
-
case @avg_parent
|
|
258
|
-
when true ; @h_plus = SpecID::AVG[:h_plus]
|
|
259
|
-
when false ; @h_plus = SpecID::MONO[:h_plus]
|
|
260
|
-
end
|
|
261
|
-
end
|
|
262
|
-
|
|
263
|
-
def date
|
|
264
|
-
Time.new.to_s
|
|
265
|
-
end
|
|
266
|
-
|
|
267
|
-
def xml_version
|
|
268
|
-
'<?xml version="1.0" encoding="UTF-8"?>' + "\n"
|
|
269
|
-
end
|
|
270
|
-
|
|
271
|
-
# for pepxml_version == 0
|
|
272
|
-
def doctype
|
|
273
|
-
'<!DOCTYPE msms_pipeline_analysis SYSTEM "/usr/bin/msms_analysis3.dtd">' + "\n"
|
|
274
|
-
end
|
|
275
|
-
|
|
276
|
-
def style_sheet
|
|
277
|
-
case self.class.pepxml_version
|
|
278
|
-
when 0
|
|
279
|
-
'<?xml-stylesheet type="text/xsl" href="/isb/std_xsl/pepXML_std.xsl"?>' + "\n"
|
|
280
|
-
when 18
|
|
281
|
-
'<?xml-stylesheet type="text/xsl" href="/tools/bin/TPP/tpp/schema/pepXML_std.xsl"?>'
|
|
282
|
-
end
|
|
283
|
-
end
|
|
284
|
-
|
|
285
|
-
def header
|
|
286
|
-
case self.class.pepxml_version
|
|
287
|
-
when 0 ; xml_version + doctype + style_sheet
|
|
288
|
-
when 18 ; xml_version + style_sheet
|
|
289
|
-
end
|
|
290
|
-
end
|
|
291
|
-
|
|
292
|
-
# updates the private attrs _num_prots and _first_prot on bioworks pep
|
|
293
|
-
# objects. Ideally, we'd like these attributes to reside elsewhere, but for
|
|
294
|
-
# memory concerns, this is best for now.
|
|
295
|
-
def self._prot_num_and_first_prot_by_pep(pep_array)
|
|
296
|
-
pep_array.hash_by(:aaseq).each do |aasq, pep_arr|
|
|
297
|
-
prts = []
|
|
298
|
-
pep_arr.each { |pep| prts.push( *(pep.prots) ) }
|
|
299
|
-
prts.uniq!
|
|
300
|
-
_size = prts.size
|
|
301
|
-
pep_arr.each do |pep|
|
|
302
|
-
pep._num_prots = _size.to_s
|
|
303
|
-
pep._first_prot = prts.first
|
|
304
|
-
end
|
|
305
|
-
end
|
|
306
|
-
end
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
Default_Options = {
|
|
310
|
-
:out_path => '.',
|
|
311
|
-
#:backup_db_path => '.',
|
|
312
|
-
# a PepXML option
|
|
313
|
-
:pepxml_version => DEF_VERSION,
|
|
314
|
-
## MSMSRunSummary options:
|
|
315
|
-
# string must be recognized in sample_enzyme.rb
|
|
316
|
-
# or create your own SampleEnzyme object
|
|
317
|
-
:sample_enzyme => 'trypsin',
|
|
318
|
-
:ms_manufacturer => 'ThermoFinnigan',
|
|
319
|
-
:ms_model => 'LCQ Deca XP Plus',
|
|
320
|
-
:ms_ionization => 'ESI',
|
|
321
|
-
:ms_mass_analyzer => 'Ion Trap',
|
|
322
|
-
:ms_detector => 'UNKNOWN',
|
|
323
|
-
:ms_data => '.', # path to ms data files (raw or mzxml)
|
|
324
|
-
:raw_data_type => "raw",
|
|
325
|
-
:raw_data => ".mzXML", ## even if you don't have it?
|
|
326
|
-
## SearchSummary options:
|
|
327
|
-
:out_data_type => "out", ## may be srf?? don't think pepxml recognizes this yet
|
|
328
|
-
:out_data => ".tgz", ## may be srf??
|
|
329
|
-
:copy_mzxml => false, # copy the mzxml file to the out_path (create it if necessary)
|
|
330
|
-
:print => false, # print the objects to file
|
|
331
|
-
}
|
|
332
|
-
|
|
333
|
-
# will dynamically set :ms_model and :ms_mass_analyzer from srf info
|
|
334
|
-
# (ignoring defaults or anything passed in) for LTQ Orbitrap
|
|
335
|
-
# and LCQ Deca XP
|
|
336
|
-
# See SRF::Sequest::PepXML::Default_Options hash for defaults
|
|
337
|
-
# unless given, the out_path will be given as the path of the srf_file
|
|
338
|
-
# srf may be an object or a filename
|
|
339
|
-
def self.new_from_srf(srf, opts={})
|
|
340
|
-
opts = Default_Options.merge(opts)
|
|
341
|
-
|
|
342
|
-
## read the srf file
|
|
343
|
-
if srf.is_a? String
|
|
344
|
-
srf = SRF.new(srf)
|
|
345
|
-
end
|
|
346
|
-
|
|
347
|
-
## set the outpath
|
|
348
|
-
out_path = opts.delete(:out_path)
|
|
349
|
-
|
|
350
|
-
params = srf.params
|
|
351
|
-
|
|
352
|
-
## check to see if we need backup_db
|
|
353
|
-
backup_db_path = opts.delete(:backup_db_path)
|
|
354
|
-
if !File.exist?(params.database) && backup_db_path
|
|
355
|
-
params.database_path = backup_db_path
|
|
356
|
-
end
|
|
357
|
-
|
|
358
|
-
#######################################################################
|
|
359
|
-
# PREPARE THE OPTIONS:
|
|
360
|
-
#######################################################################
|
|
361
|
-
## remove items from the options hash that don't belong to
|
|
362
|
-
ppxml_version = opts.delete(:pepxml_version)
|
|
363
|
-
out_data_type = opts.delete(:out_data_type)
|
|
364
|
-
out_data = opts.delete(:out_data)
|
|
365
|
-
|
|
366
|
-
## Extract meta info from srf
|
|
367
|
-
bn_noext = base_name_noext(srf.header.raw_filename)
|
|
368
|
-
opts[:ms_model] = srf.header.model
|
|
369
|
-
case opts[:ms_model]
|
|
370
|
-
when /Orbitrap/
|
|
371
|
-
opts[:ms_mass_analyzer] = 'Orbitrap'
|
|
372
|
-
when /LCQ Deca XP/
|
|
373
|
-
opts[:ms_mass_analyzer] = 'Ion Trap'
|
|
374
|
-
end
|
|
375
|
-
|
|
376
|
-
## Create the base name
|
|
377
|
-
full_base_name_no_ext = make_base_name( File.expand_path(out_path), bn_noext)
|
|
378
|
-
opts[:base_name] = full_base_name_no_ext
|
|
379
|
-
|
|
380
|
-
## Create the search summary:
|
|
381
|
-
search_summary_options = {
|
|
382
|
-
:search_database => Sequest::PepXML::SearchDatabase.new(params),
|
|
383
|
-
:base_name => full_base_name_no_ext,
|
|
384
|
-
:out_data_type => out_data_type,
|
|
385
|
-
:out_data => out_data
|
|
386
|
-
}
|
|
387
|
-
modifications_string = srf.header.modifications
|
|
388
|
-
search_summary = Sequest::PepXML::SearchSummary.new( params, modifications_string, search_summary_options)
|
|
389
|
-
|
|
390
|
-
## Create the SampleEnzyme object if necessary
|
|
391
|
-
unless opts[:sample_enzyme].is_a? SampleEnzyme
|
|
392
|
-
opts[:sample_enzyme] = SampleEnzyme.new(opts[:sample_enzyme])
|
|
393
|
-
end
|
|
394
|
-
|
|
395
|
-
## Create the pepxml obj and top level objects
|
|
396
|
-
pepxml_obj = Sequest::PepXML.new(ppxml_version, params)
|
|
397
|
-
pipeline = Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=> bn_noext +'.xml'})
|
|
398
|
-
pepxml_obj.msms_pipeline_analysis = pipeline
|
|
399
|
-
pipeline.msms_run_summary = Sequest::PepXML::MSMSRunSummary.new(opts)
|
|
400
|
-
pipeline.msms_run_summary.search_summary = search_summary
|
|
401
|
-
modifications_obj = search_summary.modifications
|
|
402
|
-
|
|
403
|
-
## name some common variables we'll need
|
|
404
|
-
h_plus = pepxml_obj.h_plus
|
|
405
|
-
avg_parent = pepxml_obj.avg_parent
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
## COPY MZXML FILES IF NECESSARY
|
|
409
|
-
if opts[:copy_mzxml]
|
|
410
|
-
mzxml_pathname_noext = File.join(opts[:ms_data], bn_noext)
|
|
411
|
-
to_copy = Spec::MzXML.file_to_mzxml(mzxml_pathname_noext)
|
|
412
|
-
if to_copy
|
|
413
|
-
FileUtils.cp to_copy, out_path
|
|
414
|
-
else
|
|
415
|
-
puts "Couldn't file mzXML file with base: #{mzxml_pathname_noext}"
|
|
416
|
-
puts "Perhaps you need to specifiy the location of the raw data"
|
|
417
|
-
puts "or need an mzXML converter (readw.exe or t2x)"
|
|
418
|
-
exit
|
|
419
|
-
end
|
|
420
|
-
end
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
#######################################################################
|
|
424
|
-
# CREATE the spectrum_queries_ar
|
|
425
|
-
#######################################################################
|
|
426
|
-
srf_index = srf.index
|
|
427
|
-
out_files = srf.out_files
|
|
428
|
-
spectrum_queries_arr = Array.new(srf.dta_files.size)
|
|
429
|
-
files_with_hits_index = 0 ## will end up being 1 indexed
|
|
430
|
-
srf.dta_files.each_with_index do |dta_file,i|
|
|
431
|
-
next if out_files[i].num_hits == 0
|
|
432
|
-
files_with_hits_index += 1
|
|
433
|
-
|
|
434
|
-
# Sort the hits
|
|
435
|
-
hits = out_files[i].hits
|
|
436
|
-
arr = hits.sort_by{|v| v.xcorr }
|
|
437
|
-
|
|
438
|
-
# Get proper deltacn and deltacnstar
|
|
439
|
-
# Prophet deltacn is not the same as the native Sequest deltacn
|
|
440
|
-
# It is the deltacn of the second best hit!
|
|
441
|
-
top_hit = arr.pop
|
|
442
|
-
second_hit = arr.last
|
|
443
|
-
if second_hit
|
|
444
|
-
top_hit[1] = second_hit[1]
|
|
445
|
-
deltacnstar = '0'
|
|
446
|
-
else
|
|
447
|
-
top_hit[1] = '1.0'
|
|
448
|
-
deltacnstar = '1'
|
|
449
|
-
end
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
## mass calculations:
|
|
454
|
-
precursor_neutral_mass = dta_file.mh - h_plus
|
|
455
|
-
calc_neutral_pep_mass = top_hit[0] - h_plus
|
|
456
|
-
massdiff = precursor_neutral_mass - calc_neutral_pep_mass
|
|
457
|
-
if massdiff >= 0 ; massdiff = "+" + massdiff.to_s
|
|
458
|
-
else ; massdiff = massdiff.to_s end
|
|
459
|
-
|
|
460
|
-
(start_scan, end_scan, charge) = srf_index[i]
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
sq_hash = {
|
|
465
|
-
:spectrum => [bn_noext, start_scan, end_scan, charge].join('.'),
|
|
466
|
-
:start_scan => start_scan,
|
|
467
|
-
:end_scan => end_scan,
|
|
468
|
-
:precursor_neutral_mass => precursor_neutral_mass,
|
|
469
|
-
:assumed_charge => charge,
|
|
470
|
-
:pepxml_version => ppxml_version,
|
|
471
|
-
:index => files_with_hits_index,
|
|
472
|
-
}
|
|
473
|
-
|
|
474
|
-
spectrum_query = Sequest::PepXML::SpectrumQuery.new(sq_hash)
|
|
475
|
-
|
|
476
|
-
sequence = top_hit[8]
|
|
477
|
-
|
|
478
|
-
# NEED TO MODIFY SPLIT SEQUENCE TO DO MODS!
|
|
479
|
-
## THIS IS ALL INNER LOOP, so we make every effort at speed here:
|
|
480
|
-
(prevaa, pepseq, nextaa) = SpecID::Pep.prepare_sequence(sequence)
|
|
481
|
-
# ind_keys = {:mh => 0, :deltacn => 1, :sp => 2, :xcorr => 3, :id => 4, :rsp => 5, :ions_matched => 6, :ions_total => 7, :peptide => 8, :reference => 9 }
|
|
482
|
-
|
|
483
|
-
sh_hash = {
|
|
484
|
-
:hit_rank => "1",
|
|
485
|
-
:peptide => pepseq,
|
|
486
|
-
:peptide_prev_aa => prevaa,
|
|
487
|
-
:peptide_next_aa => nextaa,
|
|
488
|
-
:protein => top_hit[9].first.reference.split(" ").first,
|
|
489
|
-
:num_tot_proteins => top_hit[9].size,
|
|
490
|
-
:num_matched_ions => top_hit[6],
|
|
491
|
-
:tot_num_ions => top_hit[7],
|
|
492
|
-
:calc_neutral_pep_mass => calc_neutral_pep_mass,
|
|
493
|
-
:massdiff => massdiff,
|
|
494
|
-
:num_tol_term => Sequest::PepXML::SearchHit.calc_num_tol_term(params, sequence),
|
|
495
|
-
:num_missed_cleavages => Sequest::PepXML::SearchHit.calc_num_missed_cleavages(params, sequence),
|
|
496
|
-
:is_rejected => '0',
|
|
497
|
-
# These are search score attributes:
|
|
498
|
-
:xcorr => top_hit[3],
|
|
499
|
-
:deltacn => top_hit[1],
|
|
500
|
-
:deltacnstar => deltacnstar,
|
|
501
|
-
:spscore => top_hit[2],
|
|
502
|
-
:sprank => top_hit[5],
|
|
503
|
-
:modification_info => modifications_obj.modification_info(SpecID::Pep.split_sequence(sequence)[1]),
|
|
504
|
-
}
|
|
505
|
-
search_hit = Sequest::PepXML::SearchHit.new(sh_hash) # there can be multiple hits
|
|
506
|
-
|
|
507
|
-
search_result = Sequest::PepXML::SearchResult.new
|
|
508
|
-
search_result.search_hits = [search_hit]
|
|
509
|
-
spectrum_query.search_results = [search_result]
|
|
510
|
-
spectrum_queries_arr[files_with_hits_index] = spectrum_query
|
|
511
|
-
end
|
|
512
|
-
spectrum_queries_arr.compact!
|
|
513
|
-
|
|
514
|
-
pipeline.msms_run_summary.spectrum_queries = spectrum_queries_arr
|
|
515
|
-
pepxml_obj.base_name = pipeline.msms_run_summary.base_name
|
|
516
|
-
pipeline.msms_run_summary.spectrum_queries = spectrum_queries_arr
|
|
517
|
-
|
|
518
|
-
pepxml_obj
|
|
519
|
-
end
|
|
520
|
-
|
|
521
|
-
# takes an .srg or bioworks.xml file
|
|
522
|
-
# if possible, ensures that an mzXML file is present for each pepxml file
|
|
523
|
-
# :print => true, will print files
|
|
524
|
-
# Builds PepXML objects from a file that SpecID.new can read.
#
# bioworks_file:: path handed to SpecID.new; the parsed object must be a
#                 Bioworks or an SRFGroup, anything else aborts.
# opts::          merged over Default_Options.  Read here:
#                 :out_path (created with FileUtils.mkpath when missing; must
#                 end up being a directory), :params (required for Bioworks
#                 input) and :print (when truthy each object is written to
#                 "<base_name>.xml" through its to_pepxml).
#
# Returns the collection of PepXML objects that was built.
def self.set_from_bioworks(bioworks_file, opts={})
  opts = Default_Options.merge(opts)

  # make sure the output location exists and really is a directory
  out_dir = opts[:out_path]
  FileUtils.mkpath(out_dir) unless File.exist?(out_dir)
  abort "#{out_dir} must be a directory!" unless File.directory?(out_dir)

  spec_id = SpecID.new(bioworks_file)
  pepxml_objs =
    case spec_id
    when Bioworks
      abort("must have opts[:params] set!") unless opts[:params]
      set_from_bioworks_xml(bioworks_file, opts[:params], opts)
    when SRFGroup
      spec_id.srfs.map { |srf| new_from_srf(srf, opts) }
    else
      abort "invalid object"
    end

  if opts[:print]
    pepxml_objs.each { |obj| obj.to_pepxml(obj.base_name + ".xml") }
  end
  pepxml_objs
end
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
# Takes bioworks 3.2/3.3 xml output (with no filters)
|
|
558
|
-
# Returns a list of PepXML objects
|
|
559
|
-
# params = sequest.params file
|
|
560
|
-
# bioworks = bioworks.xml exported multi-consensus view file
|
|
561
|
-
# pepxml_version = 0 for tpp 1.2.3
|
|
562
|
-
# pepxml_version = 18 for tpp 2.8.2, 2.8.3, 2.9.2
|
|
563
|
-
# Converts a Bioworks multi-consensus XML export into PepXML objects, one per
# run (peptides are grouped by their base_name).
#
# bioworks:: a Bioworks object, or a String that is handed to SpecID.new
# params::   a Sequest::Params object, or a String that is handed to
#            Sequest::Params.new
# opts::     merged over Default_Options.  Read here: :pepxml_version,
#            :sample_enzyme, :ms_manufacturer, :ms_model, :ms_ionization,
#            :ms_mass_analyzer, :ms_detector, :raw_data_type, :raw_data,
#            :out_data_type, :out_data, :ms_data, :out_path (defaults to '.'),
#            :backup_db_path and :copy_mzxml.
#
# Returns the Array of PepXML objects (one per base_name).
#
# Aborts (Kernel#abort) when the pepxml version is unsupported, when params
# or bioworks is neither the expected object nor a String, when the Bioworks
# version is neither 3.2 nor 3.3, or when pepxml_version 0 is requested
# (that output path was never finished and used to end in a NoMethodError on
# nil after all the work had been done).
def self.set_from_bioworks_xml(bioworks, params, opts={})
  opts = Default_Options.merge(opts)
  pepxml_version, sample_enzyme, ms_manufacturer, ms_model, ms_ionization, ms_mass_analyzer, ms_detector, raw_data_type, raw_data, out_data_type, out_data, ms_data, out_path = opts.values_at(:pepxml_version, :sample_enzyme, :ms_manufacturer, :ms_model, :ms_ionization, :ms_mass_analyzer, :ms_detector, :raw_data_type, :raw_data, :out_data_type, :out_data, :ms_data, :out_path)

  out_path ||= '.'

  supported_versions = [0,18]
  unless supported_versions.include?(pepxml_version)
    abort "pepxml_version: #{pepxml_version} not currently supported. Current support is for versions #{supported_versions.join(', ')}"
  end

  ## Turn params and bioworks into objects if necessary:
  params =
    case params
    when Sequest::Params then params
    when String then Sequest::Params.new(params)
    else abort "Don't recognize #{params} as object or string!"
    end
  bioworks =
    case bioworks
    when Bioworks then bioworks
    when String then SpecID.new(bioworks)
    else abort "Don't recognize #{bioworks} as object or string!"
    end

  ## fall back on the backup database location when the one named in the
  ## params file does not exist on this machine
  backup_db_path = opts.delete(:backup_db_path)
  if !File.exist?(params.database) && backup_db_path
    params.database_path = backup_db_path
  end

  search_summary = Sequest::PepXML::SearchSummary.new(params, bioworks.modifications, {:search_database => Sequest::PepXML::SearchDatabase.new(params), :out_data_type => out_data_type, :out_data => out_data})
  modifications_obj = search_summary.modifications

  ## One pepxml object per base_name (this very big block):
  bioworks.peps.hash_by(:base_name).map do |base_name, pep_arr|

    pepxml_obj = Sequest::PepXML.new(pepxml_version, params)
    full_base_name_no_ext = self.make_base_name( File.expand_path(out_path), base_name)

    case pepxml_version
    when 18
      pipeline = Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=>base_name+'.xml'})
      msms_run_summary = Sequest::PepXML::MSMSRunSummary.new({
        :base_name => full_base_name_no_ext,
        :ms_manufacturer => ms_manufacturer,
        :ms_model => ms_model,
        :ms_ionization => ms_ionization,
        :ms_mass_analyzer => ms_mass_analyzer,
        :ms_detector => ms_detector,
        :raw_data_type => raw_data_type,
        :raw_data => raw_data,
        :sample_enzyme => SampleEnzyme.new(sample_enzyme),
        :search_summary => search_summary,
      })
      pipeline.msms_run_summary = msms_run_summary
      pepxml_obj.msms_pipeline_analysis = pipeline
      pepxml_obj.msms_pipeline_analysis.msms_run_summary.search_summary.base_name = full_base_name_no_ext
      pepxml_obj.base_name = full_base_name_no_ext
    when 0
      ## @TODO: NEED TO REVAMP THIS:
      # Sequest::PepXML.new(pepxml_version).set_from_hash({
      #   :params => params,
      #   :search_results => spectrum_queries_arr,
      #   :base_name => self.make_base_name( File.expand_path(out_path), base_name),
      #   :search_engine => params.search_engine,
      #   :database => params.database,
      #   :raw_data_type => "mzXML",
      #   :raw_data => ".mzXML",
      #   :out_data_type => "out",
      #   :out_data => ".tgz",
      #   :sample_enzyme => params.enzyme,
      # })
      # No pipeline object is built on this path, so fail now with a clear
      # message instead of crashing on nil once the queries have been built.
      abort "pepxml_version: 0 output from bioworks xml is not implemented yet"
    end

    # The search hits below read _first_prot and _num_prots from each pep;
    # presumably this call is what sets them (NOTE(review): confirm).
    # This is only valid if all hits are present (no previous thresholding)
    # Since out2summary only acts on one folder at a time,
    # we should only do it for one folder at a time! (that's why we do this
    # here instead of globally)
    self._prot_num_and_first_prot_by_pep(pep_arr)

    prec_mz_arr = nil
    case x = bioworks.version
    when /3\.2/
      calc_prec_by = :prec_mz_arr
      # get the precursor_mz array for this filename
      prec_mz_arr = Spec::MSRun.precursor_mz_by_scan(File.join(ms_data, base_name))
    when /3\.3/
      calc_prec_by = :deltamass
    else
      abort "invalid BioworksBrowser version: #{x}"
    end

    if opts[:copy_mzxml]
      to_copy = Spec::MzXML.file_to_mzxml(File.join(ms_data, base_name))
      FileUtils.cp(to_copy, out_path) if to_copy
    end

    spectrum_queries_ar = pep_arr.hash_by(:first_scan, :last_scan, :charge).collect do |key,arr|

      # Sort by xcorr and take the top hit (to mimic out2summary):
      arr = arr.sort_by {|pep| pep.xcorr.to_f } # ascending
      top_pep = arr.pop
      second_hit = arr.last # only its presence matters (for deltacnstar)

      case calc_prec_by
      when :prec_mz_arr
        precursor_neutral_mass = Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.first_scan.to_i, top_pep.last_scan.to_i, prec_mz_arr, top_pep.charge.to_i, pepxml_obj.avg_parent)
      when :deltamass
        precursor_neutral_mass = Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.mass.to_f, top_pep.deltamass.to_f, pepxml_obj.avg_parent)
      end

      calc_neutral_pep_mass = (top_pep.mass.to_f - pepxml_obj.h_plus)
      massdiff = precursor_neutral_mass - calc_neutral_pep_mass
      # massdiff must carry an explicit sign; negatives already have a "-"
      massdiff = (massdiff >= 0) ? "+" + massdiff.to_s : massdiff.to_s

      # deltacn & star:
      # (NOTE: OLD?? out2summary wants the deltacn of the 2nd best hit.)
      if second_hit
        #top_pep.deltacn = second_hit.deltacn
        deltacnstar = '0'
      else
        top_pep.deltacn = '1.0'
        deltacnstar = '1'
      end

      # Create the nested structure of queries{results{hits}}
      spec_query = Sequest::PepXML::SpectrumQuery.new({
        :spectrum => [top_pep.base_name, top_pep.first_scan, top_pep.last_scan, top_pep.charge].join("."),
        :start_scan => top_pep.first_scan,
        :end_scan => top_pep.last_scan,
        :precursor_neutral_mass => precursor_neutral_mass.to_s,
        :assumed_charge => top_pep.charge,
        :pepxml_version => pepxml_version,
      })

      search_result = Sequest::PepXML::SearchResult.new

      ## Calculate some interdependent values;
      # NOTE: the bioworks mass is really M+H if two or more scans went
      # into the search_hit; calc_neutral_pep_mass is simply the avg of
      # precursor masses adjusted to be neutral
      (prevaa, pepseq, nextaa) = SpecID::Pep.prepare_sequence(top_pep.sequence)
      (num_matched_ions, tot_num_ions) = Sequest::PepXML::SearchHit.split_ions(top_pep.ions)
      search_hit = Sequest::PepXML::SearchHit.new({
        :hit_rank => "1",
        :peptide => pepseq,
        :peptide_prev_aa => prevaa,
        :peptide_next_aa => nextaa,
        :protein => top_pep._first_prot.reference.split(" ").first,
        :num_tot_proteins => top_pep._num_prots,
        :num_matched_ions => num_matched_ions,
        :tot_num_ions => tot_num_ions,
        :calc_neutral_pep_mass => calc_neutral_pep_mass.to_s,
        :massdiff => massdiff,
        :num_tol_term => Sequest::PepXML::SearchHit.calc_num_tol_term(params, top_pep.sequence).to_s,
        :num_missed_cleavages => Sequest::PepXML::SearchHit.calc_num_missed_cleavages(params, top_pep.sequence).to_s,
        :is_rejected => "0",
        # These are search score attributes:
        :xcorr => top_pep.xcorr,
        :deltacn => top_pep.deltacn,
        :deltacnstar => deltacnstar,
        :spscore => top_pep.sp,
        :sprank => top_pep.rsp,
        :modification_info => modifications_obj.modification_info(SpecID::Pep.split_sequence(top_pep.sequence)[1]),
      })
      search_result.search_hits = [search_hit] # there can be multiple search hits
      spec_query.search_results = [search_result] # can be multiple search_results
      spec_query
    end

    # create an index by spectrum as results end up typically in out2summary
    # (I really dislike this order, however)
    spectrum_queries_ar = spectrum_queries_ar.sort_by {|pep| pep.spectrum }
    spectrum_queries_ar.each_with_index {|res,index| res.index = "#{index + 1}" }
    pipeline.msms_run_summary.spectrum_queries = spectrum_queries_ar
    pepxml_obj
  end ## collects pepxml_objs
end
|
|
770
|
-
|
|
771
|
-
# Name of the summary file for this object: base_name with ".xml" appended.
def summary_xml
  extension = ".xml"
  base_name + extension
end
|
|
774
|
-
|
|
775
|
-
# Delegates to the params object held in @params.
def precursor_mass_type
  search_params = @params
  search_params.precursor_mass_type
end
|
|
778
|
-
|
|
779
|
-
# Delegates to the params object held in @params.
def fragment_mass_type
  search_params = @params
  search_params.fragment_mass_type
end
|
|
782
|
-
|
|
783
|
-
# combines filename in a manner consistent with the path
|
|
784
|
-
# Joins path and the basename of filename using the separator that path
# itself appears to use: "\\" when splitting path on backslashes yields more
# pieces than splitting on "/", otherwise "/".  No separator is added when
# path already ends with it.
def self.make_base_name(path, filename)
  separator = path.split("\\").size > path.split("/").size ? "\\" : "/"
  leaf = File.basename(filename)
  path[-1, 1] == separator ? path + leaf : path + separator + leaf
end
|
|
795
|
-
|
|
796
|
-
# outputs pepxml, (to file if given)
|
|
797
|
-
# Returns the pepxml document as a String: header followed by the xml of
# @msms_pipeline_analysis (appended in place to what header returns).
# file:: when given, the document is also written to that path.
def to_pepxml(file=nil)
  xml = header
  xml << @msms_pipeline_analysis.to_pepxml
  File.open(file, "w") { |out| out.print xml } if file
  xml
end
|
|
806
|
-
|
|
807
|
-
# given any kind of filename (from windows or whatever)
|
|
808
|
-
# returns the base of the filename with no file extension
|
|
809
|
-
# Returns the basename of file with its extension removed.  Backslashes are
# treated as path separators so Windows-style paths work too.
#
# The argument is no longer modified: the previous gsub! rewrote the
# caller's string in place and raised on frozen strings.
#
# NOTE(review): the character class [\w^\.] also matches dots, so every
# dotted suffix is removed ("a.tar.gz" => "a"); kept as is.
def self.base_name_noext(file)
  normalized = file.gsub("\\", '/')
  File.basename(normalized).sub(/\.[\w^\.]+$/, '')
end
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
end # PepXML
|
|
816
|
-
|
|
817
|
-
##
|
|
818
|
-
# In the future, this guy should accept any version of bioworks params file
|
|
819
|
-
# and spit out any param queried.
|
|
820
|
-
class Sequest::Params
  include SpecIDXML

  # separates a parameter name from its value (" = " or " =")
  @@param_re = / = ?/o
  # separates a value from a trailing comment
  @@param_two_split = ';'

  # opts are the general options
  # mods are the weights added to amino acids
  attr_accessor :opts, :mods

  # file:: optional path to a sequest.params file, parsed immediately.
  # All keys and values are stored as strings!
  def initialize(file=nil)
    parse(file) if file
  end

  # Reads "name = value ;comment" lines from fh until a blank line or the end
  # of the handle, and returns them as a hash (name => value with the comment
  # and trailing whitespace removed).
  #
  # Lines without a " =" separator are skipped, and a value consisting only
  # of a comment is stored as ""; both used to raise NoMethodError on nil.
  def grab_params(fh)
    hash = {}
    while (line = fh.gets)
      break unless line =~ /[^\s]/
      if line =~ @@param_re
        key, raw_value = line.split(@@param_re)
        hash[key] = raw_value.to_s.split(@@param_two_split).first.to_s.rstrip if key
      end
      # it is necessary to add this break so that params files inside srf
      # files can be read.  This will terminate the reading at the end of
      # the file even though there are more lines (there is no blank line in
      # the srf params files).
      ## Will only work on bioworks 3.2 & 3.3 (bioworks 3.1 last line => Elastase/Tryp...)
      break if line =~ /added to U/ || line =~ /digest_mass_range/
    end
    hash
  end

  # Parses an open handle: the first line ("[SEQUEST]") is discarded, the
  # first block of parameters becomes @opts, the second @mods.
  # Drops the .hdr postfix that indexed databases carry.
  # returns self
  def parse_handle(fh)
    fh.gets # the [SEQUEST] header line
    @opts = grab_params(fh)
    @opts["search_engine"] = "SEQUEST"
    @mods = grab_params(fh)

    ## this gets rid of the .hdr postfix on indexed databases
    ## (skipped when the file names no database instead of crashing on nil)
    database_name = @opts["first_database_name"]
    @opts["first_database_name"] = database_name.sub(/\.hdr$/, '') if database_name
    self
  end

  ## parses file
  ## and drops the .hdr behind indexed fasta files
  ## returns self
  def parse(file)
    File.open(file) { |fh| parse_handle(fh) }
    self
  end

  # returns( split_after, except_before) taken from whitespace-separated
  # fields 3 and 4 of enzyme_info; returns nil unless version is "3.2"
  def enzyme_specificity
    return unless version == "3.2"
    fields = enzyme_info.split(/\s+/)[3,2]
    fields.collect! { |str| (str && str.class == String) ? str : "" }
    return *fields
  end

  # Returns the version of the sequest.params file
  # Returns String "3.2" if contains "enzyme_info"
  # Returns String "3.1" if contains "enzyme_number"
  # Returns nil otherwise
  def version
    if @opts["enzyme_info"] ; "3.2"
    elsif @opts["enzyme_number"] ; "3.1"
    end
  end

  ####################################################
  # TO PEPXML
  ####################################################
  # In some ways, this is merely translating to the older Bioworks
  # sequest.params files

  # The 'partial_sequence' option, or "0" when it is missing or empty.
  # I'm not sure if this is the right mapping for sequence_search_constraint?
  def sequence
    pseq = @opts['partial_sequence']
    (!pseq || pseq == "") ? "0" : pseq
  end

  # Returns xml in the form <parameter name="#{method_name}"
  # value="#{method_value}"/> for every key in @opts (sorted), by handing the
  # key names to params_xml.
  def pepxml_parameters
    sorted_names = @opts.sort.map { |name, _value| name.to_s }
    params_xml(*sorted_names)
    # (:peptide_mass_tol, :peptide_mass_units, :fragment_ion_tol, :ion_series, :max_num_differential_AA_per_mod, :nucleotide_reading_frame, :num_output_lines, :remove_precursor_peak, :ion_cutoff_percentage, :match_peak_count, :match_peak_allowed_error, :match_peak_tolerance, :protein_mass_filter, :sequence_header_filter)
  end

  # "average" or "monoisotopic" according to mass_type_parent ('0' / '1');
  # aborts on any other value
  def precursor_mass_type
    case @opts['mass_type_parent']
    when '0' then "average"
    when '1' then "monoisotopic"
    else abort "error in mass_type_parent in sequest!"
    end
  end

  # "average" or "monoisotopic" according to mass_type_fragment ('0' / '1');
  # aborts on any other value
  def fragment_mass_type
    case @opts['mass_type_fragment']
    when '0' then "average"
    when '1' then "monoisotopic"
    else abort "error in mass_type_fragment in sequest!"
    end
  end

  # Any unknown method name is looked up (as a string) in @opts, then in
  # @mods; nil is returned when neither has it.
  def method_missing(name, *args)
    string = name.to_s
    if @opts.key?(string) ; @opts[string]
    elsif @mods.key?(string) ; @mods[string]
    end
  end

  ## We only need to define values if they are different than sequest.params
  ## The method_missing will look them up in the hash!

  # Returns a system independent basename
  # Splits on "\" or "/"
  def _sys_ind_basename(file)
    file.split(/[\\\/]/)[-1]
  end

  # changes the path of the database (the database's basename is kept)
  def database_path=(newpath)
    db = @opts["first_database_name"]
    @opts["first_database_name"] = File.join(newpath, _sys_ind_basename(db))
  end

  # the 'first_database_name' option
  def database
    @opts["first_database_name"]
  end

  # returns the appropriate aminoacid mass lookup table (in spec_id.rb SpecID::MONO or
  # SpecID::AVG based on precursor_mass_type
  def mass_table
    case precursor_mass_type
    when 'average' then SpecID::AVG
    when 'monoisotopic' then SpecID::MONO
    end
  end

  # at least in Bioworks 3.2, the First number after the enzyme
  # is the indication of the enzymatic end stringency (required):
  #   1 = Fully enzymatic
  #   2 = Either end
  #   3 = N terminal only
  #   4 = C terminal only
  # So, to get min_number_termini we map like this:
  #   1 => 2
  #   2 => 1
  # Anything else prints a warning and yields "1".
  def min_number_termini
    case @opts["enzyme_info"].split(" ")[1]
    when "1" then "2"
    when "2" then "1"
    else
      puts "WARNING: Enzyme termini info might be imprecise!"
      "1"
    end
  end

  # the part of enzyme_info that precedes the first '('
  def enzyme
    #if @opts["enzyme_info"] =~ /Trypsin/ ; return "tryptic"
    #else ; return @opts["enzyme_info"].split('(')[0] end
    @opts["enzyme_info"].split('(')[0]
  end

  def max_num_internal_cleavages
    @opts["max_num_internal_cleavage_sites"]
  end

  # the 'peptide_mass_tolerance' option; warns when peptide_mass_units is
  # anything but "0" because the value is passed through unconverted
  def peptide_mass_tol
    if @opts["peptide_mass_units"] != "0"
      puts "WARNING: peptide_mass_tol units need to be adjusted!"
    end
    @opts["peptide_mass_tolerance"]
  end

  def fragment_ion_tol
    @opts["fragment_ion_tolerance"]
  end

  def max_num_differential_AA_per_mod
    @opts["max_num_differential_per_peptide"]
  end

  ## @TODO: We could add some of the parameters not currently being asked for to be more complete
  ## @TODO: We could always add the Bioworks 3.2 specific params as params

  ####################################################
  ####################################################

end
|
|
1036
|
-
|
|
1037
|
-
# Holds the search hits of one search result and renders them as a
# search_result element.
class Sequest::PepXML::SearchResult
  include SpecIDXML

  # an array of search_hits
  attr_accessor :search_hits

  # search_hits starts out as the block's return value when a block is
  # given, otherwise as an empty array
  def initialize
    @search_hits = block_given? ? yield : []
  end

  # the xml of every search hit, concatenated inside a search_result element
  # (built with element_xml_no_atts)
  def to_pepxml
    element_xml_no_atts(:search_result) do
      @search_hits.map { |hit| hit.to_pepxml }.join
    end
  end
end
|
|
1054
|
-
|
|
1055
|
-
# The search_summary element: wraps a sequest params object together with
# the modifications and the search database.
class Sequest::PepXML::SearchSummary
  include SpecIDXML

  attr_accessor :params, :base_name, :out_data_type, :out_data, :modifications
  # A SearchDatabase object (responds to :local_path and :type)
  attr_accessor :search_database

  # params::               a sequest params object
  # modifications_string:: -> See Modifications
  # args::                 optional hash handed to set_from_hash
  def initialize(params, modifications_string='', args=nil)
    @search_id = nil
    @params = params
    @modifications = Sequest::PepXML::Modifications.new(params, modifications_string)
    set_from_hash(args) if args
  end

  # anything this object does not define is forwarded to @params (nil is
  # returned when there is no @params)
  def method_missing(symbol, *args)
    @params.send(symbol, *args) if @params
  end

  # the stored search id, '1' when none is set
  def search_id
    @search_id || '1'
  end

  # Renders the search_summary element; its content is, in order, the search
  # database, the enzymatic_search_constraint element, the modifications and
  # the parameter list of @params.
  def to_pepxml
    attributes = [:base_name, :search_engine, :precursor_mass_type, :fragment_mass_type, :out_data_type, :out_data, :search_id]
    element_xml(:search_summary, attributes) do
      database_xml = search_database.to_pepxml
      constraint_xml = short_element_xml(:enzymatic_search_constraint, [:enzyme, :max_num_internal_cleavages, :min_number_termini])
      database_xml + constraint_xml + @modifications.to_pepxml + @params.pepxml_parameters
    end
  end

end
|
|
1094
|
-
|
|
1095
|
-
class Sequest::PepXML::Modifications
|
|
1096
|
-
include SpecIDXML
|
|
1097
|
-
|
|
1098
|
-
# sequest params object
|
|
1099
|
-
attr_accessor :params
|
|
1100
|
-
# array holding AAModifications
|
|
1101
|
-
attr_accessor :aa_mods
|
|
1102
|
-
# array holding TerminalModifications
|
|
1103
|
-
attr_accessor :term_mods
|
|
1104
|
-
# a hash of all differential modifications present by aa_one_letter_symbol
|
|
1105
|
-
# and special_symbol. This is NOT the mass difference but the total mass {
|
|
1106
|
-
# 'M*' => 155.5, 'S@' => 190.3 }. NOTE: Since the termini are dependent on
|
|
1107
|
-
# the amino acid sequence, they are given the *differential* mass. The
|
|
1108
|
-
# termini are given the special symbol as in sequest e.g. '[' => 12.22, #
|
|
1109
|
-
# cterminus ']' => 14.55 # nterminus
|
|
1110
|
-
attr_accessor :masses_by_diff_mod_hash
|
|
1111
|
-
# a hash, key is [AA_one_letter_symbol.to_sym, difference.to_f]
|
|
1112
|
-
# values are the special_symbols
|
|
1113
|
-
attr_accessor :mod_symbols_hash
|
|
1114
|
-
|
|
1115
|
-
# The modification symbols string looks like this:
|
|
1116
|
-
# (M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000)
|
|
1117
|
-
# ct is cterminal peptide (differential)
|
|
1118
|
-
# nt is nterminal peptide (differential)
|
|
1119
|
-
# the C is just cysteine
|
|
1120
|
-
# will set_modifications and masses_by_diff_mod hash
|
|
1121
|
-
# params::                      sequest params object (stored in @params)
# modification_symbols_string:: e.g. "(M* +15.90000) (ct[ +12.33000)"
# Both are handed on to set_modifications, which does all of the work.
def initialize(params, modification_symbols_string='')
  @params = params
  set_modifications(params, modification_symbols_string)
end
|
|
1125
|
-
|
|
1126
|
-
# set the masses_by_diff_mod and mod_symbols_hash from
|
|
1127
|
-
# Fills @masses_by_diff_mod and @mod_symbols_hash from a modification
# symbols string such as "(M* +15.90000) (ct[ +12.33000)".  Both hashes are
# reset first; a nil or empty string leaves them empty and returns nil.
#
# Terminal entries ('ct' / 'nt'):
#   @masses_by_diff_mod[symbol]             = mass difference
#   @mod_symbols_hash[['ct'|'nt', diff]]    = symbol   (string tag)
# Amino acid entries (one per letter of the group):
#   @masses_by_diff_mod[letter + symbol]    = mass difference + residue mass
#                                             (from @params.mass_table)
#   @mod_symbols_hash[[letter.to_sym, diff]] = symbol
def set_hashes(modification_symbols_string)
  @mod_symbols_hash = {}
  @masses_by_diff_mod = {}
  return nil if modification_symbols_string == nil || modification_symbols_string == ''

  residue_masses = @params.mass_table
  entry_pattern = /\(?(\w+)(.) (.[\d\.]+)\)?/
  modification_symbols_string.split(/\)\s+\(/).each do |entry|
    found = entry_pattern.match(entry)
    next unless found
    target, symbol, diff_text = found.captures
    mass_diff = diff_text.to_f
    if target == 'ct' || target == 'nt'
      @masses_by_diff_mod[symbol] = mass_diff
      @mod_symbols_hash[[target, mass_diff]] = symbol.dup
    else
      target.split('').each do |letter|
        residue = letter.to_sym
        @masses_by_diff_mod[letter + symbol] = mass_diff + residue_masses[residue]
        @mod_symbols_hash[[residue, mass_diff]] = symbol
      end
    end
  end
end
|
|
1153
|
-
|
|
1154
|
-
# given a bare peptide (no end pieces) returns a ModificationInfo object
|
|
1155
|
-
# e.g. given "]PEPT*IDE", NOT 'K.PEPTIDE.R'
|
|
1156
|
-
# if there are no modifications, returns nil
|
|
1157
|
-
# Builds the ModificationInfo for a bare peptide that still carries its
# modification symbols (e.g. "]PEPT*IDE", NOT 'K.PEPTIDE.R').
#
# peptide:: the peptide string; it is not modified.
#
# Returns a Sequest::PepXML::SearchHit::ModificationInfo built from a hash
# with :modified_peptide plus whichever of :mod_nterm_mass, :mod_cterm_mass
# and :mod_aminoacid_mass_array ([[position, mass], ...]) apply.  Returns
# nil when no differential modifications are known or none occur in the
# peptide.
#
# Fix: the C-terminal branch used to call peptide.slice!(0..-2), which
# emptied the peptide before the amino acid scan (so residue modifications
# were silently dropped whenever a C-terminal modification was present) and
# could modify the caller's string in place.
#
# NOTE(review): positions are indices into the string that still contains
# modification symbols, so a second modified residue is reported one
# position too far to the right for every symbol before it -- confirm
# against what pepXML consumers expect.
def modification_info(peptide)
  return nil if @masses_by_diff_mod.size == 0

  hash = { :modified_peptide => peptide.dup }
  hsh = @masses_by_diff_mod
  table = @params.mass_table
  h = table[:h] # this? or h_plus ??
  oh = table[:o] + h

  ## only the termini can match a single char
  if hsh.key? peptide[0,1]
    # AA + H + differential_mod
    hash[:mod_nterm_mass] = table[peptide[1,1].to_sym] + h + hsh[peptide[0,1]]
    peptide = peptide[1...(peptide.size)]
  end
  if hsh.key? peptide[(peptide.size-1),1]
    # AA + OH + differential_mod
    hash[:mod_cterm_mass] = table[peptide[(peptide.size-2),1].to_sym] + oh + hsh[peptide[-1,1]]
    # drop only the terminal symbol; the residues are still needed below
    peptide = peptide[0...(peptide.size-1)]
  end

  # a residue followed by its symbol is a two character key
  mod_array = []
  (0...peptide.size).each do |i|
    pair = peptide[i,2]
    mod_array << [ i+1 , hsh[pair] ] if hsh.key?(pair)
  end
  hash[:mod_aminoacid_mass_array] = mod_array if mod_array.size > 0

  # more than just the modified peptide in there?
  hash.size > 1 ? Sequest::PepXML::SearchHit::ModificationInfo.new(hash) : nil
end
|
|
1194
|
-
|
|
1195
|
-
# 1. sets aa_mods and term_mods from a sequest params object
|
|
1196
|
-
# 2. sets @params
|
|
1197
|
-
# 3. sets @masses_by_diff_mod
|
|
1198
|
-
# 1. sets @params
# 2. sets @masses_by_diff_mod and @mod_symbols_hash (through set_hashes)
# 3. sets @aa_mods (static + variable AAModification objects) and
#    @term_mods (static + variable TerminalModification objects)
#
# params::                      sequest params object; read here: mods,
#                               mass_table, diff_search_options and
#                               term_diff_search_options
# modification_symbols_string:: handed to set_hashes
#
# Fix: the symbols of the variable terminal modifications were looked up
# with @mod_symbols_hash[:nt, diff] -- Hash#[] takes a single key, so any
# non-zero terminal difference raised ArgumentError.  The key is now the
# [tag, diff] pair; the string tag ('nt' / 'ct') is tried first and the
# symbol tag second.
def set_modifications(params, modification_symbols_string)
  @params = params

  set_hashes(modification_symbols_string)

  ####################################
  ## static mods
  ####################################

  static_mods = [] # [[one_letter_amino_acid.to_sym, add_amount.to_f], ...]
  static_terminal_mods = [] # e.g. [add_Cterm_peptide, amount.to_f]

  params.mods.each do |k,v|
    v_to_f = v.to_f
    next if v_to_f == 0.0
    if k =~ /add_(\w)_/
      static_mods << [$1.to_sym, v_to_f]
    else
      static_terminal_mods << [k, v_to_f]
    end
  end
  aa_hash = params.mass_table

  ## Create the static_mods objects
  static_mods.map! do |mod|
    hash = {
      :aminoacid => mod[0].to_s,
      :massdiff => mod[1].to_plus_minus_string,
      :mass => aa_hash[mod[0]] + mod[1],
      :variable => 'N',
      :binary => 'Y',
    }
    Sequest::PepXML::AAModification.new(hash)
  end

  ## Create the static_terminal_mods objects
  static_terminal_mods.map! do |mod|
    terminus = (mod[0] =~ /Cterm/) ? 'c' : 'n' # only two possible termini
    protein_terminus =
      case mod[0]
      when /Nterm_protein/ then 'n'
      when /Cterm_protein/ then 'c'
      end

    hash = {
      :terminus => terminus,
      :massdiff => mod[1].to_plus_minus_string,
      :variable => 'N',
      :description => mod[0],
    }
    hash[:protein_terminus] = protein_terminus if protein_terminus
    Sequest::PepXML::TerminalModification.new(hash)
  end

  #################################
  # Variable Mods:
  #################################
  # diff_search_options is read as alternating "<diff> <residues>" fields
  arr = params.diff_search_options.rstrip.split(/\s+/)
  # [residues, diff.to_f]
  variable_mods = []
  (0...arr.size).step(2) do |i|
    if arr[i].to_f != 0.0
      variable_mods << [arr[i+1], arr[i].to_f]
    end
  end
  mod_objects = []
  variable_mods.each do |mod|
    mod[0].split('').each do |aa|
      hash = {
        :aminoacid => aa,
        :massdiff => mod[1].to_plus_minus_string,
        :mass => aa_hash[aa.to_sym] + mod[1],
        :variable => 'Y',
        :binary => 'N',
        :symbol => @mod_symbols_hash[[aa.to_sym, mod[1]]],
      }
      mod_objects << Sequest::PepXML::AAModification.new(hash)
    end
  end
  variable_mods = mod_objects

  #################################
  # TERMINAL Variable Mods:
  #################################
  # These are always peptide, not protein termini (for sequest)
  (nterm_diff, cterm_diff) = params.term_diff_search_options.rstrip.split(/\s+/).map{|v| v.to_f }

  # the symbol registered for a terminal tag ('nt' / 'ct') and difference
  terminal_symbol = lambda do |tag, diff|
    @mod_symbols_hash[[tag, diff]] || @mod_symbols_hash[[tag.to_sym, diff]]
  end

  to_add = []
  # a missing field is treated like 0.0 (no modification)
  if nterm_diff && nterm_diff != 0.0
    to_add << ['n', nterm_diff.to_plus_minus_string, terminal_symbol.call('nt', nterm_diff)]
  end
  if cterm_diff && cterm_diff != 0.0
    to_add << ['c', cterm_diff.to_plus_minus_string, terminal_symbol.call('ct', cterm_diff)]
  end

  variable_terminal_mods = to_add.map do |term, mssdiff, symb|
    hash = {
      :terminus => term,
      :massdiff => mssdiff,
      :variable => 'Y',
      :symbol => symb,
    }
    Sequest::PepXML::TerminalModification.new(hash)
  end

  #########################
  # COLLECT THEM
  #########################
  @aa_mods = static_mods + variable_mods
  @term_mods = static_terminal_mods + variable_terminal_mods
end
|
|
1312
|
-
|
|
1313
|
-
## Generates the pepxml for static and differential amino acid mods based on
|
|
1314
|
-
## sequest object
|
|
1315
|
-
# Concatenates the pepxml of every amino acid modification (@aa_mods)
# followed by every terminal modification (@term_mods).  A collection that
# is unset (nil/false) contributes nothing, so the result may be ''.
def to_pepxml
  [@aa_mods, @term_mods].map do |mods|
    mods ? mods.map { |mod| mod.to_pepxml }.join : ''
  end.join
end
|
|
1325
|
-
|
|
3
|
+
module Sequest
|
|
1326
4
|
end
|
|
1327
5
|
|
|
1328
|
-
# A modified amino acid, either static or variable.
# Unless stated otherwise, an attribute may hold any value.
class Sequest::PepXML::AAModification
  include SpecIDXML

  # One letter code of the amino acid.
  attr_accessor :aminoacid

  # Must be a String!  Mass difference relative to the unmodified amino
  # acid, beginning with '+' (nonnegative) or '-', e.g. +1.05446 or -2.3342
  # (consider Numeric#to_plus_minus_string).
  attr_accessor :massdiff

  # Mass of the modified amino acid.
  attr_accessor :mass

  # 'Y' if both the modified and the unmodified amino acid may be present
  # in the dataset, 'N' if only the modified form can be present.
  attr_accessor :variable

  # Restricts the modification to a terminus: 'n', 'c', or 'nc'.
  # NOTE(review): the original comment said "protein terminus" although the
  # attribute is named peptide_terminus -- confirm which is intended.
  attr_accessor :peptide_terminus

  # Special symbol the search engine uses to designate this modification.
  attr_accessor :symbol

  # 'Y' if each peptide must contain only the modified or only the
  # unmodified amino acid, 'N' if a peptide may contain both.
  attr_accessor :binary

  # hash:: optional attribute values, handed to instance_var_set_from_hash
  #        (not defined in this class; presumably supplied by SpecIDXML).
  def initialize(hash=nil)
    return unless hash
    instance_var_set_from_hash(hash)
  end

  # Renders this object as an "aminoacid_modification" element via
  # short_element_xml_from_instance_vars.
  def to_pepxml
    short_element_xml_from_instance_vars("aminoacid_modification")
  end
end
|
|
1363
|
-
|
|
1364
|
-
# A modified terminus (N or C), either static or variable.
class Sequest::PepXML::TerminalModification
  include SpecIDXML

  # 'n' for the N-terminus, 'c' for the C-terminus.
  attr_accessor :terminus

  # Mass difference relative to the unmodified terminus.
  attr_accessor :massdiff

  # Mass of the modified terminus.
  attr_accessor :mass

  # 'Y' if both the modified and the unmodified terminus may be present in
  # the dataset, 'N' if only the modified terminus can be present.
  attr_accessor :variable

  # Special symbol the search engine uses to designate this modification.
  attr_accessor :symbol

  # Whether the modification can reside only at a protein terminus
  # (specified 'n' or 'c').
  attr_accessor :protein_terminus

  attr_accessor :description

  # hash:: optional attribute values, handed to instance_var_set_from_hash
  #        (not defined in this class; presumably supplied by SpecIDXML).
  def initialize(hash=nil)
    return unless hash
    instance_var_set_from_hash(hash)
  end

  # Renders this object as a "terminal_modification" element via
  # short_element_xml_from_instance_vars.
  def to_pepxml
    short_element_xml_from_instance_vars("terminal_modification")
  end
end
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
# The sequence database that was searched, rendered as a pepxml
# "search_database" element.
class Sequest::PepXML::SearchDatabase
  include SpecIDXML

  # Path to the database file.
  attr_accessor :local_path
  # Explicit sequence type; when unset, #seq_type derives it from local_path.
  attr_writer :seq_type

  # params:: optional object responding to #database (a SequestParams
  #          object); its value becomes local_path.
  # args::   optional hash handed to set_from_hash (defined elsewhere) after
  #          params has been applied.
  def initialize(params=nil, args=nil)
    @seq_type = nil
    @local_path = params.database if params
    set_from_hash(args) if args
  end

  # Returns the explicitly assigned sequence type when there is one.
  # Otherwise returns 'AA' when local_path contains ".fasta" (anywhere in
  # the path), and aborts the process with a message when it does not.
  def seq_type
    return @seq_type if @seq_type
    return 'AA' if @local_path =~ /\.fasta/
    abort "Don't recognize type from your database local path: #{@local_path}"
  end

  # Renders the search_database element with local_path and type attributes.
  def to_pepxml
    attributes = "local_path=\"#{local_path}\" type=\"#{seq_type}\""
    short_element_xml_and_att_string(:search_database, attributes)
  end
end
|
|
1424
|
-
|
|
1425
|
-
# One spectrum_query element of a pepxml file together with the search
# results that belong to it.
class Sequest::PepXML::SpectrumQuery
  include SpecIDXML

  # basename_noext.first_scan.last_scan.charge
  attr_accessor :spectrum
  attr_accessor :start_scan
  attr_accessor :end_scan
  attr_accessor :precursor_neutral_mass
  attr_accessor :index
  attr_accessor :search_results

  # this is a string
  attr_accessor :assumed_charge
  attr_accessor :pepxml_version

  # Sets the search_results array: to the block's return value when a block
  # is given, otherwise to an empty array.  An optional hash is then handed
  # to set_from_hash (defined elsewhere).
  def initialize(hash=nil)
    @search_results = block_given? ? yield : []
    set_from_hash(hash) if hash
  end

  ############################################################
  # FOR PEPXML:
  ############################################################

  # Renders the spectrum_query element with its nested search results when
  # Sequest::PepXML.pepxml_version is 18.  Returns nil for version 0 (that
  # branch is commented out) and for any other version.
  def to_pepxml
    case Sequest::PepXML.pepxml_version
    when 18
      element_xml("spectrum_query", [:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :assumed_charge, :index]) do
        @search_results.collect { |sr| sr.to_pepxml }.join
      end
    when 0
      #element_xml("search_result", [:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :assumed_charge, :index]) do
      #  @search_results.collect { |search_result|
      #    search_result.to_pepxml
      #  }.join("\n")
      #end
    end
  end

  # Returns the precursor neutral mass.
  #
  # by=:prec_mz_arr|:deltamass
  #
  # :prec_mz_arr takes (first_scan, last_scan, prec_mz_arr, charge) where
  # first_scan, last_scan and charge are integers and prec_mz_arr is indexed
  # by scan number, holding the precursor m/z of each product scan.  The
  # result is charge * (mz - h_plus), where mz is the mean of the entries
  # present in first_scan..last_scan (NaN when none is present).
  #
  # :deltamass takes (m_plus_h, deltamass), both floats, and returns
  # m_plus_h - h_plus + deltamass.
  #
  # Both flavors accept a final optional argument 'average_weights'.  If
  # true (the default) the average proton mass SpecID::AVG[:h_plus] is used,
  # if false the monoisotopic SpecID::MONO[:h_plus].
  #
  # Aborts when 'by' is not recognized.
  def self.calc_precursor_neutral_mass(by, *args)
    average_weights = nil
    case by
    when :prec_mz_arr
      (first_scan, last_scan, prec_mz_arr, charge, average_weights) = args
    when :deltamass
      (m_plus_h, deltamass, average_weights) = args
    end
    # Multiple assignment fills a missing trailing value with nil, which
    # would silently turn the documented default (true) into false.  Only an
    # explicit false selects monoisotopic weights.
    average_weights = true if average_weights.nil?

    mass_h_plus =
      if average_weights
        SpecID::AVG[:h_plus]
      else
        SpecID::MONO[:h_plus]
      end

    case by
    when :prec_mz_arr
      mz = nil
      if first_scan != last_scan
        sum = 0.0
        tot_num = 0
        (first_scan..last_scan).each do |scan|
          val = prec_mz_arr[scan]
          if val # scans without a precursor m/z entry are skipped
            sum += val.to_f
            tot_num += 1
          end
        end
        mz = sum/tot_num.to_f
      else
        mz = prec_mz_arr[first_scan].to_f
      end
      charge * (mz - mass_h_plus)
    when :deltamass
      m_plus_h - mass_h_plus + deltamass
    else
      abort "don't recognize 'by' in calc_precursor_neutral_mass: #{by}"
    end
  end

end
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
Sequest::PepXML::SearchHit = ArrayClass.new( %w( hit_rank peptide peptide_prev_aa peptide_next_aa protein num_tot_proteins num_matched_ions tot_num_ions calc_neutral_pep_mass massdiff num_tol_term num_missed_cleavages is_rejected deltacnstar xcorr deltacn spscore sprank modification_info) )

# hit_rank=0 peptide=1 peptide_prev_aa=2 peptide_next_aa=3 protein=4 num_tot_proteins=5 num_matched_ions=6 tot_num_ions=7 calc_neutral_pep_mass=8 massdiff=9 num_tol_term=10 num_missed_cleavages=11 is_rejected=12 deltacnstar=13 xcorr=14 deltacn=15 spscore=16 sprank=17 modification_info=18

# A single search_hit element of a pepxml file.  Values are stored by
# position (see the index table above); @@arr_size and @@attributes are not
# defined here and are presumably supplied by ArrayClass.
class Sequest::PepXML::SearchHit
  include SpecIDXML

  Non_standard_amino_acid_char_re = /[^A-Z\.\-]/

  # deltacnstar, xcorr, deltacn, spscore and sprank are emitted as
  # search_score elements (see #to_pepxml).
  # NOTE(review): the original carried the remark "1 if there is no second
  # ranked hit, 0 otherwise" here without naming an attribute; it presumably
  # describes deltacnstar -- confirm.

  # $VERBOSE is switched off while initialize is (re)defined and restored
  # afterwards.
  tmp_verb = $VERBOSE
  $VERBOSE = nil
  # hash:: optional; when given, the 19 slots are filled from its values for
  #        the attribute-name symbols (missing keys leave nil).
  def initialize(hash=nil)
    super(@@arr_size)
    if hash
      self[0,19] = [hash[:hit_rank], hash[:peptide], hash[:peptide_prev_aa], hash[:peptide_next_aa], hash[:protein], hash[:num_tot_proteins], hash[:num_matched_ions], hash[:tot_num_ions], hash[:calc_neutral_pep_mass], hash[:massdiff], hash[:num_tol_term], hash[:num_missed_cleavages], hash[:is_rejected], hash[:deltacnstar], hash[:xcorr], hash[:deltacn], hash[:spscore], hash[:sprank], hash[:modification_info]]
    end
    self
  end
  $VERBOSE = tmp_verb

  # "#<SearchHit name:value ...>" listing every attribute in @@attributes.
  def inspect
    var = @@attributes.map do |m| "#{m}:#{self.send(m)}" end.join(" ")
    "#<SearchHit #{var}>"
  end

  # requires Params object and full sequence (with heads and tails)
  #
  # Counts the positions inside the peptide proper (the middle part returned
  # by SpecID::Pep.split_sequence) where a residue listed in split_after is
  # followed by a residue that is not listed in except_before.  The follower
  # is tested with a lookahead so that it is not consumed: adjacent sites
  # such as "KK" are each counted.  An empty except_before means that any
  # following residue qualifies.
  def self.calc_num_missed_cleavages(params, sequence)
    split_after, except_before = params.enzyme_specificity
    _first, middle, _last = SpecID::Pep.split_sequence(sequence)
    blockers = except_before.to_s
    # "[^]" is not a valid character class, hence the special case
    follower = blockers.empty? ? '.' : "[^#{blockers}]"
    middle.scan(/[#{split_after}](?=#{follower})/).size
  end

  # requires Params object and full sequence (with heads and tails)
  #
  # Returns how many of the two peptide termini (0, 1 or 2) agree with the
  # enzyme specificity.  A terminus counts when the residue before the cut
  # is in split_after and the residue after it is not in except_before, or
  # when the flanking residue is '-'.
  def self.calc_num_tol_term(params, sequence)
    num_tol = 0
    split_after, except_before = params.enzyme_specificity
    first, middle, last = SpecID::Pep.split_sequence(sequence)
    last_of_middle = middle[-1,1]
    first_of_middle = middle[0,1]
    if ( split_after.include?(first) && !except_before.include?(first_of_middle) ) || first == '-'
      num_tol += 1
    end
    if ( split_after.include?(last_of_middle) && !except_before.include?(last) ) || last == '-'
      num_tol += 1
    end
    num_tol
  end

  # Takes ions in the form XX/YY and returns XX, YY
  def self.split_ions(ions)
    return *(ions.split("/"))
  end

  # One search_score element whose value is the result of sending symbol to
  # this hit.
  def search_score_xml(symbol)
    "#{tabs}<search_score name=\"#{symbol}\" value=\"#{send(symbol)}\"/>"
  end

  # The search_score elements for every given symbol, one per line, with a
  # trailing newline.
  def search_scores_xml(*symbol_list)
    symbol_list.collect do |sy|
      search_score_xml(sy)
    end.join("\n") + "\n"
  end

  # Renders the search_hit element: the modification_info (slot 18) pepxml,
  # when present, followed by the search scores.
  def to_pepxml
    mod_pepxml =
      if self[18]
        self[18].to_pepxml
      else
        ''
      end

    element_xml("search_hit", [:hit_rank, :peptide, :peptide_prev_aa, :peptide_next_aa, :protein, :num_tot_proteins, :num_matched_ions, :tot_num_ions, :calc_neutral_pep_mass, :massdiff, :num_tol_term, :num_missed_cleavages, :is_rejected]) do
      mod_pepxml +
        search_scores_xml(:xcorr, :deltacn, :deltacnstar, :spscore, :sprank)
    end
  end

end
|
|
1608
|
-
|
|
1609
|
-
# Positions and masses of the modifications found on one search hit.
class Sequest::PepXML::SearchHit::ModificationInfo
  include SpecIDXML

  ## The generated xml should look something like this:
  # <modification_info mod_nterm_mass=" " mod_cterm_mass=" " modified_peptide=" ">
  #   <mod_aminoacid_mass position=" " mass=" "/>
  # </modification_info>

  # Mass of the modified N terminus
  attr_accessor :mod_nterm_mass
  # Mass of the modified C terminus
  attr_accessor :mod_cterm_mass
  # Peptide sequence with its modifications indicated (the native sequest
  # indicators are assumed to be acceptable here)
  attr_accessor :modified_peptide

  # An array of arrays: [[position, modified_mass], ...]
  # position ranges from 1 to the peptide length
  attr_accessor :mod_aminoacid_mass_array

  # Both terminal masses start out nil; an optional hash is handed to
  # instance_var_set_from_hash (defined elsewhere).
  def initialize(hash=nil)
    @mod_nterm_mass = nil
    @mod_cterm_mass = nil
    instance_var_set_from_hash(hash) if hash
  end

  # Renders the modification_info element.  Attributes that are unset are
  # left out, and xml special chars in modified_peptide are escaped.  One
  # mod_aminoacid_mass child is written per entry of
  # mod_aminoacid_mass_array.
  def to_pepxml
    residue_attributes = (@mod_aminoacid_mass_array || []).map do |entry|
      "position=\"#{entry[0]}\" mass=\"#{entry[1]}\""
    end

    attributes = []
    attributes << "mod_nterm_mass=\"#{@mod_nterm_mass}\"" if @mod_nterm_mass
    attributes << "mod_cterm_mass=\"#{@mod_cterm_mass}\"" if @mod_cterm_mass
    attributes << "modified_peptide=\"#{escape_special_chars(@modified_peptide)}\"" if @modified_peptide

    element_xml_and_att_string('modification_info', attributes.join(" ")) do
      residue_attributes.map do |att_string|
        short_element_xml_and_att_string('mod_aminoacid_mass', att_string)
      end.join
    end
  end

  ## Example output:
  # <modification_info modified_peptide="GC[546]M[147]PSKEVLSAGAHR">
  #   <mod_aminoacid_mass position="2" mass="545.7160"/>
  #   <mod_aminoacid_mass position="3" mass="147.1926"/>
  # </modification_info>

end
|
|
1674
|
-
|
|
1675
|
-
|