mspire 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233) hide show
  1. data/INSTALL +1 -0
  2. data/README +25 -0
  3. data/Rakefile +129 -40
  4. data/bin/{find_aa_freq.rb → aafreqs.rb} +2 -2
  5. data/bin/bioworks_to_pepxml.rb +1 -0
  6. data/bin/fasta_shaker.rb +1 -96
  7. data/bin/filter_and_validate.rb +5 -0
  8. data/bin/{mzxml_to_lmat.rb → ms_to_lmat.rb} +8 -7
  9. data/bin/prob_validate.rb +6 -0
  10. data/bin/raw_to_mzXML.rb +2 -2
  11. data/bin/srf_group.rb +1 -0
  12. data/bin/srf_to_sqt.rb +40 -0
  13. data/changelog.txt +68 -0
  14. data/lib/align/chams.rb +6 -6
  15. data/lib/align.rb +4 -3
  16. data/lib/bsearch.rb +120 -0
  17. data/lib/fasta.rb +318 -86
  18. data/lib/group_by.rb +10 -0
  19. data/lib/index_by.rb +11 -0
  20. data/lib/merge_deep.rb +21 -0
  21. data/lib/{spec → ms/converter}/mzxml.rb +77 -109
  22. data/lib/ms/gradient_program.rb +171 -0
  23. data/lib/ms/msrun.rb +209 -0
  24. data/lib/{spec/msrun.rb → ms/msrun_index.rb} +7 -40
  25. data/lib/ms/parser/mzdata/axml.rb +12 -0
  26. data/lib/ms/parser/mzdata/dom.rb +160 -0
  27. data/lib/ms/parser/mzdata/libxml.rb +7 -0
  28. data/lib/ms/parser/mzdata.rb +25 -0
  29. data/lib/ms/parser/mzxml/axml.rb +11 -0
  30. data/lib/ms/parser/mzxml/dom.rb +159 -0
  31. data/lib/ms/parser/mzxml/hpricot.rb +253 -0
  32. data/lib/ms/parser/mzxml/libxml.rb +15 -0
  33. data/lib/ms/parser/mzxml/regexp.rb +122 -0
  34. data/lib/ms/parser/mzxml/rexml.rb +72 -0
  35. data/lib/ms/parser/mzxml/xmlparser.rb +248 -0
  36. data/lib/ms/parser/mzxml.rb +175 -0
  37. data/lib/ms/parser.rb +108 -0
  38. data/lib/ms/precursor.rb +10 -0
  39. data/lib/ms/scan.rb +81 -0
  40. data/lib/ms/spectrum.rb +193 -0
  41. data/lib/ms.rb +10 -0
  42. data/lib/mspire.rb +4 -0
  43. data/lib/roc.rb +61 -1
  44. data/lib/sample_enzyme.rb +31 -8
  45. data/lib/scan_i.rb +21 -0
  46. data/lib/spec_id/aa_freqs.rb +7 -3
  47. data/lib/spec_id/bioworks.rb +20 -14
  48. data/lib/spec_id/digestor.rb +139 -0
  49. data/lib/spec_id/mass.rb +116 -0
  50. data/lib/spec_id/parser/proph.rb +236 -0
  51. data/lib/spec_id/precision/filter/cmdline.rb +209 -0
  52. data/lib/spec_id/precision/filter/interactive.rb +134 -0
  53. data/lib/spec_id/precision/filter/output.rb +147 -0
  54. data/lib/spec_id/precision/filter.rb +623 -0
  55. data/lib/spec_id/precision/output.rb +60 -0
  56. data/lib/spec_id/precision/prob/cmdline.rb +139 -0
  57. data/lib/spec_id/precision/prob/output.rb +88 -0
  58. data/lib/spec_id/precision/prob.rb +171 -0
  59. data/lib/spec_id/proph/pep_summary.rb +92 -0
  60. data/lib/spec_id/proph/prot_summary.rb +484 -0
  61. data/lib/spec_id/proph.rb +2 -466
  62. data/lib/spec_id/protein_summary.rb +2 -2
  63. data/lib/spec_id/sequest/params.rb +316 -0
  64. data/lib/spec_id/sequest/pepxml.rb +1513 -0
  65. data/lib/spec_id/sequest.rb +2 -1672
  66. data/lib/spec_id/srf.rb +445 -177
  67. data/lib/spec_id.rb +183 -95
  68. data/lib/spec_id_xml.rb +8 -10
  69. data/lib/transmem/phobius.rb +147 -0
  70. data/lib/transmem/toppred.rb +368 -0
  71. data/lib/transmem.rb +157 -0
  72. data/lib/validator/aa.rb +135 -0
  73. data/lib/validator/background.rb +73 -0
  74. data/lib/validator/bias.rb +95 -0
  75. data/lib/validator/cmdline.rb +260 -0
  76. data/lib/validator/decoy.rb +94 -0
  77. data/lib/validator/digestion_based.rb +69 -0
  78. data/lib/validator/probability.rb +48 -0
  79. data/lib/validator/prot_from_pep.rb +234 -0
  80. data/lib/validator/transmem.rb +272 -0
  81. data/lib/validator/true_pos.rb +46 -0
  82. data/lib/validator.rb +214 -0
  83. data/lib/xml.rb +38 -0
  84. data/lib/xml_style_parser.rb +105 -0
  85. data/lib/xmlparser_wrapper.rb +19 -0
  86. data/script/compile_and_plot_smriti_final.rb +97 -0
  87. data/script/extract_gradient_programs.rb +56 -0
  88. data/script/get_apex_values_rexml.rb +44 -0
  89. data/script/mzXML2timeIndex.rb +1 -1
  90. data/script/smriti_final_analysis.rb +103 -0
  91. data/script/toppred_to_yaml.rb +47 -0
  92. data/script/tpp_installer.rb +1 -1
  93. data/{test/tc_align.rb → specs/align_spec.rb} +21 -27
  94. data/{test/tc_bioworks_to_pepxml.rb → specs/bin/bioworks_to_pepxml_spec.rb} +25 -41
  95. data/specs/bin/fasta_shaker_spec.rb +259 -0
  96. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +202 -0
  97. data/specs/bin/filter_and_validate_spec.rb +124 -0
  98. data/specs/bin/ms_to_lmat_spec.rb +34 -0
  99. data/specs/bin/prob_validate_spec.rb +62 -0
  100. data/specs/bin/protein_summary_spec.rb +10 -0
  101. data/{test/tc_fasta.rb → specs/fasta_spec.rb} +354 -310
  102. data/specs/gi_spec.rb +22 -0
  103. data/specs/load_bin_path.rb +7 -0
  104. data/specs/merge_deep_spec.rb +13 -0
  105. data/specs/ms/gradient_program_spec.rb +77 -0
  106. data/specs/ms/msrun_spec.rb +455 -0
  107. data/specs/ms/parser_spec.rb +92 -0
  108. data/specs/ms/spectrum_spec.rb +89 -0
  109. data/specs/roc_spec.rb +251 -0
  110. data/specs/rspec_autotest.rb +149 -0
  111. data/specs/sample_enzyme_spec.rb +41 -0
  112. data/specs/spec_helper.rb +133 -0
  113. data/specs/spec_id/aa_freqs_spec.rb +52 -0
  114. data/{test/tc_bioworks.rb → specs/spec_id/bioworks_spec.rb} +56 -71
  115. data/specs/spec_id/digestor_spec.rb +75 -0
  116. data/specs/spec_id/precision/filter/cmdline_spec.rb +20 -0
  117. data/specs/spec_id/precision/filter/output_spec.rb +31 -0
  118. data/specs/spec_id/precision/filter_spec.rb +243 -0
  119. data/specs/spec_id/precision/prob_spec.rb +111 -0
  120. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  121. data/specs/spec_id/proph/pep_summary_spec.rb +143 -0
  122. data/{test/tc_proph.rb → specs/spec_id/proph/prot_summary_spec.rb} +52 -32
  123. data/{test/tc_protein_summary.rb → specs/spec_id/protein_summary_spec.rb} +85 -0
  124. data/specs/spec_id/sequest/params_spec.rb +68 -0
  125. data/specs/spec_id/sequest/pepxml_spec.rb +452 -0
  126. data/specs/spec_id/sqt_spec.rb +138 -0
  127. data/specs/spec_id/srf_spec.rb +209 -0
  128. data/specs/spec_id/srf_spec_helper.rb +302 -0
  129. data/specs/spec_id_helper.rb +33 -0
  130. data/specs/spec_id_spec.rb +361 -0
  131. data/specs/spec_id_xml_spec.rb +33 -0
  132. data/specs/transmem/phobius_spec.rb +423 -0
  133. data/specs/transmem/toppred_spec.rb +297 -0
  134. data/specs/transmem_spec.rb +60 -0
  135. data/specs/transmem_spec_shared.rb +64 -0
  136. data/specs/validator/aa_spec.rb +107 -0
  137. data/specs/validator/background_spec.rb +51 -0
  138. data/specs/validator/bias_spec.rb +146 -0
  139. data/specs/validator/decoy_spec.rb +51 -0
  140. data/specs/validator/fasta_helper.rb +26 -0
  141. data/specs/validator/prot_from_pep_spec.rb +141 -0
  142. data/specs/validator/transmem_spec.rb +145 -0
  143. data/specs/validator/true_pos_spec.rb +58 -0
  144. data/specs/validator_helper.rb +33 -0
  145. data/specs/xml_spec.rb +12 -0
  146. data/test_files/000_pepxml18_small.xml +206 -0
  147. data/test_files/020a.mzXML.timeIndex +4710 -0
  148. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +3973 -0
  149. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +3872 -0
  150. data/test_files/4-03-03_small-prot.xml +321 -0
  151. data/test_files/4-03-03_small.xml +3876 -0
  152. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  153. data/test_files/bioworks-3.3_10prots.xml +5999 -0
  154. data/test_files/bioworks31.params +77 -0
  155. data/test_files/bioworks32.params +62 -0
  156. data/test_files/bioworks33.params +63 -0
  157. data/test_files/bioworks_single_run_small.xml +7237 -0
  158. data/test_files/bioworks_small.fasta +212 -0
  159. data/test_files/bioworks_small.params +63 -0
  160. data/test_files/bioworks_small.phobius +109 -0
  161. data/test_files/bioworks_small.toppred.out +2847 -0
  162. data/test_files/bioworks_small.xml +5610 -0
  163. data/test_files/bioworks_with_INV_small.xml +3753 -0
  164. data/test_files/bioworks_with_SHUFF_small.xml +2503 -0
  165. data/test_files/corrupted_900.srf +0 -0
  166. data/test_files/head_of_7MIX.srf +0 -0
  167. data/test_files/interact-opd1_mods_small-prot.xml +304 -0
  168. data/test_files/messups.fasta +297 -0
  169. data/test_files/opd1/000.my_answer.100lines.xml +101 -0
  170. data/test_files/opd1/000.tpp_1.2.3.first10.xml +115 -0
  171. data/test_files/opd1/000.tpp_2.9.2.first10.xml +126 -0
  172. data/test_files/opd1/000.v2.1.mzXML.timeIndex +3748 -0
  173. data/test_files/opd1/000_020-prot.png +0 -0
  174. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +62 -0
  175. data/test_files/opd1/000_020_3prots-prot.xml +62 -0
  176. data/test_files/opd1/opd1_cat_inv_small-prot.xml +139 -0
  177. data/test_files/opd1/sequest.3.1.params +77 -0
  178. data/test_files/opd1/sequest.3.2.params +62 -0
  179. data/test_files/opd1/twenty_scans.mzXML +418 -0
  180. data/test_files/opd1/twenty_scans.v2.1.mzXML +382 -0
  181. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  182. data/test_files/opd1/twenty_scans_answ.lmata +9 -0
  183. data/test_files/opd1_020_beginning.RAW +0 -0
  184. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +753 -0
  185. data/test_files/orbitrap_mzData/000_cut.xml +1920 -0
  186. data/test_files/pepproph_small.xml +4691 -0
  187. data/test_files/phobius.small.noheader.txt +50 -0
  188. data/test_files/phobius.small.small.txt +53 -0
  189. data/test_files/s01_anC1_ld020mM.key.txt +25 -0
  190. data/test_files/s01_anC1_ld020mM.meth +0 -0
  191. data/test_files/small.fasta +297 -0
  192. data/test_files/smallraw.RAW +0 -0
  193. data/test_files/tf_bioworks2excel.bioXML +14340 -0
  194. data/test_files/tf_bioworks2excel.txt.actual +1035 -0
  195. data/test_files/toppred.small.out +416 -0
  196. data/test_files/toppred.xml.out +318 -0
  197. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +7 -0
  198. data/test_files/validator_hits_separate/bioworks_small_HS.xml +5651 -0
  199. data/test_files/yeast_gly_small-prot.xml +265 -0
  200. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +6 -0
  201. data/test_files/yeast_gly_small.xml +3807 -0
  202. data/test_files/yeast_gly_small2.parentTimes +6 -0
  203. metadata +273 -57
  204. data/bin/filter.rb +0 -6
  205. data/bin/precision.rb +0 -5
  206. data/lib/spec/mzdata/parser.rb +0 -108
  207. data/lib/spec/mzdata.rb +0 -48
  208. data/lib/spec/mzxml/parser.rb +0 -449
  209. data/lib/spec/scan.rb +0 -55
  210. data/lib/spec_id/filter.rb +0 -797
  211. data/lib/spec_id/precision.rb +0 -421
  212. data/lib/toppred.rb +0 -18
  213. data/script/filter-peps.rb +0 -164
  214. data/test/tc_aa_freqs.rb +0 -59
  215. data/test/tc_fasta_shaker.rb +0 -149
  216. data/test/tc_filter.rb +0 -203
  217. data/test/tc_filter_peps.rb +0 -46
  218. data/test/tc_gi.rb +0 -17
  219. data/test/tc_id_class_anal.rb +0 -70
  220. data/test/tc_id_precision.rb +0 -89
  221. data/test/tc_msrun.rb +0 -88
  222. data/test/tc_mzxml.rb +0 -88
  223. data/test/tc_mzxml_to_lmat.rb +0 -36
  224. data/test/tc_peptide_parent_times.rb +0 -27
  225. data/test/tc_precision.rb +0 -60
  226. data/test/tc_roc.rb +0 -166
  227. data/test/tc_sample_enzyme.rb +0 -32
  228. data/test/tc_scan.rb +0 -26
  229. data/test/tc_sequest.rb +0 -336
  230. data/test/tc_spec.rb +0 -78
  231. data/test/tc_spec_id.rb +0 -201
  232. data/test/tc_spec_id_xml.rb +0 -36
  233. data/test/tc_srf.rb +0 -262
Binary file
@@ -0,0 +1,62 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <?xml-stylesheet type="text/xsl" href="/work/tpp-data/trial-prot.xsl"?>
3
+ <protein_summary xmlns="http://regis-web.systemsbiology.net/protXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://regis-web.systemsbiology.net/protXML /tools/bin/TPP/tpp/bin/../schema/protXML_v4.xsd" summary_xml="/work/tpp-data/trial-prot.xml">
4
+ <protein_summary_header reference_database="/project/marcotte/ms/database/ecoli_K12.fasta" residue_substitution_list="I -> L" source_files="/work/tpp-data/trial.xml" source_files_alt="/work/tpp-data/trial.xml" min_peptide_probability="0.20" min_peptide_weight="0.50" num_predicted_correct_prots="268.5" num_input_1_spectra="34" num_input_2_spectra="454" num_input_3_spectra="672" initial_min_peptide_prob="0.05" total_no_spectrum_ids="348.3" sample_enzyme="trypsin">
5
+ <program_details analysis="proteinprophet" time="2006-08-21T22:29:51" version="4.0(TPP v2.9 GALE rev.2, Build 200608211407)">
6
+ <proteinprophet_details occam_flag="Y" groups_flag="Y" degen_flag="Y" nsp_flag="Y" initial_peptide_wt_iters="2" nsp_distribution_iters="1" final_peptide_wt_iters="2" run_options="XML_INPUT">
7
+ <nsp_information neighboring_bin_smoothing="Y">
8
+ <nsp_distribution bin_no="0" nsp_lower_bound_incl="0.00" nsp_upper_bound_excl="0.10" pos_freq="0.623" neg_freq="0.623" pos_to_neg_ratio="1.00"/>
9
+ <nsp_distribution bin_no="1" nsp_lower_bound_incl="0.10" nsp_upper_bound_excl="0.25" pos_freq="0.198" neg_freq="0.184" pos_to_neg_ratio="1.07"/>
10
+ <nsp_distribution bin_no="2" nsp_lower_bound_incl="0.25" nsp_upper_bound_excl="0.50" pos_freq="0.075" neg_freq="0.074" pos_to_neg_ratio="1.02" alt_pos_to_neg_ratio="1.07"/>
11
+ <nsp_distribution bin_no="3" nsp_lower_bound_incl="0.50" nsp_upper_bound_excl="1.00" pos_freq="0.063" neg_freq="0.074" pos_to_neg_ratio="0.85" alt_pos_to_neg_ratio="1.07"/>
12
+ <nsp_distribution bin_no="4" nsp_lower_bound_incl="1.00" nsp_upper_bound_excl="2.00" pos_freq="0.023" neg_freq="0.026" pos_to_neg_ratio="0.87" alt_pos_to_neg_ratio="1.07"/>
13
+ <nsp_distribution bin_no="5" nsp_lower_bound_incl="2.00" nsp_upper_bound_excl="5.00" pos_freq="0.007" neg_freq="0.007" pos_to_neg_ratio="0.97" alt_pos_to_neg_ratio="1.07"/>
14
+ <nsp_distribution bin_no="6" nsp_lower_bound_incl="5.00" nsp_upper_bound_excl="15.00" pos_freq="0.006" neg_freq="0.006" pos_to_neg_ratio="1.00" alt_pos_to_neg_ratio="1.07"/>
15
+ <nsp_distribution bin_no="7" nsp_lower_bound_incl="15.00" nsp_upper_bound_excl="inf" pos_freq="0.006" neg_freq="0.006" pos_to_neg_ratio="1.00" alt_pos_to_neg_ratio="1.07"/>
16
+ </nsp_information> <protein_summary_data_filter min_probability="0.00" sensitivity="1.000" false_positive_error_rate="0.715" predicted_num_correct="268" predicted_num_incorrect="675" />
17
+ <protein_summary_data_filter min_probability="0.10" sensitivity="1.000" false_positive_error_rate="0.497" predicted_num_correct="268" predicted_num_incorrect="266" />
18
+ <protein_summary_data_filter min_probability="0.20" sensitivity="1.000" false_positive_error_rate="0.497" predicted_num_correct="268" predicted_num_incorrect="266" />
19
+ <protein_summary_data_filter min_probability="0.30" sensitivity="0.881" false_positive_error_rate="0.415" predicted_num_correct="237" predicted_num_incorrect="167" />
20
+ <protein_summary_data_filter min_probability="0.40" sensitivity="0.759" false_positive_error_rate="0.336" predicted_num_correct="204" predicted_num_incorrect="103" />
21
+ <protein_summary_data_filter min_probability="0.50" sensitivity="0.654" false_positive_error_rate="0.281" predicted_num_correct="176" predicted_num_incorrect="68" />
22
+ <protein_summary_data_filter min_probability="0.60" sensitivity="0.514" false_positive_error_rate="0.211" predicted_num_correct="138" predicted_num_incorrect="37" />
23
+ <protein_summary_data_filter min_probability="0.70" sensitivity="0.380" false_positive_error_rate="0.143" predicted_num_correct="102" predicted_num_incorrect="17" />
24
+ <protein_summary_data_filter min_probability="0.80" sensitivity="0.220" false_positive_error_rate="0.030" predicted_num_correct="59" predicted_num_incorrect="2" />
25
+ <protein_summary_data_filter min_probability="0.90" sensitivity="0.205" false_positive_error_rate="0.016" predicted_num_correct="55" predicted_num_incorrect="1" />
26
+ <protein_summary_data_filter min_probability="0.95" sensitivity="0.181" false_positive_error_rate="0.008" predicted_num_correct="49" predicted_num_incorrect="0" />
27
+ <protein_summary_data_filter min_probability="0.96" sensitivity="0.178" false_positive_error_rate="0.007" predicted_num_correct="48" predicted_num_incorrect="0" />
28
+ <protein_summary_data_filter min_probability="0.97" sensitivity="0.170" false_positive_error_rate="0.006" predicted_num_correct="46" predicted_num_incorrect="0" />
29
+ <protein_summary_data_filter min_probability="0.98" sensitivity="0.163" false_positive_error_rate="0.005" predicted_num_correct="44" predicted_num_incorrect="0" />
30
+ <protein_summary_data_filter min_probability="0.99" sensitivity="0.149" false_positive_error_rate="0.003" predicted_num_correct="40" predicted_num_incorrect="0" />
31
+ <protein_summary_data_filter min_probability="1.00" sensitivity="0.104" false_positive_error_rate="0.000" predicted_num_correct="28" predicted_num_incorrect="0" />
32
+ </proteinprophet_details>
33
+ </program_details>
34
+ </protein_summary_header>
35
+ <dataset_derivation generation_no="0">
36
+ </dataset_derivation>
37
+ <protein_group group_number="1" probability="1.00">
38
+ <protein protein_name="gi|16128297|ref|NP_414846.1|" n_indistinguishable_proteins="1" probability="1.00" percent_coverage="1.4" unique_stripped_peptides="RAVDILR" group_sibling_id="a" total_number_peptides="2" pct_spectrum_ids="0.56">
39
+ <annotation protein_description="NAD+-dependent betaine aldehyde dehydrogenase [Escherichia coli K12]"/>
40
+ <peptide peptide_sequence="RAVDILR" charge="2" initial_probability="1.00" nsp_adjusted_probability="1.00" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="0.00" n_sibling_peptides_bin="0" n_instances="2" is_contributing_evidence="Y" calc_neutral_pep_mass="841.9920">
41
+ </peptide>
42
+ </protein>
43
+ </protein_group>
44
+ <protein_group group_number="2" probability="1.00">
45
+ <protein protein_name="gi|16132019|ref|NP_418618.1|" n_indistinguishable_proteins="1" probability="1.00" percent_coverage="13.0" unique_stripped_peptides="FRDGLK+AIQFAQDVGIRVIQLAGYDVYYQEANNETRR" group_sibling_id="a" total_number_peptides="2" pct_spectrum_ids="0.41">
46
+ <annotation protein_description="putative hexulose-6-phosphate isomerase [Escherichia coli K12]"/>
47
+ <peptide peptide_sequence="FRDGLK" charge="1" initial_probability="0.90" nsp_adjusted_probability="1.00" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="0.42" n_sibling_peptides_bin="2" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="734.8920">
48
+ </peptide>
49
+ <peptide peptide_sequence="AIQFAQDVGIRVIQLAGYDVYYQEANNETRR" charge="3" initial_probability="0.32" nsp_adjusted_probability="0.42" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="1.00" n_sibling_peptides_bin="3" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="3601.9920">
50
+ </peptide>
51
+ </protein>
52
+ </protein_group>
53
+ <protein_group group_number="41" probability="0.98">
54
+ <protein protein_name="gi|16128237|ref|NP_414786.1|" n_indistinguishable_proteins="2" probability="0.98" percent_coverage="4.4" unique_stripped_peptides="GGLSGRNAKGGR" group_sibling_id="a" total_number_peptides="2" pct_spectrum_ids="0.46">
55
+ <annotation protein_description="orf, hypothetical protein [Escherichia coli K12]"/>
56
+ <indistinguishable_protein protein_name="gi|16130551|ref|NP_417122.1|">
57
+ <annotation protein_description="orf, hypothetical protein [Escherichia coli K12]"/> </indistinguishable_protein>
58
+ <peptide peptide_sequence="GGLSGRNAKGGR" charge="2" initial_probability="0.99" nsp_adjusted_probability="0.98" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="0.00" n_sibling_peptides_bin="0" n_instances="2" is_contributing_evidence="Y" calc_neutral_pep_mass="1129.1920">
59
+ </peptide>
60
+ </protein>
61
+ </protein_group>
62
+ </protein_summary>
@@ -0,0 +1,62 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <?xml-stylesheet type="text/xsl" href="/work/tpp-data/trial-prot.xsl"?>
3
+ <protein_summary xmlns="http://regis-web.systemsbiology.net/protXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://regis-web.systemsbiology.net/protXML /tools/bin/TPP/tpp/bin/../schema/protXML_v4.xsd" summary_xml="/work/tpp-data/trial-prot.xml">
4
+ <protein_summary_header reference_database="/project/marcotte/ms/database/ecoli_K12.fasta" residue_substitution_list="I -> L" source_files="/work/tpp-data/trial.xml" source_files_alt="/work/tpp-data/trial.xml" min_peptide_probability="0.20" min_peptide_weight="0.50" num_predicted_correct_prots="268.5" num_input_1_spectra="34" num_input_2_spectra="454" num_input_3_spectra="672" initial_min_peptide_prob="0.05" total_no_spectrum_ids="348.3" sample_enzyme="trypsin">
5
+ <program_details analysis="proteinprophet" time="2006-08-21T22:29:51" version="4.0(TPP v2.9 GALE rev.2, Build 200608211407)">
6
+ <proteinprophet_details occam_flag="Y" groups_flag="Y" degen_flag="Y" nsp_flag="Y" initial_peptide_wt_iters="2" nsp_distribution_iters="1" final_peptide_wt_iters="2" run_options="XML_INPUT">
7
+ <nsp_information neighboring_bin_smoothing="Y">
8
+ <nsp_distribution bin_no="0" nsp_lower_bound_incl="0.00" nsp_upper_bound_excl="0.10" pos_freq="0.623" neg_freq="0.623" pos_to_neg_ratio="1.00"/>
9
+ <nsp_distribution bin_no="1" nsp_lower_bound_incl="0.10" nsp_upper_bound_excl="0.25" pos_freq="0.198" neg_freq="0.184" pos_to_neg_ratio="1.07"/>
10
+ <nsp_distribution bin_no="2" nsp_lower_bound_incl="0.25" nsp_upper_bound_excl="0.50" pos_freq="0.075" neg_freq="0.074" pos_to_neg_ratio="1.02" alt_pos_to_neg_ratio="1.07"/>
11
+ <nsp_distribution bin_no="3" nsp_lower_bound_incl="0.50" nsp_upper_bound_excl="1.00" pos_freq="0.063" neg_freq="0.074" pos_to_neg_ratio="0.85" alt_pos_to_neg_ratio="1.07"/>
12
+ <nsp_distribution bin_no="4" nsp_lower_bound_incl="1.00" nsp_upper_bound_excl="2.00" pos_freq="0.023" neg_freq="0.026" pos_to_neg_ratio="0.87" alt_pos_to_neg_ratio="1.07"/>
13
+ <nsp_distribution bin_no="5" nsp_lower_bound_incl="2.00" nsp_upper_bound_excl="5.00" pos_freq="0.007" neg_freq="0.007" pos_to_neg_ratio="0.97" alt_pos_to_neg_ratio="1.07"/>
14
+ <nsp_distribution bin_no="6" nsp_lower_bound_incl="5.00" nsp_upper_bound_excl="15.00" pos_freq="0.006" neg_freq="0.006" pos_to_neg_ratio="1.00" alt_pos_to_neg_ratio="1.07"/>
15
+ <nsp_distribution bin_no="7" nsp_lower_bound_incl="15.00" nsp_upper_bound_excl="inf" pos_freq="0.006" neg_freq="0.006" pos_to_neg_ratio="1.00" alt_pos_to_neg_ratio="1.07"/>
16
+ </nsp_information> <protein_summary_data_filter min_probability="0.00" sensitivity="1.000" false_positive_error_rate="0.715" predicted_num_correct="268" predicted_num_incorrect="675" />
17
+ <protein_summary_data_filter min_probability="0.10" sensitivity="1.000" false_positive_error_rate="0.497" predicted_num_correct="268" predicted_num_incorrect="266" />
18
+ <protein_summary_data_filter min_probability="0.20" sensitivity="1.000" false_positive_error_rate="0.497" predicted_num_correct="268" predicted_num_incorrect="266" />
19
+ <protein_summary_data_filter min_probability="0.30" sensitivity="0.881" false_positive_error_rate="0.415" predicted_num_correct="237" predicted_num_incorrect="167" />
20
+ <protein_summary_data_filter min_probability="0.40" sensitivity="0.759" false_positive_error_rate="0.336" predicted_num_correct="204" predicted_num_incorrect="103" />
21
+ <protein_summary_data_filter min_probability="0.50" sensitivity="0.654" false_positive_error_rate="0.281" predicted_num_correct="176" predicted_num_incorrect="68" />
22
+ <protein_summary_data_filter min_probability="0.60" sensitivity="0.514" false_positive_error_rate="0.211" predicted_num_correct="138" predicted_num_incorrect="37" />
23
+ <protein_summary_data_filter min_probability="0.70" sensitivity="0.380" false_positive_error_rate="0.143" predicted_num_correct="102" predicted_num_incorrect="17" />
24
+ <protein_summary_data_filter min_probability="0.80" sensitivity="0.220" false_positive_error_rate="0.030" predicted_num_correct="59" predicted_num_incorrect="2" />
25
+ <protein_summary_data_filter min_probability="0.90" sensitivity="0.205" false_positive_error_rate="0.016" predicted_num_correct="55" predicted_num_incorrect="1" />
26
+ <protein_summary_data_filter min_probability="0.95" sensitivity="0.181" false_positive_error_rate="0.008" predicted_num_correct="49" predicted_num_incorrect="0" />
27
+ <protein_summary_data_filter min_probability="0.96" sensitivity="0.178" false_positive_error_rate="0.007" predicted_num_correct="48" predicted_num_incorrect="0" />
28
+ <protein_summary_data_filter min_probability="0.97" sensitivity="0.170" false_positive_error_rate="0.006" predicted_num_correct="46" predicted_num_incorrect="0" />
29
+ <protein_summary_data_filter min_probability="0.98" sensitivity="0.163" false_positive_error_rate="0.005" predicted_num_correct="44" predicted_num_incorrect="0" />
30
+ <protein_summary_data_filter min_probability="0.99" sensitivity="0.149" false_positive_error_rate="0.003" predicted_num_correct="40" predicted_num_incorrect="0" />
31
+ <protein_summary_data_filter min_probability="1.00" sensitivity="0.104" false_positive_error_rate="0.000" predicted_num_correct="28" predicted_num_incorrect="0" />
32
+ </proteinprophet_details>
33
+ </program_details>
34
+ </protein_summary_header>
35
+ <dataset_derivation generation_no="0">
36
+ </dataset_derivation>
37
+ <protein_group group_number="1" probability="1.00">
38
+ <protein protein_name="gi|16128297|ref|NP_414846.1|" n_indistinguishable_proteins="1" probability="1.00" percent_coverage="1.4" unique_stripped_peptides="RAVDILR" group_sibling_id="a" total_number_peptides="2" pct_spectrum_ids="0.56">
39
+ <annotation protein_description="NAD+-dependent betaine aldehyde dehydrogenase [Escherichia coli K12]"/>
40
+ <peptide peptide_sequence="RAVDILR" charge="2" initial_probability="1.00" nsp_adjusted_probability="1.00" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="0.00" n_sibling_peptides_bin="0" n_instances="2" is_contributing_evidence="Y" calc_neutral_pep_mass="841.9920">
41
+ </peptide>
42
+ </protein>
43
+ </protein_group>
44
+ <protein_group group_number="2" probability="1.00">
45
+ <protein protein_name="gi|16132019|ref|NP_418618.1|" n_indistinguishable_proteins="1" probability="1.00" percent_coverage="13.0" unique_stripped_peptides="FRDGLK+AIQFAQDVGIRVIQLAGYDVYYQEANNETRR" group_sibling_id="a" total_number_peptides="2" pct_spectrum_ids="0.41">
46
+ <annotation protein_description="putative hexulose-6-phosphate isomerase [Escherichia coli K12]"/>
47
+ <peptide peptide_sequence="FRDGLK" charge="1" initial_probability="1.00" nsp_adjusted_probability="1.00" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="0.42" n_sibling_peptides_bin="2" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="734.8920">
48
+ </peptide>
49
+ <peptide peptide_sequence="AIQFAQDVGIRVIQLAGYDVYYQEANNETRR" charge="3" initial_probability="0.42" nsp_adjusted_probability="0.42" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="1.00" n_sibling_peptides_bin="3" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="3601.9920">
50
+ </peptide>
51
+ </protein>
52
+ </protein_group>
53
+ <protein_group group_number="41" probability="0.98">
54
+ <protein protein_name="gi|16128237|ref|NP_414786.1|" n_indistinguishable_proteins="2" probability="0.98" percent_coverage="4.4" unique_stripped_peptides="GGLSGRNAKGGR" group_sibling_id="a" total_number_peptides="2" pct_spectrum_ids="0.46">
55
+ <annotation protein_description="orf, hypothetical protein [Escherichia coli K12]"/>
56
+ <indistinguishable_protein protein_name="gi|16130551|ref|NP_417122.1|">
57
+ <annotation protein_description="orf, hypothetical protein [Escherichia coli K12]"/> </indistinguishable_protein>
58
+ <peptide peptide_sequence="GGLSGRNAKGGR" charge="2" initial_probability="0.98" nsp_adjusted_probability="0.98" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="0.00" n_sibling_peptides_bin="0" n_instances="2" is_contributing_evidence="Y" calc_neutral_pep_mass="1129.1920">
59
+ </peptide>
60
+ </protein>
61
+ </protein_group>
62
+ </protein_summary>
@@ -0,0 +1,139 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <?xml-stylesheet type="text/xsl" href="/work/tpp-data/opd1_cat_inv-prot.xsl"?>
3
+ <protein_summary xmlns="http://regis-web.systemsbiology.net/protXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://regis-web.systemsbiology.net/protXML /tools/bin/TPP/tpp/bin/../schema/protXML_v4.xsd" summary_xml="/work/tpp-data/opd1_cat_inv-prot.xml">
4
+ <protein_summary_header reference_database="/project/marcotte/ms/database/ecoli_K12_ncbi_20060321_CAT_INV.fasta" residue_substitution_list="I -> L" source_files="/work/tpp-data/opd1_cat_inv.xml" source_files_alt="/work/tpp-data/opd1_cat_inv.xml" min_peptide_probability="0.20" min_peptide_weight="0.50" num_predicted_correct_prots="1329.6" num_input_1_spectra="12" num_input_2_spectra="646" num_input_3_spectra="6239" initial_min_peptide_prob="0.05" total_no_spectrum_ids="2039.5" sample_enzyme="trypsin">
5
+ <program_details analysis="proteinprophet" time="2006-08-30T15:35:12" version="4.0(TPP v2.9 GALE rev.2, Build 200608211407)">
6
+ <proteinprophet_details occam_flag="Y" groups_flag="Y" degen_flag="Y" nsp_flag="Y" initial_peptide_wt_iters="2" nsp_distribution_iters="1" final_peptide_wt_iters="1" run_options="XML_INPUT">
7
+ <nsp_information neighboring_bin_smoothing="Y">
8
+ <nsp_distribution bin_no="0" nsp_lower_bound_incl="0.00" nsp_upper_bound_excl="0.10" pos_freq="0.403" neg_freq="0.416" pos_to_neg_ratio="0.97"/>
9
+ <nsp_distribution bin_no="1" nsp_lower_bound_incl="0.10" nsp_upper_bound_excl="0.25" pos_freq="0.183" neg_freq="0.185" pos_to_neg_ratio="0.99" alt_pos_to_neg_ratio="0.97"/>
10
+ <nsp_distribution bin_no="2" nsp_lower_bound_incl="0.25" nsp_upper_bound_excl="0.50" pos_freq="0.128" neg_freq="0.132" pos_to_neg_ratio="0.97" alt_pos_to_neg_ratio="0.99"/>
11
+ <nsp_distribution bin_no="3" nsp_lower_bound_incl="0.50" nsp_upper_bound_excl="1.00" pos_freq="0.148" neg_freq="0.147" pos_to_neg_ratio="1.00"/>
12
+ <nsp_distribution bin_no="4" nsp_lower_bound_incl="1.00" nsp_upper_bound_excl="2.00" pos_freq="0.096" neg_freq="0.090" pos_to_neg_ratio="1.06"/>
13
+ <nsp_distribution bin_no="5" nsp_lower_bound_incl="2.00" nsp_upper_bound_excl="5.00" pos_freq="0.035" neg_freq="0.026" pos_to_neg_ratio="1.37"/>
14
+ <nsp_distribution bin_no="6" nsp_lower_bound_incl="5.00" nsp_upper_bound_excl="15.00" pos_freq="0.006" neg_freq="0.003" pos_to_neg_ratio="1.64"/>
15
+ <nsp_distribution bin_no="7" nsp_lower_bound_incl="15.00" nsp_upper_bound_excl="inf" pos_freq="0.001" neg_freq="0.001" pos_to_neg_ratio="1.00" alt_pos_to_neg_ratio="1.64"/>
16
+ </nsp_information> <protein_summary_data_filter min_probability="0.00" sensitivity="1.000" false_positive_error_rate="0.660" predicted_num_correct="1330" predicted_num_incorrect="2586" />
17
+ <protein_summary_data_filter min_probability="0.10" sensitivity="1.000" false_positive_error_rate="0.428" predicted_num_correct="1330" predicted_num_incorrect="995" />
18
+ <protein_summary_data_filter min_probability="0.20" sensitivity="1.000" false_positive_error_rate="0.428" predicted_num_correct="1330" predicted_num_incorrect="995" />
19
+ <protein_summary_data_filter min_probability="0.30" sensitivity="0.922" false_positive_error_rate="0.355" predicted_num_correct="1226" predicted_num_incorrect="675" />
20
+ <protein_summary_data_filter min_probability="0.40" sensitivity="0.831" false_positive_error_rate="0.287" predicted_num_correct="1105" predicted_num_incorrect="444" />
21
+ <protein_summary_data_filter min_probability="0.50" sensitivity="0.744" false_positive_error_rate="0.233" predicted_num_correct="990" predicted_num_incorrect="300" />
22
+ <protein_summary_data_filter min_probability="0.60" sensitivity="0.637" false_positive_error_rate="0.176" predicted_num_correct="847" predicted_num_incorrect="180" />
23
+ <protein_summary_data_filter min_probability="0.70" sensitivity="0.517" false_positive_error_rate="0.120" predicted_num_correct="688" predicted_num_incorrect="93" />
24
+ <protein_summary_data_filter min_probability="0.80" sensitivity="0.386" false_positive_error_rate="0.060" predicted_num_correct="513" predicted_num_incorrect="33" />
25
+ <protein_summary_data_filter min_probability="0.90" sensitivity="0.282" false_positive_error_rate="0.017" predicted_num_correct="375" predicted_num_incorrect="6" />
26
+ <protein_summary_data_filter min_probability="0.95" sensitivity="0.247" false_positive_error_rate="0.007" predicted_num_correct="329" predicted_num_incorrect="2" />
27
+ <protein_summary_data_filter min_probability="0.96" sensitivity="0.239" false_positive_error_rate="0.006" predicted_num_correct="318" predicted_num_incorrect="2" />
28
+ <protein_summary_data_filter min_probability="0.97" sensitivity="0.231" false_positive_error_rate="0.005" predicted_num_correct="307" predicted_num_incorrect="1" />
29
+ <protein_summary_data_filter min_probability="0.98" sensitivity="0.223" false_positive_error_rate="0.004" predicted_num_correct="296" predicted_num_incorrect="1" />
30
+ <protein_summary_data_filter min_probability="0.99" sensitivity="0.209" false_positive_error_rate="0.003" predicted_num_correct="278" predicted_num_incorrect="1" />
31
+ <protein_summary_data_filter min_probability="1.00" sensitivity="0.156" false_positive_error_rate="0.000" predicted_num_correct="208" predicted_num_incorrect="0" />
32
+ </proteinprophet_details>
33
+ </program_details>
34
+ </protein_summary_header>
35
+ <dataset_derivation generation_no="0">
36
+ </dataset_derivation>
37
+ <protein_group group_number="1" probability="1.00">
38
+ <protein protein_name="INV_gi|16131365|ref|NP_417950.1|" n_indistinguishable_proteins="1" probability="1.00" percent_coverage="10.0" unique_stripped_peptides="QDLSLKDYSEVDTTLMGK+TRTIEYGTANMNVVFGAPAVGILVLMVLGIGK" group_sibling_id="a" total_number_peptides="2" pct_spectrum_ids="0.08">
39
+ <annotation protein_description="low-affinity phosphate transport [Escherichia coli K12]"/>
40
+ <peptide peptide_sequence="TRTIEYGTANMNVVFGAPAVGILVLMVLGIGK" charge="3" initial_probability="1.00" nsp_adjusted_probability="1.00" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="0.69" n_sibling_peptides_bin="3" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="3305.96206">
41
+ </peptide>
42
+ <peptide peptide_sequence="QDLSLKDYSEVDTTLMGK" charge="3" initial_probability="0.69" nsp_adjusted_probability="0.70" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="1.00" n_sibling_peptides_bin="3" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="2043.26206">
43
+ </peptide>
44
+ </protein>
45
+ </protein_group>
46
+ <protein_group group_number="2" probability="1.00">
47
+ <protein protein_name="INV_gi|16130377|ref|NP_416947.1|" n_indistinguishable_proteins="1" probability="1.00" percent_coverage="7.4" unique_stripped_peptides="TLLLVMPYAGLLVCSISGIVEIARMVEGPK" group_sibling_id="a" total_number_peptides="1" pct_spectrum_ids="0.05">
48
+ <annotation protein_description="ethanolamine utilization; homolog of Salmonella putative transport protein [Escherichia coli K12]"/>
49
+ <peptide peptide_sequence="TLLLVMPYAGLLVCSISGIVEIARMVEGPK" charge="3" initial_probability="1.00" nsp_adjusted_probability="1.00" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="0.00" n_sibling_peptides_bin="0" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="3173.91206">
50
+ </peptide>
51
+ </protein>
52
+ </protein_group>
53
+ <protein_group group_number="3" probability="1.00">
54
+ <protein protein_name="gi|16131182|ref|NP_417762.1|" n_indistinguishable_proteins="1" probability="1.00" percent_coverage="37.1" unique_stripped_peptides="VFMQPASEGTGIIAGGAMR+AVLEVAGVHNVLAK+AYGSTNPINVVR+NMINVALNNGTLQHPVK" group_sibling_id="a" total_number_peptides="7" pct_spectrum_ids="0.29">
55
+ <annotation protein_description="30S ribosomal protein S5 [Escherichia coli K12]"/>
56
+ <peptide peptide_sequence="AVLEVAGVHNVLAK" charge="2" initial_probability="0.97" nsp_adjusted_probability="0.98" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="2.63" n_sibling_peptides_bin="5" n_instances="2" is_contributing_evidence="Y" calc_neutral_pep_mass="1419.67206">
57
+ </peptide>
58
+ <peptide peptide_sequence="VFMQPASEGTGIIAGGAMR" charge="2" initial_probability="0.95" nsp_adjusted_probability="0.97" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="2.65" n_sibling_peptides_bin="5" n_instances="2" is_contributing_evidence="Y" calc_neutral_pep_mass="1893.20206">
59
+ </peptide>
60
+ <peptide peptide_sequence="AYGSTNPINVVR" charge="2" initial_probability="0.88" nsp_adjusted_probability="0.91" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="2.72" n_sibling_peptides_bin="5" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="1290.43206">
61
+ </peptide>
62
+ <peptide peptide_sequence="NMINVALNNGTLQHPVK" charge="2" initial_probability="0.80" nsp_adjusted_probability="0.84" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="2.81" n_sibling_peptides_bin="5" n_instances="2" is_contributing_evidence="Y" calc_neutral_pep_mass="1863.15206">
63
+ </peptide>
64
+ </protein>
65
+ </protein_group>
66
+ <protein_group group_number="4" probability="1.00">
67
+ <protein protein_name="gi|16131115|ref|NP_417692.1|" n_indistinguishable_proteins="1" probability="1.00" percent_coverage="4.0" unique_stripped_peptides="VIDLLIKTGVFR" group_sibling_id="a" total_number_peptides="1" pct_spectrum_ids="0.05">
68
+ <annotation protein_description="N-acetylneuraminate lyase [Escherichia coli K12]"/>
69
+ <peptide peptide_sequence="VIDLLIKTGVFR" charge="3" initial_probability="1.00" nsp_adjusted_probability="1.00" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="0.00" n_sibling_peptides_bin="0" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="1373.69206">
70
+ </peptide>
71
+ </protein>
72
+ </protein_group>
73
+ <protein_group group_number="281" probability="0.98">
74
+ <protein protein_name="gi|90111270|ref|NP_415941.4|" n_indistinguishable_proteins="1" probability="0.98" percent_coverage="6.5" unique_stripped_peptides="AVKAGDEFAFQYR+IQFDWYPTSDSTDPVDMRMYLR" group_sibling_id="a" total_number_peptides="1" pct_spectrum_ids="0.05">
75
+ <annotation protein_description="putative enzyme [Escherichia coli K12]"/>
76
+ <peptide peptide_sequence="IQFDWYPTSDSTDPVDMRMYLR" charge="3" initial_probability="0.98" nsp_adjusted_probability="0.98" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="0.00" n_sibling_peptides_bin="0" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="2737.04206">
77
+ </peptide>
78
+ <peptide peptide_sequence="AVKAGDEFAFQYR" charge="3" initial_probability="0.08" nsp_adjusted_probability="0.08" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="0.98" n_sibling_peptides_bin="3" n_instances="1" is_contributing_evidence="N" calc_neutral_pep_mass="1501.65206">
79
+ </peptide>
80
+ </protein>
81
+ </protein_group>
82
+ <protein_group group_number="302" probability="0.97">
83
+ <protein protein_name="gi|16128870|ref|NP_415423.1|" n_indistinguishable_proteins="1" probability="0.97" percent_coverage="10.5" unique_stripped_peptides="GDVLNYDEVMER+IIGDYRRVALYGIDYLMK+YSYEASLMALHDRDVIRTMACGIAGLSVAADSLSAIK+SGVLTGLPDAYGR" group_sibling_id="a" total_number_peptides="3" pct_spectrum_ids="0.10">
84
+ <annotation protein_description="formate acetyltransferase 1 [Escherichia coli K12]"/>
85
+ <peptide peptide_sequence="GDVLNYDEVMER" charge="2" initial_probability="0.79" nsp_adjusted_probability="0.80" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="1.17" n_sibling_peptides_bin="4" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="1439.55206">
86
+ </peptide>
87
+ <peptide peptide_sequence="SGVLTGLPDAYGR" charge="2" initial_probability="0.75" nsp_adjusted_probability="0.76" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="1.21" n_sibling_peptides_bin="4" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="1305.44206">
88
+ </peptide>
89
+ <peptide peptide_sequence="YSYEASLMALHDRDVIRTMACGIAGLSVAADSLSAIK" charge="3" initial_probability="0.42" nsp_adjusted_probability="0.42" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="1.54" n_sibling_peptides_bin="4" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="3900.48206">
90
+ </peptide>
91
+ <peptide peptide_sequence="IIGDYRRVALYGIDYLMK" charge="3" initial_probability="0.05" nsp_adjusted_probability="0.05" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="1.96" n_sibling_peptides_bin="4" n_instances="1" is_contributing_evidence="N" calc_neutral_pep_mass="2159.56206">
92
+ </peptide>
93
+ </protein>
94
+ </protein_group>
95
+ <protein_group group_number="303" probability="0.97">
96
+ <protein protein_name="gi|90111652|ref|NP_418264.4|" n_indistinguishable_proteins="1" probability="0.97" percent_coverage="9.7" unique_stripped_peptides="HETISEDELRQRLSR" group_sibling_id="a" total_number_peptides="1" pct_spectrum_ids="0.05">
97
+ <annotation protein_description="hypothetical protein b3820 [Escherichia coli K12]"/>
98
+ <peptide peptide_sequence="HETISEDELRQRLSR" charge="3" initial_probability="0.97" nsp_adjusted_probability="0.97" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="0.00" n_sibling_peptides_bin="0" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="1869.01206">
99
+ </peptide>
100
+ </protein>
101
+ </protein_group>
102
+ <protein_group group_number="304" probability="0.97">
103
+ <protein protein_name="gi|16128170|ref|NP_414719.1|" n_indistinguishable_proteins="1" probability="0.97" percent_coverage="11.6" unique_stripped_peptides="VNLTGKVTIPGSDNEYYK+REMRQMEGAWLGSDLVDQGK+DGDTLLVQVKERPTIASITFSGNK+EMRQMEGAWLGSDLVDQGK+SDDAVGGNAMAVASLEFITPTPFISDK+DAVLRREMR" group_sibling_id="a" total_number_peptides="4" pct_spectrum_ids="0.12">
104
+ <annotation protein_description="putative outer membrane antigen [Escherichia coli K12]"/>
105
+ <peptide peptide_sequence="DGDTLLVQVKERPTIASITFSGNK" charge="3" initial_probability="0.79" nsp_adjusted_probability="0.80" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="1.42" n_sibling_peptides_bin="4" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="2589.90206">
106
+ </peptide>
107
+ <peptide peptide_sequence="SDDAVGGNAMAVASLEFITPTPFISDK" charge="3" initial_probability="0.67" nsp_adjusted_probability="0.68" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="1.54" n_sibling_peptides_bin="4" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="2754.04206">
108
+ </peptide>
109
+ <peptide peptide_sequence="REMRQMEGAWLGSDLVDQGK" charge="3" initial_probability="0.42" nsp_adjusted_probability="0.42" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="1.79" n_sibling_peptides_bin="4" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="2306.59206">
110
+ </peptide>
111
+ <peptide peptide_sequence="DAVLRREMR" charge="3" initial_probability="0.23" nsp_adjusted_probability="0.23" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="1.98" n_sibling_peptides_bin="4" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="1145.34206">
112
+ </peptide>
113
+ <peptide peptide_sequence="EMRQMEGAWLGSDLVDQGK" charge="3" initial_probability="0.10" nsp_adjusted_probability="0.10" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="2.11" n_sibling_peptides_bin="5" n_instances="2" is_contributing_evidence="N" calc_neutral_pep_mass="2150.40206">
114
+ </peptide>
115
+ <peptide peptide_sequence="VNLTGKVTIPGSDNEYYK" charge="3" initial_probability="0.09" nsp_adjusted_probability="0.09" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="2.21" n_sibling_peptides_bin="5" n_instances="1" is_contributing_evidence="N" calc_neutral_pep_mass="1998.20206">
116
+ </peptide>
117
+ </protein>
118
+ </protein_group>
119
+ <protein_group group_number="305" probability="0.97">
120
+ <protein protein_name="gi|16128077|ref|NP_414626.1|" n_indistinguishable_proteins="1" probability="0.97" percent_coverage="3.1" unique_stripped_peptides="TVVHMMESVALPGGGGVK" group_sibling_id="a" total_number_peptides="1" pct_spectrum_ids="0.05">
121
+ <annotation protein_description="division-specific transpeptidase, penicillin-binding protein 3 [Escherichia coli K12]"/>
122
+ <peptide peptide_sequence="TVVHMMESVALPGGGGVK" charge="3" initial_probability="0.97" nsp_adjusted_probability="0.97" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="0.00" n_sibling_peptides_bin="0" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="1769.10206">
123
+ </peptide>
124
+ </protein>
125
+ </protein_group>
126
+ <protein_group group_number="306" probability="0.97">
127
+ <protein protein_name="INV_gi|90111252|ref|NP_415854.4|" n_indistinguishable_proteins="1" probability="0.97" percent_coverage="14.4" unique_stripped_peptides="AMMLTADESGAPAEVR+TGEEAPQFILKIVGHLGSEFQK+ENHHGAALQTGFVVYSAQGQHQQVRAMMLTADESGAPAEVR" group_sibling_id="a" total_number_peptides="2" pct_spectrum_ids="0.07">
128
+ <annotation protein_description="aminobenzoyl-glutamate utilization protein [Escherichia coli K12]"/>
129
+ <peptide peptide_sequence="AMMLTADESGAPAEVR" charge="3" initial_probability="0.95" nsp_adjusted_probability="0.95" peptide_group_designator="a" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="0.41" n_sibling_peptides_bin="2" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="1648.86206">
130
+ </peptide>
131
+ <peptide peptide_sequence="TGEEAPQFILKIVGHLGSEFQK" charge="3" initial_probability="0.28" nsp_adjusted_probability="0.28" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="1.08" n_sibling_peptides_bin="4" n_instances="1" is_contributing_evidence="Y" calc_neutral_pep_mass="2428.75206">
132
+ </peptide>
133
+ <peptide peptide_sequence="AMMLTADESGAPAEVR" charge="2" initial_probability="0.13" nsp_adjusted_probability="0.13" peptide_group_designator="a" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="1.23" n_sibling_peptides_bin="4" n_instances="1" is_contributing_evidence="N" calc_neutral_pep_mass="1648.86206">
134
+ </peptide>
135
+ <peptide peptide_sequence="ENHHGAALQTGFVVYSAQGQHQQVRAMMLTADESGAPAEVR" charge="3" initial_probability="0.07" nsp_adjusted_probability="0.07" weight="1.00" is_nondegenerate_evidence="Y" n_enzymatic_termini="2" n_sibling_peptides="1.36" n_sibling_peptides_bin="4" n_instances="1" is_contributing_evidence="N" calc_neutral_pep_mass="4393.81206">
136
+ </peptide>
137
+ </protein>
138
+ </protein_group>
139
+ </protein_summary>
@@ -0,0 +1,77 @@
1
+ [SEQUEST]
2
+ first_database_name = C:\Xcalibur\database\ecoli_K12.fasta
3
+ second_database_name =
4
+ peptide_mass_tolerance = 1.5000
5
+ ion_series = 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0
6
+ fragment_ion_tolerance = 0.0000 ; leave at 0.0 unless you have real poor data
7
+ num_output_lines = 10 ; # peptide results to show
8
+ num_results = 500 ; # results to store
9
+ num_description_lines = 5 ; # full protein descriptions to show for top N peptides
10
+ show_fragment_ions = 1 ; 0=no, 1=yes
11
+ print_duplicate_references = 40 ; 0=no, 1=yes
12
+ enzyme_number = 1
13
+ max_num_differential_AA_per_mod = 4 ; max # of modified AA per diff. mod in a peptide
14
+ diff_search_options = 0.0000 S 0.0000 C 0.0000 M 0.0000 X 0.0000 T 0.0000 Y
15
+ term_diff_search_options = 0.0000 0.0000
16
+ nucleotide_reading_frame = 0 ; 0=protein db, 1-6, 7 = forward three, 8-reverse three, 9=all six
17
+ mass_type_parent = 0 ; 0=average masses, 1=monoisotopic masses
18
+ mass_type_fragment = 0 ; 0=average masses, 1=monoisotopic masses
19
+ normalize_xcorr = 0 ; use normalized xcorr values in the out file
20
+ remove_precursor_peak = 0 ; 0=no, 1=yes
21
+ ion_cutoff_percentage = 0.0000 ; prelim. score cutoff % as a decimal number i.e. 0.30 for 30%
22
+ max_num_internal_cleavage_sites = 2 ; maximum value is 5
23
+ protein_mass_filter = 0 0 ; enter protein mass min & max value ( 0 for both = unused)
24
+ match_peak_count = 0 ; number of auto-detected peaks to try matching (max 5)
25
+ match_peak_allowed_error = 1 ; number of allowed errors in matching auto-detected peaks
26
+ match_peak_tolerance = 1.0000 ; mass tolerance for matching auto-detected peaks
27
+ create_output_files = 1 ; 0=no, 1=yes
28
+ partial_sequence =
29
+ sequence_header_filter =
30
+
31
+ add_Cterm_peptide = 0.0000 ; added to each peptide C-terminus
32
+ add_Cterm_protein = 0.0000 ; added to each protein C-terminus
33
+ add_Nterm_peptide = 0.0000 ; added to each peptide N-terminus
34
+ add_Nterm_protein = 0.0000 ; added to each protein N-terminus
35
+ add_G_Glycine = 0.0000 ; added to G - avg. 57.0519, mono. 57.02146
36
+ add_A_Alanine = 0.0000 ; added to A - avg. 71.0788, mono. 71.03711
37
+ add_S_Serine = 0.0000 ; added to S - avg. 87.0782, mono. 87.02303
38
+ add_P_Proline = 0.0000 ; added to P - avg. 97.1167, mono. 97.05276
39
+ add_V_Valine = 0.0000 ; added to V - avg. 99.1326, mono. 99.06841
40
+ add_T_Threonine = 0.0000 ; added to T - avg. 101.1051, mono. 101.04768
41
+ add_C_Cysteine = 0.0000 ; added to C - avg. 103.1388, mono. 103.00919
42
+ add_L_Leucine = 0.0000 ; added to L - avg. 113.1594, mono. 113.08406
43
+ add_I_Isoleucine = 0.0000 ; added to I - avg. 113.1594, mono. 113.08406
44
+ add_X_LorI = 0.0000 ; added to X - avg. 113.1594, mono. 113.08406
45
+ add_N_Asparagine = 0.0000 ; added to N - avg. 114.1038, mono. 114.04293
46
+ add_O_Ornithine = 0.0000 ; added to O - avg. 114.1472, mono 114.07931
47
+ add_B_avg_NandD = 0.0000 ; added to B - avg. 114.5962, mono. 114.53494
48
+ add_D_Aspartic_Acid = 0.0000 ; added to D - avg. 115.0886, mono. 115.02694
49
+ add_Q_Glutamine = 0.0000 ; added to Q - avg. 128.1307, mono. 128.05858
50
+ add_K_Lysine = 0.0000 ; added to K - avg. 128.1741, mono. 128.09496
51
+ add_Z_avg_QandE = 0.0000 ; added to Z - avg. 128.6231, mono. 128.55059
52
+ add_E_Glutamic_Acid = 0.0000 ; added to E - avg. 129.1155, mono. 129.04259
53
+ add_M_Methionine = 0.0000 ; added to M - avg. 131.1926, mono. 131.04049
54
+ add_H_Histidine = 0.0000 ; added to H - avg. 137.1411, mono. 137.05891
55
+ add_F_Phenylalanine = 0.0000 ; added to F - avg. 147.1766, mono. 147.06841
56
+ add_R_Arginine = 0.0000 ; added to R - avg. 156.1875, mono. 156.10111
57
+ add_Y_Tyrosine = 0.0000 ; added to Y - avg. 163.1760, mono. 163.06333
58
+ add_W_Tryptophan = 0.0000 ; added to W - avg. 186.2132, mono. 186.07931
59
+
60
+ [SEQUEST_ENZYME_INFO]
61
+ 0. No_Enzyme 0 - -
62
+ 1. Trypsin 1 KR -
63
+ 2. Trypsin(KRLNH) 1 KRLNH -
64
+ 3. Chymotrypsin 1 FWYL -
65
+ 4. Chymotrypsin(FWY) 1 FWY P
66
+ 5. Clostripain 1 R -
67
+ 6. Cyanogen_Bromide 1 M -
68
+ 7. IodosoBenzoate 1 W -
69
+ 8. Proline_Endopept 1 P -
70
+ 9. Staph_Protease 1 E -
71
+ 10. Trypsin_K 1 K P
72
+ 11. Trypsin_R 1 R P
73
+ 12. GluC 1 ED -
74
+ 13. LysC 1 K -
75
+ 14. AspN 0 D -
76
+ 15. Elastase 1 ALIV P
77
+ 16. Elastase/Tryp/Chymo 1 ALIVKRWFY P
@@ -0,0 +1,62 @@
1
+ [SEQUEST]
2
+ first_database_name = C:\Xcalibur\database\ecoli_K12.fasta
3
+ second_database_name =
4
+ peptide_mass_tolerance = 1.5000
5
+ peptide_mass_units = 0 ; 0=amu, 1=mmu, 2=ppm
6
+ ion_series = 0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0
7
+ fragment_ion_tolerance = 1.0000 ; leave at 0.0 unless you have real poor data
8
+ num_output_lines = 10 ; # peptide results to show
9
+ num_results = 500 ; # results to store
10
+ num_description_lines = 5 ; # full protein descriptions to show for top N peptides
11
+ show_fragment_ions = 1 ; 0=no, 1=yes
12
+ print_duplicate_references = 40 ; 0=no, 1=yes
13
+ enzyme_info = Trypsin(KR) 1 1 KR -
14
+ max_num_differential_per_peptide = 3 ; max # of diff. mod in a peptide
15
+ diff_search_options = 0.000000 S 0.000000 C 0.000000 M 0.000000 X 0.000000 T 0.000000 Y
16
+ term_diff_search_options = 0.000000 0.000000
17
+ nucleotide_reading_frame = 0 ; 0=protein db, 1-6, 7 = forward three, 8-reverse three, 9=all six
18
+ mass_type_parent = 0 ; 0=average masses, 1=monoisotopic masses
19
+ mass_type_fragment = 0 ; 0=average masses, 1=monoisotopic masses
20
+ normalize_xcorr = 0 ; use normalized xcorr values in the out file
21
+ remove_precursor_peak = 0 ; 0=no, 1=yes
22
+ ion_cutoff_percentage = 0.0000 ; prelim. score cutoff % as a decimal number i.e. 0.30 for 30%
23
+ max_num_internal_cleavage_sites = 2 ; maximum value is 5
24
+ protein_mass_filter = 0 0 ; enter protein mass min & max value ( 0 for both = unused)
25
+ match_peak_count = 0 ; number of auto-detected peaks to try matching (max 5)
26
+ match_peak_allowed_error = 1 ; number of allowed errors in matching auto-detected peaks
27
+ match_peak_tolerance = 1.0000 ; mass tolerance for matching auto-detected peaks
28
+ partial_sequence =
29
+ sequence_header_filter =
30
+ digest_mass_range = 600.0 3500.0
31
+
32
+ add_Cterm_peptide = 0.0000 ; added to each peptide C-terminus
33
+ add_Cterm_protein = 0.0000 ; added to each protein C-terminus
34
+ add_Nterm_peptide = 0.0000 ; added to each peptide N-terminus
35
+ add_Nterm_protein = 0.0000 ; added to each protein N-terminus
36
+ add_G_Glycine = 0.0000 ; added to G - avg. 57.0519, mono. 57.02146
37
+ add_A_Alanine = 0.0000 ; added to A - avg. 71.0788, mono. 71.03711
38
+ add_S_Serine = 0.0000 ; added to S - avg. 87.0782, mono. 87.02303
39
+ add_P_Proline = 0.0000 ; added to P - avg. 97.1167, mono. 97.05276
40
+ add_V_Valine = 0.0000 ; added to V - avg. 99.1326, mono. 99.06841
41
+ add_T_Threonine = 0.0000 ; added to T - avg. 101.1051, mono. 101.04768
42
+ add_C_Cysteine = 0.0000 ; added to C - avg. 103.1388, mono. 103.00919
43
+ add_L_Leucine = 0.0000 ; added to L - avg. 113.1594, mono. 113.08406
44
+ add_I_Isoleucine = 0.0000 ; added to I - avg. 113.1594, mono. 113.08406
45
+ add_X_LorI = 0.0000 ; added to X - avg. 113.1594, mono. 113.08406
46
+ add_N_Asparagine = 0.0000 ; added to N - avg. 114.1038, mono. 114.04293
47
+ add_O_Ornithine = 0.0000 ; added to O - avg. 114.1472, mono 114.07931
48
+ add_B_avg_NandD = 0.0000 ; added to B - avg. 114.5962, mono. 114.53494
49
+ add_D_Aspartic_Acid = 0.0000 ; added to D - avg. 115.0886, mono. 115.02694
50
+ add_Q_Glutamine = 0.0000 ; added to Q - avg. 128.1307, mono. 128.05858
51
+ add_K_Lysine = 0.0000 ; added to K - avg. 128.1741, mono. 128.09496
52
+ add_Z_avg_QandE = 0.0000 ; added to Z - avg. 128.6231, mono. 128.55059
53
+ add_E_Glutamic_Acid = 0.0000 ; added to E - avg. 129.1155, mono. 129.04259
54
+ add_M_Methionine = 0.0000 ; added to M - avg. 131.1926, mono. 131.04049
55
+ add_H_Histidine = 0.0000 ; added to H - avg. 137.1411, mono. 137.05891
56
+ add_F_Phenylalanine = 0.0000 ; added to F - avg. 147.1766, mono. 147.06841
57
+ add_R_Arginine = 0.0000 ; added to R - avg. 156.1875, mono. 156.10111
58
+ add_Y_Tyrosine = 0.0000 ; added to Y - avg. 163.1760, mono. 163.06333
59
+ add_W_Tryptophan = 0.0000 ; added to W - avg. 186.2132, mono. 186.07931
60
+ add_J_user_amino_acid = 0.0000 ; added to J
61
+ add_U_user_amino_acid = 0.0000 ; added to U
62
+