mspire 0.5.0 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (107) hide show
  1. data/README.rdoc +24 -0
  2. data/Rakefile +51 -0
  3. data/VERSION +1 -0
  4. data/lib/cv/description.rb +18 -0
  5. data/lib/cv/param.rb +33 -0
  6. data/lib/cv.rb +3 -0
  7. data/lib/io/bookmark.rb +13 -0
  8. data/lib/merge.rb +7 -0
  9. data/lib/ms/cvlist.rb +76 -0
  10. data/lib/ms/digester.rb +245 -0
  11. data/lib/ms/fasta.rb +86 -0
  12. data/lib/ms/ident/peptide/db.rb +243 -0
  13. data/lib/ms/ident/peptide.rb +72 -0
  14. data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
  15. data/lib/ms/ident/peptide_hit.rb +26 -0
  16. data/lib/ms/ident/pepxml/modifications.rb +83 -0
  17. data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
  18. data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
  19. data/lib/ms/ident/pepxml/parameters.rb +14 -0
  20. data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
  21. data/lib/ms/ident/pepxml/search_database.rb +49 -0
  22. data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
  23. data/lib/ms/ident/pepxml/search_hit.rb +144 -0
  24. data/lib/ms/ident/pepxml/search_result.rb +35 -0
  25. data/lib/ms/ident/pepxml/search_summary.rb +92 -0
  26. data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
  27. data/lib/ms/ident/pepxml.rb +112 -0
  28. data/lib/ms/ident/protein.rb +33 -0
  29. data/lib/ms/ident/protein_group.rb +80 -0
  30. data/lib/ms/ident/search.rb +114 -0
  31. data/lib/ms/ident.rb +37 -0
  32. data/lib/ms/isotope/aa.rb +59 -0
  33. data/lib/ms/mascot.rb +6 -0
  34. data/lib/ms/mass/aa.rb +79 -0
  35. data/lib/ms/mass.rb +55 -0
  36. data/lib/ms/mzml/index_list.rb +98 -0
  37. data/lib/ms/mzml/plms1.rb +34 -0
  38. data/lib/ms/mzml.rb +197 -0
  39. data/lib/ms/obo.rb +38 -0
  40. data/lib/ms/plms1.rb +156 -0
  41. data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
  42. data/lib/ms/quant/qspec.rb +112 -0
  43. data/lib/ms/spectrum.rb +154 -8
  44. data/lib/ms.rb +3 -10
  45. data/lib/msplat.rb +2 -0
  46. data/lib/obo/ims.rb +5 -0
  47. data/lib/obo/ms.rb +7 -0
  48. data/lib/obo/ontology.rb +41 -0
  49. data/lib/obo/unit.rb +5 -0
  50. data/lib/openany.rb +23 -0
  51. data/lib/write_file_or_string.rb +18 -0
  52. data/obo/ims.obo +562 -0
  53. data/obo/ms.obo +11677 -0
  54. data/obo/unit.obo +2563 -0
  55. data/spec/ms/cvlist_spec.rb +60 -0
  56. data/spec/ms/digester_spec.rb +351 -0
  57. data/spec/ms/fasta_spec.rb +100 -0
  58. data/spec/ms/ident/peptide/db_spec.rb +108 -0
  59. data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
  60. data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
  61. data/spec/ms/ident/pepxml_spec.rb +442 -0
  62. data/spec/ms/ident/protein_group_spec.rb +68 -0
  63. data/spec/ms/mass_spec.rb +8 -0
  64. data/spec/ms/mzml/index_list_spec.rb +122 -0
  65. data/spec/ms/mzml/plms1_spec.rb +62 -0
  66. data/spec/ms/mzml_spec.rb +50 -0
  67. data/spec/ms/plms1_spec.rb +38 -0
  68. data/spec/ms/quant/qspec_spec.rb +25 -0
  69. data/spec/msplat_spec.rb +24 -0
  70. data/spec/obo_spec.rb +25 -0
  71. data/spec/spec_helper.rb +25 -0
  72. data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
  73. data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
  74. data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
  75. data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
  76. data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
  77. data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
  78. data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
  79. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
  80. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
  81. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
  82. data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
  83. data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
  84. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
  85. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
  86. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
  87. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
  88. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
  89. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
  90. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
  91. data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
  92. data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
  93. data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
  94. data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
  95. data/spec/testfiles/plms1/output.key +0 -0
  96. metadata +157 -40
  97. data/README +0 -77
  98. data/changelog.txt +0 -196
  99. data/lib/ms/calc.rb +0 -32
  100. data/lib/ms/data/interleaved.rb +0 -60
  101. data/lib/ms/data/lazy_io.rb +0 -73
  102. data/lib/ms/data/lazy_string.rb +0 -15
  103. data/lib/ms/data/simple.rb +0 -59
  104. data/lib/ms/data/transposed.rb +0 -41
  105. data/lib/ms/data.rb +0 -57
  106. data/lib/ms/format/format_error.rb +0 -12
  107. data/lib/ms/support/binary_search.rb +0 -126
metadata CHANGED
@@ -1,88 +1,205 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mspire
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ prerelease:
5
+ version: 0.6.1
5
6
  platform: ruby
6
7
  authors:
7
- - John Prince
8
+ - John T. Prince
8
9
  - Simon Chiang
9
10
  autorequire:
10
11
  bindir: bin
11
12
  cert_chain: []
12
13
 
13
- date: 2008-11-20 00:00:00 -07:00
14
- default_executable:
14
+ date: 2012-01-25 00:00:00 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
- name: tap
17
+ name: nokogiri
18
+ prerelease: false
19
+ requirement: &id001 !ruby/object:Gem::Requirement
20
+ none: false
21
+ requirements:
22
+ - - ~>
23
+ - !ruby/object:Gem::Version
24
+ version: "1.5"
25
+ type: :runtime
26
+ version_requirements: *id001
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ prerelease: false
30
+ requirement: &id002 !ruby/object:Gem::Requirement
31
+ none: false
32
+ requirements:
33
+ - - ~>
34
+ - !ruby/object:Gem::Version
35
+ version: "2.6"
18
36
  type: :development
19
- version_requirement:
20
- version_requirements: !ruby/object:Gem::Requirement
37
+ version_requirements: *id002
38
+ - !ruby/object:Gem::Dependency
39
+ name: jeweler
40
+ prerelease: false
41
+ requirement: &id003 !ruby/object:Gem::Requirement
42
+ none: false
21
43
  requirements:
22
- - - ">="
44
+ - - ~>
23
45
  - !ruby/object:Gem::Version
24
- version: 0.11.2
25
- version:
46
+ version: 1.5.2
47
+ type: :development
48
+ version_requirements: *id003
26
49
  - !ruby/object:Gem::Dependency
27
- name: minitest
50
+ name: rcov
51
+ prerelease: false
52
+ requirement: &id004 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: "0"
28
58
  type: :development
29
- version_requirement:
30
- version_requirements: !ruby/object:Gem::Requirement
59
+ version_requirements: *id004
60
+ - !ruby/object:Gem::Dependency
61
+ name: obo
62
+ prerelease: false
63
+ requirement: &id005 !ruby/object:Gem::Requirement
64
+ none: false
31
65
  requirements:
32
- - - "="
66
+ - - ">="
33
67
  - !ruby/object:Gem::Version
34
- version: 1.3.0
35
- version:
36
- description: A library for working with mass spectrometry proteomics data.
37
- email: jprince@icmb.utexas.edu
68
+ version: 0.1.0
69
+ type: :development
70
+ version_requirements: *id005
71
+ description: mass spectrometry proteomics, lipidomics, and tools, a rewrite of mspire, merging of ms-* gems
72
+ email: jtprince@gmail.com
38
73
  executables: []
39
74
 
40
75
  extensions: []
41
76
 
42
77
  extra_rdoc_files:
43
- - changelog.txt
44
78
  - LICENSE
45
- - README
79
+ - README.rdoc
46
80
  files:
81
+ - LICENSE
82
+ - README.rdoc
83
+ - Rakefile
84
+ - VERSION
85
+ - lib/cv.rb
86
+ - lib/cv/description.rb
87
+ - lib/cv/param.rb
88
+ - lib/io/bookmark.rb
89
+ - lib/merge.rb
47
90
  - lib/ms.rb
48
- - lib/ms/calc.rb
49
- - lib/ms/data.rb
50
- - lib/ms/data/interleaved.rb
51
- - lib/ms/data/transposed.rb
52
- - lib/ms/data/simple.rb
53
- - lib/ms/data/lazy_io.rb
54
- - lib/ms/data/lazy_string.rb
55
- - lib/ms/format/format_error.rb
56
- - lib/ms/support/binary_search.rb
91
+ - lib/ms/cvlist.rb
92
+ - lib/ms/digester.rb
93
+ - lib/ms/fasta.rb
94
+ - lib/ms/ident.rb
95
+ - lib/ms/ident/peptide.rb
96
+ - lib/ms/ident/peptide/db.rb
97
+ - lib/ms/ident/peptide_hit.rb
98
+ - lib/ms/ident/peptide_hit/qvalue.rb
99
+ - lib/ms/ident/pepxml.rb
100
+ - lib/ms/ident/pepxml/modifications.rb
101
+ - lib/ms/ident/pepxml/msms_pipeline_analysis.rb
102
+ - lib/ms/ident/pepxml/msms_run_summary.rb
103
+ - lib/ms/ident/pepxml/parameters.rb
104
+ - lib/ms/ident/pepxml/sample_enzyme.rb
105
+ - lib/ms/ident/pepxml/search_database.rb
106
+ - lib/ms/ident/pepxml/search_hit.rb
107
+ - lib/ms/ident/pepxml/search_hit/modification_info.rb
108
+ - lib/ms/ident/pepxml/search_result.rb
109
+ - lib/ms/ident/pepxml/search_summary.rb
110
+ - lib/ms/ident/pepxml/spectrum_query.rb
111
+ - lib/ms/ident/protein.rb
112
+ - lib/ms/ident/protein_group.rb
113
+ - lib/ms/ident/search.rb
114
+ - lib/ms/isotope/aa.rb
115
+ - lib/ms/mascot.rb
116
+ - lib/ms/mass.rb
117
+ - lib/ms/mass/aa.rb
118
+ - lib/ms/mzml.rb
119
+ - lib/ms/mzml/index_list.rb
120
+ - lib/ms/mzml/plms1.rb
121
+ - lib/ms/obo.rb
122
+ - lib/ms/plms1.rb
123
+ - lib/ms/quant/qspec.rb
124
+ - lib/ms/quant/qspec/protein_group_comparison.rb
57
125
  - lib/ms/spectrum.rb
58
- - changelog.txt
59
- - LICENSE
60
- - README
61
- has_rdoc: true
62
- homepage: http://mspire.rubyforge.org
126
+ - lib/msplat.rb
127
+ - lib/obo/ims.rb
128
+ - lib/obo/ms.rb
129
+ - lib/obo/ontology.rb
130
+ - lib/obo/unit.rb
131
+ - lib/openany.rb
132
+ - lib/write_file_or_string.rb
133
+ - obo/ims.obo
134
+ - obo/ms.obo
135
+ - obo/unit.obo
136
+ - spec/ms/cvlist_spec.rb
137
+ - spec/ms/digester_spec.rb
138
+ - spec/ms/fasta_spec.rb
139
+ - spec/ms/ident/peptide/db_spec.rb
140
+ - spec/ms/ident/pepxml/sample_enzyme_spec.rb
141
+ - spec/ms/ident/pepxml/search_hit/modification_info_spec.rb
142
+ - spec/ms/ident/pepxml_spec.rb
143
+ - spec/ms/ident/protein_group_spec.rb
144
+ - spec/ms/mass_spec.rb
145
+ - spec/ms/mzml/index_list_spec.rb
146
+ - spec/ms/mzml/plms1_spec.rb
147
+ - spec/ms/mzml_spec.rb
148
+ - spec/ms/plms1_spec.rb
149
+ - spec/ms/quant/qspec_spec.rb
150
+ - spec/msplat_spec.rb
151
+ - spec/obo_spec.rb
152
+ - spec/spec_helper.rb
153
+ - spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta
154
+ - spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml
155
+ - spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML
156
+ - spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML
157
+ - spec/testfiles/ms/quant/kill_extra_tabs.rb
158
+ - spec/testfiles/ms/quant/max_quant_output.provenance.txt
159
+ - spec/testfiles/ms/quant/max_quant_output.txt
160
+ - spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv
161
+ - spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp
162
+ - spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv
163
+ - spec/testfiles/ms/quant/pdcd5_final.txt
164
+ - spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp
165
+ - spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv
166
+ - spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv
167
+ - spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv
168
+ - spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv
169
+ - spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp
170
+ - spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv
171
+ - spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt
172
+ - spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt
173
+ - spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp
174
+ - spec/testfiles/ms/quant/remove_rest_of_proteins.rb
175
+ - spec/testfiles/ms/quant/unlog_transform.rb
176
+ - spec/testfiles/plms1/output.key
177
+ homepage: http://github.com/princelab/mspire
178
+ licenses:
179
+ - MIT
63
180
  post_install_message:
64
181
  rdoc_options: []
65
182
 
66
183
  require_paths:
67
184
  - lib
68
185
  required_ruby_version: !ruby/object:Gem::Requirement
186
+ none: false
69
187
  requirements:
70
188
  - - ">="
71
189
  - !ruby/object:Gem::Version
72
190
  version: "0"
73
- version:
74
191
  required_rubygems_version: !ruby/object:Gem::Requirement
192
+ none: false
75
193
  requirements:
76
194
  - - ">="
77
195
  - !ruby/object:Gem::Version
78
196
  version: "0"
79
- version:
80
197
  requirements: []
81
198
 
82
- rubyforge_project: mspire
83
- rubygems_version: 1.3.0
199
+ rubyforge_project:
200
+ rubygems_version: 1.8.10
84
201
  signing_key:
85
- specification_version: 2
86
- summary: Mass Spectrometry Proteomics Objects, Scripts, and Executables
202
+ specification_version: 3
203
+ summary: mass spectrometry proteomics, lipidomics, and tools
87
204
  test_files: []
88
205
 
data/README DELETED
@@ -1,77 +0,0 @@
1
- = {Mspire}[http://mspire.rubyforge.org]
2
-
3
- A library for working with mass spectrometry proteomics data.
4
-
5
- <em> Mspire is going through a re-write as of version 0.5.0 to support a new
6
- development model. Many modules are absent but will gradually be added back.
7
- Use the 0.4 releases as necessary. </em>
8
-
9
- == Description
10
-
11
- mspire - 'Mass Spectrometry Proteomics in Ruby' is a collection of tools for
12
- working with MS proteomics data in ruby. It seeks to provide support for open
13
- standards (e.g., parsers for mzData, mzXML, Peptide/Protein Prophet and the
14
- TPP) and contribute other useful functionality for working with mass
15
- spectrometry data in ruby.
16
-
17
- * Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/16692-mspire/tickets]
18
- * Github[http://github.com/bahuvrihi/mspire/tree/master]
19
- * {Google Group}[http://groups.google.com/group/mspire-forum]
20
-
21
- --
22
- === Current Focus
23
-
24
- The project is currently focusing on the following:
25
-
26
- * SEQUEST data (particularly the output of Bioworks 3.2-3.3.1)
27
- * mzXML
28
- * mzData
29
- * ProteinProphet
30
- * Preparation of files for [obiwarp](http://obi-warp.sourceforge.net/)
31
-
32
- === Features
33
-
34
- * mzXML (version 1, 2, and 3) parsing
35
- * mzData parsing
36
- * bioworks .srf (binary files) reader
37
- * read/write .sqt files
38
- * bioworks to PeptideProphet input (pepXML files)
39
- * lightweight APEX values parser
40
- * histogram protein probabilities
41
- * protein summary views with custom false ID cutoff values
42
- * conversion to OBI-Warp input files
43
- * portable: works across platforms
44
-
45
- Validation by:
46
- * Various Decoy Database search options: Reverse/Shuffle, concatenated/separate, with various hashing options (e.g., by amino acid sequence + charge)
47
- * Amino acid (e.g., search for unblocked cysteines)
48
- * Transmembrane prediction (Phobius or TopPred)
49
- * Generic sample bias (e.g., low abundance/high abundance proteins)
50
- * Defined sample
51
-
52
- === Spectra and Spectra Identification
53
-
54
- The [MS](ms/index.html) namespace contains objects for working with mass spectra and associated file formats.
55
-
56
- The [SpecID](spec_id/index.html) namespace contains objects for working with spectral identifications.
57
-
58
- === Tutorials
59
-
60
- * [Database Searching Tutorial](tutorial/database_searching/index.html) -
61
- Demonstrates two methods for running and analysing Bioworks output to obtain
62
- false positive rates using mspire executables.
63
- ++
64
-
65
- == Installation
66
-
67
- Mspire is available as a gem on RubyForge[http://rubyforge.org/projects/mspire]. Use:
68
-
69
- % gem install mspire
70
-
71
- = Warning
72
-
73
- This is an experimental package. As such, all versions prior to version 1.0
74
- may contain interface changes on minor revisions (major.minor.build) (e.g.,
75
- 0.4.0 may contain interface change from 0.3.9). Beyond version 1.0, the
76
- versioning scheme will be strictly adhered to (no interface changes except on
77
- major revisions).
data/changelog.txt DELETED
@@ -1,196 +0,0 @@
1
-
2
- == version 0.1.7
3
-
4
- 1. A couple of scripts and subroutines were hashing peptides but not on the file
5
- basename. This would result in slightly incorrect results (any time there
6
- were overlapping scan numbers in multiple datasets, only the top one would be
7
- chosen). The results would be correct for single runs.
8
-
9
- Output files that could be affected:
10
- *.top_per_scan.txt
11
- *.all_peps_per_scan.txt
12
-
13
- Scripts that could be affected:
14
- script/top_hit_per_scan.rb
15
- bin/filter_spec_id.rb
16
- script/filter-peps.rb
17
- bin/id_precision.rb
18
-
19
- Subroutines that were affected:
20
- spec_id.rb (pep_probs_by_* )
21
- spec_id.rb (top_peps_prefilter!)
22
- proph.rb uniq_by_seqcharge
23
- align.rb called uniq_by_seqcharge
24
-
25
-
26
- 2. false_positive_rate.rb and protein_summary.rb (by extension) were using
27
- number of true positives on the x axis while in reality I was plotting the
28
- number of hits. I've updated x axis labels to reflect this change. In
29
- addition, since the term 'false positive rate' has such a distinct definition
30
- in classical ROC plots and binary statistics, I've decided to work primarily
31
- in terms of precision (TP/(TP+FP)). I've purged the terms 'False Positive
32
- Rate' and 'FPR' from the package. It's been suggested that FP/(TP+FP) be
33
- called the False Positive Predictive Rate (FPPR). I will probably implement
34
- this in a future release.
35
-
36
- == version 0.2.0
37
-
38
- Revamped the way SpecID works (it is now mixed-in).
39
- Added support for modifications to bioworks_to_pepxml.rb
40
- Can read .srf files (nearly interchangeable with bioworks files)
41
- Redid filter.rb
42
-
43
- == version 0.2.1
44
-
45
- minor bugfix
46
-
47
- == version 0.2.2
48
-
49
- made compatible with Bioworks fasta file reverser and updated tutorial.
50
- Killed classify_by_prefix routine in favor of classify_by_false_flag which has
51
- a prefix option
52
-
53
- == version 0.2.3
54
-
55
- in protein_summary.rb added handling for proteins with no annotation. (either
56
- display NA or use gi2annnot to grab them from NCBI)
57
-
58
- == version 0.2.5
59
-
60
- renamed prep_list in roc (potential breaks in code)
61
-
62
- == version 0.2.6
63
-
64
- 1. Massive refactorization of filtering and validation. Validation objects are
65
- created and then can be used to validate just about anything.
66
- 2. Massive redo of the parsing of MS runs. Can parse mzXML v1, v2.X
67
- (including readw broken output), and mzData (even Thermo's broken output).
68
- 4. Moved all tests to specs (rspec).
69
- 5. Can read gradient programs off of .meth or .RAW files (both Xcal 1.X and
70
- 2.X)
71
-
72
- Bugfixes:
73
- 1. The search_summary 'base_name' in pepxml output was incorrect (this did not
74
- appear to influence our analyses, however). Fixed.
75
- 2. Enzymes with no exceptions (e.g., cuts at KR) would report one too many
76
- missed cleavages if the last amino acid was a cut point. Fixed.
77
-
78
- == version 0.2.7
79
-
80
- 1. In conversion from bioworks to pepxml, the default was trypsin (KR/P).
81
- Now, the sample enzyme is set explicitly from the params file and the option
82
- is not available. This can give more accurate pepxml files than from
83
- previous depending on your enzyme.
84
-
85
- == version 0.2.9
86
-
87
- 1. Added support for phobius transmembrane predictions
88
- 2. have filter_and_validate.rb working well (multiple validators allowed).
89
- 3. Can read bioworks 3.3.1 .srf files (.srf version 3.5 files)
90
- 4. Added a bias validator
91
-
92
- == version 0.2.10
93
-
94
- 1. Fixed --hits_separate flag in spec_id/filter
95
-
96
- == version 0.2.11
97
-
98
- 1. Added prob precision support and reorganized filter_and_validate libs
99
-
100
- == version 0.2.12
101
-
102
- 1. Fixed bug in transmem for prob and others.
103
- 2. Can use axml (XMLParser based) or libxml depending on availability
104
-
105
- == version 0.2.13
106
-
107
- 1. Fixed issue with --hits_separate
108
- 2. filter_and_validate.rb requires decoy validator if decoy proteins
109
- (refactored code)
110
-
111
- == version 0.2.14
112
-
113
- 1. Can read PeptideProphet files (should be able to read pepxml files, too)
114
- 2. API change: Some slight modifications to the Sequest::PepXML object
115
- interfaces and implementations (using ArrayClass)
116
-
117
- == version 0.2.15
118
-
119
- 1. can convert srf files to sqt files
120
-
121
- == version 0.3.0
122
-
123
- 1. IMPORTANT BUG FIX: protein reporting in srf files is correct now (proteins after the first protein were being assigned to the last hit in an out file).
124
- 2. SQT export is correct and works at least on 3.2 and 3.3.1.
125
-
126
- == version 0.3.1
127
-
128
- 1. Bug fix in srf filtering (num_hits adjusted)
129
-
130
- == version 0.3.2
131
-
132
- 1. Uses sequest peptide_mass_tolerance filter on srf group files by default
133
- now.
134
-
135
- == version 0.3.3
136
-
137
- 1. Worked out minor kinks in prob_precision.rb
138
-
139
- == version 0.3.4
140
-
141
- 1. filters >= +3 charged ions now.
142
-
143
- == version 0.3.5
144
-
145
- 1. fixed creation of background distribution in validators (hash_by base_name,
146
- first_scan, charge now)
147
-
148
- == version 0.3.6
149
-
150
- 1. split off bad_aa_est from bad_aa
151
-
152
- == version 0.3.7
153
-
154
- 1. can deal with No_Enzyme searches now (while still capable of setting
155
- sample_enzyme)
156
-
157
- == version 0.3.8
158
-
159
- 1. can set a decoy to target ratio for decoy validation
160
- 2. added mass calculator in Mass::Calculator
161
-
162
- == version 0.3.9
163
-
164
- 1. doesn't clobber mzdata filename in ms_to_lmat.rb conversion
165
-
166
- == version 0.3.10
167
-
168
- 1. added run_percolator.rb script which makes running multiple files easy
169
-
170
- == version 0.3.11
171
-
172
- 1. faster sensing of bad scan tags in mzXML v. 2.0 files
173
- 2. implemented lazy evaluation of spectrum in 2 different ways allowing much
174
- larger files to be parsed
175
-
176
- == version 0.4.0
177
-
178
- 1. ** INTERFACE CHANGE: each scan can only have one precursor (used to be an array)
179
- 2. ** INTERFACE CHANGE: spectrum mz and intensity data accessed with mzs and intensities
180
- 3. lazy eval working on mzData
181
- 4. mzData not necessarily guaranteed to have precursor intensities on lazy
182
- eval methods (however, the method intensity_at_mz will still work (causing
183
- evaluation))
184
-
185
- == version 0.4.1
186
-
187
- 1. added support for reading mzXML version 3.0 (may fail in some cases)
188
-
189
- == version 0.4.2
190
-
191
- 1. added MS::MSRun.open method
192
- 2. added method to write dta files from SRF
193
-
194
- == version 0.4.3
195
-
196
- 1. added to_mfg_file from SRF
data/lib/ms/calc.rb DELETED
@@ -1,32 +0,0 @@
1
- module Ms
2
- module Calc
3
- module_function
4
-
5
- #
6
- # ppm calculations... maybe use RUnit
7
- #
8
-
9
- def ppm_tol_at(mz, ppm)
10
- 1.0 * mz * ppm / 10**6
11
- end
12
-
13
- def ppm_span_at(mz, ppm)
14
- tol = ppm_tol_at(mz, ppm)
15
- [mz-tol, mz+tol]
16
- end
17
-
18
- def ppm_range_at(mz, ppm)
19
- mz = mz.to_f
20
- tol = ppm_tol_at(mz, ppm)
21
- mz-tol...mz+tol
22
- end
23
-
24
-
25
- # Rounds n to the specified precision (ie number of decimal places)
26
- # def round(n, precision)
27
- # factor = 10**precision.to_i
28
- # (n * factor).round.to_f / factor
29
- # end
30
-
31
- end
32
- end
@@ -1,60 +0,0 @@
1
- require 'ms/data/simple'
2
-
3
- module Ms
4
- module Data
5
- module_function
6
-
7
- # Initializes a new interleaved data array.
8
- def new_interleaved(unresolved_data, n=2)
9
- Interleaved.new(unresolved_data, n=2)
10
- end
11
-
12
- # An Interleaved data array lazily evaluates its unresolved data as
13
- # an interleaved array of n members. The unresolved data is evaluated
14
- # into an array using to_a.
15
- #
16
- # i = Ms::Data::Interleaved.new([1,4,2,5,3,6])
17
- # i.unresolved_data # => [1,4,2,5,3,6]
18
- # i.data # => []
19
- # i[0] # => [1,2,3]
20
- # i[1] # => [4,5,6]
21
- # i.data # => [[1,2,3], [4,5,6]]
22
- #
23
- class Interleaved < Simple
24
- attr_reader :n
25
-
26
- def initialize(unresolved_data, n=2)
27
- @n = 2
28
- super(unresolved_data)
29
- end
30
-
31
- def [](index)
32
- resolve.data[index]
33
- end
34
-
35
- def resolved?
36
- !@data.empty?
37
- end
38
-
39
- def resolve
40
- return(self) if resolved?
41
-
42
- unresolved_data = @unresolved_data.to_a
43
-
44
- unless unresolved_data.length % n == 0
45
- raise ArgumentError, "interleaved data must have a number of elements evenly divisible by n (#{n})"
46
- end
47
-
48
- n.times { @data << [] }
49
- map = @data * (unresolved_data.length/n)
50
-
51
- unresolved_data.each_with_index do |item, i|
52
- map[i] << item
53
- end
54
-
55
- self
56
- end
57
-
58
- end
59
- end
60
- end
@@ -1,73 +0,0 @@
1
- module Ms
2
- module Data
3
-
4
- # LazyIO represents data to be lazily read from an IO. To read the data
5
- # from the IO, either string or to_a may be called (to_a unpacks the
6
- # string into an array using the decode_format and unpack_format).
7
- #
8
- # LazyIO is a suitable unresolved_data source for Ms::Data formats.
9
- class LazyIO
10
- NETWORK_FLOAT = 'g*'
11
- NETWORK_DOUBLE = 'G*'
12
- LITTLE_ENDIAN_FLOAT = 'e*'
13
- LITTLE_ENDIAN_DOUBLE = 'E*'
14
- BASE_64 = 'm'
15
-
16
- class << self
17
- # Returns the unpacking code for the given precision (32 or 64-bit)
18
- # and network order (true for big-endian).
19
- def unpack_code(precision, network_order)
20
- case precision
21
- when 32 then network_order ? NETWORK_FLOAT : LITTLE_ENDIAN_FLOAT
22
- when 64 then network_order ? NETWORK_DOUBLE : LITTLE_ENDIAN_DOUBLE
23
- else raise ArgumentError, "unknown precision (should be 32 or 64): #{precision}"
24
- end
25
- end
26
- end
27
-
28
- # The IO from which string is read
29
- attr_reader :io
30
-
31
- # The start index for reading string
32
- attr_reader :start_index
33
-
34
- # The number of bytes to be read from io when evaluating string
35
- attr_reader :num_bytes
36
-
37
- # Indicates the unpacking format
38
- attr_reader :unpack_format
39
-
40
- # Indicates a decoding format, may be false to unpack string
41
- # without decoding.
42
- attr_reader :decode_format
43
-
44
- def initialize(io, start_index=io.pos, num_bytes=nil, unpack_format=NETWORK_FLOAT, decode_format=BASE_64)
45
- @io = io
46
- @start_index = start_index
47
- @num_bytes = num_bytes
48
- @unpack_format = unpack_format
49
- @decode_format = decode_format
50
- end
51
-
52
- # Positions io at start_index and reads a string of num_bytes length.
53
- # The string is newly read from io each time string is called.
54
- def string
55
- io.pos = start_index unless io.pos == start_index
56
- io.read(num_bytes)
57
- end
58
-
59
- # Resets the cached array (returned by to_a) so that the array will
60
- # be re-read from io.
61
- def reset
62
- @array = nil
63
- end
64
-
65
- # Reads string and unpacks using decode_format and unpack_code. The
66
- # array is cached internally; to re-read the array, use reset.
67
- def to_a
68
- @array ||= (decode_format ? string.unpack(decode_format)[0] : string).unpack(unpack_format)
69
- end
70
-
71
- end
72
- end
73
- end
@@ -1,15 +0,0 @@
1
- require 'ms/data/lazy_io'
2
- require 'stringio'
3
-
4
- module Ms
5
- module Data
6
-
7
- # LazyString is a LazyIO initialized from a string, which is converted into
8
- # a StringIO.
9
- class LazyString < LazyIO
10
- def initialize(string, unpack_format=NETWORK_FLOAT, decode_format=BASE_64)
11
- super(StringIO.new(string), 0, string.length, unpack_format, decode_format)
12
- end
13
- end
14
- end
15
- end