mspire 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. data/README.rdoc +24 -0
  2. data/Rakefile +51 -0
  3. data/VERSION +1 -0
  4. data/lib/cv/description.rb +18 -0
  5. data/lib/cv/param.rb +33 -0
  6. data/lib/cv.rb +3 -0
  7. data/lib/io/bookmark.rb +13 -0
  8. data/lib/merge.rb +7 -0
  9. data/lib/ms/cvlist.rb +76 -0
  10. data/lib/ms/digester.rb +245 -0
  11. data/lib/ms/fasta.rb +86 -0
  12. data/lib/ms/ident/peptide/db.rb +243 -0
  13. data/lib/ms/ident/peptide.rb +72 -0
  14. data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
  15. data/lib/ms/ident/peptide_hit.rb +26 -0
  16. data/lib/ms/ident/pepxml/modifications.rb +83 -0
  17. data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
  18. data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
  19. data/lib/ms/ident/pepxml/parameters.rb +14 -0
  20. data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
  21. data/lib/ms/ident/pepxml/search_database.rb +49 -0
  22. data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
  23. data/lib/ms/ident/pepxml/search_hit.rb +144 -0
  24. data/lib/ms/ident/pepxml/search_result.rb +35 -0
  25. data/lib/ms/ident/pepxml/search_summary.rb +92 -0
  26. data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
  27. data/lib/ms/ident/pepxml.rb +112 -0
  28. data/lib/ms/ident/protein.rb +33 -0
  29. data/lib/ms/ident/protein_group.rb +80 -0
  30. data/lib/ms/ident/search.rb +114 -0
  31. data/lib/ms/ident.rb +37 -0
  32. data/lib/ms/isotope/aa.rb +59 -0
  33. data/lib/ms/mascot.rb +6 -0
  34. data/lib/ms/mass/aa.rb +79 -0
  35. data/lib/ms/mass.rb +55 -0
  36. data/lib/ms/mzml/index_list.rb +98 -0
  37. data/lib/ms/mzml/plms1.rb +34 -0
  38. data/lib/ms/mzml.rb +197 -0
  39. data/lib/ms/obo.rb +38 -0
  40. data/lib/ms/plms1.rb +156 -0
  41. data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
  42. data/lib/ms/quant/qspec.rb +112 -0
  43. data/lib/ms/spectrum.rb +154 -8
  44. data/lib/ms.rb +3 -10
  45. data/lib/msplat.rb +2 -0
  46. data/lib/obo/ims.rb +5 -0
  47. data/lib/obo/ms.rb +7 -0
  48. data/lib/obo/ontology.rb +41 -0
  49. data/lib/obo/unit.rb +5 -0
  50. data/lib/openany.rb +23 -0
  51. data/lib/write_file_or_string.rb +18 -0
  52. data/obo/ims.obo +562 -0
  53. data/obo/ms.obo +11677 -0
  54. data/obo/unit.obo +2563 -0
  55. data/spec/ms/cvlist_spec.rb +60 -0
  56. data/spec/ms/digester_spec.rb +351 -0
  57. data/spec/ms/fasta_spec.rb +100 -0
  58. data/spec/ms/ident/peptide/db_spec.rb +108 -0
  59. data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
  60. data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
  61. data/spec/ms/ident/pepxml_spec.rb +442 -0
  62. data/spec/ms/ident/protein_group_spec.rb +68 -0
  63. data/spec/ms/mass_spec.rb +8 -0
  64. data/spec/ms/mzml/index_list_spec.rb +122 -0
  65. data/spec/ms/mzml/plms1_spec.rb +62 -0
  66. data/spec/ms/mzml_spec.rb +50 -0
  67. data/spec/ms/plms1_spec.rb +38 -0
  68. data/spec/ms/quant/qspec_spec.rb +25 -0
  69. data/spec/msplat_spec.rb +24 -0
  70. data/spec/obo_spec.rb +25 -0
  71. data/spec/spec_helper.rb +25 -0
  72. data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
  73. data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
  74. data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
  75. data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
  76. data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
  77. data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
  78. data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
  79. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
  80. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
  81. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
  82. data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
  83. data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
  84. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
  85. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
  86. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
  87. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
  88. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
  89. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
  90. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
  91. data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
  92. data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
  93. data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
  94. data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
  95. data/spec/testfiles/plms1/output.key +0 -0
  96. metadata +157 -40
  97. data/README +0 -77
  98. data/changelog.txt +0 -196
  99. data/lib/ms/calc.rb +0 -32
  100. data/lib/ms/data/interleaved.rb +0 -60
  101. data/lib/ms/data/lazy_io.rb +0 -73
  102. data/lib/ms/data/lazy_string.rb +0 -15
  103. data/lib/ms/data/simple.rb +0 -59
  104. data/lib/ms/data/transposed.rb +0 -41
  105. data/lib/ms/data.rb +0 -57
  106. data/lib/ms/format/format_error.rb +0 -12
  107. data/lib/ms/support/binary_search.rb +0 -126
metadata CHANGED
@@ -1,88 +1,205 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mspire
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ prerelease:
5
+ version: 0.6.1
5
6
  platform: ruby
6
7
  authors:
7
- - John Prince
8
+ - John T. Prince
8
9
  - Simon Chiang
9
10
  autorequire:
10
11
  bindir: bin
11
12
  cert_chain: []
12
13
 
13
- date: 2008-11-20 00:00:00 -07:00
14
- default_executable:
14
+ date: 2012-01-25 00:00:00 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
- name: tap
17
+ name: nokogiri
18
+ prerelease: false
19
+ requirement: &id001 !ruby/object:Gem::Requirement
20
+ none: false
21
+ requirements:
22
+ - - ~>
23
+ - !ruby/object:Gem::Version
24
+ version: "1.5"
25
+ type: :runtime
26
+ version_requirements: *id001
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ prerelease: false
30
+ requirement: &id002 !ruby/object:Gem::Requirement
31
+ none: false
32
+ requirements:
33
+ - - ~>
34
+ - !ruby/object:Gem::Version
35
+ version: "2.6"
18
36
  type: :development
19
- version_requirement:
20
- version_requirements: !ruby/object:Gem::Requirement
37
+ version_requirements: *id002
38
+ - !ruby/object:Gem::Dependency
39
+ name: jeweler
40
+ prerelease: false
41
+ requirement: &id003 !ruby/object:Gem::Requirement
42
+ none: false
21
43
  requirements:
22
- - - ">="
44
+ - - ~>
23
45
  - !ruby/object:Gem::Version
24
- version: 0.11.2
25
- version:
46
+ version: 1.5.2
47
+ type: :development
48
+ version_requirements: *id003
26
49
  - !ruby/object:Gem::Dependency
27
- name: minitest
50
+ name: rcov
51
+ prerelease: false
52
+ requirement: &id004 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: "0"
28
58
  type: :development
29
- version_requirement:
30
- version_requirements: !ruby/object:Gem::Requirement
59
+ version_requirements: *id004
60
+ - !ruby/object:Gem::Dependency
61
+ name: obo
62
+ prerelease: false
63
+ requirement: &id005 !ruby/object:Gem::Requirement
64
+ none: false
31
65
  requirements:
32
- - - "="
66
+ - - ">="
33
67
  - !ruby/object:Gem::Version
34
- version: 1.3.0
35
- version:
36
- description: A library for working with mass spectrometry proteomics data.
37
- email: jprince@icmb.utexas.edu
68
+ version: 0.1.0
69
+ type: :development
70
+ version_requirements: *id005
71
+ description: mass spectrometry proteomics, lipidomics, and tools, a rewrite of mspire, merging of ms-* gems
72
+ email: jtprince@gmail.com
38
73
  executables: []
39
74
 
40
75
  extensions: []
41
76
 
42
77
  extra_rdoc_files:
43
- - changelog.txt
44
78
  - LICENSE
45
- - README
79
+ - README.rdoc
46
80
  files:
81
+ - LICENSE
82
+ - README.rdoc
83
+ - Rakefile
84
+ - VERSION
85
+ - lib/cv.rb
86
+ - lib/cv/description.rb
87
+ - lib/cv/param.rb
88
+ - lib/io/bookmark.rb
89
+ - lib/merge.rb
47
90
  - lib/ms.rb
48
- - lib/ms/calc.rb
49
- - lib/ms/data.rb
50
- - lib/ms/data/interleaved.rb
51
- - lib/ms/data/transposed.rb
52
- - lib/ms/data/simple.rb
53
- - lib/ms/data/lazy_io.rb
54
- - lib/ms/data/lazy_string.rb
55
- - lib/ms/format/format_error.rb
56
- - lib/ms/support/binary_search.rb
91
+ - lib/ms/cvlist.rb
92
+ - lib/ms/digester.rb
93
+ - lib/ms/fasta.rb
94
+ - lib/ms/ident.rb
95
+ - lib/ms/ident/peptide.rb
96
+ - lib/ms/ident/peptide/db.rb
97
+ - lib/ms/ident/peptide_hit.rb
98
+ - lib/ms/ident/peptide_hit/qvalue.rb
99
+ - lib/ms/ident/pepxml.rb
100
+ - lib/ms/ident/pepxml/modifications.rb
101
+ - lib/ms/ident/pepxml/msms_pipeline_analysis.rb
102
+ - lib/ms/ident/pepxml/msms_run_summary.rb
103
+ - lib/ms/ident/pepxml/parameters.rb
104
+ - lib/ms/ident/pepxml/sample_enzyme.rb
105
+ - lib/ms/ident/pepxml/search_database.rb
106
+ - lib/ms/ident/pepxml/search_hit.rb
107
+ - lib/ms/ident/pepxml/search_hit/modification_info.rb
108
+ - lib/ms/ident/pepxml/search_result.rb
109
+ - lib/ms/ident/pepxml/search_summary.rb
110
+ - lib/ms/ident/pepxml/spectrum_query.rb
111
+ - lib/ms/ident/protein.rb
112
+ - lib/ms/ident/protein_group.rb
113
+ - lib/ms/ident/search.rb
114
+ - lib/ms/isotope/aa.rb
115
+ - lib/ms/mascot.rb
116
+ - lib/ms/mass.rb
117
+ - lib/ms/mass/aa.rb
118
+ - lib/ms/mzml.rb
119
+ - lib/ms/mzml/index_list.rb
120
+ - lib/ms/mzml/plms1.rb
121
+ - lib/ms/obo.rb
122
+ - lib/ms/plms1.rb
123
+ - lib/ms/quant/qspec.rb
124
+ - lib/ms/quant/qspec/protein_group_comparison.rb
57
125
  - lib/ms/spectrum.rb
58
- - changelog.txt
59
- - LICENSE
60
- - README
61
- has_rdoc: true
62
- homepage: http://mspire.rubyforge.org
126
+ - lib/msplat.rb
127
+ - lib/obo/ims.rb
128
+ - lib/obo/ms.rb
129
+ - lib/obo/ontology.rb
130
+ - lib/obo/unit.rb
131
+ - lib/openany.rb
132
+ - lib/write_file_or_string.rb
133
+ - obo/ims.obo
134
+ - obo/ms.obo
135
+ - obo/unit.obo
136
+ - spec/ms/cvlist_spec.rb
137
+ - spec/ms/digester_spec.rb
138
+ - spec/ms/fasta_spec.rb
139
+ - spec/ms/ident/peptide/db_spec.rb
140
+ - spec/ms/ident/pepxml/sample_enzyme_spec.rb
141
+ - spec/ms/ident/pepxml/search_hit/modification_info_spec.rb
142
+ - spec/ms/ident/pepxml_spec.rb
143
+ - spec/ms/ident/protein_group_spec.rb
144
+ - spec/ms/mass_spec.rb
145
+ - spec/ms/mzml/index_list_spec.rb
146
+ - spec/ms/mzml/plms1_spec.rb
147
+ - spec/ms/mzml_spec.rb
148
+ - spec/ms/plms1_spec.rb
149
+ - spec/ms/quant/qspec_spec.rb
150
+ - spec/msplat_spec.rb
151
+ - spec/obo_spec.rb
152
+ - spec/spec_helper.rb
153
+ - spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta
154
+ - spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml
155
+ - spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML
156
+ - spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML
157
+ - spec/testfiles/ms/quant/kill_extra_tabs.rb
158
+ - spec/testfiles/ms/quant/max_quant_output.provenance.txt
159
+ - spec/testfiles/ms/quant/max_quant_output.txt
160
+ - spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv
161
+ - spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp
162
+ - spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv
163
+ - spec/testfiles/ms/quant/pdcd5_final.txt
164
+ - spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp
165
+ - spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv
166
+ - spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv
167
+ - spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv
168
+ - spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv
169
+ - spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp
170
+ - spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv
171
+ - spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt
172
+ - spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt
173
+ - spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp
174
+ - spec/testfiles/ms/quant/remove_rest_of_proteins.rb
175
+ - spec/testfiles/ms/quant/unlog_transform.rb
176
+ - spec/testfiles/plms1/output.key
177
+ homepage: http://github.com/princelab/mspire
178
+ licenses:
179
+ - MIT
63
180
  post_install_message:
64
181
  rdoc_options: []
65
182
 
66
183
  require_paths:
67
184
  - lib
68
185
  required_ruby_version: !ruby/object:Gem::Requirement
186
+ none: false
69
187
  requirements:
70
188
  - - ">="
71
189
  - !ruby/object:Gem::Version
72
190
  version: "0"
73
- version:
74
191
  required_rubygems_version: !ruby/object:Gem::Requirement
192
+ none: false
75
193
  requirements:
76
194
  - - ">="
77
195
  - !ruby/object:Gem::Version
78
196
  version: "0"
79
- version:
80
197
  requirements: []
81
198
 
82
- rubyforge_project: mspire
83
- rubygems_version: 1.3.0
199
+ rubyforge_project:
200
+ rubygems_version: 1.8.10
84
201
  signing_key:
85
- specification_version: 2
86
- summary: Mass Spectrometry Proteomics Objects, Scripts, and Executables
202
+ specification_version: 3
203
+ summary: mass spectrometry proteomics, lipidomics, and tools
87
204
  test_files: []
88
205
 
data/README DELETED
@@ -1,77 +0,0 @@
1
- = {Mspire}[http://mspire.rubyforge.org]
2
-
3
- A library for working with mass spectrometry proteomics data.
4
-
5
- <em> Mspire is going through a re-write as of version 0.5.0 to support a new
6
- development model. Many modules are absent but will gradually be added back.
7
- Use the 0.4 releases as necessary. </em>
8
-
9
- == Description
10
-
11
- mspire - 'Mass Spectrometry Proteomics in Ruby' is a collection of tools for
12
- working with MS proteomics data in ruby. It seeks to provide support for open
13
- standards (e.g., parsers for mzData, mzXML, Peptide/Protein Prophet and the
14
- TPP) and contribute other useful functionality for working with mass
15
- spectrometry data in ruby.
16
-
17
- * Lighthouse[http://bahuvrihi.lighthouseapp.com/projects/16692-mspire/tickets]
18
- * Github[http://github.com/bahuvrihi/mspire/tree/master]
19
- * {Google Group}[http://groups.google.com/group/mspire-forum]
20
-
21
- --
22
- === Current Focus
23
-
24
- The project is currently focusing on the following:
25
-
26
- * SEQUEST data (particularly the output of Bioworks 3.2-3.3.1)
27
- * mzXML
28
- * mzData
29
- * ProteinProphet
30
- * Preparation of files for [obiwarp](http://obi-warp.sourceforge.net/)
31
-
32
- === Features
33
-
34
- * mzXML (version 1, 2, and 3) parsing
35
- * mzData parsing
36
- * bioworks .srf (binary files) reader
37
- * read/write .sqt files
38
- * bioworks to PeptideProphet input (pepXML files)
39
- * lightweight APEX values parser
40
- * histogram protein probabilities
41
- * protein summary views with custom false ID cutoff values
42
- * conversion to OBI-Warp input files
43
- * portable: works across platforms
44
-
45
- Validation by:
46
- * Various Decoy Database search options: Reverse/Shuffle, concatenated/separate, with various hashing options (e.g., by amino acid sequence + charge)
47
- * Amino acid (e.g., search for unblocked cysteines)
48
- * Transmembrane prediction (Phobius or TopPred)
49
- * Generic sample bias (e.g., low abundance/high abundance proteins)
50
- * Defined sample
51
-
52
- === Spectra and Spectra Identification
53
-
54
- The [MS](ms/index.html) namespace contains objects for working with mass spectra and associated file formats.
55
-
56
- The [SpecID](spec_id/index.html) namespace contains objects for working with spectral identifications.
57
-
58
- === Tutorials
59
-
60
- * [Database Searching Tutorial](tutorial/database_searching/index.html) -
61
- Demonstrates two methods for running and analysing Bioworks output to obtain
62
- false positive rates using mspire executables.
63
- ++
64
-
65
- == Installation
66
-
67
- Mspire is available as a gem on RubyForge[http://rubyforge.org/projects/mspire]. Use:
68
-
69
- % gem install mspire
70
-
71
- = Warning
72
-
73
- This is an experimental package. As such, all versions prior to version 1.0
74
- may contain interface changes on minor revisions (major.minor.build) (e.g.,
75
- 0.4.0 may contain interface change from 0.3.9). Beyond version 1.0, the
76
- versioning scheme will be strictly adhered to (no interface changes except on
77
- major revisions).
data/changelog.txt DELETED
@@ -1,196 +0,0 @@
1
-
2
- == version 0.1.7
3
-
4
- 1. A couple of scripts and subroutines were hashing peptides but not on the file
5
- basename. This would result in slightly incorrect results (any time there
6
- were overlapping scan numbers in multiple datasets, only the top one would be
7
- chosen). The results would be correct for single runs.
8
-
9
- Output files that could be affected:
10
- *.top_per_scan.txt
11
- *.all_peps_per_scan.txt
12
-
13
- Scripts that could be affected:
14
- script/top_hit_per_scan.rb
15
- bin/filter_spec_id.rb
16
- script/filter-peps.rb
17
- bin/id_precision.rb
18
-
19
- Subroutines that were affected:
20
- spec_id.rb (pep_probs_by_* )
21
- spec_id.rb (top_peps_prefilter!)
22
- proph.rb uniq_by_seqcharge
23
- align.rb called uniq_by_seqcharge
24
-
25
-
26
- 2. false_positive_rate.rb and protein_summary.rb (by extension) were using
27
- number of true positives on the x axis while in reality I was plotting the
28
- number of hits. I've updated x axis labels to reflect this change. In
29
- addition, since the term 'false positive rate' has such a distinct definition
30
- in classical ROC plots and binary statistics, I've decided to work primarily
31
- in terms of precision (TP/(TP+FP)). I've purged the terms 'False Positive
32
- Rate' and 'FPR' from the package. It's been suggested that FP/(TP+FP) be
33
- called the False Positive Predictive Rate (FPPR). I will probably implement
34
- this in a future release.
35
-
36
- == version 0.2.0
37
-
38
- Revamped the way SpecID works (it is now mixed-in).
39
- Added support for modifications to bioworks_to_pepxml.rb
40
- Can read .srf files (nearly interchangeable with bioworks files)
41
- Redid filter.rb
42
-
43
- == version 0.2.1
44
-
45
- minor bugfix
46
-
47
- == version 0.2.2
48
-
49
- made compatible with Bioworks fasta file reverser and updated tutorial.
50
- Killed classify_by_prefix routine in favor of classify_by_false_flag which has
51
- a prefix option
52
-
53
- == version 0.2.3
54
-
55
- in protein_summary.rb added handling for proteins with no annotation. (either
56
- dispaly NA or use gi2annnot to grab them from NCBI)
57
-
58
- == version 0.2.5
59
-
60
- renamed prep_list in roc (potential breaks in code)
61
-
62
- == version 0.2.6
63
-
64
- 1. Massive refactorization of filtering and validation. Validation objects are
65
- created and then can be used to validate just about anything.
66
- 2. Massive redo of the parsing of MS runs. Can parse mzXML v1, v2.X
67
- (including readw broken output), and mzData (even Thermo's broken output).
68
- 4. Moved all tests to specs (rspec).
69
- 5. Can read gradient programs off of .meth or .RAW files (both Xcal 1.X and
70
- 2.X)
71
-
72
- Bugfixes:
73
- 1. The search_summary 'base_name' in pepxml output was incorrect (this did not
74
- appear to influence our analyses, however). Fixed.
75
- 2. Enzymes with no exceptions (e.g., cuts at KR) would report one too many
76
- missed cleavages if the last amino acid was a cut point. Fixed.
77
-
78
- == version 0.2.7
79
-
80
- 1. In conversion from bioworks to pepxml, the default was trypsin (KR/P).
81
- Now, the sample enzyme is set explicitly from the params file and the option
82
- is not available. This can give more accuract pepxml files than from
83
- previous depending on your enzyme.
84
-
85
- == version 0.2.9
86
-
87
- 1. Added support for phobius transmembrane predictions
88
- 2. have filter_and_validate.rb working well (multiple validators allowed).
89
- 3. Can read bioworks 3.3.1 .srf files (.srf version 3.5 files)
90
- 4. Added a bias validator
91
-
92
- == version 0.2.10
93
-
94
- 1. Fixed --hits_separate flag in spec_id/filter
95
-
96
- == version 0.2.11
97
-
98
- 1. Added prob precision support and reorganized filter_and_validate libs
99
-
100
- == version 0.2.12
101
-
102
- 1. Fixed bug in transmem for prob and others.
103
- 2. Can use axml (XMLParser based) or libxml depending on availability
104
-
105
- == version 0.2.13
106
-
107
- 1. Fixed issue with --hits_separate
108
- 2. filter_and_validate.rb requires decoy validator if decoy proteins
109
- (refactored code)
110
-
111
- == version 0.2.14
112
-
113
- 1. Can read PeptideProphet files (should be able to read pepxml files, too)
114
- 2. API change: Some slight modifications to the Sequest::PepXML object
115
- interfaces and implementations (using ArrayClass)
116
-
117
- == version 0.2.15
118
-
119
- 1. can convert srf files to sqt files
120
-
121
- == version 0.3.0
122
-
123
- 1. IMPORTANT BUG FIX: protein reporting in srf files is correct now (proteins after the first protein were being assigned to the last hit in an out file).
124
- 2. SQT export is correct and works at least on 3.2 and 3.3.1.
125
-
126
- == version 0.3.1
127
-
128
- 1. Bug fix in srf filtering (num_hits adjusted)
129
-
130
- == version 0.3.2
131
-
132
- 1. Uses sequest peptide_mass_tolerance filter on srf group files by default
133
- now.
134
-
135
- == version 0.3.3
136
-
137
- 1. Worked out minor kinks in prob_precision.rb
138
-
139
- == version 0.3.4
140
-
141
- 1. filters >= +3 charged ions now.
142
-
143
- == version 0.3.5
144
-
145
- 1. fixed creation of background distribution in validators (hash_by base_name,
146
- first_scan, charge now)
147
-
148
- == version 0.3.6
149
-
150
- 1. split off bad_aa_est from bad_aa
151
-
152
- == version 0.3.7
153
-
154
- 1. can deal with No_Enzyme searches now (while still capable of setting
155
- sample_enzyme)
156
-
157
- == version 0.3.8
158
-
159
- 1. can set a decoy to target ratio for decoy validation
160
- 2. added mass calculator in Mass::Calculator
161
-
162
- == version 0.3.9
163
-
164
- 1. doesn't clobber mzdata filename in ms_to_lmat.rb conversion
165
-
166
- == version 0.3.10
167
-
168
- 1. added run_percolator.rb script which makes running multiple files easy
169
-
170
- == version 0.3.11
171
-
172
- 1. faster sensing of bad scan tags in mzXML v. 2.0 files
173
- 2. implemented lazy evaluation of spectrum in 2 different ways allowing much
174
- larger files to be parsed
175
-
176
- == version 0.4.0
177
-
178
- 1. ** INTERFACE CHANGE: each scan can only have one precursor (used to be an array)
179
- 2. ** INTERFACE CHANGE: spectrum mz and intensity data accessed with mzs and intensities
180
- 3. lazy eval working on mzData
181
- 4. mzData not necessarily guaranteed to have precursor intensities on lazy
182
- eval methos (however, the method intensity_at_mz will still work (causing
183
- evaluation))
184
-
185
- == version 0.4.1
186
-
187
- 1. added support for reading mzXML version 3.0 (may fail in some cases)
188
-
189
- == version 0.4.2
190
-
191
- 1. added MS::MSRun.open method
192
- 2. added method to write dta files from SRF
193
-
194
- == version 0.4.3
195
-
196
- 1. added to_mfg_file from SRF
data/lib/ms/calc.rb DELETED
@@ -1,32 +0,0 @@
1
- module Ms
2
- module Calc
3
- module_function
4
-
5
- #
6
- # ppm calculations... maybe use RUnit
7
- #
8
-
9
- def ppm_tol_at(mz, ppm)
10
- 1.0 * mz * ppm / 10**6
11
- end
12
-
13
- def ppm_span_at(mz, ppm)
14
- tol = ppm_tol_at(mz, ppm)
15
- [mz-tol, mz+tol]
16
- end
17
-
18
- def ppm_range_at(mz, ppm)
19
- mz = mz.to_f
20
- tol = ppm_tol_at(mz, ppm)
21
- mz-tol...mz+tol
22
- end
23
-
24
-
25
- # Rounds n to the specified precision (ie number of decimal places)
26
- # def round(n, precision)
27
- # factor = 10**precision.to_i
28
- # (n * factor).round.to_f / factor
29
- # end
30
-
31
- end
32
- end
data/lib/ms/data/interleaved.rb DELETED
@@ -1,60 +0,0 @@
1
- require 'ms/data/simple'
2
-
3
- module Ms
4
- module Data
5
- module_function
6
-
7
- # Initializes a new interleaved data array.
8
- def new_interleaved(unresolved_data, n=2)
9
- Interleaved.new(unresolved_data, n=2)
10
- end
11
-
12
- # An Interleaved data array lazily evaluates it's unresolved data as
13
- # an interleaved array of n members. The unresolved data is evaluated
14
- # into an array using to_a.
15
- #
16
- # i = Ms::Data::Interleaved.new([1,4,2,5,3,6])
17
- # i.unresolved_data # => [1,4,2,5,3,6]
18
- # i.data # => []
19
- # i[0] # => [1,2,3]
20
- # i[1] # => [4,5,6]
21
- # i.data # => [[1,2,3], [4,5,6]]
22
- #
23
- class Interleaved < Simple
24
- attr_reader :n
25
-
26
- def initialize(unresolved_data, n=2)
27
- @n = 2
28
- super(unresolved_data)
29
- end
30
-
31
- def [](index)
32
- resolve.data[index]
33
- end
34
-
35
- def resolved?
36
- !@data.empty?
37
- end
38
-
39
- def resolve
40
- return(self) if resolved?
41
-
42
- unresolved_data = @unresolved_data.to_a
43
-
44
- unless unresolved_data.length % n == 0
45
- raise ArgumentError, "interleaved data must have a number of elements evenly divisible by n (#{n})"
46
- end
47
-
48
- n.times { @data << [] }
49
- map = @data * (unresolved_data.length/n)
50
-
51
- unresolved_data.each_with_index do |item, i|
52
- map[i] << item
53
- end
54
-
55
- self
56
- end
57
-
58
- end
59
- end
60
- end
data/lib/ms/data/lazy_io.rb DELETED
@@ -1,73 +0,0 @@
1
- module Ms
2
- module Data
3
-
4
- # LazyIO represents data to be lazily read from an IO. To read the data
5
- # from the IO, either string or to_a may be called (to_a unpacks the
6
- # string into an array using the decode_format and unpack_format).
7
- #
8
- # LazyIO is a suitable unresolved_data source for Ms::Data formats.
9
- class LazyIO
10
- NETWORK_FLOAT = 'g*'
11
- NETWORK_DOUBLE = 'G*'
12
- LITTLE_ENDIAN_FLOAT = 'e*'
13
- LITTLE_ENDIAN_DOUBLE = 'E*'
14
- BASE_64 = 'm'
15
-
16
- class << self
17
- # Returns the unpacking code for the given precision (32 or 64-bit)
18
- # and network order (true for big-endian).
19
- def unpack_code(precision, network_order)
20
- case precision
21
- when 32 then network_order ? NETWORK_FLOAT : LITTLE_ENDIAN_FLOAT
22
- when 64 then network_order ? NETWORK_DOUBLE : LITTLE_ENDIAN_DOUBLE
23
- else raise ArgumentError, "unknown precision (should be 32 or 64): #{precision}"
24
- end
25
- end
26
- end
27
-
28
- # The IO from which string is read
29
- attr_reader :io
30
-
31
- # The start index for reading string
32
- attr_reader :start_index
33
-
34
- # The number of bytes to be read from io when evaluating string
35
- attr_reader :num_bytes
36
-
37
- # Indicates the unpacking format
38
- attr_reader :unpack_format
39
-
40
- # Indicates a decoding format, may be false to unpack string
41
- # without decoding.
42
- attr_reader :decode_format
43
-
44
- def initialize(io, start_index=io.pos, num_bytes=nil, unpack_format=NETWORK_FLOAT, decode_format=BASE_64)
45
- @io = io
46
- @start_index = start_index
47
- @num_bytes = num_bytes
48
- @unpack_format = unpack_format
49
- @decode_format = decode_format
50
- end
51
-
52
- # Positions io at start_index and reads a string of num_bytes length.
53
- # The string is newly read from io each time string is called.
54
- def string
55
- io.pos = start_index unless io.pos == start_index
56
- io.read(num_bytes)
57
- end
58
-
59
- # Resets the cached array (returned by to_a) so that the array will
60
- # be re-read from io.
61
- def reset
62
- @array = nil
63
- end
64
-
65
- # Reads string and unpacks using decode_format and unpack_code. The
66
- # array is cached internally; to re-read the array, use reset.
67
- def to_a
68
- @array ||= (decode_format ? string.unpack(decode_format)[0] : string).unpack(unpack_format)
69
- end
70
-
71
- end
72
- end
73
- end
data/lib/ms/data/lazy_string.rb DELETED
@@ -1,15 +0,0 @@
1
- require 'ms/data/lazy_io'
2
- require 'stringio'
3
-
4
- module Ms
5
- module Data
6
-
7
- # LazyString is a LazyIO initialized from a string, which is converted into
8
- # a StringIO.
9
- class LazyString < LazyIO
10
- def initialize(string, unpack_format=NETWORK_FLOAT, decode_format=BASE_64)
11
- super(StringIO.new(string), 0, string.length, unpack_format, decode_format)
12
- end
13
- end
14
- end
15
- end