mspire 0.8.0 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +60 -0
- data/Rakefile +16 -4
- data/VERSION +1 -1
- data/lib/mspire/imzml/writer/commandline.rb +1 -1
- data/lib/mspire/imzml/writer.rb +20 -1
- data/lib/mspire/mzml/cv.rb +3 -4
- data/obo/ims.obo +2 -4
- data/obo/ms.obo +708 -56
- data/obo/unit.obo +1 -1
- data/spec/mspire/imzml/writer_spec.rb +30 -0
- data/spec/testfiles/mspire/mzml/1_BB7_SIM_478.5.CHECK.ibd +0 -0
- data/spec/testfiles/mspire/mzml/1_BB7_SIM_478.5.CHECK.imzML +3975 -0
- data/spec/testfiles/mspire/mzml/mspire_simulated.MSn.check.mzML +2 -2
- metadata +6 -4
- data/README.rdoc +0 -24
data/README.md
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# mspire
|
2
|
+
|
3
|
+
Mspire is a full featured library for working with mass spectrometry data,
|
4
|
+
particularly proteomic, metabolomic and lipidomic data sets. It aims to be
|
5
|
+
fast, robust, and beautiful.
|
6
|
+
|
7
|
+
## Cite
|
8
|
+
|
9
|
+
Prince JT, Marcotte EM. <b>mspire: mass spectrometry proteomics in Ruby.</b> *Bioinformatics.* 2008 Dec 1;24(23):2796-7. ([pubmed](http://www.ncbi.nlm.nih.gov/pubmed/18930952))
|
10
|
+
|
11
|
+
## Features
|
12
|
+
|
13
|
+
### mzml
|
14
|
+
|
15
|
+
* Reading *and* writing
|
16
|
+
* True random access to spectra or chromatograms
|
17
|
+
* Complete object model with implicit object link resolution (even with random access)
|
18
|
+
* Simplified creation of and full support for CV params and referenceable param groups
|
19
|
+
|
20
|
+
### imzml
|
21
|
+
|
22
|
+
Mspire is the *only* converter from mzml into imzml.
|
23
|
+
|
24
|
+
* handles both processed and continuous modes
|
25
|
+
* gracefully handles SIM data
|
26
|
+
|
27
|
+
### Other Feature Highlights
|
28
|
+
|
29
|
+
* isotope distribution prediction: uses fastest method known (FFT convolution)
|
30
|
+
* protein digestion: Support for 32 enzymes/variants by name
|
31
|
+
* pepxml: full object model and complete write support
|
32
|
+
* fasta files: complete programmatic access to description lines (via bio-ruby)
|
33
|
+
* peak lists: merging/summing and splitting algorithms
|
34
|
+
* obo: ontology hash access
|
35
|
+
* molecular formulas: can do arithmetic with formulas
|
36
|
+
* calculates q-values
|
37
|
+
|
38
|
+
## Examples
|
39
|
+
|
40
|
+
### mzml
|
41
|
+
|
42
|
+
require 'mspire/mzml'
|
43
|
+
|
44
|
+
Mspire::Mzml.open("somefile.mzml") do |mzml|
|
45
|
+
spectrum = mzml[0] # the first spectrum ( same as mzml.spectrum(0) )
|
46
|
+
spectrum = mzml["controllerType=0 controllerNumber=1 scan=2"] # query by id string
|
47
|
+
mzml.spectrum_from_scan_num(23) # raises ScanNumbersNotFound or ScanNumbersNotUnique errors if problems
|
48
|
+
end
|
49
|
+
|
50
|
+
require 'mspire/mass/aa'
|
51
|
+
|
52
|
+
Mspire::Mass::AA::MONO['A'] # or access by symbol
|
53
|
+
|
54
|
+
## Acronym
|
55
|
+
|
56
|
+
<i>M</i>ass <i>SP</i>ectrometry <i>I</i>n <i>R</i>uby. Mspire originally stood for <i>M</i>ass <i>S</i>pectrometry <i>P</i>roteomics <i>I</i>n <i>R</i>uby but the library has since proven useful for all kinds of mass spectrometry projects, hence the more inclusive form. The <i>e</i> was originally included for aesthetic reasons, but it also provides the user/developer the ability to attach whatever <i>E</i>xclamation or <i>E</i>pithet they choose to the acronym (the best ones will begin with <i>e</i> of course).
|
57
|
+
|
58
|
+
## Copyright
|
59
|
+
|
60
|
+
MIT license. See LICENSE for details.
|
data/Rakefile
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'rake'
|
3
3
|
require 'rspec/core/rake_task'
|
4
|
+
require 'yard'
|
4
5
|
|
5
6
|
require 'jeweler'
|
6
7
|
Jeweler::Tasks.new do |gem|
|
@@ -35,6 +36,11 @@ RSpec::Core::RakeTask.new(:spec) do |spec|
|
|
35
36
|
spec.pattern = FileList['spec/**/*_spec.rb']
|
36
37
|
end
|
37
38
|
|
39
|
+
YARD::Rake::YardocTask.new do |t|
|
40
|
+
t.files = ['lib/**/*.rb', 'obo/**/*', 'README.md', 'script/**/*'] # optional
|
41
|
+
#t.options = ['--any', '--extra', '--opts'] # optional
|
42
|
+
end
|
43
|
+
|
38
44
|
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
39
45
|
spec.pattern = 'spec/**/*_spec.rb'
|
40
46
|
spec.rcov = true
|
@@ -59,7 +65,13 @@ Rake::RDocTask.new do |rdoc|
|
|
59
65
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
60
66
|
end
|
61
67
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
68
|
+
desc "downloads the latest obo to appropriate spot"
|
69
|
+
task 'obo-update' do
|
70
|
+
require 'mspire/mzml/cv'
|
71
|
+
require 'open-uri'
|
72
|
+
Mspire::Mzml::CV::DEFAULT_CVS.each do |const|
|
73
|
+
obo_fn = File.dirname(__FILE__) + "/obo/#{const.id.downcase}.obo"
|
74
|
+
File.write(obo_fn, open(const.uri, &:read).gsub(/\r\n?/, "\n"))
|
75
|
+
puts "NOTE: if a file changed (git status), then update lib/mspire/mzml/cv.rb with correct version !!!"
|
76
|
+
end
|
77
|
+
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.8.0
|
1
|
+
0.8.1
|
data/lib/mspire/imzml/writer.rb
CHANGED
@@ -4,6 +4,7 @@ require 'uuid'
|
|
4
4
|
require 'cv'
|
5
5
|
require 'mspire/mzml'
|
6
6
|
require 'mspire/mzml/spectrum'
|
7
|
+
require 'mspire/spectrum'
|
7
8
|
require 'pathname'
|
8
9
|
require 'digest/sha1'
|
9
10
|
|
@@ -325,7 +326,25 @@ module Mspire::Imzml
|
|
325
326
|
sourcefile_id = "source_file_#{i}"
|
326
327
|
sourcefile_ids << sourcefile_id
|
327
328
|
Mspire::Mzml.open(mzml_filename) do |mzml|
|
328
|
-
|
329
|
+
enumerates_spectra =
|
330
|
+
# handle SIM files "MS:1001472", "selected ion monitoring chromatogram"
|
331
|
+
if mzml.file_description.file_content.fetch_by_acc('MS:1001472')
|
332
|
+
# SIM file: build one spectrum per data point from the SIM chromatograms
|
333
|
+
mz_ars = []
|
334
|
+
its_ars = []
|
335
|
+
mzml.each_chromatogram.each do |chromatogram,i|
|
336
|
+
next unless chromatogram.fetch_by_acc('MS:1001472')
|
337
|
+
target_mz = chromatogram.precursor.isolation_window.fetch_by_acc('MS:1000827').to_f
|
338
|
+
its = chromatogram.intensities
|
339
|
+
mz_ars << Array.new(its.size, target_mz)
|
340
|
+
its_ars << its
|
341
|
+
end
|
342
|
+
mz_ars.transpose.zip(its_ars.transpose).map {|mzs, its| Mspire::Spectrum.new([mzs, its]) }
|
343
|
+
else
|
344
|
+
# normal mzml file with spectra
|
345
|
+
mzml
|
346
|
+
end
|
347
|
+
enumerates_spectra.each_with_index do |spec,i|
|
329
348
|
break if config[:trim_to] && (i >= config[:trim_to])
|
330
349
|
sourcefile_id_parallel_to_spectra << sourcefile_id
|
331
350
|
yielder << spec
|
data/lib/mspire/mzml/cv.rb
CHANGED
@@ -38,12 +38,11 @@ module Mspire
|
|
38
38
|
self.new(xml[:id], xml[:fullName], xml[:URI], xml[:version])
|
39
39
|
end
|
40
40
|
|
41
|
-
# These are derived by looking in the obo folder at the top of mspire
|
42
41
|
IMS = self.new("IMS", "Imaging MS Ontology", "http://www.maldi-msi.org/download/imzml/imagingMS.obo", "0.9.1")
|
43
|
-
MS = self.new('MS', "Proteomics Standards Initiative Mass Spectrometry Ontology", "http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo", "3.
|
42
|
+
MS = self.new('MS', "Proteomics Standards Initiative Mass Spectrometry Ontology", "http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo", "3.29.0")
|
44
43
|
# the version for UO doesn't really exist: seen files where they use the
|
45
|
-
# download date: DD:MM:YYY
|
46
|
-
UO = self.new("UO", "Unit Ontology", "http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo", "
|
44
|
+
# download date: DD:MM:YYYY. I'm going to use the save date in the header.
|
45
|
+
UO = self.new("UO", "Unit Ontology", "http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo", "12:10:2011")
|
47
46
|
|
48
47
|
DEFAULT_CVS = [MS, UO, IMS]
|
49
48
|
|
data/obo/ims.obo
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
format-version: 1.2
|
3
2
|
date: 08:09:2008 13:53
|
4
3
|
saved-by: Thorsten Schramm
|
@@ -377,7 +376,7 @@ relationship: is_a IMS:1000002 ! Sample Stage
|
|
377
376
|
relationship: has_units UO:0000017 ! Micrometer
|
378
377
|
|
379
378
|
[Term]
|
380
|
-
id: IMS:
|
379
|
+
id: IMS:1000202
|
381
380
|
name: target material
|
382
381
|
def: "Describes the material the target is made of." [COMPUTIS:IMS]
|
383
382
|
xref: value-type:xsd\:string "The allowed value-type for this CV term."
|
@@ -558,5 +557,4 @@ name: solvent flowrate
|
|
558
557
|
def: "Rate with which the solvent is flowing on the surface of the imaging object" [COMPUTIS:IMS]
|
559
558
|
xref: value-type:xsd\:float "The allowed value-type for this CV term."
|
560
559
|
relationship: is_a IMS:1000121 ! DESI
|
561
|
-
relationship: has_units IMS:1000131 ! milliliter per minute
|
562
|
-
|
560
|
+
relationship: has_units IMS:1000131 ! milliliter per minute
|