mspire 0.8.0 → 0.8.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +60 -0
- data/Rakefile +16 -4
- data/VERSION +1 -1
- data/lib/mspire/imzml/writer/commandline.rb +1 -1
- data/lib/mspire/imzml/writer.rb +20 -1
- data/lib/mspire/mzml/cv.rb +3 -4
- data/obo/ims.obo +2 -4
- data/obo/ms.obo +708 -56
- data/obo/unit.obo +1 -1
- data/spec/mspire/imzml/writer_spec.rb +30 -0
- data/spec/testfiles/mspire/mzml/1_BB7_SIM_478.5.CHECK.ibd +0 -0
- data/spec/testfiles/mspire/mzml/1_BB7_SIM_478.5.CHECK.imzML +3975 -0
- data/spec/testfiles/mspire/mzml/mspire_simulated.MSn.check.mzML +2 -2
- metadata +6 -4
- data/README.rdoc +0 -24
data/README.md
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# mspire
|
2
|
+
|
3
|
+
Mspire is a full featured library for working with mass spectrometry data,
|
4
|
+
particularly proteomic, metabolomic and lipidomic data sets. It aims to be
|
5
|
+
fast, robust, and beautiful.
|
6
|
+
|
7
|
+
## Cite
|
8
|
+
|
9
|
+
Prince JT, Marcotte EM. <b>mspire: mass spectrometry proteomics in Ruby.</b> *Bioinformatics.* 2008 Dec 1;24(23):2796-7. ([pubmed](http://www.ncbi.nlm.nih.gov/pubmed/18930952))
|
10
|
+
|
11
|
+
## Features
|
12
|
+
|
13
|
+
### mzml
|
14
|
+
|
15
|
+
* Reading *and* writing
|
16
|
+
* True random access to spectra or chromatograms
|
17
|
+
* Complete object model with implicit object link resolution (even with random access)
|
18
|
+
* Simplified creation of and full support for CV params and referenceable param groups
|
19
|
+
|
20
|
+
### imzml
|
21
|
+
|
22
|
+
Mspire is the *only* converter from mzml into imzml.
|
23
|
+
|
24
|
+
* handles both processed and continuous modes
|
25
|
+
* gracefully handles SIM data
|
26
|
+
|
27
|
+
### Other Feature Highlights
|
28
|
+
|
29
|
+
* isotope distribution prediction: uses fastest method known (FFT convolution)
|
30
|
+
* protein digestion: Support for 32 enzymes/variants by name
|
31
|
+
* pepxml: full object model and complete write support
|
32
|
+
* fasta files: complete programmatic access to description lines (via bio-ruby)
|
33
|
+
* peak lists: merging/summing and splitting algorithms
|
34
|
+
* obo: ontology hash access
|
35
|
+
* molecular formulas: can do arithmetic with formulas
|
36
|
+
* calculates q-values
|
37
|
+
|
38
|
+
## Examples
|
39
|
+
|
40
|
+
### mzml
|
41
|
+
|
42
|
+
require 'mspire/mzml'
|
43
|
+
|
44
|
+
Mspire::Mzml.open("somefile.mzml") do |mzml|
|
45
|
+
spectrum = mzml[0] # the first spectrum ( same as mzml.spectrum(0) )
|
46
|
+
spectrum = mzml["controllerType=0 controllerNumber=1 scan=2"] # query by id string
|
47
|
+
mzml.spectrum_from_scan_num(23) # raises ScanNumbersNotFound or ScanNumbersNotUnique errors if problems
|
48
|
+
end
|
49
|
+
|
50
|
+
require 'mspire/mass/aa'
|
51
|
+
|
52
|
+
Mspire::Mass::AA::MONO['A'] # or access by symbol
|
53
|
+
|
54
|
+
## Acronym
|
55
|
+
|
56
|
+
<i>M</i>ass <i>SP</i>ectrometry <i>I</i>n <i>R</i>uby. Mspire originally stood for <i>M</i>ass <i>S</i>pectrometry <i>P</i>roteomics <i>I</i>n <i>R</i>uby but the library has since proven useful for all kinds of mass spectrometry projects, hence the more inclusive form. The <i>e</i> was originally included for aesthetic reasons, but it also provides the user/developer the ability to attach whatever <i>E</i>xclamation or <i>E</i>pithet they choose to the acronym (the best ones will begin with <i>e</i> of course).
|
57
|
+
|
58
|
+
## Copyright
|
59
|
+
|
60
|
+
MIT license. See LICENSE for details.
|
data/Rakefile
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'rake'
|
3
3
|
require 'rspec/core/rake_task'
|
4
|
+
require 'yard'
|
4
5
|
|
5
6
|
require 'jeweler'
|
6
7
|
Jeweler::Tasks.new do |gem|
|
@@ -35,6 +36,11 @@ RSpec::Core::RakeTask.new(:spec) do |spec|
|
|
35
36
|
spec.pattern = FileList['spec/**/*_spec.rb']
|
36
37
|
end
|
37
38
|
|
39
|
+
YARD::Rake::YardocTask.new do |t|
|
40
|
+
t.files = ['lib/**/*.rb', 'obo/**/*', 'README.md', 'script/**/*'] # optional
|
41
|
+
#t.options = ['--any', '--extra', '--opts'] # optional
|
42
|
+
end
|
43
|
+
|
38
44
|
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
39
45
|
spec.pattern = 'spec/**/*_spec.rb'
|
40
46
|
spec.rcov = true
|
@@ -59,7 +65,13 @@ Rake::RDocTask.new do |rdoc|
|
|
59
65
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
60
66
|
end
|
61
67
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
68
|
+
desc "downloads the latest obo to appropriate spot"
|
69
|
+
task 'obo-update' do
|
70
|
+
require 'mspire/mzml/cv'
|
71
|
+
require 'open-uri'
|
72
|
+
Mspire::Mzml::CV::DEFAULT_CVS.each do |const|
|
73
|
+
obo_fn = File.dirname(__FILE__) + "/obo/#{const.id.downcase}.obo"
|
74
|
+
File.write(obo_fn, open(const.uri, &:read).gsub(/\r\n?/, "\n"))
|
75
|
+
puts "NOTE: if a file changed (git status), then update lib/mspire/mzml/cv.rb with correct version !!!"
|
76
|
+
end
|
77
|
+
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.8.0
|
1
|
+
0.8.1
|
data/lib/mspire/imzml/writer.rb
CHANGED
@@ -4,6 +4,7 @@ require 'uuid'
|
|
4
4
|
require 'cv'
|
5
5
|
require 'mspire/mzml'
|
6
6
|
require 'mspire/mzml/spectrum'
|
7
|
+
require 'mspire/spectrum'
|
7
8
|
require 'pathname'
|
8
9
|
require 'digest/sha1'
|
9
10
|
|
@@ -325,7 +326,25 @@ module Mspire::Imzml
|
|
325
326
|
sourcefile_id = "source_file_#{i}"
|
326
327
|
sourcefile_ids << sourcefile_id
|
327
328
|
Mspire::Mzml.open(mzml_filename) do |mzml|
|
328
|
-
|
329
|
+
enumerates_spectra =
|
330
|
+
# handle SIM files "MS:1001472", "selected ion monitoring chromatogram"
|
331
|
+
if mzml.file_description.file_content.fetch_by_acc('MS:1001472')
|
332
|
+
# SIM file: rebuild spectra by transposing the SIM chromatograms' m/z and intensity arrays
|
333
|
+
mz_ars = []
|
334
|
+
its_ars = []
|
335
|
+
mzml.each_chromatogram.each do |chromatogram,i|
|
336
|
+
next unless chromatogram.fetch_by_acc('MS:1001472')
|
337
|
+
target_mz = chromatogram.precursor.isolation_window.fetch_by_acc('MS:1000827').to_f
|
338
|
+
its = chromatogram.intensities
|
339
|
+
mz_ars << Array.new(its.size, target_mz)
|
340
|
+
its_ars << its
|
341
|
+
end
|
342
|
+
mz_ars.transpose.zip(its_ars.transpose).map {|mzs, its| Mspire::Spectrum.new([mzs, its]) }
|
343
|
+
else
|
344
|
+
# normal mzml file with spectra
|
345
|
+
mzml
|
346
|
+
end
|
347
|
+
enumerates_spectra.each_with_index do |spec,i|
|
329
348
|
break if config[:trim_to] && (i >= config[:trim_to])
|
330
349
|
sourcefile_id_parallel_to_spectra << sourcefile_id
|
331
350
|
yielder << spec
|
data/lib/mspire/mzml/cv.rb
CHANGED
@@ -38,12 +38,11 @@ module Mspire
|
|
38
38
|
self.new(xml[:id], xml[:fullName], xml[:URI], xml[:version])
|
39
39
|
end
|
40
40
|
|
41
|
-
# These are derived by looking in the obo folder at the top of mspire
|
42
41
|
IMS = self.new("IMS", "Imaging MS Ontology", "http://www.maldi-msi.org/download/imzml/imagingMS.obo", "0.9.1")
|
43
|
-
MS = self.new('MS', "Proteomics Standards Initiative Mass Spectrometry Ontology", "http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo", "3.
|
42
|
+
MS = self.new('MS', "Proteomics Standards Initiative Mass Spectrometry Ontology", "http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo", "3.29.0")
|
44
43
|
# the version for UO doesn't really exist: seen files where they use the
|
45
|
-
# download date: DD:MM:YYY
|
46
|
-
UO = self.new("UO", "Unit Ontology", "http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo", "
|
44
|
+
# download date: DD:MM:YYYY. I'm going to use the save date in the header.
|
45
|
+
UO = self.new("UO", "Unit Ontology", "http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo", "12:10:2011")
|
47
46
|
|
48
47
|
DEFAULT_CVS = [MS, UO, IMS]
|
49
48
|
|
data/obo/ims.obo
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
format-version: 1.2
|
3
2
|
date: 08:09:2008 13:53
|
4
3
|
saved-by: Thorsten Schramm
|
@@ -377,7 +376,7 @@ relationship: is_a IMS:1000002 ! Sample Stage
|
|
377
376
|
relationship: has_units UO:0000017 ! Micrometer
|
378
377
|
|
379
378
|
[Term]
|
380
|
-
id: IMS:
|
379
|
+
id: IMS:1000202
|
381
380
|
name: target material
|
382
381
|
def: "Describes the material the target is made of." [COMPUTIS:IMS]
|
383
382
|
xref: value-type:xsd\:string "The allowed value-type for this CV term."
|
@@ -558,5 +557,4 @@ name: solvent flowrate
|
|
558
557
|
def: "Rate with which the solvent is flowing on the surface of the imaging object" [COMPUTIS:IMS]
|
559
558
|
xref: value-type:xsd\:float "The allowed value-type for this CV term."
|
560
559
|
relationship: is_a IMS:1000121 ! DESI
|
561
|
-
relationship: has_units IMS:1000131 ! milliliter per minute
|
562
|
-
|
560
|
+
relationship: has_units IMS:1000131 ! milliliter per minute
|