mspire 0.8.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md ADDED
@@ -0,0 +1,60 @@
1
+ # mspire
2
+
3
+ Mspire is a full-featured library for working with mass spectrometry data,
4
+ particularly proteomic, metabolomic and lipidomic data sets. It aims to be
5
+ fast, robust, and beautiful.
6
+
7
+ ## Cite
8
+
9
+ Prince JT, Marcotte EM. <b>mspire: mass spectrometry proteomics in Ruby.</b> *Bioinformatics.* 2008 Dec 1;24(23):2796-7. ([pubmed](http://www.ncbi.nlm.nih.gov/pubmed/18930952))
10
+
11
+ ## Features
12
+
13
+ ### mzml
14
+
15
+ * Reading *and* writing
16
+ * True random access to spectra or chromatograms
17
+ * Complete object model with implicit object link resolution (even with random access)
18
+ * Simplified creation of and full support for CV params and referenceable param groups
19
+
20
+ ### imzml
21
+
22
+ Mspire is the *only* converter from mzml into imzml.
23
+
24
+ * handles both processed and continuous modes
25
+ * gracefully handles SIM data
26
+
27
+ ### Other Feature Highlights
28
+
29
+ * isotope distribution prediction: uses fastest method known (FFT convolution)
30
+ * protein digestion: support for 32 enzymes/variants by name
31
+ * pepxml: full object model and complete write support
32
+ * fasta files: complete programmatic access to description lines (via bio-ruby)
33
+ * peak lists: merging/summing and splitting algorithms
34
+ * obo: ontology hash access
35
+ * molecular formulas: can do arithmetic with formulas
36
+ * calculates q-values
37
+
38
+ ## Examples
39
+
40
+ ### mzml
41
+
42
+ require 'ms/mzml'
43
+
44
+ MS::Mzml.open("somefile.mzml") do |mzml|
45
+ spectrum = mzml[0] # the first spectrum ( same as mzml.spectrum(0) )
46
+ spectrum = mzml["controllerType=0 controllerNumber=1 scan=2"] # query by id string
47
+ mzml.spectrum_from_scan_num(23) # raises ScanNumbersNotFound or ScanNumbersNotUnique errors if problems
48
+ end
49
+
50
+ require 'ms/mass/aa'
51
+
52
+ MS::Mass::AA::MONO['A'] # or access by symbol
53
+
54
+ ## Acronym
55
+
56
+ <i>M</i>ass <i>SP</i>ectrometry <i>I</i>n <i>R</i>uby. Mspire originally stood for <i>M</i>ass <i>S</i>pectrometry <i>P</i>roteomics <i>I</i>n <i>R</i>uby but the library has since proven useful for all kinds of mass spectrometry projects, hence the more inclusive form. The <i>e</i> was originally included for aesthetic reasons, but it also provides the user/developer the ability to attach whatever <i>E</i>xclamation or <i>E</i>pithet they choose to the acronym (the best ones will begin with <i>e</i> of course).
57
+
58
+ ## Copyright
59
+
60
+ MIT license. See LICENSE for details.
data/Rakefile CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'rubygems'
2
2
  require 'rake'
3
3
  require 'rspec/core/rake_task'
4
+ require 'yard'
4
5
 
5
6
  require 'jeweler'
6
7
  Jeweler::Tasks.new do |gem|
@@ -35,6 +36,11 @@ RSpec::Core::RakeTask.new(:spec) do |spec|
35
36
  spec.pattern = FileList['spec/**/*_spec.rb']
36
37
  end
37
38
 
39
+ YARD::Rake::YardocTask.new do |t|
40
+ t.files = ['lib/**/*.rb', 'obo/**/*', 'README.md', 'script/**/*'] # optional
41
+ #t.options = ['--any', '--extra', '--opts'] # optional
42
+ end
43
+
38
44
  RSpec::Core::RakeTask.new(:rcov) do |spec|
39
45
  spec.pattern = 'spec/**/*_spec.rb'
40
46
  spec.rcov = true
@@ -59,7 +65,13 @@ Rake::RDocTask.new do |rdoc|
59
65
  rdoc.rdoc_files.include('lib/**/*.rb')
60
66
  end
61
67
 
62
- # need to write updaters to get latest obo
63
- #task 'update-obo' do
64
- # "http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo"
65
- #end
68
+ desc "downloads the latest obo to appropriate spot"
69
+ task 'obo-update' do
70
+ require 'mspire/mzml/cv'
71
+ require 'open-uri'
72
+ Mspire::Mzml::CV::DEFAULT_CVS.each do |const|
73
+ obo_fn = File.dirname(__FILE__) + "/obo/#{const.id.downcase}.obo"
74
+ File.write(obo_fn, open(const.uri, &:read).gsub(/\r\n?/, "\n"))
75
+ puts "NOTE: if a file changed (git status), then update lib/mspire/mzml/cv.rb with correct version !!!"
76
+ end
77
+ end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.0
1
+ 0.8.1
@@ -83,7 +83,7 @@ output: <file>.imzML and <file>.ibd
83
83
  end
84
84
  end
85
85
 
86
- def self.run(argv, globalopts)
86
+ def self.run(argv, globalopts=[])
87
87
  begin
88
88
  opts = parser.parse(argv)
89
89
  rescue Trollop::HelpNeeded
@@ -4,6 +4,7 @@ require 'uuid'
4
4
  require 'cv'
5
5
  require 'mspire/mzml'
6
6
  require 'mspire/mzml/spectrum'
7
+ require 'mspire/spectrum'
7
8
  require 'pathname'
8
9
  require 'digest/sha1'
9
10
 
@@ -325,7 +326,25 @@ module Mspire::Imzml
325
326
  sourcefile_id = "source_file_#{i}"
326
327
  sourcefile_ids << sourcefile_id
327
328
  Mspire::Mzml.open(mzml_filename) do |mzml|
328
- mzml.each_with_index do |spec,i|
329
+ enumerates_spectra =
330
+ # handle SIM files "MS:1001472", "selected ion monitoring chromatogram"
331
+ if mzml.file_description.file_content.fetch_by_acc('MS:1001472')
332
+ # SIM file: build one spectrum per chromatogram data point by transposing the SIM chromatograms
333
+ mz_ars = []
334
+ its_ars = []
335
+ mzml.each_chromatogram.each do |chromatogram,i|
336
+ next unless chromatogram.fetch_by_acc('MS:1001472')
337
+ target_mz = chromatogram.precursor.isolation_window.fetch_by_acc('MS:1000827').to_f
338
+ its = chromatogram.intensities
339
+ mz_ars << Array.new(its.size, target_mz)
340
+ its_ars << its
341
+ end
342
+ mz_ars.transpose.zip(its_ars.transpose).map {|mzs, its| Mspire::Spectrum.new([mzs, its]) }
343
+ else
344
+ # normal mzml file with spectra
345
+ mzml
346
+ end
347
+ enumerates_spectra.each_with_index do |spec,i|
329
348
  break if config[:trim_to] && (i >= config[:trim_to])
330
349
  sourcefile_id_parallel_to_spectra << sourcefile_id
331
350
  yielder << spec
@@ -38,12 +38,11 @@ module Mspire
38
38
  self.new(xml[:id], xml[:fullName], xml[:URI], xml[:version])
39
39
  end
40
40
 
41
- # These are derived by looking in the obo folder at the top of mspire
42
41
  IMS = self.new("IMS", "Imaging MS Ontology", "http://www.maldi-msi.org/download/imzml/imagingMS.obo", "0.9.1")
43
- MS = self.new('MS', "Proteomics Standards Initiative Mass Spectrometry Ontology", "http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo", "3.18.0")
42
+ MS = self.new('MS', "Proteomics Standards Initiative Mass Spectrometry Ontology", "http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo", "3.29.0")
44
43
  # the version for UO doesn't really exist: seen files where they use the
45
- # download date: DD:MM:YYY
46
- UO = self.new("UO", "Unit Ontology", "http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo", "16:02:2012")
44
+ # download date: DD:MM:YYYY. I'm going to use the save date in the header.
45
+ UO = self.new("UO", "Unit Ontology", "http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo", "12:10:2011")
47
46
 
48
47
  DEFAULT_CVS = [MS, UO, IMS]
49
48
 
data/obo/ims.obo CHANGED
@@ -1,4 +1,3 @@
1
-
2
1
  format-version: 1.2
3
2
  date: 08:09:2008 13:53
4
3
  saved-by: Thorsten Schramm
@@ -377,7 +376,7 @@ relationship: is_a IMS:1000002 ! Sample Stage
377
376
  relationship: has_units UO:0000017 ! Micrometer
378
377
 
379
378
  [Term]
380
- id: IMS:10000202
379
+ id: IMS:1000202
381
380
  name: target material
382
381
  def: "Describes the material the target is made of." [COMPUTIS:IMS]
383
382
  xref: value-type:xsd\:string "The allowed value-type for this CV term."
@@ -558,5 +557,4 @@ name: solvent flowrate
558
557
  def: "Rate with which the solvent is flowing on the surface of the imaging object" [COMPUTIS:IMS]
559
558
  xref: value-type:xsd\:float "The allowed value-type for this CV term."
560
559
  relationship: is_a IMS:1000121 ! DESI
561
- relationship: has_units IMS:1000131 ! milliliter per minute
562
-
560
+ relationship: has_units IMS:1000131 ! milliliter per minute