RubyGems - ms-core - Versions diffs - 0.0.1 → 0.0.2 - Mend

ms-core 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

data/History ADDED Viewed

@@ -0,0 +1,7 @@
+== version 0.0.1
+* copied over from Simon's mspire.
+* added functionality to Ms::Spectrum

data/{LICENSE → MIT-LICENSE} RENAMED Viewed

File without changes

data/{README → README.rdoc} RENAMED Viewed

File without changes

data/lib/ms/calc.rb CHANGED Viewed

@@ -19,14 +19,12 @@ module Ms
         mz = mz.to_f
         tol = ppm_tol_at(mz, ppm)
         mz-tol...mz+tol
-      end
+      end
       # Rounds n to the specified precision (ie number of decimal places)
-      # def round(n, precision)
-      #   factor = 10**precision.to_i
-      #   (n * factor).round.to_f / factor
-      # end
+      def round(n, precision)
+        factor = 10**precision.to_i
+        (n * factor).round.to_f / factor
+      end
   end
 end

data/lib/ms/data.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 require 'ms/data/interleaved'
 require 'ms/data/transposed'
+require 'ms/data/lazy_string'
 module Ms
@@ -54,4 +55,4 @@ module Ms
       send("new_#{type}", data)
     end
   end
-end
+end

data/lib/ms/id/peptide.rb CHANGED Viewed

@@ -28,7 +28,7 @@ module Ms::Id::Peptide
       when 1  ## this must be a parse error!
         pieces[0] ## which is the peptide itself
       else
-        abort "bad peptide sequence: #{sequence}"
+        abort "bad peptide sequence: #{sequence.inspect}"
       end
     end

data/lib/ms/mass/aa.rb CHANGED Viewed

@@ -1,3 +1,4 @@
+require 'molecules'
 require 'ms/mass'
 module Ms
@@ -14,16 +15,10 @@ module Ms
     #     # or use symbols
     #     MONO[:A]  # => 71.0371137878
     #
-    # This module is built on masses generated from the excellent {'molecules'
+    # This module is built on the excellent {'molecules'
     # library}[http://github.com/bahuvrihi/molecules/tree/master].  See that
-    # library for more serious work with masses:
-    #
-    #     gem install molecules
+    # library for more serious work with masses.
     module AA
-      Ms::Mass.constants.reject {|v| v == 'AA' }.each do |const|
-        const_set(const, Ms::Mass.const_get(const))
-      end
       # These are included here to offer maximum functionality
       MOLECULES_MONO_UNSUPPORTED = {
         :B => 172.048405, # average of aspartic acid and asparagine
@@ -40,70 +35,10 @@ module Ms
         #:J => nil,
       }
-      # generated from molecules version 0.1.3:
-      MOLECULES_MONO = {
-        :A => 71.0371137878,
-        :C => 103.0091844778,
-        :D => 115.026943032,
-        :E => 129.0425930962,
-        :F => 147.0684139162,
-        :G => 57.0214637236,
-        :H => 137.0589118624,
-        :I => 113.0840639804,
-        :K => 128.0949630177,
-        :L => 113.0840639804,
-        :M => 131.0404846062,
-        :N => 114.0429274472,
-        :O => 211.1446528645,
-        :P => 97.052763852,
-        :Q => 128.0585775114,
-        :R => 156.1011110281,
-        :S => 87.0320284099,
-        :T => 101.0476784741,
-        :U => 150.9536355878,
-        :V => 99.0684139162,
-        :W => 186.0793129535,
-        :Y => 163.0633285383,
-      }
-      MONO = MOLECULES_MONO_UNSUPPORTED.merge MOLECULES_MONO
-      # generated from molecules version 0.1.3:
-      MOLECULES_AVG = {
-        :A => 71.0779,
-        :C => 103.1429,
-        :D => 115.0874,
-        :E => 129.11398,
-        :F => 147.17386,
-        :G => 57.05132,
-        :H => 137.13928,
-        :I => 113.15764,
-        :K => 128.17228,
-        :L => 113.15764,
-        :M => 131.19606,
-        :N => 114.10264,
-        :O => 211.28076,
-        :P => 97.11518,
-        :Q => 128.12922,
-        :R => 156.18568,
-        :S => 87.0773,
-        :T => 101.10388,
-        :U => 150.0379,
-        :V => 99.13106,
-        :W => 186.2099,
-        :Y => 163.17326,
-      }
-      AVG = MOLECULES_AVG_UNSUPPORTED.merge MOLECULES_AVG
-      [AVG, MONO].each do |hash|
-        hash.each {|k,v| hash[k.to_s] = v }
-      end
       # returns a hash based on the molecules library of amino acid residues.
       # type is :mono or :avg
       def self.mass_index(type=:mono)
-        require 'molecules'
         hash = {}
         ('A'..'Z').each do |letter|
           if res = Molecules::Libraries::Residue[letter]
@@ -120,14 +55,10 @@ module Ms
         hash
       end
-      # prints a MONO or AVG hash for inclusion in ruby code
-      # type can be :mono or :avg
-      def self.print_mass_index(type=:mono)
-        puts "#{type.to_s.upcase} = {"
-        mass_index(type).sort.each do |k,v|
-          puts ":#{k} => #{v},"
-        end
-        puts "}"
+      MONO = MOLECULES_MONO_UNSUPPORTED.merge( self.mass_index(:mono) )
+      AVG = MOLECULES_AVG_UNSUPPORTED.merge( self.mass_index(:avg) )
+      [AVG, MONO].each do |hash|
+        hash.each {|k,v| hash[k.to_s] = v }
       end
     end

data/lib/ms/spectrum.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 module Ms
   class Spectrum
+    include Enumerable
     # The underlying data store.
     attr_reader :data
@@ -60,97 +62,99 @@ module Ms
       Ms::Spectrum.new([self.mzs, self.intensities.map {|v| v / tic }])
     end
-    # uses index function and returns the intensity at that value
-    def intensity_at_mz(mz)
-      if x = index(mz)
-        intensities[x]
-      else
-        nil
-      end
-    end
+    ## uses index function and returns the intensity at that value
+    #def intensity_at_mz(mz)
+      #if x = index(mz)
+        #intensities[x]
+      #else
+        #nil
+      #end
+    #end
-    # returns the index of the first value matching that m/z.  the argument m/z
-    # may be less precise than the actual m/z (rounding to the same precision
-    # given) but must be at least integer precision (after rounding)
-    # implemented as binary search (bsearch from the web)
-    def index(mz)
-      mz_ar = mzs
-      return_val = nil
-      ind = mz_ar.bsearch_lower_boundary{|x| x <=> mz }
-      if mz_ar[ind] == mz
-        return_val = ind
-      else
-        # do a rounding game to see which one is it, or nil
-        # find all the values rounding to the same integer in the locale
-        # test each one fully in turn
-        mz = mz.to_f
-        mz_size = mz_ar.size
-        if ((ind < mz_size) and equal_after_rounding?(mz_ar[ind], mz))
-          return_val = ind
-        else # run the loop
-          up = ind
-          loop do
-            up += 1
-            if up >= mz_size
-              break
-            end
-            mz_up = mz_ar[up]
-            if (mz_up.ceil  - mz.ceil >= 2)
-              break
-            else
-              if equal_after_rounding?(mz_up, mz)
-                return_val = up
-                return return_val
-              end
-            end
-          end
-          dn= ind
-          loop do
-            dn -= 1
-            if dn < 0
-              break
-            end
-            mz_dn = mz_ar[dn]
-            if (mz.floor - mz_dn.floor >= 2)
-              break
-            else
-              if equal_after_rounding?(mz_dn, mz)
-                return_val = dn
-                return return_val
-              end
-            end
-          end
-        end
-      end
-      return_val
-    end
+    ## index mz, tolerance = :nearest(1), Float, :nearest_within_integer
-    # less_precise should be a float
-    # precise should be a float
-    def equal_after_rounding?(precise, less_precise) # :nodoc:
-      # determine the precision of less_precise
-      exp10 = precision_as_neg_int(less_precise)
-      #puts "EXP10: #{exp10}"
-      answ = ((precise*exp10).round == (less_precise*exp10).round)
-      #puts "TESTING FOR EQUAL: #{precise} #{less_precise}"
-      #puts answ
-      (precise*exp10).round == (less_precise*exp10).round
-    end
+    ## returns the index of the first value matching that m/z.  the argument m/z
+    ## may be less precise than the actual m/z (rounding to the same precision
+    ## given) but must be at least integer precision (after rounding)
+    ## implemented as binary search (bsearch from the web)
+    #def index(mz)
+      #mz_ar = mzs
+      #return_val = nil
+      #ind = mz_ar.bsearch_lower_boundary{|x| x <=> mz }
+      #if mz_ar[ind] == mz
+        #return_val = ind
+      #else
+        ## do a rounding game to see which one is it, or nil
+        ## find all the values rounding to the same integer in the locale
+        ## test each one fully in turn
+        #mz = mz.to_f
+        #mz_size = mz_ar.size
+        #if ((ind < mz_size) and equal_after_rounding?(mz_ar[ind], mz))
+          #return_val = ind
+        #else # run the loop
+          #up = ind
+          #loop do
+            #up += 1
+            #if up >= mz_size
+              #break
+            #end
+            #mz_up = mz_ar[up]
+            #if (mz_up.ceil  - mz.ceil >= 2)
+              #break
+            #else
+              #if equal_after_rounding?(mz_up, mz)
+                #return_val = up
+                #return return_val
+              #end
+            #end
+          #end
+          #dn= ind
+          #loop do
+            #dn -= 1
+            #if dn < 0
+              #break
+            #end
+            #mz_dn = mz_ar[dn]
+            #if (mz.floor - mz_dn.floor >= 2)
+              #break
+            #else
+              #if equal_after_rounding?(mz_dn, mz)
+                #return_val = dn
+                #return return_val
+              #end
+            #end
+          #end
+        #end
+      #end
+      #return_val
+    #end
-    # returns 1 for ones place, 10 for tenths, 100 for hundredths
-    # to a precision exceeding 1e-6
-    def precision_as_neg_int(float) # :nodoc:
-      neg_exp10 = 1
-      loop do
-        over = float * neg_exp10
-        rounded = over.round
-        if (over - rounded).abs <= 1e-6
-          break
-        end
-        neg_exp10 *= 10
-      end
-      neg_exp10
-    end
+    ## less_precise should be a float
+    ## precise should be a float
+    #def equal_after_rounding?(precise, less_precise) # :nodoc:
+      ## determine the precision of less_precise
+      #exp10 = precision_as_neg_int(less_precise)
+      ##puts "EXP10: #{exp10}"
+      #answ = ((precise*exp10).round == (less_precise*exp10).round)
+      ##puts "TESTING FOR EQUAL: #{precise} #{less_precise}"
+      ##puts answ
+      #(precise*exp10).round == (less_precise*exp10).round
+    #end
+    ## returns 1 for ones place, 10 for tenths, 100 for hundredths
+    ## to a precision exceeding 1e-6
+    #def precision_as_neg_int(float) # :nodoc:
+      #neg_exp10 = 1
+      #loop do
+        #over = float * neg_exp10
+        #rounded = over.round
+        #if (over - rounded).abs <= 1e-6
+          #break
+        #end
+        #neg_exp10 *= 10
+      #end
+      #neg_exp10
+    #end
   end

metadata CHANGED Viewed

@@ -1,18 +1,28 @@
 --- !ruby/object:Gem::Specification
 name: ms-core
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.0.2
 platform: ruby
 authors:
-- John Prince
 - Simon Chiang
+- John Prince
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-05-22 00:00:00 -06:00
+date: 2009-09-08 00:00:00 -06:00
 default_executable:
 dependencies:
+- !ruby/object:Gem::Dependency
+  name: molecules
+  type: :runtime
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.2.0
+    version:
 - !ruby/object:Gem::Dependency
   name: tap
   type: :development
@@ -29,42 +39,43 @@ dependencies:
   version_requirement:
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "="
+    - - ">="
       - !ruby/object:Gem::Version
         version: 1.3.0
     version:
 description:
-email: jtprince@gmail.com
+email:
+- jtprince@gmail.com
 executables: []
 extensions: []
 extra_rdoc_files:
-- changelog.txt
-- LICENSE
-- README
+- README.rdoc
+- MIT-LICENSE
+- History
 files:
+- MIT-LICENSE
+- README.rdoc
+- History
+- lib/ms.rb
+- lib/ms/calc.rb
+- lib/ms/data.rb
+- lib/ms/data/interleaved.rb
+- lib/ms/data/lazy_io.rb
+- lib/ms/data/lazy_string.rb
+- lib/ms/data/simple.rb
+- lib/ms/data/transposed.rb
 - lib/ms/format/format_error.rb
-- lib/ms/id/search.rb
 - lib/ms/id/peptide.rb
 - lib/ms/id/protein.rb
+- lib/ms/id/search.rb
+- lib/ms/mass.rb
 - lib/ms/mass/aa.rb
-- lib/ms/data.rb
 - lib/ms/spectrum.rb
 - lib/ms/support/binary_search.rb
-- lib/ms/mass.rb
-- lib/ms/calc.rb
-- lib/ms/data/interleaved.rb
-- lib/ms/data/simple.rb
-- lib/ms/data/lazy_string.rb
-- lib/ms/data/transposed.rb
-- lib/ms/data/lazy_io.rb
-- lib/ms.rb
-- changelog.txt
-- LICENSE
-- README
 has_rdoc: true
-homepage: http://mspire.rubyforge.org/projects/ms-core/
+homepage: http://mspire.rubyforge.org/ms-core/
 licenses: []
 post_install_message:
@@ -90,6 +101,6 @@ rubyforge_project: mspire
 rubygems_version: 1.3.2
 signing_key:
 specification_version: 3
-summary: the core, shared library for mspire
+summary: basic, shared functionality for mspire libraries
 test_files: []

data/changelog.txt DELETED Viewed

@@ -1,196 +0,0 @@
-== version 0.1.7
-1. A couple of scripts and subroutines were hashing peptides but not on the file
-basename.  This would result in slightly incorrect results (any time there
-were overlapping scan numbers in multiple datasets, only the top one would be
-chosen).  The results would be correct for single runs.
-Output files that could be affected:
-*.top_per_scan.txt
-*.all_peps_per_scan.txt
-Scripts that could be affected:
-script/top_hit_per_scan.rb
-bin/filter_spec_id.rb
-script/filter-peps.rb
-bin/id_precision.rb
-Subroutines that were affected:
-spec_id.rb (pep_probs_by_* )
-spec_id.rb (top_peps_prefilter!)
-proph.rb uniq_by_seqcharge
-align.rb called uniq_by_seqcharge
-2. false_positive_rate.rb and protein_summary.rb (by extension) were using
-number of true positives on the x axis while in reality I was plotting the
-number of hits.  I've updated x axis labels to reflect this change.  In
-addition, since the term 'false positive rate' has such a distinct definition
-in classical ROC plots and binary statistics, I've decided to work primarily
-in terms of precision (TP/(TP+FP)).  I've purged the terms 'False Positive
-Rate' and 'FPR' from the package. It's been suggested that FP/(TP+FP) be
-called the False Positive Predictive Rate (FPPR).  I will probably implement
-this in a future release.
-== version 0.2.0
-Revamped the way SpecID works (it is now mixed-in).
-Added support for modifications to bioworks_to_pepxml.rb
-Can read .srf files (nearly interchangeable with bioworks files)
-Redid filter.rb
-== version 0.2.1
-minor bugfix
-== version 0.2.2
-made compatible with Bioworks fasta file reverser and updated tutorial.
-Killed classify_by_prefix routine in favor of classify_by_false_flag which has
-a prefix option
-== version 0.2.3
-in protein_summary.rb added handling for proteins with no annotation. (either
-display NA or use gi2annnot to grab them from NCBI)
-== version 0.2.5
-renamed prep_list in roc (potential breaks in code)
-== version 0.2.6
-1. Massive refactorization of filtering and validation.  Validation objects are
-created and then can be used to validate just about anything.
-2. Massive redo of the parsing of MS runs.  Can parse mzXML v1, v2.X
-(including readw broken output), and mzData (even Thermo's broken output).
-4. Moved all tests to specs (rspec).
-5. Can read gradient programs off of .meth or .RAW files (both Xcal 1.X and
-2.X)
-Bugfixes:
-1. The search_summary 'base_name' in pepxml output was incorrect (this did not
-appear to influence our analyses, however). Fixed.
-2. Enzymes with no exceptions (e.g., cuts at KR) would report one too many
-missed cleavages if the last amino acid was a cut point. Fixed.
-== version 0.2.7
-1. In conversion from bioworks to pepxml, the default was trypsin (KR/P).
-Now, the sample enzyme is set explicitly from the params file and the option
-is not available.  This can give more accurate pepxml files than from
-previous depending on your enzyme.
-== version 0.2.9
-1. Added support for phobius transmembrane predictions
-2. have filter_and_validate.rb working well (multiple validators allowed).
-3. Can read bioworks 3.3.1 .srf files (.srf version 3.5 files)
-4. Added a bias validator
-== version 0.2.10
-1. Fixed --hits_separate flag in spec_id/filter
-== version 0.2.11
-1. Added prob precision support and reorganized filter_and_validate libs
-== version 0.2.12
-1. Fixed bug in transmem for prob and others.
-2. Can use axml (XMLParser based) or libxml depending on availability
-== version 0.2.13
-1. Fixed issue with --hits_separate
-2. filter_and_validate.rb requires decoy validator if decoy proteins
-(refactored code)
-== version 0.2.14
-1. Can read PeptideProphet files (should be able to read pepxml files, too)
-2. API change: Some slight modifications to the Sequest::PepXML object
-interfaces and implementations (using ArrayClass)
-== version 0.2.15
-1. can convert srf files to sqt files
-== version 0.3.0
-1. IMPORTANT BUG FIX: protein reporting in srf files is correct now (proteins after the first protein were being assigned to the last hit in an out file).
-2. SQT export is correct and works at least on 3.2 and 3.3.1.
-== version 0.3.1
-1. Bug fix in srf filtering (num_hits adjusted)
-== version 0.3.2
-1. Uses sequest peptide_mass_tolerance filter on srf group files by default
-now.
-== version 0.3.3
-1. Worked out minor kinks in prob_precision.rb
-== version 0.3.4
-1. filters >= +3 charged ions now.
-== version 0.3.5
-1. fixed creation of background distribution in validators (hash_by base_name,
-first_scan, charge now)
-== version 0.3.6
-1. split off bad_aa_est from bad_aa
-== version 0.3.7
-1. can deal with No_Enzyme searches now (while still capable of setting
-sample_enzyme)
-== version 0.3.8
-1. can set a decoy to target ratio for decoy validation
-2. added mass calculator in Mass::Calculator
-== version 0.3.9
-1. doesn't clobber mzdata filename in ms_to_lmat.rb conversion
-== version 0.3.10
-1. added run_percolator.rb script which makes running multiple files easy
-== version 0.3.11
-1. faster sensing of bad scan tags in mzXML v. 2.0 files
-2. implemented lazy evaluation of spectrum in 2 different ways allowing much
-larger files to be parsed
-== version 0.4.0
-1. ** INTERFACE CHANGE: each scan can only have one precursor (used to be an array)
-2. ** INTERFACE CHANGE: spectrum mz and intensity data accessed with mzs and intensities
-3. lazy eval working on mzData
-4. mzData not necessarily guaranteed to have precursor intensities on lazy
-eval methods (however, the method intensity_at_mz will still work (causing
-evaluation))
-== version 0.4.1
-1. added support for reading mzXML version 3.0 (may fail in some cases)
-== version 0.4.2
-1. added MS::MSRun.open method
-2. added method to write dta files from SRF
-== version 0.4.3
-1. added to_mfg_file from SRF