RubyGems - mspire - Versions diffs - 0.4.9 → 0.5.0 - Mend

mspire 0.4.9 → 0.5.0

Files changed (255) hide show

data/README +27 -17
data/changelog.txt +31 -62
data/lib/ms/calc.rb +32 -0
data/lib/ms/data/interleaved.rb +60 -0
data/lib/ms/data/lazy_io.rb +73 -0
data/lib/ms/data/lazy_string.rb +15 -0
data/lib/ms/data/simple.rb +59 -0
data/lib/ms/data/transposed.rb +41 -0
data/lib/ms/data.rb +57 -0
data/lib/ms/format/format_error.rb +12 -0
data/lib/ms/spectrum.rb +25 -384
data/lib/ms/support/binary_search.rb +126 -0
data/lib/ms.rb +10 -10
metadata +38 -350
data/INSTALL +0 -58
data/README.rdoc +0 -18
data/Rakefile +0 -330
data/bin/aafreqs.rb +0 -23
data/bin/bioworks2excel.rb +0 -14
data/bin/bioworks_to_pepxml.rb +0 -148
data/bin/bioworks_to_pepxml_gui.rb +0 -225
data/bin/fasta_shaker.rb +0 -5
data/bin/filter_and_validate.rb +0 -5
data/bin/gi2annot.rb +0 -14
data/bin/id_class_anal.rb +0 -112
data/bin/id_precision.rb +0 -172
data/bin/ms_to_lmat.rb +0 -67
data/bin/pepproph_filter.rb +0 -16
data/bin/prob_validate.rb +0 -6
data/bin/protein_summary.rb +0 -6
data/bin/protxml2prots_peps.rb +0 -32
data/bin/raw_to_mzXML.rb +0 -55
data/bin/run_percolator.rb +0 -122
data/bin/sqt_group.rb +0 -26
data/bin/srf_group.rb +0 -27
data/bin/srf_to_sqt.rb +0 -40
data/lib/align/chams.rb +0 -78
data/lib/align.rb +0 -154
data/lib/archive/targz.rb +0 -94
data/lib/bsearch.rb +0 -120
data/lib/core_extensions.rb +0 -16
data/lib/fasta.rb +0 -626
data/lib/gi.rb +0 -124
data/lib/group_by.rb +0 -10
data/lib/index_by.rb +0 -11
data/lib/merge_deep.rb +0 -21
data/lib/ms/converter/mzxml.rb +0 -77
data/lib/ms/gradient_program.rb +0 -170
data/lib/ms/msrun.rb +0 -244
data/lib/ms/msrun_index.rb +0 -108
data/lib/ms/parser/mzdata/axml.rb +0 -67
data/lib/ms/parser/mzdata/dom.rb +0 -175
data/lib/ms/parser/mzdata/libxml.rb +0 -7
data/lib/ms/parser/mzdata.rb +0 -31
data/lib/ms/parser/mzxml/axml.rb +0 -70
data/lib/ms/parser/mzxml/dom.rb +0 -182
data/lib/ms/parser/mzxml/hpricot.rb +0 -253
data/lib/ms/parser/mzxml/libxml.rb +0 -19
data/lib/ms/parser/mzxml/regexp.rb +0 -122
data/lib/ms/parser/mzxml/rexml.rb +0 -72
data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
data/lib/ms/parser/mzxml.rb +0 -282
data/lib/ms/parser.rb +0 -108
data/lib/ms/precursor.rb +0 -25
data/lib/ms/scan.rb +0 -81
data/lib/mspire.rb +0 -4
data/lib/pi_zero.rb +0 -244
data/lib/qvalue.rb +0 -161
data/lib/roc.rb +0 -187
data/lib/sample_enzyme.rb +0 -160
data/lib/scan_i.rb +0 -21
data/lib/spec_id/aa_freqs.rb +0 -170
data/lib/spec_id/bioworks.rb +0 -497
data/lib/spec_id/digestor.rb +0 -138
data/lib/spec_id/mass.rb +0 -179
data/lib/spec_id/parser/proph.rb +0 -335
data/lib/spec_id/precision/filter/cmdline.rb +0 -218
data/lib/spec_id/precision/filter/interactive.rb +0 -134
data/lib/spec_id/precision/filter/output.rb +0 -148
data/lib/spec_id/precision/filter.rb +0 -637
data/lib/spec_id/precision/output.rb +0 -60
data/lib/spec_id/precision/prob/cmdline.rb +0 -160
data/lib/spec_id/precision/prob/output.rb +0 -94
data/lib/spec_id/precision/prob.rb +0 -249
data/lib/spec_id/proph/pep_summary.rb +0 -104
data/lib/spec_id/proph/prot_summary.rb +0 -484
data/lib/spec_id/proph.rb +0 -4
data/lib/spec_id/protein_summary.rb +0 -489
data/lib/spec_id/sequest/params.rb +0 -316
data/lib/spec_id/sequest/pepxml.rb +0 -1458
data/lib/spec_id/sequest.rb +0 -33
data/lib/spec_id/sqt.rb +0 -349
data/lib/spec_id/srf.rb +0 -973
data/lib/spec_id.rb +0 -778
data/lib/spec_id_xml.rb +0 -99
data/lib/transmem/phobius.rb +0 -147
data/lib/transmem/toppred.rb +0 -368
data/lib/transmem.rb +0 -157
data/lib/validator/aa.rb +0 -48
data/lib/validator/aa_est.rb +0 -112
data/lib/validator/background.rb +0 -77
data/lib/validator/bias.rb +0 -95
data/lib/validator/cmdline.rb +0 -431
data/lib/validator/decoy.rb +0 -107
data/lib/validator/digestion_based.rb +0 -70
data/lib/validator/probability.rb +0 -51
data/lib/validator/prot_from_pep.rb +0 -234
data/lib/validator/q_value.rb +0 -32
data/lib/validator/transmem.rb +0 -272
data/lib/validator/true_pos.rb +0 -46
data/lib/validator.rb +0 -197
data/lib/xml.rb +0 -38
data/lib/xml_style_parser.rb +0 -119
data/lib/xmlparser_wrapper.rb +0 -19
data/release_notes.txt +0 -2
data/script/compile_and_plot_smriti_final.rb +0 -97
data/script/create_little_pepxml.rb +0 -61
data/script/degenerate_peptides.rb +0 -47
data/script/estimate_fpr_by_cysteine.rb +0 -226
data/script/extract_gradient_programs.rb +0 -56
data/script/find_cysteine_background.rb +0 -137
data/script/genuine_tps_and_probs.rb +0 -136
data/script/get_apex_values_rexml.rb +0 -44
data/script/histogram_probs.rb +0 -61
data/script/mascot_fix_pepxml.rb +0 -123
data/script/msvis.rb +0 -42
data/script/mzXML2timeIndex.rb +0 -25
data/script/peps_per_bin.rb +0 -67
data/script/prep_dir.rb +0 -121
data/script/simple_protein_digestion.rb +0 -27
data/script/smriti_final_analysis.rb +0 -103
data/script/sqt_to_meta.rb +0 -24
data/script/top_hit_per_scan.rb +0 -67
data/script/toppred_to_yaml.rb +0 -47
data/script/tpp_installer.rb +0 -249
data/specs/align_spec.rb +0 -79
data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
data/specs/bin/fasta_shaker_spec.rb +0 -259
data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
data/specs/bin/filter_and_validate_spec.rb +0 -180
data/specs/bin/ms_to_lmat_spec.rb +0 -34
data/specs/bin/prob_validate_spec.rb +0 -86
data/specs/bin/protein_summary_spec.rb +0 -14
data/specs/fasta_spec.rb +0 -354
data/specs/gi_spec.rb +0 -22
data/specs/load_bin_path.rb +0 -7
data/specs/merge_deep_spec.rb +0 -13
data/specs/ms/gradient_program_spec.rb +0 -77
data/specs/ms/msrun_spec.rb +0 -498
data/specs/ms/parser_spec.rb +0 -92
data/specs/ms/spectrum_spec.rb +0 -87
data/specs/pi_zero_spec.rb +0 -115
data/specs/qvalue_spec.rb +0 -39
data/specs/roc_spec.rb +0 -251
data/specs/rspec_autotest.rb +0 -149
data/specs/sample_enzyme_spec.rb +0 -126
data/specs/spec_helper.rb +0 -135
data/specs/spec_id/aa_freqs_spec.rb +0 -52
data/specs/spec_id/bioworks_spec.rb +0 -148
data/specs/spec_id/digestor_spec.rb +0 -75
data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
data/specs/spec_id/precision/filter/output_spec.rb +0 -31
data/specs/spec_id/precision/filter_spec.rb +0 -246
data/specs/spec_id/precision/prob_spec.rb +0 -44
data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
data/specs/spec_id/protein_summary_spec.rb +0 -189
data/specs/spec_id/sequest/params_spec.rb +0 -68
data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
data/specs/spec_id/sequest_spec.rb +0 -38
data/specs/spec_id/sqt_spec.rb +0 -246
data/specs/spec_id/srf_spec.rb +0 -172
data/specs/spec_id/srf_spec_helper.rb +0 -139
data/specs/spec_id_helper.rb +0 -33
data/specs/spec_id_spec.rb +0 -366
data/specs/spec_id_xml_spec.rb +0 -33
data/specs/transmem/phobius_spec.rb +0 -425
data/specs/transmem/toppred_spec.rb +0 -298
data/specs/transmem_spec.rb +0 -60
data/specs/transmem_spec_shared.rb +0 -64
data/specs/validator/aa_est_spec.rb +0 -66
data/specs/validator/aa_spec.rb +0 -40
data/specs/validator/background_spec.rb +0 -67
data/specs/validator/bias_spec.rb +0 -122
data/specs/validator/decoy_spec.rb +0 -51
data/specs/validator/fasta_helper.rb +0 -26
data/specs/validator/prot_from_pep_spec.rb +0 -141
data/specs/validator/transmem_spec.rb +0 -146
data/specs/validator/true_pos_spec.rb +0 -58
data/specs/validator_helper.rb +0 -33
data/specs/xml_spec.rb +0 -12
data/test_files/000_pepxml18_small.xml +0 -206
data/test_files/020a.mzXML.timeIndex +0 -4710
data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
data/test_files/4-03-03_small-prot.xml +0 -321
data/test_files/4-03-03_small.xml +0 -3876
data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
data/test_files/bioworks-3.3_10prots.xml +0 -5999
data/test_files/bioworks31.params +0 -77
data/test_files/bioworks32.params +0 -62
data/test_files/bioworks33.params +0 -63
data/test_files/bioworks_single_run_small.xml +0 -7237
data/test_files/bioworks_small.fasta +0 -212
data/test_files/bioworks_small.params +0 -63
data/test_files/bioworks_small.phobius +0 -109
data/test_files/bioworks_small.toppred.out +0 -2847
data/test_files/bioworks_small.xml +0 -5610
data/test_files/bioworks_with_INV_small.xml +0 -3753
data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
data/test_files/corrupted_900.srf +0 -0
data/test_files/head_of_7MIX.srf +0 -0
data/test_files/interact-opd1_mods_small-prot.xml +0 -304
data/test_files/messups.fasta +0 -297
data/test_files/opd1/000.my_answer.100lines.xml +0 -101
data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
data/test_files/opd1/000_020-prot.png +0 -0
data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
data/test_files/opd1/000_020_3prots-prot.xml +0 -62
data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
data/test_files/opd1/sequest.3.1.params +0 -77
data/test_files/opd1/sequest.3.2.params +0 -62
data/test_files/opd1/twenty_scans.mzXML +0 -418
data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
data/test_files/opd1/twenty_scans_answ.lmat +0 -0
data/test_files/opd1/twenty_scans_answ.lmata +0 -9
data/test_files/opd1_020_beginning.RAW +0 -0
data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
data/test_files/pepproph_small.xml +0 -4691
data/test_files/phobius.small.noheader.txt +0 -50
data/test_files/phobius.small.small.txt +0 -53
data/test_files/s01_anC1_ld020mM.key.txt +0 -25
data/test_files/s01_anC1_ld020mM.meth +0 -0
data/test_files/small.fasta +0 -297
data/test_files/small.sqt +0 -87
data/test_files/smallraw.RAW +0 -0
data/test_files/tf_bioworks2excel.bioXML +0 -14340
data/test_files/tf_bioworks2excel.txt.actual +0 -1035
data/test_files/toppred.small.out +0 -416
data/test_files/toppred.xml.out +0 -318
data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
data/test_files/yeast_gly_small-prot.xml +0 -265
data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
data/test_files/yeast_gly_small.xml +0 -3807
data/test_files/yeast_gly_small2.parentTimes +0 -6

data/lib/transmem.rb DELETED Viewed

@@ -1,157 +0,0 @@
-# A transmemIndex is a hash that takes a fasta reference as key and returns
-# a structured hash containing the transmembrane information.
-module TransmemIndex
-  # returns :toppred or :phobius
-  def self.filetype(file)
-    tp = nil
-    File.open(file) do |fh|
-      while (line = fh.gets)
-        case line
-        when /SEQENCE/
-          tp = :phobius
-          break
-        when /    0  0 i/
-          tp = :phobius  # if they don't have the headers,
-                         # this will pick it up if they have a
-                         # single prot without tm or signal peptide.
-          break
-        when /Algorithm specific parameters/
-          tp = :toppred  # New text
-          break
-        when /<parameters>/
-          tp = :toppred  # XML
-          break
-        end
-      end
-    end
-    tp
-  end
-  def reference_to_key(reference)
-    # needs to be subclassed or written
-  end
-  # right now accepts toppred.out files
-  # Phobius objects can use the fasta object to update their hash for methods
-  # like avg_overlap
-  def self.new(file, fasta=nil)
-    case x = filetype(file)
-    when :toppred
-      require 'transmem/toppred'
-      TopPred::Index.new(file)
-    when :phobius
-      require 'transmem/phobius'
-      # warn "WARNING: You have NO fasta object with Phobius based TransmemIndex! (which needs one to do proper indexing!)" unless fasta
-      Phobius::Index.new(file, fasta)
-    else
-      raise ArgumentError, "#{x} filetype for #{file} not recognized!"
-    end
-  end
-  # returns a hash of key -> num certain transmembrane segments
-  def num_certain_index
-    hash = {}
-    self.each do |k,v|
-      hash[k] = v[:num_certain_transmembrane_segments] || 0
-    end
-    hash
-  end
-  # tp = :number or :fraction which is the fraction of the sequence size
-  # returns the average number of overlapping amino acids with transmembrane
-  # segments
-  # returns nil if there is no protein by that key
-  def avg_overlap(key, sequence, tp=:number)
-    if self.key? key
-      numbers = num_transmem_aa(self[key], sequence)
-      if numbers.size > 0
-        sum = 0
-        numbers.each {|num| sum += num}
-        avg_num = sum.to_f / numbers.size
-        # the one line way to do it
-        #avg_num = numbers.inject(0) {|memo,num| num + memo }.to_f / numbers.size
-        if tp == :fraction
-          avg_num / sequence.size
-          # this is the same as doing this:
-          #numbers.inject(0.0) {|memo,num| (num.to_f/seq_size + memo) } / numbers.size
-        else
-          avg_num
-        end
-      else
-        0.0
-      end
-    else  # what to do if the protein isn't there?? which happens on occasion
-      nil
-    end
-  end
-  # returns an array (usually length of 1) of the number of amino acids
-  # contained inside transmembrane spanning segments.
-  # assumes that tmhash has the key 'transmembrane_segments'
-  # if there are no transmembrane segments, returns empty array.
-  def num_transmem_aa(tmhash, sequence)
-    if tmhash.key? :transmembrane_segments
-      ranges = tmhash[:transmembrane_segments].map do |tmseg|
-        Range.new( tmseg[:start]-1, tmseg[:stop]-1 )
-      end
-      num_overlapping_chars(tmhash[:aaseq], ranges, sequence)
-    else
-      []
-    end
-  end
-  # returns an array of the number of overlapping sequences in substring with
-  # the substrings defined in start_stop_doublets within full_sequence
-  # start_stop_doublets should be 0 indexed!!!
-  # the span includes the 'stop' position i.e., full_sequence[start..stop]
-  def num_overlapping_chars(full_sequence, ranges, substring)
-    #start_positions = aaseq.enum_for(:scan, substring).map { $~.offset(0)[0]}
-    if ranges.size == 0
-      []
-      #full_sequence.enum_for(:scan, substring).map { 0 }
-    else
-      substring_ranges = []
-      pos = 0
-      slen = substring.size
-      while i=full_sequence.index(substring,pos)
-        substring_ranges << Range.new(i, i+slen-1)
-        pos = i + slen
-      end
-      # brute force way
-      last_tm_range = ranges.last.last
-      to_return = substring_ranges.map do |sb|
-        overlap = 0
-        # there's got to be a much simpler way to do this, but this does work...
-        ranges.each do |tm|
-          (frst, lst) =
-            if tm.include?( sb.first )
-              [tm, sb]
-            elsif tm.include?( sb.last )
-              [sb, tm]
-            else
-              nil
-            end
-          if frst
-            if lst.last <= frst.last
-              overlap += (frst.last+1 - frst.first) - (lst.first - frst.first) - (frst.last - lst.last)
-            else
-              overlap += (frst.last+1 - frst.first) - (lst.first - frst.first)
-            end
-          end
-        end
-        overlap
-      end
-    end
-  end
-end
-#substring_ranges = full_sequence.enum_for(:scan, substring).map do
-#        (ofirst, olast) = $~.offset(0)
-#        Range.new(ofirst, olast - 1)
-#      end

data/lib/validator/aa.rb DELETED Viewed

@@ -1,48 +0,0 @@
-require 'validator/digestion_based'
-require 'fasta'
-require 'spec_id/aa_freqs'
-# Constraints on aaseq attribute of peptides (the bare amino acid sequence)
-# works by calculating amino acid frequencies in the fasta file used.
-class Validator::AA < Validator::DigestionBased
-  include Precision::Calculator
-  attr_accessor :constraint
-  # it is a false hit if the amino acid is located in the peptide
-  attr_accessor :false_if_found
-  DEFAULTS = Validator::DigestionBased::DEFAULTS.merge( {
-    :false_if_found => true,
-  } )
-  # returns tp, fp
-  def partition(peps)
-    (found, not_found) = peps.partition do |pep|
-      pep.aaseq.include?(@constraint)
-    end
-    if @false_if_found
-      [not_found, found]
-    else
-      [found, not_found]
-    end
-  end
-  # right now only accepts single amino acids as constraints (as a string,
-  # e.g. 'C', or symbol, e.g. :C)
-  # options:
-  #  :false_to_total_ratio => if a true digestion was already performed (see
-  #                           Validator::AA.calc_false_to_total_ratio)
-  #  :false_if_found => it is a false positive if the amino acid is found.
-  #  :background => the background level of amino acid Float
-  def initialize(constraint, options={})
-    @constraint = constraint.to_s
-    opts = DEFAULTS.merge(options)
-    (@false_to_total_ratio, @false_if_found, @background) = opts.values_at(:false_to_total_ratio, :false_if_found, :background)
-  end
-  def to_param_string
-    "aminoacid(bad_aa)=" + ["{constraint=#{@constraint}", "false_to_total_ratio=#{@false_to_total_ratio}", "bkg=#{(@background ? @background : 0.0) }}"].join(", ")
-  end
-end

data/lib/validator/aa_est.rb DELETED Viewed

@@ -1,112 +0,0 @@
-require 'validator/aa'
-class Validator ; end
-class Validator::AA ; end
-# A class that uses the peps given to it and a background frequency to
-# calculate the false_to_total_ratio at each turn.
-class Validator::AAEst < Validator::AA
-  attr_accessor :constraint
-  attr_accessor :false_if_found
-  # the frequency of the amino acid is used to estimate the false to
-  # total ratio based on the pephits given for pephit_precision.
-  # see Validator::AA.calc_frequency to calculate a frequency
-  # or use set_frequency to set from pep hits.
-  attr_accessor :frequency
-  DEFAULTS = {
-    :false_if_found => true
-  }.merge(Validator::DigestionBased::DEFAULTS)  # background 0.0
-  # only takes a string right now for constraint
-  def initialize(constraint, options={})
-    @constraint = constraint.to_s
-    opts = DEFAULTS.merge(options)
-    (@frequency, @false_if_found, @background) = opts.values_at(:frequency, :false_if_found, :background)
-  end
-  def pephit_precision(peps)
-    set_false_to_total_ratio(peps)
-    super(peps)
-  end
-  def set_false_to_total_ratio(peps)
-    if peps.size > 0
-      expected = 0.0
-      peps.each do |pep|
-        expected += (1.0 - ((1.0 - @frequency)**pep.aaseq.size))
-      end
-      @false_to_total_ratio = expected / peps.size
-    else
-      @false_to_total_ratio = 1.0
-    end
-  end
-  def set_ongoing_false_to_total_ratio(peps)
-    if peps.size > 0
-      peps.each do |pep|
-        @expected += (1.0 - ((1.0-@frequency)**pep.aaseq.size))
-      end
-      # @increment_total_submitted should == @increment_tps and @increment_fps
-      # since these are either/or
-      @false_to_total_ratio = @expected / @increment_total_submitted
-    else
-      @false_to_total_ratio = 1.0
-    end
-  end
-  def to_param_string
-    "aminoacid(bad_aa)=" + ["{constraint=#{@constraint}", "frequency=#{@frequency}", "bkg=#{(@background ? @background : 0.0) }}"].join(", ")
-  end
-  # takes objects responding to aaseq and sets the frequency based on
-  # constraint.  constraint is one acceptable to initialize!  returns self
-  def set_frequency(objs)
-    table = SpecID::AAFreqs.new.calculate_frequencies(objs)
-    @frequency = table[@constraint.to_sym]
-    self
-  end
-   # if adding pephits in groups at a time, the entire group does not need to be
-  # queried, just the individual hit.  Use this OR pephits_precision (NOT
-  # both).  The initial query to this method will begin a running tally that
-  # is saved by the validator.
-  # takes either an array or a single pephit (determined by if it is a
-  # SpecID::Pep)
-  def increment_pephits_precision(peps)
-    tmp = $VERBOSE; $VERBOSE = nil
-    unless @increment_initialized
-      initialize_increment
-      @expected = 0.0
-    end
-    $VERBOSE = tmp
-    to_submit =
-      if peps.is_a? SpecID::Pep
-        [peps]
-      else
-        peps
-      end
-    @increment_total_submitted += to_submit.size
-    (tps, fps) = partition(to_submit)
-    #### THIS IS THE MAGIC FOR THIS VALIDATOR:
-    set_ongoing_false_to_total_ratio(to_submit)
-    @increment_tps += tps.size
-    @increment_fps += fps.size
-    (num_tps, num_fps) =
-      if self.respond_to?(:calc_precision_prep)  # for digestion based validators
-        (num_tps, num_fps) = calc_precision_prep(@increment_tps, @increment_fps)
-        [num_tps, num_fps]
-      else
-        [@increment_tps, @increment_fps]
-      end
-    calc_precision(num_tps, num_fps)
-  end
-end

data/lib/validator/background.rb DELETED Viewed

@@ -1,77 +0,0 @@
-require 'validator'
-require 'vec'
-require 'enumerator'
-class Validator ; end
-class Validator::Background
-  attr_accessor :data
-  def initialize(data=nil)
-    @data = data
-  end
-  def delete_nan!(vec)
-    vec.each_with_index do |v,i|
-      if v.nan?
-        vec[i] = 0
-      end
-    end
-  end
-  def stdev_plus_spread(stdev_factor=2.0, stdev_points=15, min_window_pre=5, min_window_post=5)
-    data_vec = VecD[*@data]
-    delete_nan!(data_vec)
-    stdev_transform = data_vec.transform(9) {|vec| (stdev_factor * vec.sample_stats[1]) + vec.spread  }
-    smoothed_stdev = stdev_transform.transform(9) {|vec| vec.avg }
-    smoothed_stdev_derivs = smoothed_stdev.chim
-    last_0_index = index_of_last_0(smoothed_stdev_derivs)
-    min_in_window(data_vec, last_0_index, min_window_pre, min_window_post)
-  end
-  def plot(vec)
-    `graph #{vec.join(" ")} -a -T X`
-  end
-  # not really working right currently
-  def derivs(avg_points=15, min_window_pre=5, min_window_post=5)
-    data_vec = VecD[*@data]
-    delete_nan!(data_vec)
-    drvs = data_vec.chim
-    # absolute value
-    drvs.each_with_index {|x,i| drvs[i] = x.abs }
-    mv_avg = drvs.transform(avg_points) {|v| v.avg }
-    last_0_index = index_of_last_0(mv_avg.chim)
-    min_in_window(data_vec, last_0_index, min_window_pre, min_window_post)
-  end
-  def index_of_last_0(vec)
-    last_0_index = nil
-    vec.each_with_index do |v,i|
-      if v == 0
-        last_0_index = i
-      end
-    end
-    last_0_index
-  end
-  # returns the minimum value in the window centered on index
-  def min_in_window(vec, index, pre, post)
-    last_index = vec.size - 1
-    start = index - pre
-    stop = index + post
-    start = 0 if start < 0
-    stop = last_index if stop > last_index
-    vec[start..stop].min
-  end
-  # very simple, should work
-  def min_mesa(start, stop, points=3)
-    data_vec = VecD[*@data]
-    delete_nan!(data_vec)
-    smoothed = data_vec.transform(3) {|v| v.avg }
-    smoothed[start..stop].min
-  end
-end

data/lib/validator/bias.rb DELETED Viewed

@@ -1,95 +0,0 @@
-require 'validator'
-require 'validator/digestion_based'
-# class for any generic kind of bias.  For instance, a list of high abundance
-# proteins we would expect to see, or a list of low abundance proteins we
-# would not expect to see, or proteins that have been filtered out in some
-# way, etc.
-class Validator::Bias < Validator::DigestionBased
-  include Precision::Calculator
-  # a fasta object (by default containing proteins expected to be in the
-  # sample [see proteins_expected to modify that behavior])
-  attr_reader :fasta
-  # correct_wins means that only a single protein from a pep.aaseq must match
-  # the fasta object for the pep hit to be considered valid.  Otherwise, all
-  # must be a match (logic negated by proteins_expected)
-  attr_accessor :correct_wins
-  # proteins_expected==true means we expect to see the proteins in the sample
-  # proteins_expected==false means we do not expect to see these proteins in
-  # the sample
-  attr_accessor :proteins_expected
-  # a hash made by taking each fasta reference in fasta_object, (everything
-  # until a space) and setting the value to true.  It can be queried with the
-  # start of an fasta sequence
-  attr_accessor :short_reference_hash
-  DEFAULTS = Validator::DigestionBased::DEFAULTS.merge( {
-    :proteins_expected => true,
-    :correct_wins => true,
-  } )
-  # options:
-  #   (t = true, f = false, '*'= default)
-  #   :proteins_expected => *t/f  we expect to see the fasta proteins in our hit list
-  #   :correct_wins => *t/f  a single peptide hit from one of these proteins
-  #                    constitutes a true positive
-  #   :background => Float  (*0.0-1.0)
-  #   :false_to_total_ratio => Float (*nil by default)
-  def initialize(fasta_object, options={})
-    opts = DEFAULTS.merge(options)
-    (@proteins_expected, @correct_wins, @background, @false_to_total_ratio) = opts.values_at(:proteins_expected, :correct_wins, :background, :false_to_total_ratio)
-    @fasta = fasta_object
-    @header_split_hash = @fasta.prots.map {|prot| prot.reference }
-    @short_reference_hash = self.class.make_short_reference_hash(fasta_object)
-  end
-  def self.make_short_reference_hash(fasta_object)
-    hash = {}
-    fasta_object.each do |prot|
-      hash[prot.first_entry] = true
-    end
-    hash
-  end
-  def partition(peps)
-    klass = self.class
-    cw =
-      if !@proteins_expected
-        !@correct_wins
-      else
-        @correct_wins
-      end
-    (tp, fp) =
-      if cw
-        peps.partition do |pep|
-          pep.prots.any? do |pepprot|
-            @short_reference_hash.key?( pepprot.first_entry )
-          end
-        end
-      else
-        peps.partition do |pep|
-          pep.prots.any? do |pepprot|
-            !@short_reference_hash.key?( pepprot.first_entry )
-          end
-        end
-      end
-    if !@correct_wins
-      tp, fp = fp, tp
-    end
-    [tp, fp]
-  end
-  # pephit_precision is done through inheritance
-  def to_param_string
-    "abundance=" +  ["{fasta=#{@fasta.filename}", "proteins_expected=#{@proteins_expected}", "correct_wins=#{@correct_wins}", "background=#{@background}}"].join(", ")
-  end
-end