RubyGems - mspire - Versions diffs - 0.4.9 → 0.5.0 - Mend

mspire 0.4.9 → 0.5.0

Files changed (255) hide show

data/README +27 -17
data/changelog.txt +31 -62
data/lib/ms/calc.rb +32 -0
data/lib/ms/data/interleaved.rb +60 -0
data/lib/ms/data/lazy_io.rb +73 -0
data/lib/ms/data/lazy_string.rb +15 -0
data/lib/ms/data/simple.rb +59 -0
data/lib/ms/data/transposed.rb +41 -0
data/lib/ms/data.rb +57 -0
data/lib/ms/format/format_error.rb +12 -0
data/lib/ms/spectrum.rb +25 -384
data/lib/ms/support/binary_search.rb +126 -0
data/lib/ms.rb +10 -10
metadata +38 -350
data/INSTALL +0 -58
data/README.rdoc +0 -18
data/Rakefile +0 -330
data/bin/aafreqs.rb +0 -23
data/bin/bioworks2excel.rb +0 -14
data/bin/bioworks_to_pepxml.rb +0 -148
data/bin/bioworks_to_pepxml_gui.rb +0 -225
data/bin/fasta_shaker.rb +0 -5
data/bin/filter_and_validate.rb +0 -5
data/bin/gi2annot.rb +0 -14
data/bin/id_class_anal.rb +0 -112
data/bin/id_precision.rb +0 -172
data/bin/ms_to_lmat.rb +0 -67
data/bin/pepproph_filter.rb +0 -16
data/bin/prob_validate.rb +0 -6
data/bin/protein_summary.rb +0 -6
data/bin/protxml2prots_peps.rb +0 -32
data/bin/raw_to_mzXML.rb +0 -55
data/bin/run_percolator.rb +0 -122
data/bin/sqt_group.rb +0 -26
data/bin/srf_group.rb +0 -27
data/bin/srf_to_sqt.rb +0 -40
data/lib/align/chams.rb +0 -78
data/lib/align.rb +0 -154
data/lib/archive/targz.rb +0 -94
data/lib/bsearch.rb +0 -120
data/lib/core_extensions.rb +0 -16
data/lib/fasta.rb +0 -626
data/lib/gi.rb +0 -124
data/lib/group_by.rb +0 -10
data/lib/index_by.rb +0 -11
data/lib/merge_deep.rb +0 -21
data/lib/ms/converter/mzxml.rb +0 -77
data/lib/ms/gradient_program.rb +0 -170
data/lib/ms/msrun.rb +0 -244
data/lib/ms/msrun_index.rb +0 -108
data/lib/ms/parser/mzdata/axml.rb +0 -67
data/lib/ms/parser/mzdata/dom.rb +0 -175
data/lib/ms/parser/mzdata/libxml.rb +0 -7
data/lib/ms/parser/mzdata.rb +0 -31
data/lib/ms/parser/mzxml/axml.rb +0 -70
data/lib/ms/parser/mzxml/dom.rb +0 -182
data/lib/ms/parser/mzxml/hpricot.rb +0 -253
data/lib/ms/parser/mzxml/libxml.rb +0 -19
data/lib/ms/parser/mzxml/regexp.rb +0 -122
data/lib/ms/parser/mzxml/rexml.rb +0 -72
data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
data/lib/ms/parser/mzxml.rb +0 -282
data/lib/ms/parser.rb +0 -108
data/lib/ms/precursor.rb +0 -25
data/lib/ms/scan.rb +0 -81
data/lib/mspire.rb +0 -4
data/lib/pi_zero.rb +0 -244
data/lib/qvalue.rb +0 -161
data/lib/roc.rb +0 -187
data/lib/sample_enzyme.rb +0 -160
data/lib/scan_i.rb +0 -21
data/lib/spec_id/aa_freqs.rb +0 -170
data/lib/spec_id/bioworks.rb +0 -497
data/lib/spec_id/digestor.rb +0 -138
data/lib/spec_id/mass.rb +0 -179
data/lib/spec_id/parser/proph.rb +0 -335
data/lib/spec_id/precision/filter/cmdline.rb +0 -218
data/lib/spec_id/precision/filter/interactive.rb +0 -134
data/lib/spec_id/precision/filter/output.rb +0 -148
data/lib/spec_id/precision/filter.rb +0 -637
data/lib/spec_id/precision/output.rb +0 -60
data/lib/spec_id/precision/prob/cmdline.rb +0 -160
data/lib/spec_id/precision/prob/output.rb +0 -94
data/lib/spec_id/precision/prob.rb +0 -249
data/lib/spec_id/proph/pep_summary.rb +0 -104
data/lib/spec_id/proph/prot_summary.rb +0 -484
data/lib/spec_id/proph.rb +0 -4
data/lib/spec_id/protein_summary.rb +0 -489
data/lib/spec_id/sequest/params.rb +0 -316
data/lib/spec_id/sequest/pepxml.rb +0 -1458
data/lib/spec_id/sequest.rb +0 -33
data/lib/spec_id/sqt.rb +0 -349
data/lib/spec_id/srf.rb +0 -973
data/lib/spec_id.rb +0 -778
data/lib/spec_id_xml.rb +0 -99
data/lib/transmem/phobius.rb +0 -147
data/lib/transmem/toppred.rb +0 -368
data/lib/transmem.rb +0 -157
data/lib/validator/aa.rb +0 -48
data/lib/validator/aa_est.rb +0 -112
data/lib/validator/background.rb +0 -77
data/lib/validator/bias.rb +0 -95
data/lib/validator/cmdline.rb +0 -431
data/lib/validator/decoy.rb +0 -107
data/lib/validator/digestion_based.rb +0 -70
data/lib/validator/probability.rb +0 -51
data/lib/validator/prot_from_pep.rb +0 -234
data/lib/validator/q_value.rb +0 -32
data/lib/validator/transmem.rb +0 -272
data/lib/validator/true_pos.rb +0 -46
data/lib/validator.rb +0 -197
data/lib/xml.rb +0 -38
data/lib/xml_style_parser.rb +0 -119
data/lib/xmlparser_wrapper.rb +0 -19
data/release_notes.txt +0 -2
data/script/compile_and_plot_smriti_final.rb +0 -97
data/script/create_little_pepxml.rb +0 -61
data/script/degenerate_peptides.rb +0 -47
data/script/estimate_fpr_by_cysteine.rb +0 -226
data/script/extract_gradient_programs.rb +0 -56
data/script/find_cysteine_background.rb +0 -137
data/script/genuine_tps_and_probs.rb +0 -136
data/script/get_apex_values_rexml.rb +0 -44
data/script/histogram_probs.rb +0 -61
data/script/mascot_fix_pepxml.rb +0 -123
data/script/msvis.rb +0 -42
data/script/mzXML2timeIndex.rb +0 -25
data/script/peps_per_bin.rb +0 -67
data/script/prep_dir.rb +0 -121
data/script/simple_protein_digestion.rb +0 -27
data/script/smriti_final_analysis.rb +0 -103
data/script/sqt_to_meta.rb +0 -24
data/script/top_hit_per_scan.rb +0 -67
data/script/toppred_to_yaml.rb +0 -47
data/script/tpp_installer.rb +0 -249
data/specs/align_spec.rb +0 -79
data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
data/specs/bin/fasta_shaker_spec.rb +0 -259
data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
data/specs/bin/filter_and_validate_spec.rb +0 -180
data/specs/bin/ms_to_lmat_spec.rb +0 -34
data/specs/bin/prob_validate_spec.rb +0 -86
data/specs/bin/protein_summary_spec.rb +0 -14
data/specs/fasta_spec.rb +0 -354
data/specs/gi_spec.rb +0 -22
data/specs/load_bin_path.rb +0 -7
data/specs/merge_deep_spec.rb +0 -13
data/specs/ms/gradient_program_spec.rb +0 -77
data/specs/ms/msrun_spec.rb +0 -498
data/specs/ms/parser_spec.rb +0 -92
data/specs/ms/spectrum_spec.rb +0 -87
data/specs/pi_zero_spec.rb +0 -115
data/specs/qvalue_spec.rb +0 -39
data/specs/roc_spec.rb +0 -251
data/specs/rspec_autotest.rb +0 -149
data/specs/sample_enzyme_spec.rb +0 -126
data/specs/spec_helper.rb +0 -135
data/specs/spec_id/aa_freqs_spec.rb +0 -52
data/specs/spec_id/bioworks_spec.rb +0 -148
data/specs/spec_id/digestor_spec.rb +0 -75
data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
data/specs/spec_id/precision/filter/output_spec.rb +0 -31
data/specs/spec_id/precision/filter_spec.rb +0 -246
data/specs/spec_id/precision/prob_spec.rb +0 -44
data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
data/specs/spec_id/protein_summary_spec.rb +0 -189
data/specs/spec_id/sequest/params_spec.rb +0 -68
data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
data/specs/spec_id/sequest_spec.rb +0 -38
data/specs/spec_id/sqt_spec.rb +0 -246
data/specs/spec_id/srf_spec.rb +0 -172
data/specs/spec_id/srf_spec_helper.rb +0 -139
data/specs/spec_id_helper.rb +0 -33
data/specs/spec_id_spec.rb +0 -366
data/specs/spec_id_xml_spec.rb +0 -33
data/specs/transmem/phobius_spec.rb +0 -425
data/specs/transmem/toppred_spec.rb +0 -298
data/specs/transmem_spec.rb +0 -60
data/specs/transmem_spec_shared.rb +0 -64
data/specs/validator/aa_est_spec.rb +0 -66
data/specs/validator/aa_spec.rb +0 -40
data/specs/validator/background_spec.rb +0 -67
data/specs/validator/bias_spec.rb +0 -122
data/specs/validator/decoy_spec.rb +0 -51
data/specs/validator/fasta_helper.rb +0 -26
data/specs/validator/prot_from_pep_spec.rb +0 -141
data/specs/validator/transmem_spec.rb +0 -146
data/specs/validator/true_pos_spec.rb +0 -58
data/specs/validator_helper.rb +0 -33
data/specs/xml_spec.rb +0 -12
data/test_files/000_pepxml18_small.xml +0 -206
data/test_files/020a.mzXML.timeIndex +0 -4710
data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
data/test_files/4-03-03_small-prot.xml +0 -321
data/test_files/4-03-03_small.xml +0 -3876
data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
data/test_files/bioworks-3.3_10prots.xml +0 -5999
data/test_files/bioworks31.params +0 -77
data/test_files/bioworks32.params +0 -62
data/test_files/bioworks33.params +0 -63
data/test_files/bioworks_single_run_small.xml +0 -7237
data/test_files/bioworks_small.fasta +0 -212
data/test_files/bioworks_small.params +0 -63
data/test_files/bioworks_small.phobius +0 -109
data/test_files/bioworks_small.toppred.out +0 -2847
data/test_files/bioworks_small.xml +0 -5610
data/test_files/bioworks_with_INV_small.xml +0 -3753
data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
data/test_files/corrupted_900.srf +0 -0
data/test_files/head_of_7MIX.srf +0 -0
data/test_files/interact-opd1_mods_small-prot.xml +0 -304
data/test_files/messups.fasta +0 -297
data/test_files/opd1/000.my_answer.100lines.xml +0 -101
data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
data/test_files/opd1/000_020-prot.png +0 -0
data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
data/test_files/opd1/000_020_3prots-prot.xml +0 -62
data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
data/test_files/opd1/sequest.3.1.params +0 -77
data/test_files/opd1/sequest.3.2.params +0 -62
data/test_files/opd1/twenty_scans.mzXML +0 -418
data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
data/test_files/opd1/twenty_scans_answ.lmat +0 -0
data/test_files/opd1/twenty_scans_answ.lmata +0 -9
data/test_files/opd1_020_beginning.RAW +0 -0
data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
data/test_files/pepproph_small.xml +0 -4691
data/test_files/phobius.small.noheader.txt +0 -50
data/test_files/phobius.small.small.txt +0 -53
data/test_files/s01_anC1_ld020mM.key.txt +0 -25
data/test_files/s01_anC1_ld020mM.meth +0 -0
data/test_files/small.fasta +0 -297
data/test_files/small.sqt +0 -87
data/test_files/smallraw.RAW +0 -0
data/test_files/tf_bioworks2excel.bioXML +0 -14340
data/test_files/tf_bioworks2excel.txt.actual +0 -1035
data/test_files/toppred.small.out +0 -416
data/test_files/toppred.xml.out +0 -318
data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
data/test_files/yeast_gly_small-prot.xml +0 -265
data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
data/test_files/yeast_gly_small.xml +0 -3807
data/test_files/yeast_gly_small2.parentTimes +0 -6

data/lib/spec_id/precision/filter.rb DELETED Viewed

@@ -1,637 +0,0 @@
-require 'sort_by_attributes'
-require 'validator'
-require 'spec_id'
-require 'merge_deep'
-require 'spec_id/precision/filter/interactive'
-require 'spec_id/precision/filter/output'
-class Filter
-  # filters using previously passed in methods and options
-  def filter(group)
-    if @opts
-      send(@method, group, *@opts)
-    else
-      send(@method, group)
-    end
-  end
-  # replaces the contents of group with what passed
-  def filter!(group)
-    group.replace(filter(group))
-  end
-end
-# we have to require this after we setup our defaults hash
-# require 'filter/spec_id/cmdline'
-class SpecID::Precision::Filter
-  FV_DEFAULTS = {
-    :sequest =>
-    {
-      :xcorr1 => 1.0,
-      :xcorr2 => 1.5,
-      :xcorr3 => 2.0,
-      :deltacn => 0.1,
-      :ppm => 1000,
-      :include_deltacnstar => true,
-    },
-    # output
-    :proteins => false,
-    :output => [],
-    # general
-    :top_hit_by => :xcorr,
-    :postfilter => :top_per_scan,
-    :prefilter => false,
-    :hits_together => true,
-    # These are also defaulted in the commandline because they are necessary
-    # for the validators...  could this introduce conflicts somehow?
-    :decoy_on_match => true,
-    :ties => true,
-    # UNLISTED FOR NOW:
-    :include_ties_in_top_hit_prefilter => true,
-    :include_ties_in_top_hit_postfilter => false,
-  }
-  require 'spec_id/precision/filter/cmdline'
-  def filter_and_validate_cmdline(args)
-    (spec_id_obj, options, option_parser) = CmdlineParser.new.parse(args)
-    if spec_id_obj == nil
-      puts option_parser
-      return
-    end
-    final_answer = SpecID::Precision::Filter.new.filter_and_validate(spec_id_obj, options)
-  end
-    #  # output_array has doublets of [format, handle]
-    #  # answer is the answer one gets out of filter_and_validate
-    #  def output(answer, output_array)
-    #    output_array.each do |format, handle|
-    #      SpecID::Precision::Filter::Output.new(format, handle)
-    #    end
-    #  end
-  # Very high level method that takes simple parameters.
-  # spec_id may be a filename or a SpecID object (containing peps)
-  # Default values may be queried from SpecID::Precision::Filter::FV_DEFAULTS
-  # Returns a structured hash:
-  #  Fl = Float ; Ar = Array
-  # { :params => <Hash of filtering params>,
-  #   :pephits => <Ar of pephits>,
-  #   :pephits_precision => [<array of precision>]
-  #      # if :proteins => true
-  #   :prothits => <Array of prothits>,
-  #   :prothits_precision => [ Array of hashes where each hash =
-  #                             { :worst => Fl, :normal => Fl,
-  #                             :normal_stdev => Fl } ]
-  # }
-  #
-  # NOTE: Brackets [] indicate an Array! The Bar '|' indicates another option.
-  # The asterisk '*' is the default option.
-  #
-  # :sequest => {
-  #   :xcorr1 -> >= (xcorr +1 charge state)
-  #   :xcorr2 -> >= (xcorr +2 charge state)
-  #   :xcorr3 -> >= (xcorr +3 charge state)
-  #   :deltacn -> >= (delta cn)
-  #   :ppm -> <= parts per million (Float)
-  #   :include_deltacnstar => *true | false  include deltacn (given at 1.1) of
-  #                                          top hit with no 2nd hit
-  #
-  # }
-  # OUTPUT:
-  #   :proteins => true | *false    gives proteins (and validation)
-  #   :output => [[format, FILENAME=nil],...]  formats to output filtering results.
-  #                                            can be used multiple times
-  #                                            FILENAME is the filename to use
-  #                                            if nil, then outputs to $stdout
-  #                                            valid formats are:
-  #                                            :text_table   (default)
-  #                                            :yaml         (need to implement)
-  #                                            :protein_summary (need to implement)
-  #                                            :html_table   (need to implement)
-  #                                            default value =>
-  #                                            [[:text_table,nil]]
-  #
-  # VALIDATION:
-  #   :validators => [Array]  objects that respond to pephit_precision
-  #                           usually of base class Validator
-  #                           NOTE: if you have decoy peptides, you MUST have
-  #                           a Validator::Decoy object to separate them out.
-  #                           NOTE: if transmem validator passed in, the
-  #                           proteins in spec_id must already be granted
-  #                           transmem status!
-  #
-  #
-  # OTHER:
-  #   :top_hit_by -> *:xcorr | :probability
-  #                   probabilities only in bioworks.xml files right now (if
-  #                   they were calculated).
-  #   :postfilter -> *:top_per_scan | :top_per_aaseq | :top_per_aaseq_charge
-  #                   :top_per_scan hashes by filename + scan
-  #                   :top_per_aaseq hashes by top_per_scan + aaseq
-  #                   :top_per_aaseq_charge hashes by top_per_aaseq + charge
-  #   :prefilter -> true | *false    Takes top hit per file+scan+charge
-  #   :interactive => interactive_object
-  #        # should behave like this:
-  #        # interactive_object.filter_args(currentopts) -> args_for_filtering | nil (done)
-  #
-  #        # interactive_object.passing(final_answer)
-  # The defaults for filter_and_validate
-  def filter_and_validate(spec_id_obj, options={})
-    # NOTE:
-    # This is a fairly complicated method.  The complication comes from doing
-    # top hit filters on separate/cat searches, where the hits may need to be
-    # either together or separate.  I opt for fewer conversions between the
-    # two, but that means keeping track of more things...
-    opts = FV_DEFAULTS.merge_deep(options)
-    spec_id = spec_id_obj
-    peps = spec_id.peps
-    filename = spec_id.filename
-    #######################################
-    # DEFAULTS:
-    interactive_changing_keys = [:xcorr1, :xcorr2, :xcorr3, :deltacn, :ppm, :include_deltacnstar, :postfilter]
-    interactive_shortcut_map = {
-      :xcorr1 => 'x1',
-      :xcorr2 => 'x2',
-      :xcorr3 => 'x3',
-      :deltacn => 'dcn',
-      :ppm => 'ppm',
-      :include_deltacnstar => 'dcns',
-      :postfilter => 'pf',
-    }
-    to_float = proc {|x| x.to_f}
-    to_bool = proc do |x|
-      case x
-      when /^t/io
-        true
-      when /^f/io
-        false
-      when true
-        true
-      when false
-        false
-      else
-        nil
-      end
-    end
-    to_postfilter = proc do |x|
-      case x
-      when 's'
-        :top_per_scan
-      when 'a'
-        :top_per_aaseq
-      when 'ac'
-        :top_per_aaseq_charge
-      when Symbol
-        x
-      end
-    end
-    casting_map = {
-      :xcorr1 => to_float,
-      :xcorr2 => to_float,
-      :xcorr3 => to_float,
-      :deltacn => to_float,
-      :ppm => to_float,
-      :include_deltacnstar => to_bool,
-      :postfilter => to_postfilter,
-    }
-    # output:
-    # NOTE: BOOLEANS that are by default false do not need a default!!
-    # They will yield false on key lookup if no key or false!
-    # BOOLEANS that by default are true should be queried like this
-    # !(opts[:<option>] == false)
-    # open up each of the files for writing
-    if opts[:output]
-      outputs = opts[:output].map do |format, where|
-        if where == nil
-          where = $stdout
-        end
-        SpecID::Precision::Filter::Output.new(format, where)
-      end
-    end
-    postfilters_per_hash = {
-      :top_per_scan => [:base_name, :first_scan],
-      :top_per_aaseq => [:aaseq],  # first by top_per_scan, then this guy
-      :top_per_aaseq_charge => [:aaseq, :charge], # first by top_per_scan, then this one
-    }
-    top_hit_by__to_sort_by = {
-      :xcorr => [:xcorr, {:down=> [:xcorr]}],
-      :probability => [:probability, (spec_id.hi_prob_best ? {:down=> [:probability]} : {})],
-    }
-    sort_by_att_opts = top_hit_by__to_sort_by[opts[:top_hit_by]]
-    opts_for_top_hit_prefilter = {
-      :per => [:base_name, :first_scan, :charge],
-      :by => sort_by_att_opts,
-      :include_ties => opts[:include_ties_in_top_hit_prefilter]
-    }
-    # PRIVATE DEFAULTS:
-    merge_prefix = 'DECOY_'
-    unmerge_regexp = /^DECOY_/
-    #######################################
-    # opts_decoy = opts[:decoy]
-    # if we have a Validator::Decoy object, we will use its defaults to split
-    # peptides.
-    decoy_validator =
-      if opts[:validators]
-        decoy_vals = opts[:validators].select {|v| v.class == Validator::Decoy }
-        if decoy_vals.size == 0
-          nil
-        elsif decoy_vals.size == 1
-          decoy_vals.first
-        else
-          raise ArgumentError, "can only have one Validator::Decoy object"
-        end
-        ### suck out the relevant parameters
-        #sep_params = [:decoy_on_match, :correct_wins].inject({}) do |hash,k|
-        #  hash[k] = decoy_validator.send(k)
-        #  hash
-        #end
-      else
-        nil
-      end
-    decoy_validator_to_split_with = nil
-    pep_sets =
-      if decoy_validator
-        if decoy_validator.constraint.is_a?(Regexp)
-          if opts[:hits_together]
-            decoy_validator_to_split_with = decoy_validator
-            [peps]
-          else
-            (target, decoy) = decoy_validator.partition(peps)
-            #(target, decoy) = SpecID.classify_by_prot(peps, opts_decoy, sep_params[:decoy_on_match], sep_params[:correct_wins])
-            [target, decoy]
-          end
-        elsif decoy_validator.constraint.is_a?(String)  ## a Filename
-          decoy_peps = SpecID.new(decoy_validator.constraint).peps
-          if opts[:hits_together]
-            # we fake that the protein sets are together
-            decoy_validator_to_split_with = Validator::Decoy.new(:constraint => unmerge_regexp)
-            decoy_peps.each do |pep|
-              pep.prots.each {|prt| prt.reference = merge_prefix + prt.reference }
-            end
-            [peps + decoy_peps] # wrap them so we get the target out
-          else
-            [peps, decoy_peps]
-          end
-        else
-          raise ArgumentError, "Decoy::Validator#constraint must be a Regexp or valid SpecID file"
-        end
-      else
-        [peps]  # no decoy
-      end
-    # This method doesn't seem to do so well, but a person can use a different
-    # one and enter in their own custom pi_0 value!
-    #if opts[:decoy_pi_zero]
-    #  if pep_sets.size < 2
-    #    raise ArgumentError, "must have a decoy validator for pi zero calculation!"
-    #  end
-    #  require 'pi_zero'
-    #  (_target, _decoy) = pep_sets
-    #  pvals = PiZero.p_values_for_sequest(*pep_sets).sort
-    #  pi_zero = PiZero.pi_zero(pvals)
-    #  opts[:decoy_pi_zero] = PiZero.pi_zero(pvals)
-    #end
-    if opts[:proteins]
-      protein_validator = Validator::ProtFromPep.new
-    end
-    ### TOP HITS PREFILTER < < TOP_HITS_TOGETHER > >
-    ###########################
-    # TOP HITS FILTER:
-    ###########################
-    # REALLY, this guy only exists for speed and memory consumption
-    # If we prefilter, we don't have to filter as many hits in every
-    # interactive round.  I'd leave this guy out if I were doing only a
-    # sequest filter.  (I should compare results with this filter and w/o)
-    # This guy is very tricky since we need to consider whether they are to be
-    # run together or separately and not do more work than we need
-    # get passed_target for any case (and passed_decoy if opts[:decoy])
-    top_hit_prefilter = SpecID::Precision::Filter::Peps.new(:top_hit, opts_for_top_hit_prefilter)  if opts[:prefilter]
-    if top_hit_prefilter
-      pep_sets.map! do |pep_set|
-        top_hit_prefilter.filter(pep_set)
-      end
-    end
-    # prepare our top hit filter:
-    # since we are now modulating this guy, we need to create it fresh every
-    # time
-    top_per_scan_postfilter = SpecID::Precision::Filter::Peps.new(:top_hit,
-                                                  :per => postfilters_per_hash[:top_per_scan],
-                                                  :by => sort_by_att_opts,
-                                                  :include_ties => opts[:include_ties_in_top_hit_postfilter])
-    # Prepare to loop
-    # Give interactive help once here if necessary
-    interactive = opts[:interactive]
-    if interactive
-      ARGV.clear
-      interactive.out(interactive.interactive_help(interactive_changing_keys, interactive_shortcut_map)) if interactive.verbose
-    end
-    # the loop is for if we are interactive
-    final_answer = nil
-    loop do
-      if interactive #interactive
-        # a bit of a hack, but we shove on the postfilter param to modulate
-        opts[:sequest][:postfilter] = opts[:postfilter]
-        response = interactive.filter_args(opts[:sequest], interactive_changing_keys, interactive_shortcut_map, casting_map)
-        opts[:postfilter] = opts[:sequest].delete(:postfilter)
-        break if response == nil
-      end
-      # prepare our top hit filter:
-      # since we are now modulating this guy, we need to create it fresh every
-      # time
-      sub_postfilter =
-        if opts[:postfilter] == :top_per_scan
-          nil
-        else
-          postfilter_per_args = postfilters_per_hash[opts[:postfilter]]
-          SpecID::Precision::Filter::Peps.new(:top_hit,
-                                   :per => postfilter_per_args,
-                                   :by => sort_by_att_opts,
-                                   :include_ties => opts[:include_ties_in_top_hit_postfilter]
-                                  )
-        end
-      pep_sets_to_be_filtered = pep_sets.map
-      ### SEQUEST < EITHER >
-      ###########################
-      # SEQUEST FILTER:
-      ###########################
-      # This guy is immune to the trickiness of top hits, so we just filter
-      # separately since validation is best done without decoys (except decoy)
-      sequest_args = opts[:sequest].values_at( :xcorr1, :xcorr2, :xcorr3, :deltacn, :ppm, :include_deltacnstar )
-      sequest_filter = SpecID::Precision::Filter::Peps.new(:standard_sequest_filter, *sequest_args)
-      pep_sets_filtered = pep_sets_to_be_filtered.map do |pep_set|
-        sequest_filter.filter(pep_set)
-      end
-      ### FINAL HIT PER SCAN < < TOP_HITS_TOGETHER > >
-      ##########################
-      # FINAL HIT PER SCAN
-      ##########################
-      # Why not just do the top hit filter in the top hits pre filter before?
-      # Good question.  Answer: We may have instances when the top hit (by
-      # xcorr) has some other poorer attribute than the hit at the other charge.
-      # In this case, we'd end up with no passing peptide.
-      # Also, the xcorr filter is per charge, so we may filter out the higher
-      # scoring peptide hit even though the other would pass based on its charge
-      # state, etc., etc....
-      # ###################################################
-      # NOTE THIS WELL:
-      # IF IT IS SUPPOSED TO be separate it's *ALREADY* separate; if together, it's
-      # *ALREADY* together!!!!
-      # the implication is that we don't need to do any merging or
-      # separating before we do this last filter!!!!
-      # ###################################################
-      # TODO: We need to add this guy in!
-      #if opts[:uniq_aa]
-      #  pep_sets_filtered.map do |pep_set|
-      #  end
-      #end
-      pep_sets_filtered.map! do |pep_set|
-        top_per_scan_postfilter.filter!(pep_set)
-        if sub_postfilter
-          sub_postfilter.filter!(pep_set)
-        else
-          pep_set
-        end
-      end
-      normal_post_filtered_peps = pep_sets_filtered.first
-      # separate the decoys out if they are together
-      if decoy_validator_to_split_with  # only set if opts[:hits_together]!!
-        (target, decoy) = decoy_validator_to_split_with.partition(normal_post_filtered_peps)
-        pep_sets_filtered = [target, decoy]
-      end
-      ### VALIDATION < SEPARATE >
-      pephit_precision_array = get_pephit_precision(opts[:validators], *pep_sets_filtered) if opts[:validators]
-      final_answer = {
-        :params => opts,
-        :pephits => pep_sets_filtered.first,
-      }
-      if pephit_precision_array
-        final_answer[:pephits_precision] = pephit_precision_array
-      end
-      if opts[:proteins]
-        protein_precision_array = peptide_precision_to_protein_precision(protein_validator, normal_post_filtered_peps, pephit_precision_array)
-        # this could be factored out (since we do it in protein_precision)
-        # merge the final prots into a unique set:
-        final_answer[:prothits] = normal_post_filtered_peps.inject(Set.new) do |protset, pep|
-          protset.merge(pep.prots)
-        end
-        final_answer[:prothits_precision] = protein_precision_array
-      end
-      ## output the output
-      outputs.each {|output| output.print(final_answer) }
-      if interactive
-        interactive.passing(opts, final_answer)
-      end
-      if !interactive
-        break
-      end
-    end
-    # Close the filehandles
-    outputs.each { |output| output.close } if opts[:output]
-    final_answer
-  end
-  # takes peps and a peptide_precision_hash.  Returns a hash with the same
-  # keys of peptide_precision_hash where the value is a hash with these keys:
-  #   :worst => worstcase protein precision
-  #   :normal => estimation by binomial/gaussian method (optimistic)
-  #   :normal_stdev => the stdev of the normal method
-  def peptide_precision_to_protein_precision(protein_validator, peps, peptide_precision_array, round_num_false=:ceil)
-    peptide_precision_array.map do |precision|
-      num_false = ((1.0 - precision) * peps.size).ceil
-      reply = protein_validator.prothit_precision(peps, num_false)
-      hash = {}
-      %w(worst normal normal_stdev).zip(reply) do |label, answer|
-        hash[label.to_sym] = answer
-      end
-      hash
-    end
-  end
-  # takes an array of validator objects and peps (already separated out from
-  # decoys; the decoys can be passed in as decoy_peps)
-  # returns an array of results
-  def get_pephit_precision(validators, peps, decoy_peps=nil, grant_transmem_status=false)
-    validators.map do |validator|
-      if validator.class == Validator::Decoy
-        validator.pephit_precision(peps, decoy_peps)
-      else
-        validator.pephit_precision(peps)
-      end
-    end
-  end
-end
-class SpecID::Precision::Filter::Peps < Filter
-  # can pass in the method to call.  If you have static options and you will
-  # reuse your filter, you can pass them in here.
-  # BEWARE: this will override any passed into the method at filter time.
-  # If you need to do that, make a new, blank filter and pass in your args
-  # at filter time
-  def initialize(meth=nil, *opts)
-    @method = meth
-    if opts.size > 0
-      @opts = opts
-    else
-      @opts = nil
-    end
-  end
-  # passes the top peptide hits per attributes that it is hashed by
-  # all hits with same score as top score are returned
-  # assumes that all attributes are cast properly: Float,Integer, etc
-  # converts xcorr, deltacn, deltamass, mass, and charge into numerical types
-  # deletes the protein array (but not relevant proteins)
-  # hashes on [pep.basename, pep.first_scan.to_i, pep.charge.to_i]
-  # returns self for chaining
-  # opts
-  #   :per => Array of attributes e.g. [:first_scan, :charge]   # TODO: allow lambda
-  #   :by  => an array for sort_by_attributes
-  #           e.g. [:xcorr, :deltacn, :ppm, {:down => [:xcorr, :deltacn]}]
-  #   :ties => *false | true | :as_array
-  #             false -     one top hit is selected at random (by sorting)
-  #             true  -     all ties are included in final answer
-  #             :as_array - ties are included as an array
-  def top_hit(peps, opts = {})
-    # get the top peptide by firstscan/charge (equivalent to .out files)
-    top_peps = []
-    #hash = peps.hash_by(*(opts[:per]))
-    per_array = opts[:per]
-    hash = peps.hash_by(*per_array)
-    ties = opts[:ties]
-    if ties == :as_array
-      as_array = true
-    end
-    hash.values.each do |v|
-      best_to_worst = v.sort_by_attributes(*(opts[:by]))
-      if ties
-        best_hit = best_to_worst.first
-        ## get the values that matter for the top hit
-        # here get the attributes we are considering
-        atts =
-          if opts[:by].last.is_a? Hash
-            opts[:by][0...-1]
-          else
-            opts[:by].dup
-          end
-        # find the best hits values
-        top_hit_vals = atts.map do |att|
-          best_hit.send(att)
-        end
-        tying_peps = []
-        best_to_worst.each do |pep|
-          tie = true
-          atts.each_with_index do |att,i|
-            unless (pep.send(att) == top_hit_vals[i])
-              tie = false
-              break
-            end
-          end
-          if tie
-            tying_peps << pep
-          else
-            break
-          end
-        end
-        if as_array
-          if tying_peps.size == 1
-            top_peps.push( *tying_peps )
-          else
-            top_peps.push( tying_peps )
-          end
-        else
-          top_peps.push( *tying_peps )
-        end
-      else
-        top_peps << best_to_worst.first
-      end
-    end
-    top_peps
-  end
-  # returns self for chaining
-  # ( >= +3 charge for the x3)
-  def standard_sequest_filter(peps, x1,x2,x3,deltacn,ppm,include_deltacnstar=true)
-    peps.select do |pep|
-      pep_deltacn = pep.deltacn
-      pep_charge = pep.charge
-      ## The outer parentheses are critical to getting the correct answer!
-      _passing = ( (pep_deltacn >= deltacn) and ((pep_charge == 1 && pep.xcorr >= x1) or (pep_charge == 2 && pep.xcorr >= x2) or (pep_charge >= 3 && pep.xcorr >= x3)) and ( pep.ppm <= ppm ))
-      if _passing
-        if ((!include_deltacnstar) && (pep_deltacn > 1.0))
-          false
-        else
-          true
-        end
-      else
-        false
-      end
-    end
-  end
-end

data/lib/spec_id/precision/output.rb DELETED Viewed

@@ -1,60 +0,0 @@
-module SpecID ; end
-module SpecID::Precision ; end
-module SpecID::Precision::Output
-  # takes a format type (as symbol) and the handle to write to
-  # if handle_or_file is a file, will open it and close (on calling close)
-  # if it is a handle, will not close it
-  def initialize(format, handle_or_file)
-    @handle =
-      if handle_or_file.is_a? String
-        @need_to_close = true
-        File.open(handle_or_file, 'w')
-      else
-        @need_to_close = false
-        handle_or_file
-      end
-    @format = format
-  end
-  # returns self
-  def print(answer)
-    send( @format, @handle, answer )
-    self
-  end
-  # turns all keys that are symbols into strings (recursively into *Hashes*)
-  def self.symbol_keys_to_string(hash)
-    new_hash = {}
-    hash.each do |k,v|
-      new_value =
-        if v.is_a? Hash
-          symbol_keys_to_string(v)
-        else
-          v
-        end
-      if k.is_a? Symbol
-        new_hash[k.to_s] = new_value
-      else
-        new_hash[k] = new_value
-      end
-    end
-    new_hash
-  end
-    # TODO: implement recursively, this has just grown and grown terribly
-  def hash_as_string(hash)
-    hash.inspect
-  end
-   # will close the handle if it is a File object
-  def close
-    if @need_to_close
-      @handle.close
-    end
-  end
-end