RubyGems - mspire - Versions diffs - 0.4.9 → 0.5.0 - Mend

mspire 0.4.9 → 0.5.0

Files changed (255)

data/README +27 -17
data/changelog.txt +31 -62
data/lib/ms/calc.rb +32 -0
data/lib/ms/data/interleaved.rb +60 -0
data/lib/ms/data/lazy_io.rb +73 -0
data/lib/ms/data/lazy_string.rb +15 -0
data/lib/ms/data/simple.rb +59 -0
data/lib/ms/data/transposed.rb +41 -0
data/lib/ms/data.rb +57 -0
data/lib/ms/format/format_error.rb +12 -0
data/lib/ms/spectrum.rb +25 -384
data/lib/ms/support/binary_search.rb +126 -0
data/lib/ms.rb +10 -10
metadata +38 -350
data/INSTALL +0 -58
data/README.rdoc +0 -18
data/Rakefile +0 -330
data/bin/aafreqs.rb +0 -23
data/bin/bioworks2excel.rb +0 -14
data/bin/bioworks_to_pepxml.rb +0 -148
data/bin/bioworks_to_pepxml_gui.rb +0 -225
data/bin/fasta_shaker.rb +0 -5
data/bin/filter_and_validate.rb +0 -5
data/bin/gi2annot.rb +0 -14
data/bin/id_class_anal.rb +0 -112
data/bin/id_precision.rb +0 -172
data/bin/ms_to_lmat.rb +0 -67
data/bin/pepproph_filter.rb +0 -16
data/bin/prob_validate.rb +0 -6
data/bin/protein_summary.rb +0 -6
data/bin/protxml2prots_peps.rb +0 -32
data/bin/raw_to_mzXML.rb +0 -55
data/bin/run_percolator.rb +0 -122
data/bin/sqt_group.rb +0 -26
data/bin/srf_group.rb +0 -27
data/bin/srf_to_sqt.rb +0 -40
data/lib/align/chams.rb +0 -78
data/lib/align.rb +0 -154
data/lib/archive/targz.rb +0 -94
data/lib/bsearch.rb +0 -120
data/lib/core_extensions.rb +0 -16
data/lib/fasta.rb +0 -626
data/lib/gi.rb +0 -124
data/lib/group_by.rb +0 -10
data/lib/index_by.rb +0 -11
data/lib/merge_deep.rb +0 -21
data/lib/ms/converter/mzxml.rb +0 -77
data/lib/ms/gradient_program.rb +0 -170
data/lib/ms/msrun.rb +0 -244
data/lib/ms/msrun_index.rb +0 -108
data/lib/ms/parser/mzdata/axml.rb +0 -67
data/lib/ms/parser/mzdata/dom.rb +0 -175
data/lib/ms/parser/mzdata/libxml.rb +0 -7
data/lib/ms/parser/mzdata.rb +0 -31
data/lib/ms/parser/mzxml/axml.rb +0 -70
data/lib/ms/parser/mzxml/dom.rb +0 -182
data/lib/ms/parser/mzxml/hpricot.rb +0 -253
data/lib/ms/parser/mzxml/libxml.rb +0 -19
data/lib/ms/parser/mzxml/regexp.rb +0 -122
data/lib/ms/parser/mzxml/rexml.rb +0 -72
data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
data/lib/ms/parser/mzxml.rb +0 -282
data/lib/ms/parser.rb +0 -108
data/lib/ms/precursor.rb +0 -25
data/lib/ms/scan.rb +0 -81
data/lib/mspire.rb +0 -4
data/lib/pi_zero.rb +0 -244
data/lib/qvalue.rb +0 -161
data/lib/roc.rb +0 -187
data/lib/sample_enzyme.rb +0 -160
data/lib/scan_i.rb +0 -21
data/lib/spec_id/aa_freqs.rb +0 -170
data/lib/spec_id/bioworks.rb +0 -497
data/lib/spec_id/digestor.rb +0 -138
data/lib/spec_id/mass.rb +0 -179
data/lib/spec_id/parser/proph.rb +0 -335
data/lib/spec_id/precision/filter/cmdline.rb +0 -218
data/lib/spec_id/precision/filter/interactive.rb +0 -134
data/lib/spec_id/precision/filter/output.rb +0 -148
data/lib/spec_id/precision/filter.rb +0 -637
data/lib/spec_id/precision/output.rb +0 -60
data/lib/spec_id/precision/prob/cmdline.rb +0 -160
data/lib/spec_id/precision/prob/output.rb +0 -94
data/lib/spec_id/precision/prob.rb +0 -249
data/lib/spec_id/proph/pep_summary.rb +0 -104
data/lib/spec_id/proph/prot_summary.rb +0 -484
data/lib/spec_id/proph.rb +0 -4
data/lib/spec_id/protein_summary.rb +0 -489
data/lib/spec_id/sequest/params.rb +0 -316
data/lib/spec_id/sequest/pepxml.rb +0 -1458
data/lib/spec_id/sequest.rb +0 -33
data/lib/spec_id/sqt.rb +0 -349
data/lib/spec_id/srf.rb +0 -973
data/lib/spec_id.rb +0 -778
data/lib/spec_id_xml.rb +0 -99
data/lib/transmem/phobius.rb +0 -147
data/lib/transmem/toppred.rb +0 -368
data/lib/transmem.rb +0 -157
data/lib/validator/aa.rb +0 -48
data/lib/validator/aa_est.rb +0 -112
data/lib/validator/background.rb +0 -77
data/lib/validator/bias.rb +0 -95
data/lib/validator/cmdline.rb +0 -431
data/lib/validator/decoy.rb +0 -107
data/lib/validator/digestion_based.rb +0 -70
data/lib/validator/probability.rb +0 -51
data/lib/validator/prot_from_pep.rb +0 -234
data/lib/validator/q_value.rb +0 -32
data/lib/validator/transmem.rb +0 -272
data/lib/validator/true_pos.rb +0 -46
data/lib/validator.rb +0 -197
data/lib/xml.rb +0 -38
data/lib/xml_style_parser.rb +0 -119
data/lib/xmlparser_wrapper.rb +0 -19
data/release_notes.txt +0 -2
data/script/compile_and_plot_smriti_final.rb +0 -97
data/script/create_little_pepxml.rb +0 -61
data/script/degenerate_peptides.rb +0 -47
data/script/estimate_fpr_by_cysteine.rb +0 -226
data/script/extract_gradient_programs.rb +0 -56
data/script/find_cysteine_background.rb +0 -137
data/script/genuine_tps_and_probs.rb +0 -136
data/script/get_apex_values_rexml.rb +0 -44
data/script/histogram_probs.rb +0 -61
data/script/mascot_fix_pepxml.rb +0 -123
data/script/msvis.rb +0 -42
data/script/mzXML2timeIndex.rb +0 -25
data/script/peps_per_bin.rb +0 -67
data/script/prep_dir.rb +0 -121
data/script/simple_protein_digestion.rb +0 -27
data/script/smriti_final_analysis.rb +0 -103
data/script/sqt_to_meta.rb +0 -24
data/script/top_hit_per_scan.rb +0 -67
data/script/toppred_to_yaml.rb +0 -47
data/script/tpp_installer.rb +0 -249
data/specs/align_spec.rb +0 -79
data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
data/specs/bin/fasta_shaker_spec.rb +0 -259
data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
data/specs/bin/filter_and_validate_spec.rb +0 -180
data/specs/bin/ms_to_lmat_spec.rb +0 -34
data/specs/bin/prob_validate_spec.rb +0 -86
data/specs/bin/protein_summary_spec.rb +0 -14
data/specs/fasta_spec.rb +0 -354
data/specs/gi_spec.rb +0 -22
data/specs/load_bin_path.rb +0 -7
data/specs/merge_deep_spec.rb +0 -13
data/specs/ms/gradient_program_spec.rb +0 -77
data/specs/ms/msrun_spec.rb +0 -498
data/specs/ms/parser_spec.rb +0 -92
data/specs/ms/spectrum_spec.rb +0 -87
data/specs/pi_zero_spec.rb +0 -115
data/specs/qvalue_spec.rb +0 -39
data/specs/roc_spec.rb +0 -251
data/specs/rspec_autotest.rb +0 -149
data/specs/sample_enzyme_spec.rb +0 -126
data/specs/spec_helper.rb +0 -135
data/specs/spec_id/aa_freqs_spec.rb +0 -52
data/specs/spec_id/bioworks_spec.rb +0 -148
data/specs/spec_id/digestor_spec.rb +0 -75
data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
data/specs/spec_id/precision/filter/output_spec.rb +0 -31
data/specs/spec_id/precision/filter_spec.rb +0 -246
data/specs/spec_id/precision/prob_spec.rb +0 -44
data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
data/specs/spec_id/protein_summary_spec.rb +0 -189
data/specs/spec_id/sequest/params_spec.rb +0 -68
data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
data/specs/spec_id/sequest_spec.rb +0 -38
data/specs/spec_id/sqt_spec.rb +0 -246
data/specs/spec_id/srf_spec.rb +0 -172
data/specs/spec_id/srf_spec_helper.rb +0 -139
data/specs/spec_id_helper.rb +0 -33
data/specs/spec_id_spec.rb +0 -366
data/specs/spec_id_xml_spec.rb +0 -33
data/specs/transmem/phobius_spec.rb +0 -425
data/specs/transmem/toppred_spec.rb +0 -298
data/specs/transmem_spec.rb +0 -60
data/specs/transmem_spec_shared.rb +0 -64
data/specs/validator/aa_est_spec.rb +0 -66
data/specs/validator/aa_spec.rb +0 -40
data/specs/validator/background_spec.rb +0 -67
data/specs/validator/bias_spec.rb +0 -122
data/specs/validator/decoy_spec.rb +0 -51
data/specs/validator/fasta_helper.rb +0 -26
data/specs/validator/prot_from_pep_spec.rb +0 -141
data/specs/validator/transmem_spec.rb +0 -146
data/specs/validator/true_pos_spec.rb +0 -58
data/specs/validator_helper.rb +0 -33
data/specs/xml_spec.rb +0 -12
data/test_files/000_pepxml18_small.xml +0 -206
data/test_files/020a.mzXML.timeIndex +0 -4710
data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
data/test_files/4-03-03_small-prot.xml +0 -321
data/test_files/4-03-03_small.xml +0 -3876
data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
data/test_files/bioworks-3.3_10prots.xml +0 -5999
data/test_files/bioworks31.params +0 -77
data/test_files/bioworks32.params +0 -62
data/test_files/bioworks33.params +0 -63
data/test_files/bioworks_single_run_small.xml +0 -7237
data/test_files/bioworks_small.fasta +0 -212
data/test_files/bioworks_small.params +0 -63
data/test_files/bioworks_small.phobius +0 -109
data/test_files/bioworks_small.toppred.out +0 -2847
data/test_files/bioworks_small.xml +0 -5610
data/test_files/bioworks_with_INV_small.xml +0 -3753
data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
data/test_files/corrupted_900.srf +0 -0
data/test_files/head_of_7MIX.srf +0 -0
data/test_files/interact-opd1_mods_small-prot.xml +0 -304
data/test_files/messups.fasta +0 -297
data/test_files/opd1/000.my_answer.100lines.xml +0 -101
data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
data/test_files/opd1/000_020-prot.png +0 -0
data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
data/test_files/opd1/000_020_3prots-prot.xml +0 -62
data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
data/test_files/opd1/sequest.3.1.params +0 -77
data/test_files/opd1/sequest.3.2.params +0 -62
data/test_files/opd1/twenty_scans.mzXML +0 -418
data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
data/test_files/opd1/twenty_scans_answ.lmat +0 -0
data/test_files/opd1/twenty_scans_answ.lmata +0 -9
data/test_files/opd1_020_beginning.RAW +0 -0
data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
data/test_files/pepproph_small.xml +0 -4691
data/test_files/phobius.small.noheader.txt +0 -50
data/test_files/phobius.small.small.txt +0 -53
data/test_files/s01_anC1_ld020mM.key.txt +0 -25
data/test_files/s01_anC1_ld020mM.meth +0 -0
data/test_files/small.fasta +0 -297
data/test_files/small.sqt +0 -87
data/test_files/smallraw.RAW +0 -0
data/test_files/tf_bioworks2excel.bioXML +0 -14340
data/test_files/tf_bioworks2excel.txt.actual +0 -1035
data/test_files/toppred.small.out +0 -416
data/test_files/toppred.xml.out +0 -318
data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
data/test_files/yeast_gly_small-prot.xml +0 -265
data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
data/test_files/yeast_gly_small.xml +0 -3807
data/test_files/yeast_gly_small2.parentTimes +0 -6

data/lib/sample_enzyme.rb DELETED Viewed

@@ -1,160 +0,0 @@
-module SpecIDXML; end
-require 'strscan'
-require 'spec_id_xml'
-require 'spec_id'
-class SampleEnzyme
-  include SpecIDXML
-  attr_accessor :name
-  # amino acids after which to cleave
-  attr_accessor :cut
-  # cleave at 'cut' amino acids UNLESS it is followed by 'no_cut'
-  attr_accessor :no_cut
-  # 'C' or 'N'
-  attr_accessor :sense
-  # Currently, recognize:
-  #   trypsin
-  # For other enzymes, you must set :cut, :no_cut, :name, and :sense
-  # will yield the object if you want to set the values that way
-  def initialize(name=nil)
-    @num_missed_cleavages_regex = nil
-    @sense = nil
-    @cut = nil
-    @no_cut = nil
-    @name = name
-    if @name
-      # set the values if we recognize this name
-      send("set_#{@name}".to_sym)
-    end
-    if block_given?
-      yield(self)
-    end
-  end
-  def set_trypsin
-    @sense = 'C'
-    @cut = 'KR'
-    @no_cut = 'P'
-  end
-  def to_pepxml
-    element_xml(:sample_enzyme, [:name]) do
-      short_element_xml(:specificity, [:cut, :no_cut, :sense])
-    end
-  end
-  # returns self
-  def from_pepxml_node(node)
-    self.name = node['name']
-    ch = node.child
-    self.cut = ch['cut']
-    self.no_cut= ch['no_cut']
-    self.sense = ch['sense']
-    self
-  end
-  def self.from_pepxml_node(node)
-    self.new.from_pepxml_node(node)
-  end
-  # takes an amino acid sequence (e.g., -.PEPTIDK.L)
-  # returns the number of missed cleavages
-  def num_missed_cleavages(aaseq)
-    raise NotImplementedError, 'need to implement for N terminal sense'  if sense == 'N'
-    @num_missed_cleavages_regex =
-      if @num_missed_cleavages_regex ; @num_missed_cleavages_regex
-      else
-        regex_string = "[#{@cut}]"
-        if @no_cut and @no_cut != ''
-          regex_string << "[^#{@no_cut}]"
-        end
-        /#{regex_string}/
-      end
-    arr = aaseq.scan(@num_missed_cleavages_regex)
-    num = arr.size
-    if aaseq[-1,1] =~ @num_missed_cleavages_regex
-      num -= 1
-    end
-    num
-  end
-  # requires full sequence (with heads and tails)
-  def num_tol_term(sequence)
-    raise NotImplementedError, 'need to implement for N terminal sense'  if sense == 'N'
-    no_cut = @no_cut || ''
-    num_tol = 0
-    first, middle, last = SpecID::Pep.split_sequence(sequence)
-    last_of_middle = middle[-1,1]
-    first_of_middle = middle[0,1]
-    if ( @cut.include?(first) && !no_cut.include?(first_of_middle) ) || first == '-'
-      num_tol += 1
-    end
-    if @cut.include?(last_of_middle) && !no_cut.include?(last) || last == '-'
-      num_tol += 1
-    end
-    num_tol
-  end
-  # returns all peptides of missed cleavages <= 'missed_cleavages'
-  # so missed_cleavages=2 returns all peptides with no missed cleavages,
-  # all with 1 missed cleavage, and all with 2 missed cleavages.
-  # options:
-  def digest(string, missed_cleavages=0, options={})
-    raise NotImplementedError if @sense == 'N'
-    s = StringScanner.new(string)
-    no_cut_regex = Regexp.new("[#{@no_cut}]")
-    regex = Regexp.new("[#{@cut}]")
-    peps = []
-    last_pos = 0
-    current_pep = ''
-    loop do
-      if s.eos?
-        break
-      end
-      m = s.scan_until(regex)
-      if m  ## found a cut point
-        last_pos = s.pos
-        # is the next amino acid a no_cut?
-        if string[s.pos,1] =~ no_cut_regex
-          current_pep << m
-        else
-          # cut it
-          current_pep << m
-          peps << current_pep
-          current_pep = ''
-        end
-      else  ## didn't find a cut point
-        current_pep << string[last_pos..-1]
-        peps << current_pep
-        break
-      end
-    end
-    ## LOOP through and grab each set of missed cleavages from num down to 0
-    all_sets_of_peps = []
-    (0..missed_cleavages).to_a.reverse.each do |num_mc|
-      all_sets_of_peps.push( *(get_missed_cleavages(peps, num_mc)) )
-    end
-    all_sets_of_peps
-  end
-  # takes an array of peptides and returns an array containing 'num' missed
-  # cleavages
-  # DOES NOT contain peptides that contain < num of missed cleavages
-  # (i.e., will not return missed cleavages of 1 or 2 if num == 3)
-  def get_missed_cleavages(ar_of_peptide_seqs, num)
-    (0...(ar_of_peptide_seqs.size - num)).to_a.map do |i|
-      ar_of_peptide_seqs[i,num+1].join
-    end
-  end
-  def self.tryptic(string, missed_cleavages=0)
-    self.new("trypsin").digest(string, missed_cleavages)
-  end
-end

data/lib/scan_i.rb DELETED Viewed

@@ -1,21 +0,0 @@
-# http://groups.google.com/group/comp.lang.ruby/browse_thread/thread/7370f94e852c0fae/4068c8c1c1c158ee
-class String
-  def scan_i seq
-    pos=0
-    ndx=[]
-    slen = seq.length
-    while i=index(seq,pos)
-      ndx << i
-      pos = i + slen
-    end
-    ndx
-  end
-  #def scan_enum seq
-  #  self.enum_for(:scan, seq).map do
-  #    $~.offset(0)[0]
-  #  end
-  #end
-end

data/lib/spec_id/aa_freqs.rb DELETED Viewed

@@ -1,170 +0,0 @@
-require 'fasta'
-module SpecID ; end
-class SpecID::AAFreqs
-  # hash by capital one-letter amino acid symbols giving the frequency of
-  # seeing that amino acid.  Frequencies should add to 1.
-  attr_accessor :aafreqs
-  # fasta is a fasta object (it must respond to :prots)
-  def initialize(fasta=nil)
-    if fasta
-      @aafreqs = calculate_frequencies(fasta.prots)
-    end
-  end
-  # takes an enumerable of objects responding to :aaseq and creates an aafreqs hash
-  def calculate_frequencies(objs)
-    hash = {}
-    total_aas = 0
-    ('A'..'Z').each do |x|
-      hash[x] = 0
-    end
-    hash['*'] = 0
-    objs.each do |obj|
-      aaseq = obj.aaseq
-      total_aas += aaseq.size
-      aaseq.split('').each do |x|
-        hash[x] += 1
-      end
-    end
-    # normalize by total amount:
-    hash.each do |k,v|
-      hash[k] = hash[k].to_f / total_aas
-    end
-    # convert all strings to symbols:
-    hash.each do |k,v|
-      hash[k.to_sym] = hash.delete(k)
-    end
-    hash
-  end
-  # The expected probability for seeing that amino acid in a given length.
-  # This calculates a lookup table (array) from 0 to max_length of the
-  # probability of seeing at least one amino acid (given its frequency, where
-  # frequency is from 0 to 1)
-  def self.probability_of_length_table(frequency, max_length)
-    one_minus_freq = 1.0 - frequency.to_f
-    lookup = Array.new(max_length + 1)
-    (0..max_length).each do |len|
-      lookup[len] =  1.0 - (one_minus_freq**len);
-    end
-    lookup
-  end
-  # takes an array of peptide strings
-  # gives the actual number of peptides with at least one of amino_acid
-  # gives the expected number of peptides given the probabilities in the
-  # length lookup table.
-  # currently ONLY takes at_least = 1
-  # depends on @aafreqs
-  # returns two numbers in array [actual, expected]
-  # expected is a Float!!!
-  def actual_and_expected_number(peptide_aaseqs, amino_acid=:C, at_least=1)
-    if at_least > 1
-      raise NotImplementedError, "can only do at_least=1 right now!"
-    end
-    one_minus_freq = 1.0 - @aafreqs[amino_acid.to_sym]
-    amino_acid_as_st = amino_acid.to_s
-    probs = []
-    actual = 0
-    expected = 0.0
-    peptide_aaseqs.each do |pep|
-      expected += (1.0 - (one_minus_freq**pep.size))
-      if pep.include?(amino_acid_as_st)
-        actual += 1
-      end
-    end
-    [actual, expected]
-  end
-  # pep_objs is an enumerable of objects responding to :aaseq
-  # also takes a hash of peptides keyed on :aaseq
-  def actual_and_expected_number_containing_cysteines(pep_objs, cyst_freq)
-    if pep_objs.is_a? Hash
-      seqs = pep_objs.keys
-    else
-      seqs = pep_objs.map do |v|
-        v.aaseq
-      end
-    end
-    @aafreqs ||= {}
-    @aafreqs[:C] = cyst_freq
-    actual_and_expected_number(seqs, :C, 1)
-  end
-  ##
-=begin
-  foreach my $pep (@$peps) {
-        unless ($pep->prob() >= $prob_cutoff) {next;}
-        my %freq = ();
-        my $aa = $pep->AA_sequence();
-        my $len = length($aa);
-        ## EXPECTED probability for each length
-        for (my $i = 0; $i < 20; $i++) {
-            ## rolling at least one 6 in n rolls is 1 - (5/6)^n.
-            $expected[$cnt][$i] = 1 - (($freqs_inv[$i])**$len);
-        }
-        ## FILTER any peptides we've already seen
-        if ($seen{$aa}) { next; }
-        else { $seen{$aa}++; }
-        ## Fill in these values with zeroes:
-        for (my $a = 0; $a < 20; $a++) { $pepc[$cnt][$a] = 0; }
-        ## get the frequencies for each AA in each peptide:
-        for (my $i = 0; $i < $len; $i++) {
-            my $let = substr($aa, $i, 1);
-            $tot_freq{$let}++;
-            $pepc[$cnt][$an{$let}]++;
-        }
-        $cnt++;
-    }
-##############################################################
-# ANALYSIS 2: Fraction of Peptides containing X Amino Acid
-##############################################################
-## What is the percentage of peptides containing at least 1 cysteine?
-    my $atleast = 1;
-    my @has;
-## initialize
-    for (my $i = 0; $i < 20; $i++) { $has[$i] = 0; }
-    my $tot = scalar(@pepc);
-    foreach my $pep (@pepc) {
-        for (my $index = 0; $index < 20; $index++) {
-            if ($pep->[$index] >= $atleast) {
-                $has[$index]++;
-            }
-        }
-    }
-    my @exp_sum = ();  ## The total number of peptides I'd expect
-## WE simply add up the peptides' probabilities
-## can think of it like this avg(peptide_prob) * #peptides = sum(pep_prob)
-    foreach my $pep (@expected) {
-        for (my $i = 0; $i < 20; $i++) {
-            $exp_sum[$i] += $pep->[$i];
-        }
-    }
-    my @obs = map { $_/$tot } @has;
-    my @exp = map { $_/$tot } @exp_sum;
-    print STDERR "*********************************************\n";
-    print "Fraction of peptides (obs and expected)\nwith at least one of the AA:\n";
-    print "[AA] [Observed] [Predicted]\n";
-    for (my $i = 0; $i < 20; $i++) {
-        print "$AA[$i] $obs[$i] $exp[$i]\n";
-    }
-    print STDERR "*********************************************\n";
-=end
-end