RubyGems - mspire - Versions diffs - 0.1.7 → 0.2.0 - Mend

mspire 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57)

data/Rakefile +41 -14
data/bin/bioworks2excel.rb +1 -1
data/bin/bioworks_to_pepxml.rb +46 -59
data/bin/fasta_shaker.rb +1 -1
data/bin/filter.rb +6 -0
data/bin/find_aa_freq.rb +23 -0
data/bin/id_precision.rb +3 -2
data/bin/mzxml_to_lmat.rb +2 -1
data/bin/pepproph_filter.rb +1 -1
data/bin/precision.rb +1 -1
data/bin/protein_summary.rb +2 -451
data/bin/raw_to_mzXML.rb +55 -0
data/bin/srf_group.rb +26 -0
data/changelog.txt +7 -0
data/lib/align.rb +3 -3
data/lib/fasta.rb +6 -1
data/lib/gi.rb +9 -4
data/lib/roc.rb +2 -0
data/lib/sample_enzyme.rb +2 -1
data/lib/spec/mzxml/parser.rb +2 -43
data/lib/spec/mzxml.rb +65 -2
data/lib/spec_id/aa_freqs.rb +10 -7
data/lib/spec_id/bioworks.rb +67 -87
data/lib/spec_id/filter.rb +794 -0
data/lib/spec_id/precision.rb +29 -36
data/lib/spec_id/proph.rb +5 -3
data/lib/spec_id/protein_summary.rb +459 -0
data/lib/spec_id/sequest.rb +323 -271
data/lib/spec_id/srf.rb +189 -135
data/lib/spec_id.rb +276 -227
data/lib/spec_id_xml.rb +101 -0
data/lib/toppred.rb +18 -0
data/script/degenerate_peptides.rb +47 -0
data/script/filter-peps.rb +5 -1
data/test/tc_align.rb +1 -1
data/test/tc_bioworks.rb +25 -22
data/test/tc_bioworks_to_pepxml.rb +37 -4
data/test/tc_fasta.rb +3 -1
data/test/tc_fasta_shaker.rb +8 -6
data/test/tc_filter.rb +203 -0
data/test/tc_gi.rb +6 -9
data/test/tc_id_precision.rb +31 -0
data/test/tc_mzxml.rb +8 -6
data/test/tc_peptide_parent_times.rb +2 -1
data/test/tc_precision.rb +1 -1
data/test/tc_proph.rb +5 -5
data/test/tc_protein_summary.rb +36 -13
data/test/tc_sequest.rb +78 -33
data/test/tc_spec_id.rb +128 -6
data/test/tc_srf.rb +84 -38
metadata +67 -62
data/bin/fasta_cat.rb +0 -39
data/bin/fasta_cat_mod.rb +0 -59
data/bin/fasta_mod.rb +0 -57
data/bin/filter_spec_id.rb +0 -365
data/bin/raw2mzXML.rb +0 -21
data/script/gen_database_searching.rb +0 -258

data/test/tc_spec_id.rb CHANGED

@@ -8,11 +8,14 @@ class SpecIDTest < Test::Unit::TestCase
   def initialize(arg)
     super(arg)
     @tfiles = File.dirname(__FILE__) + '/tfiles/'
+    @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
     @bw = @tfiles + "bioworks_small.xml"
+    @old_prot_proph = @tfiles + 'yeast_gly_small-prot.xml'
+    @prot_proph = @tfiles + 'opd1/000_020_3prots-prot.xml'
+    @srf = @tfiles_l + '7MIX_STD_110802_1.srf'
   end
   def test_spec_id_creation
-    sp = SpecID.new
     sp = SpecID.new(@bw)
     assert_equal(106, sp.prots.size)
   end
@@ -45,20 +48,21 @@ class SpecIDTest < Test::Unit::TestCase
       end
       [write_index, bo]
     end
+    roc = ROC.new
     tp, fp = ROC.new.prep_list(answ)
-    (exp_tp, exp_fp) = ROC.new.by_tps(:fpr2, tp, fp)
+    (exp_tp, exp_fp) = roc.tps_and_ppv(tp, fp)
     sp = SpecID.new(file)
     assert_equal(19, sp.prots.size)
     tp, fp = sp.rank_and_classify(:prots, proc {|prt| prt.probability }, proc {|prt| if prt.reference =~ /^INV_/ ; false; else; true; end })
-    tps, ys = sp.by_tps(:fpr2, tp, fp)
+    (tps, ys) = roc.tps_and_ppv(tp, fp)
     assert_equal(exp_tp, tps)
     assert_equal(exp_fp, ys)
-    tps, prec, fpr = sp.tps_and_precision_and_fpr2_times2_for_prob("INV_")
+    (num_hits, prec) = sp.num_hits_and_ppv_for_prob("INV_")
     # @TODO: assert these guys for consistencies sake:
     assert_in_delta_arrays([1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14, 15], tps, 0.0000001)
-    assert_in_delta_arrays([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.916666666666667, 0.923076923076923, 0.928571428571429, 0.933333333333333, 0.882352941176471], prec, 0.0000001)
-    assert_in_delta_arrays([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.166666666666667, 0.153846153846154, 0.142857142857143, 0.133333333333333, 0.235294117647059], fpr, 0.0000001)
+    # Consistency check only:
+    assert_in_delta_arrays([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.909090909090909, 0.916666666666667, 0.923076923076923, 0.928571428571429, 0.866666666666667], prec, 0.0000001)
   end
   def assert_in_delta_arrays(one, two, delta, message=nil)
@@ -66,5 +70,123 @@ class SpecIDTest < Test::Unit::TestCase
       assert_in_delta(v, two[i], delta, message)
     end
   end
+  def test_file_type
+    assert_equal('bioworks', SpecID.file_type(@bw))
+    assert_equal('protproph', SpecID.file_type(@prot_proph))
+    assert_equal('srg', SpecID.file_type('whatever.srg'))
+    ## WOULD BE NICE TO GET THIS WORKING, TOO
+    # assert_equal('protproph', SpecID.file_type(@old_prot_proph))
+    if File.exist? @tfiles_l
+      assert File.exist?(@srf), "file #{@srf} is there"
+      assert_equal('srf', SpecID.file_type(@srf))
+    else
+      assert_nil( puts("\n--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
+    end
+  end
+  def test_non_standard_aa_removal
+    hash = {"K.PEPTIDE.Z" => "K.PEPTIDE.Z", "K.*M" => "K.M", "aI" => 'I', "YI.&" => "YI.", "EI.!@#\$%^&*(){}[]|\\;:'\"<>,?/EI" => 'EI.EI'}
+    cl = proc {|v| SpecID::Pep.remove_non_amino_acids(v) }
+    hash.each do |k,v|
+      assert_equal(v, cl.call(k))
+    end
+  end
+end
+class MyProt ; include SpecID::Prot ; end
+class MyPep ; include SpecID::Pep ; attr_accessor :xcorr end
+class TestOccamsRazor < Test::Unit::TestCase
+  def test_small
+    prots = (0..6).to_a.map do |n|
+      prot = MyProt.new
+      prot.reference = "ref_#{n}"
+      prot
+    end
+    peps = (0..12).to_a.map {|v| MyPep.new }
+    #           0   1   2   3   4   5   6   7   8   9   10  11    12
+    aaseqs = %w(AAA BBB CCC ABC AAA BBB CCC ABC DDD EEE FFF EEEEE DDD)
+    xcorrs = [1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 0.5, 0.6, 0.7, 0.8, 0.5]
+    peps.zip(aaseqs, xcorrs) do |pep,aaseq,xcorr|
+      pep.aaseq = aaseq
+      pep.xcorr = xcorr
+    end
+    prots[0].peps = peps[0,4]
+    prots[1].peps = [peps[2]]  ## should be missing
+    test_prots = prots[0,2]
+    require 'pp'
+    answ = SpecID.occams_razor(test_prots)
+    answ.each do |an|
+      assert( an[0].is_a?(SpecID::Prot), "prots are there")
+    end
+    first = answ.first
+    assert_equal( prots[0], first[0])
+    assert_equal_array_content( prots[0].peps, first[1])
+    #prots[2].peps = [peps[2]]
+    #prots[2].peps.push( peps[3] ) ## should be there since it has 2
+    #prots[3].peps = [peps[3]] ## should be missing
+  end
+  def assert_equal_array_content(exp1, ans, message='')
+    exp1.each do |item|
+      assert(ans.include?(item), "finding #{item}: #{message}")
+    end
+  end
+end
+require 'fasta'
+class TestProteinGroups < Test::Unit::TestCase
+  def test_small
+    prots = []
+    aaseq = ('A'..'Z').to_a.join('')
+    header = "prot1"
+    prots << Fasta::Prot.new(header, aaseq)
+    aaseq = ('A'..'Z').to_a.reverse.join('')
+    header = "prot1_reverse"
+    prots << Fasta::Prot.new(header, aaseq)
+    aaseq = ('A'..'Z').to_a.join('')
+    header = "prot1_identical"
+    prots << Fasta::Prot.new(header, aaseq)
+    aaseq = ('A'..'E').to_a.join('')
+    header = "prot1_short"
+    prots << Fasta::Prot.new(header, aaseq)
+    aaseq = ('A'..'E').to_a.reverse.join('')
+    header = "prot1_reverse_short"
+    prots << Fasta::Prot.new(header, aaseq)
+    fasta = Fasta.new(prots)
+    pep_seqs = %w(ABCD DEFG ABCD DEFG EDCB FEDCB XYZ RANDOM AEABA)
+    arr = SpecID::Pep.protein_groups_by_sequence(pep_seqs, fasta)
+    exp = [[prots[0], prots[2], prots[3]], [prots[0], prots[2]], [prots[0], prots[2], prots[3]], [prots[0],prots[2]], [prots[1], prots[4]], [prots[1]], [prots[0], prots[2]], [], []]
+    assert_equal(exp, arr)
+  end
 end

data/test/tc_srf.rb CHANGED

@@ -50,53 +50,70 @@ module ToMatch
   Out_files_first = {
     :num_hits => 10,
-    :charge => 1,
     :computer => "VELA",
     :date_time => "11/17/2006, 04:13 PM,",
   }
   Out_files_first_hit = {
     :mh => 1220.5128044522,
-    :deltacn => 0.0,
+    :deltacn => 0.071944423019886, ## this is the modified version
     :sp => 96.5815887451172,
     :xcorr => 1.08377742767334,
     :id => 224,
     :rsp => 13,
     :ions_matched => 8,
     :ions_total => 20,
-    :peptide => "K.LCPHLTLLPGR.F",
+    :sequence => "K.LCPHLTLLPGR.F",
+    :aaseq => "LCPHLTLLPGR",
     :reference => "gi|1786425|gb|AAC73335.1| damage-inducible protein P; putative tRNA synthetase",
+    :first_scan => 2,
+    :last_scan => 2,
+    :base_name => '7MIX_STD_110802_1',
+    :charge => 1,
   }
   Out_files_last = {
     :num_hits => 10,
-    :charge => 1,
     :computer => "VELA",
     :date_time => "11/17/2006, 04:25 PM," ,
   }
   Out_files_last_first_hit = {
     :mh => 2605.9368784522,
-    :deltacn => 0.0,
+    :deltacn => 0.03921128064394,
     :sp => 76.7447052001953,
     :xcorr => 0.915680646896362,
     :id => 13562,
     :rsp => 4,
     :ions_matched => 10,
     :ions_total => 84,
-    :peptide => "K.HLEINPNHPIVETLRQKAETHK.N",
+    :sequence => "K.HLEINPNHPIVETLRQKAETHK.N",
+    :aaseq => "HLEINPNHPIVETLRQKAETHK",
     :reference => "gi|30149327|ref|XP_293672.2| similar to ebiP7687 [Homo sapiens]",
+    :first_scan => 7161,
+    :last_scan => 7161,
+    :base_name => '7MIX_STD_110802_1',
+    :deltamass => 2605.9368784522 - 2604.8360326775,
+    :ppm => ((1.0e6 * (2605.9368784522 - 2604.8360326775)) / 2604.8360326775).abs,
+    :charge => 3,
   }
   Out_files_last_last_hit = {
     :mh => 2604.9025174522,
-    :deltacn => 0.307604849338531,
+    :deltacn => 1.1,
     :sp => 26.1511478424072,
     :xcorr => 0.634012818336487,
     :id => 8105,
     :rsp => 165,
     :ions_matched => 6,
     :ions_total => 84,
-    :peptide => "R.EAFLVNSDLTLRAQLTEFRDHK.L",
-    :reference => "gi|5453830|ref|NP_006181.1| origin recognition complex, subunit 2-like; origin"
+    :sequence => "R.EAFLVNSDLTLRAQLTEFRDHK.L",
+    :aaseq => "EAFLVNSDLTLRAQLTEFRDHK",
+    :reference => "gi|5453830|ref|NP_006181.1| origin recognition complex, subunit 2-like; origin",
+    :first_scan => 7161,
+    :last_scan => 7161,
+    :base_name => '7MIX_STD_110802_1',
+    :deltamass =>  2604.9025174522 - 2604.8360326775,
+    :ppm => ((1.0e6 * (2604.9025174522 - 2604.8360326775)) / 2604.8360326775).abs,
+    :charge => 3,
   }
   Sequest_params = {
      "add_F_Phenylalanine"=>"0.0000",
@@ -163,50 +180,79 @@ module ToMatch
 end
+tfiles = File.dirname(__FILE__) + '/tfiles/'
+tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
+tf_srf = tfiles_l + "7MIX_STD_110802_1.srf"
+tf_srf_inv = tfiles_l + "7MIX_STD_110802_1_INV.srf"
+if File.exist? tfiles_l
+  start = Time.now
+  $group = SRFGroup.new([tf_srf, tf_srf_inv])
+  $srf = $group.srfs.first
+  puts "Time to read and compile two SRF: #{Time.now - start} secs"
+end
 class TestSRF < Test::Unit::TestCase
   include ToMatch
   def initialize(arg)
     super(arg)
     @tfiles = File.dirname(__FILE__) + '/tfiles/'
     @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
-    @tf_srf = @tfiles_l + "7MIX_STD_110802_1.srf"
+    @srg_file = @tfiles + "tmp_bioworks.srg"
+    @srf = $srf
+    @group = $group
   end
   def test_basic
-    start = Time.now
-    obj = SRF.new(@tf_srf)
-    puts "TOOK: #{Time.now - start} secs"
-    ## Verify that we have everything and it is as we expect (not exhaustive)
-    head = obj.header
-    dtgen = head.dta_gen
-    ## HEADER
-    hash_match(Header, head)
-    hash_match(Dta_gen, dtgen)
-    ## DTA_FILES
-    hash_match(Dta_files_first, obj.dta_files.first)
-    hash_match(Dta_files_last, obj.dta_files.last)
-    ## OUT_FILES
-    hash_match(Out_files_first, obj.out_files.first)
-    hash_match(Out_files_first_hit, obj.out_files.first.hits.first)
-    hash_match(Out_files_last_first_hit, obj.out_files.last.hits.first)
-    hash_match(Out_files_last_last_hit, obj.out_files.last.hits.last)
-    ## SEQUEST_PARAMS
-    hash_match(Sequest_params, obj.params)
-    ## INDEX
-    assert_equal([7161, 7161, 3], obj.index.last)
-    assert_equal([2, 2, 1], obj.index.first)
-    assert_equal(obj.index.size, obj.dta_files.size)
-    assert_equal(obj.out_files.size, obj.dta_files.size)
+    if File.exist? @tfiles_l
+      ## Verify that we have everything and it is as we expect (not exhaustive)
+      head = @srf.header
+      dtgen = head.dta_gen
+      ## HEADER
+      hash_match(Header, head)
+      hash_match(Dta_gen, dtgen)
+      ## DTA_FILES
+      hash_match(Dta_files_first, @srf.dta_files.first)
+      hash_match(Dta_files_last, @srf.dta_files.last)
+      ## OUT_FILES
+      hash_match(Out_files_first, @srf.out_files.first)
+      hash_match(Out_files_first_hit, @srf.out_files.first.hits.first)
+      hash_match(Out_files_last_first_hit, @srf.out_files.last.hits.first)
+      hash_match(Out_files_last_last_hit, @srf.out_files.last.hits.last)
+      ## SEQUEST_PARAMS
+      hash_match(Sequest_params, @srf.params)
+      ## INDEX
+      assert_equal([7161, 7161, 3], @srf.index.last)
+      assert_equal([2, 2, 1], @srf.index.first)
+      assert_equal(@srf.index.size, @srf.dta_files.size)
+      assert_equal(@srf.out_files.size, @srf.dta_files.size)
+    else
+      assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
+    end
+  end
+  def test_srg
+    if File.exist? @tfiles_l
+      @group.to_srg(@srg_file)
+      assert(File.exist?(@srg_file), "file exists: " + @srg_file )
+    else
+      assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
+    end
   end
-  def hash_match(hash, obj)
+  ## treats reference special
+  def hash_match(hash, srf)
     hash.each do |k,v|
       if v.is_a? Float
         delta = v/100000
-        assert_in_delta( obj.send(k.to_sym), v, delta, "param: #{k}")
+        assert_in_delta(v, srf.send(k.to_sym), delta, "param: #{k}")
+      elsif k == :reference
+        assert_equal(v[0,38], srf.prots.first.reference)
       else
-        assert_equal(obj.send(k.to_sym), v, "param: #{k}")
+        assert_equal(v, srf.send(k.to_sym), "param: #{k}")
       end
     end
   end

metadata CHANGED

@@ -1,10 +1,10 @@
 --- !ruby/object:Gem::Specification
-rubygems_version: 0.9.0
+rubygems_version: 0.9.2
 specification_version: 1
 name: mspire
 version: !ruby/object:Gem::Version
-  version: 0.1.7
-date: 2007-03-27 00:00:00 -05:00
+  version: 0.2.0
+date: 2007-04-25 00:00:00 -05:00
 summary: Mass Spectrometry Proteomics Objects, Scripts, and Executables
 require_paths:
 - lib
@@ -29,28 +29,32 @@ post_install_message:
 authors:
 - John Prince
 files:
-- lib/spec_id
+- lib/spec_id.rb
 - lib/align
+- lib/spec_id_xml.rb
+- lib/spec_id
+- lib/toppred.rb
+- lib/align.rb
 - lib/spec
-- lib/sample_enzyme.rb
 - lib/fasta.rb
-- lib/roc.rb
-- lib/spec_id.rb
 - lib/gi.rb
-- lib/align.rb
-- lib/spec_id/srf.rb
-- lib/spec_id/bioworks.rb
-- lib/spec_id/proph.rb
+- lib/roc.rb
+- lib/sample_enzyme.rb
+- lib/align/chams.rb
 - lib/spec_id/sequest.rb
+- lib/spec_id/filter.rb
+- lib/spec_id/bioworks.rb
+- lib/spec_id/srf.rb
 - lib/spec_id/precision.rb
+- lib/spec_id/protein_summary.rb
 - lib/spec_id/aa_freqs.rb
-- lib/align/chams.rb
-- lib/spec/mzxml
-- lib/spec/mzdata
+- lib/spec_id/proph.rb
 - lib/spec/msrun.rb
 - lib/spec/scan.rb
-- lib/spec/mzxml.rb
+- lib/spec/mzxml
 - lib/spec/mzdata.rb
+- lib/spec/mzdata
+- lib/spec/mzxml.rb
 - lib/spec/mzxml/parser.rb
 - lib/spec/mzdata/parser.rb
 - INSTALL
@@ -59,62 +63,62 @@ files:
 - LICENSE
 - changelog.txt
 - release_notes.txt
-- bin/fasta_cat_mod.rb
-- bin/fasta_mod.rb
-- bin/gi2annot.rb
-- bin/protein_summary.rb
-- bin/raw2mzXML.rb
-- bin/fasta_cat.rb
 - bin/bioworks2sequestXML_gui.rb
-- bin/bioworks2excel.rb
+- bin/srf_group.rb
 - bin/pepproph_filter.rb
-- bin/filter_spec_id.rb
-- bin/bioworks_to_pepxml.rb
-- bin/mzxml_to_lmat.rb
+- bin/filter.rb
 - bin/protxml2prots_peps.rb
-- bin/id_precision.rb
+- bin/raw_to_mzXML.rb
+- bin/gi2annot.rb
 - bin/id_class_anal.rb
 - bin/precision.rb
+- bin/id_precision.rb
+- bin/protein_summary.rb
+- bin/bioworks_to_pepxml.rb
+- bin/bioworks2excel.rb
+- bin/mzxml_to_lmat.rb
 - bin/fasta_shaker.rb
+- bin/find_aa_freq.rb
 - script/prep_dir.rb
+- script/degenerate_peptides.rb
+- script/histogram_probs.rb
+- script/simple_protein_digestion.rb
+- script/top_hit_per_scan.rb
 - script/msvis.rb
-- script/gen_database_searching.rb
 - script/mzXML2timeIndex.rb
 - script/tpp_installer.rb
-- script/create_little_pepxml.rb
-- script/histogram_probs.rb
-- script/top_hit_per_scan.rb
 - script/filter-peps.rb
-- script/simple_protein_digestion.rb
-- script/genuine_tps_and_probs.rb
 - script/estimate_fpr_by_cysteine.rb
+- script/genuine_tps_and_probs.rb
+- script/create_little_pepxml.rb
 - script/find_cysteine_background.rb
 test_files:
-- test/tc_srf.rb
-- test/tc_proph.rb
-- test/tc_sequest.rb
-- test/tc_align.rb
-- test/tc_spec.rb
-- test/tc_aa_freqs.rb
-- test/tc_protein_summary.rb
+- test/tc_spec_id_xml.rb
+- test/tc_mzxml_to_lmat.rb
+- test/tc_id_class_anal.rb
+- test/tc_gi.rb
 - test/tc_fasta.rb
-- test/tc_bioworks.rb
 - test/tc_peptide_parent_times.rb
-- test/tc_msrun.rb
 - test/tc_spec_id.rb
+- test/tc_roc.rb
 - test/tc_mzxml.rb
+- test/tc_sample_enzyme.rb
+- test/tc_srf.rb
+- test/tc_bioworks.rb
+- test/tc_spec.rb
+- test/tc_bioworks_to_pepxml.rb
+- test/tc_scan.rb
+- test/tc_sequest.rb
+- test/tc_fasta_shaker.rb
 - test/tc_id_precision.rb
-- test/tc_id_class_anal.rb
+- test/tc_msrun.rb
+- test/tc_protein_summary.rb
 - test/tc_filter_peps.rb
+- test/tc_filter.rb
+- test/tc_aa_freqs.rb
+- test/tc_proph.rb
+- test/tc_align.rb
 - test/tc_precision.rb
-- test/tc_roc.rb
-- test/tc_scan.rb
-- test/tc_mzxml_to_lmat.rb
-- test/tc_bioworks_to_pepxml.rb
-- test/tc_sample_enzyme.rb
-- test/tc_fasta_shaker.rb
-- test/tc_gi.rb
-- test/tc_spec_id_xml.rb
 rdoc_options:
 - --main
 - README
@@ -125,29 +129,30 @@ extra_rdoc_files:
 - INSTALL
 - LICENSE
 executables:
-- fasta_cat_mod.rb
-- fasta_mod.rb
-- gi2annot.rb
-- protein_summary.rb
-- raw2mzXML.rb
-- fasta_cat.rb
 - bioworks2sequestXML_gui.rb
-- bioworks2excel.rb
+- srf_group.rb
 - pepproph_filter.rb
-- filter_spec_id.rb
-- bioworks_to_pepxml.rb
-- mzxml_to_lmat.rb
+- filter.rb
 - protxml2prots_peps.rb
-- id_precision.rb
+- raw_to_mzXML.rb
+- gi2annot.rb
 - id_class_anal.rb
 - precision.rb
+- id_precision.rb
+- protein_summary.rb
+- bioworks_to_pepxml.rb
+- bioworks2excel.rb
+- mzxml_to_lmat.rb
 - fasta_shaker.rb
+- find_aa_freq.rb
 extensions: []
 requirements:
 - "\"xmlparser\" is the prefered xml parser right now.  REXML and regular expressions are used as fallback in some routines."
 - some plotting functions will not be available without the "gnuplot" gem (and underlying gnuplot binary)
-- the "t2x" binary to convert .RAW files to mzXML is expected in some applications
+- the "t2x" binary (in archive) or readw.exe is required to convert .RAW files to mzXML in some applications
+- "\"rake\" is useful for development"
+- "\"webgen (with gems redcloth and bluecloth) is necessary to build web pages"
 dependencies:
 - !ruby/object:Gem::Dependency
   name: libjtp

data/bin/fasta_cat.rb DELETED

@@ -1,39 +0,0 @@
-#!/usr/bin/ruby
-require 'fasta'
-require 'getoptlong'
-connector = Fasta::FILE_CONNECTOR
-# Get the prefix option:
-opts = GetoptLong.new(
-  [ "-p",    "--prefixes",        GetoptLong::REQUIRED_ARGUMENT]
-)
-opt_hash = {}
-opts.each do |opt, arg|
-  opt_hash[opt] = arg
-end
-prefix_array = nil
-if opt_hash.key?('-p')
-  prefix_array = opt_hash['-p'].split(',')
-end
-# Usage info:
-if ARGV.size < 2
-  puts "
-  usage: #{File.basename(__FILE__)} [-p=prefix1,prefix2,...] <file1>.fasta <file2>.fasta ...
-  Concatenates the files together with '#{connector}' (the file extension will
-  be the extension of the first file).
-  -p    prefixes protein headers with the corresponding value in the comma
-  separated list.
-  "
-  exit
-end
-files = ARGV.to_a
-outfile = Fasta.cat_and_prefix(files, prefix_array, connector)
-puts "OUTFILE: #{outfile}"

data/bin/fasta_cat_mod.rb DELETED

@@ -1,59 +0,0 @@
-#!/usr/bin/ruby
-require 'fasta'
-require 'optparse'
-hash = {
-  'shuffle' => {
-    'method' => :aaseq_shuffle!,
-    'protein_header_prefix' => Fasta::SHUFF_PREFIX,
-    'file_postfix' => Fasta::CAT_SHUFF_FILE_POSTFIX,
-  },
-  'invert' => {
-    'method' => :aaseq_invert!,
-    'protein_header_prefix' => Fasta::INV_PREFIX,
-    'file_postfix' => Fasta::CAT_INV_FILE_POSTFIX,
-  },
-}
-opt = {}
-OptionParser.new do |opts|
-  opts.on("-f", "--fraction FLOAT", "fraction") {|v| opt['f'] = v }
-end.parse!
-if ARGV.size < 2
-  puts "
-  usage: #{File.basename(__FILE__)} [-f <fraction>] <method> <file>.fasta ...
-  The AA seq's of (a fraction of) proteins will be modified according to
-  <method> and concatenated to the end of the normal proteins.  Each modified
-  protein's header takes on a header prefix after the '>'.  Each file takes on
-  a postfix (before the extension).
-  METHOD      PROT_PREFIX     FILE_POSTFIX
-  shuffle     #{hash['shuffle']['protein_header_prefix']}          #{hash['shuffle']['file_postfix']}
-  invert      #{hash['invert']['protein_header_prefix']}            #{hash['invert']['file_postfix']}
-  "
-  exit
-end
-method = ARGV.shift
-opt_h = nil
-if hash.key? method
-  opth = hash[method]
-else
-  abort "invalid method! choose: #{hash.keys.join(", ")}"
-end
-fraction = 1; if opt.key?('f') then fraction = opt['f'] end
-specific_method = opth['method']
-file_postfix = opth['file_postfix']
-protein_header_prefix = opth['protein_header_prefix']
-#puts [file, specific_method, fraction, file_postfix, protein_header_prefix].join("*")
-ARGV.each do |file|
-  outfile = Fasta.modify_fraction_and_cat_to_file(file, specific_method, fraction, file_postfix, protein_header_prefix)
-  puts "OUTPUT: #{outfile}"
-end