RubyGems - mspire - Versions diffs - 0.1.5 → 0.1.7 - Mend

mspire 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47)

data/Rakefile +5 -2
data/bin/bioworks_to_pepxml.rb +84 -40
data/bin/fasta_shaker.rb +100 -0
data/bin/filter_spec_id.rb +185 -23
data/bin/gi2annot.rb +2 -110
data/bin/id_class_anal.rb +31 -21
data/bin/id_precision.rb +12 -8
data/bin/{false_positive_rate.rb → precision.rb} +1 -1
data/bin/protein_summary.rb +55 -62
data/changelog.txt +34 -0
data/lib/align.rb +0 -1
data/lib/fasta.rb +88 -24
data/lib/gi.rb +114 -0
data/lib/roc.rb +64 -58
data/lib/spec_id/aa_freqs.rb +166 -0
data/lib/spec_id/bioworks.rb +5 -1
data/lib/spec_id/precision.rb +427 -0
data/lib/spec_id/proph.rb +2 -2
data/lib/spec_id/sequest.rb +810 -113
data/lib/spec_id/srf.rb +486 -0
data/lib/spec_id.rb +107 -23
data/release_notes.txt +11 -0
data/script/estimate_fpr_by_cysteine.rb +226 -0
data/script/filter-peps.rb +3 -3
data/script/find_cysteine_background.rb +137 -0
data/script/gen_database_searching.rb +11 -7
data/script/genuine_tps_and_probs.rb +136 -0
data/script/top_hit_per_scan.rb +5 -2
data/test/tc_aa_freqs.rb +59 -0
data/test/tc_bioworks.rb +6 -1
data/test/tc_bioworks_to_pepxml.rb +25 -18
data/test/tc_fasta.rb +81 -3
data/test/tc_fasta_shaker.rb +147 -0
data/test/tc_gi.rb +20 -0
data/test/tc_id_class_anal.rb +9 -12
data/test/tc_id_precision.rb +12 -11
data/test/{tc_false_positive_rate.rb → tc_precision.rb} +13 -22
data/test/tc_protein_summary.rb +31 -22
data/test/tc_roc.rb +95 -50
data/test/tc_sequest.rb +212 -145
data/test/tc_spec.rb +10 -5
data/test/tc_spec_id.rb +0 -2
data/test/tc_spec_id_xml.rb +36 -0
data/test/tc_srf.rb +216 -0
metadata +35 -21
data/lib/spec_id/false_positive_rate.rb +0 -476
data/test/tc_gi2annot.rb +0 -12

data/test/tc_sequest.rb CHANGED Viewed

@@ -12,17 +12,22 @@ class SequestTest < Test::Unit::TestCase
   def initialize(arg)
     super(arg)
     @tfiles = File.dirname(__FILE__) + '/tfiles/'
+    @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
     @tf_params = @tfiles + "bioworks32.params"
-    @tf_mzxml_path = @tfiles + "yeast_gly_mzXML"
+    @tf_mzxml_path = @tfiles_l + "yeast_gly_mzXML"
     @tf_bioworks_xml = @tfiles + "bioworks_small.xml"
   end
-  def Xtest_set_from_bioworks
-    out_path = '.'
-    pepxml_objs = SpecID::Sequest::PepXML.set_from_bioworks(@tf_params, @tf_bioworks_xml, @tf_mzxml_path, out_path)
-    pepxml_objs.each do |obj|
-      assert(obj.spectrum_queries.size > 2)
-      assert(obj.spectrum_queries.first.search_results.first.search_hits.size > 0)
+  def test_set_from_bioworks
+    if File.exist? @tfiles_l
+      out_path = '.'
+      pepxml_objs = SpecID::Sequest::PepXML.set_from_bioworks(@tf_params, @tf_bioworks_xml, @tf_mzxml_path, out_path)
+      pepxml_objs.each do |obj|
+        assert(obj.spectrum_queries.size > 2)
+        assert(obj.spectrum_queries.first.search_results.first.search_hits.size > 0)
+      end
+    else
+      assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
     end
   end
@@ -41,147 +46,152 @@ class SequestTest < Test::Unit::TestCase
     aep(obj, arrs)
   end
-  def test_set_from_bioworks  ## new one for opd1
-    st = Time.new
-    params = @tfiles + "opd1/sequest.3.2.params"
-    bioworks_xml = @tfiles + "opd1/bioworks.000.oldparams.xml"
-    mzxml_path = @tfiles + "opd1"
-    out_path = @tfiles
-    pepxml_version = 18
-    pepxml_objs = SpecID::Sequest::PepXML.set_from_bioworks(params, bioworks_xml, mzxml_path, out_path, pepxml_version, "trypsin")
-    puts "TOOK #{Time.new - st}secs"
-    po = pepxml_objs.first
-    assert_equal(pepxml_version, SpecID::Sequest::PepXML.pepxml_version)
-    # MSMSPipelineAnalysis
-    pipe = po.msms_pipeline_analysis
-    aep(pipe, [
-        ['http://regis-web.systemsbiology.net/pepXML', :xmlns],
-        ['http://www.w3.org/2001/XMLSchema-instance', :xmlns_xsi],
-        ['http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd', :xsi_schema_location],
-        ['000.xml', :summary_xml],
-    ])
-    # MSMSRunSummary
-    rs = pipe.msms_run_summary
-    assert_match(/test\/tfiles\/000/, rs.base_name)
-    aep(rs, [ ['ThermoFinnigan', :ms_manufacturer], ['LCQ Deca XP Plus', :ms_model], ['ESI', :ms_ionization], ['Ion Trap', :ms_mass_analyzer], ['UNKNOWN', :ms_detector], ['raw', :raw_data_type], ['.mzXML', :raw_data], ])
-    # SampleEnzyme
-    se = rs.sample_enzyme
-    aep(se, [ ['trypsin', :name], ['KR', :cut], ['P', :no_cut], ['C', :sense], ])
-    # SearchSummary
-    ss = rs.search_summary
-    assert_match(/test\/tfiles\/000/, ss.base_name)
-    assert_match(/1\.500/, ss.peptide_mass_tol)
-    aeps(ss, [ # normal attributes
-         [:search_engine, "SEQUEST"], [:precursor_mass_type, "average"], [:fragment_mass_type, "average"], [:out_data_type, "out"], [:out_data, ".tgz"], [:search_id, "1"],
-         # enzymatic_search_constraint
-         [:enzyme, 'Trypsin'], [:max_num_internal_cleavages, '2'], [:min_number_termini, '2'],
-         # parameters
-         [:fragment_ion_tol, "1.0000"], [:ion_series, "0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0"], [:max_num_differential_AA_per_mod, "3"], [:nucleotide_reading_frame, "0"], [:num_output_lines, "10"], [:remove_precursor_peak, "0"], [:ion_cutoff_percentage, "0.0000"], [:match_peak_count, "0"], [:match_peak_allowed_error, "1"], [:match_peak_tolerance, "1.0000"], [:protein_mass_filter, "0 0"],
-    ])
-    # SearchDatabase
-    sd = ss.search_database
-    aeps(sd, [ [:local_path, "C:\\Xcalibur\\database\\ecoli_K12.fasta"], [:seq_type, 'AA'], ])
-    # SpectrumQueries
-    sq = rs.spectrum_queries
-    spec = sq.first
-    aeps(spec, [
-          [:spectrum, "000.100.100.1"], [:start_scan, "100"], [:end_scan, "100"],
-          #[:precursor_neutral_mass, "1074.5920"], # out2summary
-          [:precursor_neutral_mass, "1074.666926"], # mine
-          [:assumed_charge, "1"], [:index, "1"],
-    ])
-    sh = spec.search_results.first.search_hits.first
-    aeps(sh, [
-         # normal attributes
-         [:hit_rank, "1"],
-         [:peptide, "SIYFRNFK"],
-         [:peptide_prev_aa, "R"],
-         [:peptide_next_aa, "G"],
-         [:protein, "gi|16130084|ref|NP_416651.1|"],
-         [:num_tot_proteins, "1"],
-         [:num_matched_ions, "4"],
-         [:tot_num_ions, "14"],
-         #[:calc_neutral_pep_mass, "1074.1920"], # out2summary
-         [:calc_neutral_pep_mass, "1074.23261"], # mine
-         #[:massdiff, "+0.400000"], # out2summary
-         [:massdiff, "+0.434316000000081"],  # mine
-         [:num_tol_term, "2"], [:num_missed_cleavages, "1"], [:is_rejected, "0"],
-         # search_score
-         [:xcorr, "0.400"], [:deltacn, "0.023"], [:deltacnstar, "0"], [:spscore, "78.8"], [:sprank, "1"],
-    ])
-    spec = sq[1]
-    aeps(spec, [
-          [:spectrum, "000.1000.1000.1"], [:start_scan, "1000"], [:end_scan, "1000"], #[:precursor_neutral_mass, "663.1920"], # out2summary
-          [:precursor_neutral_mass, "663.206111"], # mine
-          [:assumed_charge, "1"], [:index, "2"],
-    ])
-    sh = spec.search_results.first.search_hits.first
-    aeps(sh, [
-         # normal attributes
-         [:hit_rank, "1"], [:peptide, "ALADFK"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "S"], [:protein, "gi|16128765|ref|NP_415318.1|"], [:num_tot_proteins, "1"], [:num_matched_ions, "5"], [:tot_num_ions, "10"],
-         [:num_tol_term, "2"], [:num_missed_cleavages, "0"], [:is_rejected, "0"],
-         #[:massdiff, "-0.600000"], # out2summary
-         [:massdiff, "-0.556499000000031"],  # mine
-         #[:calc_neutral_pep_mass, "663.7920"], # out2summary
-         [:calc_neutral_pep_mass, "663.76261"], # mine
-         # search_score
-         [:xcorr, "0.965"], [:deltacn, "0.132"], [:deltacnstar, "0"], [:spscore, "81.1"], [:sprank, "1"],
-    ])
-    spec = sq[9]
-    aeps(spec, [
-          [:spectrum, "000.1008.1008.2"], [:start_scan, "1008"], [:end_scan, "1008"], [:assumed_charge, "2"], [:index, "10"],
-          #[:precursor_neutral_mass, "691.0920"], # out2summary
-          [:precursor_neutral_mass, "691.150992"], # mine
-    ])
-    sh = spec.search_results.first.search_hits.first
-    aeps(sh, [
-         # normal attributes
-         [:hit_rank, "1"], [:peptide, "RLFTR"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "A"], [:protein, "gi|16130457|ref|NP_417027.1|"], [:num_tot_proteins, "1"], [:num_matched_ions, "5"], [:tot_num_ions, "8"], [:num_tol_term, "2"],
-         #[:num_missed_cleavages, "0"],  # out2summary misses this!
-         [:num_missed_cleavages, "1"],
-         [:is_rejected, "0"],
-         #[:calc_neutral_pep_mass, "691.7920"], # out2summary
-         [:calc_neutral_pep_mass, "691.82261"], # mine
-         #[:massdiff, "-0.700000"], # out2summary
-         [:massdiff, "-0.67161800000008"],  # mine
-         # search_score
-         [:xcorr, "0.903"], [:deltacn, "0.333"], [:deltacnstar, "0"], [:spscore, "172.8"], [:sprank, "1"],
-    ])
-    ## IF ARE OBJECT IS CORRECT, THEN WE GET THE OUTPUT:
-    string = po.to_pepxml
-    ans_lines = IO.read(@tfiles + "opd1/000.my_answer.100lines.xml").split("\n")
-    string.split("\n").each_with_index do |line,i|
-    base_name_re = /base_name=".*?\/test/o
-      if i > 99 ; break end
-      if i == 1
-        assert_equal(ans_lines[i].sub(/date=".*?"/,''), line.sub(/date=".*?"/,''))
-      elsif i == 2
-        assert_equal(ans_lines[i].sub(base_name_re,''), line.sub(base_name_re,''))
-      else
-        assert_equal(ans_lines[i].sub(base_name_re,''), line.sub(base_name_re,''))
-#assert_equal(ans_lines[i], line)
+  ## turn this off if you are doing lots of tests
+  def Xtest_set_from_bioworks  ## new one for opd1
+    if File.exist? @tfiles_l
+      st = Time.new
+      params = @tfiles + "opd1/sequest.3.2.params"
+      bioworks_xml = @tfiles_l + "opd1/bioworks.000.oldparams.xml"
+      mzxml_path = @tfiles + "opd1"
+      out_path = @tfiles
+      pepxml_version = 18
+      pepxml_objs = SpecID::Sequest::PepXML.set_from_bioworks(params, bioworks_xml, mzxml_path, out_path, pepxml_version, "trypsin")
+      puts "TOOK #{Time.new - st}secs"
+      po = pepxml_objs.first
+      assert_equal(pepxml_version, SpecID::Sequest::PepXML.pepxml_version)
+      # MSMSPipelineAnalysis
+      pipe = po.msms_pipeline_analysis
+      aep(pipe, [
+          ['http://regis-web.systemsbiology.net/pepXML', :xmlns],
+          ['http://www.w3.org/2001/XMLSchema-instance', :xmlns_xsi],
+          ['http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v18.xsd', :xsi_schema_location],
+          ['000.xml', :summary_xml],
+      ])
+      # MSMSRunSummary
+      rs = pipe.msms_run_summary
+      assert_match(/test\/tfiles\/000/, rs.base_name)
+      aep(rs, [ ['ThermoFinnigan', :ms_manufacturer], ['LCQ Deca XP Plus', :ms_model], ['ESI', :ms_ionization], ['Ion Trap', :ms_mass_analyzer], ['UNKNOWN', :ms_detector], ['raw', :raw_data_type], ['.mzXML', :raw_data], ])
+      # SampleEnzyme
+      se = rs.sample_enzyme
+      aep(se, [ ['trypsin', :name], ['KR', :cut], ['P', :no_cut], ['C', :sense], ])
+      # SearchSummary
+      ss = rs.search_summary
+      assert_match(/test\/tfiles\/000/, ss.base_name)
+      assert_match(/1\.500/, ss.peptide_mass_tol)
+      aeps(ss, [ # normal attributes
+           [:search_engine, "SEQUEST"], [:precursor_mass_type, "average"], [:fragment_mass_type, "average"], [:out_data_type, "out"], [:out_data, ".tgz"], [:search_id, "1"],
+           # enzymatic_search_constraint
+           [:enzyme, 'Trypsin'], [:max_num_internal_cleavages, '2'], [:min_number_termini, '2'],
+           # parameters
+           [:fragment_ion_tol, "1.0000"], [:ion_series, "0 1 1 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0"], [:max_num_differential_AA_per_mod, "3"], [:nucleotide_reading_frame, "0"], [:num_output_lines, "10"], [:remove_precursor_peak, "0"], [:ion_cutoff_percentage, "0.0000"], [:match_peak_count, "0"], [:match_peak_allowed_error, "1"], [:match_peak_tolerance, "1.0000"], [:protein_mass_filter, "0 0"],
+      ])
+      # SearchDatabase
+      sd = ss.search_database
+      aeps(sd, [ [:local_path, "C:\\Xcalibur\\database\\ecoli_K12.fasta"], [:seq_type, 'AA'], ])
+      # SpectrumQueries
+      sq = rs.spectrum_queries
+      spec = sq.first
+      aeps(spec, [
+           [:spectrum, "000.100.100.1"], [:start_scan, "100"], [:end_scan, "100"],
+           #[:precursor_neutral_mass, "1074.5920"], # out2summary
+           [:precursor_neutral_mass, "1074.666926"], # mine
+           [:assumed_charge, "1"], [:index, "1"],
+      ])
+      sh = spec.search_results.first.search_hits.first
+      aeps(sh, [
+           # normal attributes
+           [:hit_rank, "1"],
+           [:peptide, "SIYFRNFK"],
+           [:peptide_prev_aa, "R"],
+           [:peptide_next_aa, "G"],
+           [:protein, "gi|16130084|ref|NP_416651.1|"],
+           [:num_tot_proteins, "1"],
+           [:num_matched_ions, "4"],
+           [:tot_num_ions, "14"],
+           #[:calc_neutral_pep_mass, "1074.1920"], # out2summary
+           [:calc_neutral_pep_mass, "1074.23261"], # mine
+           #[:massdiff, "+0.400000"], # out2summary
+           [:massdiff, "+0.434316000000081"],  # mine
+           [:num_tol_term, "2"], [:num_missed_cleavages, "1"], [:is_rejected, "0"],
+           # search_score
+           [:xcorr, "0.400"], [:deltacn, "0.023"], [:deltacnstar, "0"], [:spscore, "78.8"], [:sprank, "1"],
+      ])
+      spec = sq[1]
+      aeps(spec, [
+           [:spectrum, "000.1000.1000.1"], [:start_scan, "1000"], [:end_scan, "1000"], #[:precursor_neutral_mass, "663.1920"], # out2summary
+           [:precursor_neutral_mass, "663.206111"], # mine
+           [:assumed_charge, "1"], [:index, "2"],
+      ])
+      sh = spec.search_results.first.search_hits.first
+      aeps(sh, [
+           # normal attributes
+           [:hit_rank, "1"], [:peptide, "ALADFK"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "S"], [:protein, "gi|16128765|ref|NP_415318.1|"], [:num_tot_proteins, "1"], [:num_matched_ions, "5"], [:tot_num_ions, "10"],
+           [:num_tol_term, "2"], [:num_missed_cleavages, "0"], [:is_rejected, "0"],
+           #[:massdiff, "-0.600000"], # out2summary
+           [:massdiff, "-0.556499000000031"],  # mine
+           #[:calc_neutral_pep_mass, "663.7920"], # out2summary
+           [:calc_neutral_pep_mass, "663.76261"], # mine
+           # search_score
+           [:xcorr, "0.965"], [:deltacn, "0.132"], [:deltacnstar, "0"], [:spscore, "81.1"], [:sprank, "1"],
+      ])
+      spec = sq[9]
+      aeps(spec, [
+           [:spectrum, "000.1008.1008.2"], [:start_scan, "1008"], [:end_scan, "1008"], [:assumed_charge, "2"], [:index, "10"],
+           #[:precursor_neutral_mass, "691.0920"], # out2summary
+           [:precursor_neutral_mass, "691.150992"], # mine
+      ])
+      sh = spec.search_results.first.search_hits.first
+      aeps(sh, [
+           # normal attributes
+           [:hit_rank, "1"], [:peptide, "RLFTR"], [:peptide_prev_aa, "R"], [:peptide_next_aa, "A"], [:protein, "gi|16130457|ref|NP_417027.1|"], [:num_tot_proteins, "1"], [:num_matched_ions, "5"], [:tot_num_ions, "8"], [:num_tol_term, "2"],
+           #[:num_missed_cleavages, "0"],  # out2summary misses this!
+           [:num_missed_cleavages, "1"],
+           [:is_rejected, "0"],
+           #[:calc_neutral_pep_mass, "691.7920"], # out2summary
+           [:calc_neutral_pep_mass, "691.82261"], # mine
+           #[:massdiff, "-0.700000"], # out2summary
+           [:massdiff, "-0.67161800000008"],  # mine
+           # search_score
+           [:xcorr, "0.903"], [:deltacn, "0.333"], [:deltacnstar, "0"], [:spscore, "172.8"], [:sprank, "1"],
+      ])
+      ## IF ARE OBJECT IS CORRECT, THEN WE GET THE OUTPUT:
+      string = po.to_pepxml
+      ans_lines = IO.read(@tfiles + "opd1/000.my_answer.100lines.xml").split("\n")
+      string.split("\n").each_with_index do |line,i|
+        base_name_re = /base_name=".*?\/test/o
+        if i > 99 ; break end
+        if i == 1
+          assert_equal(ans_lines[i].sub(/date=".*?"/,''), line.sub(/date=".*?"/,''))
+        elsif i == 2
+          assert_equal(ans_lines[i].sub(base_name_re,''), line.sub(base_name_re,''))
+        else
+          assert_equal(ans_lines[i].sub(base_name_re,''), line.sub(base_name_re,''))
+          #assert_equal(ans_lines[i], line)
+        end
       end
+    else
+      assert_nil(puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})"))
     end
-    #assert_match(/#{Regexp.escape("")}/, string)
+      #assert_match(/#{Regexp.escape("")}/, string)
   end
@@ -220,5 +230,62 @@ class SequestTest < Test::Unit::TestCase
     assert_equal("hello.fasta", SpecID::Sequest::Params.new._sys_ind_basename("/work/john/hello.fasta"))
   end
+  def test_modifications
+    obj = SpecID::Sequest::PepXML::Modifications.new(nil, "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) ")
+    answ = {[:C, 12.0]=>"^", [:S, 80.0]=>"@", [:M, 29.0]=>"#", [:M, 15.9]=>"*", [:ct, 12.33]=>"[", [:nt, 14.2]=>"]"}
+    assert_equal(answ, obj.mod_symbols_hash, "mod_symbols_hash")
+    ## need more here
+  end
+  def test_non_standard_aa_removal
+    hash = {"K.PEPTIDE.Z" => "K.PEPTIDE.Z", "K.*M" => "K.M", "aI" => 'I', "YI.&" => "YI.", "EI.!@#\$%^&*(){}[]|\\;:'\"<>,?/EI" => 'EI.EI'}
+    cl = proc {|v| SpecID::Sequest::PepXML::SearchHit.remove_non_amino_acids(v) }
+    hash.each do |k,v|
+      assert_equal(v, cl.call(k))
+    end
+  end
+  def test_modification_info
+    hash = {
+      :mod_nterm_mass => 520.2,
+      :modified_peptide => "MOD*IFI^E&D",
+      :mod_aminoacid_mass => [[3, 150.3], [6, 345.2]],
+    }
+    answ = "<modification_info mod_nterm_mass=\"520.2\" modified_peptide=\"MOD*IFI^E&amp;D\">\n\t<mod_aminoacid_mass position=\"3\" mass=\"150.3\"/>\n\t<mod_aminoacid_mass position=\"6\" mass=\"345.2\"/>\n</modification_info>\n"
+    string = SpecID::Sequest::PepXML::SearchHit::ModificationInfo.new(hash).to_pepxml
+    assert_match(_re('<modification_info'), answ)
+    assert_match(_re(" mod_nterm_mass=\"520.2\""), answ)
+    assert_match(_re(" modified_peptide=\"MOD*IFI^E&amp;D\""), answ)
+    assert_match(_re("<mod_aminoacid_mass"), answ)
+    assert_match(_re(" position=\"3\""), answ)
+    assert_match(_re(" mass=\"150.3\""), answ)
+    assert_match(_re(" position=\"6\""), answ)
+    assert_match(_re(" mass=\"345.2\""), answ)
+    assert_match(_re("</modification_info>"), answ)
+  end
+  def _re(st)
+    /#{Regexp.escape(st)}/
+  end
+  def test_modifications
+    params = SpecID::Sequest::Params.new(@tf_params)
+    mod_string = "(M* +15.90000) (M# +29.00000) (S@ +80.00000) (C^ +12.00000) (ct[ +12.33000) (nt] +14.20000) "
+    params.diff_search_options = "15.90000 M 29.00000 M 80.00000 S 12.00000 C"
+    params.term_diff_search_options = "14.20000 12.33000"
+    assert 1
+=begin
+    mod = SpecID::Sequest::PepXML::Modifications(params, mod_string)
+SpecID::Sequest::PepXML::Modifications
+    peptide = "PEPTIDE"
+    ## no mods
+    assert_equal(nil, mod.modification_info(peptide))
+    peptide = "]M*EC^S@IDM#M*EMSCM["
+    p mod.modification_info(peptide)
+=end
+  end
 end

data/test/tc_spec.rb CHANGED Viewed

@@ -10,17 +10,22 @@ class SpecTest < Test::Unit::TestCase
   def initialize(arg)
     super(arg)
     @tfiles = File.dirname(__FILE__) + '/tfiles/'
+    @tfiles_l = File.dirname(__FILE__) + '/tfiles_large/'
     @tscans = @tfiles + "opd1/twenty_scans.mzXML"
-    @tf_mzxml_path = @tfiles + "yeast_gly_mzXML"
+    @tf_mzxml_path = @tfiles_l + "yeast_gly_mzXML"
     #@big_file = "/work/john/ISB_Proteomics_18Set/mzXML/sergei_digest_A_full_01.mzXML"
     @big_file = "../bioworks2prophet/xml/opd00001_test_set/opd00001_prophprepped/000.mzXML"
   end
   def test_mzxml_path_precursor_mz_by_scan
-    hash = Spec::MzXML::Parser.new.precursor_mz_by_scan_for_path(@tf_mzxml_path, "*.mzXML")
-    assert_equal(%w(000 020), hash.keys.sort)
-    assert(hash["000"].size > 0)
-    assert(hash["020"].size > 0)
+    if File.exist? @tfiles_l
+      hash = Spec::MzXML::Parser.new.precursor_mz_by_scan_for_path(@tf_mzxml_path, "*.mzXML")
+      assert_equal(%w(000 020), hash.keys.sort)
+      assert(hash["000"].size > 0)
+      assert(hash["020"].size > 0)
+    else
+      assert_nil( puts("--SKIPPING TEST-- (missing dir: #{@tfiles_l})") )
+    end
   end
   def test_mzxml_precursor_mz_by_scan

data/test/tc_spec_id.rb CHANGED Viewed

@@ -66,7 +66,5 @@ class SpecIDTest < Test::Unit::TestCase
       assert_in_delta(v, two[i], delta, message)
     end
   end
 end

data/test/tc_spec_id_xml.rb ADDED Viewed

@@ -0,0 +1,36 @@
+require 'test/unit'
+require 'spec_id'
+require 'ostruct'
+class Bob
+  include SpecIDXML
+  def initialize(first, second)
+    @first = first ; @second = second
+  end
+end
+class SpecIDXMLTest < Test::Unit::TestCase
+  include SpecIDXML
+  def initialize(*args)
+    super(*args)
+  end
+  def test_short_element_xml_from_instance_vars
+    obj = Bob.new(1, 2)
+    st = obj.short_element_xml_from_instance_vars("bob")
+    assert_match(/second="2"/, st)
+    assert_match(/first="1"/, st)
+    assert_match(/^<bob /, st)
+    assert_match(/>$/, st)
+  end
+  def test_escape_special_chars
+    assert_equal("&amp;&gt;&lt;&quot;&apos;" , escape_special_chars("&><\"'"))
+    assert_equal("PE&amp;PT&gt;I&lt;D&quot;E&apos;", escape_special_chars("PE&PT>I<D\"E'"))
+  end
+end