mspire 0.4.9 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +27 -17
- data/changelog.txt +31 -62
- data/lib/ms/calc.rb +32 -0
- data/lib/ms/data/interleaved.rb +60 -0
- data/lib/ms/data/lazy_io.rb +73 -0
- data/lib/ms/data/lazy_string.rb +15 -0
- data/lib/ms/data/simple.rb +59 -0
- data/lib/ms/data/transposed.rb +41 -0
- data/lib/ms/data.rb +57 -0
- data/lib/ms/format/format_error.rb +12 -0
- data/lib/ms/spectrum.rb +25 -384
- data/lib/ms/support/binary_search.rb +126 -0
- data/lib/ms.rb +10 -10
- metadata +38 -350
- data/INSTALL +0 -58
- data/README.rdoc +0 -18
- data/Rakefile +0 -330
- data/bin/aafreqs.rb +0 -23
- data/bin/bioworks2excel.rb +0 -14
- data/bin/bioworks_to_pepxml.rb +0 -148
- data/bin/bioworks_to_pepxml_gui.rb +0 -225
- data/bin/fasta_shaker.rb +0 -5
- data/bin/filter_and_validate.rb +0 -5
- data/bin/gi2annot.rb +0 -14
- data/bin/id_class_anal.rb +0 -112
- data/bin/id_precision.rb +0 -172
- data/bin/ms_to_lmat.rb +0 -67
- data/bin/pepproph_filter.rb +0 -16
- data/bin/prob_validate.rb +0 -6
- data/bin/protein_summary.rb +0 -6
- data/bin/protxml2prots_peps.rb +0 -32
- data/bin/raw_to_mzXML.rb +0 -55
- data/bin/run_percolator.rb +0 -122
- data/bin/sqt_group.rb +0 -26
- data/bin/srf_group.rb +0 -27
- data/bin/srf_to_sqt.rb +0 -40
- data/lib/align/chams.rb +0 -78
- data/lib/align.rb +0 -154
- data/lib/archive/targz.rb +0 -94
- data/lib/bsearch.rb +0 -120
- data/lib/core_extensions.rb +0 -16
- data/lib/fasta.rb +0 -626
- data/lib/gi.rb +0 -124
- data/lib/group_by.rb +0 -10
- data/lib/index_by.rb +0 -11
- data/lib/merge_deep.rb +0 -21
- data/lib/ms/converter/mzxml.rb +0 -77
- data/lib/ms/gradient_program.rb +0 -170
- data/lib/ms/msrun.rb +0 -244
- data/lib/ms/msrun_index.rb +0 -108
- data/lib/ms/parser/mzdata/axml.rb +0 -67
- data/lib/ms/parser/mzdata/dom.rb +0 -175
- data/lib/ms/parser/mzdata/libxml.rb +0 -7
- data/lib/ms/parser/mzdata.rb +0 -31
- data/lib/ms/parser/mzxml/axml.rb +0 -70
- data/lib/ms/parser/mzxml/dom.rb +0 -182
- data/lib/ms/parser/mzxml/hpricot.rb +0 -253
- data/lib/ms/parser/mzxml/libxml.rb +0 -19
- data/lib/ms/parser/mzxml/regexp.rb +0 -122
- data/lib/ms/parser/mzxml/rexml.rb +0 -72
- data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
- data/lib/ms/parser/mzxml.rb +0 -282
- data/lib/ms/parser.rb +0 -108
- data/lib/ms/precursor.rb +0 -25
- data/lib/ms/scan.rb +0 -81
- data/lib/mspire.rb +0 -4
- data/lib/pi_zero.rb +0 -244
- data/lib/qvalue.rb +0 -161
- data/lib/roc.rb +0 -187
- data/lib/sample_enzyme.rb +0 -160
- data/lib/scan_i.rb +0 -21
- data/lib/spec_id/aa_freqs.rb +0 -170
- data/lib/spec_id/bioworks.rb +0 -497
- data/lib/spec_id/digestor.rb +0 -138
- data/lib/spec_id/mass.rb +0 -179
- data/lib/spec_id/parser/proph.rb +0 -335
- data/lib/spec_id/precision/filter/cmdline.rb +0 -218
- data/lib/spec_id/precision/filter/interactive.rb +0 -134
- data/lib/spec_id/precision/filter/output.rb +0 -148
- data/lib/spec_id/precision/filter.rb +0 -637
- data/lib/spec_id/precision/output.rb +0 -60
- data/lib/spec_id/precision/prob/cmdline.rb +0 -160
- data/lib/spec_id/precision/prob/output.rb +0 -94
- data/lib/spec_id/precision/prob.rb +0 -249
- data/lib/spec_id/proph/pep_summary.rb +0 -104
- data/lib/spec_id/proph/prot_summary.rb +0 -484
- data/lib/spec_id/proph.rb +0 -4
- data/lib/spec_id/protein_summary.rb +0 -489
- data/lib/spec_id/sequest/params.rb +0 -316
- data/lib/spec_id/sequest/pepxml.rb +0 -1458
- data/lib/spec_id/sequest.rb +0 -33
- data/lib/spec_id/sqt.rb +0 -349
- data/lib/spec_id/srf.rb +0 -973
- data/lib/spec_id.rb +0 -778
- data/lib/spec_id_xml.rb +0 -99
- data/lib/transmem/phobius.rb +0 -147
- data/lib/transmem/toppred.rb +0 -368
- data/lib/transmem.rb +0 -157
- data/lib/validator/aa.rb +0 -48
- data/lib/validator/aa_est.rb +0 -112
- data/lib/validator/background.rb +0 -77
- data/lib/validator/bias.rb +0 -95
- data/lib/validator/cmdline.rb +0 -431
- data/lib/validator/decoy.rb +0 -107
- data/lib/validator/digestion_based.rb +0 -70
- data/lib/validator/probability.rb +0 -51
- data/lib/validator/prot_from_pep.rb +0 -234
- data/lib/validator/q_value.rb +0 -32
- data/lib/validator/transmem.rb +0 -272
- data/lib/validator/true_pos.rb +0 -46
- data/lib/validator.rb +0 -197
- data/lib/xml.rb +0 -38
- data/lib/xml_style_parser.rb +0 -119
- data/lib/xmlparser_wrapper.rb +0 -19
- data/release_notes.txt +0 -2
- data/script/compile_and_plot_smriti_final.rb +0 -97
- data/script/create_little_pepxml.rb +0 -61
- data/script/degenerate_peptides.rb +0 -47
- data/script/estimate_fpr_by_cysteine.rb +0 -226
- data/script/extract_gradient_programs.rb +0 -56
- data/script/find_cysteine_background.rb +0 -137
- data/script/genuine_tps_and_probs.rb +0 -136
- data/script/get_apex_values_rexml.rb +0 -44
- data/script/histogram_probs.rb +0 -61
- data/script/mascot_fix_pepxml.rb +0 -123
- data/script/msvis.rb +0 -42
- data/script/mzXML2timeIndex.rb +0 -25
- data/script/peps_per_bin.rb +0 -67
- data/script/prep_dir.rb +0 -121
- data/script/simple_protein_digestion.rb +0 -27
- data/script/smriti_final_analysis.rb +0 -103
- data/script/sqt_to_meta.rb +0 -24
- data/script/top_hit_per_scan.rb +0 -67
- data/script/toppred_to_yaml.rb +0 -47
- data/script/tpp_installer.rb +0 -249
- data/specs/align_spec.rb +0 -79
- data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
- data/specs/bin/fasta_shaker_spec.rb +0 -259
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
- data/specs/bin/filter_and_validate_spec.rb +0 -180
- data/specs/bin/ms_to_lmat_spec.rb +0 -34
- data/specs/bin/prob_validate_spec.rb +0 -86
- data/specs/bin/protein_summary_spec.rb +0 -14
- data/specs/fasta_spec.rb +0 -354
- data/specs/gi_spec.rb +0 -22
- data/specs/load_bin_path.rb +0 -7
- data/specs/merge_deep_spec.rb +0 -13
- data/specs/ms/gradient_program_spec.rb +0 -77
- data/specs/ms/msrun_spec.rb +0 -498
- data/specs/ms/parser_spec.rb +0 -92
- data/specs/ms/spectrum_spec.rb +0 -87
- data/specs/pi_zero_spec.rb +0 -115
- data/specs/qvalue_spec.rb +0 -39
- data/specs/roc_spec.rb +0 -251
- data/specs/rspec_autotest.rb +0 -149
- data/specs/sample_enzyme_spec.rb +0 -126
- data/specs/spec_helper.rb +0 -135
- data/specs/spec_id/aa_freqs_spec.rb +0 -52
- data/specs/spec_id/bioworks_spec.rb +0 -148
- data/specs/spec_id/digestor_spec.rb +0 -75
- data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
- data/specs/spec_id/precision/filter/output_spec.rb +0 -31
- data/specs/spec_id/precision/filter_spec.rb +0 -246
- data/specs/spec_id/precision/prob_spec.rb +0 -44
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
- data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
- data/specs/spec_id/protein_summary_spec.rb +0 -189
- data/specs/spec_id/sequest/params_spec.rb +0 -68
- data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
- data/specs/spec_id/sequest_spec.rb +0 -38
- data/specs/spec_id/sqt_spec.rb +0 -246
- data/specs/spec_id/srf_spec.rb +0 -172
- data/specs/spec_id/srf_spec_helper.rb +0 -139
- data/specs/spec_id_helper.rb +0 -33
- data/specs/spec_id_spec.rb +0 -366
- data/specs/spec_id_xml_spec.rb +0 -33
- data/specs/transmem/phobius_spec.rb +0 -425
- data/specs/transmem/toppred_spec.rb +0 -298
- data/specs/transmem_spec.rb +0 -60
- data/specs/transmem_spec_shared.rb +0 -64
- data/specs/validator/aa_est_spec.rb +0 -66
- data/specs/validator/aa_spec.rb +0 -40
- data/specs/validator/background_spec.rb +0 -67
- data/specs/validator/bias_spec.rb +0 -122
- data/specs/validator/decoy_spec.rb +0 -51
- data/specs/validator/fasta_helper.rb +0 -26
- data/specs/validator/prot_from_pep_spec.rb +0 -141
- data/specs/validator/transmem_spec.rb +0 -146
- data/specs/validator/true_pos_spec.rb +0 -58
- data/specs/validator_helper.rb +0 -33
- data/specs/xml_spec.rb +0 -12
- data/test_files/000_pepxml18_small.xml +0 -206
- data/test_files/020a.mzXML.timeIndex +0 -4710
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
- data/test_files/4-03-03_small-prot.xml +0 -321
- data/test_files/4-03-03_small.xml +0 -3876
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +0 -5999
- data/test_files/bioworks31.params +0 -77
- data/test_files/bioworks32.params +0 -62
- data/test_files/bioworks33.params +0 -63
- data/test_files/bioworks_single_run_small.xml +0 -7237
- data/test_files/bioworks_small.fasta +0 -212
- data/test_files/bioworks_small.params +0 -63
- data/test_files/bioworks_small.phobius +0 -109
- data/test_files/bioworks_small.toppred.out +0 -2847
- data/test_files/bioworks_small.xml +0 -5610
- data/test_files/bioworks_with_INV_small.xml +0 -3753
- data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +0 -304
- data/test_files/messups.fasta +0 -297
- data/test_files/opd1/000.my_answer.100lines.xml +0 -101
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
- data/test_files/opd1/000_020_3prots-prot.xml +0 -62
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
- data/test_files/opd1/sequest.3.1.params +0 -77
- data/test_files/opd1/sequest.3.2.params +0 -62
- data/test_files/opd1/twenty_scans.mzXML +0 -418
- data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +0 -9
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
- data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
- data/test_files/pepproph_small.xml +0 -4691
- data/test_files/phobius.small.noheader.txt +0 -50
- data/test_files/phobius.small.small.txt +0 -53
- data/test_files/s01_anC1_ld020mM.key.txt +0 -25
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +0 -297
- data/test_files/small.sqt +0 -87
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +0 -14340
- data/test_files/tf_bioworks2excel.txt.actual +0 -1035
- data/test_files/toppred.small.out +0 -416
- data/test_files/toppred.xml.out +0 -318
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
- data/test_files/yeast_gly_small-prot.xml +0 -265
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
- data/test_files/yeast_gly_small.xml +0 -3807
- data/test_files/yeast_gly_small2.parentTimes +0 -6
data/specs/spec_helper.rb
DELETED
|
@@ -1,135 +0,0 @@
|
|
|
1
|
-
gem 'rspec'
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
# a global flag that lets me know what format we're dealing with for output
|
|
5
|
-
$specdoc = false
|
|
6
|
-
## something changed between version 1.0.6?? and 1.1.1 in rspec so that
|
|
7
|
-
#Spec::Runner is no longer an object being created...
|
|
8
|
-
#ObjectSpace.each_object do |obj|
|
|
9
|
-
# case obj
|
|
10
|
-
# when Spec::Runner::Formatter::SpecdocFormatter
|
|
11
|
-
# $specdoc = true
|
|
12
|
-
# end
|
|
13
|
-
#end
|
|
14
|
-
|
|
15
|
-
# Set up some global testing variables:
|
|
16
|
-
#silent {
|
|
17
|
-
ROOT_DIR = File.dirname(__FILE__) + '/..'
|
|
18
|
-
SPEC_DIR = File.dirname(__FILE__)
|
|
19
|
-
|
|
20
|
-
Tfiles = File.dirname(__FILE__) + '/../test_files'
|
|
21
|
-
Tfiles_l = File.dirname(__FILE__) + '/../test_files_large'
|
|
22
|
-
Tfiles_large = Tfiles_l
|
|
23
|
-
#}
|
|
24
|
-
|
|
25
|
-
# this variable is for large files!
|
|
26
|
-
if ENV['SPEC_LARGE']
|
|
27
|
-
$spec_large = true
|
|
28
|
-
else
|
|
29
|
-
#[NOTE: NOT testing with large test files]"
|
|
30
|
-
# ** run with env var: SPEC_LARGE and ensure tfiles_large dir"
|
|
31
|
-
$spec_large = false
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
def spec_large(&block)
|
|
35
|
-
if $spec_large
|
|
36
|
-
block.call
|
|
37
|
-
else
|
|
38
|
-
# Requires SPEC_LARGE=true and tfiles_large dir for testing large test files
|
|
39
|
-
it 'SKIPPING (not testing large files)' do
|
|
40
|
-
end
|
|
41
|
-
end
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
# returns all output to stdout as a string
|
|
45
|
-
# will respond to is_a? File -> false is_a? IO true even though it is really a
|
|
46
|
-
# file
|
|
47
|
-
def capture_stdout(&block)
|
|
48
|
-
capture_file = Tfiles + '/capture_stdout.tmp'
|
|
49
|
-
def capture_file.is_a?(klass)
|
|
50
|
-
case klass.to_s
|
|
51
|
-
when 'IO'
|
|
52
|
-
true
|
|
53
|
-
when 'File'
|
|
54
|
-
false
|
|
55
|
-
else
|
|
56
|
-
false
|
|
57
|
-
end
|
|
58
|
-
end
|
|
59
|
-
$stdout = File.open(capture_file, 'w')
|
|
60
|
-
block.call
|
|
61
|
-
$stdout.close
|
|
62
|
-
$stdout = STDOUT
|
|
63
|
-
string = IO.read(capture_file)
|
|
64
|
-
File.unlink capture_file
|
|
65
|
-
string
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
require 'ostruct'
|
|
69
|
-
# class for using a ruby-ish initializer
|
|
70
|
-
class MyOpenStruct < OpenStruct
|
|
71
|
-
def initialize(*args)
|
|
72
|
-
super(*args)
|
|
73
|
-
if block_given?
|
|
74
|
-
yield(self)
|
|
75
|
-
end
|
|
76
|
-
end
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
def xdescribe(*args)
|
|
81
|
-
puts "describe: #{args.join(' ')}"
|
|
82
|
-
puts "**SKIPPING**"
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
def Xdescribe(*args)
|
|
86
|
-
xdescribe(*args)
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
def xit(*args)
|
|
90
|
-
puts "\n- SKIPPING: #{args.join(' ')}"
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
def it_should(*args)
|
|
94
|
-
string = "- WRITE TEST: #{args.join(' ')}"
|
|
95
|
-
if $specdoc
|
|
96
|
-
puts(string)
|
|
97
|
-
else
|
|
98
|
-
puts("\n" + string)
|
|
99
|
-
end
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
def silent(&block)
|
|
103
|
-
tmp = $VERBOSE ; $VERBOSE = nil
|
|
104
|
-
block.call
|
|
105
|
-
$VERBOSE = tmp
|
|
106
|
-
end
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
require SPEC_DIR + '/load_bin_path'
|
|
110
|
-
|
|
111
|
-
class String
|
|
112
|
-
#alias_method :exist?, exist_as_a_file?
|
|
113
|
-
#alias_method exist_as_a_file?, exist?
|
|
114
|
-
#def exist?
|
|
115
|
-
# File.exist? self
|
|
116
|
-
#end
|
|
117
|
-
def exist_as_a_file?
|
|
118
|
-
File.exist? self
|
|
119
|
-
end
|
|
120
|
-
end
|
|
121
|
-
|
|
122
|
-
describe "a cmdline program", :shared => true do
|
|
123
|
-
before(:all) do
|
|
124
|
-
testdir = File.dirname(__FILE__)
|
|
125
|
-
libdir = testdir + '/../lib'
|
|
126
|
-
bindir = testdir + '/../bin'
|
|
127
|
-
@cmd = "ruby -I #{libdir} #{bindir}/#{@progname} "
|
|
128
|
-
end
|
|
129
|
-
|
|
130
|
-
it 'gives usage when called with no args' do
|
|
131
|
-
reply = `#{@cmd}`
|
|
132
|
-
reply.should =~ /usage/i
|
|
133
|
-
end
|
|
134
|
-
|
|
135
|
-
end
|
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper'
|
|
2
|
)
|
|
3
|
-
require 'spec_id/aa_freqs'
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
describe SpecID::AAFreqs, "given a small fasta file" do
|
|
8
|
-
before(:all) do
|
|
9
|
-
@sf = Tfiles + "/small.fasta"
|
|
10
|
-
@fobj = Fasta.new(@sf)
|
|
11
|
-
@obj = SpecID::AAFreqs.new(@fobj)
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
it 'calculates AA freqs properly' do
|
|
15
|
-
expect = {:I=>0.0628918621937819, :S=>0.0539719475147049, :D=>0.0526145691939758, :Z=>0.0, :L=>0.102772929998061, :T=>0.0491888048607071, :E=>0.0609527503070261, :O=>0.0, :C=>0.0157714433456144, :K=>0.0471850559110594, :U=>0.0, :Q=>0.0382651412319824, :W=>0.0137030573330748, :A=>0.101997285243359, :M=>0.0294745006786892, :J=>0.0, :G=>0.0811195139292871, :Y=>0.0254670027793937, :X=>0.0, :F=>0.0418201796910348, :R=>0.0546829552065154, :V=>0.0702604873634542, :H=>0.0213302307543145, :B=>0.0, :N=>0.03471010277293, :P=>0.0418201796910348}
|
|
16
|
-
aaf = @obj.aafreqs
|
|
17
|
-
expect.each do |k,v|
|
|
18
|
-
#aaf.key?(k).should be_true
|
|
19
|
-
aaf.should have_key(k)
|
|
20
|
-
aaf[k].should be_close(v, 0.00000001)
|
|
21
|
-
end
|
|
22
|
-
sum = 0.0
|
|
23
|
-
aaf.values.each do |v|
|
|
24
|
-
sum += v
|
|
25
|
-
end
|
|
26
|
-
sum.should be_close(1.0, 0.0000000000001)
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
it 'gets actual and expected nums for at least 1 amino acid' do
|
|
30
|
-
peptide_aaseqs = @fobj.prots.map do |prot|
|
|
31
|
-
prot.aaseq[0..12]
|
|
32
|
-
end
|
|
33
|
-
peptide_aaseqs.size.should == 50
|
|
34
|
-
(ac,ex) = @obj.actual_and_expected_number(peptide_aaseqs, :C, 1)
|
|
35
|
-
ac.should == 9
|
|
36
|
-
ex.should be_close(9.33530631238985, 0.0000000001)
|
|
37
|
-
end
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
describe SpecID::AAFreqs, "with class methods" do
|
|
41
|
-
it 'creates a probability of length lookup table' do
|
|
42
|
-
expecting = [0.0, 0.01, 0.0199, 0.029701, 0.0394039900000001]
|
|
43
|
-
SpecID::AAFreqs.probability_of_length_table(0.01, 4).zip(expecting) do |answ, exp|
|
|
44
|
-
answ.should be_close(exp, 0.0000000001)
|
|
45
|
-
end
|
|
46
|
-
expecting = [0.0, 0.2, 0.36, 0.488, 0.5904]
|
|
47
|
-
SpecID::AAFreqs.probability_of_length_table(0.2, 4).zip(expecting) do |answ, exp|
|
|
48
|
-
answ.should be_close(exp, 0.0000000001)
|
|
49
|
-
end
|
|
50
|
-
end
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
|
|
@@ -1,148 +0,0 @@
|
|
|
1
|
-
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
|
2
|
-
|
|
3
|
-
require 'spec_id'
|
|
4
|
-
require 'spec_id/bioworks'
|
|
5
|
-
#require 'benchmark'
|
|
6
|
-
|
|
7
|
-
describe Bioworks, 'set from an xml file' do
|
|
8
|
-
# NEED TO DEBUG THIS PROB!
|
|
9
|
-
it 'can set one with labeled proteins' do
|
|
10
|
-
file = Tfiles + "/bioworks_with_INV_small.xml"
|
|
11
|
-
obj = Bioworks.new(file)
|
|
12
|
-
obj.prots.size.should == 19
|
|
13
|
-
file = Tfiles + '/bioworks_small.xml'
|
|
14
|
-
obj = Bioworks.new(file)
|
|
15
|
-
obj.prots.size.should == 106
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
it 'can parse an xml file NOT derived from multi-concensus' do
|
|
19
|
-
tf_bioworks_single_xml_small = Tfiles + '/bioworks_single_run_small.xml'
|
|
20
|
-
obj = Bioworks.new(tf_bioworks_single_xml_small)
|
|
21
|
-
gfn = '5prot_mix_michrom_20fmol_200pmol'
|
|
22
|
-
origfilename = '5prot_mix_michrom_20fmol_200pmol.RAW'
|
|
23
|
-
origfilepath = 'C:\Xcalibur\sequest'
|
|
24
|
-
obj.global_filename.should == gfn
|
|
25
|
-
obj.origfilename.should == origfilename
|
|
26
|
-
obj.origfilepath.should == origfilepath
|
|
27
|
-
obj.prots.size.should == 7
|
|
28
|
-
obj.prots.first.peps.first.base_name.should == gfn
|
|
29
|
-
obj.prots.first.peps.first.file.should == "152"
|
|
30
|
-
obj.prots.first.peps.first.charge.should == 2
|
|
31
|
-
# @TODO: add more tests here
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
it 'can output in excel format (**semi-verified right now)' do
|
|
35
|
-
tf_bioworks_to_excel = Tfiles + '/tf_bioworks2excel.bioXML'
|
|
36
|
-
tf_bioworks_to_excel_actual = Tfiles + '/tf_bioworks2excel.txt.actual'
|
|
37
|
-
tmpfile = Tfiles + "/tf_bioworks_to_excel.tmp"
|
|
38
|
-
bio = Bioworks.new(tf_bioworks_to_excel)
|
|
39
|
-
bio.to_excel(tmpfile)
|
|
40
|
-
tmpfile.exist_as_a_file?.should be_true
|
|
41
|
-
#File.should exist_as_a_file(tmpfile)
|
|
42
|
-
exp = _arr_of_arrs(tf_bioworks_to_excel_actual)
|
|
43
|
-
act = _arr_of_arrs(tmpfile)
|
|
44
|
-
exp.each_index do |i|
|
|
45
|
-
break if i == 23 ## this is where the ordering becomes arbitrary between guys with the same scans, but different filenames
|
|
46
|
-
_assert_equal_pieces(exp[i], act[i], exp[i][0] =~ /\d/)
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
File.unlink tmpfile
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
# prot is boolean if this is a protein line!
|
|
53
|
-
def _assert_equal_pieces(exp, act, prot)
|
|
54
|
-
# equal as floats (by delta)
|
|
55
|
-
exp.each_index do |i|
|
|
56
|
-
if i == 5 # both prots and peps
|
|
57
|
-
act[i].to_f.should be_close(exp[i].to_f, 0.1)
|
|
58
|
-
elsif i == 3 && !prot
|
|
59
|
-
act[i].to_f.should be_close(exp[i].to_f, 0.01)
|
|
60
|
-
elsif i == 6 && !prot
|
|
61
|
-
act[i].to_f.should be_close(exp[i].to_f, 0.01)
|
|
62
|
-
elsif i == 9 && prot
|
|
63
|
-
## NEED TO GET THESE BACK (for consistency):
|
|
64
|
-
#act[i].split(" ")[0].should =~ exp[i].split(" ")[0]
|
|
65
|
-
else
|
|
66
|
-
## NEED TO GET THESE BACK (for consistency):
|
|
67
|
-
#act[i].should == exp[i]
|
|
68
|
-
end
|
|
69
|
-
end
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
# takes a bioworks excel (in txt format) and outputs an arr of arrs
|
|
73
|
-
def _arr_of_arrs(file)
|
|
74
|
-
IO.readlines(file).collect do |line|
|
|
75
|
-
line.chomp!
|
|
76
|
-
line.split("\t")
|
|
77
|
-
end
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
it 'can return unique peptides and proteins by sequence+charge (private)' do
|
|
81
|
-
cnt = 0
|
|
82
|
-
answer = [%w(2 PEPTIDE), %w(3 PEPTIDE), %w(3 PEPY), %w(2 PEPY)]
|
|
83
|
-
exp_peps = answer.collect! do |arr|
|
|
84
|
-
pep = Bioworks::Pep.new
|
|
85
|
-
pep.charge = arr[0]
|
|
86
|
-
pep.sequence = arr[1]
|
|
87
|
-
pep
|
|
88
|
-
end
|
|
89
|
-
exp_prots = [[0,2],[1,4,5],[3],[6]].collect do |arr|
|
|
90
|
-
arr.collect do |num|
|
|
91
|
-
prot = Bioworks::Prot.new
|
|
92
|
-
prot.reference = "#{num}"
|
|
93
|
-
prot
|
|
94
|
-
end
|
|
95
|
-
end
|
|
96
|
-
exp_peps = exp_peps.zip(exp_prots)
|
|
97
|
-
exp_peps.collect! do |both|
|
|
98
|
-
both[0].prots = [both[1]]
|
|
99
|
-
both[0]
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
peptides = [%w(2 PEPTIDE), %w(3 PEPTIDE), %w(2 PEPTIDE), %w(3 PEPY), %w(3 PEPTIDE), %w(3 PEPTIDE), %w(2 PEPY)].collect do |arr|
|
|
103
|
-
pep = Bioworks::Pep.new
|
|
104
|
-
pep.charge = arr[0]
|
|
105
|
-
pep.sequence = arr[1]
|
|
106
|
-
pep.prots = [Bioworks::Prot.new]
|
|
107
|
-
pep.prots.first.reference = "#{cnt}"
|
|
108
|
-
cnt += 1
|
|
109
|
-
pep
|
|
110
|
-
end
|
|
111
|
-
peptides, proteins = Bioworks.new._uniq_peps_by_sequence_charge(peptides)
|
|
112
|
-
proteins.size.should == peptides.size
|
|
113
|
-
exp_peps.each_with_index do |pep, i|
|
|
114
|
-
peptides[i].charge.should == pep.charge
|
|
115
|
-
peptides[i].sequence.should == pep.sequence
|
|
116
|
-
end
|
|
117
|
-
|
|
118
|
-
exp_prots.each_index do |i|
|
|
119
|
-
exp_prots[i].each_index do |j|
|
|
120
|
-
proteins[i][j].reference.should == exp_prots[i][j].reference
|
|
121
|
-
end
|
|
122
|
-
end
|
|
123
|
-
end
|
|
124
|
-
|
|
125
|
-
end
|
|
126
|
-
|
|
127
|
-
describe Bioworks::Pep do
|
|
128
|
-
it 'can be initialized from a hash' do
|
|
129
|
-
hash = {:sequence => 0, :mass => 1, :deltamass => 2, :charge => 3, :xcorr => 4, :deltacn => 5, :sp => 6, :rsp => 7, :ions => 8, :count => 9, :tic => 10, :prots => 11, :base_name => 12, :first_scan => 13, :last_scan => 14, :peptide_probability => 15, :file => 16, :_num_prots => 17, :_first_prot => 18}
|
|
130
|
-
pep = Bioworks::Pep.new(hash)
|
|
131
|
-
hash.each do |k,v|
|
|
132
|
-
pep.send(k).should == v
|
|
133
|
-
end
|
|
134
|
-
end
|
|
135
|
-
|
|
136
|
-
it 'correctly extracts file information' do
|
|
137
|
-
pep = Bioworks::Pep.new
|
|
138
|
-
testing = ['005a, 1131', '005b, 1131 - 1133', '1131', '1131 - 1133']
|
|
139
|
-
answers = [%w(005a 1131 1131), %w(005b 1131 1133), [nil, '1131', '1131'], [nil, '1131', '1133']]
|
|
140
|
-
testing.zip(answers) do |ar|
|
|
141
|
-
ans = pep.class.extract_file_info(ar[0])
|
|
142
|
-
ans.join(" ").should == ar[1].join(" ")
|
|
143
|
-
end
|
|
144
|
-
end
|
|
145
|
-
|
|
146
|
-
end
|
|
147
|
-
|
|
148
|
-
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
require 'set'
|
|
2
|
-
|
|
3
|
-
require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
|
|
4
|
-
require 'spec_id/digestor'
|
|
5
|
-
require 'spec_id/sequest/params'
|
|
6
|
-
require 'fasta'
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
describe 'selecting peptides based on size' do
|
|
10
|
-
before(:each) do
|
|
11
|
-
# (M+H)+ PEPTIDE
|
|
12
|
-
# http://db.systemsbiology.net:8080/proteomicsToolkit/FragIonServlet.html
|
|
13
|
-
mono = {
|
|
14
|
-
'AACK' => 392.19681,
|
|
15
|
-
'PEPTIDE' => 800.36783,
|
|
16
|
-
'TTTYW' => 671.72767,
|
|
17
|
-
'AGGGGGGLKNADEEEP' => 1457.65088,
|
|
18
|
-
'IMNDR' => 648.31396
|
|
19
|
-
|
|
20
|
-
}
|
|
21
|
-
avg = {
|
|
22
|
-
'AACK' => 392.49375,
|
|
23
|
-
'PEPTIDE' => 800.84071,
|
|
24
|
-
'TTTYW' => 671.30411,
|
|
25
|
-
'AGGGGGGLKNADEEEP' => 1458.48147,
|
|
26
|
-
'IMNDR' => 648.75518, # 648.76, thermo
|
|
27
|
-
}
|
|
28
|
-
@pepseqs = [%w(AACK PEPTIDE TTTYW), %w(AGGGGGGLKNADEEEP IMNDR)]
|
|
29
|
-
# basically the protein sequence ONLY matters if the peptide is n or c
|
|
30
|
-
# terminal and there is an n or c terminal modification for ONLY the
|
|
31
|
-
# protein.
|
|
32
|
-
@protseqs = %w(LLLLAACKLLLLLLLPEPTIDELLLLLLTTTYWLLL LLLLAGGGGGGLKNADEEEPLLLLLLIMNDRLLL)
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
it 'is sensitive to mono/avg' do
|
|
36
|
-
h_plus = false
|
|
37
|
-
|
|
38
|
-
expect = [%w(PEPTIDE TTTYW), %w(IMNDR)]
|
|
39
|
-
masses_hash = Mass::MONO
|
|
40
|
-
answ = Digestor.new.limit_sizes(@protseqs, @pepseqs, 400.0, 800.38, masses_hash, h_plus)
|
|
41
|
-
answ.to_set.should == expect.to_set
|
|
42
|
-
masses_hash = Mass::AVG
|
|
43
|
-
expect = [%w(TTTYW), %w(IMNDR)]
|
|
44
|
-
answ = Digestor.new.limit_sizes(@protseqs, @pepseqs, 400.0, 800.38, masses_hash, h_plus)
|
|
45
|
-
answ.to_set.should == expect.to_set
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
it 'is sensitive to static mass changes' do
|
|
49
|
-
expect_before = [%w(PEPTIDE TTTYW), %w(IMNDR)]
|
|
50
|
-
h_plus = false
|
|
51
|
-
masses_hash = Mass::MONO
|
|
52
|
-
answ = Digestor.new.limit_sizes(@protseqs, @pepseqs, 400.0, 800.38, Mass::MONO, h_plus)
|
|
53
|
-
answ.to_set.should == expect_before.to_set
|
|
54
|
-
|
|
55
|
-
static = {:C => 20.0}
|
|
56
|
-
expect_after = [%w(AACK PEPTIDE TTTYW), %w(IMNDR)]
|
|
57
|
-
masses_hash = Mass::MONO.dup
|
|
58
|
-
masses_hash[:C] = masses_hash[:C] + 20.0
|
|
59
|
-
answ = Digestor.new.limit_sizes(@protseqs, @pepseqs, 400.0, 800.38, masses_hash, h_plus)
|
|
60
|
-
#answ.to_set.should == expect_before.to_set
|
|
61
|
-
answ.to_set.should == expect_after.to_set
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
it 'returns peptides linked to their proteins given fasta and params' do
|
|
65
|
-
fasta_obj = Fasta.new(Tfiles + '/small.fasta')
|
|
66
|
-
params_obj = Sequest::Params.new(Tfiles + '/bioworks32.params')
|
|
67
|
-
peps = Digestor.digest(fasta_obj, params_obj)
|
|
68
|
-
peps.first.is_a?(SpecID::Pep).should be_true
|
|
69
|
-
# frozen
|
|
70
|
-
peps.size.should == 2843
|
|
71
|
-
# frozen
|
|
72
|
-
peps.select {|v| v.prots.size > 1 }.size.should == 10
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
end
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
require File.expand_path( File.dirname(__FILE__) + '/../../../spec_helper' )
|
|
2
|
-
|
|
3
|
-
require 'spec_id/precision/filter'
|
|
4
|
-
|
|
5
|
-
describe SpecID::Precision::Filter::CmdlineParser, 'getting all command line options correct' do
|
|
6
|
-
|
|
7
|
-
before(:all) do
|
|
8
|
-
@bioworks_file = Tfiles + '/bioworks_small.xml'
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
it_should 'gets all defaults correct with nothing passed in' do
|
|
12
|
-
(spec_id_obj, options, option_parser) = SpecID::Precision::Filter::CmdlineParser.new.parse([@bioworks_file])
|
|
13
|
-
p options
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
it_should 'gets all passed in params correct' do
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
end
|
|
20
|
-
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
require File.expand_path( File.dirname(__FILE__) + '/../../../spec_helper' )
|
|
2
|
-
require 'spec_id/precision/filter'
|
|
3
|
-
require 'spec_id/precision/filter/output'
|
|
4
|
-
|
|
5
|
-
describe 'transforming hash with symbols into strings' do
|
|
6
|
-
it 'works' do
|
|
7
|
-
hash = {:one=>2, :this=>{:one=>"string", 3=>{:four=>5}}}
|
|
8
|
-
new_hash = SpecID::Precision::Output.symbol_keys_to_string(hash)
|
|
9
|
-
new_hash.should == {'one'=>2, 'this'=>{'one'=>"string", 3=>{'four'=>5}}}
|
|
10
|
-
end
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
describe 'outputs' do
|
|
14
|
-
before(:each) do
|
|
15
|
-
@file = Tfiles + '/bioworks_with_INV_small.xml'
|
|
16
|
-
@opts = {}
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
it 'makes a table' do
|
|
20
|
-
my_file = Tfiles + '/filtering_tmp.tmp'
|
|
21
|
-
File.unlink my_file if File.exist? my_file
|
|
22
|
-
@opts[:output] = [[:text_table, my_file]]
|
|
23
|
-
SpecID::Precision::Filter.new.filter_and_validate(SpecID.new(@file), @opts)
|
|
24
|
-
#reply = capture_stdout {
|
|
25
|
-
# SpecID::Precision::Filter.new.filter_and_validate(SpecID.new(@file), @opts)
|
|
26
|
-
#}
|
|
27
|
-
# frozen
|
|
28
|
-
IO.read(my_file) =~ /138/
|
|
29
|
-
File.unlink my_file if File.exist? my_file
|
|
30
|
-
end
|
|
31
|
-
end
|