mspire 0.4.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255)
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
data/specs/fasta_spec.rb DELETED
@@ -1,354 +0,0 @@
1
- require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
2
-
3
- require 'fasta'
4
-
5
- Filestring = ">gi|P1
6
- AMKRGAN
7
- >gi|P2
8
- CRGATKKTAGRPMEK
9
- >gi|P3
10
- PEPTIDE
11
- "
12
-
13
- class Fasta
14
- def proteins?
15
- (@prots.size > 0) and
16
- @prots.first.is_a? Fasta::Prot
17
- end
18
- end
19
-
20
- describe Fasta do
21
-
22
- it 'can be set from a string' do
23
- obj = Fasta.from_string(Filestring)
24
- obj.is_a?(Fasta).should be_true
25
- obj.proteins?.should be_true
26
- obj.size.should == 3
27
- matches_filestring(obj)
28
- end
29
-
30
- # given a fasta obj, asks if it matches filestring
31
- def matches_filestring(obj)
32
- heads = %w(>gi|P1 >gi|P2 >gi|P3)
33
- seqs = %w(AMKRGAN CRGATKKTAGRPMEK PEPTIDE)
34
- obj.zip(heads, seqs) do |prot, head, seq|
35
- prot.header.should == head
36
- prot.aaseq.should == seq
37
- end
38
- end
39
-
40
- end
41
-
42
- describe Fasta::Prot do
43
-
44
- it 'can extract a gi code out of ncbi sequences' do
45
- gis = ['>gi|7427923|pir||PHRBG glycogen phosphorylase (EC 2.4.1.1), muscle - rabbit', '>sp|lollygag|helloyou', '>only has one bar | and thats it', 'notme|me|nome', '>lots|an|lots|of|bars|heehee']
46
- answ = ['7427923', 'lollygag', nil, 'me','an']
47
- actual = gis.map do |head|
48
- Fasta::Prot.new(head).gi
49
- end
50
- actual.should == answ
51
- end
52
- end
53
-
54
- =begin
55
-
56
- require File.dirname(File.expand_path(__FILE__)) + '/load_bin_path'
57
- require 'test/unit'
58
- require 'fasta'
59
- require 'assert_files'
60
- require 'sample_enzyme'
61
- require 'set'
62
-
63
-
64
- module Test::Unit::Assertions
65
- @@file_display_length = 10000
66
- end
67
-
68
- class FastaTest < Test::Unit::TestCase
69
- NODELETE = false
70
-
71
- def initialize(arg)
72
- super(arg)
73
-
74
- @cat_shuffle_postfix = Fasta::CAT_SHUFF_FILE_POSTFIX
75
- @connector = Fasta::FILE_CONNECTOR
76
- @shuff_prefix = Fasta::SHUFF_PREFIX
77
- @inv_prefix = Fasta::SHUFF_PREFIX
78
- @shuff_ext = Fasta::SHUFF_FILE_POSTFIX
79
- @inv_ext = Fasta::INV_FILE_POSTFIX
80
-
81
- @tfiles = File.dirname(__FILE__) + '/tfiles/'
82
- @base_cmd = "ruby -I #{File.join(File.dirname(__FILE__), "..", "lib")} -S "
83
- @fasta_mod_cmd = @base_cmd + "fasta_mod.rb "
84
- @fasta_cat_mod_cmd = @base_cmd + "fasta_cat_mod.rb "
85
- @fasta_cat_cmd = @base_cmd + "fasta_cat.rb "
86
- @sf = @tfiles + "small.fasta"
87
- @sf_shuffle = @tfiles + "small#{@shuff_ext}.fasta"
88
- @sf_invert = @tfiles + "small#{@inv_ext}.fasta"
89
- @sf_cat = @tfiles + "small__small_SHUFF.fasta"
90
- @sf_cat_mod = @tfiles + "small#{@cat_shuffle_postfix}.fasta"
91
- @mf = @tfiles + "messups.fasta"
92
- end
93
-
94
- def test_read_file
95
- obj = Fasta.new.read_file(@sf)
96
- @tmpfile = @tfiles + "tmp.tmp"
97
- obj.write_file(@tmpfile)
98
- assert_not_equal_file_content(@tmpfile, @sf)
99
- obj2 = Fasta.new.read_file(@tmpfile)
100
- File.unlink(@tmpfile)
101
- assert_equal(obj, obj2)
102
- end
103
-
104
- def test_cat
105
- obj = Fasta.new.read_file(@sf)
106
- first_size = obj.prots.size
107
- obj << obj
108
- assert_equal(2, obj.prots.size/first_size)
109
- end
110
-
111
- def test_dup
112
- obj = Fasta.new.read_file(@sf)
113
- objd = obj.dup
114
- obj_prots = obj.prots
115
- objd.prots.each do |prot|
116
- assert(obj_prots.include?(prot))
117
- end
118
- end
119
-
120
- def test_prefix_extension
121
- assert('f_howdy.ext', Fasta.prefix_extension('f.ext', '_howdy'))
122
- assert('f.ext_howdy.ext', Fasta.prefix_extension('f.ext.ext', '_howdy'))
123
- end
124
-
125
- def test_cat_filenames
126
- assert('f1f2.ext1', Fasta.cat_filenames(['f1.ext1', 'f2.ext2']))
127
- assert('f1__f2.ext1', Fasta.cat_filenames(['f1.ext1', 'f2.ext2'], '__'))
128
- end
129
-
130
- def test_mod
131
- ## Testing shuffle:
132
- `#{@fasta_mod_cmd + 'shuffle ' + @sf}`
133
- assert(File.exist?(@sf_shuffle), "output file #{@sf_shuffle} exists")
134
- ob1 = Fasta.new.read_file(@sf)
135
- ob2 = Fasta.new.read_file(@sf_shuffle)
136
- assert_not_equal_file_content(@sf_shuffle, @sf)
137
- File.unlink @sf_shuffle
138
- assert(_same_headers?(ob1,ob2))
139
- assert(_are_shuffled?(ob1,ob2))
140
-
141
- ## Testing invert:
142
- `#{@fasta_mod_cmd + 'invert ' + @sf}`
143
- assert(File.exist?(@sf_invert), "output file #{@sf_invert} exists")
144
- ob1 = Fasta.new.read_file(@sf)
145
- ob2 = Fasta.new.read_file(@sf_invert)
146
- assert_not_equal_file_content(@sf_invert, @sf)
147
- File.unlink(@sf_invert)
148
- assert(_same_headers?(ob1,ob2))
149
- assert(_are_inverted?(ob1,ob2))
150
-
151
- ## Testing prefix
152
- #puts "#{@fasta_mod_cmd + '-p _HELLO_ invert ' + @sf}"
153
- `#{@fasta_mod_cmd + 'invert -p _HELLO_ ' + @sf}` # NOT WORKING!
154
- assert(File.exist?(@sf_invert), "output file #{@sf_invert} exists")
155
- ob1 = Fasta.new.read_file(@sf)
156
- ob2 = Fasta.new.read_file(@sf_invert)
157
- assert(_are_inverted?(ob1,ob2))
158
- assert_equal(ob1.prots.size, IO.read(@sf_invert).scan(/>_HELLO_/).size)
159
- File.unlink(@sf_invert)
160
- end
161
-
162
- ## IN PROGRESS:
163
- def Xtest_cat_mod
164
-
165
- assert(File.exist?(@sf), "prerequisite for cat tests")
166
-
167
- ## Single file to cat shuffle test
168
- puts `#{@fcat_mod_cmd + @sf}`
169
- assert(File.exist?(@sf_cat_single), "output file exists")
170
- ob1 = Fasta.new.read_file(@sf)
171
- ob2 = Fasta.new.read_file(@sf_cat_single)
172
- assert_equal(2, ob2.prots.size/ob1.prots.size)
173
- assert_equal(ob1.prots, ob2.prots[0, (ob1.prots.size)])
174
- assert_not_equal(ob1.prots, ob2.prots[(ob1.prots.size)..-1])
175
- File.unlink @sf_cat_single
176
- end
177
-
178
- ## IN PROGRESS:
179
- def Xtest_cat
180
-
181
- ## Concatenate files test:
182
- puts `#{@cat_cmd + @sf} -p ,#{@shuff_prefix} #{@sfn}`
183
- assert(File.exist?(@sf_cat), "output file #{@sf_cat} exists")
184
- ob1 = Fasta.new.read_file(@sf)
185
- ob2 = Fasta.new.read_file(@sfn)
186
- ob3 = Fasta.new.read_file(@sf_cat)
187
- assert_not_equal_file_content(@sf_cat, @sf)
188
- assert_not_equal_file_content(@sf_cat, @sfn)
189
- [@sfn,@sf_cat].each { |f| File.unlink f }
190
-
191
- ob2.header_prefix!(@shuff_prefix)
192
- ob3_prots = ob3.prots
193
- [ob1, ob2].each do |ob|
194
- ob.prots.each do |prot|
195
- unless ob3_prots.include? prot
196
- p prot
197
- flunk "protein not found in cat version"
198
- end
199
- end
200
- end
201
-
202
-
203
- # test catenation
204
- sfci = "small_CAT_INV.fasta"
205
- cat_inverted = @tfiles + sfci
206
- iccmd = @base_cmd + "fasta_cat_inverse.rb "
207
- cmd = iccmd + @sf
208
- puts `#{cmd}`
209
- assert(File.exist?(cat_inverted), "file #{cat_inverted} exists")
210
-
211
- norm = Fasta.new.read_file(@sf)
212
- cat_inv = Fasta.new.read_file(cat_inverted)
213
- File.unlink(cat_inverted)
214
-
215
- num_prots = norm.prots.size
216
- cat_norm_prots = cat_inv.prots[0, num_prots]
217
- cat_inv_prots = cat_inv.prots[num_prots..-1]
218
- norm.prots.each_with_index do |prot,i|
219
- assert_equal(prot.header, cat_norm_prots[i].header)
220
- assert_not_equal(prot.header, cat_inv_prots[i].header)
221
- assert_equal(prot.aaseq, cat_norm_prots[i].aaseq)
222
- assert_equal(prot.aaseq.reverse!, cat_inv_prots[i].aaseq)
223
- end
224
- end
225
-
226
- def test_invert_tryptic_peptides
227
- # FOR INDIVIDUAL PROTEINS:
228
- seq = 'ABCKCDERDEKDGEKWXYRRKDER'
229
- # tryptic = ABCK, CDER, DEK, DGEK, WXYR, R, K, DER
230
- tryp = SampleEnzyme.tryptic(seq)
231
- reverse_tryptic = %w(CBAK EDCR EDK EGDK YXWR R K EDR)
232
- prot = Fasta::Prot.new(nil, seq)
233
- prot.invert_tryptic_peptides!
234
- assert_equal(reverse_tryptic.join(''), prot.aaseq, "reversing tryptic peptides")
235
-
236
- seq = 'XYRABCD'
237
- prot = Fasta::Prot.new(nil, seq)
238
- prot.invert_tryptic_peptides!
239
- assert_equal('YXRDCBA', prot.aaseq, 'last peptide treated special')
240
-
241
- seq = 'XYRPABCD'
242
- prot = Fasta::Prot.new(nil, seq)
243
- prot.invert_tryptic_peptides!
244
- assert_equal('DCBAPRYX', prot.aaseq, 'with a proline')
245
-
246
- end
247
-
248
- def test_fraction_of_prots
249
- peps = [['>silly1', "PEPTIDE"], ['>silly2', "ANOTHER"], ['>silly3', "AGAIN"], ['>silly4', "LARMA"]]
250
- prots = peps.map do |header, seq|
251
- Fasta::Prot.new(header, seq)
252
- end
253
- f = Fasta.new(prots)
254
- # simple:
255
- n = f.fraction_of_prots(1.0)
256
- assert_equal(f.prots.map{|v| v.header }.to_set, n.prots.map{|v| v.header }.to_set, "same headers")
257
- assert_equal(f.prots.map{|v| v.aaseq }.to_set, n.prots.map{|v| v.aaseq }.to_set, "same aaseqs")
258
-
259
- pre = proc {|cnt| "SHUFF_f#{cnt}_" }
260
- # test prefix
261
- n = f.fraction_of_prots(1.0, pre)
262
- n.prots.each do |prot|
263
- assert_match(/^>SHUFF_f0_/, prot.header, "contains new prefix")
264
- end
265
-
266
- # smaller
267
- n = f.fraction_of_prots(0.75, pre)
268
- assert_equal(3, n.prots.size, "correct number of proteins")
269
- # bigger
270
- n = f.fraction_of_prots(2.5, pre)
271
- assert_equal(10, n.prots.size, "correct number of proteins")
272
- n.prots[0..3].each {|prt| assert_match(/^>SHUFF_f0_/, prt.header ) }
273
- n.prots[4..7].each {|prt| assert_match(/^>SHUFF_f1_/, prt.header ) }
274
- n.prots[8..9].each {|prt| assert_match(/^>SHUFF_f2_/, prt.header ) }
275
- # crazy
276
- n = f.fraction_of_prots(1.33, pre)
277
- assert_equal(6, n.prots.size, "correct number of proteins")
278
- end
279
-
280
- def test_inverted_tryptic_peptides_for_file
281
- # for a file:
282
- tmpfile = @tfiles + "fasta.tmp"
283
- fasta = Fasta.new.read_file(@sf)
284
- fasta.aaseq_invert_tryptic_peptides!
285
- fasta.write_file(tmpfile)
286
- lines = IO.readlines(tmpfile)
287
- #normal = 'MKRISTTITTTITITTGNGAG'
288
- inverted_tryptic = 'MKRGAGNGTTITITTTITTSI' ## ?????
289
- assert_equal(inverted_tryptic, lines[1].chomp)
290
- #normal = 'MATYLIGDVHGCYDELIALLHKVEFTPGKDTLWLTGDLVARGPGSLDVLRYVKSLGDSVRLVLGNHDLHL
291
- # LAVFAGISRNKPKDRLTPLLEAPDADELLNWLRRQPLLQIDEEKKLVMAHAGITPQWDLQTAKECARDVE
292
- # AVLSSDSYPFFLDAMYGDMPNNWSPELRGLGRLRFITNAFTRMRFCFPNGQLDMYSKESPEEAPAPLKPW
293
- # FAIPGPVAEEYSIAFGHWASLEGKGTPEGIYALDTGCCWGGTLTCLRWEDKQYFVQPSNRHKDLGEAAAS'
294
- inverted_tryptic = 'HLLAILEDYCGHVDGILYTAMKGPTFEVKAVLDGTLWLTDRLVDLSGPGRVYKVSDGLSRSIGAFVALLHLDHNGLVLRPKNKDRLWNLLEDADPAELLPTLRREEDIQLLPQKKATQLDWQPTIGAHAMVLKACERLEPSWNNPMDGYMADLFFPYSDSSLVAEVDRGLGRLRTFANTIFRMRSYMDLQGNPFCFKGELSAWHGFAISYEEAVPGPIAFWPKLPAPAEEPSEKLCTLTGGWCCGTDLAYIGEPTGRDEWKNSPQVFYQRHKSAAAEGLD'
295
- assert_equal(inverted_tryptic, lines[-1].chomp)
296
- File.unlink(tmpfile) unless NODELETE
297
- end
298
-
299
-
300
-
301
- ## HELPER ASSERTIONS:
302
-
303
- def _are_inverted?(obj1, obj2)
304
- obj2_prots = obj2.prots
305
- obj1.prots.each_with_index do |prot,i|
306
- if prot.aaseq.reverse != obj2_prots[i].aaseq
307
- return false
308
- end
309
- end
310
- return true
311
- end
312
-
313
- def _same_headers?(obj1, obj2)
314
- obj1.prots.each_with_index do |prot,ind|
315
- oprot = obj2.prots[ind]
316
- if prot.header != oprot.header
317
- return false
318
- end
319
- end
320
- return true
321
- end
322
-
323
- # true if all prot AA seq's are the same
324
- def _same_aaseqs?(obj1, obj2)
325
- obj2_prots = obj2.prots
326
- obj1.prots.each_with_index do |prot,i|
327
- if prot.aaseq != obj2_prots[i].aaseq
328
- return false
329
- end
330
- end
331
- return true
332
- end
333
-
334
- # for two parallel fasta objects, determines if the list of proteins
335
- # are shuffled by examining the proteins and asking of > 4 are different
336
- # returns true or false
337
- def _are_shuffled?(obj1, obj2)
338
- cnt = 0
339
- obj1.prots.each_with_index do |prot,ind|
340
- oprot = obj2.prots[ind]
341
- if prot.header == oprot.header && prot.aaseq != oprot.aaseq
342
- cnt += 1
343
- end
344
- end
345
- if cnt > 4
346
- return true
347
- else
348
- return false
349
- end
350
- end
351
-
352
- end
353
-
354
- =end
data/specs/gi_spec.rb DELETED
@@ -1,22 +0,0 @@
1
- require File.expand_path( File.dirname(__FILE__) + '/spec_helper'
2
  )
3
- require 'gi'
4
-
5
-
6
- describe GI, "given a 'GI' number" do
7
- before(:all) do
8
- @gi_num = 836805
9
- end
10
- it 'can query NCBI for annotation (fails nicely w/o connection)' do
11
- annot = GI.gi2annot([@gi_num])
12
- if annot
13
- annot.first.should == 'proteosome component PRE4 [Saccharomyces cerevisiae]'
14
- else
15
- puts "- retrieval of gi failed gracefully w/o internet connection"
16
- end
17
- end
18
-
19
- end
20
-
21
-
22
-
23
-
data/specs/load_bin_path.rb DELETED
@@ -1,7 +0,0 @@
1
- tmp = $VERBOSE ; $VERBOSE = nil
2
- LOAD_BIN_PATH = File.expand_path(File.dirname(__FILE__) + "#{File::SEPARATOR}..#{File::SEPARATOR}bin")
3
- $VERBOSE = tmp
4
-
5
- if ENV.key?("PATH")
6
- ENV["PATH"] = LOAD_BIN_PATH + File::PATH_SEPARATOR + ENV["PATH"]
7
- end
data/specs/merge_deep_spec.rb DELETED
@@ -1,13 +0,0 @@
1
- require File.expand_path( File.dirname(__FILE__) + '/spec_helper' )
2
- require 'merge_deep'
3
-
4
- describe 'merging one level deep' do
5
- it 'works' do
6
- base = {1=>"X", 3=>{6=>7, 8=>9}}
7
- another = {1=>'y', 3=>{6=>9}}
8
- ans = base.merge_deep(another, 1)
9
- ans.should == {1=>'y', 3=>{6=>9, 8=>9}}
10
- end
11
- end
12
-
13
-
data/specs/ms/gradient_program_spec.rb DELETED
@@ -1,77 +0,0 @@
1
- require File.expand_path( File.dirname(__FILE__) + '/../spec_helper' )
2
- require 'ms/gradient_program'
3
-
4
- describe GradientProgram do
5
- it 'can be set from a Thermo Xcal 2.X .meth file' do
6
- data = [
7
- [0.00, 95.0, 5.0, 0.0, 0.0, 38.0],
8
- [1.00, 90.0, 10.0, 0.0, 0.0, 38.0],
9
- [30.00, 85.0, 15.0, 0.0, 0.0, 38.0],
10
- [40.00, 80.0, 20.0, 0.0, 0.0, 38.0],
11
- [45.00, 78.0, 22.0, 0.0, 0.0, 38.0],
12
- [50.00, 72.0, 28.0, 0.0, 0.0, 38.0],
13
- [65.00, 60.0, 40.0, 0.0, 0.0, 38.0],
14
- [72.00, 10.0, 90.0, 0.0, 0.0, 38.0],
15
- [75.0, 10.0, 90.0, 0.0, 0.0, 38.0],
16
- [81.00, 10.0, 90.0, 0.0, 0.0, 38.0],
17
- [81.10, 95.0, 5.0, 0.0, 0.0, 38.0],
18
- [90.00, 95.0, 5.0, 0.0, 0.0, 38.0],
19
- ]
20
-
21
- ms_pump_expected_tps = data.map do |ar|
22
- GradientProgram::TimePoint.new(ar[0], ar[-1], ar[1,4])
23
- end
24
- ms_pump_expected = GradientProgram.new('MS Pump', ms_pump_expected_tps, %w(A B C D))
25
-
26
- data = [
27
- [0.00, 0.0, 0.0, 100.0, 0.0, 40.0],
28
- [90.0, 0.0, 0.0, 100.0, 0.0, 40.0],
29
- ]
30
- sample_pump_expected_tps = data.map {|ar| GradientProgram::TimePoint.new(ar[0], ar[-1], ar[1,4]) }
31
- sample_pump_expected = GradientProgram.new('Sample Pump', sample_pump_expected_tps, %w(A B C D))
32
-
33
- file = Tfiles + '/s01_anC1_ld020mM.meth'
34
- File.open(file) do |fh|
35
- gps = GradientProgram.all_from_handle(fh)
36
- gps[0].should == ms_pump_expected
37
- gps[1].should == sample_pump_expected
38
- end
39
- end
40
-
41
- it 'can be set from a Thermo Xcal 1.X .RAW file (but missing pump_type)' do
42
- file = Tfiles + '/opd1_020_beginning.RAW'
43
- data = [[0.0, 0.0, 0.0, 100.0, 0.0, 200.0],
44
- [1.0, 0.0, 0.0, 96.0, 4.0, 200.0],
45
- [10.0, 0.0, 0.0, 96.0, 4.0, 200.0],
46
- [11.0, 0.0, 0.0, 100.0, 0.0, 200.0],
47
- [85.0, 0.0, 0.0, 100.0, 0.0, 200.0],]
48
-
49
- time_points = data.map do |ar|
50
- GradientProgram::TimePoint.new(ar[0], ar[-1], ar[1,4])
51
- end
52
- pump_type = '' ## need to get pump type...
53
- ms_pump_expected = GradientProgram.new(pump_type, time_points, %w(A B C D))
54
-
55
- data = [[0.0, 95.0, 5.0, 0.0, 0.0, 200.0],
56
- [1.0, 95.0, 5.0, 0.0, 0.0, 200.0],
57
- [61.0, 55.0, 45.0, 0.0, 0.0, 200.0],
58
- [62.0, 5.0, 95.0, 0.0, 0.0, 200.0],
59
- [67.0, 5.0, 95.0, 0.0, 0.0, 200.0],
60
- [68.0, 95.0, 5.0, 0.0, 0.0, 200.0],
61
- [85.0, 95.0, 5.0, 0.0, 0.0, 200.0],]
62
- time_points = data.map do |ar|
63
- GradientProgram::TimePoint.new(ar[0], ar[-1], ar[1,4])
64
- end
65
- pump_type = '' ## need to get pump type...
66
- sample_pump_expected = GradientProgram.new(pump_type, time_points, %w(A B C D))
67
-
68
- # we'd like to get an older .meth file to do this on
69
- File.open(file) do |fh|
70
- gps = GradientProgram.all_from_handle(fh)
71
- gps[0].should == ms_pump_expected
72
- gps[1].should == sample_pump_expected
73
-
74
- end
75
- end
76
-
77
- end