mspire 0.4.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255)
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
data/bin/raw_to_mzXML.rb DELETED
@@ -1,55 +0,0 @@
1
- #!/usr/bin/ruby -w
2
-
3
- require 'optparse'
4
- require 'ms/converter/mzxml'
5
- require 'fileutils'
6
-
7
- progname = File.basename(__FILE__)
8
-
9
-
10
- opt = {}
11
- opts = OptionParser.new do |op|
12
- op.banner = "usage: #{progname} [OPTIONS] <file>.RAW ..."
13
- op.separator ""
14
- op.on("-p", "--profile", "uses profile output instead of centroid (default)") {|v| opt[:profile] = v}
15
- end
16
-
17
- opts.parse!
18
-
19
- if ARGV.size == 0
20
- puts opts
21
- exit
22
- end
23
-
24
- converter = MS::Converter::MzXML.find_mzxml_converter
25
- if converter
26
- $stderr.puts "using #{converter} to convert files"
27
- else
28
- puts "cannot find [#{MS::Converter::MzXML::Potential_mzxml_converters.join(', ')}] in the paths:"
29
- puts ENV['PATH'].split(/[:;]/).join(", ")
30
- abort
31
- end
32
-
33
- files = ARGV.to_a
34
- files.each do |file|
35
- puts "******************************************"
36
- puts "Converting: #{file}"
37
- if converter =~ /readw/
38
- centroid_or_profile = 'c'
39
- if opt[:profile]
40
- centroid_or_profile = 'p'
41
- end
42
- outfile = file.sub(/\.RAW$/i, '.mzXML')
43
- cmd = "#{converter} #{file} #{centroid_or_profile} #{outfile}"
44
- puts "Performing: '#{cmd}'"
45
- puts `#{cmd}`
46
- else
47
- ## t2x only outputs in cwd!
48
- Dir.chdir(File.dirname(file)) do |dir|
49
- puts "Performing: '#{cmd}' in #{dir}"
50
- puts `#{cmd}`
51
- system "#{converter} #{File.basename(file)}"
52
- end
53
- end
54
- puts "******************************************"
55
- end
data/bin/run_percolator.rb DELETED
@@ -1,122 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- perc_cmd = 'percolator'
4
-
5
- require 'optparse'
6
- require 'spec_id/srf'
7
-
8
- # percolator_v1.02_32bit_linux -o reverse_meta.sqm normal_NOCYS/meta.sqm reverse_NOCYS/meta.sqm
9
- # percolator_v1.02_32bit_linux -o reverse_cat_meta.sqm -P INV_ reverse_cat_NOCYS/meta.sqm &
10
-
11
- file_hash = {
12
- :srg => "bioworks.srg",
13
- :sqg_in => "bioworks.sqg",
14
- :sqg_decoy => "decoy.sqg",
15
- :perc_out => "perc.sqg",
16
- :perc_stdout => "perc.stdout",
17
- :perc_stderr => "perc.stderr",
18
- :perc_ext => ".psqt",
19
- }
20
- (default_srg, sqg_in, perc_out, sqg_decoy, perc_stdout, perc_stderr, perc_ext) = file_hash.values_at(:srg, :sqg_in, :perc_out, :sqg_decoy, :perc_stdout, :perc_stderr, :perc_ext)
21
-
22
- opt = {}
23
- toclean = []
24
- opts = OptionParser.new do |op|
25
- op.banner = "usage: #{File.basename(__FILE__)} -d PATTERN <file>.srf ..."
26
- op.separator " #{File.basename(__FILE__)} -d PATTERN <file>.srg"
27
- op.separator " #{File.basename(__FILE__)} <normal>.srg <decoy>.srg"
28
- op.separator ""
29
- op.separator " creates necessary meta files in current working directory and"
30
- op.separator " runs command '#{perc_cmd}'"
31
- op.separator ""
32
- op.separator " (all in current working directory)"
33
- op.separator " 1) (if given .srf files) creates file: #{default_srg}"
34
- op.separator " 2) creates .sqt file for each srf file (placed in dir with srf file)"
35
- op.separator " 3) creates percolator (meta) input file(s): #{sqg_in}"
36
- op.separator " [and for separate searches: #{sqg_decoy}]"
37
- op.separator " 4) creates a percolator (meta) output file: #{perc_out}"
38
- op.separator " 5) runs percolator which creates a a #{perc_ext} for each .srf file"
39
- op.separator " 6) captures stdout in #{perc_stdout} and stderr in #{perc_stderr}"
40
- op.separator ""
41
- op.separator " .srg files are text files with full paths to .srf files"
42
- op.separator " create with command 'srf_group.rb'"
43
- op.separator ""
44
- op.on("-d", "--decoy <pattern>", "decoy pattern, eg.: -d REVERSE_") {|v| opt[:decoy] = v }
45
- op.on("-c", "--clean", "removes ALL generated files except #{perc_ext}") {|v| opt[:clean] = v }
46
- op.on("-v", "--verbose", "spits out info") {|v| $VERBOSE = v }
47
- end
48
- opts.parse!
49
-
50
- if ARGV.size == 0 or (!opt[:decoy] && (ARGV.size != 2))
51
- puts opts.to_s
52
- exit
53
- end
54
-
55
- #raise RunTimeError, "command #{perc_cmd} must be callable!" unless `#{perc_cmd}`.match(/Usage/)
56
-
57
- files = ARGV.to_a
58
-
59
- # create srg file:
60
- srg_files =
61
- if files[0] =~ /\.srf$/i
62
- obj = SRFGroup.new
63
- obj.filenames = files.to_a
64
- puts("CREATING: #{default_srg}") if $VERBOSE
65
- obj.to_srg(default_srg)
66
- toclean << default_srg
67
- [default_srg]
68
- elsif files[0] =~ /\.srg$/i
69
- files
70
- else
71
- abort "files must have proper extensions"
72
- end
73
-
74
- # create the sqt files:
75
- all_sqt_filenames = srg_files.map do |srg_file|
76
- srf_filenames = SRFGroup.srg_to_paths(srg_file)
77
- srf_filenames.map do |file|
78
- new_filename = file.sub(/\.srf$/i, '.sqt')
79
- puts("CREATING: #{new_filename}") if $VERBOSE
80
- SRFGroup.new([file], opt['filter']).srfs.first.to_sqt(new_filename)
81
- toclean << new_filename
82
- new_filename
83
- end
84
- end
85
-
86
- # create the percolator input file:
87
- all_sqt_filenames.zip(file_hash.values_at(:sqg_in, :sqg_decoy)) do |sqt_filenames,filename|
88
- puts("CREATING: #{filename}") if $VERBOSE
89
- File.open(filename, 'w') {|fh| fh.puts(sqt_filenames.join("\n")) }
90
- toclean << filename
91
- end
92
-
93
- # create the percolator output file:
94
- psqt_filenames = all_sqt_filenames[0].map do |file|
95
- file.sub(/\.sqt$/, perc_ext)
96
- end
97
-
98
- puts("CREATING: #{perc_out}") if $VERBOSE
99
- File.open(perc_out, 'w') {|fh| fh.puts(psqt_filenames.join("\n")) }
100
- toclean << perc_out
101
-
102
- # run percolator
103
- to_run =
104
- if opt[:decoy]
105
- "#{perc_cmd} -o #{perc_out} -P #{opt[:decoy]} #{sqg_in} 1>#{perc_stdout} 2>#{perc_stderr}"
106
- else
107
- "#{perc_cmd} -o #{perc_out} #{sqg_in} #{sqg_decoy} 1>#{perc_stdout} 2>#{perc_stderr}"
108
- end
109
-
110
- puts("RUNNING: #{to_run}") if $VERBOSE
111
- `#{to_run}`
112
-
113
- toclean << perc_stdout
114
- toclean << perc_stderr
115
-
116
- if opt[:clean]
117
- toclean.each do |file|
118
- puts("REMOVING: #{file}") if $VERBOSE
119
- File.unlink(file) if File.exist?(file)
120
- end
121
- end
122
-
data/bin/sqt_group.rb DELETED
@@ -1,26 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- require 'optparse'
4
- require 'spec_id/sqt'
5
-
6
- $OUTFILE = 'bioworks.sqg'
7
-
8
- opts = OptionParser.new do |op|
9
- op.banner = "usage: #{File.basename(__FILE__)} <file1>.sqt <file2>.sqt ..."
10
- op.separator "outputs: 'bioworks.sqg'"
11
- op.separator ""
12
- op.separator " A '.sqg' file is an ascii text file with a list"
13
- op.separator " of the sqt files (full path names) in that group."
14
- op.separator ""
15
- op.on('-o', '--output <filename>', 'a different output name') {|v| $OUTFILE }
16
- end
17
-
18
- if ARGV.size == 0
19
- puts opts
20
- exit
21
- end
22
-
23
- obj = SQTGroup.new
24
- obj.filenames = ARGV.to_a
25
- obj.to_sqg($OUTFILE)
26
-
data/bin/srf_group.rb DELETED
@@ -1,27 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
-
4
- require 'optparse'
5
- require 'spec_id/srf'
6
-
7
- $OUTFILE = 'bioworks.srg'
8
-
9
- opts = OptionParser.new do |op|
10
- op.banner = "usage: #{File.basename(__FILE__)} <file1>.srf <file2>.srf ..."
11
- op.separator "outputs: 'bioworks.srg'"
12
- op.separator ""
13
- op.separator " A '.srg' file is an ascii text file with a list"
14
- op.separator " of the srf files (full path names) in that group."
15
- op.separator ""
16
- op.on('-o', '--output <filename>', 'a different output name') {|v| $OUTFILE }
17
- end
18
-
19
- if ARGV.size == 0
20
- puts opts
21
- exit
22
- end
23
-
24
- obj = SRFGroup.new
25
- obj.filenames = ARGV.to_a
26
- obj.to_srg($OUTFILE)
27
-
data/bin/srf_to_sqt.rb DELETED
@@ -1,40 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- require 'spec_id/srf'
4
- require 'optparse'
5
-
6
-
7
- opt = {}
8
- opt['db-info'] = false
9
- opt['db-path'] = nil
10
- opt['filter'] = true
11
- opts = OptionParser.new do |op|
12
- op.banner = "usage: #{File.basename(__FILE__)} [OPTIONS] <file>.srf ..."
13
- op.separator "outputs: <file>.sqt ..."
14
- op.separator ""
15
- op.separator "OPTIONS"
16
- op.on("-d", "--db-info", "calculates num aa's and md5sum on db") {|v| opt['db-info'] = v }
17
- op.on("-p", "--db-path <path_to_dir>", "if your database path has changed",
18
- "and you want db-info, then give the",
19
- "path to the new *directory*",
20
- "e.g. /my/new/path") {|v| opt['db-path'] = v }
21
- op.on("-u", "--db-update", "update the sqt file to reflect --db-path") {|v| opt['db-update'] = v }
22
- op.on("-n", "--no-filter", "by default, pephit must be within",
23
- "peptide_mass_tolerance (defined in params)",
24
- "to be displayed. Turns this off.") {|v| opt['filter'] = false}
25
- op.on("-r", "--round", "round floating point values reasonably") {|v| opt['round'] = v }
26
- end
27
-
28
- opts.parse!
29
-
30
- if ARGV.size == 0
31
- puts opts.to_s
32
- exit
33
- end
34
-
35
- ARGV.each do |file|
36
- abort "file #{file} must be named .srf" if file !~ /\.srf$/i
37
- new_filename = file.sub(/\.srf$/i, '.sqt')
38
- SRFGroup.new([file], opt['filter']).srfs.first.to_sqt(new_filename, :db_info => opt['db-info'], :new_db_path => opt['db-path'], :update_db_path => opt['db-update'], :round => opt['round'])
39
- end
40
-
data/lib/align/chams.rb DELETED
@@ -1,78 +0,0 @@
1
-
2
- require 'ms/msrun'
3
-
4
- module Align; end
5
- class Align::CHAMS
6
-
7
- # Avg_score 0.52559
8
- # Scan1 Scan2 Edge_cost Path_cost Edge_direction
9
- attr_accessor :avg_score, :time_mscans, :time_nscans, :mscans, :nscans, :edge_costs, :path_costs, :directions
10
-
11
- # requires an object that will respond to [<scan_num>] to give time
12
- # (seconds) for each file
13
- def initialize(chams_file, time_by_scan_num1, time_by_scan_num2)
14
- @time_mscans = []
15
- @time_nscans = []
16
- @mscans = []
17
- @nscans = []
18
- @edge_costs = []
19
- @path_costs = []
20
- @directions = []
21
- read_chams_file(chams_file)
22
- @mscans.each_with_index do |scan,i|
23
- @time_mscans[i] = time_by_scan_num1[scan]
24
- end
25
- @nscans.each_with_index do |scan,i|
26
- @time_nscans[i] = time_by_scan_num2[scan]
27
- end
28
- end
29
-
30
- def read_chams_file(chams_file)
31
- File.open(chams_file).each do |line|
32
- if line =~ /[\d\w]/
33
- if line =~ /^# Avg_score ([\.\d])/
34
- @avg_score = $1.to_f
35
- next
36
- end
37
- end
38
- if line =~ /^#/
39
- next
40
- end
41
- arr = line.chomp.split(/\s+/)
42
- @mscans.push arr[0].to_i
43
- @nscans.push arr[1].to_i
44
- @edge_costs.push arr[2].to_f
45
- @path_costs.push arr[3].to_f
46
- @directions.push arr[4].to_f
47
- end
48
- @mscans.reverse!
49
- @nscans.reverse!
50
- @edge_costs.reverse!
51
- @path_costs.reverse!
52
- @directions.reverse!
53
- end
54
-
55
- def write_my_chams_file(filename)
56
- File.open(filename, "w") do |fh|
57
- ## As columns:
58
- #(0...@mscans.size).each do |i|
59
- # fh.print @time_mscans[i].to_s + " "
60
- # fh.print @time_nscans[i].to_s + " "
61
- # fh.print @mscans[i].to_s + " "
62
- # fh.print @nscans[i].to_s + " "
63
- # fh.print @edge_costs[i].to_s + "\n"
64
- #end
65
-
66
- # As rows:
67
- fh.print @time_mscans.join(" ") + "\n"
68
- fh.print @time_nscans.join(" ") + "\n"
69
- fh.print @mscans.join(" ") + "\n"
70
- fh.print @nscans.join(" ") + "\n"
71
- fh.print @edge_costs.join(" ") + "\n"
72
- end
73
- end
74
-
75
- end
76
-
77
-
78
-
data/lib/align.rb DELETED
@@ -1,154 +0,0 @@
1
-
2
- #require 'ms/parser'
3
- #require 'ms/parser/mzxml'
4
- require 'ms/msrun'
5
- require 'spec_id/proph'
6
- require 'vec'
7
-
8
- require 'pp'
9
-
10
- class Align
11
-
12
- # Returns an array of peptides where each peptide passes threshold criteria
13
- # and each has been updated with scans, and dta filenames dependent on
14
- # matching with the basename_noext of the mztimes files.
15
- # Each peptide is guaranteed unique by sequence+charge
16
- # mztimes is an array of mzXML or .timeIndex files
17
- # if a peptide has no scans in the given mztimes set, it is discarded
18
- def peps_with_scans(mztimes, prot_xml, pep_proph_xml, prot_prob=1.0, pep_init_prob=1.0, pep_nsp_prob=1.0)
19
-
20
- ## Create scan indices on msrun name
21
- if mztimes.class != Array ; mztimes = [mztimes] end
22
- msrun_indices = mztimes.collect do |file| MS::MSRunIndex.new(file) end
23
- scanindex_by_basename_noext = {}
24
- msrun_indices.each do |runindex|
25
- scanindex_by_basename_noext[runindex.basename_noext] = runindex.scans_by_num
26
- end
27
-
28
- dta_filenames = Proph::Pep::Parser.new.dta_filenames_by_seq_charge(pep_proph_xml, "regex")
29
-
30
- parser = Proph::Prot::Parser.new
31
- parser.get_prots_and_peps(prot_xml, prot_prob, pep_init_prob, pep_nsp_prob, "regex")
32
- peptides = parser.peps
33
- peptides = Proph::Pep.uniq_by_seqcharge(peptides)
34
- ## we update each peptide with a list of dtafilenames
35
- ## then we update with a parallel list of scans (one for each dtafn...
36
- ## unless there are multiple scans associated with each filename
37
- ## in which case it will be an array
38
- _update_filenames(peptides, dta_filenames)
39
- peptides = _update_and_filter_by_scans(peptides, scanindex_by_basename_noext)
40
- return peptides
41
- end
42
-
43
- # takes the list of filenames for each peptide, and adds a scan
44
- # indexed from by scanindex
45
- # If keys are not in scanindex_by_basename_noext, then the scan is not
46
- # in the peptide!
47
- # if a peptide has no scans, it is not returned
48
- # if a filename is not recognized, it is dropped from the list
49
- def _update_and_filter_by_scans(peptides, scanindex_by_basename_noext)
50
- newpeps = []
51
- peptides.each do |pep|
52
- newfilenames = []
53
- pep.filenames.each do |dtafilename|
54
- (dtabase,first,last,charge) = dtafilename.split('.')
55
- if scanindex_by_basename_noext.key?(dtabase)
56
- newfilenames << dtafilename
57
- if first == last
58
- pep.scans << scanindex_by_basename_noext[dtabase][first.to_i]
59
- else
60
- scans = (first.to_i...last.to_i).collect do |index|
61
- scanindex_by_basename_noext[dtabase].scans_by_num[index]
62
- end
63
- pep.scans << scans
64
- end
65
- else
66
- end
67
- end
68
- pep.filenames = newfilenames
69
- if pep.scans.size > 0
70
- newpeps << pep
71
- end
72
- end
73
- newpeps
74
- end
75
-
76
- # takes an array of peptide arrays
77
- # will find the overlapping set
78
- # returns an array of peptide arrays
79
- # assumes that each pep_group is unique on sequence+charge
80
- def overlapping_peps_by_seqcharge(pep_groups)
81
- ## CREATE overlapping set:
82
- hashes = pep_groups.collect do |group|
83
- group.hash_uniq_by(:sequence, :charge)
84
- end
85
- pep_keys = hashes.collect do |hash|
86
- hash.collect do |k,v| k end
87
- end
88
- olapping_keys = pep_keys.inject do |olap,obj|
89
- olap & obj
90
- end
91
- pep_arrays = hashes.collect do |hash|
92
- pep_array = olapping_keys.collect do |k|
93
- hash[k]
94
- end
95
- end
96
- end
97
-
98
- # tosses out any peptides from pep_groups where the
99
- # arithmetic_avg_scan_by_parent_time.time is greater than 'deviations' from
100
- # the least squares regression line assumes that each peptide is parallel
101
- # (performed iteratively)
102
- def toss_outliers(pep_groups, deviations=0.0)
103
- arr_of_vecs = pep_groups.collect do |peps|
104
- time_arr = peps.collect do |pep|
105
- pep.arithmetic_avg_scan_by_parent_time.time
106
- end
107
- VecD.new(time_arr)
108
- end
109
-
110
- # in the future this could be expanded for multiple dimensions
111
- indices = arr_of_vecs.first.outliers_iteratively(deviations, arr_of_vecs[1])
112
-
113
- # remove the peptides that are outliers
114
- #pep_groups.each do |peps| puts peps.size.to_s end
115
- pep_groups.each do |peps|
116
- indices.each do |i| peps.delete_at(i) end
117
- end
118
- #pep_groups.each do |peps| puts peps.size.to_s end
119
- pep_groups
120
- end
121
-
122
- # max_dups will toss out any peptides having > max_dups dtafilenames
123
- # Currently, this will only take 2 groups of peptides
124
- def overlapping_peps_by_seqcharge_with_filter(pep_groups, max_dups=nil, outlier_cutoff=0.0)
125
- pep_groups.collect! do |pep_group|
126
- pep_group.first.class.filter_by_max_dup_scans(max_dups, pep_group)
127
- end
128
- pep_groups = overlapping_peps_by_seqcharge(pep_groups)
129
- toss_outliers(pep_groups, outlier_cutoff)
130
- pep_groups
131
- end
132
-
133
- def _update_filenames(peptides, dta_filenames_by_seq_charge)
134
- peptides.each do |pep|
135
- pep.filenames = dta_filenames_by_seq_charge[[pep.sequence, pep.charge]]
136
- end
137
- end
138
-
139
- # Returns a hash[dtabase] -> [pep, ...]
140
- # Proteins must have peptides
141
- def _peps_by_dtabase(peptides)
142
- ## organize peptides based on filenames
143
- peptides_by_dtabase = Hash.new{|h,k| h[k] = [] }
144
- peptides.each do |k,pep|
145
- pep.filenames.each do |fn|
146
- file = fn.split(".")[0]
147
- peptides_by_file[file] << pep
148
- end
149
- end
150
- peptides_by_dtabase
151
- end
152
-
153
-
154
- end
data/lib/archive/targz.rb DELETED
@@ -1,94 +0,0 @@
1
-
2
-
3
- require 'archive/tar/minitar'
4
-
5
- require 'stringio'
6
-
7
- module Archive::Tar::Minitar
8
-
9
- # entry may be a string (the name), or it may be a hash specifying the
10
- # following:
11
- # :name (REQUIRED)
12
- # :mode 33188 (rw-r--r--) for files, 16877 (rwxr-xr-x) for dirs
13
- # (0O100644) (0O40755)
14
- # :uid nil
15
- # :gid nil
16
- # :mtime Time.now
17
- #
18
- # if data == nil, then this is considered a directory!
19
- # (use an empty string for a normal empty file)
20
- # data should be something that can be opened by StringIO
21
- def self.pack_as_file(entry, data, outputter) #:yields action, name, stats:
22
- outputter = outputter.tar if outputter.kind_of?(Archive::Tar::Minitar::Output)
23
-
24
- stats = {}
25
- stats[:uid] = nil
26
- stats[:gid] = nil
27
- stats[:mtime] = Time.now
28
-
29
- if data.nil?
30
- # a directory
31
- stats[:size] = 4096 # is this OK???
32
- stats[:mode] = 16877 # rwxr-xr-x
33
- else
34
- stats[:size] = data.size
35
- stats[:mode] = 33188 # rw-r--r--
36
- end
37
-
38
- if entry.kind_of?(Hash)
39
- name = entry[:name]
40
-
41
- entry.each { |kk, vv| stats[kk] = vv unless vv.nil? }
42
- else
43
- name = entry
44
- end
45
-
46
- if data.nil? # a directory
47
- yield :dir, name, stats if block_given?
48
- outputter.mkdir(name, stats)
49
- else # a file
50
- outputter.add_file_simple(name, stats) do |os|
51
- stats[:current] = 0
52
- yield :file_start, name, stats if block_given?
53
- StringIO.open(data, "rb") do |ff|
54
- until ff.eof?
55
- stats[:currinc] = os.write(ff.read(4096))
56
- stats[:current] += stats[:currinc]
57
- yield :file_progress, name, stats if block_given?
58
- end
59
- end
60
- yield :file_done, name, stats if block_given?
61
- end
62
- end
63
- end
64
- end
65
-
66
-
67
- require 'zlib'
68
- file_names = ['wiley/dorky1', 'dorky2', 'an_empty_dir']
69
- file_data_strings = ['my data', 'my data also', nil]
70
-
71
-
72
- module Archive ; end
73
-
74
- # usage:
75
- # require 'archive/targz'
76
- # Archive::Targz.archive_as_files("myarchive.tgz", %w(file1 file2 dir),
77
- # ['data for file1', 'data for file2', nil])
78
- module Archive::Targz
79
- # requires an archive_name (e.g., myarchive.tgz) and parallel filename and
80
- # data arrays:
81
- # filenames = %w(file1 file2 empty_dir)
82
- # data_ar = ['stuff in file 1', 'stuff in file2', nil]
83
- # nil as an entry in the data_ar means that an empty directory will be
84
- # created
85
- def self.archive_as_files(archive_name, filenames=[], data_ar=[])
86
- tgz = Zlib::GzipWriter.new(File.open(archive_name, 'wb'))
87
-
88
- Archive::Tar::Minitar::Output.open(tgz) do |outp|
89
- filenames.zip(data_ar) do |name, data|
90
- Archive::Tar::Minitar.pack_as_file(name, data, outp)
91
- end
92
- end
93
- end
94
- end