mspire 0.5.0 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (107) hide show
  1. data/README.rdoc +24 -0
  2. data/Rakefile +51 -0
  3. data/VERSION +1 -0
  4. data/lib/cv/description.rb +18 -0
  5. data/lib/cv/param.rb +33 -0
  6. data/lib/cv.rb +3 -0
  7. data/lib/io/bookmark.rb +13 -0
  8. data/lib/merge.rb +7 -0
  9. data/lib/ms/cvlist.rb +76 -0
  10. data/lib/ms/digester.rb +245 -0
  11. data/lib/ms/fasta.rb +86 -0
  12. data/lib/ms/ident/peptide/db.rb +243 -0
  13. data/lib/ms/ident/peptide.rb +72 -0
  14. data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
  15. data/lib/ms/ident/peptide_hit.rb +26 -0
  16. data/lib/ms/ident/pepxml/modifications.rb +83 -0
  17. data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
  18. data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
  19. data/lib/ms/ident/pepxml/parameters.rb +14 -0
  20. data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
  21. data/lib/ms/ident/pepxml/search_database.rb +49 -0
  22. data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
  23. data/lib/ms/ident/pepxml/search_hit.rb +144 -0
  24. data/lib/ms/ident/pepxml/search_result.rb +35 -0
  25. data/lib/ms/ident/pepxml/search_summary.rb +92 -0
  26. data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
  27. data/lib/ms/ident/pepxml.rb +112 -0
  28. data/lib/ms/ident/protein.rb +33 -0
  29. data/lib/ms/ident/protein_group.rb +80 -0
  30. data/lib/ms/ident/search.rb +114 -0
  31. data/lib/ms/ident.rb +37 -0
  32. data/lib/ms/isotope/aa.rb +59 -0
  33. data/lib/ms/mascot.rb +6 -0
  34. data/lib/ms/mass/aa.rb +79 -0
  35. data/lib/ms/mass.rb +55 -0
  36. data/lib/ms/mzml/index_list.rb +98 -0
  37. data/lib/ms/mzml/plms1.rb +34 -0
  38. data/lib/ms/mzml.rb +197 -0
  39. data/lib/ms/obo.rb +38 -0
  40. data/lib/ms/plms1.rb +156 -0
  41. data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
  42. data/lib/ms/quant/qspec.rb +112 -0
  43. data/lib/ms/spectrum.rb +154 -8
  44. data/lib/ms.rb +3 -10
  45. data/lib/msplat.rb +2 -0
  46. data/lib/obo/ims.rb +5 -0
  47. data/lib/obo/ms.rb +7 -0
  48. data/lib/obo/ontology.rb +41 -0
  49. data/lib/obo/unit.rb +5 -0
  50. data/lib/openany.rb +23 -0
  51. data/lib/write_file_or_string.rb +18 -0
  52. data/obo/ims.obo +562 -0
  53. data/obo/ms.obo +11677 -0
  54. data/obo/unit.obo +2563 -0
  55. data/spec/ms/cvlist_spec.rb +60 -0
  56. data/spec/ms/digester_spec.rb +351 -0
  57. data/spec/ms/fasta_spec.rb +100 -0
  58. data/spec/ms/ident/peptide/db_spec.rb +108 -0
  59. data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
  60. data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
  61. data/spec/ms/ident/pepxml_spec.rb +442 -0
  62. data/spec/ms/ident/protein_group_spec.rb +68 -0
  63. data/spec/ms/mass_spec.rb +8 -0
  64. data/spec/ms/mzml/index_list_spec.rb +122 -0
  65. data/spec/ms/mzml/plms1_spec.rb +62 -0
  66. data/spec/ms/mzml_spec.rb +50 -0
  67. data/spec/ms/plms1_spec.rb +38 -0
  68. data/spec/ms/quant/qspec_spec.rb +25 -0
  69. data/spec/msplat_spec.rb +24 -0
  70. data/spec/obo_spec.rb +25 -0
  71. data/spec/spec_helper.rb +25 -0
  72. data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
  73. data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
  74. data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
  75. data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
  76. data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
  77. data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
  78. data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
  79. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
  80. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
  81. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
  82. data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
  83. data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
  84. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
  85. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
  86. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
  87. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
  88. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
  89. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
  90. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
  91. data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
  92. data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
  93. data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
  94. data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
  95. data/spec/testfiles/plms1/output.key +0 -0
  96. metadata +157 -40
  97. data/README +0 -77
  98. data/changelog.txt +0 -196
  99. data/lib/ms/calc.rb +0 -32
  100. data/lib/ms/data/interleaved.rb +0 -60
  101. data/lib/ms/data/lazy_io.rb +0 -73
  102. data/lib/ms/data/lazy_string.rb +0 -15
  103. data/lib/ms/data/simple.rb +0 -59
  104. data/lib/ms/data/transposed.rb +0 -41
  105. data/lib/ms/data.rb +0 -57
  106. data/lib/ms/format/format_error.rb +0 -12
  107. data/lib/ms/support/binary_search.rb +0 -126
@@ -0,0 +1,165 @@
1
+ require 'merge'
2
+ require 'strscan'
3
+
4
+ module MS ; end
5
+ module MS::Ident ; end
6
+ class MS::Ident::Pepxml ; end
7
+
8
# The enzyme used to digest the sample.  Writes and reads a pepXML
# <sample_enzyme> element that carries a single <specificity> child.
class MS::Ident::Pepxml::SampleEnzyme
  include Merge
  # an identifier
  attr_accessor :name
  # amino acids after which to cleave
  attr_accessor :cut
  # cleave at 'cut' amino acids UNLESS it is followed by 'no_cut'
  attr_accessor :no_cut
  # 'C' or 'N'
  attr_accessor :sense

  # arg is either:
  #
  # * a String naming a recognized enzyme, meaning there is a set_<name>
  #   method for it (currently only 'trypsin'), or
  # * a Hash, which is handed to merge! (from the Merge mixin; presumably it
  #   assigns :name, :cut, :no_cut and :sense -- confirm against Merge).
  #
  # For enzymes without a set_<name> method you must set :name, :cut,
  # :no_cut and :sense yourself.
  def initialize(arg={})
    if arg.is_a?(String)
      @name = arg
      send("set_#{@name}".to_sym)
    else
      merge!(arg)
    end
  end

  # Configures this object as trypsin: C-terminal cleavage after K or R
  # unless followed by P.
  def set_trypsin
    @sense = 'C'
    @cut = 'KR'
    @no_cut = 'P'
  end

  # if an xml builder object is given, it adds to the object and returns the
  # builder object, otherwise it returns an xml fragment string
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    xmlb.sample_enzyme(:name => name) do |xml|
      xml.specificity(:cut => cut, :no_cut => no_cut, :sense => sense)
    end
    builder || xmlb.doc.root.to_xml
  end

  # Fills name, cut, no_cut and sense from a <sample_enzyme> node.
  # returns self
  def from_pepxml_node(node)
    self.name = node['name']
    # NOTE(review): node.child is the node's first child; this assumes it is
    # the <specificity> element (not a whitespace text node) -- confirm.
    ch = node.child
    self.cut = ch['cut']
    self.no_cut = ch['no_cut']
    self.sense = ch['sense']
    self
  end

  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # takes an amino acid sequence (e.g. PEPTIDE).
  # returns the number of missed cleavages, i.e. the number of 'cut'
  # residues that are not the last residue and are not followed by a
  # 'no_cut' residue.
  #
  # Raises NotImplementedError for N terminal sense.
  def num_missed_cleavages(aaseq)
    raise NotImplementedError, 'need to implement for N terminal sense' if sense == 'N'
    aaseq.scan(missed_cleavage_regex).size
  end

  # No arguments should contain non-standard amino acids
  #
  # prev_aa and next_aa are the residues flanking the peptide ('-' if none)
  # and middle is the peptide sequence itself.  Returns 0, 1 or 2: the
  # number of peptide termini consistent with cleavage by this enzyme.
  #
  # Raises NotImplementedError for N terminal sense.
  def num_tol_term(prev_aa, middle, next_aa)
    raise NotImplementedError, 'need to implement for N terminal sense' if sense == 'N'
    no_cut = @no_cut || ''
    num_tol = 0
    last_of_middle = middle[-1,1]
    first_of_middle = middle[0,1]
    # N-terminal side: protein start, or a legal cut right before the peptide
    if ( @cut.include?(prev_aa) && !no_cut.include?(first_of_middle) ) || prev_aa == '-'
      num_tol += 1
    end
    # C-terminal side: protein end, or a legal cut right after the peptide
    if ( @cut.include?(last_of_middle) && !no_cut.include?(next_aa) ) || next_aa == '-'
      num_tol += 1
    end
    num_tol
  end

  private

  # Regex matching one internal cleavage site.  The following residue is
  # checked with a lookahead so it is not consumed: adjacent sites such as
  # the two K's in "AKKA" are each counted, and a 'cut' residue at the very
  # end of the sequence (nothing follows it) is never counted.
  # The regex is cached, keyed on the current cut/no_cut values so that
  # changing either attribute rebuilds it.
  def missed_cleavage_regex
    key = "#{@cut}\0#{@no_cut}"
    unless @num_missed_cleavages_regex_key == key
      follower = (@no_cut && @no_cut != '') ? "[^#{@no_cut}]" : '.'
      @num_missed_cleavages_regex = /[#{@cut}](?=#{follower})/
      @num_missed_cleavages_regex_key = key
    end
    @num_missed_cleavages_regex
  end
end
100
+
101
+ ###################################################
102
+ ###################################################
103
+ ###################################################
104
+ ###################################################
105
+ # This is digestion methodology:
106
+
107
+ =begin
108
+ # returns all peptides of missed cleavages <= 'missed_cleavages'
109
+ # so 2 missed cleavages will return all no missed cleavage peptides
110
+ # all 1 missed cleavages and all 2 missed cleavages.
111
+ # options:
112
+ def digest(string, missed_cleavages=0, options={})
113
+ raise NotImplementedError if @sense == 'N'
114
+ s = StringScanner.new(string)
115
+ no_cut_regex = Regexp.new("[#{@no_cut}]")
116
+ regex = Regexp.new("[#{@cut}]")
117
+ peps = []
118
+ last_pos = 0
119
+ current_pep = ''
120
+ loop do
121
+ if s.eos?
122
+ break
123
+ end
124
+ m = s.scan_until(regex)
125
+ if m ## found a cut point
126
+ last_pos = s.pos
127
+ # is the next amino acid a no_cut?
128
+ if string[s.pos,1] =~ no_cut_regex
129
+ current_pep << m
130
+ else
131
+ # cut it
132
+ current_pep << m
133
+ peps << current_pep
134
+ current_pep = ''
135
+ end
136
+ else ## didn't find a cut point
137
+ current_pep << string[last_pos..-1]
138
+ peps << current_pep
139
+ break
140
+ end
141
+ end
142
+ ## LOOP through and grab each set of missed cleavages from num down to 0
143
+ all_sets_of_peps = []
144
+ (0..missed_cleavages).to_a.reverse.each do |num_mc|
145
+ all_sets_of_peps.push( *(get_missed_cleavages(peps, num_mc)) )
146
+ end
147
+ all_sets_of_peps
148
+ end
149
+
150
+ # takes an array of peptides and returns an array containing 'num' missed
151
+ # cleavages
152
+ # DOES NOT contain peptides that contain < num of missed cleavages
153
+ # (i.e., will not return missed cleavages of 1 or 2 if num == 3)
154
+ def get_missed_cleavages(ar_of_peptide_seqs, num)
155
+ (0...(ar_of_peptide_seqs.size - num)).to_a.map do |i|
156
+ ar_of_peptide_seqs[i,num+1].join
157
+ end
158
+ end
159
+
160
+ def self.tryptic(string, missed_cleavages=0)
161
+ self.new("trypsin").digest(string, missed_cleavages)
162
+ end
163
+
164
+ end
165
+ =end
@@ -0,0 +1,49 @@
1
+ require 'ms/fasta'
2
+ require 'merge'
3
+ module MS ; end
4
+ module MS::Ident ; end
5
+
6
class MS::Ident::Pepxml
  # The protein sequence database that was searched; written out as a
  # <search_database> element.
  class SearchDatabase
    include Merge
    # required! the local, full path to the protein sequence database
    attr_accessor :local_path
    # required! 'AA' or 'NA'
    attr_accessor :seq_type

    # optional
    attr_accessor :database_name
    # optional
    attr_accessor :orig_database_url
    # optional
    attr_accessor :database_release_date
    # optional
    attr_accessor :database_release_identifier
    # optional
    attr_accessor :size_of_residues

    # takes a hash to fill in values (handed to merge! from the Merge mixin).
    # If get_size_of_residues is true and local_path names an existing file,
    # size_of_residues is computed from that file.
    def initialize(hash={}, get_size_of_residues=false)
      merge!(hash)
      # local_path may not have been supplied; File.exist?(nil) would raise
      # a TypeError, so check for it first
      if get_size_of_residues && @local_path && File.exist?(@local_path)
        set_size_of_residues!
      end
    end

    # Sums the sequence length of every entry that MS::Fasta.foreach yields
    # for local_path and stores the total in size_of_residues.
    # returns self for chaining
    def set_size_of_residues!
      @size_of_residues = 0
      MS::Fasta.foreach(@local_path) do |entry|
        @size_of_residues += entry.sequence.size
      end
      self
    end

    # Adds a search_database element to builder, using every attribute that
    # is set (nil/false values are left out).  Returns builder.
    #
    # NOTE(review): the sequence type is emitted under the attribute name
    # seq_type; confirm against the pepXML schema whether it should be
    # written as 'type' instead.
    def to_xml(builder)
      attrs = [:local_path, :seq_type, :database_name, :orig_database_url, :database_release_date, :database_release_identifier, :size_of_residues].map {|k| v=send(k) ; [k, v] if v }.compact
      builder.search_database(Hash[attrs])
      builder
    end
  end

end
@@ -0,0 +1,79 @@
1
+ require 'andand'
2
+ require 'nokogiri'
3
+
4
module MS ; end
module MS::Ident ; end
class MS::Ident::Pepxml ; end
class MS::Ident::Pepxml::SearchHit ; end


# Positions and masses of modifications
MS::Ident::Pepxml::SearchHit::ModificationInfo = Struct.new(:modified_peptide, :mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass) do
  ## Should be something like this:
  # <modification_info mod_nterm_mass=" " mod_cterm_mass=" " modified_peptide=" ">
  #   <mod_aminoacid_mass position=" " mass=" "/>
  # </modification_info>
  # e.g.:
  # <modification_info modified_peptide="GC[546]M[147]PSKEVLSAGAHR">
  #   <mod_aminoacid_mass position="2" mass="545.7160"/>
  #   <mod_aminoacid_mass position="3" mass="147.1926"/>
  # </modification_info>

  # Mass of modified N terminus
  #attr_accessor :mod_nterm_mass
  # Mass of modified C terminus
  #attr_accessor :mod_cterm_mass
  # Peptide sequence (with indicated modifications)  I'm assuming that the
  # native sequest indicators are OK here
  #attr_accessor :modified_peptide

  # These are objects of type: ...ModAminoacidMass
  # position ranges from 1 to peptide length
  #attr_accessor :mod_aminoacid_masses

  # Takes either the ordered member values or a single Hash keyed by member
  # name (:modified_peptide, :mod_aminoacid_masses, :mod_nterm_mass,
  # :mod_cterm_mass).
  def initialize(*args)
    if args.first.is_a?(Hash)
      args = args.first.values_at(*members)
    end
    super(*args)
  end

  # Adds a modification_info element (with one child per entry of
  # mod_aminoacid_masses) to builder and returns builder; without a builder
  # returns the element as a string.  Attributes that are not set are left
  # out.
  # Will escape any xml special chars in modified_peptide
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    ## Collect only the attributes that are set.  The nil entries must be
    ## dropped before Hash[] sees them (Hash[] rejects non-pair elements).
    atts = [:mod_nterm_mass, :mod_cterm_mass, :modified_peptide]
    pairs = atts.map {|at| v=send(at) ; [at, v] if v }.compact
    xmlb.modification_info(Hash[pairs]) do |xml|
      # mod_aminoacid_masses may be nil when there are no residue mods
      (mod_aminoacid_masses || []).each do |mod_aa_mass|
        mod_aa_mass.to_xml(xml)
      end
    end
    builder || xmlb.doc.root.to_s
  end

  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # Fills this object from a <modification_info> node: the three attributes
  # are copied as given, and every <mod_aminoacid_mass> child becomes a
  # ModAminoacidMass (Integer position, Float mass).  Other children (e.g.
  # whitespace text nodes) are skipped.
  # returns self
  def from_pepxml_node(node)
    self.modified_peptide = node['modified_peptide']
    self.mod_nterm_mass = node['mod_nterm_mass']
    self.mod_cterm_mass = node['mod_cterm_mass']
    mass_class = MS::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass
    mass_nodes = node.children.select {|child| child.name == 'mod_aminoacid_mass' }
    self.mod_aminoacid_masses = mass_nodes.map do |mass_n|
      mass_class.new(mass_n['position'].to_i, mass_n['mass'].to_f)
    end
    self
  end
end
73
+
74
# The mass of one modified residue: position is the residue's position in
# the peptide and mass is the mass recorded for it.
MS::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass = Struct.new(:position, :mass) do
  # Adds a mod_aminoacid_mass element carrying position and mass to builder.
  # returns builder
  def to_xml(builder)
    builder.tap {|xml| xml.mod_aminoacid_mass(:position => position, :mass => mass) }
  end
end
@@ -0,0 +1,144 @@
1
+ require 'set'
2
+ require 'merge'
3
+ require 'nokogiri'
4
+
5
+ module MS ; end
6
+ module MS::Ident ; end
7
+
8
+
9
class MS::Ident::Pepxml

  # A single peptide hit for a spectrum query; written out as a pepXML
  # <search_hit> element.
  class MS::Ident::Pepxml::SearchHit
    include Merge

    DEFAULT_MEMBERS = [:hit_rank, :peptide, :peptide_prev_aa, :peptide_next_aa, :num_matched_ions, :tot_num_ions, :calc_neutral_pep_mass, :massdiff, :num_tol_term, :num_missed_cleavages, :is_rejected, :protein, :num_tot_proteins, :protein_desc, :calc_pI, :protein_mw, :modification_info, :search_scores, :spectrum_query]

    Required = Set.new([:hit_rank, :peptide, :protein, :num_tot_proteins, :calc_neutral_pep_mass, :massdiff])

    class << self
      attr_writer :members
      # the ordered attribute names (DEFAULT_MEMBERS unless overridden)
      def members
        @members || DEFAULT_MEMBERS
      end
    end

    members.each {|memb| attr_accessor memb }

    # rank of the peptide hit (required)
    attr_accessor :hit_rank
    # Peptide aminoacid sequence (with no indicated modifications) (required)
    attr_accessor :peptide

    # Aminoacid preceding peptide ('-' if none)
    attr_accessor :peptide_prev_aa

    # Aminoacid following peptide ('-' if none)
    attr_accessor :peptide_next_aa

    # Number of peptide fragment ions found in spectrum (Integer)
    attr_accessor :num_matched_ions

    # Number of peptide fragment ions predicted for peptide (Integer)
    attr_accessor :tot_num_ions

    # (required)
    attr_accessor :calc_neutral_pep_mass

    # Mass(precursor ion) - Mass(peptide) (required)
    attr_accessor :massdiff

    # Number of peptide termini consistent with cleavage by sample enzyme
    attr_accessor :num_tol_term

    # Number of sample enzyme cleavage sites internal to peptide
    attr_accessor :num_missed_cleavages

    # Potential use in future for user manual validation (true/false)
    # by default, this will be set to false
    # (the xml is expressed as a 0 or 1)
    attr_accessor :is_rejected

    # a protein identifier string (required)
    attr_accessor :protein

    # Number of unique proteins in search database containing peptide
    # (required)
    attr_accessor :num_tot_proteins

    # Extracted from search database
    attr_accessor :protein_desc

    attr_accessor :calc_pI
    attr_accessor :protein_mw

    # a ModificationInfo object
    attr_accessor :modification_info

    # a Hash with keys (the score type) and values
    # (to_xml calls each_pair to generate the xml, so a Struct would also
    # work)
    attr_accessor :search_scores

    # a link back to the spectrum_query object
    attr_accessor :spectrum_query


    Non_standard_amino_acid_char_re = %r{[^A-Z\.\-]}

    alias_method :aaseq, :peptide
    alias_method :aaseq=, :peptide=

    # takes either a hash or an ordered list of values to set (in the order
    # given by members).
    # yields the search_scores hash if given a block.
    # mind that you set the ModificationInfo object as needed.
    def initialize(*args, &block)
      @search_scores = {}
      if args.first.is_a?(Hash)
        merge!(args.first)
      else
        self.class.members.zip(args) do |k,v|
          send("#{k}=", v)
        end
      end
      block.call(@search_scores) if block
    end

    # the ordered attribute names (see SearchHit.members)
    def members
      self.class.members
    end

    # Adds a search_hit element to builder and returns builder; without a
    # builder returns the element as an xml string.  The first 14 members
    # that are set become attributes; modification_info (if any) and one
    # search_score element per search_scores pair become children.
    def to_xml(builder=nil)
      xmlb = builder || Nokogiri::XML::Builder.new
      attrs = members[0,14].map {|k| v=send(k) ; [k, v] if v }.compact
      hash_attrs = Hash[attrs]
      # to_plus_minus_string is not defined in this file (presumably a
      # numeric extension that renders an explicit sign -- confirm)
      hash_attrs[:massdiff] = hash_attrs[:massdiff].to_plus_minus_string
      xmlb.search_hit(hash_attrs) do |xml|
        @modification_info.to_xml(xml) if @modification_info
        @search_scores.each_pair {|k,v| xml.search_score(:name => k, :value => v) }
      end
      builder || xmlb.doc.root.to_xml
    end

    def self.from_pepxml_node(node)
      self.new.from_pepxml_node(node)
    end

    # Fills the scalar attributes from a <search_hit> node, converting the
    # numeric ones with to_i / to_f.  The values are assigned through the
    # named accessors: this class is not a Struct, so index assignment
    # (self[0] = ...) is not available.
    # returns self
    def from_pepxml_node(node)
      self.hit_rank = node['hit_rank'].to_i
      self.peptide = node['peptide']
      self.peptide_prev_aa = node['peptide_prev_aa']
      self.peptide_next_aa = node['peptide_next_aa']
      self.protein = node['protein']  ## will this be the string?? (yes, for now)
      self.num_tot_proteins = node['num_tot_proteins'].to_i
      self.num_matched_ions = node['num_matched_ions'].to_i
      self.tot_num_ions = node['tot_num_ions'].to_i
      self.calc_neutral_pep_mass = node['calc_neutral_pep_mass'].to_f
      self.massdiff = node['massdiff'].to_f
      self.num_tol_term = node['num_tol_term'].to_i
      self.num_missed_cleavages = node['num_missed_cleavages'].to_i
      self.is_rejected = node['is_rejected'].to_i
      self
    end

    Simple = Struct.new(:id, :search, :aaseq, :charge, :search_scores)
  end

end
144
+
@@ -0,0 +1,35 @@
1
+ require 'nokogiri'
2
+
3
+ require 'ms/ident/pepxml/search_hit'
4
+
5
module MS ; end
module MS::Ident ; end
class MS::Ident::Pepxml ; end

# The search hits belonging to one spectrum query; written out as a
# <search_result> element.
class MS::Ident::Pepxml::SearchResult
  # an array of search_hits
  attr_accessor :search_hits

  # Takes the array of search hits (empty by default).
  # For consistency with other objects, will also take a hash that has the key
  # :search_hits and the value an array.
  # if block given, then yields the search_hits array.
  def initialize(search_hits = [], &block)
    @search_hits = search_hits.is_a?(Hash) ? search_hits[:search_hits] : search_hits
    block.call(@search_hits) if block
  end

  # Adds a search_result element containing every search hit to builder and
  # returns builder; without a builder a new Nokogiri::XML::Builder is used
  # and the element is returned as an xml string.
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    # must go through xmlb (not builder), since builder is nil when the
    # caller wants a string back
    xmlb.search_result do |xml|
      search_hits.each do |sh|
        sh.to_xml(xml)
      end
    end
    builder || xmlb.doc.root.to_xml
  end

end
35
+
@@ -0,0 +1,92 @@
1
+ require 'ms/ident/pepxml/search_database'
2
+ require 'ms/ident/pepxml/modifications'
3
+ require 'ms/ident/pepxml/parameters'
4
+
5
+ require 'nokogiri'
6
+ require 'merge'
7
+
8
module MS
  module Ident
    class Pepxml
      # A plain Hash subclass describing the enzymatic constraint applied
      # to a search.  It requires these keys:
      #
      #     :enzyme => a valid enzyme name
      #     :max_num_internal_cleavages => max number of internal cleavages allowed
      #     :min_number_termini => minimum number of termini??
      class EnzymaticSearchConstraint < Hash
      end
    end
  end
end
20
+
21
# The settings of one search (engine, mass types, database, modifications,
# parameters); written out as a <search_summary> element.
class MS::Ident::Pepxml::SearchSummary
  include Merge

  DEFAULT_SEARCH_ID = '1'

  attr_accessor :base_name
  # required in v18-19, optional in later versions
  attr_accessor :out_data_type
  # required in v18-19, optional in later versions
  attr_accessor :out_data
  # by default, "1"
  attr_accessor :search_id
  # an array of MS::Ident::Pepxml::Modification objects
  attr_accessor :modifications
  # A SearchDatabase object (must respond to :to_xml)
  attr_accessor :search_database
  # the other search parameters as a hash
  attr_accessor :parameters
  # the search engine used, SEQUEST, Mascot, Comet, etc.
  attr_accessor :search_engine
  # required: 'average' or 'monoisotopic'
  attr_accessor :precursor_mass_type
  # required: 'average' or 'monoisotopic'
  attr_accessor :fragment_mass_type
  # An EnzymaticSearchConstraint object (at the moment this is merely a hash
  # with a few required keys)
  attr_accessor :enzymatic_search_constraint

  # Creates new SearchDatabase, EnzymaticSearchConstraint and Parameters
  # objects, stores them on this object (replacing any already set) and
  # returns them, together with the modifications array, as
  # [search_database, enzymatic_search_constraint, modifications, parameters].
  # Presumably Merge#merge! yields this array to the block given to
  # initialize -- confirm against Merge.
  def block_arg
    [@search_database = MS::Ident::Pepxml::SearchDatabase.new,
      @enzymatic_search_constraint = MS::Ident::Pepxml::EnzymaticSearchConstraint.new,
      @modifications,
      @parameters = MS::Ident::Pepxml::Parameters.new,
    ]
  end

  # initializes modifications to an empty array and search_id to
  # DEFAULT_SEARCH_ID, then hands hash and the block to merge!
  def initialize(hash={}, &block)
    @modifications = []
    @search_id = DEFAULT_SEARCH_ID
    merge!(hash, &block)
  end

  # Adds a search_summary element (with the search database, the enzymatic
  # search constraint, the modifications and the parameters as children) to
  # builder and returns builder; without a builder a new
  # Nokogiri::XML::Builder is used and the element is returned as an xml
  # string.
  def to_xml(builder=nil)
    # TODO: out_data and out_data_type are optional in later pepxml versions...
    # should work that in...
    attrs = [:base_name, :search_engine, :precursor_mass_type, :fragment_mass_type, :out_data_type, :out_data, :search_id]
    hash = Hash[ attrs.map {|at| v=send(at) ; [at, v] if v }.compact ]
    xmlb = builder || Nokogiri::XML::Builder.new
    # must go through xmlb (not builder), since builder is nil when the
    # caller wants a string back
    xmlb.search_summary(hash) do |xml|
      search_database.to_xml(xml)
      xml.enzymatic_search_constraint(enzymatic_search_constraint) if enzymatic_search_constraint
      modifications.each do |mod|
        mod.to_xml(xml)
      end
      parameters.to_xml(xml) if parameters
    end
    builder || xmlb.doc.root.to_xml
  end

  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # Not implemented: always raises NotImplementedError.
  def from_pepxml_node(node)
    raise NotImplementedError, "not implemented just yet (just use the raw xml node)"
  end

end
90
+
91
+
92
+
@@ -0,0 +1,85 @@
1
+ require 'nokogiri'
2
+ require 'ms/mass'
3
+ require 'merge'
4
+
5
+ require 'ms/ident/pepxml/search_result'
6
+
7
+ module MS ; end
8
+ module MS::Ident ; end
9
+ class MS::Ident::Pepxml ; end
10
+
11
+ # search_specification is a search constraint applied specifically to this query (a String)
12
# One queried spectrum together with its search results; written out as a
# <spectrum_query> element.
class MS::Ident::Pepxml::SpectrumQuery
  include Merge
  DEFAULT_MEMBERS = [:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :index, :assumed_charge, :retention_time_sec, :search_specification, :search_results, :pepxml_version]

  class << self
    attr_writer :members
    # the ordered attribute names (DEFAULT_MEMBERS unless overridden)
    def members
      @members || DEFAULT_MEMBERS
    end
  end

  members.each {|memb| attr_accessor memb }

  Required = Set.new([:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :index, :assumed_charge])
  Optional = [:retention_time_sec, :search_specification]

  # takes either a hash or an ordered list of values to set (in the order
  # given by members)
  # yields the search_results array if given a block
  def initialize(*args, &block)
    @search_results = []
    if args.first.is_a?(Hash)
      merge!(args.first)
    else
      self.class.members.zip(args) do |k,v|
        send("#{k}=", v)
      end
    end
    block.call(@search_results) if block
  end

  # the ordered attribute names (see SpectrumQuery.members)
  def members
    self.class.members
  end

  ############################################################
  # FOR PEPXML:
  ############################################################

  # Adds a spectrum_query element (with every search result as a child) to
  # builder and returns builder; without a builder returns the element as an
  # xml string.  The first 8 members that are set become attributes, except
  # that retention_time_sec is dropped when pepxml_version is 18.
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    # all through search_specification; unset (nil) attributes must be
    # dropped before Hash[] sees them (Hash[] rejects non-pair elements)
    attrs = members[0, 8].map {|at| v=send(at) ; [at, v] if v }.compact
    attrs_hash = Hash[attrs]
    case pepxml_version
    when 18
      attrs_hash.delete(:retention_time_sec)
    end
    xmlb.spectrum_query(attrs_hash) do |xml|
      search_results.each do |search_result|
        search_result.to_xml(xml)
      end
    end
    builder || xmlb.doc.root.to_xml
  end

  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # Fills spectrum, start_scan, end_scan, precursor_neutral_mass, index and
  # assumed_charge from a <spectrum_query> node (numeric values converted
  # with to_i / to_f).
  # returns self
  def from_pepxml_node(node)
    @spectrum = node['spectrum']
    @start_scan = node['start_scan'].to_i
    @end_scan = node['end_scan'].to_i
    @precursor_neutral_mass = node['precursor_neutral_mass'].to_f
    @index = node['index'].to_i
    @assumed_charge = node['assumed_charge'].to_i
    self
  end

  # returns m_plus_h - h_plus + deltamass
  def self.calc_precursor_neutral_mass(m_plus_h, deltamass, h_plus=MS::Mass::H_PLUS)
    m_plus_h - h_plus + deltamass
  end
end
84
+
85
+