mspire 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. data/README.rdoc +24 -0
  2. data/Rakefile +51 -0
  3. data/VERSION +1 -0
  4. data/lib/cv/description.rb +18 -0
  5. data/lib/cv/param.rb +33 -0
  6. data/lib/cv.rb +3 -0
  7. data/lib/io/bookmark.rb +13 -0
  8. data/lib/merge.rb +7 -0
  9. data/lib/ms/cvlist.rb +76 -0
  10. data/lib/ms/digester.rb +245 -0
  11. data/lib/ms/fasta.rb +86 -0
  12. data/lib/ms/ident/peptide/db.rb +243 -0
  13. data/lib/ms/ident/peptide.rb +72 -0
  14. data/lib/ms/ident/peptide_hit/qvalue.rb +56 -0
  15. data/lib/ms/ident/peptide_hit.rb +26 -0
  16. data/lib/ms/ident/pepxml/modifications.rb +83 -0
  17. data/lib/ms/ident/pepxml/msms_pipeline_analysis.rb +70 -0
  18. data/lib/ms/ident/pepxml/msms_run_summary.rb +82 -0
  19. data/lib/ms/ident/pepxml/parameters.rb +14 -0
  20. data/lib/ms/ident/pepxml/sample_enzyme.rb +165 -0
  21. data/lib/ms/ident/pepxml/search_database.rb +49 -0
  22. data/lib/ms/ident/pepxml/search_hit/modification_info.rb +79 -0
  23. data/lib/ms/ident/pepxml/search_hit.rb +144 -0
  24. data/lib/ms/ident/pepxml/search_result.rb +35 -0
  25. data/lib/ms/ident/pepxml/search_summary.rb +92 -0
  26. data/lib/ms/ident/pepxml/spectrum_query.rb +85 -0
  27. data/lib/ms/ident/pepxml.rb +112 -0
  28. data/lib/ms/ident/protein.rb +33 -0
  29. data/lib/ms/ident/protein_group.rb +80 -0
  30. data/lib/ms/ident/search.rb +114 -0
  31. data/lib/ms/ident.rb +37 -0
  32. data/lib/ms/isotope/aa.rb +59 -0
  33. data/lib/ms/mascot.rb +6 -0
  34. data/lib/ms/mass/aa.rb +79 -0
  35. data/lib/ms/mass.rb +55 -0
  36. data/lib/ms/mzml/index_list.rb +98 -0
  37. data/lib/ms/mzml/plms1.rb +34 -0
  38. data/lib/ms/mzml.rb +197 -0
  39. data/lib/ms/obo.rb +38 -0
  40. data/lib/ms/plms1.rb +156 -0
  41. data/lib/ms/quant/qspec/protein_group_comparison.rb +22 -0
  42. data/lib/ms/quant/qspec.rb +112 -0
  43. data/lib/ms/spectrum.rb +154 -8
  44. data/lib/ms.rb +3 -10
  45. data/lib/msplat.rb +2 -0
  46. data/lib/obo/ims.rb +5 -0
  47. data/lib/obo/ms.rb +7 -0
  48. data/lib/obo/ontology.rb +41 -0
  49. data/lib/obo/unit.rb +5 -0
  50. data/lib/openany.rb +23 -0
  51. data/lib/write_file_or_string.rb +18 -0
  52. data/obo/ims.obo +562 -0
  53. data/obo/ms.obo +11677 -0
  54. data/obo/unit.obo +2563 -0
  55. data/spec/ms/cvlist_spec.rb +60 -0
  56. data/spec/ms/digester_spec.rb +351 -0
  57. data/spec/ms/fasta_spec.rb +100 -0
  58. data/spec/ms/ident/peptide/db_spec.rb +108 -0
  59. data/spec/ms/ident/pepxml/sample_enzyme_spec.rb +181 -0
  60. data/spec/ms/ident/pepxml/search_hit/modification_info_spec.rb +37 -0
  61. data/spec/ms/ident/pepxml_spec.rb +442 -0
  62. data/spec/ms/ident/protein_group_spec.rb +68 -0
  63. data/spec/ms/mass_spec.rb +8 -0
  64. data/spec/ms/mzml/index_list_spec.rb +122 -0
  65. data/spec/ms/mzml/plms1_spec.rb +62 -0
  66. data/spec/ms/mzml_spec.rb +50 -0
  67. data/spec/ms/plms1_spec.rb +38 -0
  68. data/spec/ms/quant/qspec_spec.rb +25 -0
  69. data/spec/msplat_spec.rb +24 -0
  70. data/spec/obo_spec.rb +25 -0
  71. data/spec/spec_helper.rb +25 -0
  72. data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.fasta +69 -0
  73. data/spec/testfiles/ms/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +728 -0
  74. data/spec/testfiles/ms/mzml/j24z.idx_comp.3.mzML +271 -0
  75. data/spec/testfiles/ms/mzml/openms.noidx_nocomp.12.mzML +330 -0
  76. data/spec/testfiles/ms/quant/kill_extra_tabs.rb +13 -0
  77. data/spec/testfiles/ms/quant/max_quant_output.provenance.txt +15 -0
  78. data/spec/testfiles/ms/quant/max_quant_output.txt +199 -0
  79. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv +199 -0
  80. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp +199 -0
  81. data/spec/testfiles/ms/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +199 -0
  82. data/spec/testfiles/ms/quant/pdcd5_final.txt +199 -0
  83. data/spec/testfiles/ms/quant/pdcd5_final.txt_qspecgp +0 -0
  84. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.CSV.csv +199 -0
  85. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.csv +199 -0
  86. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.csv +199 -0
  87. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv +199 -0
  88. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +199 -0
  89. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +199 -0
  90. data/spec/testfiles/ms/quant/pdcd5_lfq_qspec.txt +199 -0
  91. data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt +134 -0
  92. data/spec/testfiles/ms/quant/pdcd5_lfq_tabdel.txt_qspecgp +134 -0
  93. data/spec/testfiles/ms/quant/remove_rest_of_proteins.rb +13 -0
  94. data/spec/testfiles/ms/quant/unlog_transform.rb +13 -0
  95. data/spec/testfiles/plms1/output.key +0 -0
  96. metadata +157 -40
  97. data/README +0 -77
  98. data/changelog.txt +0 -196
  99. data/lib/ms/calc.rb +0 -32
  100. data/lib/ms/data/interleaved.rb +0 -60
  101. data/lib/ms/data/lazy_io.rb +0 -73
  102. data/lib/ms/data/lazy_string.rb +0 -15
  103. data/lib/ms/data/simple.rb +0 -59
  104. data/lib/ms/data/transposed.rb +0 -41
  105. data/lib/ms/data.rb +0 -57
  106. data/lib/ms/format/format_error.rb +0 -12
  107. data/lib/ms/support/binary_search.rb +0 -126
@@ -0,0 +1,165 @@
1
+ require 'merge'
2
+ require 'strscan'
3
+
4
+ module MS ; end
5
+ module MS::Ident ; end
6
+ class MS::Ident::Pepxml ; end
7
+
8
# Describes the enzyme used to digest a sample: the residues it cleaves
# after, the following residue that blocks cleavage, and the side it cuts on.
# Can be written to / read from a pepXML <sample_enzyme> element.
class MS::Ident::Pepxml::SampleEnzyme
  include Merge

  # an identifier
  attr_accessor :name
  # amino acids after which to cleave
  attr_accessor :cut
  # cleave at 'cut' amino acids UNLESS it is followed by 'no_cut'
  attr_accessor :no_cut
  # 'C' or 'N'
  attr_accessor :sense

  # arg is either:
  # * a String naming a recognized enzyme, meaning there is a set_<name>
  #   method (only 'trypsin' is defined in this class), or
  # * a hash, which is handed to merge! (from the Merge mixin, defined
  #   elsewhere).  For enzymes without a set_<name> method you must supply
  #   :name, :cut, :no_cut and :sense yourself.
  def initialize(arg={})
    if arg.is_a?(String)
      @name = arg
      send("set_#{@name}".to_sym)
    else
      merge!(arg)
    end
  end

  # C-terminal cleavage after K or R, unless the next residue is P
  def set_trypsin
    @sense = 'C'
    @cut = 'KR'
    @no_cut = 'P'
  end

  # if an xml builder object is given, it adds to the object and returns the
  # builder object, otherwise it returns an xml fragment string
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    xmlb.sample_enzyme(:name => name) do |enzyme_xml|
      enzyme_xml.specificity(:cut => cut, :no_cut => no_cut, :sense => sense)
    end
    builder || xmlb.doc.root.to_xml
  end

  # Fills name, cut, no_cut and sense from a <sample_enzyme> node and its
  # specificity child.  returns self
  def from_pepxml_node(node)
    self.name = node['name']
    # node.child is a whitespace text node when the document was parsed
    # without NOBLANKS, so prefer the first *element* child when the node
    # offers it.
    specificity =
      if node.respond_to?(:first_element_child)
        node.first_element_child
      else
        node.child
      end
    self.cut = specificity['cut']
    self.no_cut = specificity['no_cut']
    self.sense = specificity['sense']
    self
  end

  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # takes an amino acid sequence (e.g. PEPTIDE).
  # returns the number of missed cleavages: the count of 'cut' residues that
  # are not the last residue and are not followed by a 'no_cut' residue.
  # raises NotImplementedError for N terminal sense.
  def num_missed_cleavages(aaseq)
    raise NotImplementedError, 'need to implement for N terminal sense' if sense == 'N'
    cut_residues = @cut.to_s
    blocking_residues = @no_cut.to_s
    # Inspect every (residue, next residue) pair individually.  Scanning with
    # a consuming two-character pattern would skip the second of two adjacent
    # cut residues (e.g. the "KK" in "AKKA").
    (0...(aaseq.size - 1)).count do |i|
      cut_residues.include?(aaseq[i, 1]) && !blocking_residues.include?(aaseq[i + 1, 1])
    end
  end

  # Number of peptide termini (0, 1 or 2) consistent with this enzyme.
  # prev_aa / next_aa are the flanking residues ('-' when the peptide sits at
  # a protein terminus); middle is the peptide sequence itself.
  # No arguments should contain non-standard amino acids
  def num_tol_term(prev_aa, middle, next_aa)
    raise NotImplementedError, 'need to implement for N terminal sense' if sense == 'N'
    blocking_residues = @no_cut || ''
    first_of_middle = middle[0, 1]
    last_of_middle = middle[-1, 1]
    num_tol = 0
    # N-terminal side: the preceding residue must be a cut site that the
    # peptide's first residue does not block
    if (@cut.include?(prev_aa) && !blocking_residues.include?(first_of_middle)) || prev_aa == '-'
      num_tol += 1
    end
    # C-terminal side: the peptide's last residue must be a cut site that the
    # following residue does not block
    if (@cut.include?(last_of_middle) && !blocking_residues.include?(next_aa)) || next_aa == '-'
      num_tol += 1
    end
    num_tol
  end
end
100
+
101
+ ###################################################
102
+ ###################################################
103
+ ###################################################
104
+ ###################################################
105
+ # This is digestion methodology:
106
+
107
+ =begin
108
+ # returns all peptides of missed cleavages <= 'missed_cleavages'
109
+ # so 2 missed cleavages will return all no missed cleavage peptides
110
+ # all 1 missed cleavages and all 2 missed cleavages.
111
+ # options:
112
+ def digest(string, missed_cleavages=0, options={})
113
+ raise NotImplementedError if @sense == 'N'
114
+ s = StringScanner.new(string)
115
+ no_cut_regex = Regexp.new("[#{@no_cut}]")
116
+ regex = Regexp.new("[#{@cut}]")
117
+ peps = []
118
+ last_pos = 0
119
+ current_pep = ''
120
+ loop do
121
+ if s.eos?
122
+ break
123
+ end
124
+ m = s.scan_until(regex)
125
+ if m ## found a cut point
126
+ last_pos = s.pos
127
+ # is the next amino acid a no_cut?
128
+ if string[s.pos,1] =~ no_cut_regex
129
+ current_pep << m
130
+ else
131
+ # cut it
132
+ current_pep << m
133
+ peps << current_pep
134
+ current_pep = ''
135
+ end
136
+ else ## didn't find a cut point
137
+ current_pep << string[last_pos..-1]
138
+ peps << current_pep
139
+ break
140
+ end
141
+ end
142
+ ## LOOP through and grab each set of missed cleavages from num down to 0
143
+ all_sets_of_peps = []
144
+ (0..missed_cleavages).to_a.reverse.each do |num_mc|
145
+ all_sets_of_peps.push( *(get_missed_cleavages(peps, num_mc)) )
146
+ end
147
+ all_sets_of_peps
148
+ end
149
+
150
+ # takes an array of peptides and returns an array containing 'num' missed
151
+ # cleavages
152
+ # DOES NOT contain peptides that contain < num of missed cleavages
153
+ # (i.e., will not return missed cleavages of 1 or 2 if num == 3)
154
+ def get_missed_cleavages(ar_of_peptide_seqs, num)
155
+ (0...(ar_of_peptide_seqs.size - num)).to_a.map do |i|
156
+ ar_of_peptide_seqs[i,num+1].join
157
+ end
158
+ end
159
+
160
+ def self.tryptic(string, missed_cleavages=0)
161
+ self.new("trypsin").digest(string, missed_cleavages)
162
+ end
163
+
164
+ end
165
+ =end
@@ -0,0 +1,49 @@
1
+ require 'ms/fasta'
2
+ require 'merge'
3
+ module MS ; end
4
+ module MS::Ident ; end
5
+
6
class MS::Ident::Pepxml
  # The protein sequence database a search was run against (written out as a
  # pepXML <search_database> element).
  class SearchDatabase
    include Merge

    # required! the local, full path to the protein sequence database
    attr_accessor :local_path
    # required! 'AA' or 'NA'
    attr_accessor :seq_type

    # optional
    attr_accessor :database_name
    # optional
    attr_accessor :orig_database_url
    # optional
    attr_accessor :database_release_date
    # optional
    attr_accessor :database_release_identifier
    # optional
    attr_accessor :size_of_residues

    # takes a hash to fill in values (handed to merge! from the Merge mixin).
    # If get_size_of_residues is true and local_path names an existing file,
    # size_of_residues is computed from that file.
    def initialize(hash={}, get_size_of_residues=false)
      merge!(hash)
      # the @local_path check keeps File.exist? from raising TypeError when
      # no path was supplied
      if get_size_of_residues && @local_path && File.exist?(@local_path)
        set_size_of_residues!
      end
    end

    # Sets size_of_residues to the summed sequence length of every entry
    # MS::Fasta.foreach yields for local_path.
    # returns self for chaining
    def set_size_of_residues!
      @size_of_residues = 0
      MS::Fasta.foreach(@local_path) do |entry|
        @size_of_residues += entry.sequence.size
      end
      self
    end

    # Adds a search_database element to builder carrying every attribute
    # whose value is set (nil/false values are left out).  returns builder
    def to_xml(builder)
      attribute_names = [:local_path, :seq_type, :database_name, :orig_database_url, :database_release_date, :database_release_identifier, :size_of_residues]
      present = attribute_names.map {|key| [key, send(key)] }.select {|pair| pair.last }
      builder.search_database(Hash[present])
      builder
    end
  end

end
@@ -0,0 +1,79 @@
1
+ require 'andand'
2
+ require 'nokogiri'
3
+
4
module MS ; end
module MS::Ident ; end
class MS::Ident::Pepxml ; end
class MS::Ident::Pepxml::SearchHit ; end


# Positions and masses of modifications
MS::Ident::Pepxml::SearchHit::ModificationInfo = Struct.new(:modified_peptide, :mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass) do
  ## Should be something like this:
  # <modification_info mod_nterm_mass=" " mod_cterm_mass=" " modified_peptide=" ">
  #   <mod_aminoacid_mass position=" " mass=" "/>
  # </modification_info>
  # e.g.:
  # <modification_info modified_peptide="GC[546]M[147]PSKEVLSAGAHR">
  #   <mod_aminoacid_mass position="2" mass="545.7160"/>
  #   <mod_aminoacid_mass position="3" mass="147.1926"/>
  # </modification_info>

  # Struct members:
  #
  # mod_nterm_mass::       Mass of modified N terminus
  # mod_cterm_mass::       Mass of modified C terminus
  # modified_peptide::     Peptide sequence (with indicated modifications).
  #                        I'm assuming that the native sequest indicators
  #                        are OK here
  # mod_aminoacid_masses:: objects of type ...ModAminoacidMass; position
  #                        ranges from 1 to peptide length

  # Accepts either the struct values in member order or a single hash keyed
  # by member name.
  def initialize(*args)
    if args.first.is_a?(Hash)
      args = args.first.values_at(*members)
    end
    super(*args)
  end

  # If a builder is given, adds a modification_info element (plus one
  # mod_aminoacid_mass child per entry) to it and returns the builder;
  # otherwise returns the xml fragment as a string.
  # Will escape any xml special chars in modified_peptide
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    # Only attributes that are actually set are emitted; nil entries must
    # never reach Hash[].
    attribute_pairs = [:mod_nterm_mass, :mod_cterm_mass, :modified_peptide].map do |att|
      value = send(att)
      [att, value] if value
    end.compact
    xmlb.modification_info(Hash[attribute_pairs]) do |info_xml|
      (mod_aminoacid_masses || []).each do |mod_aa_mass|
        mod_aa_mass.to_xml(info_xml)
      end
    end
    builder || xmlb.doc.root.to_s
  end

  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # Fills this object from a <modification_info> node: the three attributes
  # are copied as given and one ModAminoacidMass (Integer position, Float
  # mass) is built per element child.
  # returns self
  def from_pepxml_node(node)
    self.modified_peptide = node['modified_peptide']
    self.mod_nterm_mass = node['mod_nterm_mass']
    self.mod_cterm_mass = node['mod_cterm_mass']
    # element_children skips the whitespace text nodes found in
    # pretty-printed files
    self.mod_aminoacid_masses = node.element_children.map do |mass_node|
      MS::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(mass_node['position'].to_i, mass_node['mass'].to_f)
    end
    self
  end
end

# The mass of a single modified residue at a (1-based) peptide position.
MS::Ident::Pepxml::SearchHit::ModificationInfo::ModAminoacidMass = Struct.new(:position, :mass) do
  # Adds a mod_aminoacid_mass element to builder and returns the builder.
  def to_xml(builder)
    builder.mod_aminoacid_mass(:position => position, :mass => mass)
    builder
  end
end
@@ -0,0 +1,144 @@
1
+ require 'set'
2
+ require 'merge'
3
+ require 'nokogiri'
4
+
5
+ module MS ; end
6
+ module MS::Ident ; end
7
+
8
+
9
class MS::Ident::Pepxml

  # A single peptide-to-spectrum match (a pepXML <search_hit> element).
  class MS::Ident::Pepxml::SearchHit
    include Merge

    DEFAULT_MEMBERS = [:hit_rank, :peptide, :peptide_prev_aa, :peptide_next_aa, :num_matched_ions, :tot_num_ions, :calc_neutral_pep_mass, :massdiff, :num_tol_term, :num_missed_cleavages, :is_rejected, :protein, :num_tot_proteins, :protein_desc, :calc_pI, :protein_mw, :modification_info, :search_scores, :spectrum_query]

    Required = Set.new([:hit_rank, :peptide, :protein, :num_tot_proteins, :calc_neutral_pep_mass, :massdiff])

    class << self
      # the ordered attribute list used by the positional constructor and by
      # to_xml; falls back to DEFAULT_MEMBERS when none was assigned
      attr_writer :members
      def members
        @members || DEFAULT_MEMBERS
      end
    end

    members.each {|memb| attr_accessor memb }

    # rank of the peptide hit (required)
    attr_accessor :hit_rank
    # Peptide aminoacid sequence (with no indicated modifications) (required)
    attr_accessor :peptide

    # Aminoacid preceding peptide ('-' if none)
    attr_accessor :peptide_prev_aa

    # Aminoacid following peptide ('-' if none)
    attr_accessor :peptide_next_aa

    # Number of peptide fragment ions found in spectrum (Integer)
    attr_accessor :num_matched_ions

    # Number of peptide fragment ions predicted for peptide (Integer)
    attr_accessor :tot_num_ions

    # (required)
    attr_accessor :calc_neutral_pep_mass

    # Mass(precursor ion) - Mass(peptide) (required)
    attr_accessor :massdiff

    # Number of peptide termini consistent with cleavage by sample enzyme
    attr_accessor :num_tol_term

    # Number of sample enzyme cleavage sites internal to peptide
    attr_accessor :num_missed_cleavages

    # Potential use in future for user manual validation (true/false)
    # by default, this will be set to false
    # (the xml is expressed as a 0 or 1)
    attr_accessor :is_rejected

    # a protein identifier string (required)
    attr_accessor :protein

    # Number of unique proteins in search database containing peptide
    # (required)
    attr_accessor :num_tot_proteins

    # Extracted from search database
    attr_accessor :protein_desc

    attr_accessor :calc_pI
    attr_accessor :protein_mw

    # a ModificationInfo object
    attr_accessor :modification_info

    # a Hash with keys (the score type) and values
    # (to_xml calls each_pair to generate the xml, so a Struct would also
    # work)
    attr_accessor :search_scores

    # a link back to the spectrum_query object
    attr_accessor :spectrum_query


    Non_standard_amino_acid_char_re = %r{[^A-Z\.\-]}

    alias_method :aaseq, :peptide
    alias_method :aaseq=, :peptide=

    # Builds a hit from a pepXML <search_hit> node.
    def self.from_pepxml_node(node)
      self.new.from_pepxml_node(node)
    end

    # takes either a hash or an ordered list of values to set.
    # yields the search_scores hash (empty unless one was passed in) if
    # given a block.
    # mind that you set the ModificationInfo object as needed.
    def initialize(*args, &block)
      @search_scores = {}
      if args.first.is_a?(Hash)
        merge!(args.first)
      else
        self.class.members.zip(args) do |k,v|
          send("#{k}=", v)
        end
      end
      # zip pads missing positional values with nil, which would otherwise
      # wipe out the default hash set above
      @search_scores ||= {}
      block.call(@search_scores) if block
    end

    def members
      self.class.members
    end

    # If a builder is given, adds a search_hit element to it and returns the
    # builder; otherwise returns the xml fragment as a string.
    # The first 14 members become attributes (when set), followed by the
    # modification_info (if any) and one search_score child per score.
    def to_xml(builder=nil)
      xmlb = builder || Nokogiri::XML::Builder.new
      attrs = members[0,14].map {|k| v=send(k) ; [k, v] if v }.compact
      hash_attrs = Hash[attrs]
      # to_plus_minus_string is not defined in this file; massdiff is a
      # required attribute and must respond to it
      hash_attrs[:massdiff] = hash_attrs[:massdiff].to_plus_minus_string
      xmlb.search_hit(hash_attrs) do |hit_xml|
        @modification_info.to_xml(hit_xml) if @modification_info
        @search_scores.each_pair {|k,v| hit_xml.search_score(:name => k, :value => v) }
      end
      builder || xmlb.doc.root.to_xml
    end

    # Fills the basic attributes from a <search_hit> node (integers and
    # floats are converted with to_i / to_f; the rest stay strings).
    # returns self
    def from_pepxml_node(node)
      # This class is not a Struct, so values go through the named writers
      # rather than positional self[i]= assignment.
      self.hit_rank = node['hit_rank'].to_i
      self.peptide = node['peptide']
      self.peptide_prev_aa = node['peptide_prev_aa']
      self.peptide_next_aa = node['peptide_next_aa']
      self.protein = node['protein'] ## will this be the string?? (yes, for now)
      self.num_tot_proteins = node['num_tot_proteins'].to_i
      self.num_matched_ions = node['num_matched_ions'].to_i
      self.tot_num_ions = node['tot_num_ions'].to_i
      self.calc_neutral_pep_mass = node['calc_neutral_pep_mass'].to_f
      self.massdiff = node['massdiff'].to_f
      self.num_tol_term = node['num_tol_term'].to_i
      self.num_missed_cleavages = node['num_missed_cleavages'].to_i
      self.is_rejected = node['is_rejected'].to_i
      self
    end

    Simple = Struct.new(:id, :search, :aaseq, :charge, :search_scores)
  end

end
144
+
@@ -0,0 +1,35 @@
1
+ require 'nokogiri'
2
+
3
+ require 'ms/ident/pepxml/search_hit'
4
+
5
module MS ; end
module MS::Ident ; end
class MS::Ident::Pepxml ; end

# A container for the search hits of one search (a pepXML <search_result>
# element).
class MS::Ident::Pepxml::SearchResult
  # an array of search_hits
  attr_accessor :search_hits

  # if block given, then yields the search_hits array (empty by default).
  # For consistency with other objects, will also take a hash that has the key
  # :search_hits and the value an array (an absent key gives an empty array).
  def initialize(search_hits = [], &block)
    @search_hits =
      if search_hits.is_a?(Hash)
        search_hits[:search_hits] || []
      else
        search_hits
      end
    block.call(@search_hits) if block
  end

  # If a builder is given, adds a search_result element (with every hit
  # serialized inside it) and returns the builder; otherwise returns the xml
  # fragment as a string.
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    # must go through xmlb: builder is nil when the caller wants a string
    xmlb.search_result do |result_xml|
      search_hits.each do |sh|
        sh.to_xml(result_xml)
      end
    end
    builder || xmlb.doc.root.to_xml
  end

end
35
+
@@ -0,0 +1,92 @@
1
+ require 'ms/ident/pepxml/search_database'
2
+ require 'ms/ident/pepxml/modifications'
3
+ require 'ms/ident/pepxml/parameters'
4
+
5
+ require 'nokogiri'
6
+ require 'merge'
7
+
8
module MS ; end
module MS::Ident ; end
class MS::Ident::Pepxml ; end

# A Hash subclass that adds no behavior of its own; it only gives the
# enzymatic search constraint a distinct type.  It is expected to hold
# these keys:
#
#   :enzyme                     => a valid enzyme name
#   :max_num_internal_cleavages => max number of internal cleavages allowed
#   :min_number_termini         => minimum number of termini??
class MS::Ident::Pepxml::EnzymaticSearchConstraint < Hash ; end
20
+
21
# The settings of one database search (a pepXML <search_summary> element):
# engine, mass types, database, enzyme constraint, modifications and other
# parameters.
class MS::Ident::Pepxml::SearchSummary
  include Merge

  DEFAULT_SEARCH_ID = '1'

  attr_accessor :base_name
  # required in v18-19, optional in later versions
  attr_accessor :out_data_type
  # required in v18-19, optional in later versions
  attr_accessor :out_data
  # by default, "1"
  attr_accessor :search_id
  # an array of MS::Ident::Pepxml::Modification objects
  attr_accessor :modifications
  # A SearchDatabase object (responds to :local_path and :seq_type)
  attr_accessor :search_database
  # the other search parameters as a hash
  attr_accessor :parameters
  # the search engine used, SEQUEST, Mascot, Comet, etc.
  attr_accessor :search_engine
  # required: 'average' or 'monoisotopic'
  attr_accessor :precursor_mass_type
  # required: 'average' or 'monoisotopic'
  attr_accessor :fragment_mass_type
  # An EnzymaticSearchConstraint object (at the moment this is merely a hash
  # with a few required keys)
  attr_accessor :enzymatic_search_constraint

  # Creates and stores a fresh SearchDatabase, EnzymaticSearchConstraint and
  # Parameters object and returns them, together with the current
  # modifications array, as
  # [search_database, enzymatic_search_constraint, modifications, parameters].
  # NOTE(review): presumably this is what Merge#merge! hands to the block
  # given to initialize -- confirm against lib/merge.rb.
  def block_arg
    [@search_database = MS::Ident::Pepxml::SearchDatabase.new,
      @enzymatic_search_constraint = MS::Ident::Pepxml::EnzymaticSearchConstraint.new,
      @modifications,
      @parameters = MS::Ident::Pepxml::Parameters.new,
    ]
  end

  # initializes modifications to an empty array and search_id to
  # DEFAULT_SEARCH_ID, then hands hash and block to merge!
  def initialize(hash={}, &block)
    @modifications = []
    @search_id = DEFAULT_SEARCH_ID
    merge!(hash, &block)
  end

  # If a builder is given, adds a search_summary element to it and returns
  # the builder; otherwise returns the xml fragment as a string.
  def to_xml(builder=nil)
    # TODO: out_data and out_data_type are optional in later pepxml versions...
    # should work that in...
    attrs = [:base_name, :search_engine, :precursor_mass_type, :fragment_mass_type, :out_data_type, :out_data, :search_id]
    hash = Hash[ attrs.map {|at| v=send(at) ; [at, v] if v }.compact ]
    xmlb = builder || Nokogiri::XML::Builder.new
    # must go through xmlb: builder is nil when the caller wants a string
    xmlb.search_summary(hash) do |summary_xml|
      search_database.to_xml(summary_xml) if search_database
      summary_xml.enzymatic_search_constraint(enzymatic_search_constraint) if enzymatic_search_constraint
      modifications.each do |mod|
        mod.to_xml(summary_xml)
      end
      parameters.to_xml(summary_xml) if parameters
    end
    builder || xmlb.doc.root.to_xml
  end

  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # Reading a search summary back from pepXML is not supported yet.
  def from_pepxml_node(node)
    raise NotImplementedError, "not implemented just yet (just use the raw xml node)"
  end

end
90
+
91
+
92
+
@@ -0,0 +1,85 @@
1
+ require 'nokogiri'
2
+ require 'ms/mass'
3
+ require 'merge'
4
+
5
+ require 'ms/ident/pepxml/search_result'
6
+
7
+ module MS ; end
8
+ module MS::Ident ; end
9
+ class MS::Ident::Pepxml ; end
10
+
11
+ # search_specification is a search constraint applied specifically to this query (a String)
12
# One queried spectrum and the search results obtained for it (a pepXML
# <spectrum_query> element).
class MS::Ident::Pepxml::SpectrumQuery
  include Merge
  DEFAULT_MEMBERS = [:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :index, :assumed_charge, :retention_time_sec, :search_specification, :search_results, :pepxml_version]

  class << self
    # the ordered attribute list used by the positional constructor and by
    # to_xml; falls back to DEFAULT_MEMBERS when none was assigned
    attr_writer :members
    def members
      @members || DEFAULT_MEMBERS
    end
  end

  members.each {|memb| attr_accessor memb }

  Required = Set.new([:spectrum, :start_scan, :end_scan, :precursor_neutral_mass, :index, :assumed_charge])
  Optional = [:retention_time_sec, :search_specification]

  # takes either a hash or an ordered list of values to set
  # yields the search_results array (empty unless one was passed in) if
  # given a block
  def initialize(*args, &block)
    @search_results = []
    if args.first.is_a?(Hash)
      merge!(args.first)
    else
      self.class.members.zip(args) do |k,v|
        send("#{k}=", v)
      end
    end
    # zip pads missing positional values with nil, which would otherwise
    # wipe out the default array set above
    @search_results ||= []
    block.call(@search_results) if block
  end

  def members
    self.class.members
  end

  ############################################################
  # FOR PEPXML:
  ############################################################

  # If a builder is given, adds a spectrum_query element (with every search
  # result serialized inside it) and returns the builder; otherwise returns
  # the xml fragment as a string.
  def to_xml(builder=nil)
    xmlb = builder || Nokogiri::XML::Builder.new
    # all through search_specification; unset attributes are dropped so that
    # no nil pair reaches Hash[]
    attrs = members[0, 8].map {|at| v=send(at) ; [at, v] if v }.compact
    attrs_hash = Hash[attrs]
    # retention_time_sec is not written for pepxml_version 18
    attrs_hash.delete(:retention_time_sec) if pepxml_version == 18
    xmlb.spectrum_query(attrs_hash) do |query_xml|
      search_results.each do |search_result|
        search_result.to_xml(query_xml)
      end
    end
    builder || xmlb.doc.root.to_xml
  end

  def self.from_pepxml_node(node)
    self.new.from_pepxml_node(node)
  end

  # Fills the six required attributes from a <spectrum_query> node
  # (numbers are converted with to_i / to_f).  returns self
  def from_pepxml_node(node)
    @spectrum = node['spectrum']
    @start_scan = node['start_scan'].to_i
    @end_scan = node['end_scan'].to_i
    @precursor_neutral_mass = node['precursor_neutral_mass'].to_f
    @index = node['index'].to_i
    @assumed_charge = node['assumed_charge'].to_i
    self
  end

  # returns m_plus_h - h_plus + deltamass
  def self.calc_precursor_neutral_mass(m_plus_h, deltamass, h_plus=MS::Mass::H_PLUS)
    m_plus_h - h_plus + deltamass
  end
end
84
+
85
+