mspire 0.6.26 → 0.7.2
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/mspire.rb +1 -1
- data/lib/{ms → mspire}/cv.rb +1 -1
- data/lib/{ms → mspire}/cv/param.rb +5 -5
- data/lib/{ms → mspire}/cv/paramable.rb +5 -5
- data/lib/{ms → mspire}/digester.rb +2 -2
- data/lib/{ms → mspire}/error_rate/decoy.rb +1 -1
- data/lib/{ms → mspire}/error_rate/qvalue.rb +4 -4
- data/lib/{ms → mspire}/fasta.rb +5 -5
- data/lib/{ms → mspire}/ident.rb +10 -10
- data/lib/{ms → mspire}/ident/peptide.rb +3 -3
- data/lib/{ms → mspire}/ident/peptide/db.rb +12 -12
- data/lib/{ms → mspire}/ident/peptide_hit.rb +6 -6
- data/lib/{ms → mspire}/ident/peptide_hit/qvalue.rb +7 -7
- data/lib/{ms → mspire}/ident/pepxml.rb +7 -7
- data/lib/{ms → mspire}/ident/pepxml/modifications.rb +7 -7
- data/lib/{ms → mspire}/ident/pepxml/msms_pipeline_analysis.rb +6 -6
- data/lib/{ms → mspire}/ident/pepxml/msms_run_summary.rb +9 -9
- data/lib/{ms → mspire}/ident/pepxml/parameters.rb +1 -1
- data/lib/{ms → mspire}/ident/pepxml/sample_enzyme.rb +4 -4
- data/lib/{ms → mspire}/ident/pepxml/search_database.rb +5 -5
- data/lib/{ms → mspire}/ident/pepxml/search_hit.rb +4 -4
- data/lib/{ms → mspire}/ident/pepxml/search_hit/modification_info.rb +7 -7
- data/lib/{ms → mspire}/ident/pepxml/search_result.rb +5 -5
- data/lib/{ms → mspire}/ident/pepxml/search_summary.rb +12 -12
- data/lib/{ms → mspire}/ident/pepxml/spectrum_query.rb +7 -7
- data/lib/{ms → mspire}/ident/protein.rb +2 -2
- data/lib/{ms → mspire}/ident/protein_group.rb +2 -2
- data/lib/{ms → mspire}/ident/search.rb +1 -1
- data/lib/{ms → mspire}/isotope.rb +3 -3
- data/lib/{ms → mspire}/isotope/aa.rb +1 -1
- data/lib/{ms → mspire}/isotope/distribution.rb +17 -17
- data/lib/{ms → mspire}/isotope/nist_isotope_info.yml +0 -0
- data/lib/{ms → mspire}/mascot.rb +1 -1
- data/lib/{ms → mspire}/mass.rb +7 -7
- data/lib/{ms → mspire}/mass/aa.rb +6 -6
- data/lib/{ms → mspire}/molecular_formula.rb +7 -7
- data/lib/{ms → mspire}/mzml.rb +55 -55
- data/lib/{ms → mspire}/mzml/activation.rb +3 -3
- data/lib/{ms → mspire}/mzml/chromatogram.rb +3 -3
- data/lib/{ms → mspire}/mzml/chromatogram_list.rb +1 -1
- data/lib/{ms → mspire}/mzml/component.rb +5 -5
- data/lib/{ms → mspire}/mzml/contact.rb +3 -3
- data/lib/{ms → mspire}/mzml/cv.rb +1 -1
- data/lib/{ms → mspire}/mzml/data_array.rb +12 -12
- data/lib/{ms → mspire}/mzml/data_array_container_like.rb +7 -7
- data/lib/{ms → mspire}/mzml/data_processing.rb +3 -3
- data/lib/{ms → mspire}/mzml/file_content.rb +3 -3
- data/lib/{ms → mspire}/mzml/file_description.rb +4 -4
- data/lib/{ms → mspire}/mzml/index_list.rb +2 -2
- data/lib/{ms → mspire}/mzml/instrument_configuration.rb +7 -7
- data/lib/{ms → mspire}/mzml/isolation_window.rb +3 -3
- data/lib/{ms → mspire}/mzml/list.rb +1 -1
- data/lib/{ms → mspire}/mzml/plms1.rb +3 -3
- data/lib/{ms → mspire}/mzml/precursor.rb +6 -6
- data/lib/{ms → mspire}/mzml/processing_method.rb +3 -3
- data/lib/{ms → mspire}/mzml/product.rb +3 -3
- data/lib/{ms → mspire}/mzml/referenceable_param_group.rb +4 -4
- data/lib/{ms → mspire}/mzml/run.rb +3 -3
- data/lib/{ms → mspire}/mzml/sample.rb +5 -5
- data/lib/{ms → mspire}/mzml/scan.rb +4 -4
- data/lib/{ms → mspire}/mzml/scan_list.rb +3 -3
- data/lib/{ms → mspire}/mzml/scan_settings.rb +5 -5
- data/lib/{ms → mspire}/mzml/selected_ion.rb +5 -5
- data/lib/{ms → mspire}/mzml/software.rb +5 -5
- data/lib/{ms → mspire}/mzml/source_file.rb +5 -5
- data/lib/{ms → mspire}/mzml/spectrum.rb +33 -20
- data/lib/{ms → mspire}/mzml/spectrum_list.rb +4 -4
- data/lib/{ms → mspire}/obo.rb +1 -1
- data/lib/{ms → mspire}/peak.rb +3 -3
- data/lib/{ms → mspire}/peak/point.rb +1 -1
- data/lib/{ms → mspire}/plms1.rb +4 -4
- data/lib/{ms → mspire}/quant/qspec.rb +4 -4
- data/lib/{ms → mspire}/quant/qspec/protein_group_comparison.rb +4 -4
- data/lib/{ms → mspire}/spectrum.rb +6 -6
- data/lib/{ms → mspire}/spectrum/centroid.rb +1 -1
- data/lib/{ms → mspire}/spectrum_like.rb +5 -5
- data/lib/{ms → mspire}/user_param.rb +2 -2
- data/script/mzml_read_binary.rb +1 -1
- data/spec/{ms → mspire}/cv/param_spec.rb +6 -6
- data/spec/{ms → mspire}/digester_spec.rb +10 -10
- data/spec/{ms → mspire}/error_rate/qvalue_spec.rb +3 -3
- data/spec/{ms → mspire}/fasta_spec.rb +10 -10
- data/spec/{ms → mspire}/ident/peptide/db_spec.rb +9 -9
- data/spec/{ms → mspire}/ident/pepxml/sample_enzyme_spec.rb +10 -10
- data/spec/{ms → mspire}/ident/pepxml/search_hit/modification_info_spec.rb +4 -4
- data/spec/{ms → mspire}/ident/pepxml_spec.rb +22 -22
- data/spec/{ms → mspire}/ident/protein_group_spec.rb +4 -4
- data/spec/{ms → mspire}/isotope/aa_spec.rb +3 -3
- data/spec/{ms → mspire}/isotope/distribution_spec.rb +4 -4
- data/spec/{ms → mspire}/isotope_spec.rb +9 -9
- data/spec/{ms → mspire}/mass_spec.rb +8 -8
- data/spec/{ms → mspire}/molecular_formula_spec.rb +4 -4
- data/spec/{ms → mspire}/mzml/cv_spec.rb +4 -4
- data/spec/{ms → mspire}/mzml/data_array_spec.rb +7 -7
- data/spec/{ms → mspire}/mzml/file_content_spec.rb +4 -4
- data/spec/{ms → mspire}/mzml/file_description_spec.rb +4 -4
- data/spec/{ms → mspire}/mzml/index_list_spec.rb +13 -13
- data/spec/{ms → mspire}/mzml/plms1_spec.rb +8 -8
- data/spec/{ms → mspire}/mzml/referenceable_param_group_spec.rb +6 -6
- data/spec/{ms → mspire}/mzml_spec.rb +30 -30
- data/spec/{ms → mspire}/peak_spec.rb +10 -10
- data/spec/{ms → mspire}/plms1_spec.rb +7 -7
- data/spec/{ms → mspire}/quant/qspec_spec.rb +2 -2
- data/spec/{ms → mspire}/spectrum_spec.rb +8 -8
- data/spec/{ms → mspire}/user_param_spec.rb +8 -8
- data/spec/testfiles/{ms → mspire}/ident/peptide/db/uni_11_sp_tr.fasta +0 -0
- data/spec/testfiles/{ms → mspire}/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +0 -0
- data/spec/testfiles/{ms → mspire}/mzml/j24z.idx_comp.3.mzML +0 -0
- data/spec/testfiles/{ms → mspire}/mzml/mspire_simulated.MSn.check.mzML +0 -0
- data/spec/testfiles/{ms → mspire}/mzml/openms.noidx_nocomp.12.mzML +0 -0
- data/spec/testfiles/{ms → mspire}/quant/kill_extra_tabs.rb +0 -0
- data/spec/testfiles/{ms → mspire}/quant/max_quant_output.provenance.txt +0 -0
- data/spec/testfiles/{ms → mspire}/quant/max_quant_output.txt +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_final.killedextratabs.tsv +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_final.killedextratabs.tsv_qspecgp +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_final.txt +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_final.txt_qspecgp +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_lfq_qspec.CSV.csv +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_lfq_qspec.csv +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_lfq_qspec.oneprot.csv +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_lfq_qspec.oneprot.tsv +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_lfq_qspec.txt +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_lfq_tabdel.txt +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_lfq_tabdel.txt_qspecgp +0 -0
- data/spec/testfiles/{ms → mspire}/quant/remove_rest_of_proteins.rb +0 -0
- data/spec/testfiles/{ms → mspire}/quant/unlog_transform.rb +0 -0
- metadata +148 -194
- data/lib/ms.rb +0 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.6.26
|
1
|
+
0.7.2
|
data/lib/mspire.rb
CHANGED
data/lib/{ms → mspire}/cv.rb
RENAMED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'cv/param'
|
2
|
-
require 'ms/cv'
|
2
|
+
require 'mspire/cv'
|
3
3
|
|
4
|
-
module MS
|
4
|
+
module Mspire
|
5
5
|
module CV
|
6
6
|
|
7
7
|
# a mass spec related CVParam.
|
@@ -22,12 +22,12 @@ module MS
|
|
22
22
|
when 1
|
23
23
|
nil
|
24
24
|
when 2
|
25
|
-
|
25
|
+
Mspire::CV::Param[args.pop] if args.last.is_a?(String) && args.last =~ /^[A-Za-z]+:/
|
26
26
|
when 3
|
27
|
-
|
27
|
+
Mspire::CV::Param[args.pop]
|
28
28
|
end
|
29
29
|
obo_type = args[0][/([A-Za-z]+):/,1]
|
30
|
-
self.new(obo_type, args[0], MS::CV::Obo[obo_type][args.first], args[1], unit)
|
30
|
+
self.new(obo_type, args[0], Mspire::CV::Obo[obo_type][args.first], args[1], unit)
|
31
31
|
end
|
32
32
|
end
|
33
33
|
end
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require 'ms/cv/param'
|
1
|
+
require 'mspire/cv/param'
|
2
2
|
|
3
|
-
module MS
|
3
|
+
module Mspire
|
4
4
|
module CV
|
5
5
|
module Paramable
|
6
6
|
|
@@ -10,15 +10,15 @@ module MS
|
|
10
10
|
describe!(*opts[:params])
|
11
11
|
end
|
12
12
|
|
13
|
-
# casts each string or array as a Param object (using MS::CV::Param[]),
|
13
|
+
# casts each string or array as a Param object (using Mspire::CV::Param[]),
|
14
14
|
# pushes it onto the params attribute and returns the growing params object
|
15
15
|
def describe!(*args)
|
16
16
|
@params ||= []
|
17
17
|
as_params = args.map do |arg|
|
18
18
|
if arg.is_a?(Array)
|
19
|
-
|
19
|
+
Mspire::CV::Param[ *arg ]
|
20
20
|
elsif arg.is_a?(String)
|
21
|
-
|
21
|
+
Mspire::CV::Param[ arg ]
|
22
22
|
else
|
23
23
|
arg
|
24
24
|
end
|
@@ -1,10 +1,10 @@
|
|
1
1
|
require 'strscan'
|
2
2
|
|
3
|
-
module MS
|
3
|
+
module Mspire
|
4
4
|
|
5
5
|
# A Digester splits a protein sequence into peptides at specified sites.
|
6
6
|
#
|
7
|
-
# trypsin = MS::Digester[:trypsin]
|
7
|
+
# trypsin = Mspire::Digester[:trypsin]
|
8
8
|
#
|
9
9
|
# trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG')
|
10
10
|
# # => ['MIVIGR', 'SIVHPYITNEYEPFAAEK', 'QQILSIMAG']
|
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'set'
|
2
|
-
require 'ms/error_rate/decoy'
|
2
|
+
require 'mspire/error_rate/decoy'
|
3
3
|
|
4
|
-
module MS
|
4
|
+
module Mspire
|
5
5
|
|
6
6
|
module ErrorRate
|
7
7
|
# For generating and working with q-value calculations. The q-value is the global false discovery rate when accepting that particular ID. We do not necessarily distinguish here between *how* the FDR is generated (i.e., Storey's pFDR "the occurrence of false positives" vs. Benjamini-Hochberg's FDR "the rate of false positives" [except to prefer Storey when possible] ). The main point is that we sort and threshold based on a global FDR.
|
@@ -22,7 +22,7 @@ module MS
|
|
22
22
|
# Proc.new doesn't do arity checking
|
23
23
|
hit_with_qvalue_pairs = Proc.new do |hits|
|
24
24
|
sorted_best_to_worst = (hits.sort_by(&sorting)).reverse
|
25
|
-
(target_hits, qvalues) = MS::ErrorRate::Qvalue.mixed_target_decoy(sorted_best_to_worst, target_set, opts)
|
25
|
+
(target_hits, qvalues) = Mspire::ErrorRate::Qvalue.mixed_target_decoy(sorted_best_to_worst, target_set, opts)
|
26
26
|
target_hits.zip(qvalues)
|
27
27
|
end
|
28
28
|
|
@@ -54,7 +54,7 @@ module MS
|
|
54
54
|
best_to_worst.each do |hit|
|
55
55
|
if target_setlike.include?(hit)
|
56
56
|
num_target += 1
|
57
|
-
precision = MS::ErrorRate::Decoy.precision(num_target, num_decoy)
|
57
|
+
precision = Mspire::ErrorRate::Decoy.precision(num_target, num_decoy)
|
58
58
|
target_hits << hit
|
59
59
|
qvalues << (1.0 - precision)
|
60
60
|
else
|
data/lib/{ms → mspire}/fasta.rb
RENAMED
@@ -10,12 +10,12 @@ class Bio::FastaFormat
|
|
10
10
|
alias_method :sequence, :seq
|
11
11
|
end
|
12
12
|
|
13
|
-
module MS
|
13
|
+
module Mspire
|
14
14
|
# A convenience class for working with fasta formatted sequence databases.
|
15
15
|
# the file which includes this class also includes Enumerable with
|
16
16
|
# Bio::FlatFile so you can do things like this:
|
17
17
|
#
|
18
|
-
# accessions = MS::Fasta.open("file.fasta") do |fasta|
|
18
|
+
# accessions = Mspire::Fasta.open("file.fasta") do |fasta|
|
19
19
|
# fasta.map(&:accession)
|
20
20
|
# end
|
21
21
|
#
|
@@ -24,17 +24,17 @@ module MS
|
|
24
24
|
# entry.header == entry.definition
|
25
25
|
# entry.sequence == entry.seq
|
26
26
|
#
|
27
|
-
# MS::Fasta.new accepts both an IO object or a String (a fasta formatted
|
27
|
+
# Mspire::Fasta.new accepts both an IO object or a String (a fasta formatted
|
28
28
|
# string itself)
|
29
29
|
#
|
30
30
|
# # taking an io object:
|
31
31
|
# File.open("file.fasta") do |io|
|
32
|
-
# fasta = MS::Fasta.new(io)
|
32
|
+
# fasta = Mspire::Fasta.new(io)
|
33
33
|
# ... do something with it
|
34
34
|
# end
|
35
35
|
# # taking a string
|
36
36
|
# string = ">id1 a simple header\nAAASDDEEEDDD\n>id2 header again\nPPPPPPWWWWWWTTTTYY\n"
|
37
|
-
# fasta = MS::Fasta.new(string)
|
37
|
+
# fasta = Mspire::Fasta.new(string)
|
38
38
|
# (simple, not_simple) = fasta.partition {|entry| entry.header =~ /simple/ }
|
39
39
|
module Fasta
|
40
40
|
|
data/lib/{ms → mspire}/ident.rb
RENAMED
@@ -1,30 +1,30 @@
|
|
1
1
|
|
2
|
-
require 'ms/ident/protein_group'
|
3
|
-
require 'ms/ident/protein'
|
4
|
-
require 'ms/ident/peptide_hit'
|
2
|
+
require 'mspire/ident/protein_group'
|
3
|
+
require 'mspire/ident/protein'
|
4
|
+
require 'mspire/ident/peptide_hit'
|
5
5
|
|
6
|
-
module MS
|
6
|
+
module Mspire
|
7
7
|
|
8
|
-
# An MS::Ident::ProteinGroup is an array of proteins that responds to
|
8
|
+
# An Mspire::Ident::ProteinGroup is an array of proteins that responds to
|
9
9
|
# :peptide_hits. All protein level identifications should be stored in a
|
10
10
|
# proteingroup object.
|
11
11
|
#
|
12
|
-
# An MS::Ident::Protein is an object representing a protein (:id,
|
12
|
+
# An Mspire::Ident::Protein is an object representing a protein (:id,
|
13
13
|
# :sequence, :description). Note, it is not a protein hit (use a
|
14
14
|
# ProteinGroup)
|
15
15
|
#
|
16
|
-
# An MS::Ident::PeptideHit is an object representing a match between an
|
16
|
+
# An Mspire::Ident::PeptideHit is an object representing a match between an
|
17
17
|
# amino acid sequence and a spectrum.
|
18
18
|
#
|
19
19
|
# Typical usage:
|
20
20
|
#
|
21
|
-
# require 'ms/ident'
|
21
|
+
# require 'mspire/ident'
|
22
22
|
#
|
23
23
|
# hit1 = PeptideHit.new(:id => 1, :aaseq => 'PEPTIDE', :search =>
|
24
|
-
# MS::Ident::Search.new, etc...)
|
24
|
+
# Mspire::Ident::Search.new, etc...)
|
25
25
|
# peptide_hits = [hit1, hit2, ...]
|
26
26
|
#
|
27
|
-
# protein_groups = MS::Ident::ProteinGroup.peptide_hits_to_protein_groups(peptide_hits)
|
27
|
+
# protein_groups = Mspire::Ident::ProteinGroup.peptide_hits_to_protein_groups(peptide_hits)
|
28
28
|
# protein_groups.first.peptide_hits # => the peptide hits in that group
|
29
29
|
module Ident
|
30
30
|
# returns the filetype (if possible)
|
@@ -1,5 +1,5 @@
|
|
1
|
-
module MS ; end
|
2
|
-
module MS::Ident ; end
|
1
|
+
module Mspire ; end
|
2
|
+
module Mspire::Ident ; end
|
3
3
|
|
4
4
|
# A 'sequence' is a notation of a peptide that includes the leading and
|
5
5
|
# trailing amino acid after cleavage (e.g., K.PEPTIDER.E or -.STARTK.L )
|
@@ -7,7 +7,7 @@ module MS::Ident ; end
|
|
7
7
|
#
|
8
8
|
# 'aaseq' is the amino acid sequence of just the peptide with no leading or
|
9
9
|
# trailing notation (e.g., PEPTIDER or LAKKLY)
|
10
|
-
module MS::Ident::Peptide
|
10
|
+
module Mspire::Ident::Peptide
|
11
11
|
Nonstandard_AA_re = /[^A-Z\.\-]/
|
12
12
|
|
13
13
|
class << self
|
@@ -1,22 +1,22 @@
|
|
1
|
-
require 'ms/digester'
|
2
|
-
require 'ms/fasta'
|
1
|
+
require 'mspire/digester'
|
2
|
+
require 'mspire/fasta'
|
3
3
|
require 'optparse'
|
4
4
|
|
5
|
-
module MS ; end
|
6
|
-
module MS::Ident ; end
|
7
|
-
module MS::Ident::Peptide ; end
|
5
|
+
module Mspire ; end
|
6
|
+
module Mspire::Ident ; end
|
7
|
+
module Mspire::Ident::Peptide ; end
|
8
8
|
|
9
9
|
# the object itself is a modified Hash.
|
10
10
|
# It is initialized with the database file and a protein array can be
|
11
11
|
# retrieved with the #[] method given an amino acid sequence. All other
|
12
12
|
# methods are untested at this time and should be avoided!
|
13
|
-
class MS::Ident::Peptide::Db < Hash
|
13
|
+
class Mspire::Ident::Peptide::Db < Hash
|
14
14
|
MAX_NUM_AA_EXPANSION = 3
|
15
15
|
|
16
16
|
# the twenty standard amino acids
|
17
17
|
STANDARD_AA = %w(A C D E F G H I K L M N P Q R S T V W Y)
|
18
18
|
|
19
|
-
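DEFAULT_PEPTIDE_CENTRIC_DB = {:missed_cleavages => 2, :min_length => 4, :enzyme => MS::Digester[:trypsin], :id_regexp => nil, :remove_digestion_file => true, :cleave_initiator_methionine => true, :expand_aa => {'X' => STANDARD_AA}}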
|
19
|
+
DEFAULT_PEPTIDE_CENTRIC_DB = {:missed_cleavages => 2, :min_length => 4, :enzyme => Mspire::Digester[:trypsin], :id_regexp => nil, :remove_digestion_file => true, :cleave_initiator_methionine => true, :expand_aa => {'X' => STANDARD_AA}}
|
20
20
|
|
21
21
|
PROTEIN_DELIMITER = "\t"
|
22
22
|
KEY_VALUE_DELIMITER = ": "
|
@@ -25,7 +25,7 @@ class MS::Ident::Peptide::Db < Hash
|
|
25
25
|
|
26
26
|
opt = {
|
27
27
|
:remove_digestion_file => true,
|
28
|
-
:enzyme => MS::Digester[:trypsin]
|
28
|
+
:enzyme => Mspire::Digester[:trypsin]
|
29
29
|
}
|
30
30
|
opts = OptionParser.new do |op|
|
31
31
|
op.banner = "usage: #{File.basename($0)} <file>.fasta ..."
|
@@ -42,9 +42,9 @@ class MS::Ident::Peptide::Db < Hash
|
|
42
42
|
op.on("--min-length <#{opt[:min_length]}>", Integer, "the minimum peptide aaseq length") {|v| opt[:min_length] = v }
|
43
43
|
op.on("--no-cleaved-methionine", "does not cleave off initiator methionine") { opt[:cleave_initiator_methionine] = false }
|
44
44
|
op.on("--no-expand-x", "don't enumerate aa 'X' possibilities") { opt[:expand_aa] = nil }
|
45
|
-
op.on("-e", "--enzyme <name>", "enzyme for digestion") {|v| opt[:enzyme] = MS::Insilico::Digester.const_get(v.upcase) }
|
45
|
+
op.on("-e", "--enzyme <name>", "enzyme for digestion") {|v| opt[:enzyme] = Mspire::Insilico::Digester.const_get(v.upcase) }
|
46
46
|
op.on("--list-enzymes", "lists approved enzymes and exits") do
|
47
|
-
puts MS::Digester::ENZYMES.keys.join("\n")
|
47
|
+
puts Mspire::Digester::ENZYMES.keys.join("\n")
|
48
48
|
exit
|
49
49
|
end
|
50
50
|
end
|
@@ -56,7 +56,7 @@ class MS::Ident::Peptide::Db < Hash
|
|
56
56
|
end
|
57
57
|
|
58
58
|
argv.map do |file|
|
59
|
-
|
59
|
+
Mspire::Ident::Peptide::Db.peptide_centric_db(file, opt)
|
60
60
|
end
|
61
61
|
end
|
62
62
|
|
@@ -79,7 +79,7 @@ class MS::Ident::Peptide::Db < Hash
|
|
79
79
|
base = fasta_file.chomp(File.extname(fasta_file))
|
80
80
|
digestion_file = base + ".msd_clvg#{missed_cleavages}.peptides"
|
81
81
|
File.open(digestion_file, "w") do |fh|
|
82
|
-
|
82
|
+
Mspire::Fasta.open(fasta_file) do |fasta|
|
83
83
|
fasta.each do |prot|
|
84
84
|
peptides = enzyme.digest(prot.sequence, missed_cleavages)
|
85
85
|
if (cleave_initiator_methionine && (prot.sequence[0,1] == "M"))
|
@@ -1,22 +1,22 @@
|
|
1
1
|
require 'merge'
|
2
2
|
|
3
|
-
module MS ; end
|
4
|
-
module MS::Ident ; end
|
3
|
+
module Mspire ; end
|
4
|
+
module Mspire::Ident ; end
|
5
5
|
|
6
|
-
module MS::Ident::PeptideHitLike
|
6
|
+
module Mspire::Ident::PeptideHitLike
|
7
7
|
attr_accessor :id
|
8
8
|
attr_accessor :search
|
9
9
|
attr_accessor :missed_cleavages
|
10
10
|
attr_accessor :aaseq
|
11
11
|
attr_accessor :charge
|
12
|
-
# an array of MS::Ident::ProteinLike objects
|
12
|
+
# an array of Mspire::Ident::ProteinLike objects
|
13
13
|
attr_accessor :proteins
|
14
14
|
# relative to the set the hit is contained in!
|
15
15
|
attr_accessor :qvalue
|
16
16
|
end
|
17
17
|
|
18
|
-
class MS::Ident::PeptideHit
|
19
|
-
include MS::Ident::PeptideHitLike
|
18
|
+
class Mspire::Ident::PeptideHit
|
19
|
+
include Mspire::Ident::PeptideHitLike
|
20
20
|
include Merge
|
21
21
|
|
22
22
|
def initialize(hash)
|
@@ -1,10 +1,10 @@
|
|
1
|
-
require 'ms/ident/search'
|
2
|
-
require 'ms/ident/peptide_hit'
|
1
|
+
require 'mspire/ident/search'
|
2
|
+
require 'mspire/ident/peptide_hit'
|
3
3
|
|
4
|
-
module MS ; end
|
5
|
-
module MS::Ident ; end
|
4
|
+
module Mspire ; end
|
5
|
+
module Mspire::Ident ; end
|
6
6
|
|
7
|
-
class MS::Ident::PeptideHit
|
7
|
+
class Mspire::Ident::PeptideHit
|
8
8
|
module Qvalue
|
9
9
|
FILE_EXTENSION = '.phq.tsv'
|
10
10
|
FILE_DELIMITER = "\t"
|
@@ -32,7 +32,7 @@ class MS::Ident::PeptideHit
|
|
32
32
|
|
33
33
|
# returns an array of PeptideHit objects from a phq.tsv
|
34
34
|
def from_file(filename)
|
35
|
-
searches = Hash.new {|h,id| h[id] = MS::Ident::Search.new(id) }
|
35
|
+
searches = Hash.new {|h,id| h[id] = Mspire::Ident::Search.new(id) }
|
36
36
|
peptide_hits = []
|
37
37
|
File.open(filename) do |io|
|
38
38
|
header = io.readline.chomp.split(FILE_DELIMITER)
|
@@ -40,7 +40,7 @@ class MS::Ident::PeptideHit
|
|
40
40
|
io.each do |line|
|
41
41
|
line.chomp!
|
42
42
|
(run_id, id, aaseq, charge, qvalue) = line.split(FILE_DELIMITER)
|
43
|
-
ph = MS::Ident::PeptideHit.new
|
43
|
+
ph = Mspire::Ident::PeptideHit.new
|
44
44
|
ph.search = searches[run_id]
|
45
45
|
ph.id = id; ph.aaseq = aaseq ; ph.charge = charge.to_i ; ph.qvalue = qvalue.to_f
|
46
46
|
peptide_hits << ph
|
@@ -1,10 +1,10 @@
|
|
1
1
|
require 'nokogiri'
|
2
|
-
require 'ms/ident'
|
3
|
-
require 'ms/ident/pepxml/msms_pipeline_analysis'
|
2
|
+
require 'mspire/ident'
|
3
|
+
require 'mspire/ident/pepxml/msms_pipeline_analysis'
|
4
4
|
|
5
5
|
require 'ostruct'
|
6
6
|
|
7
|
-
module MS ; module Ident ; end ; end
|
7
|
+
module Mspire ; module Ident ; end ; end
|
8
8
|
|
9
9
|
class Numeric
|
10
10
|
# returns a string with a + or - on the front
|
@@ -17,14 +17,14 @@ class Numeric
|
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
|
-
class MS::Ident::Pepxml
|
20
|
+
class Mspire::Ident::Pepxml
|
21
21
|
XML_STYLESHEET_LOCATION = '/tools/bin/TPP/tpp/schema/pepXML_std.xsl'
|
22
22
|
DEFAULT_PEPXML_VERSION = MsmsPipelineAnalysis::PEPXML_VERSION
|
23
23
|
XML_ENCODING = 'UTF-8'
|
24
24
|
|
25
25
|
attr_accessor :msms_pipeline_analysis
|
26
26
|
|
27
|
-
# returns an array of MS::Ident::Pepxml::SearchHit::Simple structs
|
27
|
+
# returns an array of Mspire::Ident::Pepxml::SearchHit::Simple structs
|
28
28
|
def self.simple_search_hits(file)
|
29
29
|
hit_values = File.open(file) do |io|
|
30
30
|
doc = Nokogiri::XML.parse(io, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS | Nokogiri::XML::ParseOptions::STRICT)
|
@@ -40,7 +40,7 @@ class MS::Ident::Pepxml
|
|
40
40
|
search_score_nodes.each do |node|
|
41
41
|
search_scores[node['name'].to_sym] = node['value'].to_f
|
42
42
|
end
|
43
|
-
|
43
|
+
Mspire::Ident::Pepxml::SearchHit::Simple.new("hit_#{i}", Mspire::Ident::Search.new(file.chomp(File.extname(file))), aaseq, charge, search_scores)
|
44
44
|
end
|
45
45
|
end
|
46
46
|
end
|
@@ -97,7 +97,7 @@ class MS::Ident::Pepxml
|
|
97
97
|
|
98
98
|
builder = Nokogiri::XML::Builder.new(:encoding => XML_ENCODING)
|
99
99
|
msms_pipeline_analysis.to_xml(builder)
|
100
|
-
add_stylesheet(builder.doc, MS::Ident::Pepxml::XML_STYLESHEET_LOCATION)
|
100
|
+
add_stylesheet(builder.doc, Mspire::Ident::Pepxml::XML_STYLESHEET_LOCATION)
|
101
101
|
string = builder.doc.to_xml
|
102
102
|
|
103
103
|
if outfile
|
@@ -1,13 +1,13 @@
|
|
1
1
|
require 'merge'
|
2
2
|
require 'nokogiri'
|
3
3
|
|
4
|
-
module MS ; end
|
5
|
-
module MS::Ident ; end
|
6
|
-
class MS::Ident::Pepxml ; end
|
4
|
+
module Mspire ; end
|
5
|
+
module Mspire::Ident ; end
|
6
|
+
class Mspire::Ident::Pepxml ; end
|
7
7
|
|
8
8
|
# Modified aminoacid, static or variable
|
9
9
|
# unless otherwise stated, all attributes can be anything
|
10
|
-
class MS::Ident::Pepxml::AminoacidModification
|
10
|
+
class Mspire::Ident::Pepxml::AminoacidModification
|
11
11
|
include Merge
|
12
12
|
# The amino acid (one letter code)
|
13
13
|
attr_accessor :aminoacid
|
@@ -36,7 +36,7 @@ class MS::Ident::Pepxml::AminoacidModification
|
|
36
36
|
xmlb = builder || Nokogiri::XML::Builder.new
|
37
37
|
# note massdiff: must begin with either + (nonnegative) or - [e.g.
|
38
38
|
# +1.05446 or -2.3342] consider Numeric#to_plus_minus_string in
|
39
|
-
# MS::Ident::Pepxml
|
39
|
+
# Mspire::Ident::Pepxml
|
40
40
|
attrs = [:aminoacid, :massdiff, :mass, :variable, :peptide_terminus, :symbol, :binary].map {|at| v=send(at) ; [at,v] if v }.compact
|
41
41
|
hash = Hash[attrs]
|
42
42
|
hash[:massdiff] = hash[:massdiff].to_plus_minus_string
|
@@ -46,7 +46,7 @@ class MS::Ident::Pepxml::AminoacidModification
|
|
46
46
|
end
|
47
47
|
|
48
48
|
# Modified aminoacid, static or variable
|
49
|
-
class MS::Ident::Pepxml::TerminalModification
|
49
|
+
class Mspire::Ident::Pepxml::TerminalModification
|
50
50
|
include Merge
|
51
51
|
# n for N-terminus, c for C-terminus
|
52
52
|
attr_accessor :terminus
|
@@ -57,7 +57,7 @@ class MS::Ident::Pepxml::TerminalModification
|
|
57
57
|
# Y if both modified and unmodified terminus could be present in the
|
58
58
|
# dataset, N if only modified terminus can be present
|
59
59
|
attr_accessor :variable
|
60
|
-
#
|
60
|
+
# symbol used by search engine to designate this modification
|
61
61
|
attr_accessor :symbol
|
62
62
|
# whether modification can reside only at protein terminus (specified n or
|
63
63
|
# c)
|