mspire 0.6.26 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/mspire.rb +1 -1
- data/lib/{ms → mspire}/cv.rb +1 -1
- data/lib/{ms → mspire}/cv/param.rb +5 -5
- data/lib/{ms → mspire}/cv/paramable.rb +5 -5
- data/lib/{ms → mspire}/digester.rb +2 -2
- data/lib/{ms → mspire}/error_rate/decoy.rb +1 -1
- data/lib/{ms → mspire}/error_rate/qvalue.rb +4 -4
- data/lib/{ms → mspire}/fasta.rb +5 -5
- data/lib/{ms → mspire}/ident.rb +10 -10
- data/lib/{ms → mspire}/ident/peptide.rb +3 -3
- data/lib/{ms → mspire}/ident/peptide/db.rb +12 -12
- data/lib/{ms → mspire}/ident/peptide_hit.rb +6 -6
- data/lib/{ms → mspire}/ident/peptide_hit/qvalue.rb +7 -7
- data/lib/{ms → mspire}/ident/pepxml.rb +7 -7
- data/lib/{ms → mspire}/ident/pepxml/modifications.rb +7 -7
- data/lib/{ms → mspire}/ident/pepxml/msms_pipeline_analysis.rb +6 -6
- data/lib/{ms → mspire}/ident/pepxml/msms_run_summary.rb +9 -9
- data/lib/{ms → mspire}/ident/pepxml/parameters.rb +1 -1
- data/lib/{ms → mspire}/ident/pepxml/sample_enzyme.rb +4 -4
- data/lib/{ms → mspire}/ident/pepxml/search_database.rb +5 -5
- data/lib/{ms → mspire}/ident/pepxml/search_hit.rb +4 -4
- data/lib/{ms → mspire}/ident/pepxml/search_hit/modification_info.rb +7 -7
- data/lib/{ms → mspire}/ident/pepxml/search_result.rb +5 -5
- data/lib/{ms → mspire}/ident/pepxml/search_summary.rb +12 -12
- data/lib/{ms → mspire}/ident/pepxml/spectrum_query.rb +7 -7
- data/lib/{ms → mspire}/ident/protein.rb +2 -2
- data/lib/{ms → mspire}/ident/protein_group.rb +2 -2
- data/lib/{ms → mspire}/ident/search.rb +1 -1
- data/lib/{ms → mspire}/isotope.rb +3 -3
- data/lib/{ms → mspire}/isotope/aa.rb +1 -1
- data/lib/{ms → mspire}/isotope/distribution.rb +17 -17
- data/lib/{ms → mspire}/isotope/nist_isotope_info.yml +0 -0
- data/lib/{ms → mspire}/mascot.rb +1 -1
- data/lib/{ms → mspire}/mass.rb +7 -7
- data/lib/{ms → mspire}/mass/aa.rb +6 -6
- data/lib/{ms → mspire}/molecular_formula.rb +7 -7
- data/lib/{ms → mspire}/mzml.rb +55 -55
- data/lib/{ms → mspire}/mzml/activation.rb +3 -3
- data/lib/{ms → mspire}/mzml/chromatogram.rb +3 -3
- data/lib/{ms → mspire}/mzml/chromatogram_list.rb +1 -1
- data/lib/{ms → mspire}/mzml/component.rb +5 -5
- data/lib/{ms → mspire}/mzml/contact.rb +3 -3
- data/lib/{ms → mspire}/mzml/cv.rb +1 -1
- data/lib/{ms → mspire}/mzml/data_array.rb +12 -12
- data/lib/{ms → mspire}/mzml/data_array_container_like.rb +7 -7
- data/lib/{ms → mspire}/mzml/data_processing.rb +3 -3
- data/lib/{ms → mspire}/mzml/file_content.rb +3 -3
- data/lib/{ms → mspire}/mzml/file_description.rb +4 -4
- data/lib/{ms → mspire}/mzml/index_list.rb +2 -2
- data/lib/{ms → mspire}/mzml/instrument_configuration.rb +7 -7
- data/lib/{ms → mspire}/mzml/isolation_window.rb +3 -3
- data/lib/{ms → mspire}/mzml/list.rb +1 -1
- data/lib/{ms → mspire}/mzml/plms1.rb +3 -3
- data/lib/{ms → mspire}/mzml/precursor.rb +6 -6
- data/lib/{ms → mspire}/mzml/processing_method.rb +3 -3
- data/lib/{ms → mspire}/mzml/product.rb +3 -3
- data/lib/{ms → mspire}/mzml/referenceable_param_group.rb +4 -4
- data/lib/{ms → mspire}/mzml/run.rb +3 -3
- data/lib/{ms → mspire}/mzml/sample.rb +5 -5
- data/lib/{ms → mspire}/mzml/scan.rb +4 -4
- data/lib/{ms → mspire}/mzml/scan_list.rb +3 -3
- data/lib/{ms → mspire}/mzml/scan_settings.rb +5 -5
- data/lib/{ms → mspire}/mzml/selected_ion.rb +5 -5
- data/lib/{ms → mspire}/mzml/software.rb +5 -5
- data/lib/{ms → mspire}/mzml/source_file.rb +5 -5
- data/lib/{ms → mspire}/mzml/spectrum.rb +33 -20
- data/lib/{ms → mspire}/mzml/spectrum_list.rb +4 -4
- data/lib/{ms → mspire}/obo.rb +1 -1
- data/lib/{ms → mspire}/peak.rb +3 -3
- data/lib/{ms → mspire}/peak/point.rb +1 -1
- data/lib/{ms → mspire}/plms1.rb +4 -4
- data/lib/{ms → mspire}/quant/qspec.rb +4 -4
- data/lib/{ms → mspire}/quant/qspec/protein_group_comparison.rb +4 -4
- data/lib/{ms → mspire}/spectrum.rb +6 -6
- data/lib/{ms → mspire}/spectrum/centroid.rb +1 -1
- data/lib/{ms → mspire}/spectrum_like.rb +5 -5
- data/lib/{ms → mspire}/user_param.rb +2 -2
- data/script/mzml_read_binary.rb +1 -1
- data/spec/{ms → mspire}/cv/param_spec.rb +6 -6
- data/spec/{ms → mspire}/digester_spec.rb +10 -10
- data/spec/{ms → mspire}/error_rate/qvalue_spec.rb +3 -3
- data/spec/{ms → mspire}/fasta_spec.rb +10 -10
- data/spec/{ms → mspire}/ident/peptide/db_spec.rb +9 -9
- data/spec/{ms → mspire}/ident/pepxml/sample_enzyme_spec.rb +10 -10
- data/spec/{ms → mspire}/ident/pepxml/search_hit/modification_info_spec.rb +4 -4
- data/spec/{ms → mspire}/ident/pepxml_spec.rb +22 -22
- data/spec/{ms → mspire}/ident/protein_group_spec.rb +4 -4
- data/spec/{ms → mspire}/isotope/aa_spec.rb +3 -3
- data/spec/{ms → mspire}/isotope/distribution_spec.rb +4 -4
- data/spec/{ms → mspire}/isotope_spec.rb +9 -9
- data/spec/{ms → mspire}/mass_spec.rb +8 -8
- data/spec/{ms → mspire}/molecular_formula_spec.rb +4 -4
- data/spec/{ms → mspire}/mzml/cv_spec.rb +4 -4
- data/spec/{ms → mspire}/mzml/data_array_spec.rb +7 -7
- data/spec/{ms → mspire}/mzml/file_content_spec.rb +4 -4
- data/spec/{ms → mspire}/mzml/file_description_spec.rb +4 -4
- data/spec/{ms → mspire}/mzml/index_list_spec.rb +13 -13
- data/spec/{ms → mspire}/mzml/plms1_spec.rb +8 -8
- data/spec/{ms → mspire}/mzml/referenceable_param_group_spec.rb +6 -6
- data/spec/{ms → mspire}/mzml_spec.rb +30 -30
- data/spec/{ms → mspire}/peak_spec.rb +10 -10
- data/spec/{ms → mspire}/plms1_spec.rb +7 -7
- data/spec/{ms → mspire}/quant/qspec_spec.rb +2 -2
- data/spec/{ms → mspire}/spectrum_spec.rb +8 -8
- data/spec/{ms → mspire}/user_param_spec.rb +8 -8
- data/spec/testfiles/{ms → mspire}/ident/peptide/db/uni_11_sp_tr.fasta +0 -0
- data/spec/testfiles/{ms → mspire}/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +0 -0
- data/spec/testfiles/{ms → mspire}/mzml/j24z.idx_comp.3.mzML +0 -0
- data/spec/testfiles/{ms → mspire}/mzml/mspire_simulated.MSn.check.mzML +0 -0
- data/spec/testfiles/{ms → mspire}/mzml/openms.noidx_nocomp.12.mzML +0 -0
- data/spec/testfiles/{ms → mspire}/quant/kill_extra_tabs.rb +0 -0
- data/spec/testfiles/{ms → mspire}/quant/max_quant_output.provenance.txt +0 -0
- data/spec/testfiles/{ms → mspire}/quant/max_quant_output.txt +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_final.killedextratabs.tsv +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_final.killedextratabs.tsv_qspecgp +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_final.killedextratabs.tsv_qspecgp.csv +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_final.txt +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_final.txt_qspecgp +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_lfq_qspec.CSV.csv +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_lfq_qspec.csv +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_lfq_qspec.oneprot.csv +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_lfq_qspec.oneprot.tsv +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_lfq_qspec.oneprot.tsv_qspecgp.csv +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_lfq_qspec.txt +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_lfq_tabdel.txt +0 -0
- data/spec/testfiles/{ms → mspire}/quant/pdcd5_lfq_tabdel.txt_qspecgp +0 -0
- data/spec/testfiles/{ms → mspire}/quant/remove_rest_of_proteins.rb +0 -0
- data/spec/testfiles/{ms → mspire}/quant/unlog_transform.rb +0 -0
- metadata +148 -194
- data/lib/ms.rb +0 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.6.26
|
1
|
+
0.7.2
|
data/lib/mspire.rb
CHANGED
data/lib/{ms → mspire}/cv.rb
RENAMED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'cv/param'
|
2
|
-
require '
|
2
|
+
require 'mspire/cv'
|
3
3
|
|
4
|
-
module MS
|
4
|
+
module Mspire
|
5
5
|
module CV
|
6
6
|
|
7
7
|
# a mass spec related CVParam.
|
@@ -22,12 +22,12 @@ module MS
|
|
22
22
|
when 1
|
23
23
|
nil
|
24
24
|
when 2
|
25
|
-
|
25
|
+
Mspire::CV::Param[args.pop] if args.last.is_a?(String) && args.last =~ /^[A-Za-z]+:/
|
26
26
|
when 3
|
27
|
-
|
27
|
+
Mspire::CV::Param[args.pop]
|
28
28
|
end
|
29
29
|
obo_type = args[0][/([A-Za-z]+):/,1]
|
30
|
-
self.new(obo_type, args[0],
|
30
|
+
self.new(obo_type, args[0], Mspire::CV::Obo[obo_type][args.first], args[1], unit)
|
31
31
|
end
|
32
32
|
end
|
33
33
|
end
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require '
|
1
|
+
require 'mspire/cv/param'
|
2
2
|
|
3
|
-
module MS
|
3
|
+
module Mspire
|
4
4
|
module CV
|
5
5
|
module Paramable
|
6
6
|
|
@@ -10,15 +10,15 @@ module MS
|
|
10
10
|
describe!(*opts[:params])
|
11
11
|
end
|
12
12
|
|
13
|
-
# casts each string or array as a Param object (using
|
13
|
+
# casts each string or array as a Param object (using Mspire::CV::Param[]),
|
14
14
|
# pushes it onto the params attribute and returns the growing params object
|
15
15
|
def describe!(*args)
|
16
16
|
@params ||= []
|
17
17
|
as_params = args.map do |arg|
|
18
18
|
if arg.is_a?(Array)
|
19
|
-
|
19
|
+
Mspire::CV::Param[ *arg ]
|
20
20
|
elsif arg.is_a?(String)
|
21
|
-
|
21
|
+
Mspire::CV::Param[ arg ]
|
22
22
|
else
|
23
23
|
arg
|
24
24
|
end
|
@@ -1,10 +1,10 @@
|
|
1
1
|
require 'strscan'
|
2
2
|
|
3
|
-
module
|
3
|
+
module Mspire
|
4
4
|
|
5
5
|
# A Digester splits a protein sequence into peptides at specified sites.
|
6
6
|
#
|
7
|
-
# trypsin =
|
7
|
+
# trypsin = Mspire::Digester[:trypsin]
|
8
8
|
#
|
9
9
|
# trypsin.digest('MIVIGRSIVHPYITNEYEPFAAEKQQILSIMAG')
|
10
10
|
# # => ['MIVIGR', 'SIVHPYITNEYEPFAAEK', 'QQILSIMAG']
|
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'set'
|
2
|
-
require '
|
2
|
+
require 'mspire/error_rate/decoy'
|
3
3
|
|
4
|
-
module MS
|
4
|
+
module Mspire
|
5
5
|
|
6
6
|
module ErrorRate
|
7
7
|
# For generating and working with q-value calculations. The q-value is the global false discovery rate when accepting that particular ID. We do not necessarily distinguish here between *how* the FDR is generated (i.e., Storey's pFDR "the occurrence of false positives" vs. Benjamini-Hochberg's FDR "the rate of false positives" [except to prefer Storey when possible] ). The main point is that we sort and threshold based on a global FDR.
|
@@ -22,7 +22,7 @@ module MS
|
|
22
22
|
# Proc.new doesn't do arity checking
|
23
23
|
hit_with_qvalue_pairs = Proc.new do |hits|
|
24
24
|
sorted_best_to_worst = (hits.sort_by(&sorting)).reverse
|
25
|
-
(target_hits, qvalues) =
|
25
|
+
(target_hits, qvalues) = Mspire::ErrorRate::Qvalue.mixed_target_decoy(sorted_best_to_worst, target_set, opts)
|
26
26
|
target_hits.zip(qvalues)
|
27
27
|
end
|
28
28
|
|
@@ -54,7 +54,7 @@ module MS
|
|
54
54
|
best_to_worst.each do |hit|
|
55
55
|
if target_setlike.include?(hit)
|
56
56
|
num_target += 1
|
57
|
-
precision =
|
57
|
+
precision = Mspire::ErrorRate::Decoy.precision(num_target, num_decoy)
|
58
58
|
target_hits << hit
|
59
59
|
qvalues << (1.0 - precision)
|
60
60
|
else
|
data/lib/{ms → mspire}/fasta.rb
RENAMED
@@ -10,12 +10,12 @@ class Bio::FastaFormat
|
|
10
10
|
alias_method :sequence, :seq
|
11
11
|
end
|
12
12
|
|
13
|
-
module MS
|
13
|
+
module Mspire
|
14
14
|
# A convenience class for working with fasta formatted sequence databases.
|
15
15
|
# the file which includes this class also includes Enumerable with
|
16
16
|
# Bio::FlatFile so you can do things like this:
|
17
17
|
#
|
18
|
-
# accessions =
|
18
|
+
# accessions = Mspire::Fasta.open("file.fasta") do |fasta|
|
19
19
|
# fasta.map(&:accession)
|
20
20
|
# end
|
21
21
|
#
|
@@ -24,17 +24,17 @@ module MS
|
|
24
24
|
# entry.header == entry.definition
|
25
25
|
# entry.sequence == entry.seq
|
26
26
|
#
|
27
|
-
#
|
27
|
+
# Mspire::Fasta.new accepts both an IO object or a String (a fasta formatted
|
28
28
|
# string itself)
|
29
29
|
#
|
30
30
|
# # taking an io object:
|
31
31
|
# File.open("file.fasta") do |io|
|
32
|
-
# fasta =
|
32
|
+
# fasta = Mspire::Fasta.new(io)
|
33
33
|
# ... do something with it
|
34
34
|
# end
|
35
35
|
# # taking a string
|
36
36
|
# string = ">id1 a simple header\nAAASDDEEEDDD\n>id2 header again\nPPPPPPWWWWWWTTTTYY\n"
|
37
|
-
# fasta =
|
37
|
+
# fasta = Mspire::Fasta.new(string)
|
38
38
|
# (simple, not_simple) = fasta.partition {|entry| entry.header =~ /simple/ }
|
39
39
|
module Fasta
|
40
40
|
|
data/lib/{ms → mspire}/ident.rb
RENAMED
@@ -1,30 +1,30 @@
|
|
1
1
|
|
2
|
-
require '
|
3
|
-
require '
|
4
|
-
require '
|
2
|
+
require 'mspire/ident/protein_group'
|
3
|
+
require 'mspire/ident/protein'
|
4
|
+
require 'mspire/ident/peptide_hit'
|
5
5
|
|
6
|
-
module
|
6
|
+
module Mspire
|
7
7
|
|
8
|
-
# An
|
8
|
+
# An Mspire::Ident::ProteinGroup is an array of proteins that responds to
|
9
9
|
# :peptide_hits. All protein level identifications should be stored in a
|
10
10
|
# proteingroup object.
|
11
11
|
#
|
12
|
-
# An
|
12
|
+
# An Mspire::Ident::Protein is an object representing a protein (:id,
|
13
13
|
# :sequence, :description). Note, it is not a protein hit (use a
|
14
14
|
# ProteinGroup)
|
15
15
|
#
|
16
|
-
# An
|
16
|
+
# An Mspire::Ident::PeptideHit is an object representing a match between an
|
17
17
|
# amino acid sequence and a spectrum.
|
18
18
|
#
|
19
19
|
# Typical usage:
|
20
20
|
#
|
21
|
-
# require '
|
21
|
+
# require 'mspire/ident'
|
22
22
|
#
|
23
23
|
# hit1 = PeptideHit.new(:id => 1, :aaseq => 'PEPTIDE', :search =>
|
24
|
-
#
|
24
|
+
# Mspire::Ident::Search.new, etc...)
|
25
25
|
# peptide_hits = [hit1, hit2, ...]
|
26
26
|
#
|
27
|
-
# protein_groups =
|
27
|
+
# protein_groups = Mspire::Ident::ProteinGroup.peptide_hits_to_protein_groups(peptide_hits)
|
28
28
|
# protein_groups.first.peptide_hits # => the peptide hits in that group
|
29
29
|
module Ident
|
30
30
|
# returns the filetype (if possible)
|
@@ -1,5 +1,5 @@
|
|
1
|
-
module
|
2
|
-
module
|
1
|
+
module Mspire ; end
|
2
|
+
module Mspire::Ident ; end
|
3
3
|
|
4
4
|
# A 'sequence' is a notation of a peptide that includes the leading and
|
5
5
|
# trailing amino acid after cleavage (e.g., K.PEPTIDER.E or -.STARTK.L )
|
@@ -7,7 +7,7 @@ module MS::Ident ; end
|
|
7
7
|
#
|
8
8
|
# 'aaseq' is the amino acid sequence of just the peptide with no leading or
|
9
9
|
# trailing notation (e.g., PEPTIDER or LAKKLY)
|
10
|
-
module
|
10
|
+
module Mspire::Ident::Peptide
|
11
11
|
Nonstandard_AA_re = /[^A-Z\.\-]/
|
12
12
|
|
13
13
|
class << self
|
@@ -1,22 +1,22 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
1
|
+
require 'mspire/digester'
|
2
|
+
require 'mspire/fasta'
|
3
3
|
require 'optparse'
|
4
4
|
|
5
|
-
module
|
6
|
-
module
|
7
|
-
module
|
5
|
+
module Mspire ; end
|
6
|
+
module Mspire::Ident ; end
|
7
|
+
module Mspire::Ident::Peptide ; end
|
8
8
|
|
9
9
|
# the object itself is a modified Hash.
|
10
10
|
# It is initialized with the database file and a protein array can be
|
11
11
|
# retrieved with the #[] method given an amino acid sequence. All other
|
12
12
|
# methods are untested at this time and should be avoided!
|
13
|
-
class MS::Ident::Peptide::Db < Hash
|
13
|
+
class Mspire::Ident::Peptide::Db < Hash
|
14
14
|
MAX_NUM_AA_EXPANSION = 3
|
15
15
|
|
16
16
|
# the twenty standard amino acids
|
17
17
|
STANDARD_AA = %w(A C D E F G H I K L M N P Q R S T V W Y)
|
18
18
|
|
19
|
-
DEFAULT_PEPTIDE_CENTRIC_DB = {:missed_cleavages => 2, :min_length => 4, :enzyme =>
|
19
|
+
DEFAULT_PEPTIDE_CENTRIC_DB = {:missed_cleavages => 2, :min_length => 4, :enzyme => Mspire::Digester[:trypsin], :id_regexp => nil, :remove_digestion_file => true, :cleave_initiator_methionine => true, :expand_aa => {'X' => STANDARD_AA}}
|
20
20
|
|
21
21
|
PROTEIN_DELIMITER = "\t"
|
22
22
|
KEY_VALUE_DELIMITER = ": "
|
@@ -25,7 +25,7 @@ class MS::Ident::Peptide::Db < Hash
|
|
25
25
|
|
26
26
|
opt = {
|
27
27
|
:remove_digestion_file => true,
|
28
|
-
:enzyme =>
|
28
|
+
:enzyme => Mspire::Digester[:trypsin]
|
29
29
|
}
|
30
30
|
opts = OptionParser.new do |op|
|
31
31
|
op.banner = "usage: #{File.basename($0)} <file>.fasta ..."
|
@@ -42,9 +42,9 @@ class MS::Ident::Peptide::Db < Hash
|
|
42
42
|
op.on("--min-length <#{opt[:min_length]}>", Integer, "the minimum peptide aaseq length") {|v| opt[:min_length] = v }
|
43
43
|
op.on("--no-cleaved-methionine", "does not cleave off initiator methionine") { opt[:cleave_initiator_methionine] = false }
|
44
44
|
op.on("--no-expand-x", "don't enumerate aa 'X' possibilities") { opt[:expand_aa] = nil }
|
45
|
-
op.on("-e", "--enzyme <name>", "enzyme for digestion") {|v| opt[:enzyme] =
|
45
|
+
op.on("-e", "--enzyme <name>", "enzyme for digestion") {|v| opt[:enzyme] = Mspire::Insilico::Digester.const_get(v.upcase) }
|
46
46
|
op.on("--list-enzymes", "lists approved enzymes and exits") do
|
47
|
-
puts
|
47
|
+
puts Mspire::Digester::ENZYMES.keys.join("\n")
|
48
48
|
exit
|
49
49
|
end
|
50
50
|
end
|
@@ -56,7 +56,7 @@ class MS::Ident::Peptide::Db < Hash
|
|
56
56
|
end
|
57
57
|
|
58
58
|
argv.map do |file|
|
59
|
-
|
59
|
+
Mspire::Ident::Peptide::Db.peptide_centric_db(file, opt)
|
60
60
|
end
|
61
61
|
end
|
62
62
|
|
@@ -79,7 +79,7 @@ class MS::Ident::Peptide::Db < Hash
|
|
79
79
|
base = fasta_file.chomp(File.extname(fasta_file))
|
80
80
|
digestion_file = base + ".msd_clvg#{missed_cleavages}.peptides"
|
81
81
|
File.open(digestion_file, "w") do |fh|
|
82
|
-
|
82
|
+
Mspire::Fasta.open(fasta_file) do |fasta|
|
83
83
|
fasta.each do |prot|
|
84
84
|
peptides = enzyme.digest(prot.sequence, missed_cleavages)
|
85
85
|
if (cleave_initiator_methionine && (prot.sequence[0,1] == "M"))
|
@@ -1,22 +1,22 @@
|
|
1
1
|
require 'merge'
|
2
2
|
|
3
|
-
module
|
4
|
-
module
|
3
|
+
module Mspire ; end
|
4
|
+
module Mspire::Ident ; end
|
5
5
|
|
6
|
-
module
|
6
|
+
module Mspire::Ident::PeptideHitLike
|
7
7
|
attr_accessor :id
|
8
8
|
attr_accessor :search
|
9
9
|
attr_accessor :missed_cleavages
|
10
10
|
attr_accessor :aaseq
|
11
11
|
attr_accessor :charge
|
12
|
-
# an array of
|
12
|
+
# an array of Mspire::Ident::ProteinLike objects
|
13
13
|
attr_accessor :proteins
|
14
14
|
# relative to the set the hit is contained in!
|
15
15
|
attr_accessor :qvalue
|
16
16
|
end
|
17
17
|
|
18
|
-
class
|
19
|
-
include
|
18
|
+
class Mspire::Ident::PeptideHit
|
19
|
+
include Mspire::Ident::PeptideHitLike
|
20
20
|
include Merge
|
21
21
|
|
22
22
|
def initialize(hash)
|
@@ -1,10 +1,10 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
1
|
+
require 'mspire/ident/search'
|
2
|
+
require 'mspire/ident/peptide_hit'
|
3
3
|
|
4
|
-
module
|
5
|
-
module
|
4
|
+
module Mspire ; end
|
5
|
+
module Mspire::Ident ; end
|
6
6
|
|
7
|
-
class MS::Ident::PeptideHit
|
7
|
+
class Mspire::Ident::PeptideHit
|
8
8
|
module Qvalue
|
9
9
|
FILE_EXTENSION = '.phq.tsv'
|
10
10
|
FILE_DELIMITER = "\t"
|
@@ -32,7 +32,7 @@ class MS::Ident::PeptideHit
|
|
32
32
|
|
33
33
|
# returns an array of PeptideHit objects from a phq.tsv
|
34
34
|
def from_file(filename)
|
35
|
-
searches = Hash.new {|h,id| h[id] =
|
35
|
+
searches = Hash.new {|h,id| h[id] = Mspire::Ident::Search.new(id) }
|
36
36
|
peptide_hits = []
|
37
37
|
File.open(filename) do |io|
|
38
38
|
header = io.readline.chomp.split(FILE_DELIMITER)
|
@@ -40,7 +40,7 @@ class MS::Ident::PeptideHit
|
|
40
40
|
io.each do |line|
|
41
41
|
line.chomp!
|
42
42
|
(run_id, id, aaseq, charge, qvalue) = line.split(FILE_DELIMITER)
|
43
|
-
ph =
|
43
|
+
ph = Mspire::Ident::PeptideHit.new
|
44
44
|
ph.search = searches[run_id]
|
45
45
|
ph.id = id; ph.aaseq = aaseq ; ph.charge = charge.to_i ; ph.qvalue = qvalue.to_f
|
46
46
|
peptide_hits << ph
|
@@ -1,10 +1,10 @@
|
|
1
1
|
require 'nokogiri'
|
2
|
-
require '
|
3
|
-
require '
|
2
|
+
require 'mspire/ident'
|
3
|
+
require 'mspire/ident/pepxml/msms_pipeline_analysis'
|
4
4
|
|
5
5
|
require 'ostruct'
|
6
6
|
|
7
|
-
module
|
7
|
+
module Mspire ; module Ident ; end ; end
|
8
8
|
|
9
9
|
class Numeric
|
10
10
|
# returns a string with a + or - on the front
|
@@ -17,14 +17,14 @@ class Numeric
|
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
|
-
class MS::Ident::Pepxml
|
20
|
+
class Mspire::Ident::Pepxml
|
21
21
|
XML_STYLESHEET_LOCATION = '/tools/bin/TPP/tpp/schema/pepXML_std.xsl'
|
22
22
|
DEFAULT_PEPXML_VERSION = MsmsPipelineAnalysis::PEPXML_VERSION
|
23
23
|
XML_ENCODING = 'UTF-8'
|
24
24
|
|
25
25
|
attr_accessor :msms_pipeline_analysis
|
26
26
|
|
27
|
-
# returns an array of
|
27
|
+
# returns an array of Mspire::Ident::Pepxml::SearchHit::Simple structs
|
28
28
|
def self.simple_search_hits(file)
|
29
29
|
hit_values = File.open(file) do |io|
|
30
30
|
doc = Nokogiri::XML.parse(io, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS | Nokogiri::XML::ParseOptions::STRICT)
|
@@ -40,7 +40,7 @@ class MS::Ident::Pepxml
|
|
40
40
|
search_score_nodes.each do |node|
|
41
41
|
search_scores[node['name'].to_sym] = node['value'].to_f
|
42
42
|
end
|
43
|
-
|
43
|
+
Mspire::Ident::Pepxml::SearchHit::Simple.new("hit_#{i}", Mspire::Ident::Search.new(file.chomp(File.extname(file))), aaseq, charge, search_scores)
|
44
44
|
end
|
45
45
|
end
|
46
46
|
end
|
@@ -97,7 +97,7 @@ class MS::Ident::Pepxml
|
|
97
97
|
|
98
98
|
builder = Nokogiri::XML::Builder.new(:encoding => XML_ENCODING)
|
99
99
|
msms_pipeline_analysis.to_xml(builder)
|
100
|
-
add_stylesheet(builder.doc,
|
100
|
+
add_stylesheet(builder.doc, Mspire::Ident::Pepxml::XML_STYLESHEET_LOCATION)
|
101
101
|
string = builder.doc.to_xml
|
102
102
|
|
103
103
|
if outfile
|
@@ -1,13 +1,13 @@
|
|
1
1
|
require 'merge'
|
2
2
|
require 'nokogiri'
|
3
3
|
|
4
|
-
module
|
5
|
-
module
|
6
|
-
class
|
4
|
+
module Mspire ; end
|
5
|
+
module Mspire::Ident ; end
|
6
|
+
class Mspire::Ident::Pepxml ; end
|
7
7
|
|
8
8
|
# Modified aminoacid, static or variable
|
9
9
|
# unless otherwise stated, all attributes can be anything
|
10
|
-
class MS::Ident::Pepxml::AminoacidModification
|
10
|
+
class Mspire::Ident::Pepxml::AminoacidModification
|
11
11
|
include Merge
|
12
12
|
# The amino acid (one letter code)
|
13
13
|
attr_accessor :aminoacid
|
@@ -36,7 +36,7 @@ class MS::Ident::Pepxml::AminoacidModification
|
|
36
36
|
xmlb = builder || Nokogiri::XML::Builder.new
|
37
37
|
# note massdiff: must begin with either + (nonnegative) or - [e.g.
|
38
38
|
# +1.05446 or -2.3342] consider Numeric#to_plus_minus_string in
|
39
|
-
#
|
39
|
+
# Mspire::Ident::Pepxml
|
40
40
|
attrs = [:aminoacid, :massdiff, :mass, :variable, :peptide_terminus, :symbol, :binary].map {|at| v=send(at) ; [at,v] if v }.compact
|
41
41
|
hash = Hash[attrs]
|
42
42
|
hash[:massdiff] = hash[:massdiff].to_plus_minus_string
|
@@ -46,7 +46,7 @@ class MS::Ident::Pepxml::AminoacidModification
|
|
46
46
|
end
|
47
47
|
|
48
48
|
# Modified aminoacid, static or variable
|
49
|
-
class MS::Ident::Pepxml::TerminalModification
|
49
|
+
class Mspire::Ident::Pepxml::TerminalModification
|
50
50
|
include Merge
|
51
51
|
# n for N-terminus, c for C-terminus
|
52
52
|
attr_accessor :terminus
|
@@ -57,7 +57,7 @@ class MS::Ident::Pepxml::TerminalModification
|
|
57
57
|
# Y if both modified and unmodified terminus could be present in the
|
58
58
|
# dataset, N if only modified terminus can be present
|
59
59
|
attr_accessor :variable
|
60
|
-
#
|
60
|
+
# symbol used by search engine to designate this modification
|
61
61
|
attr_accessor :symbol
|
62
62
|
# whether modification can reside only at protein terminus (specified n or
|
63
63
|
# c)
|