RubyGems - protk - Versions diffs - 1.2.4 → 1.2.5 - Mend

protk 1.2.4 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

data/bin/add_retention_times.rb +89 -0
data/bin/augustus_to_proteindb.rb +193 -0
data/bin/blastxml_to_table.rb +72 -0
data/bin/feature_finder.rb +7 -1
data/bin/make_decoy.rb +10 -2
data/bin/mascot_search.rb +14 -4
data/bin/msgfplus_search.rb +14 -5
data/bin/peptide_prophet.rb +14 -7
data/bin/protxml_to_gff.rb +624 -0
data/bin/protxml_to_table.rb +19 -2
data/bin/sixframe.rb +3 -1
data/bin/tandem_search.rb +51 -23
data/bin/toppas_pipeline.rb +8 -3
data/bin/uniprot_annotation.rb +6 -1
data/ext/protk/{protk.c → decoymaker/decoymaker.c} +13 -15
data/ext/protk/decoymaker/extconf.rb +3 -0
data/ext/protk/simplealign/extconf.rb +3 -0
data/lib/protk/data/FeatureFinderIsotopeWavelet.ini +6 -6
data/lib/protk/gapped_aligner.rb +264 -0
data/lib/protk/manage_db_rakefile.rake +2 -1
data/lib/protk/mascot_util.rb +7 -2
data/lib/protk/randomize.rb +2 -2
data/lib/protk/search_tool.rb +1 -1
data/lib/protk/setup_rakefile.rake +25 -2
data/lib/protk/spreadsheet_extensions.rb +1 -0
data/lib/protk/swissprot_database.rb +11 -1
metadata +30 -8
data/bin/mascot2xml.rb +0 -87
data/ext/protk/extconf.rb +0 -3
data/lib/protk/data/pepxml_mascot_template.xml +0 -29
data/lib/protk/data/predefined_db.trembl_annotation.yaml +0 -20

data/bin/protxml_to_table.rb CHANGED Viewed

@@ -19,6 +19,11 @@ include LibXML
 tool=Tool.new([:explicit_output])
 tool.option_parser.banner = "Convert a protXML file to a tab delimited table.\n\nUsage: protxml_to_table.rb [options] file1.protXML"
+# tool.options.proteinid_regex=".*?\|.*?\|(.*)"
+# tool.option_parser.on( '--regex rexpr', 'Regex' ) do |regex|
+#   tool.options.proteinid_regex=regex
+# end
 exit unless tool.check_options
 if ( ARGV[0].nil? )
@@ -48,7 +53,7 @@ end
 column_headers=[
-	"group_number","group_probability","protein_name",
+	"group_number","group_probability","protein_name","protein_id","indistinguishable_proteins",
 	"protein_probability","coverage","peptides",
 	"num_peptides","confidence"
 ]
@@ -62,13 +67,25 @@ protein_groups.each do |protein_group|
 	proteins=protein_group.find("./#{protxml_ns_prefix}protein", protxml_ns)
-	proteins.each do |protein|
+	proteins.each do |protein|
+		indis_proteins=protein.find("./#{protxml_ns_prefix}indistinguishable_protein", protxml_ns)
+		indis_proteins_summary=""
+		indis_proteins.each { |iprot| indis_proteins_summary<<"#{iprot.attributes['protein_name']};" }
+		protein_id=""
+		if protein.attributes['protein_name'] =~ /.*?\|.*?\|(.*)/
+			protein_id=protein.attributes['protein_name'].match(/.*?\|.*?\|(.*)/)[1]
+		end
 		column_values=[]
 		column_values << protein_group.attributes['group_number']
 		column_values << protein_group.attributes['probability']
 		column_values << protein.attributes['protein_name']
+		column_values << protein_id
+		column_values << indis_proteins_summary
 		column_values << protein.attributes['probability']
 		column_values << protein.attributes['percent_coverage']
 		column_values << protein.attributes['unique_stripped_peptides']

data/bin/sixframe.rb CHANGED Viewed

@@ -41,9 +41,11 @@ end
 inname=ARGV.shift
-outfile=File.open("#{inname}.translated.fasta",'w')
+outfile=nil
 if ( tool.explicit_output != nil)
   outfile=File.open(tool.explicit_output,'w')
+else
+  outfile=File.open("#{inname}.translated.fasta",'w')
 end

data/bin/tandem_search.rb CHANGED Viewed

@@ -149,6 +149,37 @@ def set_option(std_params, tandem_key, value)
   notes[0].content=value
 end
+def append_option(std_params, tandem_key, value)
+  notes = std_params.find("/bioml/note[@type=\"input\" and @label=\"#{tandem_key}\"]")
+  if notes.length == 0
+    node = XML::Node.new('note')
+    node["type"] = "input"
+    node["label"] = tandem_key
+    node.content = value
+    std_params.find('/bioml')[0] << node
+  else
+    throw "Exactly one parameter named (#{tandem_key}) is required in parameter file" unless notes.length==1
+    notes[0].content = append_string(notes[0].content, value)
+  end
+end
+def collapse_keys(std_params, tandem_key)
+    mods=std_params.find('/bioml/note[@type="input" and @label="#{tandem_key}"]')
+    if not mods
+      first_mod = mods[0]
+      rest_mods = mods[1..-1]
+      rest_mods.each{ |node| first_mod.content = append_string(first_mod.content, node.content); node.remove!}
+    end
+end
+def append_string(first, second)
+  if first.empty?
+    second
+  else
+    "#{first},#{second}"
+  end
+end
 def generate_parameter_doc(std_params,output_path,input_path,taxo_path,current_db,search_tool,genv)
   set_option(std_params, "protein, cleavage semi", search_tool.cleavage_semi ? "yes" : "no")
   set_option(std_params, "scoring, maximum missed cleavage sites", search_tool.missed_cleavages)
@@ -301,7 +332,11 @@ def generate_parameter_doc(std_params,output_path,input_path,taxo_path,current_d
     mods=std_params.find('/bioml/note[@type="input" and @id="methionine-oxidation-variable"]')
     mods.each{ |node| node.remove!}
   end
+  # Merge all remaining id based modification into single modification.
+  collapse_keys(std_params, "residue, potential modification mass")
+  collapse_keys(std_params, "residue, modification mass")
   var_mods = search_tool.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }
   var_mods=var_mods.collect {|mod| decode_modification_string(mod) }
   fix_mods = search_tool.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }
@@ -313,31 +348,17 @@ def generate_parameter_doc(std_params,output_path,input_path,taxo_path,current_d
   var_mods.each do |vm|
     mod_type="potential modification mass"
-    mod_type = "potential modification motif" if ( vm=~/[\[\]\(\)\{\}\!]/ )
-    mod_id_label = "custom-variable-mod-#{mod_id.to_s}"
-    mod_id=mod_id+1
-    mnode=XML::Node.new('node')
-    mnode["id"]=mod_id_label
-    mnode["type"]="input"
-    mnode["label"]="residue, #{mod_type}"
-    mnode.content=vm
-    root_bioml_node << mnode
+    mod_type = "potential modification motif" if motif?(vm)
+    label="residue, #{mod_type}"
+    append_option(std_params, label, vm)
   end
   mod_id=1
   fix_mods.each do |fm|
     mod_type="modification mass"
-    mod_type = "modification motif" if ( fm=~/[\[\]\(\)\{\}\!]/ )
-    mod_id_label = "custom-fixed-mod-#{mod_id.to_s}"
-    mod_id=mod_id+1
-    mnode=XML::Node.new('node')
-    mnode["id"]=mod_id_label
-    mnode["type"]="input"
-    mnode["label"]="residue, #{mod_type}"
-    mnode.content=fm
-    root_bioml_node << mnode
+    mod_type = "modification motif" if motif?(fm)
+    label="residue, #{mod_type}"
+    append_option(std_params, label, fm)
   end
   #p root_bioml_node
@@ -345,6 +366,13 @@ def generate_parameter_doc(std_params,output_path,input_path,taxo_path,current_d
 end
+def motif?(mod_string)
+  # 124@[ is not a modification motif, it is a residue (N-term) modification,
+  # so when checking if modification is a motif look for paired square brackets.
+  mod_string =~ /[\(\)\{\}\!]/ or mod_string =~ /\[.*\]/
+end
 def generate_taxonomy_doc(taxo_doc,current_db,search_tool)
   taxon_label=taxo_doc.find('/bioml/taxon')
@@ -425,8 +453,8 @@ ARGV.each do |filename|
     # Run the search
     #
     job_params= {:jobid => search_tool.jobid_from_filename(filename)}
-    job_params[:queue]="lowmem"
-    job_params[:vmem]="900mb"
+    job_params[:queue]="sixteen"
+    job_params[:vmem]="12gb"
     code = search_tool.run(cmd,genv,job_params,jobscript_path)
     throw "Command failed with exit code #{code}" unless code==0
   else

data/bin/toppas_pipeline.rb CHANGED Viewed

@@ -15,7 +15,7 @@ require 'libxml'
 include LibXML
-tool=Tool.new([:explicit_output, :background,:over_write])
+tool=Tool.new([:background,:over_write])
 tool.option_parser.banner = "Execute a toppas pipeline with a single inputs node\n\nUsage: toppas_pipeline.rb [options] input1 input2 ..."
 tool.options.outdir = ""
@@ -28,6 +28,11 @@ tool.option_parser.on( '--toppas-file f',"the toppas file to run" ) do |file|
   tool.options.toppas_file = file
 end
+tool.options.threads = "1"
+tool.option_parser.on( '--threads t',"Number of threads to use" ) do |tr|
+  tool.options.threads=tr
+end
 exit unless tool.check_options
 if ( ARGV[0].nil? )
@@ -67,13 +72,13 @@ throw "outdir is a required parameter" if tool.outdir==""
 throw "toppas-file is a required parameter" if tool.toppas_file==""
 throw "outdir must exist" unless Dir.exist?(tool.outdir)
-trf_path = "#{tool.toppas_file}.trf"
+trf_path = "#{Pathname.new(Tempfile.new(tool.toppas_file).path).basename.to_s}.trf"
 generate_trf(ARGV,trf_path)
 cmd=""
 cmd<<"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:#{genv.openms_root}/lib;
-#{genv.executepipeline} -in #{Pathname.new(tool.toppas_file).realpath.to_s} -out_dir #{Pathname.new(tool.outdir).realpath.to_s} -resource_file #{Pathname.new(trf_path).realpath.to_s}"
+#{genv.executepipeline} -in #{Pathname.new(tool.toppas_file).realpath.to_s} -out_dir #{Pathname.new(tool.outdir).realpath.to_s} -resource_file #{Pathname.new(trf_path).realpath.to_s} -threads #{tool.threads}"
 run_pipeline(genv,tool,cmd,tool.outdir,tool.jobid_from_filename(tool.toppas_file))

data/bin/uniprot_annotation.rb CHANGED Viewed

@@ -24,6 +24,11 @@ tool.option_parser.on(  '--id-column num', 'Specify a column for ids (default is
   tool.options.id_column=col.to_i
 end
+tool.options.flatfiledb="swissprot"
+tool.option_parser.on(  '--flatfiledb dbname', 'Specify path to a Uniprot flatfile' ) do |dbname|
+  tool.options.flatfiledb=dbname
+end
 tool.options.fields=nil
 tool.option_parser.on(  '--fields flds', 'A comma separated list of fields to extract' ) do |flds|
   tool.options.fields=flds
@@ -42,7 +47,7 @@ genv=Constants.new
 input_file=ARGV[0]
-swissprotdb=SwissprotDatabase.new(genv)
+swissprotdb=SwissprotDatabase.new(genv,tool.flatfiledb)
 output_file=nil

data/ext/protk/{protk.c → decoymaker/decoymaker.c} RENAMED Viewed

@@ -1,6 +1,4 @@
 #include <ruby.h>
 /*                                                                                                 */
 /* make_random.c - make random protein sequence database using Markov chain with transitional      */
 /* probabilities from amino acid frequencies in a real database in FASTA format                    */
@@ -25,7 +23,8 @@
 #define MAX_SEQUENCE_LENGTH 20000
 #define MAX_LINE_LENGTH 20000 /* large enough to read in long header lines */
-static VALUE protk_make_decoys(VALUE self,VALUE input_file_in,VALUE db_length_in,VALUE output_file_in,char *prefix_string_in) {
+static VALUE decoymaker_make_decoys(VALUE self,VALUE input_file_in,VALUE db_length_in,VALUE output_file_in,char *prefix_string_in) {
   char *input_file = RSTRING_PTR(input_file_in);
   long sequences_to_generate = NUM2INT(db_length_in);
   char * output_file = RSTRING_PTR(output_file_in);
@@ -148,7 +147,7 @@ static VALUE protk_make_decoys(VALUE self,VALUE input_file_in,VALUE db_length_in
                 measured_aa_freq[a]++;
       }
   }
-	  else {a=floor(20*(float)rand()/RAND_MAX);MP[a][i]++; measured_aa_freq[a]++;} // replace B, X, Z etc. with random amino acid to preserve size distribution
+    else {a=floor(20*(float)rand()/RAND_MAX);MP[a][i]++; measured_aa_freq[a]++;} // replace B, X, Z etc. with random amino acid to preserve size distribution
   }
   MP[20][pl]++;
       measured_aa_freq[20]++; // MP[20][n] is the number of sequences of length n in the database
@@ -178,12 +177,12 @@ static VALUE protk_make_decoys(VALUE self,VALUE input_file_in,VALUE db_length_in
        x=(double)row_sum[j]*((double)rand()/RAND_MAX);
        partial_sum=MP[0][j]; i=1;
        while (partial_sum<x) {partial_sum+=MP[i][j]; i++;}
-	  if (j>=MAX_SEQUENCE_LENGTH) i=21; /* terminate when sequence has reached MAX_SEQUENCE_LENGTH */
+    if (j>=MAX_SEQUENCE_LENGTH) i=21; /* terminate when sequence has reached MAX_SEQUENCE_LENGTH */
        if (i<21)
        {
          random_sequence[j]=AMINO_ACIDS[i-1];j++;generated_aa_freq[i-1]++;
        }
-	  else /* i==21, i.e. protein sequence terminated */
+    else /* i==21, i.e. protein sequence terminated */
        {
          k=0; generated_aa_freq[20]++; generated_pl_sum+=j;
          for(l=0;l<j;l++)
@@ -196,7 +195,7 @@ static VALUE protk_make_decoys(VALUE self,VALUE input_file_in,VALUE db_length_in
         }
         random_sequence_output[k]='\0';
-	      if (!(k%61)) random_sequence_output[k-1]='\0'; /* remove extra newline for sequence length multiple of 60 */
+        if (!(k%61)) random_sequence_output[k-1]='\0'; /* remove extra newline for sequence length multiple of 60 */
         fprintf(outp,">%srp%li\n%s\n",prefix_string,protein,random_sequence_output);
         break;
       }
@@ -222,14 +221,13 @@ static VALUE protk_make_decoys(VALUE self,VALUE input_file_in,VALUE db_length_in
 }
-  /* ruby calls this to load the extension */
-void Init_protk(void) {
-  /* assume we haven't yet defined Hola */
-  VALUE klass = rb_define_class("Protk",
-    rb_cObject);
-  /* the hola_bonjour function can be called
-   * from ruby as "Hola.bonjour" */
+void Init_decoymaker(void)
+{
+  VALUE klass = rb_define_class("Decoymaker",rb_cObject);
   rb_define_singleton_method(klass,
-    "make_decoys", protk_make_decoys, 4);
+    "make_decoys", decoymaker_make_decoys, 4);
 }

data/ext/protk/decoymaker/extconf.rb ADDED Viewed

@@ -0,0 +1,3 @@
+require 'mkmf'
+create_makefile('protk/decoymaker')

data/ext/protk/simplealign/extconf.rb ADDED Viewed

@@ -0,0 +1,3 @@
+require 'mkmf'
+create_makefile('protk/simplealign')

data/lib/protk/data/FeatureFinderIsotopeWavelet.ini CHANGED Viewed

@@ -1,10 +1,10 @@
 <?xml version="1.0" encoding="ISO-8859-1"?>
-<PARAMETERS version="1.3" xsi:noNamespaceSchemaLocation="http://open-ms.sourceforge.net/schemas/Param_1_3.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+<PARAMETERS version="1.4" xsi:noNamespaceSchemaLocation="http://open-ms.sourceforge.net/schemas/Param_1_4.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
   <NODE name="FeatureFinderIsotopeWavelet" description="Detects two-dimensional features in LC-MS data.">
-    <ITEM name="version" value="1.9.0" type="string" description="Version of the tool that generated this parameters file." tags="advanced" />
+    <ITEM name="version" value="1.10.0" type="string" description="Version of the tool that generated this parameters file." tags="advanced" />
     <NODE name="1" description="Instance &apos;1&apos; section for &apos;FeatureFinderIsotopeWavelet&apos;">
-      <ITEM name="in" value="" type="string" description="input file" tags="input file,required" restrictions="*.mzML" />
-      <ITEM name="out" value="" type="string" description="output file" tags="output file,required" restrictions="*.featureXML" />
+      <ITEM name="in" value="" type="string" description="input file" tags="input file,required" supported_formats="*.mzML" />
+      <ITEM name="out" value="" type="string" description="output file" tags="output file,required" supported_formats="*.featureXML" />
       <ITEM name="log" value="" type="string" description="Name of log file (created only when specified)" tags="advanced" />
       <ITEM name="debug" value="0" type="int" description="Sets the debug level" tags="advanced" />
       <ITEM name="threads" value="1" type="int" description="Sets the number of threads allowed to be used by the TOPP tool" />
@@ -12,9 +12,9 @@
       <ITEM name="test" value="false" type="string" description="Enables the test mode (needed for internal use only)" tags="advanced" restrictions="true,false" />
       <NODE name="algorithm" description="Algorithm section">
         <ITEM name="max_charge" value="3" type="int" description="The maximal charge state to be considered." restrictions="1:" />
-        <ITEM name="intensity_threshold" value="3" type="float" description="The final threshold t&apos; is build upon the formula: t&apos; = av+t*sd, where t is the intensity_threshold, av the average intensity within the wavelet transformed signal and sd the standard deviation of the transform. If you set intensity_threshold=-1, t&apos; will be zero.#br#As the &apos;optimal&apos; value for this parameter is highly data dependent, we would recommend to start with -1, which will also extract features with very low signal-to-noise ratio. Subsequently, one might increase the threshold to find an optimized trade-off between false positives and true positives. Depending on the dynamic range of your spectra, suitable value ranges include: -1, [0:10], and if your data features even very high intensity values, t can also adopt values up to around 30. Please note that this parameter is not of an integer type, s.t. you can also use t:=0.1, e.g." />
+        <ITEM name="intensity_threshold" value="-1" type="float" description="The final threshold t&apos; is build upon the formula: t&apos; = av+t*sd, where t is the intensity_threshold, av the average intensity within the wavelet transformed signal and sd the standard deviation of the transform. If you set intensity_threshold=-1, t&apos; will be zero.#br#As the &apos;optimal&apos; value for this parameter is highly data dependent, we would recommend to start with -1, which will also extract features with very low signal-to-noise ratio. Subsequently, one might increase the threshold to find an optimized trade-off between false positives and true positives. Depending on the dynamic range of your spectra, suitable value ranges include: -1, [0:10], and if your data features even very high intensity values, t can also adopt values up to around 30. Please note that this parameter is not of an integer type, s.t. you can also use t:=0.1, e.g." />
         <ITEM name="intensity_type" value="ref" type="string" description="Determines the intensity type returned for the identified features. &apos;ref&apos; (default) returns the sum of the intensities of each isotopic peak within an isotope pattern. &apos;trans&apos; refers to the intensity of the monoisotopic peak within the wavelet transform. &apos;corrected&apos; refers also to the transformed intensity with an attempt to remove the effects of the convolution. While the latter ones might be preferable for qualitative analyses, &apos;ref&apos; might be the best option to obtain quantitative results. Please note that intensity values might be spoiled (in particular for the option &apos;ref&apos;), as soon as patterns overlap (see also the explanations given in the class documentation of FeatureFinderAlgorihtmIsotopeWavelet)." tags="advanced" restrictions="ref,trans,corrected" />
-        <ITEM name="check_ppm" value="true" type="string" description="Enables/disables a ppm test vs. the averagine model, i.e. potential peptide masses are checked for plausibility. In addition, a heuristic correcting potential mass shifts induced by the wavelet is applied." tags="advanced" restrictions="true,false" />
+        <ITEM name="check_ppm" value="false" type="string" description="Enables/disables a ppm test vs. the averagine model, i.e. potential peptide masses are checked for plausibility. In addition, a heuristic correcting potential mass shifts induced by the wavelet is applied." tags="advanced" restrictions="true,false" />
         <ITEM name="hr_data" value="false" type="string" description="Must be true in case of high-resolution data, i.e. for spectra featuring large m/z-gaps (present in FTICR and Orbitrap data, e.g.). Please check a single MS scan out of your recording, if you are unsure." restrictions="true,false" />
         <NODE name="sweep_line" description="">
           <ITEM name="rt_votes_cutoff" value="5" type="int" description="Defines the minimum number of subsequent scans where a pattern must occur to be considered as a feature." tags="advanced" restrictions="0:" />

data/lib/protk/gapped_aligner.rb ADDED Viewed

@@ -0,0 +1,264 @@
+require 'bio'
+require 'matrix'
+class PeptideFragment
+	attr_accessor :start
+	attr_accessor :end
+	attr_accessor :seq
+end
+class PeptideToGeneAlignment
+	attr_accessor :gene_seq
+	attr_accessor :pep_seq
+	attr_accessor :trace
+	def initialize(gene,peptide,trace)
+		@gene_seq = gene
+		@pep_seq = peptide
+		@trace = trace
+	end
+	def inspect
+		descr = "#{@gene_seq}\n"
+		pep_triples=""
+		@pep_seq.each_char { |c| pep_triples<<c;pep_triples<<c;pep_triples<<c }
+		# gene_seq_triples=""
+		# Bio::Sequence::NA.new(@gene_seq).translate.each_char do |c|
+		# 	gene_seq_triples<<c;gene_seq_triples<<c;gene_seq_triples<<c
+		# end
+		# descr << "#{gene_seq_triples}\n"
+		pepi=0
+		@trace.each_with_index do |move, i|
+			if move==1
+				descr<<"-"
+			elsif move==0
+				descr<<"#{pep_triples[pepi]}"
+				pepi+=1
+			end
+		end
+		descr<<"\n"
+		puts descr
+	end
+	def fragments
+		frags=[]
+		in_fragment=false
+		@trace.each_with_index do |move,i|
+			if move==0
+				frags << [i,0] unless in_fragment #Start a fragment
+				in_fragment=true
+			else
+				frags.last[1]=i-1 if in_fragment #End a fragment
+				in_fragment=false
+			end
+		end
+		if frags.last[1]==0
+			frags.last[1]=@trace.length-1
+		end
+		frags
+	end
+	def gaps
+		gps=[]
+		in_start_end=true
+		in_gap=false
+		@trace.each_with_index do |move, i|
+			if move==0
+				in_start_end=false
+				if in_gap #Ending a gap
+					gps.last[1]=i
+				end
+				in_gap=false
+			else
+				if !in_start_end && !in_gap #Starting a gap
+					in_gap=true
+					gps<<[i,0]
+				end
+			end
+		end
+		#Remove gaps that have zero length (Trailing)
+		gps=gps.collect do |gp|
+			rv=gp
+			if gp[1]==0
+				rv=nil
+			end
+			rv
+		end
+		gps.compact!
+		gps
+	end
+end
+# Uses a dynamic programming algorithm (Smith-Waterman like) to align a peptide sequence to a nucleotide.
+# This aligner assumes you are doing protogenomics and just want to assume that
+#    (a) The entire peptide sequence matches (with gaps) to the DNA sequence
+#
+class GappedAligner
+	def initialize
+		@big_penalty = -1000000000
+		@gap_open_penalty = -10000
+		@gap_extend_penalty = -1
+		@end_gap_penalty = 0
+		@match_bonus = 400
+		@match_move=0
+		@aadel_move=-1
+		@nadel_move=1
+		@triplet_offsets = [[0,-2,-1],[-1,0,-2],[-2,-1,0]]
+	end
+	def aa_deletion()
+		return @big_penalty
+	end
+	def score_na_deletion(move_type)
+		if move_type==@nadel_move
+			return @gap_extend_penalty
+		end
+		return @gap_open_penalty
+	end
+	def score_match(aa,na)
+		if aa==na
+			return @match_bonus
+		end
+		return @big_penalty
+	end
+	def traceback(from_row,from_col,dpmoves)
+		last_move = dpmoves[from_row][from_col]
+		last_row = from_row-1
+		last_col = from_col-1
+		if last_move==@aadel_move
+			last_col+=1
+		elsif last_move==@nadel_move
+			last_row+=1
+		end
+		if last_col==0 && last_row==0
+			return [last_move]
+		else
+			throw "Beyond end of array" if last_col<0 || last_row <0
+			return traceback(last_row,last_col,dpmoves).push(last_move)
+		end
+	end
+	def next_frame(previous_frame)
+		(previous_frame+1) % 3
+	end
+	def translate_na_at(j,frame,gene_seq)
+		rm = j % 3
+		start_pos=j+@triplet_offsets[rm][frame]
+		if start_pos < 0
+			return '-'
+		else
+			return gene_seq[start_pos,3].translate
+		end
+	end
+	def save_matrix(dpmatrix,pep_triples,gene_seq,name)
+		matfile=File.open("#{name}.csv", "w+")
+		matfile.write(",,")
+		gene_seq.each_char { |na| matfile.write("#{na},")  }
+		matfile.write("\n")
+		dpmatrix.each_with_index { |row,ri|
+			if ri>0
+				matfile.write("#{pep_triples[ri-1]},")
+			else
+				matfile.write(",")
+			end
+			row.each { |col|
+				matfile.write("#{col},")
+			}
+			matfile.write("\n")
+		}
+		matfile.close()
+	end
+	def calculate_dp(pep_seq,gene_seq)
+		gene_seq = Bio::Sequence::NA.new(gene_seq)
+		nrow = pep_seq.length*3+1
+		ncol = gene_seq.length+1
+		throw "Peptide sequence is longer than gene" if nrow > ncol
+		pep_triples=""
+		pep_seq.each_char { |c| pep_triples<<c;pep_triples<<c;pep_triples<<c }
+		dpmoves=Matrix.build(nrow,ncol) {|r,c| 0 }.to_a
+		dpmatrix=Matrix.build(nrow,ncol) { |r,c| 0 }.to_a
+		dpframes=Matrix.build(nrow,ncol) { |r,c| 0 }.to_a
+		# before_gap_positions = Matrix.build(nrow,ncol) { |r,c| 0 }.to_a
+		# Boundary conditions
+		(0..(nrow-1)).each { |i|
+			dpmatrix[i][0] = aa_deletion*i
+			dpmoves[i][0] = @aadel_move
+		}
+		(0..(ncol-1)).each { |j|
+			dpmatrix[0][j] = @end_gap_penalty*j
+			dpmoves[0][j] = @nadel_move
+			dpframes[0][j] = j % 3
+		}
+		dpmoves[0][0]=0
+		dpframes[0][0]=0
+		(1..(nrow-1)).each do |i|
+			(1..(ncol-1)).each do |j|
+				aa = pep_triples[i-1]
+				translated_na = translate_na_at(j-1,dpframes[i-1][j-1],gene_seq)
+				match = score_match(aa,translated_na) + dpmatrix[i-1][j-1]
+				nadel = score_na_deletion(dpmoves[i][j-1]) + dpmatrix[i][j-1]
+				# if (translated_na=="R") && (pep_seq=="FR") && (aa == "R")
+					# require 'debugger';debugger
+				# end
+				if match >= nadel
+					dpmatrix[i][j] = match
+					dpmoves[i][j] = @match_move
+					dpframes[i][j] = dpframes[i-1][j-1]
+				else
+					dpmatrix[i][j] = nadel
+					dpmoves[i][j] = @nadel_move
+					dpframes[i][j] = next_frame(dpframes[i][j-1])
+				end
+			end
+		end
+		# Find best end-point
+		end_score = dpmatrix[nrow-1].max
+		end_j = dpmatrix[nrow-1].index(end_score)
+		save_matrix(dpmatrix,pep_triples,gene_seq,"dpmatrix")
+		save_matrix(dpmoves,pep_triples,gene_seq,"moves")
+		save_matrix(dpframes,pep_triples,gene_seq,"frames")
+#		require 'debugger';debugger
+		traceback(nrow-1,end_j,dpmoves)
+	end
+	def align pep_seq, gene_seq
+		trace = calculate_dp(pep_seq,gene_seq)
+		alignment = PeptideToGeneAlignment.new(gene_seq,pep_seq,trace)
+		# puts alignment
+		# require 'debugger';debugger
+		return alignment
+	end
+end