RubyGems - full_lengther_next - Versions diffs - 0.9.9 → 1.0.2 - Mend

full_lengther_next 0.9.9 → 1.0.2

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (23)

checksums.yaml +5 -5
data/bin/download_fln_dbs.rb +31 -26
data/bin/full_lengther_next +11 -11
data/bin/make_test_dataset.rb +9 -9
data/bin/make_user_db.rb +2 -2
data/full_lengther_next.gemspec +2 -1
data/lib/full_lengther_next/artifacts.rb +7 -7
data/lib/full_lengther_next/bio_patch.rb +93 -0
data/lib/full_lengther_next/blast_functions.rb +17 -17
data/lib/full_lengther_next/cdhit.rb +5 -5
data/lib/full_lengther_next/chimeric_seqs.rb +5 -5
data/lib/full_lengther_next/common_functions.rb +1 -1
data/lib/full_lengther_next/exonerate_result.rb +5 -5
data/lib/full_lengther_next/fl_analysis.rb +6 -6
data/lib/full_lengther_next/fln_stats.rb +2 -2
data/lib/full_lengther_next/handle_db.rb +1 -1
data/lib/full_lengther_next/my_worker.rb +6 -6
data/lib/full_lengther_next/my_worker_EST.rb +1 -1
data/lib/full_lengther_next/my_worker_manager_fln.rb +1 -1
data/lib/full_lengther_next/sequence.rb +9 -9
data/lib/full_lengther_next/une_los_hit.rb +5 -5
data/lib/full_lengther_next/version.rb +1 -1
metadata +8 -8

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: 4a6a23e7a0bc8c6ba6ad5e3f1579accb80a16bf1
-  data.tar.gz: 3980b046483ecbfe54147cffb20cf8f6a61bea69
+SHA256:
+  metadata.gz: e6d28d54912b46305ba0047c8458469afffe6660189cb1950c1d30290982e2c4
+  data.tar.gz: fca1a71701c8b1c763102623b6fc60d7699b697ebd636ce53f14886d07fb35f4
 SHA512:
-  metadata.gz: c5ecb5ebcf8077a0000a5ce404dcc6a1468654fdeac3f2a48aa3dd8f2ef3eed6db51f6123abf3452ddc33ef56c3b896ad15125fc76621105d1db6e5a486954c6
-  data.tar.gz: 795f3cea0505142218ad29411e1fdbc8e0ce2b4e72c42a18b54763f77f9e0d4c7e711836fe1f0561557224cb089bd511078e7a6a81fe87fb4e5d68fce3864910
+  metadata.gz: c1398e4d8448f10550769e4f6cb7baa2492337f377fab2f33b3de7067213f1d45ceae1a717de69c59c113c80d585d85bd7d4f1b0d6ed236fb0a5c8d7ac244f3e
+  data.tar.gz: 85c5238d3569e17509135e97383e748183fadc00c2c9eca9f984492b12ba4b4bd22e908eaae755162e4070c5eda8df1fcdffdbdb794c216537a0304b7bbe45bd

data/bin/download_fln_dbs.rb CHANGED Viewed

@@ -10,6 +10,7 @@ require 'scbi_zcat'
 require 'optparse'
 require 'cdhit'
 require 'handle_db'
+require 'bio_patch'
 ##############################################################################################
 ## METHODS
@@ -69,29 +70,30 @@ def filtering_seqs(fasta_file, max_length, black_list)
 end
 def compare_list(string, list)
-    res = FALSE
+    res = false
     list.each do |word|
         if string.include?(word)
-            res = TRUE
+            res = true
             break
         end
     end
     return res
 end
-def conecta_uniprot(my_array, formatted_db_path)
+def conecta_uniprot(my_array, formatted_db_path, no_trembl, passive_ftp)
 	Dir.mkdir(formatted_db_path) if !File.exists?(formatted_db_path)
 	varsplic_out=File.join(formatted_db_path,'uniprot_sprot_varsplic.fasta.gz')
 	$ftp = Net::FTP.new()
+	$ftp.passive = true if passive_ftp
 	$ftp.connect('ftp.ebi.ac.uk')
 	$ftp.login
 	puts "connected to UniProt"
 	my_array.each do |db_group|
 		puts "Downloading #{db_group}"
-		download_uniprot(db_group, formatted_db_path)
+		download_uniprot(db_group, formatted_db_path, no_trembl)
 	end
 	#archivo de variantes de splicing. POR QUE?
@@ -102,13 +104,13 @@ def conecta_uniprot(my_array, formatted_db_path)
 	puts "isoform files downloaded"
 end
-def download_uniprot(uniprot_group, formatted_db_path)
+def download_uniprot(uniprot_group, formatted_db_path, no_trembl)
 	sp_out=File.join(formatted_db_path,"uniprot_sprot_#{uniprot_group}.dat.gz")
 	tr_out=File.join(formatted_db_path,"uniprot_trembl_#{uniprot_group}.dat.gz")
 	$ftp.chdir("/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions")
 	$ftp.getbinaryfile("uniprot_sprot_#{uniprot_group}.dat.gz", sp_out)
-	$ftp.getbinaryfile("uniprot_trembl_#{uniprot_group}.dat.gz", tr_out)
+	$ftp.getbinaryfile("uniprot_trembl_#{uniprot_group}.dat.gz", tr_out) if !no_trembl
 	puts "#{uniprot_group} files downloaded"
@@ -138,13 +140,13 @@ def filter_and_makeDB(formatted_db_path, dbtype, db_group, isoform_hash, prefix,
 end
 def complete?(uniprot_record)
-	complete = TRUE
+	complete = true
 	if uniprot_record.description.include?('Flags: Fragment') || #Discard non full length records
 		uniprot_record.seq[0] != 'M' ||
 		uniprot_record.seq.include?('XX') ||
 		uniprot_record.ft.keys.include?('NON_TER') ||# The residue at an extremity of the sequence is not the terminal residue. If applied to position 1, this signifies that the first position is not the N-terminus of the complete molecule. If applied to the last position, it means that this position is not the C-terminus of the complete molecule. There is no description field for this key
 		uniprot_record.ft.keys.include?('NON_CONS') # Non-consecutive residues. Indicates that two residues in a sequence are not consecutive and that there are a number of unreported or missing residues between them
-		complete = FALSE
+		complete = false
 	end
 	return complete
 end
@@ -304,29 +306,29 @@ optparse = OptionParser.new do |opts|
 		end
   end
-  options[:no_download] = FALSE
+  options[:no_download] = false
   opts.on( '-d', '--no_download', 'Only parse downloaded files without download them again') do
-		options[:no_download] = TRUE
+		options[:no_download] = true
   end
-  options[:no_ncrna] = FALSE
+  options[:no_ncrna] = false
   opts.on( '-n', '--no_ncrna', 'No use ncrna sequences') do
-		options[:no_ncrna] = TRUE
+		options[:no_ncrna] = true
   end
-  options[:only_index] = FALSE
+  options[:only_index] = false
   opts.on( '-i', '--only_index', 'Build annotation index only without do blast DB') do
-		options[:only_index] = TRUE
+		options[:only_index] = true
   end
-  options[:no_trembl] = FALSE
+  options[:no_trembl] = false
   opts.on( '-t', '--no_trembl', 'No use trembl sequences') do
-		options[:no_trembl] = TRUE
+		options[:no_trembl] = true
   end
-  options[:all] = FALSE
+  options[:all] = false
   opts.on( '-a', '--all_sequences', 'Generate databases with all sequences') do
-		options[:all] = TRUE
+		options[:all] = true
   end
   options[:cdhit] = 0
@@ -334,11 +336,15 @@ optparse = OptionParser.new do |opts|
 		options[:cdhit] = cdhit.to_f
   end
-  options[:no_uniprot] = FALSE
+  options[:no_uniprot] = false
   opts.on( '-p', '--no_uniprot', 'No use uniprot sequences') do
-		options[:no_uniprot] = TRUE
+		options[:no_uniprot] = true
   end
+  options[:passive_ftp] = false
+  opts.on( '-P', '--passive_ftp', 'Use pasive ftp') do
+                options[:passive_ftp] = true
+  end
   # Set a banner, displayed at the top of the help screen.
   opts.banner = "Usage: #{File.basename(__FILE__)} [options]  \n\n"
@@ -359,23 +365,22 @@ optparse.parse!
 ## MAIN
 ##############################################################################################
-if ENV['BLASTDB'] && File.exists?(ENV['BLASTDB'])
-  formatted_db_path = ENV['BLASTDB']
+if !ENV['BLASTDB'].nil?
+  formatted_db_path = File.expand_path(ENV['BLASTDB'])
 else # otherwise use ROOTPATH + DB
   formatted_db_path = File.expand_path(File.join(ROOT_PATH, "blast_dbs"))
-  Dir.mkdir(formatted_db_path)
 end
+Dir.mkdir(formatted_db_path) if !File.exists?(formatted_db_path)
 ENV['BLASTDB'] = formatted_db_path
 puts "Databases will be downloaded at: #{ENV['BLASTDB']}"
 puts "\nTo set the path for storing databases, execute next line in your terminal or add it to your .bash_profile:\n\n\texport BLASTDB=/my_path/\n\n"
+puts "Patched? #{Bio::UniProtKB.patched?}"
 download_ncrna(formatted_db_path, options[:no_download]) if !options[:no_ncrna]
 if !options[:no_download]
-	conecta_uniprot(options[:uniprot_div], formatted_db_path)
+	conecta_uniprot(options[:uniprot_div], formatted_db_path, options[:no_trembl], options[:passive_ftp])
 end
 if !options[:no_uniprot]

data/bin/full_lengther_next CHANGED Viewed

@@ -86,9 +86,9 @@ optparse = OptionParser.new do |opts|
 		end
 	end
-	options[:exonerate] = TRUE
+	options[:exonerate] = true
 	opts.on( '-e', '--exonerate', 'Disables exonerate analysis' ) do |exonerate|
-		options[:exonerate] = FALSE
+		options[:exonerate] = false
 	end
 	options[:fasta] = nil
@@ -106,9 +106,9 @@ optparse = OptionParser.new do |opts|
 		options[:ident] = ident.to_f
 	end
-	options[:high_clustering] = FALSE
+	options[:high_clustering] = false
 	opts.on( '-k', '--high_clustering', 'Only for representative transcriptome. Add a clustering step using pfam ids. Default false' ) do
-		options[:high_clustering] = TRUE
+		options[:high_clustering] = true
 	end
 	options[:subject_coverage] = 0.25
@@ -165,7 +165,7 @@ optparse = OptionParser.new do |opts|
 	options[:user_db] = nil
 	opts.on( '-u', '--user_db UserDB', 'User blast+ database' ) do |db|
 		options[:user_db] = db
-		if !File.exists?(File.expand_path(db+'.psq'))
+		if Dir.glob(File.expand_path(db+'*.psq')).empty?
 			puts "user database: #{options[:user_db]} was not found"
 			exit
 		end
@@ -196,9 +196,9 @@ optparse = OptionParser.new do |opts|
 		options[:training_ident] = ident.to_f
 	end
-	options[:hdd] = FALSE
+	options[:hdd] = false
 	opts.on( '-z', '--hdd', 'Write/use blast report on HDD' ) do |hdd|
-		options[:hdd] = TRUE
+		options[:hdd] = true
 	end
@@ -207,9 +207,9 @@ optparse = OptionParser.new do |opts|
 		options[:files2map] = files2map.split(';').map{|map_files| map_files.split(',')}
 	end
-	options[:remove_unmapped] = TRUE
+	options[:remove_unmapped] = true
 	opts.on('-R', '--remove_unmapped', 'When fastq files are provided, all sequences without at least a read pair are removed. When this option is enabled this filtering is disabled' ) do
-		options[:remove_unmapped] = FALSE
+		options[:remove_unmapped] = false
 	end
 	# Set a banner, displayed at the top of the help screen.
@@ -268,8 +268,8 @@ if !File.exists?(ncrna_path) && options[:acess_db].include?('c')
 end
 if options[:acess_db].include?('s') || options[:acess_db].include?('t')
-	sp_path=File.join(ENV['BLASTDB'],"sp_#{options[:tax_group]}","sp_#{options[:tax_group]}.psq")
-	if !File.exists?(sp_path)
+	sp_path=File.join(ENV['BLASTDB'],"sp_#{options[:tax_group]}","sp_#{options[:tax_group]}*.psq")
+	if Dir.glob(sp_path).empty?
 		puts "DB File #{sp_path} doesn't exists, or"
 		puts "incorrect taxon group name: #{options[:tax_group]} choose:"
 		puts optparse.help

data/bin/make_test_dataset.rb CHANGED Viewed

@@ -120,29 +120,29 @@ optparse = OptionParser.new do |opts|
           options[:duplicate] = duplicate.to_i
   end
-  options[:split]= FALSE
+  options[:split]= false
   opts.on( '-s', '--split', 'Split sequences in each case') do
           options[:duplicate] = 3
   end
-  options[:chim]= TRUE
+  options[:chim]= true
   opts.on( '-c', '--chim', 'Make sequence set of chimeras') do
-          options[:chim] = FALSE
+          options[:chim] = false
   end
-  options[:indel]= TRUE
+  options[:indel]= true
   opts.on( '-i', '--indel', 'Make sequence set of indels') do
-          options[:indel] = FALSE
+          options[:indel] = false
   end
-  options[:pair]= TRUE
+  options[:pair]= true
   opts.on( '-p', '--pair', 'Make sequence set of paired') do
-          options[:pair] = FALSE
+          options[:pair] = false
   end
-  options[:trim]= TRUE
+  options[:trim]= true
   opts.on( '-t', '--trim', 'Make sequence set of trimmed') do
-          options[:trim] = FALSE
+          options[:trim] = false
   end
   # Set a banner, displayed at the top of the help screen.

data/bin/make_user_db.rb CHANGED Viewed

@@ -63,9 +63,9 @@ optparse = OptionParser.new do |opts|
 	options[:name] = name
   end
-  options[:local] = FALSE
+  options[:local] = false
   opts.on( '-l', '--local', 'Only parse downloaded files without download them again') do
-		options[:local] = TRUE
+		options[:local] = true
   end
   options[:user_fasta] = nil

data/full_lengther_next.gemspec CHANGED Viewed

@@ -32,7 +32,8 @@ Gem::Specification.new do |spec|
   spec.add_runtime_dependency 'scbi_blast'
   spec.add_runtime_dependency 'scbi_mapreduce'
   spec.add_runtime_dependency 'scbi_zcat'
-  spec.add_runtime_dependency 'bio-cd-hit-report'
+  spec.add_runtime_dependency 'bio'
+  #spec.add_runtime_dependency 'bio-cd-hit-report' # Removed due to conflicts with bio-ruby2. This gem depends on bio ruby 1.4.3. cdhit options disabled
   spec.add_runtime_dependency 'report_html'

data/lib/full_lengther_next/artifacts.rb CHANGED Viewed

@@ -7,11 +7,11 @@ include ChimericSeqs
 ## MAIN FUNCTION
 #####################################################################
 def artifact?(seq, query, db_name, db_path, options, new_seqs)
-	artifact = FALSE
+	artifact = false
 	# UNMAPPED CONTIG DETECTION
 	if query.nil? && seq.unmapped? #If seq is misassembled stop chimera analisys
 		seq.hit = nil
-		artifact = TRUE
+		artifact = true
 		seq.type = UNMAPPED
 	end
@@ -19,7 +19,7 @@ def artifact?(seq, query, db_name, db_path, options, new_seqs)
 		# MISASSEMBLED DETECTION
 		if !artifact && misassembled_detection(query) #If seq is misassembled stop chimera analisys
 			seq.hit = query.hits.first
-			artifact = TRUE
+			artifact = true
 			seq.type = MISASSEMBLED
 			seq.warnings('ERROR#1')
 		end
@@ -35,7 +35,7 @@ def artifact?(seq, query, db_name, db_path, options, new_seqs)
 				else
 					seq.hit = query.hits.first
 				end
-				artifact = TRUE
+				artifact = true
 				seq.type = OTHER
 				seq.warnings('ERROR#2')
 			end
@@ -55,7 +55,7 @@ def artifact?(seq, query, db_name, db_path, options, new_seqs)
 				new_seqs.concat(chimera)
 				seq.db_name = db_name
 				seq.type = CHIMERA
-				artifact = TRUE
+				artifact = true
 			end
 		end
 	end
@@ -64,8 +64,8 @@ def artifact?(seq, query, db_name, db_path, options, new_seqs)
 			puts seq.prot_annot_calification
 		end
 		seq.db_name = db_name
-		seq.save_fasta = FALSE
-		seq.ignore = TRUE
+		seq.save_fasta = false
+		seq.ignore = true
 	end
 	return artifact
 end

data/lib/full_lengther_next/bio_patch.rb ADDED Viewed

@@ -0,0 +1,93 @@
+module Bio
+	class UniProtKB
+		 def self.patched?
+		 	return true
+		 end
+		 def ft(feature_key = nil)
+		    return ft[feature_key] if feature_key
+		    return @data['FT'] if @data['FT']
+		    table = []
+		    begin
+		      get('FT').split("\n").each do |line|
+		        if line =~ /^FT   \w/
+		          feature = line.chomp.ljust(74)
+		          table << [feature[ 5..12].strip,   # Feature Name
+		                    feature[14..19].strip,   # From
+		                    feature[21..26].strip,   # To
+		                    feature[34..74].strip ]  # Description
+		        else
+		          table.last << line.chomp.sub!(/^FT +/, '')
+		        end
+		      end
+		      # Joining Description lines
+		      table = table.map { |feature|
+		        ftid = feature.pop if feature.last =~ /FTId=/
+		        if feature.size > 4
+		          feature = [feature[0],
+		                     feature[1],
+		                     feature[2],
+		                     feature[3, feature.size - 3].join(" ")]
+		        end
+		        feature << if ftid then ftid else '' end
+		      }
+		      ###### PATCH TO RECOVER PARSER
+		      to_delete = []
+		      table.each_with_index do |feature, i|
+		      		name, from, to, descrition = feature
+		      		if from.empty?
+		      			coors = to.split("..")
+		      			if coors.length == 2
+		      				feature[1] = coors[0]
+		      				feature[2] = coors[1]
+		      			elsif /[^\d]/ =~ to
+		      				to_delete << i
+		      			else
+		      				feature[1] = to
+		      				feature[2] = to
+		      			end
+		      		end
+		      end
+		      to_delete.reverse_each{|i| table.delete_at(i)}
+		      #####
+		      hash = {}
+		      table.each do |feature|
+		        hash[feature[0]] = [] unless hash[feature[0]]
+		        hash[feature[0]] << {
+		          # Removing '<', '>' or '?' in FROM/TO endopoint.
+		          'From' => feature[1].sub(/\D/, '').to_i,
+		          'To'   => feature[2].sub(/\D/, '').to_i,
+		          'Description' => feature[3],
+		          'FTId' => feature[4].to_s.sub(/\/FTId=/, '').sub(/\.$/, ''),
+		          'diff' => [],
+		          'original' => feature
+		        }
+		        case feature[0]
+		        when 'VARSPLIC', 'VARIANT', 'VAR_SEQ', 'CONFLICT'
+		          case hash[feature[0]].last['Description']
+		          when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
+		            original_res = $1
+		            changed_res = $2
+		            original_res = original_res.gsub(/ /,'').strip
+		            chenged_res = changed_res.gsub(/ /,'').strip
+		          when /Missing/i
+		            original_res = seq.subseq(hash[feature[0]].last['From'],
+		                                      hash[feature[0]].last['To'])
+		            changed_res = ''
+		          end
+		          hash[feature[0]].last['diff'] = [original_res, chenged_res]
+		        end
+		      end
+		    rescue
+		      raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n'#{self.get('FT')}'\n"
+		    end
+		    @data['FT'] = hash
+		  end
+	end
+end

data/lib/full_lengther_next/blast_functions.rb CHANGED Viewed

@@ -105,31 +105,31 @@ def set_thresold_evalue(hits)
 end
 def same_subject_hsp(hit, second_hit)
-	same = FALSE
+	same = false
 	if hit.acc == second_hit.acc
 		if hit.s_beg <= second_hit.s_beg && hit.s_end >= hit.s_end && (second_hit.s_beg - hit.s_end).abs > 1
-			same = TRUE
+			same = true
 		end
 	end
 	return same
 end
 def same_query_hsp(hit, second_hit)
-	same = FALSE
+	same = false
 	if hit.acc == second_hit.acc
 		if hit.q_beg <= second_hit.q_beg && hit.q_end >= hit.q_end && (second_hit.q_beg - hit.q_end).abs > 1
-			same = TRUE
+			same = true
 		end
 	end
 	return same
 end
 def same_sense?(hit, second_hit)
-	same= FALSE
+	same= false
 	hit_sense = hit.q_frame <=> 0
 	second_hit_sense = second_hit.q_frame <=> 0
 	if hit_sense == second_hit_sense
-		same = TRUE
+		same = true
 	end
 	return same
 end
@@ -158,7 +158,7 @@ def clean_by_query_length_match(blast_result, min_len_nt)
 end
-def clean_overlapping_hsps(blast_result, keep_if_diff_sense = FALSE)
+def clean_overlapping_hsps(blast_result, keep_if_diff_sense = false)
 	blast_result.querys.each do |query|
 		if query.hits.length > 1
 			query.hits.each_with_index do |hit, j|
@@ -190,7 +190,7 @@ end
 #####################################################################
 def misassembled_detection(query)
-	miss=FALSE
+	miss=false
 	hits = cluster_hsps(query.hits)
 	misassembled_hits = []
 	hits.each do |hit|
@@ -202,7 +202,7 @@ def misassembled_detection(query)
 		end
 	end
 	if misassembled_hits.length*1.0/ hits.length > 0.5
-		miss = TRUE
+		miss = true
 	else #Remove missassembled hits to avoid broken analysis
 		query.hits.reverse_each do |hsp|
 			if misassembled_hits.include?(hsp.acc)
@@ -214,16 +214,16 @@ def misassembled_detection(query)
 end
 def multiple_hsps(query, num)
-	multiple = FALSE
+	multiple = false
 	hsps = query.hits.select{|h| h.acc == query.hits.first.acc}
 	if hsps.length >= num
-		multiple = TRUE
+		multiple = true
 	end
 	return multiple
 end
 def overlapping_hsps_on_subject(query)
-	overlapping = FALSE
+	overlapping = false
 	current_hit = query.hits.first.acc
 	complete_hit = []
 	cleaned_hits = []
@@ -252,16 +252,16 @@ def clean_subject_overlapping_hsps(complete_hit, cleaned_hits)
 end
 def subject_overlapping_hsps(hit)
-	overlapping = FALSE
+	overlapping = false
 	hsp_table = hsps_relationship_subject(hit)
 	if !hsp_table.empty?
 		hit = clean_hsp_by_identity(hit, 55)
 		if hit.empty?
-			overlapping = TRUE
+			overlapping = true
 		else
 			hsp_table = hsps_relationship_subject(hit)
 			if !hsp_table.empty?
-				overlapping = TRUE
+				overlapping = true
 			end
 		end
 	end
@@ -286,10 +286,10 @@ def hsps_relationship_subject(hit)
 end
 def same_subject_hsp(hit, second_hit)
-	same = FALSE
+	same = false
 	if hit.acc == second_hit.acc
 		if hit.s_beg <= second_hit.s_beg && hit.s_end >= hit.s_end && (second_hit.s_beg - hit.s_end).abs > 1
-			same = TRUE
+			same = true
 		end
 	end
 	return same

data/lib/full_lengther_next/cdhit.rb CHANGED Viewed

@@ -59,8 +59,8 @@ class Cdhit
 			if master_seq.db != 'sp'
 				sp_seq=get_sp(cluster)
 				if !sp_seq.nil?
-					cluster.map{|seq| seq.master=FALSE}
-					sp_seq.master=TRUE
+					cluster.map{|seq| seq.master=false}
+					sp_seq.master= true
 				end
 			end
 		}
@@ -109,7 +109,7 @@ class Cdhit
 	def cd_hit_clusters(clust_file)
-		require 'bio-cd-hit-report'
+		#require 'bio-cd-hit-report'
 		report = Bio::CdHitReport.new(clust_file)
 		report.each_cluster do |cluster|
 			clust=[]
@@ -128,9 +128,9 @@ class Cdhit
 		member.gsub!('>','')
 		fields = member.split(',')
 		data = fields[1].split(' ',2)
-		master = FALSE
+		master = false
 		if data[1] == '*'
-			master = TRUE
+			master =  true
 		end
 		return data[0],master
 	end

data/lib/full_lengther_next/chimeric_seqs.rb CHANGED Viewed

@@ -110,8 +110,8 @@ module ChimericSeqs
 		seq_bak.clean_warnings
 		seq_bak.seq_name += "_split_#{hit_position}"
 		seq_bak.clean_orfs
-		seq_bak.save_fasta = TRUE
-		seq_bak.ignore = FALSE
+		seq_bak.save_fasta = true
+		seq_bak.ignore = false
 		# Cut sequence and move hit/hsps limits
 		#----------------------------------------
@@ -244,10 +244,10 @@ module ChimericSeqs
 	end
 	def hit_is_in?(h_beg, h_end, hit)
-		is=FALSE
+		is=false
 				# CONTIENE					#OVERLAP
 		if h_beg <= hit[BEG] && h_end > hit[BEG] || hit[BEG] <= h_beg && hit[STOP] > h_beg
-			is=TRUE
+			is=true
 		end
 		return is
 	end
@@ -324,7 +324,7 @@ module ChimericSeqs
 		cmd='clustalo -i -  -o /dev/null --percent-id --full --distmat-out=/dev/stdout --force'
 		clustal_matrix = nil
 		IO.popen(cmd,'w+') {|clustal|
-			clustal.sync = TRUE
+			clustal.sync = true
 			clustal.write(seq_fasta)
 			clustal.close_write
 			clustal_matrix = clustal.readlines

data/lib/full_lengther_next/common_functions.rb CHANGED Viewed

@@ -101,7 +101,7 @@ module CommonFunctions
 		hit.q_frame = -hit.q_frame
 		hit.q_end = query_fasta.length - 1 - hit.q_end
 		hit.q_beg = query_fasta.length - 1 - hit.q_beg
-		hit.reversed = TRUE
+		hit.reversed = true
 		query_fasta = query_fasta.complementary_dna # ESTO REALMENTE HACE LA REVERSO COMPLEMENTARIA.
 		if hit.class.to_s == 'ExoBlastHit'
 			hit.q_frameshift.map!{|position, num_nts|

data/lib/full_lengther_next/exonerate_result.rb CHANGED Viewed

@@ -39,7 +39,7 @@ end
 class ExonerateResult
   # Parser initialization
-  def initialize(input, seqs= nil, query_seqs = nil, all = TRUE)
+  def initialize(input, seqs= nil, query_seqs = nil, all = true)
     @querys = []
     @seqs = seqs #unigenes
     @prot_seqs = query_seqs#prot
@@ -106,8 +106,8 @@ class ExonerateResult
    #this method only works fine with --model protein2dna parameter of exonerate
  	def hiting(features, tags, query) #Convierte las coordenadas relativas del exonerate a absolutas tipo blast, definiendo solo los hits
-		do_align = FALSE
-		do_align = TRUE if !@prot_seqs.nil? && !@seqs.nil?
+		do_align = false
+		do_align = true if !@prot_seqs.nil? && !@seqs.nil?
 		start_target = features['target_start_align']#Unigen
 		start_query = features['query_start_align'] #proteina
 		ends_target = features['target_end_align']
@@ -143,7 +143,7 @@ class ExonerateResult
 				target_alignment << target_seq[counter_target, tag[TARGET]].translate
 			end
 			if tag[OPERATION] == 'F'
-				if tag[TARGET] > 0 && tag[TARGET] < 3 #TRUE FRAMESHIFT
+				if tag[TARGET] > 0 && tag[TARGET] < 3 #true FRAMESHIFT
 					gap_shift += 1
 					if tags[n_operation+1][OPERATION] != 'G' #there are frameshift that not insert a gap, we do it
 						query_alignment <<  '-' if do_align
@@ -203,7 +203,7 @@ class ExonerateResult
 	def define_hit_parameters(hit, features, tags)
 		hit.gaps = 0
 		tags.map{|aln| hit.gaps += 1 if aln[0] == 'G'}
-		hit.reversed = FALSE
+		hit.reversed = false
 		hit.align_len =(features['query_end_align'] - features['query_start_align']).abs+1
 		hit.mismatches=0
 		hit.e_val=0

data/lib/full_lengther_next/fl_analysis.rb CHANGED Viewed

@@ -171,9 +171,9 @@ module FlAnalysis
 		end
 		if atg_status == 'putative' || end_status == 'putative'
-			status = FALSE # Putative
+			status = false # Putative
 		else
-			status = TRUE # Sure
+			status = true # Sure
 		end
 		return type, status
@@ -187,7 +187,7 @@ module FlAnalysis
 			$global_warnings << ['SeqShorter', final_prot.length, final_hit.s_len]
 			if final_prot.length + 100 < final_hit.s_len || final_prot.length*2 < final_hit.s_len
 				if type == COMPLETE
-					status = FALSE
+					status = false
 					$global_warnings << 'VeryShorter'
 				end
 			end
@@ -209,7 +209,7 @@ module FlAnalysis
 			$global_warnings = [] # Clean all warnings for current sequence
 			seq.seq_nt = mark_nt_seqs(final_hit, query_fasta)
 			if type == COMPLETE
-				seq.ignore = TRUE
+				seq.ignore = true
 			end
 		end
 		if  $verbose > 2
@@ -265,8 +265,8 @@ module FlAnalysis
 ## VERBOSE METHODS
 	def show_nts
-		show = FALSE
-		show = TRUE if $verbose && $verbose > 3
+		show = false
+		show = true if $verbose && $verbose > 3
 		return show
 	end

data/lib/full_lengther_next/fln_stats.rb CHANGED Viewed

@@ -74,7 +74,7 @@ module FlnStats
 			if !$1.nil?
 				organism = $1
 			else
-				name =~ /(\w+ \w+) \(([\w ]+)\)/
+				name =~ /(\w+ \w+) \(([\w \/]+)\)/
 				if !$1.nil?
 					organism = $1
 				end
@@ -610,4 +610,4 @@ module FlnStats
 		html = 	'<div style="font-size:25px; margin: 10"><b>'+title+'</b></div>'
 		return html
 	end
-end
+end

data/lib/full_lengther_next/handle_db.rb CHANGED Viewed

@@ -35,7 +35,7 @@ end
 def do_makeblastdb(seqs, output, dbtype)
 	cmd="makeblastdb -in - -out #{output} -title #{File.basename(output)} -dbtype #{dbtype} -parse_seqids"
 	IO.popen(cmd,'w+') {|makedb|
-		makedb.sync = TRUE
+		makedb.sync = true
 		makedb.write(seqs)
 		makedb.close_write
 		puts makedb.readlines

data/lib/full_lengther_next/my_worker.rb CHANGED Viewed

@@ -193,7 +193,7 @@ class MyWorker < ScbiMapreduce::Worker
 	# ejecuta blast utilizando los parametros fichero de entrada, base de datos, tipo de blast y evalue
-	def run_blast(input, database, blast_type, evalue, additional_blast_options, do_exonerate, filter = TRUE)
+	def run_blast(input, database, blast_type, evalue, additional_blast_options, do_exonerate, filter = true)
 		if !input.empty? && !input.nil?
 			$WORKER_LOG.info "DB: #{File.basename(database)} #{input.length}"
 			blast = BatchBlast.new("-db #{database}", blast_type, "-evalue #{evalue} #{additional_blast_options}")
@@ -202,7 +202,7 @@ class MyWorker < ScbiMapreduce::Worker
 			if @options[:hdd] #Write/parse blast on Disk
 				file_name = file_path+'.blast' #Each blast is identified with database_name and first sequence's name on chunk
 				if !File.exists?(file_name)
-					blast_result = blast.do_blast_seqs(input, :table, TRUE, file_name)
+					blast_result = blast.do_blast_seqs(input, :table, true, file_name)
 				else
 					blast = nil
 					blast_result=BlastTableResult.new(file_name)
@@ -223,8 +223,8 @@ class MyWorker < ScbiMapreduce::Worker
 	end
 	def rescue_sequence(e, seq, status)
-		seq.save_fasta = FALSE
-		seq.ignore = TRUE
+		seq.save_fasta = false
+		seq.ignore = true
 		seq.type = FAILED
 		puts 	'-- '+seq.seq_name+' FAILED ANALYSIS -- '+status,
 			e.message,
@@ -232,7 +232,7 @@ class MyWorker < ScbiMapreduce::Worker
 	end
 	def  check_ncRNA(check_seqs, ncrna_path, blast_type, evalue)
-		my_blast = run_blast(check_seqs, ncrna_path, blast_type, evalue, '', FALSE, nil)
+		my_blast = run_blast(check_seqs, ncrna_path, blast_type, evalue, '', false, nil)
 		if !my_blast.nil?
 			check_seqs.each_with_index do |seq,i|
 				find_nc_rna(seq, my_blast.querys[i])
@@ -280,7 +280,7 @@ class MyWorker < ScbiMapreduce::Worker
 		if seq.type == FAILED
 			seq.type = UNKNOWN
-			seq.ignore = FALSE
+			seq.ignore = false
 		else
 			best_option.warnings(warning) if !warning.nil?
 		end

data/lib/full_lengther_next/my_worker_EST.rb CHANGED Viewed

@@ -28,7 +28,7 @@ class MyWorkerEst < MyWorker
 	#####################################################################################
 	def blastEST(array_seqs)
-		blast = run_blast(array_seqs, @blast_path, 'blastn', 1e-6, nil, FALSE)
+		blast = run_blast(array_seqs, @blast_path, 'blastn', 1e-6, nil, false)
 		if blast.nil?
 			$LOG.info 'BLAST FAILED'
 			Process.exit(-1)

data/lib/full_lengther_next/my_worker_manager_fln.rb CHANGED Viewed

@@ -444,7 +444,7 @@ class MyWorkerManagerFln < ScbiMapreduce::WorkManager
 		@@stats_hash['coding'] += 1
 		coding = select_orf(coding)
 		if coding[1] == 'complete'
-			seq.status = TRUE
+			seq.status = true
 			@@stats_hash['coding_sure'] += 1
 		else
 			@@stats_hash['coding_putative'] += 1

data/lib/full_lengther_next/sequence.rb CHANGED Viewed

@@ -16,7 +16,7 @@ class Sequence
 		@seq_aa = nil # Protein sequence generated over unigen
 		@db =nil
 		@type = UNKNOWN # See types.rb
-		@status = FALSE # TRUE => Sure, FALSE => Putative
+		@status = false # true => Sure, false => Putative
 		@id = nil #Prot or EST id, can be several => array
 		@warnings = []
 		@annotations=[]
@@ -27,9 +27,9 @@ class Sequence
 		@fpkm = []
 		@coverage_analysis = []
-		@area_without_annotation=FALSE
-		@save_fasta=TRUE
-		@ignore=FALSE
+		@area_without_annotation=false
+		@save_fasta=true
+		@ignore = false
 		@hit=nil
 		@t_code=0
 	end
@@ -86,7 +86,7 @@ class Sequence
 	def reset_classification
 		@type = UNKNOWN
-		@status = FALSE
+		@status = false
 	end
 	def clean_warnings
@@ -150,7 +150,7 @@ class Sequence
 	def test_code(test_code)
 		@t_code = test_code
 		if @t_code >= 0.95
-			@status = TRUE
+			@status = true
 		end
 	end
@@ -470,7 +470,7 @@ class Sequence
 		upstream_annotation_space = hit.q_beg
 		downstream_annotation_space = @fasta_length - hit.q_end
 		if upstream_annotation_space >= 150 || downstream_annotation_space >= 150
-			@area_without_annotation = TRUE
+			@area_without_annotation = true
 		end
 		return @area_without_annotation
 	end
@@ -490,8 +490,8 @@ class Sequence
 	end
 	def unmapped?
-		res = FALSE
-		res = TRUE if !@coverage_analysis.empty? && @coverage_analysis[3] == 0 #3 => percentage of sequence covered by reads
+		res = false
+		res = true if !@coverage_analysis.empty? && @coverage_analysis[3] == 0 #3 => percentage of sequence covered by reads
 		return res
 	end
 end

data/lib/full_lengther_next/une_los_hit.rb CHANGED Viewed

@@ -165,7 +165,7 @@ class UneLosHit
 				#if frame_ori < 0 && h.q_frame > 0 || frame_ori > 0 && h.q_frame < 0
 					if h.q_frame < 0 # si la secuencia esta al reves le damos la vuelta
 						query_fasta = reverse_seq(query_fasta_ori, h)
-						h.reversed = TRUE
+						h.reversed = true
 					end
 					misma_id << h
 				#end
@@ -176,17 +176,17 @@ class UneLosHit
 	end
 	def overlapping_hits?(hit)
-		overlap = FALSE
+		overlap = false
 		if @final_hit.q_end >= hit.q_beg && @final_hit.q_end < hit.q_end && @final_hit.q_end < hit.q_end
-			overlap = TRUE
+			overlap = true
 		end
 		return overlap
 	end
 	def separated_hits?(hit)
-		separated=FALSE
+		separated=false
 		if @final_hit.q_end < hit.q_beg && hit.q_end > @final_hit.q_end
-			separated = TRUE
+			separated = true
 		end
 		return separated
 	end

data/lib/full_lengther_next/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module FullLengtherNext
-  VERSION = '0.9.9'
+  VERSION = '1.0.2'
 end

metadata CHANGED Viewed

@@ -1,16 +1,16 @@
 --- !ruby/object:Gem::Specification
 name: full_lengther_next
 version: !ruby/object:Gem::Version
-  version: 0.9.9
+  version: 1.0.2
 platform: ruby
 authors:
 - Pedro Seoane
 - Noe Fernandez
 - Dario Guerrero
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-03-12 00:00:00.000000000 Z
+date: 2022-09-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: xml-simple
@@ -83,7 +83,7 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
-  name: bio-cd-hit-report
+  name: bio
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -189,6 +189,7 @@ files:
 - full_lengther_next.gemspec
 - lib/full_lengther_next.rb
 - lib/full_lengther_next/artifacts.rb
+- lib/full_lengther_next/bio_patch.rb
 - lib/full_lengther_next/blast_functions.rb
 - lib/full_lengther_next/cdhit.rb
 - lib/full_lengther_next/chimeric_seqs.rb
@@ -220,7 +221,7 @@ homepage: https://github.com/seoanezonjic
 licenses:
 - MIT
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -235,9 +236,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.4.8
-signing_key:
+rubygems_version: 3.3.7
+signing_key:
 specification_version: 4
 summary: Tool to annotate transcriptomes and it is able to stablish the integrity
   of each transcript. Also, FLN can detect novel genes on a target organism.