RubyGems - bioroebe - Versions diffs - 0.10.80 → 0.12.12 - Mend

bioroebe 0.10.80 → 0.12.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of bioroebe might be problematic. Click here for more details.

Files changed (242) hide show

data/lib/bioroebe/shell/enzymes.rb DELETED Viewed

@@ -1,310 +0,0 @@
-#!/usr/bin/ruby -w
-# Encoding: UTF-8
-# frozen_string_literal: true
-# =========================================================================== #
-# Enzyme-related components of the BioShell will be stored here.
-# =========================================================================== #
-# require 'bioroebe/shell/enzymes.rb'
-# =========================================================================== #
-module Bioroebe
-class Shell < ::Bioroebe::CommandlineApplication
-  require 'bioroebe/enzymes/has_this_restriction_enzyme.rb'
-  require 'bioroebe/enzymes/restriction_enzymes_file.rb'
-  # ========================================================================= #
-  # === return_random_restriction_enzyme
-  #
-  # This method will return a random restriction enzyme, such as:
-  #
-  #   ["EgeI", "GGCGCC 3"]
-  #
-  # ========================================================================= #
-  def return_random_restriction_enzyme(be_verbose = false)
-    splitted = ::Bioroebe.restriction_enzymes.sample
-    _ = splitted[1].split(' ')[0]
-    if be_verbose
-      erev 'Now adding restriction site `'+red(splitted[0])+
-           '` (cuts at '+simp(_)+').'
-    end
-    return _
-  end
-  # ========================================================================= #
-  # === find_restriction_enzymes_that_cut_at
-  #
-  # A wrapper over find_restriction_sites().
-  # ========================================================================= #
-  def find_restriction_enzymes_that_cut_at(i)
-    erev 'Trying to find restriction enzymes that '\
-         'cut at `'+sfancy(i)+rev+'`.'
-    result = find_restriction_sites(i)
-    unless result
-      erev 'Found no result for this sequence.'
-    end
-  end
-  # ========================================================================= #
-  # === show_restriction_enzymes
-  #
-  # Display the available restriction enzymes here.
-  #
-  # If we pass an argument, then we assume that we wish to show only
-  # these restriction enzymes that cut at n bp.
-  #
-  # Invocation example:
-  #
-  #   show_restriction_enzymes(:show_all)
-  #
-  # ========================================================================= #
-  def show_restriction_enzymes(optional_input = nil)
-    case optional_input
-    when nil, :show_all # This means to show everything.
-      ::Bioroebe.show_restriction_enzymes # Defined in module_methods.rb
-    else # Ok we gave input then.
-      _ = ::Bioroebe.restriction_enzymes
-      _.select! {|entry|
-        last = entry.last
-        last = last.split(' ').last
-        if last == optional_input
-          true
-        else
-          false
-        end
-      }
-      if _.empty?
-        erev 'We found no match for '+optional_input+'.'
-      else # else display the cutters.
-        erev 'These enzymes cut at `'+sfancy(optional_input)+rev+'` nucleotides.'
-        _.each {|entry|
-          entry[0] = entry[0].rjust(15)
-          entry[1] = entry[1].gsub(/ (.+)/, swarn(' \\1')+rev)
-          e "  #{entry.join(' -> ')}"
-        }
-        erev 'These are '+simp(_.size.to_s)+rev+' restriction enzymes.'
-      end
-    end
-  end
-  # ========================================================================= #
-  # === try_to_find_this_restriction_enzyme
-  #
-  # Use this method to find a specific restriction enzyme.
-  #
-  # The restriction enzymes are stored in this yaml file here:
-  #
-  #   bl $BIOROEBE/yaml/restriction_enzymes/restriction_enzymes.yml
-  #
-  # Usage example:
-  #
-  #   MvnI?
-  #
-  # ========================================================================= #
-  def try_to_find_this_restriction_enzyme(i)
-    i = i.dup if i.frozen?
-    i.delete!('?') # We do not need any '?' characters.
-    original_input = i.dup
-    # i = i.downcase # No longer downcase since as of June 2018.
-    if i.include? 'restriction'
-      i.sub!(/restriction/,'')
-    end
-    if i.include? '.site' # Assume a syntax such as: Restriction.EcoRI.site
-      e ::Bioroebe.restriction_enzyme(i)
-    else # else it will be more verbose
-      i.delete!('.') if i.include? '.'
-      if i.end_with?('1') and !::Bioroebe.has_this_restriction_enzyme?(i) # Is invalid.
-        erev 'The input `'+simp(i)+rev+'` ends with the number 1. This '\
-             'is not possible, so'
-        erev 'we replace the trailing 1 with a capital I.'
-        i[-1,1] = 'i' # Ok not a capital one, because we store in a downcased variant.
-        original_input[-1,1] = 'I'
-      end
-      if ::Bioroebe.has_this_restriction_enzyme? i
-        target_sequence_data = ::Bioroebe.return_restriction_enzyme_sequence_and_cut_position(i)
-        # =================================================================== #
-        # Tap into the method Bioroebe.restriction_enzyme
-        # =================================================================== #
-        _ = ::Bioroebe.restriction_enzyme(i) # bl $BIOROEBE/module_methods.rb
-        erev "We have found a restriction enzyme called "\
-             "#{sfancy(original_input)}#{rev}."
-        e
-        e "#{rev}The sequence this #{mediumorchid(_.size.to_s+'-cutter')}#{rev}"\
-          " relates to is: `"\
-          "#{sfancy(five_prime+simp(_)+rev)}"\
-          "#{sfancy(three_trailer)}#{rev}`"
-        e
-        # =================================================================== #
-        # The variable target_sequence_data will look like this:
-        #   ["GCCNNNNNGGC", "7", "7"]
-        # =================================================================== #
-        if target_sequence_data.last == :blunt
-          erev "This restriction enzyme will produce a "\
-               "#{seagreen('blunt')}#{rev} overhang."
-          e
-        else
-          erev "This restriction enzyme will produce a "\
-               "#{seagreen('sticky-end')}#{rev} overhang."
-          e
-        end
-        # =================================================================== #
-        # Next, show the exact cut that will be made.
-        # =================================================================== #
-        sequence = ::Bioroebe.return_sequence_that_is_cut_via_restriction_enzyme(i)
-        erev 'It will specifically cut between:     '+
-              sfancy(five_prime)+rev+
-              simp(sequence)+
-              sfancy(three_trailer)+rev
-        # =================================================================== #
-        # And the complementary sequence follows next. The colour used
-        # is swarn().
-        # =================================================================== #
-        complementary_sequence = reverse(
-          Colours.remove_escape_sequences(sequence)
-        )
-        # =================================================================== #
-        # We must insert a | at the right position.
-        # =================================================================== #
-        target_sequence_data = target_sequence_data[1].to_i
-        complementary_sequence[-target_sequence_data,0] = swarn('|')+rev
-        erev ''.ljust(38)+sfancy(leading_three_prime)+rev+
-             complementary_sequence+rev+
-             sfancy(five_prime_trailer)+rev
-      else
-        erev 'We were unable to find a restriction enzyme called '\
-             '`'+sfancy(i)+'`'+rev
-      end
-    end
-  end
-  # ========================================================================= #
-  # === try_to_find_restriction_enzymes_for
-  #
-  # This method name is a slight misnomer; we can simply find any
-  # target sequence.
-  #
-  # The method can also handle some Symbols as input, such as the symbol
-  # :shine_dalgarno, which will be replaced accordingly to the SD
-  # sequence.
-  # ========================================================================= #
-  def try_to_find_restriction_enzymes_for(
-      i
-    )
-    # ======================================================================= #
-    # === We always have to work with an Array as input
-    # ======================================================================= #
-    unless i.is_a? Array
-      i = [i]
-    end
-    i.map! {|entry|
-      case entry # Use special sequences.
-      when :shine_dalgarno
-        entry = 'AGGAGGT'
-      end
-      # ===================================================================== #
-      # Past this point, we will assume a String as input. But we will have
-      # to make sure, still.
-      # ===================================================================== #
-      entry = entry.to_s unless entry.is_a? String
-      entry.delete!('-') if entry.include? '-'
-      entry
-    }
-    i.each {|entry|
-      report_main_sequence(entry) { :with_ruler }
-      possible_results = dna_string?.scan(/#{entry}/)
-      unless possible_results.empty?
-        e
-        erev "Start nucleotide position is at: "\
-             "#{simp((dna_string?.index(entry)+1))}#{rev}"
-        e
-      end
-    }
-  end; alias find_this_sequence    try_to_find_restriction_enzymes_for # === find_this_sequence
-       alias find_in_main_sequence try_to_find_restriction_enzymes_for # === find_in_main_sequence
-  # ========================================================================= #
-  # === restriction_enzyme_digest
-  #
-  # This method allows us to simulate a restriction digest, on a
-  # DNA polymer.
-  #
-  # You can either give the matching DNA nucleotides or you can
-  # use the name of a restriction enzyme instead, such as 'EcoRI'.
-  #
-  # Usage examples:
-  #
-  #   random 750; digest_at TTGC
-  #   random 750; digest_at EcoRI
-  #   random 2000; [33,0] = GAATTC; digest_at EcoRI
-  #
-  # ========================================================================= #
-  def restriction_enzyme_digest(
-      split_at = nil # Default value is nil.
-    )
-    _ = dna_sequence? # Keep a copy of the DNA sequence.
-    # ======================================================================= #
-    # === Grab the first entry if we have an Array
-    # ======================================================================= #
-    split_at = split_at.first if split_at.is_a? Array
-    split_at = 'TTG' if split_at.nil?
-    split_at = split_at.to_s # Work on Strings past this point here.
-    split_at.sub!(/^first_/,'') if split_at.include? 'first_ATG'
-    # ======================================================================= #
-    # === Chop off all '?' in the sequence
-    # ======================================================================= #
-    split_at.delete!('?') if split_at.include? '?'
-    # ======================================================================= #
-    # Next, allow the user to substitute for names of restriction enzymes.
-    # How do we determine that a restriction enzyme was given to this
-    # method? Simple - we first remove all instances of 'A','T','C','G'
-    # in our DNA sequence string. If the string is then still not empty,
-    # we will assume that it is the name of a restriction enzyme.
-    # ======================================================================= #
-    unless (_.delete('ATGC').size > 0)
-      erev 'Assumingly a restriction enzyme was given as input.'
-      target_sequence = ::Bioroebe.restriction_enzyme(split_at)
-      # Must check for nil values still
-      if target_sequence
-        erev "Substituting with `#{simp(target_sequence)}#{rev}` next (for #{split_at})."
-        split_at = target_sequence
-      else
-        erev 'No substitute could be found for `'+sfancy(split_at)+rev+'`.'
-      end
-    end
-    if _.include? split_at
-      splitted = _.split(split_at)
-      e
-      erev 'We will next display all '+simp(splitted.size.to_s)+rev+
-           ' segments that were found (in orange is the part that '\
-           'is cut-out):'
-      e
-      splitted.each_with_index {|sequence, index|
-        index += 1
-        erev lpad?+lead_five_prime+sfancy(sequence)+rev+
-             trail_three_prime+' (size: '+
-             violet(sequence.size.to_s)+rev+')'
-        unless index > (splitted.size-1)
-          erev lpad?+lead_five_prime+orange(split_at)+rev+
-            trail_three_prime+rev
-        end
-      }
-      e
-      erev 'Note that this will NOT be the actual DNA fragments, '\
-           'because the restriction'
-      erev 'enzyme may cut differentially within that orange sequence.'
-    else # The target sequence was not included in this case.
-      erev "No target match for `"\
-           "#{simp(target_sequence)}#{rev}` was "\
-           "found in the given DNA sequence."
-    end
-  end; alias digest restriction_enzyme_digest # === digest
-  # ========================================================================= #
-  # === restriction_enzymes_run
-  # ========================================================================= #
-  def restriction_enzymes_run
-    require 'bioroebe/gui/gtk2/restriction_enzymes/restriction_enzymes.rb'
-    ::Bioroebe::GUI::Gtk::RestrictionEnzymes.start_gui_application
-  end
-end; end

data/lib/bioroebe/shell/fasta.rb DELETED Viewed

@@ -1,345 +0,0 @@
-#!/usr/bin/ruby -w
-# Encoding: UTF-8
-# frozen_string_literal: true
-# =========================================================================== #
-# require 'bioroebe/shell/fasta.rb'
-# =========================================================================== #
-module Bioroebe
-class Shell < ::Bioroebe::CommandlineApplication
-  # ========================================================================= #
-  # === handle_fasta
-  #
-  # Use this method to properly handle a fasta file.
-  #
-  # The argument should be the (local) path to a fasta file.
-  # ========================================================================= #
-  def handle_fasta(i)
-    if i.nil?
-      if File.exist? fasta_file?.to_s
-        e sfile(fasta_file?.to_s)
-      else
-        show_my_fasta_file # As a reminder.
-      end
-    else
-      i = i.to_s unless i.is_a? String # Need a String past this point.
-      if File.exist?(i) and i.end_with?('.fasta')
-        opnn; erev 'Trying to parse the file `'+sfile(i)+rev+'` next.'
-        parse_fasta_format(i)
-      else
-        fasta_files = Dir['*.fasta']
-        unless fasta_files.empty?
-          erev 'There seems to be at least one .fasta file in this '\
-               'directory ('+sdir(return_pwd)+').'
-        end
-      end
-    end
-  end; alias assign_fasta           handle_fasta # === assign_fasta
-       alias handle_this_fasta_file handle_fasta # === handle_this_fasta_file
-  require 'bioroebe/fasta_and_fastq/parse_fasta/misc.rb'
-  # ========================================================================= #
-  # === parse_fasta_format
-  #
-  # Parse FASTA format here. We will delegate into class
-  # Bioroebe::ParseFasta for that.
-  #
-  # Usage example:
-  #
-  #   pfasta NM_001180897.3_Saccharomyces_cerevisiae_S288c_Aga2p_AGA2.fasta
-  #
-  # ========================================================================= #
-  def parse_fasta_format(
-      i = nil
-    )
-    if i.is_a? Array
-      i.each {|entry|
-        parse_fasta_format(entry)
-      }
-    else
-      # ===================================================================== #
-      # === If input is only numbers.
-      # ===================================================================== #
-      i = Dir['*'][i.to_i + 1] if i =~ /^\d+$/ # <- Only numbers.
-      case i
-      # ===================================================================== #
-      # === ASSIGN
-      #
-      # This entry point can be used by the user to input ad-hoc data
-      # for a FASTA sequence.
-      # ===================================================================== #
-      when /^ASSIGN$/i
-        opnn; erev 'Input your FASTA Data now (Use __ to terminate input):'
-        i = $stdin.gets('__').chomp
-      end
-      # ===================================================================== #
-      # If we did not provide an input, we scan for entries with .fa
-      # in the current directory.
-      # ===================================================================== #
-      if i.nil?
-        unless Dir['*.fa'].empty?
-          i = Dir['*.fa']
-        end
-      end
-      if i.is_a? Array
-        i = i.first
-      end
-      if i
-        erev "Now loading from `#{sfancy(i)}#{rev}`."
-      end
-      @internal_hash[:fasta_file] = i
-      parse_fasta_object = ::Bioroebe.parse_fasta(i) # bl $RSRC/bioroebe/lib/bioroebe/fasta/parse_fasta.rb
-      # ===================================================================== #
-      # === We will store all created fasta objects in an Array
-      # ===================================================================== #
-      array_fasta? << parse_fasta_object
-      this_sequence = parse_fasta_object.sequence?
-      # ===================================================================== #
-      # Handle large sequences next - we will add a timer. The purpose of
-      # this timer is to notify the user how long it took to assign to
-      # the main string. At a later point, we can optimize the speed and
-      # do the assignment in pure C rather than ruby.
-      # ===================================================================== #
-      if this_sequence.size > 1_000_000
-        add_timer_snapshot
-        erev 'The sequence is fairly large - we will time how long it takes to'
-        erev 'assign it to the main sequence.'
-      end
-      # ===================================================================== #
-      # Obtain the type next:
-      # ===================================================================== #
-      type = parse_fasta_object.type?
-      unless type == :protein
-        set_dna_sequence(this_sequence)
-        if this_sequence.size > 1_000_000
-          add_timer_snapshot
-          n_seconds_difference = calculate_time_difference.abs.to_f.round(3).to_s
-          erev "Loading these #{springgreen(this_sequence.size.to_s)}"\
-               "#{rev}"\
-               " nucleotides "\
-               "took #{sfancy(n_seconds_difference)}#{rev} seconds."
-        end
-      end
-    end
-  end; alias parse_this_fasta_file parse_fasta_format # === parse_this_fasta_file
-  require 'bioroebe/toplevel_methods/delimiter.rb'
-  # ========================================================================= #
-  # === obtain_multiline_fasta
-  #
-  # If we want to obtain multiline FASTA input, that is input that includes
-  # the "\n" newline character, then we can use the following method here.
-  #
-  # We will use $stdin to obtain the input. The end-delimiter will
-  # be ___
-  # ========================================================================= #
-  def obtain_multiline_fasta
-    delimiter = ::Bioroebe.delimiter?
-    erev 'Input your Fasta format or nucleotide sequence next - '\
-         'delimit/end via "'+lightgreen(delimiter)+rev+'" (3x the _ '\
-         'character).'
-    # ======================================================================= #
-    # Chop away all newlines.
-    # ======================================================================= #
-    dataset = $stdin.gets(delimiter)
-    # ======================================================================= #
-    # Format the dataset a little.
-    # ======================================================================= #
-    dataset.chomp!
-    dataset.delete!('_')
-    dataset.delete!(N)
-    dataset.strip!
-    parse_fasta_format(dataset)
-    # assign_sequence(dataset)
-  end
-  # ========================================================================= #
-  # === array_fasta?
-  # ========================================================================= #
-  def array_fasta?
-    @internal_hash[:array_fasta]
-  end
-  require 'bioroebe/toplevel_methods/fasta_and_fastq.rb'
-  # ========================================================================= #
-  # === index_this_fasta_file
-  #
-  # This will index FASTA files (.fa or .fasta) via the samtools.
-  # ========================================================================= #
-  def index_this_fasta_file(i)
-    # ======================================================================= #
-    # === Handle blocks first
-    # ======================================================================= #
-    if block_given?
-      yielded = yield
-      case yielded
-      when :use_all_fasta_files_if_no_argument_was_given
-        if i.nil? or i.empty?
-          i = Dir['*.fasta']+
-              Dir['*.fa'].flatten.compact
-        end
-      end
-    end
-    if i.is_a? Array
-      i.each {|entry| index_this_fasta_file(entry) }
-    else
-      i = i.to_s # Need to work on a String past this point.
-      if File.exist? i
-        erev "Indexing the following file next, via "\
-             "#{steelblue('samtools')}#{rev}:"
-        Bioroebe.index_this_fasta_file(i)
-      else
-        no_file_exists_at(i)
-      end
-    end
-  end
-  # ========================================================================= #
-  # === try_to_display_this_fasta_entry
-  # ========================================================================= #
-  def try_to_display_this_fasta_entry(i)
-    if i.is_a? String
-      i = i.to_i - 1 # -1 because Arrays in ruby begin at 0.
-    end
-    last_entry = array_fasta?.last
-    sequence_data = last_entry[i]
-    erev sequence_data
-    if block_given?
-      yielded = yield
-      case yielded
-      # ===================================================================== #
-      # === :and_assign_it_as_well
-      # ===================================================================== #
-      when :and_assign_it_as_well
-        assign(sequence_data) # In this case it will become the new main sequence data.
-      end
-    end
-  end
-  # ========================================================================= #
-  # === parse_this_fasta_sequence
-  # ========================================================================= #
-  def parse_this_fasta_sequence(i)
-    if i and File.file?(i)
-      set_aminoacid(File.read(i).delete("\n"))
-    end
-  end
-  # ========================================================================= #
-  # === fasta?
-  #
-  # We need a query method over the main fasta object, IF it was set.
-  #
-  # Since we already have an Array that keeps track of these objects,
-  # we can simply grab the last one from that collection.
-  # ========================================================================= #
-  def fasta?
-    array_fasta?.last
-  end; alias last_fasta?       fasta? # === fasta?
-       alias last_fasta_entry? fasta? # === last_fasta_entry?
-  # ========================================================================= #
-  # === colourize_fasta_file
-  #
-  # Invocation example:
-  #
-  #   colourize_fasta_file /Depot/Temp/bioroebe/sequence.fasta
-  #
-  # ========================================================================= #
-  def colourize_fasta_file(i)
-    if i.is_a? Array
-      i.each {|entry| colourize_fasta_file(entry) }
-    else
-      # ===================================================================== #
-      # First, get the raw content of the fasta sequence here.
-      # ===================================================================== #
-      if File.exist? i
-        sequence = ::Bioroebe.parse_fasta_file(i).sequence?
-        # =================================================================== #
-        # Now that we have the sequence, colourize it.
-        # =================================================================== #
-        cliner {
-          ColourSchemeDemo.new(sequence)
-        }
-      end
-    end
-  end
-  # ========================================================================= #
-  # === to_fasta
-  #
-  # Create a Fasta format from the target sequence.
-  # ========================================================================= #
-  def to_fasta(
-      i = dna_sequence?
-    )
-    array = i.scan(/.{,80}/).reject {|entry| entry.empty? }
-    name_of_the_gene = sequence_object?.name_of_gene?.to_s
-    if name_of_the_gene.empty?
-      name_of_the_gene << 'Drosophila melanogaster chromosome'
-    end
-    array[0,0] = '>gi|671162122:c7086083-7083225 '+name_of_the_gene
-    e array.join(N)
-  end
-  # ========================================================================= #
-  # === fasta_file?
-  # ========================================================================= #
-  def fasta_file?(i = :fasta_file)
-    if @internal_hash[:fasta_file].has_key?(i)
-      @internal_hash[:fasta_file].fetch(i)
-    else
-      erev 'We could not find the key called `'+simp(i.to_s)+rev+'`.'
-    end
-  end
-  # ========================================================================= #
-  # === create_fasta_file
-  # ========================================================================= #
-  def create_fasta_file
-    set_save_file :default_fasta
-    e 'Now creating a new fasta file. Will store into `'+sfile(@save_file)+'`.'
-    _ = '>gi|12345|pir|TVHGG| some unknown protein'+N
-    _ << string?
-    save_file(_, @internal_hash[:save_file])
-  end
-  # ========================================================================= #
-  # === return_fasta_files_in_the_log_directory
-  # ========================================================================= #
-  def return_fasta_files_in_the_log_directory
-    Dir[::Bioroebe.log_dir?+'*.fa*']
-  end
-  # ========================================================================= #
-  # === use_this_fasta_file
-  #
-  # Use a fasta file based on its position.
-  #
-  # For instance, fasta file at position 2 will be the second fasta file
-  # kept in the main log directory.
-  # ========================================================================= #
-  def use_this_fasta_file(at_position = 1)
-    # ======================================================================= #
-    # We need to map the given position to the existing (local) file at hand.
-    # ======================================================================= #
-    this_fasta_file = return_fasta_files_in_the_log_directory[at_position.to_i - 1]
-    if this_fasta_file
-      assign_fasta(this_fasta_file)
-    else
-      erev 'Could not find any file at position '+simp(at_position.to_s)+rev+'.'
-      erev 'Use "'+steelblue('show_fasta_files')+rev+
-           '" to see which fasta files are available.'
-    end
-  end
-  # ========================================================================= #
-  # === show_my_fasta_file
-  # ========================================================================= #
-  def show_my_fasta_file
-    e HOME_DIRECTORY_OF_USER_X+
-      'data/science/BIOINFORMATIK/DATA/FASTA/tardigrada_fasta.ffn'
-  end
-end; end