bioroebe 0.10.80 → 0.12.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bioroebe might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +3612 -2781
- data/bin/bioroebe +7 -1
- data/bin/bioroebe_hash +7 -0
- data/bin/codon_to_aminoacid +1 -0
- data/bioroebe.gemspec +3 -3
- data/doc/README.gen +3612 -2742
- data/doc/quality_control/commandline_applications.md +3 -3
- data/doc/todo/bioroebe_java_todo.md +22 -0
- data/doc/todo/bioroebe_todo.md +2059 -2615
- data/lib/bioroebe/aminoacids/aminoacid_substitution.rb +1 -9
- data/lib/bioroebe/aminoacids/codon_percentage.rb +1 -9
- data/lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb +1 -9
- data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +1 -0
- data/lib/bioroebe/aminoacids/show_hydrophobicity.rb +1 -6
- data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +18 -8
- data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +15 -11
- data/lib/bioroebe/base/commandline_application/misc.rb +66 -49
- data/lib/bioroebe/base/commandline_application/opn.rb +8 -8
- data/lib/bioroebe/base/commandline_application/reset.rb +3 -2
- data/lib/bioroebe/base/misc.rb +35 -0
- data/lib/bioroebe/base/prototype/misc.rb +11 -1
- data/lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb +2 -10
- data/lib/bioroebe/codons/codons.rb +1 -1
- data/lib/bioroebe/codons/convert_this_codon_to_that_aminoacid.rb +192 -58
- data/lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb +1 -9
- data/lib/bioroebe/codons/show_codon_tables.rb +6 -2
- data/lib/bioroebe/codons/show_codon_usage.rb +15 -4
- data/lib/bioroebe/colours/rev.rb +4 -1
- data/lib/bioroebe/constants/aminoacids_and_proteins.rb +1 -0
- data/lib/bioroebe/constants/database_constants.rb +1 -1
- data/lib/bioroebe/constants/files_and_directories.rb +31 -4
- data/lib/bioroebe/constants/misc.rb +20 -0
- data/lib/bioroebe/conversions/dna_to_aminoacid_sequence.rb +58 -24
- data/lib/bioroebe/count/count_amount_of_aminoacids.rb +3 -2
- data/lib/bioroebe/count/count_amount_of_nucleotides.rb +3 -0
- data/lib/bioroebe/crystal/README.md +2 -0
- data/lib/bioroebe/crystal/to_rna.cr +19 -0
- data/lib/bioroebe/data/README.md +11 -8
- data/lib/bioroebe/data/electron_microscopy/pos_example.pos +396 -0
- data/lib/bioroebe/data/electron_microscopy/test_particles.star +36 -0
- data/lib/bioroebe/data/fasta/human/Homo_sapiens_hemoglobin_subunit_alpha_HBB_mRNA.fasta +9 -0
- data/lib/bioroebe/data/fasta/human/Homo_sapiens_hemoglobin_subunit_beta_HBB_mRNA.fasta +8 -0
- data/lib/bioroebe/data/fasta/human/README.md +2 -0
- data/lib/bioroebe/electron_microscopy/coordinate_analyzer.rb +15 -18
- data/lib/bioroebe/{fasta_and_fastq/parse_fasta/run.rb → electron_microscopy/electron_microscopy_module.rb} +16 -8
- data/lib/bioroebe/electron_microscopy/fix_pos_file.rb +1 -9
- data/lib/bioroebe/electron_microscopy/flipy.rb +83 -0
- data/lib/bioroebe/electron_microscopy/parse_coordinates.rb +2 -10
- data/lib/bioroebe/electron_microscopy/read_file_xmd.rb +1 -9
- data/lib/bioroebe/electron_microscopy/simple_star_file_generator.rb +4 -9
- data/lib/bioroebe/enzymes/has_this_restriction_enzyme.rb +10 -3
- data/lib/bioroebe/enzymes/restriction_enzyme.rb +23 -1
- data/lib/bioroebe/enzymes/restriction_enzymes/statistics.rb +65 -0
- data/lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb +1 -9
- data/lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb +7 -9
- data/lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb +1 -5
- data/lib/bioroebe/fasta_and_fastq/fasta_to_yaml/fasta_to_yaml.rb +81 -0
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb +1465 -7
- data/lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb +11 -2
- data/lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb +27 -12
- data/lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb +1 -5
- data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb +0 -5
- data/lib/bioroebe/genome/README.md +4 -0
- data/lib/bioroebe/genome/genome.rb +67 -0
- data/lib/bioroebe/genomes/genome_pattern.rb +3 -9
- data/lib/bioroebe/gui/gtk +1 -0
- data/lib/bioroebe/gui/gtk3/alignment/alignment.rb +73 -128
- data/lib/bioroebe/gui/gtk3/controller/controller.rb +45 -27
- data/lib/bioroebe/gui/gtk3/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +76 -50
- data/lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb +99 -21
- data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb +42 -28
- data/lib/bioroebe/gui/gtk3/nucleotide_analyser/nucleotide_analyser.rb +119 -71
- data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +18 -18
- data/lib/bioroebe/gui/gtk3/random_sequence/random_sequence.rb +19 -11
- data/lib/bioroebe/gui/jruby/alignment/alignment.rb +165 -0
- data/lib/bioroebe/gui/libui/alignment/alignment.rb +3 -1
- data/lib/bioroebe/gui/libui/controller/controller.rb +116 -0
- data/lib/bioroebe/gui/libui/random_sequence/random_sequence.rb +18 -2
- data/lib/bioroebe/gui/libui/show_codon_table/show_codon_table.rb +2 -0
- data/lib/bioroebe/gui/libui/three_to_one/three_to_one.rb +8 -6
- data/lib/bioroebe/gui/shared_code/alignment/alignment_module.rb +102 -0
- data/lib/bioroebe/gui/shared_code/levensthein_distance/levensthein_distance_module.rb +18 -16
- data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +14 -14
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$1.class +0 -0
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$CloseListener.class +0 -0
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.class +0 -0
- data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.java +141 -0
- data/lib/bioroebe/images/FORWARD_PRIMER.png +0 -0
- data/lib/bioroebe/images/REVERSE_PRIMER.png +0 -0
- data/lib/bioroebe/java/README.md +4 -0
- data/lib/bioroebe/java/bioroebe/Sequence.java +25 -1
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Base.class +0 -0
- data/lib/bioroebe/java/bioroebe/{Base.java → src/main/java/bioroebe/Base.java} +15 -2
- data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.class → src/main/java/bioroebe/BisulfiteTreatment.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{Codons.class → src/main/java/bioroebe/Codons.class} +0 -0
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Codons.java +34 -0
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Commandline.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Commandline.java +101 -0
- data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.class → src/main/java/bioroebe/GenerateRandomDnaSequence.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.java → src/main/java/bioroebe/GenerateRandomDnaSequence.java} +8 -2
- data/lib/bioroebe/java/bioroebe/{IsPalindrome.class → src/main/java/bioroebe/IsPalindrome.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{IsPalindrome.java → src/main/java/bioroebe/IsPalindrome.java} +5 -1
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/PartnerNucleotide.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/PartnerNucleotide.java +56 -0
- data/lib/bioroebe/java/bioroebe/{RemoveFile.java → src/main/java/bioroebe/RemoveFile.java} +10 -4
- data/lib/bioroebe/java/bioroebe/{RemoveNumbers.class → src/main/java/bioroebe/RemoveNumbers.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{RemoveNumbers.java → src/main/java/bioroebe/RemoveNumbers.java} +1 -0
- data/lib/bioroebe/java/bioroebe/{ToCamelcase.class → src/main/java/bioroebe/ToCamelcase.class} +0 -0
- data/lib/bioroebe/java/bioroebe/{ToCamelcase.java → src/main/java/bioroebe/ToCamelcase.java} +3 -3
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/ToRNA.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/ToRNA.java +42 -0
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/toplevel_methods/BaseComposition.class +0 -0
- data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/toplevel_methods/BaseComposition.java +75 -0
- data/lib/bioroebe/misc/ruler.rb +11 -2
- data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb +1 -9
- data/lib/bioroebe/nucleotides/show_nucleotide_sequence.rb +7 -7
- data/lib/bioroebe/parsers/genbank_parser.rb +347 -26
- data/lib/bioroebe/parsers/gff.rb +1 -9
- data/lib/bioroebe/patterns/scan_for_repeat.rb +1 -5
- data/lib/bioroebe/pdb/fetch_fasta_sequence_from_pdb.rb +1 -9
- data/lib/bioroebe/pdb/parse_mmCIF_file.rb +1 -9
- data/lib/bioroebe/pdb/parse_pdb_file.rb +4 -10
- data/lib/bioroebe/project/project.rb +1 -1
- data/lib/bioroebe/python/README.md +1 -0
- data/lib/bioroebe/python/__pycache__/mymodule.cpython-39.pyc +0 -0
- data/lib/bioroebe/python/gui/gtk3/all_in_one.css +4 -0
- data/lib/bioroebe/python/gui/gtk3/all_in_one.py +59 -0
- data/lib/bioroebe/python/gui/gtk3/widget1.py +20 -0
- data/lib/bioroebe/python/gui/tkinter/all_in_one.py +91 -0
- data/lib/bioroebe/python/mymodule.py +8 -0
- data/lib/bioroebe/python/protein_to_dna.py +33 -0
- data/lib/bioroebe/python/shell/shell.py +19 -0
- data/lib/bioroebe/python/to_rna.py +14 -0
- data/lib/bioroebe/python/toplevel_methods/esystem.py +12 -0
- data/lib/bioroebe/python/toplevel_methods/open_in_browser.py +20 -0
- data/lib/bioroebe/python/toplevel_methods/palindromes.py +42 -0
- data/lib/bioroebe/python/toplevel_methods/rds.py +13 -0
- data/lib/bioroebe/python/toplevel_methods/shuffleseq.py +23 -0
- data/lib/bioroebe/python/toplevel_methods/three_delimiter.py +37 -0
- data/lib/bioroebe/python/toplevel_methods/time_and_date.py +43 -0
- data/lib/bioroebe/python/toplevel_methods/to_camelcase.py +21 -0
- data/lib/bioroebe/requires/require_the_bioroebe_project.rb +3 -1
- data/lib/bioroebe/sequence/alignment.rb +14 -4
- data/lib/bioroebe/sequence/dna.rb +1 -0
- data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +28 -25
- data/lib/bioroebe/sequence/protein.rb +105 -3
- data/lib/bioroebe/sequence/sequence.rb +87 -21
- data/lib/bioroebe/shell/menu.rb +3829 -3714
- data/lib/bioroebe/shell/misc.rb +59 -4307
- data/lib/bioroebe/shell/readline/readline.rb +1 -1
- data/lib/bioroebe/shell/shell.rb +11255 -28
- data/lib/bioroebe/siRNA/siRNA.rb +81 -1
- data/lib/bioroebe/string_matching/find_longest_substring.rb +3 -2
- data/lib/bioroebe/string_matching/hamming_distance.rb +1 -9
- data/lib/bioroebe/taxonomy/class_methods.rb +3 -8
- data/lib/bioroebe/taxonomy/constants.rb +4 -3
- data/lib/bioroebe/taxonomy/edit.rb +2 -1
- data/lib/bioroebe/taxonomy/help/help.rb +10 -10
- data/lib/bioroebe/taxonomy/help/helpline.rb +2 -2
- data/lib/bioroebe/taxonomy/info/check_available.rb +15 -9
- data/lib/bioroebe/taxonomy/info/info.rb +18 -11
- data/lib/bioroebe/taxonomy/info/is_dna.rb +46 -36
- data/lib/bioroebe/taxonomy/interactive.rb +140 -104
- data/lib/bioroebe/taxonomy/menu.rb +27 -18
- data/lib/bioroebe/taxonomy/parse_fasta.rb +3 -1
- data/lib/bioroebe/taxonomy/shared.rb +1 -0
- data/lib/bioroebe/taxonomy/taxonomy.rb +1 -0
- data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +31 -24
- data/lib/bioroebe/toplevel_methods/colourize_related_methods.rb +164 -0
- data/lib/bioroebe/toplevel_methods/databases.rb +1 -1
- data/lib/bioroebe/toplevel_methods/digest.rb +18 -8
- data/lib/bioroebe/toplevel_methods/fasta_and_fastq.rb +107 -63
- data/lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb +14 -2
- data/lib/bioroebe/toplevel_methods/frequencies.rb +8 -1
- data/lib/bioroebe/toplevel_methods/misc.rb +142 -12
- data/lib/bioroebe/toplevel_methods/nucleotides.rb +118 -46
- data/lib/bioroebe/toplevel_methods/open_in_browser.rb +2 -0
- data/lib/bioroebe/toplevel_methods/palindromes.rb +1 -2
- data/lib/bioroebe/toplevel_methods/taxonomy.rb +2 -2
- data/lib/bioroebe/toplevel_methods/to_camelcase.rb +5 -0
- data/lib/bioroebe/utility_scripts/align_open_reading_frames.rb +1 -9
- data/lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb +1 -9
- data/lib/bioroebe/utility_scripts/compacter.rb +1 -9
- data/lib/bioroebe/utility_scripts/compseq/compseq.rb +1 -9
- data/lib/bioroebe/utility_scripts/consensus_sequence.rb +6 -6
- data/lib/bioroebe/utility_scripts/create_batch_entrez_file.rb +1 -9
- data/lib/bioroebe/utility_scripts/dot_alignment.rb +1 -9
- data/lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb +1 -4
- data/lib/bioroebe/utility_scripts/parse_taxonomy.rb +2 -2
- data/lib/bioroebe/utility_scripts/showorf/constants.rb +0 -5
- data/lib/bioroebe/utility_scripts/showorf/reset.rb +1 -4
- data/lib/bioroebe/version/version.rb +2 -2
- data/lib/bioroebe/www/embeddable_interface.rb +103 -54
- data/lib/bioroebe/www/sinatra/sinatra.rb +186 -70
- data/lib/bioroebe/yaml/aminoacids/amino_acids_long_name_to_one_letter.yml +2 -2
- data/lib/bioroebe/yaml/configuration/browser.yml +1 -1
- data/lib/bioroebe/yaml/configuration/temp_dir.yml +1 -1
- data/lib/bioroebe/yaml/genomes/README.md +3 -4
- data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +27 -27
- metadata +81 -64
- data/doc/setup.rb +0 -1655
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/constants.rb +0 -50
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/initialize.rb +0 -86
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/menu.rb +0 -117
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/misc.rb +0 -981
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/report.rb +0 -156
- data/lib/bioroebe/fasta_and_fastq/parse_fasta/reset.rb +0 -128
- data/lib/bioroebe/genbank/genbank_parser.rb +0 -291
- data/lib/bioroebe/java/bioroebe/Base.class +0 -0
- data/lib/bioroebe/java/bioroebe/Codons.java +0 -22
- data/lib/bioroebe/java/bioroebe/PartnerNucleotide.class +0 -0
- data/lib/bioroebe/java/bioroebe/PartnerNucleotide.java +0 -19
- data/lib/bioroebe/java/bioroebe.jar +0 -0
- data/lib/bioroebe/shell/add.rb +0 -108
- data/lib/bioroebe/shell/assign.rb +0 -360
- data/lib/bioroebe/shell/chop_and_cut.rb +0 -281
- data/lib/bioroebe/shell/constants.rb +0 -166
- data/lib/bioroebe/shell/download.rb +0 -335
- data/lib/bioroebe/shell/enable_and_disable.rb +0 -158
- data/lib/bioroebe/shell/enzymes.rb +0 -310
- data/lib/bioroebe/shell/fasta.rb +0 -345
- data/lib/bioroebe/shell/gtk.rb +0 -76
- data/lib/bioroebe/shell/history.rb +0 -132
- data/lib/bioroebe/shell/initialize.rb +0 -217
- data/lib/bioroebe/shell/loop.rb +0 -74
- data/lib/bioroebe/shell/prompt.rb +0 -107
- data/lib/bioroebe/shell/random.rb +0 -289
- data/lib/bioroebe/shell/reset.rb +0 -335
- data/lib/bioroebe/shell/scan_and_parse.rb +0 -135
- data/lib/bioroebe/shell/search.rb +0 -337
- data/lib/bioroebe/shell/sequences.rb +0 -200
- data/lib/bioroebe/shell/show_report_and_display.rb +0 -2901
- data/lib/bioroebe/shell/startup.rb +0 -127
- data/lib/bioroebe/shell/taxonomy.rb +0 -14
- data/lib/bioroebe/shell/tk.rb +0 -23
- data/lib/bioroebe/shell/user_input.rb +0 -88
- data/lib/bioroebe/shell/xorg.rb +0 -45
- /data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.java → src/main/java/bioroebe/BisulfiteTreatment.java} +0 -0
- /data/lib/bioroebe/java/bioroebe/{Esystem.class → src/main/java/bioroebe/Esystem.class} +0 -0
- /data/lib/bioroebe/java/bioroebe/{Esystem.java → src/main/java/bioroebe/Esystem.java} +0 -0
- /data/lib/bioroebe/java/bioroebe/{RemoveFile.class → src/main/java/bioroebe/RemoveFile.class} +0 -0
data/lib/bioroebe/siRNA/siRNA.rb
CHANGED
@@ -16,6 +16,16 @@
|
|
16
16
|
# of the antisense strand
|
17
17
|
# (4) the absence of any GC stretch of more than 9 nt in length
|
18
18
|
#
|
19
|
+
# Reference: Kumiko Ui-Tei et al. "Guidelines for the selection of highly
|
20
|
+
# effective siRNA sequences for mammalian and chick RNA interference."
|
21
|
+
#
|
22
|
+
# Nucleic Acids Res. 2004 32: 936-948.
|
23
|
+
#
|
24
|
+
# The other paper is from:
|
25
|
+
#
|
26
|
+
# Angela Reynolds et al. Rational siRNA design for RNA interference.
|
27
|
+
# Nat. Biotechnol. 2004 22: 326-330.
|
28
|
+
#
|
19
29
|
# =========================================================================== #
|
20
30
|
# require 'bioroebe/siRNA/siRNA.rb'
|
21
31
|
# =========================================================================== #
|
@@ -40,7 +50,8 @@ class SiRNA # === Bioroebe::SiRNA
|
|
40
50
|
# === uitei?
|
41
51
|
# ========================================================================= #
|
42
52
|
def uitei?(
|
43
|
-
i
|
53
|
+
i = @sequence,
|
54
|
+
be_verbose = true
|
44
55
|
)
|
45
56
|
unless i.size == 23 # 21 nt target + 2 nt overhang
|
46
57
|
if be_verbose
|
@@ -84,10 +95,79 @@ class SiRNA # === Bioroebe::SiRNA
|
|
84
95
|
return true # This is then the new default return value here.
|
85
96
|
end; alias uitei_rule? uitei? # === uitei_rule?
|
86
97
|
|
98
|
+
# ========================================================================= #
|
99
|
+
# === reynolds?
|
100
|
+
#
|
101
|
+
# This method implements Reynolds' rule.
|
102
|
+
#
|
103
|
+
# Reynolds' rule does not require all of the
|
104
|
+
# criteria to be fulfilled simultaneously.
|
105
|
+
#
|
106
|
+
# See: https://www.nature.com/articles/nbt936
|
107
|
+
# ========================================================================= #
|
108
|
+
def reynolds?(
|
109
|
+
i = @sequence,
|
110
|
+
be_verbose = true
|
111
|
+
)
|
112
|
+
unless i.size == 23 # 21 nt target + 2 nt overhang
|
113
|
+
if be_verbose
|
114
|
+
puts 'The size should be 23; it is '+i.to_s.size.to_s+'.'
|
115
|
+
end
|
116
|
+
return false
|
117
|
+
end
|
118
|
+
score = 0
|
119
|
+
seq19 = i[2 .. 20] # 19 nt double-stranded region of siRNA
|
120
|
+
complement_to_seq19 = ::Bioroebe.reverse_complement(seq19) # This is actually the reverse complement.
|
121
|
+
# ======================================================================= #
|
122
|
+
# === criterium 1
|
123
|
+
# ======================================================================= #
|
124
|
+
gc_number = seq19.scan(/[GC]/i).size
|
125
|
+
if (7 <= gc_number and gc_number <= 10)
|
126
|
+
score += 1
|
127
|
+
end
|
128
|
+
# ======================================================================= #
|
129
|
+
# === criterium 2
|
130
|
+
# ======================================================================= #
|
131
|
+
au_number = seq19[14..18].scan(/[AU]/i).size
|
132
|
+
score += au_number
|
133
|
+
# ======================================================================= #
|
134
|
+
# === criterium 3
|
135
|
+
#
|
136
|
+
# This is not yet implemented: Tm
|
137
|
+
# ======================================================================= #
|
138
|
+
# ======================================================================= #
|
139
|
+
# === criterium 4
|
140
|
+
# ======================================================================= #
|
141
|
+
score += 1 if seq19[18..18].match(/A/i)
|
142
|
+
# ======================================================================= #
|
143
|
+
# === criterium 5
|
144
|
+
# ======================================================================= #
|
145
|
+
score += 1 if seq19[2..2].match(/A/i)
|
146
|
+
# ======================================================================= #
|
147
|
+
# === criterium 6
|
148
|
+
# ======================================================================= #
|
149
|
+
score += 1 if seq19[9..9].match(/[U]/i)
|
150
|
+
# ======================================================================= #
|
151
|
+
# === criterium 7
|
152
|
+
# ======================================================================= #
|
153
|
+
score -= 1 if seq19[18..18].match(/[GC]/i)
|
154
|
+
# ======================================================================= #
|
155
|
+
# === criterium 8
|
156
|
+
# ======================================================================= #
|
157
|
+
score -= 1 if seq19[12..12].match(/G/i)
|
158
|
+
if score >= 6
|
159
|
+
return score
|
160
|
+
else
|
161
|
+
return false
|
162
|
+
end
|
163
|
+
end; alias reynolds_rule? reynolds? # === reynolds_rule?
|
164
|
+
|
87
165
|
end; end
|
88
166
|
|
89
167
|
if __FILE__ == $PROGRAM_NAME
|
90
168
|
alias e puts
|
91
169
|
e Bioroebe::SiRNA.new('GAGAAAATCATGCATGCATTTAT').uitei_rule?
|
92
170
|
e Bioroebe::SiRNA.new('GAAAAAAAAATGCATGCAAAAAA').uitei_rule?
|
171
|
+
e Bioroebe::SiRNA.new('GAGAAAATCATGCATGCATTTAT').reynolds_rule?
|
172
|
+
e Bioroebe::SiRNA.new('GAAAAAAAAATGCATGCAAAAAA').reynolds_rule?
|
93
173
|
end # siRNA.rb
|
@@ -109,7 +109,8 @@ class FindLongestSubstring < ::Bioroebe::CommandlineApplication # === Bioroebe::
|
|
109
109
|
# === record
|
110
110
|
# ========================================================================= #
|
111
111
|
def record(
|
112
|
-
i
|
112
|
+
i = @_,
|
113
|
+
start_position = nil
|
113
114
|
)
|
114
115
|
if i.size > @longest_substring.size
|
115
116
|
@longest_substring = i
|
@@ -148,7 +149,7 @@ class FindLongestSubstring < ::Bioroebe::CommandlineApplication # === Bioroebe::
|
|
148
149
|
@_ << entry
|
149
150
|
record(@_, index)
|
150
151
|
else
|
151
|
-
@_ = ''
|
152
|
+
@_ = ''.dup
|
152
153
|
end
|
153
154
|
}
|
154
155
|
report_longest_substring
|
@@ -20,11 +20,6 @@ module Bioroebe
|
|
20
20
|
|
21
21
|
class HammingDistance < ::Bioroebe::CommandlineApplication
|
22
22
|
|
23
|
-
# ========================================================================= #
|
24
|
-
# === NAMESPACE
|
25
|
-
# ========================================================================= #
|
26
|
-
NAMESPACE = inspect
|
27
|
-
|
28
23
|
# ========================================================================= #
|
29
24
|
# === SHOW_SPACER
|
30
25
|
# ========================================================================= #
|
@@ -57,10 +52,7 @@ class HammingDistance < ::Bioroebe::CommandlineApplication
|
|
57
52
|
# ========================================================================= #
|
58
53
|
def reset
|
59
54
|
super()
|
60
|
-
|
61
|
-
# === @namespace
|
62
|
-
# ======================================================================= #
|
63
|
-
@namespace = NAMESPACE
|
55
|
+
infer_the_namespace
|
64
56
|
# ======================================================================= #
|
65
57
|
# === @input
|
66
58
|
# ======================================================================= #
|
@@ -10,6 +10,7 @@ module Taxonomy # === Bioroebe::Taxonomy
|
|
10
10
|
|
11
11
|
require 'bioroebe/taxonomy/shared.rb'
|
12
12
|
require 'bioroebe/colours/rev.rb'
|
13
|
+
require 'bioroebe/taxonomy/constants.rb'
|
13
14
|
|
14
15
|
require 'bioroebe/toplevel_methods/extract.rb'
|
15
16
|
require 'bioroebe/toplevel_methods/time_and_date.rb'
|
@@ -17,10 +18,11 @@ module Taxonomy # === Bioroebe::Taxonomy
|
|
17
18
|
require 'bioroebe/toplevel_methods/file_and_directory_related_actions.rb'
|
18
19
|
|
19
20
|
extend Colours::E
|
21
|
+
extend Colours
|
20
22
|
extend Taxonomy::Shared
|
21
23
|
|
22
24
|
# ========================================================================= #
|
23
|
-
# === status
|
25
|
+
# === Bioroebe::Taxonomy.status
|
24
26
|
#
|
25
27
|
# Invoke this method like that:
|
26
28
|
#
|
@@ -128,13 +130,6 @@ module Taxonomy # === Bioroebe::Taxonomy
|
|
128
130
|
Taxonomy.base_dir?
|
129
131
|
end; alias base_dir? project_base_dir? # === base_dir?
|
130
132
|
|
131
|
-
# ========================================================================= #
|
132
|
-
# === Taxonomy.base_dir?
|
133
|
-
# ========================================================================= #
|
134
|
-
def self.base_dir?
|
135
|
-
PROJECT_BASE_DIR2
|
136
|
-
end
|
137
|
-
|
138
133
|
# ========================================================================= #
|
139
134
|
# === Taxonomy.be_verbose?
|
140
135
|
# ========================================================================= #
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# Encoding: UTF-8
|
3
3
|
# frozen_string_literal: true
|
4
4
|
# =========================================================================== #
|
5
|
+
# include Bioroebe::Taxonomy::Constants
|
6
|
+
# =========================================================================== #
|
5
7
|
module Bioroebe
|
6
8
|
|
7
9
|
module Taxonomy
|
@@ -133,8 +135,6 @@ module Constants # === Bioroebe::Taxonomy::Constants
|
|
133
135
|
# === LAST_INTERACTIVE_COMMAND
|
134
136
|
#
|
135
137
|
# The following line must come after Shared was included.
|
136
|
-
#
|
137
|
-
# In order for this to work, PROJECT_BASE_DIR2 must exist.
|
138
138
|
# ========================================================================= #
|
139
139
|
LAST_INTERACTIVE_COMMAND = TEMP_DIR+'LAST_INTERACTIVE_COMMAND.md'
|
140
140
|
|
@@ -165,7 +165,8 @@ module Constants # === Bioroebe::Taxonomy::Constants
|
|
165
165
|
# ========================================================================= #
|
166
166
|
# === URL1
|
167
167
|
# ========================================================================= #
|
168
|
-
URL1 = NCBI_CGI_SCRIPT+
|
168
|
+
URL1 = NCBI_CGI_SCRIPT+
|
169
|
+
'mode=Undef&name=Arabidopsis+thaliana&lvl=0&srchmode=1&keep=1&unlock'
|
169
170
|
|
170
171
|
# ========================================================================= #
|
171
172
|
# == Taxonomy entries
|
@@ -54,7 +54,8 @@ module Taxonomy # === Bioroebe::Taxonomy
|
|
54
54
|
# ======================================================================= #
|
55
55
|
# === login
|
56
56
|
# ======================================================================= #
|
57
|
-
when 'login',
|
57
|
+
when 'login',
|
58
|
+
'main'
|
58
59
|
edit_login_file
|
59
60
|
# ======================================================================= #
|
60
61
|
# === instructions
|
@@ -36,19 +36,19 @@ module Taxonomy
|
|
36
36
|
Helpline[:nocolours,'# Use this to disable the '+
|
37
37
|
'colours. (Use "yescolours" to enable them again)']
|
38
38
|
end
|
39
|
-
Helpline[:taxid,'# Find out the name of the organism through
|
40
|
-
'input ID from the NCBI dataset. For example: "taxid 33"']
|
41
|
-
Helpline[:table_names?,'# Use this to show the SQL command '
|
39
|
+
Helpline[:taxid,'# Find out the name of the organism through '\
|
40
|
+
'the input ID from the NCBI dataset. For example: "taxid 33"']
|
41
|
+
Helpline[:table_names?,'# Use this to show the SQL command '\
|
42
42
|
'that was used to generate the SQL Tables.']
|
43
|
-
Helpline[:verify,'# Use this to verify that the '
|
43
|
+
Helpline[:verify,'# Use this to verify that the '\
|
44
44
|
'.sql files (nodes and names) are valid.']
|
45
|
-
Helpline[:verbose,'# be verbose, in other words provide '
|
45
|
+
Helpline[:verbose,'# be verbose, in other words provide '\
|
46
46
|
'extra information to us whenever feasible']
|
47
|
-
Helpline[:ll, '# Show the content of the current working '
|
47
|
+
Helpline[:ll, '# Show the content of the current working '\
|
48
48
|
'directory.']
|
49
|
-
Helpline[:id,'# Query the postgre database to get the ID of '
|
49
|
+
Helpline[:id,'# Query the postgre database to get the ID of '\
|
50
50
|
'a given species.']
|
51
|
-
Helpline[:download,'# Download the remote NCBI database '
|
51
|
+
Helpline[:download,'# Download the remote NCBI database '\
|
52
52
|
'(at '+simp(URL_TO_TAXONOMY_ARCHIVE)+')']
|
53
53
|
if SHALL_WE_LOG_LAST_UPDATE
|
54
54
|
Helpline[:last_update?,'# When did we last update the database']
|
@@ -56,8 +56,8 @@ module Taxonomy
|
|
56
56
|
Helpline[:update_database, '# download the remote NCBI database, '+
|
57
57
|
'extract it, generate nodes.sql and names.sql,']
|
58
58
|
_ = ' ' * Helpline::LJUST
|
59
|
-
e _+Helpline::PADDING+' # and then populate the
|
60
|
-
'with this information'
|
59
|
+
e _+Helpline::PADDING+' # and then populate the '\
|
60
|
+
'postgresql-database with this information'
|
61
61
|
e # This here to keep a trailing newline.
|
62
62
|
}
|
63
63
|
end
|
@@ -14,8 +14,8 @@ class Helpline # === Bioroebe::Taxonomy::Helpline
|
|
14
14
|
|
15
15
|
begin
|
16
16
|
require 'colours'
|
17
|
-
include Colours
|
18
|
-
extend Colours::E
|
17
|
+
include ::Colours
|
18
|
+
extend ::Colours::E
|
19
19
|
rescue LoadError; end
|
20
20
|
|
21
21
|
# ========================================================================= #
|
@@ -10,7 +10,7 @@ module Bioroebe
|
|
10
10
|
|
11
11
|
module Taxonomy
|
12
12
|
|
13
|
-
class CheckAvailable # Taxonomy::CheckAvailable
|
13
|
+
class CheckAvailable # === Bioroebe::Taxonomy::CheckAvailable
|
14
14
|
|
15
15
|
require 'bioroebe/taxonomy/shared.rb'
|
16
16
|
|
@@ -49,11 +49,17 @@ class CheckAvailable # Taxonomy::CheckAvailable
|
|
49
49
|
# === reset
|
50
50
|
# ========================================================================= #
|
51
51
|
def reset
|
52
|
+
# ======================================================================= #
|
52
53
|
# === @shall_we_copy
|
54
|
+
# ======================================================================= #
|
53
55
|
@shall_we_copy = SHALL_WE_COPY
|
56
|
+
# ======================================================================= #
|
54
57
|
# === @array_duplicates
|
58
|
+
# ======================================================================= #
|
55
59
|
@array_duplicates = []
|
60
|
+
# ======================================================================= #
|
56
61
|
# === @array_no_match
|
62
|
+
# ======================================================================= #
|
57
63
|
@array_no_match = []
|
58
64
|
end
|
59
65
|
|
@@ -63,9 +69,9 @@ class CheckAvailable # Taxonomy::CheckAvailable
|
|
63
69
|
def show_important_directories
|
64
70
|
e 'We will now run through some important local directories.'
|
65
71
|
e
|
66
|
-
e
|
67
|
-
e
|
68
|
-
e
|
72
|
+
e " #{sfancy(INFO_DIR)}"
|
73
|
+
e " #{sfancy(AA_DIR)}"
|
74
|
+
e " #{sfancy(LOCALOME_DIR)}"
|
69
75
|
e
|
70
76
|
end
|
71
77
|
|
@@ -73,8 +79,8 @@ class CheckAvailable # Taxonomy::CheckAvailable
|
|
73
79
|
# === scan_info_directory
|
74
80
|
# ========================================================================= #
|
75
81
|
def scan_info_directory
|
76
|
-
Dir[INFO_DIR
|
77
|
-
e
|
82
|
+
Dir["#{INFO_DIR}*.INFO"].each {|entry|
|
83
|
+
e " → #{sfile(entry)} (#{sfancy(File.basename(entry))})"
|
78
84
|
show_similar_entries_in_aa_dir(entry)
|
79
85
|
}
|
80
86
|
end
|
@@ -89,7 +95,7 @@ class CheckAvailable # Taxonomy::CheckAvailable
|
|
89
95
|
query_string = AA_DIR+_+'*'
|
90
96
|
result = Dir[query_string]
|
91
97
|
unless result.empty?
|
92
|
-
e
|
98
|
+
e "The corresponding match should be #{sfile(result.first.to_s)}"
|
93
99
|
if result.size > 1
|
94
100
|
e red('!!!')+' At least one more entry was '+
|
95
101
|
'found though, at '+sfile(result[1])
|
@@ -98,7 +104,7 @@ class CheckAvailable # Taxonomy::CheckAvailable
|
|
98
104
|
if @shall_we_copy
|
99
105
|
e 'We will now copy these two entries.'
|
100
106
|
copy(original_input, TEST_DIR)
|
101
|
-
copy(result.first,
|
107
|
+
copy(result.first, TEST_DIR)
|
102
108
|
end
|
103
109
|
end
|
104
110
|
else
|
@@ -140,4 +146,4 @@ end; end; end
|
|
140
146
|
|
141
147
|
if __FILE__ == $PROGRAM_NAME
|
142
148
|
Bioroebe::Taxonomy::CheckAvailable.new(ARGV)
|
143
|
-
end #
|
149
|
+
end # taxonomycheckavailable
|
@@ -14,7 +14,7 @@
|
|
14
14
|
# If, however, an .INFO file is found in localomes, we will instead
|
15
15
|
# assume that a fasta file will also be nearby.
|
16
16
|
# =========================================================================== #
|
17
|
-
# require 'taxonomy/info.rb'
|
17
|
+
# require 'bioroebe/taxonomy/info.rb'
|
18
18
|
# =========================================================================== #
|
19
19
|
require 'bioroebe/base/commandline_application/commandline_application.rb'
|
20
20
|
|
@@ -27,10 +27,7 @@ class Info < ::Bioroebe::CommandlineApplication
|
|
27
27
|
require 'bioroebe/taxonomy/constants.rb'
|
28
28
|
require 'bioroebe/taxonomy/shared.rb'
|
29
29
|
|
30
|
-
|
31
|
-
# === NAMESPACE
|
32
|
-
# ========================================================================= #
|
33
|
-
NAMESPACE = inspect
|
30
|
+
include Bioroebe::Taxonomy::Constants
|
34
31
|
|
35
32
|
# ========================================================================= #
|
36
33
|
# === DEFAULT_TARGET
|
@@ -40,7 +37,6 @@ class Info < ::Bioroebe::CommandlineApplication
|
|
40
37
|
|
41
38
|
attr_accessor :location
|
42
39
|
alias location? location
|
43
|
-
attr_accessor :taxonomy_id
|
44
40
|
|
45
41
|
# ========================================================================= #
|
46
42
|
# === initialize
|
@@ -87,14 +83,11 @@ class Info < ::Bioroebe::CommandlineApplication
|
|
87
83
|
# ========================================================================= #
|
88
84
|
def reset
|
89
85
|
super()
|
90
|
-
|
91
|
-
# === @namespace
|
92
|
-
# ======================================================================= #
|
93
|
-
@namespace = NAMESPACE
|
86
|
+
infer_the_namespace
|
94
87
|
# ======================================================================= #
|
95
88
|
# === @be_verbose
|
96
89
|
# ======================================================================= #
|
97
|
-
|
90
|
+
set_be_verbose
|
98
91
|
# ======================================================================= #
|
99
92
|
# === @location
|
100
93
|
# ======================================================================= #
|
@@ -320,6 +313,20 @@ class Info < ::Bioroebe::CommandlineApplication
|
|
320
313
|
@data
|
321
314
|
end
|
322
315
|
|
316
|
+
# ========================================================================= #
|
317
|
+
# === set_taxonomy_id
|
318
|
+
# ========================================================================= #
|
319
|
+
def set_taxonomy_id(i)
|
320
|
+
@taxonomy_id = i
|
321
|
+
end; alias taxonomy_id= set_taxonomy_id # === taxonomy_id=
|
322
|
+
|
323
|
+
# ========================================================================= #
|
324
|
+
# === taxonomy_id?
|
325
|
+
# ========================================================================= #
|
326
|
+
def taxonomy_id?
|
327
|
+
@taxonomy_id
|
328
|
+
end; alias taxonomy_id taxonomy_id? # === taxonomy
|
329
|
+
|
323
330
|
# ========================================================================= #
|
324
331
|
# === run
|
325
332
|
#
|
@@ -28,7 +28,6 @@ class IsDNA # === Bioroebe::Taxonomy::IsDNA['ATTAAA']
|
|
28
28
|
# - min-Zeichen
|
29
29
|
|
30
30
|
attr_reader :n_entries # How many characters we will process in total.
|
31
|
-
attr_reader :total_characters
|
32
31
|
|
33
32
|
# ========================================================================= #
|
34
33
|
# === initialize
|
@@ -50,6 +49,9 @@ class IsDNA # === Bioroebe::Taxonomy::IsDNA['ATTAAA']
|
|
50
49
|
# === @be_verbose
|
51
50
|
# ======================================================================= #
|
52
51
|
@be_verbose = false
|
52
|
+
# ======================================================================= #
|
53
|
+
# === @n_entries
|
54
|
+
# ======================================================================= #
|
53
55
|
@n_entries = 0
|
54
56
|
end
|
55
57
|
|
@@ -75,38 +77,11 @@ class IsDNA # === Bioroebe::Taxonomy::IsDNA['ATTAAA']
|
|
75
77
|
end
|
76
78
|
|
77
79
|
# ========================================================================= #
|
78
|
-
# ===
|
79
|
-
#
|
80
|
-
# Class method for a more convenient output.
|
80
|
+
# === total_characters?
|
81
81
|
# ========================================================================= #
|
82
|
-
def
|
83
|
-
|
84
|
-
end
|
85
|
-
|
86
|
-
# ========================================================================= #
|
87
|
-
# === run
|
88
|
-
# ========================================================================= #
|
89
|
-
def run
|
90
|
-
result = true
|
91
|
-
# @input is an array.
|
92
|
-
@input.each {|entry|
|
93
|
-
splitted = entry.chars
|
94
|
-
splitted.each {|inner_entry|
|
95
|
-
@n_entries += 1
|
96
|
-
if ARRAY_VALID_DNA_SEQUENCES.include? inner_entry
|
97
|
-
else # else it can not be DNA.
|
98
|
-
if @be_verbose
|
99
|
-
e 'Nope, '+sfancy(inner_entry)+' is not DNA.'
|
100
|
-
pp splitted
|
101
|
-
end
|
102
|
-
result = false
|
103
|
-
break
|
104
|
-
end
|
105
|
-
}
|
106
|
-
}
|
107
|
-
@result = result
|
108
|
-
return result
|
109
|
-
end
|
82
|
+
def total_characters?
|
83
|
+
@total_characters
|
84
|
+
end; alias total_characters total_characters? # === total_characters
|
110
85
|
|
111
86
|
# ========================================================================= #
|
112
87
|
# === is_dna?
|
@@ -139,12 +114,47 @@ class IsDNA # === Bioroebe::Taxonomy::IsDNA['ATTAAA']
|
|
139
114
|
@result
|
140
115
|
end; alias result result? # === result
|
141
116
|
|
117
|
+
# ========================================================================= #
|
118
|
+
# === run
|
119
|
+
# ========================================================================= #
|
120
|
+
def run
|
121
|
+
result = true
|
122
|
+
# @input is an array.
|
123
|
+
@input.each {|entry|
|
124
|
+
splitted = entry.chars
|
125
|
+
splitted.each {|inner_entry|
|
126
|
+
@n_entries += 1
|
127
|
+
if ARRAY_VALID_DNA_SEQUENCES.include? inner_entry
|
128
|
+
else # else it can not be DNA.
|
129
|
+
if @be_verbose
|
130
|
+
e "Nope, #{sfancy(inner_entry)} is not DNA."
|
131
|
+
pp splitted
|
132
|
+
end
|
133
|
+
result = false
|
134
|
+
break
|
135
|
+
end
|
136
|
+
}
|
137
|
+
}
|
138
|
+
@result = result
|
139
|
+
return result
|
140
|
+
end
|
141
|
+
|
142
|
+
# ========================================================================= #
|
143
|
+
# === IsDNA['ABC']
|
144
|
+
#
|
145
|
+
# Class method for a more convenient output.
|
146
|
+
# ========================================================================= #
|
147
|
+
def self.[](i = ARGV)
|
148
|
+
return new(i).result
|
149
|
+
end
|
150
|
+
|
142
151
|
end; end; end
|
143
152
|
|
144
153
|
if __FILE__ == $PROGRAM_NAME
|
154
|
+
alias e puts
|
145
155
|
include Bioroebe::Taxonomy
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
156
|
+
e IsDNA.new('ATTTAAA').result
|
157
|
+
e IsDNA.new.result
|
158
|
+
e IsDNA.new('TGGGGTACTACTTTGATGAGTACTGTCAGGTGAACGCCACGACACGAAGCATTTCTTCTGTTGCATCCTTCATTGACTTTGATGTATTTGGCTTTGTCAATGAGATTTGCAGTGACTCATTTGAGACATACGAAGCAGTATACAACGCTTCTTACAGTTGCACCACTAACGGCGGTGCTTATCTTGAATCGGATGATAGTGGGTACGATAATTCTGGAGACCAAGGCAAAGATGGAAACAACGAAGAAAGGCACGAGCGCGAAGATAACAGAGAAGAGGAAGATAGGAACTCAAGAGACAGCCAGGAGTTTGAGATGTCAGGAGAGGACGTATGTTTTGCAGTTTACACAGCTGAACATTTTCAATCTATAAGAAACAAGGAGATCGCAGTGCACTACCTCAAGACACTGGTGC').result
|
159
|
+
e IsDNA['ABC']
|
150
160
|
end # rb is_dna.rb
|