bioroebe 0.10.80 → 0.11.32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of bioroebe might be problematic. Click here for more details.

Files changed (210) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +3209 -2618
  3. data/bin/bioroebe +7 -1
  4. data/bioroebe.gemspec +3 -3
  5. data/doc/README.gen +3208 -2617
  6. data/doc/quality_control/commandline_applications.md +3 -3
  7. data/doc/todo/bioroebe_todo.md +2040 -2615
  8. data/lib/bioroebe/aminoacids/aminoacid_substitution.rb +1 -9
  9. data/lib/bioroebe/aminoacids/codon_percentage.rb +1 -9
  10. data/lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb +1 -9
  11. data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +1 -0
  12. data/lib/bioroebe/aminoacids/show_hydrophobicity.rb +1 -6
  13. data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +18 -8
  14. data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +13 -11
  15. data/lib/bioroebe/base/commandline_application/misc.rb +26 -9
  16. data/lib/bioroebe/base/commandline_application/opn.rb +8 -8
  17. data/lib/bioroebe/base/commandline_application/reset.rb +3 -2
  18. data/lib/bioroebe/base/misc.rb +35 -0
  19. data/lib/bioroebe/base/prototype/misc.rb +11 -1
  20. data/lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb +2 -10
  21. data/lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb +1 -9
  22. data/lib/bioroebe/codons/show_codon_tables.rb +6 -2
  23. data/lib/bioroebe/codons/show_codon_usage.rb +2 -1
  24. data/lib/bioroebe/constants/aminoacids_and_proteins.rb +1 -0
  25. data/lib/bioroebe/constants/database_constants.rb +1 -1
  26. data/lib/bioroebe/constants/files_and_directories.rb +31 -4
  27. data/lib/bioroebe/constants/misc.rb +20 -0
  28. data/lib/bioroebe/count/count_amount_of_nucleotides.rb +3 -0
  29. data/lib/bioroebe/crystal/README.md +2 -0
  30. data/lib/bioroebe/crystal/to_rna.cr +19 -0
  31. data/lib/bioroebe/data/README.md +11 -8
  32. data/lib/bioroebe/data/electron_microscopy/pos_example.pos +396 -0
  33. data/lib/bioroebe/data/electron_microscopy/test_particles.star +36 -0
  34. data/lib/bioroebe/electron_microscopy/coordinate_analyzer.rb +15 -18
  35. data/lib/bioroebe/{fasta_and_fastq/parse_fasta/run.rb → electron_microscopy/electron_microscopy_module.rb} +16 -8
  36. data/lib/bioroebe/electron_microscopy/fix_pos_file.rb +1 -9
  37. data/lib/bioroebe/electron_microscopy/flipy.rb +83 -0
  38. data/lib/bioroebe/electron_microscopy/parse_coordinates.rb +2 -10
  39. data/lib/bioroebe/electron_microscopy/read_file_xmd.rb +1 -9
  40. data/lib/bioroebe/electron_microscopy/simple_star_file_generator.rb +4 -9
  41. data/lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb +1 -9
  42. data/lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb +7 -9
  43. data/lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb +1 -5
  44. data/lib/bioroebe/fasta_and_fastq/fasta_to_yaml/fasta_to_yaml.rb +81 -0
  45. data/lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb +1460 -7
  46. data/lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb +11 -2
  47. data/lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb +27 -12
  48. data/lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb +1 -5
  49. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb +0 -5
  50. data/lib/bioroebe/genome/README.md +4 -0
  51. data/lib/bioroebe/genome/genome.rb +67 -0
  52. data/lib/bioroebe/genomes/genome_pattern.rb +3 -9
  53. data/lib/bioroebe/gui/gtk +1 -0
  54. data/lib/bioroebe/gui/gtk3/controller/controller.rb +45 -27
  55. data/lib/bioroebe/gui/gtk3/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +76 -50
  56. data/lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb +99 -21
  57. data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb +42 -28
  58. data/lib/bioroebe/gui/gtk3/nucleotide_analyser/nucleotide_analyser.rb +119 -71
  59. data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +18 -18
  60. data/lib/bioroebe/gui/gtk3/random_sequence/random_sequence.rb +19 -11
  61. data/lib/bioroebe/gui/libui/three_to_one/three_to_one.rb +8 -6
  62. data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +14 -14
  63. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$1.class +0 -0
  64. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$CloseListener.class +0 -0
  65. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.class +0 -0
  66. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.java +141 -0
  67. data/lib/bioroebe/java/README.md +4 -0
  68. data/lib/bioroebe/java/bioroebe/Sequence.java +25 -1
  69. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Base.class +0 -0
  70. data/lib/bioroebe/java/bioroebe/{Base.java → src/main/java/bioroebe/Base.java} +15 -2
  71. data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.class → src/main/java/bioroebe/BisulfiteTreatment.class} +0 -0
  72. data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.java → src/main/java/bioroebe/BisulfiteTreatment.java} +0 -0
  73. data/lib/bioroebe/java/bioroebe/{Codons.class → src/main/java/bioroebe/Codons.class} +0 -0
  74. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Codons.java +34 -0
  75. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Commandline.class +0 -0
  76. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Commandline.java +95 -0
  77. data/lib/bioroebe/java/bioroebe/{Esystem.class → src/main/java/bioroebe/Esystem.class} +0 -0
  78. data/lib/bioroebe/java/bioroebe/{Esystem.java → src/main/java/bioroebe/Esystem.java} +0 -0
  79. data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.class → src/main/java/bioroebe/GenerateRandomDnaSequence.class} +0 -0
  80. data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.java → src/main/java/bioroebe/GenerateRandomDnaSequence.java} +8 -2
  81. data/lib/bioroebe/java/bioroebe/{IsPalindrome.class → src/main/java/bioroebe/IsPalindrome.class} +0 -0
  82. data/lib/bioroebe/java/bioroebe/{IsPalindrome.java → src/main/java/bioroebe/IsPalindrome.java} +5 -1
  83. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/PartnerNucleotide.class +0 -0
  84. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/PartnerNucleotide.java +56 -0
  85. data/lib/bioroebe/java/bioroebe/{RemoveFile.class → src/main/java/bioroebe/RemoveFile.class} +0 -0
  86. data/lib/bioroebe/java/bioroebe/{RemoveFile.java → src/main/java/bioroebe/RemoveFile.java} +10 -4
  87. data/lib/bioroebe/java/bioroebe/{RemoveNumbers.class → src/main/java/bioroebe/RemoveNumbers.class} +0 -0
  88. data/lib/bioroebe/java/bioroebe/{RemoveNumbers.java → src/main/java/bioroebe/RemoveNumbers.java} +1 -0
  89. data/lib/bioroebe/java/bioroebe/{ToCamelcase.class → src/main/java/bioroebe/ToCamelcase.class} +0 -0
  90. data/lib/bioroebe/java/bioroebe/{ToCamelcase.java → src/main/java/bioroebe/ToCamelcase.java} +3 -3
  91. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/ToRNA.class +0 -0
  92. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/ToRNA.java +42 -0
  93. data/lib/bioroebe/java/bioroebe/toplevel_methods/BaseComposition.class +0 -0
  94. data/lib/bioroebe/java/bioroebe/toplevel_methods/BaseComposition.java +73 -0
  95. data/lib/bioroebe/misc/ruler.rb +1 -0
  96. data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb +1 -9
  97. data/lib/bioroebe/nucleotides/show_nucleotide_sequence.rb +7 -7
  98. data/lib/bioroebe/parsers/genbank_parser.rb +347 -26
  99. data/lib/bioroebe/parsers/gff.rb +1 -9
  100. data/lib/bioroebe/patterns/scan_for_repeat.rb +1 -5
  101. data/lib/bioroebe/pdb/fetch_fasta_sequence_from_pdb.rb +1 -9
  102. data/lib/bioroebe/pdb/parse_mmCIF_file.rb +1 -9
  103. data/lib/bioroebe/pdb/parse_pdb_file.rb +1 -9
  104. data/lib/bioroebe/project/project.rb +1 -1
  105. data/lib/bioroebe/python/README.md +1 -0
  106. data/lib/bioroebe/python/__pycache__/mymodule.cpython-39.pyc +0 -0
  107. data/lib/bioroebe/python/gui/gtk3/all_in_one.css +4 -0
  108. data/lib/bioroebe/python/gui/gtk3/all_in_one.py +59 -0
  109. data/lib/bioroebe/python/gui/gtk3/widget1.py +20 -0
  110. data/lib/bioroebe/python/gui/tkinter/all_in_one.py +91 -0
  111. data/lib/bioroebe/python/mymodule.py +8 -0
  112. data/lib/bioroebe/python/protein_to_dna.py +33 -0
  113. data/lib/bioroebe/python/shell/shell.py +19 -0
  114. data/lib/bioroebe/python/to_rna.py +14 -0
  115. data/lib/bioroebe/python/toplevel_methods/esystem.py +12 -0
  116. data/lib/bioroebe/python/toplevel_methods/open_in_browser.py +20 -0
  117. data/lib/bioroebe/python/toplevel_methods/palindromes.py +42 -0
  118. data/lib/bioroebe/python/toplevel_methods/rds.py +13 -0
  119. data/lib/bioroebe/python/toplevel_methods/shuffleseq.py +23 -0
  120. data/lib/bioroebe/python/toplevel_methods/three_delimiter.py +37 -0
  121. data/lib/bioroebe/python/toplevel_methods/time_and_date.py +43 -0
  122. data/lib/bioroebe/python/toplevel_methods/to_camelcase.py +21 -0
  123. data/lib/bioroebe/requires/require_the_bioroebe_project.rb +3 -1
  124. data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +28 -25
  125. data/lib/bioroebe/sequence/protein.rb +105 -3
  126. data/lib/bioroebe/sequence/sequence.rb +61 -2
  127. data/lib/bioroebe/shell/menu.rb +3819 -3713
  128. data/lib/bioroebe/shell/misc.rb +51 -4311
  129. data/lib/bioroebe/shell/readline/readline.rb +1 -1
  130. data/lib/bioroebe/shell/shell.rb +11250 -28
  131. data/lib/bioroebe/siRNA/siRNA.rb +81 -1
  132. data/lib/bioroebe/string_matching/find_longest_substring.rb +3 -2
  133. data/lib/bioroebe/string_matching/hamming_distance.rb +1 -9
  134. data/lib/bioroebe/taxonomy/class_methods.rb +3 -8
  135. data/lib/bioroebe/taxonomy/constants.rb +4 -3
  136. data/lib/bioroebe/taxonomy/edit.rb +2 -1
  137. data/lib/bioroebe/taxonomy/help/help.rb +10 -10
  138. data/lib/bioroebe/taxonomy/info/check_available.rb +15 -9
  139. data/lib/bioroebe/taxonomy/info/info.rb +18 -11
  140. data/lib/bioroebe/taxonomy/info/is_dna.rb +46 -36
  141. data/lib/bioroebe/taxonomy/interactive.rb +140 -104
  142. data/lib/bioroebe/taxonomy/menu.rb +27 -18
  143. data/lib/bioroebe/taxonomy/parse_fasta.rb +3 -1
  144. data/lib/bioroebe/taxonomy/shared.rb +1 -0
  145. data/lib/bioroebe/taxonomy/taxonomy.rb +1 -0
  146. data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +31 -24
  147. data/lib/bioroebe/toplevel_methods/colourize_related_methods.rb +164 -0
  148. data/lib/bioroebe/toplevel_methods/databases.rb +1 -1
  149. data/lib/bioroebe/toplevel_methods/fasta_and_fastq.rb +107 -63
  150. data/lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb +14 -2
  151. data/lib/bioroebe/toplevel_methods/misc.rb +118 -11
  152. data/lib/bioroebe/toplevel_methods/nucleotides.rb +22 -5
  153. data/lib/bioroebe/toplevel_methods/open_in_browser.rb +2 -0
  154. data/lib/bioroebe/toplevel_methods/palindromes.rb +1 -2
  155. data/lib/bioroebe/toplevel_methods/taxonomy.rb +2 -2
  156. data/lib/bioroebe/toplevel_methods/to_camelcase.rb +5 -0
  157. data/lib/bioroebe/utility_scripts/align_open_reading_frames.rb +1 -9
  158. data/lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb +1 -9
  159. data/lib/bioroebe/utility_scripts/compacter.rb +1 -9
  160. data/lib/bioroebe/utility_scripts/compseq/compseq.rb +1 -9
  161. data/lib/bioroebe/utility_scripts/create_batch_entrez_file.rb +1 -9
  162. data/lib/bioroebe/utility_scripts/dot_alignment.rb +1 -9
  163. data/lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb +1 -4
  164. data/lib/bioroebe/utility_scripts/showorf/constants.rb +0 -5
  165. data/lib/bioroebe/utility_scripts/showorf/reset.rb +1 -4
  166. data/lib/bioroebe/version/version.rb +2 -2
  167. data/lib/bioroebe/www/embeddable_interface.rb +101 -52
  168. data/lib/bioroebe/www/sinatra/sinatra.rb +186 -70
  169. data/lib/bioroebe/yaml/aminoacids/amino_acids_long_name_to_one_letter.yml +2 -2
  170. data/lib/bioroebe/yaml/configuration/browser.yml +1 -1
  171. data/lib/bioroebe/yaml/genomes/README.md +3 -4
  172. data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +3 -3
  173. metadata +69 -64
  174. data/doc/setup.rb +0 -1655
  175. data/lib/bioroebe/fasta_and_fastq/parse_fasta/constants.rb +0 -50
  176. data/lib/bioroebe/fasta_and_fastq/parse_fasta/initialize.rb +0 -86
  177. data/lib/bioroebe/fasta_and_fastq/parse_fasta/menu.rb +0 -117
  178. data/lib/bioroebe/fasta_and_fastq/parse_fasta/misc.rb +0 -981
  179. data/lib/bioroebe/fasta_and_fastq/parse_fasta/report.rb +0 -156
  180. data/lib/bioroebe/fasta_and_fastq/parse_fasta/reset.rb +0 -128
  181. data/lib/bioroebe/genbank/genbank_parser.rb +0 -291
  182. data/lib/bioroebe/java/bioroebe/Base.class +0 -0
  183. data/lib/bioroebe/java/bioroebe/Codons.java +0 -22
  184. data/lib/bioroebe/java/bioroebe/PartnerNucleotide.class +0 -0
  185. data/lib/bioroebe/java/bioroebe/PartnerNucleotide.java +0 -19
  186. data/lib/bioroebe/java/bioroebe.jar +0 -0
  187. data/lib/bioroebe/shell/add.rb +0 -108
  188. data/lib/bioroebe/shell/assign.rb +0 -360
  189. data/lib/bioroebe/shell/chop_and_cut.rb +0 -281
  190. data/lib/bioroebe/shell/constants.rb +0 -166
  191. data/lib/bioroebe/shell/download.rb +0 -335
  192. data/lib/bioroebe/shell/enable_and_disable.rb +0 -158
  193. data/lib/bioroebe/shell/enzymes.rb +0 -310
  194. data/lib/bioroebe/shell/fasta.rb +0 -345
  195. data/lib/bioroebe/shell/gtk.rb +0 -76
  196. data/lib/bioroebe/shell/history.rb +0 -132
  197. data/lib/bioroebe/shell/initialize.rb +0 -217
  198. data/lib/bioroebe/shell/loop.rb +0 -74
  199. data/lib/bioroebe/shell/prompt.rb +0 -107
  200. data/lib/bioroebe/shell/random.rb +0 -289
  201. data/lib/bioroebe/shell/reset.rb +0 -335
  202. data/lib/bioroebe/shell/scan_and_parse.rb +0 -135
  203. data/lib/bioroebe/shell/search.rb +0 -337
  204. data/lib/bioroebe/shell/sequences.rb +0 -200
  205. data/lib/bioroebe/shell/show_report_and_display.rb +0 -2901
  206. data/lib/bioroebe/shell/startup.rb +0 -127
  207. data/lib/bioroebe/shell/taxonomy.rb +0 -14
  208. data/lib/bioroebe/shell/tk.rb +0 -23
  209. data/lib/bioroebe/shell/user_input.rb +0 -88
  210. data/lib/bioroebe/shell/xorg.rb +0 -45
@@ -0,0 +1,73 @@
1
+ /* package BaseComposition; */
2
+
3
+ public class BaseComposition {
4
+
5
+ static String[] commandline_arguments;
6
+
7
+ /*
8
+ * Main constructor (def initialize)
9
+ */
10
+ public BaseComposition() {
11
+ run();
12
+ }
13
+ /*
14
+ * run()
15
+ */
16
+ void run() {
17
+ if (commandline_arguments.length == 0) {
18
+ en("Please provide an input argument.");
19
+ }
20
+ else {
21
+ String first_argument = commandline_arguments[0];
22
+ en(
23
+ "The base composition frequencies of this sequence "+
24
+ "(length: "+first_argument.length()+") is as follows:"
25
+ );
26
+ en("");
27
+ int total_length = first_argument.length();
28
+ int n_A = 0;
29
+ int n_T = 0;
30
+ int n_C = 0;
31
+ int n_G = 0;
32
+
33
+ for (int i = 0; i < total_length; i++) {
34
+ char this_char = first_argument.charAt(i);
35
+ if (this_char == 'A') {
36
+ n_A += 1;
37
+ }
38
+ else if (this_char == 'T') {
39
+ n_T += 1;
40
+ }
41
+ else if (this_char == 'C') {
42
+ n_C += 1;
43
+ }
44
+ else if (this_char == 'G') {
45
+ n_G += 1;
46
+ }
47
+ }
48
+ en(
49
+ " A: "+Math.round((n_A * 100.0 / total_length) * Math.pow(10, 2) ) / Math.pow(10, 2)+"% "+
50
+ "T: "+Math.round((n_T * 100.0 / total_length) * Math.pow(10, 2) ) / Math.pow(10, 2)+"% "+
51
+ "C: "+Math.round((n_C * 100.0 / total_length) * Math.pow(10, 2) ) / Math.pow(10, 2)+"% "+
52
+ "G: "+Math.round((n_G * 100.0 / total_length) * Math.pow(10, 2) ) / Math.pow(10, 2)+"% "
53
+ );
54
+ en("");
55
+ }
56
+ }
57
+
58
+
59
+
60
+ public static void e(String i) {
61
+ System.out.print(i);
62
+ }
63
+
64
+ public static void en(String i) {
65
+ System.out.println(i);
66
+ }
67
+
68
+ public static void main(String[] args) {
69
+ commandline_arguments = args;
70
+ BaseComposition x = new BaseComposition();
71
+ }
72
+
73
+ }
@@ -24,6 +24,7 @@
24
24
  # variant - that one should work fine.
25
25
  # =========================================================================== #
26
26
  # require 'bioroebe/misc/ruler.rb'
27
+ # Bioroebe.ruler_return_as_string_without_colours 'ATGCTGACAGGGGGGGEEEEEE'
27
28
  # Bioroebe::Ruler.new(ARGV)
28
29
  # =========================================================================== #
29
30
  require 'bioroebe/base/commandline_application/commandline_application.rb'
@@ -27,11 +27,6 @@ class MostLikelyNucleotideSequenceForThisAminoacidSequence < ::Bioroebe::Command
27
27
  require 'bioroebe/aminoacids/codon_percentage.rb'
28
28
  require 'bioroebe/codons/possible_codons_for_this_aminoacid.rb'
29
29
 
30
- # ========================================================================= #
31
- # === NAMESPACE
32
- # ========================================================================= #
33
- NAMESPACE = inspect
34
-
35
30
  # ========================================================================= #
36
31
  # === initialize
37
32
  # ========================================================================= #
@@ -63,10 +58,7 @@ class MostLikelyNucleotideSequenceForThisAminoacidSequence < ::Bioroebe::Command
63
58
  # ========================================================================= #
64
59
  def reset
65
60
  super()
66
- # ======================================================================= #
67
- # === @namespace
68
- # ======================================================================= #
69
- @namespace = NAMESPACE
61
+ infer_the_namespace
70
62
  # ======================================================================= #
71
63
  # === @internal_hash
72
64
  # ======================================================================= #
@@ -475,13 +475,6 @@ class ShowNucleotideSequence < ::Bioroebe::Sequence # === Bioroebe::ShowNucleoti
475
475
  erev i
476
476
  end; alias display report # === display (display tag)
477
477
 
478
- # ========================================================================= #
479
- # === do_colourize_the_start_codon
480
- # ========================================================================= #
481
- def do_colourize_the_start_codon
482
- add_this_substring('ATG')
483
- end
484
-
485
478
  # ========================================================================= #
486
479
  # === colourize_dna_sequence
487
480
  # ========================================================================= #
@@ -529,6 +522,13 @@ class ShowNucleotideSequence < ::Bioroebe::Sequence # === Bioroebe::ShowNucleoti
529
522
  end; alias set_search_for search_for_this_substring # === set_search_for
530
523
  alias add_this_substring search_for_this_substring # === add_this_substring
531
524
 
525
+ # ========================================================================= #
526
+ # === do_colourize_the_start_codon
527
+ # ========================================================================= #
528
+ def do_colourize_the_start_codon
529
+ add_this_substring('ATG')
530
+ end
531
+
532
532
  # ========================================================================= #
533
533
  # === run
534
534
  # ========================================================================= #
@@ -4,10 +4,29 @@
4
4
  # =========================================================================== #
5
5
  # === Bioroebe::GenbankParser
6
6
  #
7
- # This class can be used to parse genbank-files. Their file extension is
8
- # typically ".gbk".
7
+ # This class can be used to parse genbank-files. Their file extension is
8
+ # typically ".gbk" or ".genbank".
9
+ #
10
+ # Since the rewrite in July 2022 the class can also handle multiple
11
+ # FASTA entries.
12
+ #
13
+ # The class is similar to class FastaParser, but instead it will only
14
+ # select the content between "ORIGIN" and "VERSION" entries.
15
+ #
16
+ # The user can pass the content of a genbank-file to this class, and it
17
+ # can then report the nucleotide sequence, e. g. the part starting after
18
+ # the ORIGIN string.
19
+ #
20
+ # The reason why this class has been created was because it is sometimes
21
+ # necessary to parse a genbank file.
22
+ #
23
+ # Usage example:
24
+ #
25
+ # Bioroebe::GenbankParser.new(ARGV)
26
+ #
9
27
  # =========================================================================== #
10
- # require 'bioroebe/parsers/genbank_parser.rb'
28
+ # require 'bioroebe/parsers/genbank_parser.rb'
29
+ # genbank_parser = Bioroebe::GenbankParser.new(ARGV)
11
30
  # =========================================================================== #
12
31
  require 'bioroebe/base/commandline_application/commandline_application.rb'
13
32
 
@@ -16,63 +35,365 @@ module Bioroebe
16
35
  class GenbankParser < ::Bioroebe::CommandlineApplication # === Bioroebe::GenbankParser
17
36
 
18
37
  # ========================================================================= #
19
- # === NAMESPACE
38
+ # === UPCASE_THE_SEQUENCE
39
+ #
40
+ # Setting this constant to true will cause this class to store the
41
+ # FASTA sequence in an upcased variant, e. g. "AGCAGCTA" rather
42
+ # than "acgatcag".
20
43
  # ========================================================================= #
21
- NAMESPACE = inspect
44
+ UPCASE_THE_SEQUENCE = true
45
+
46
+ # ========================================================================= #
47
+ # === TEST_STRING
48
+ #
49
+ # Our example test-string, to show what such a genbank file usually looks
50
+ # like.
51
+ #
52
+ # This will contain two different FASTA sequences.
53
+ # ========================================================================= #
54
+ TEST_STRING = ' /note="internal transcribed spacer 2"
55
+ ORIGIN
56
+ 1 cgtaacaagg tttccgtagg tgaaccttcg gaaggatcat tgttgagacc cccaaaaaaa
57
+ 61 cgatcgagtt aatccggagg accggtgtag tttggtctcc caggggcttt ggctactgtg
58
+ 121 gtggccgtga atttccgtcg aacctccttg ggagaattct tgatggcaat tgaacccttg
59
+ 181 gcccggcgca gtttcgcccc aagtcaaatg agatggaacc ggcggagggc atcgtcctcc
60
+ 241 atggaaccgg ggagggccgg cgttcttccg ttccccccat gaattttttt ttgacaactc
61
+ 301 tcggcaacgg atatctcggc tctttgcatc cgatgaaaga acccagcgaa atgtgataag
62
+ 361 tggtgtgaat tgcagaatcc cgtgaaccat cgagtctttg aacgcaagtt gcgcccgagg
63
+ 421 ccatcaggct aagggcacgc ctgcctgggc gttgcgtgct gcatctctct cccattgcta
64
+ 481 aggctgaaca ggcatactgt tcggccggcg cggatgagtg tttggcccct tgttcttcgg
65
+ 541 tgcgatgggt ccaagacctg ggcttttgac ggccggaaat ccggcaagag gtggacggac
66
+ 601 ggtggctgcg acgaagctgt cgtgcgaatg ccctacgctg tcgtatttga tgggccggaa
67
+ 661 taaatccctt ttgagcccca ttggaggcac gtcaacccgt gggcggtcga cggccatttg
68
+ 721 gatgcaaccc caggtcaggt gagga
69
+ //
70
+ LOCUS Z78510 750 bp DNA linear PLN 30-NOV-2006
71
+ DEFINITION P.caricinum 5.8S rRNA gene and ITS1 and ITS2 DNA.
72
+ ACCESSION Z78510
73
+ VERSION Z78510.1 GI:2765635
74
+ KEYWORDS 5.8S ribosomal RNA; 5.8S rRNA gene; internal transcribed spacer;
75
+ ITS1; ITS2.
76
+ SOURCE Phragmipedium caricinum
77
+ ORGANISM Phragmipedium caricinum
78
+ Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
79
+ Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae;
80
+ Cypripedioideae; Phragmipedium.
81
+ REFERENCE 1
82
+ AUTHORS Cox,A.V., Pridgeon,A.M., Albert,V.A. and Chase,M.W.
83
+ TITLE Phylogenetics of the slipper orchids (Cypripedioideae:
84
+ Orchidaceae): nuclear rDNA ITS sequences
85
+ JOURNAL Unpublished
86
+ REFERENCE 2 (bases 1 to 750)
87
+ AUTHORS Cox,A.V.
88
+ TITLE Direct Submission
89
+ JOURNAL Submitted (19-AUG-1996) Cox A.V., Royal Botanic Gardens, Kew,
90
+ Richmond, Surrey TW9 3AB, UK
91
+ FEATURES Location/Qualifiers
92
+ source 1..750
93
+ /organism="Phragmipedium caricinum"
94
+ /mol_type="genomic DNA"
95
+ /db_xref="taxon:53127"
96
+ misc_feature 1..380
97
+ /note="internal transcribed spacer 1"
98
+ gene 381..550
99
+ /gene="5.8S rRNA"
100
+ rRNA 381..550
101
+ /gene="5.8S rRNA"
102
+ /product="5.8S ribosomal RNA"
103
+ misc_feature 551..750
104
+ /note="internal transcribed spacer 2"
105
+ ORIGIN
106
+ 1 ctaaccaggg ttccgaggtg accttcggga ggattccttt ttaagccccc gaaaaaacga
107
+ 61 tcgaattaaa ccggaggacc ggtttaattt ggtctcccca ggggctttcc ccccttggtg
108
+ 121 gccgtgaatt tccatcgaac ccccctggga gaattcttgg tggccaatgg acccttggcc
109
+ 181 cggcgcaatt tcccccccaa tcaaatgaga taggaccggc agggggcgtc cccccccatg
110
+ 241 gaaccgggga gggccggcat tcttccgttc ccccctcgga ttttttgaca actctcgcaa
111
+ 301 cggatatctc gcctctttgc atcggatgga agaacgcagc gaaatgtgat aagtggtgtg
112
+ 361 aattgcagaa tcccgtgaac catcgagtct ttgaacgcaa gttgcgcccg aggccatcag
113
+ 421 gctaagggca cgcctgcctg ggcgttgcgt gctgcatctc tcccattgct aaggttgaac
114
+ 481 gggcatactg ttcggccggc gcggatgaga gattggcccc ttgttcttcg gtgcgatggg
115
+ 541 tccaagacct gggcttttga cggtccaaaa tccggcaaga ggtggacgga cggtggctgc
116
+ 601 gacaaagctg tcgtgcgaat gccctgcgtt gtcgtttttg atgggccgga ataaatccct
117
+ 661 tttgaacccc attggaggca cgtcaaccca tgggcggttg acggccattt ggatgcaacc
118
+ 721 ccaggtcagg tgagccaccc gctgagttta
119
+ //
120
+ LOCUS Z78509 731 bp DNA linear PLN 30-NOV-2006
121
+ DEFINITION P.pearcei 5.8S rRNA gene and ITS1 and ITS2 DNA.
122
+ ACCESSION Z78509
123
+ VERSION Z78509.1 GI:2765634
124
+ KEYWORDS 5.8S ribosomal RNA; 5.8S rRNA gene; internal transcribed spacer;
125
+ ITS1; ITS2.
126
+ SOURCE Phragmipedium pearcei
127
+ ORGANISM Phragmipedium pearcei
128
+ Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
129
+ Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae;
130
+ Cypripedioideae; Phragmipedium.
131
+ REFERENCE 1
132
+ AUTHORS Cox,A.V., Pridgeon,A.M., Albert,V.A. and Chase,M.W.
133
+ TITLE Phylogenetics of the slipper orchids (Cypripedioideae:
134
+ Orchidaceae): nuclear rDNA ITS sequences
135
+ JOURNAL Unpublished
136
+ REFERENCE 2 (bases 1 to 731)
137
+ AUTHORS Cox,A.V.
138
+ TITLE Direct Submission
139
+ JOURNAL Submitted (19-AUG-1996) Cox A.V., Royal Botanic Gardens, Kew,
140
+ Richmond, Surrey TW9 3AB, UK
141
+ FEATURES Location/Qualifiers
142
+ source 1..731
143
+ /organism="Phragmipedium pearcei"
144
+ /mol_type="genomic DNA"
145
+ /db_xref="taxon:53135"
146
+ misc_feature 1..380
147
+ /note="internal transcribed spacer 1"
148
+ gene 381..550
149
+ /gene="5.8S rRNA"
150
+ rRNA 381..550
151
+ /gene="5.8S rRNA"
152
+ /product="5.8S ribosomal RNA"
153
+ misc_feature 551..731
154
+ /note="internal transcribed spacer 2"
155
+ ORIGIN
156
+ 1 cgtaacaagg tttccgtagg tgaacctgcg gaaggatcat tgttgagacc gccaaatata
157
+ 61 cgatcgagtt aatccggagg accggtgtag tttggtctcc caggggcttt cgccgctgtg
158
+ 121 gtgaccgtga tttgccatcg agcctccttg ggagatttct tgatggcaat tgaacccttg
159
+ 181 gcccggcgca gtttcgcgcc aagtcatatg agatagaacc ggcggagggc gtcgtcctcc
160
+ 241 atggagcggg gagggccggc atgctccgtg cccccccatg aatttttctg acaactctcg
161
+ 301 gcaacggacg taacaaggtt taaatgtgat aagcaggtgt gaattgcaga atcccgtgaa
162
+ 361 ccatcgagtc tttgaacgca agttgcgccc gaggccatca ggttaagggc acgcctgcct
163
+ 421 gggcgttgcg tgctgcatct ctcccattgc taaggttgaa cgggcatact gttcggccgg
164
+ 481 cgcggatgag agtttggccc cttgttcttc ggtgcgatgg gtccaagacc tgggcttttg
165
+ 541 acggtccaaa atccggcaag aggtggacgg acggtggctg cgacagagct gtcgtgcgaa
166
+ 601 tgccctacgt tgtcgttttt gatgggccag aataaatccc ttttgaaccc cattggaggc
167
+ 661 acgtcaaccc aatggggggt gacgggcatt tggttaaccc cggcaagtta aggcacccgt
168
+ 721 taattttagg a
169
+ //
170
+ LOCUS Z78508 741 bp DNA linear PLN 30-NOV-2006'
22
171
 
23
172
  # ========================================================================= #
24
173
  # === initialize
25
174
  # ========================================================================= #
26
175
  def initialize(
27
- commandline_arguments = ARGV,
176
+ commandline_arguments = nil,
28
177
  run_already = true
29
178
  )
30
179
  reset
31
180
  set_commandline_arguments(
32
181
  commandline_arguments
33
182
  )
183
+ menu
184
+ if block_given?
185
+ yielded = yield
186
+ case yielded
187
+ # ===================================================================== #
188
+ # === :do_not_report_anything
189
+ # ===================================================================== #
190
+ when :do_not_report_anything
191
+ @internal_hash[:report_the_dataset] = false
192
+ end
193
+ end
34
194
  run if run_already
35
195
  end
36
196
 
37
197
  # ========================================================================= #
38
- # === reset
198
+ # === reset (reset tag)
39
199
  # ========================================================================= #
40
200
  def reset
41
201
  super()
202
+ infer_the_namespace
203
+ # ======================================================================= #
204
+ # === @internal_hash
205
+ # ======================================================================= #
206
+ # @internal_hash = {}
207
+ # ======================================================================= #
208
+ # === :work_on_this_file
209
+ # ======================================================================= #
210
+ @internal_hash[:work_on_this_file] = nil
211
+ # ======================================================================= #
212
+ # === :report_the_dataset
213
+ # ======================================================================= #
214
+ @internal_hash[:report_the_dataset] = true
215
+ # ======================================================================= #
216
+ # === :n_FASTA_entries_in_the_file
217
+ #
218
+ # This variable will keep track how many FASTA entries are in
219
+ # the genbank file at hand.
220
+ # ======================================================================= #
221
+ @internal_hash[:n_FASTA_entries_in_the_file] = 0
222
+ # ======================================================================= #
223
+ # === :dataset_from_all_FASTA_entries_as_a_hash
224
+ #
225
+ # This hash will contain all the FASTA sequences in the given
226
+ # genbank file at hand. This constitutes the main dataset of
227
+ # this class.
228
+ # ======================================================================= #
229
+ @internal_hash[:dataset_from_all_FASTA_entries_as_a_hash] = {}
42
230
  end
43
231
 
44
232
  # ========================================================================= #
45
- # === run
233
+ # === menu (menu tag)
46
234
  # ========================================================================= #
47
- def run
48
- _ = first_argument?
49
- if _ and File.exist?(_)
50
- dataset = File.readlines(_).select {|line|
51
- line.start_with? ' '
52
- }.map {|entry|
53
- splitted = entry.strip.split(' ')
54
- splitted.pop # Remove the last element.
55
- splitted.join.strip
56
- }
57
- if dataset.is_a? Array
58
- dataset = dataset.join
235
+ def menu(
236
+ i = commandline_arguments_containing_leading_hyphens?
237
+ )
238
+ if i.is_a? Array
239
+ i.each {|entry| menu(entry) }
240
+ else
241
+ case i # (case tag)
242
+ # ===================================================================== #
243
+ # === gparser --help
244
+ # ===================================================================== #
245
+ when /^-?-?help$/i
246
+ show_help
247
+ exit
248
+ # ===================================================================== #
249
+ # === gparser --test
250
+ #
251
+ # This entry point can be used to test the default TEST_STRING.
252
+ # ===================================================================== #
253
+ when /^-?-?test$/i,
254
+ /^-?-?test(-|_)?string$/i
255
+ analyse_this_dataset(TEST_STRING)
256
+ exit
59
257
  end
60
- @sequence = dataset
61
258
  end
62
259
  end
63
260
 
261
+ # ========================================================================= #
262
+ # === work_on_which_file?
263
+ # ========================================================================= #
264
+ def work_on_which_file?
265
+ @internal_hash[:work_on_this_file]
266
+ end
267
+
268
+ # ========================================================================= #
269
+ # === report_the_dataset?
270
+ # ========================================================================= #
271
+ def report_the_dataset?
272
+ @internal_hash[:report_the_dataset]
273
+ end
274
+
275
+ # ========================================================================= #
276
+ # === set_work_on_this_file
277
+ # ========================================================================= #
278
+ def set_work_on_this_file(
279
+ i = first_argument?
280
+ )
281
+ @internal_hash[:work_on_this_file] = i
282
+ end
283
+
284
+ # ========================================================================= #
285
+ # === analyse_this_dataset
286
+ # ========================================================================= #
287
+ def analyse_this_dataset(dataset)
288
+ use_this_regex =
289
+ /ORIGIN[\/\-\.\s0-9a-zA-Z]+VERSION\s*[\.0-9A-Z]+/ # See: https://rubular.com/r/0q7rFIUflX7yzw
290
+ scanned = dataset.scan(use_this_regex)
291
+ @internal_hash[:n_FASTA_entries_in_the_file] = scanned
292
+ discover_the_corresponding_FASTA_entries_from_this_dataset(scanned)
293
+ consider_reporting_our_findings_to_the_user
294
+ end; alias determine_dataset analyse_this_dataset # === determine_dataset
295
+
296
+ # ========================================================================= #
297
+ # === dataset?
298
+ # ========================================================================= #
299
+ def dataset?
300
+ @internal_hash[:dataset_from_all_FASTA_entries_as_a_hash]
301
+ end; alias main_dataset? dataset? # === main_dataset?
302
+
303
+ # ========================================================================= #
304
+ # === sequences?
305
+ # ========================================================================= #
306
+ def sequences?
307
+ dataset?.values
308
+ end
309
+
64
310
  # ========================================================================= #
65
311
  # === sequence?
66
312
  # ========================================================================= #
67
313
  def sequence?
68
- @sequence
314
+ sequences?.first
69
315
  end; alias coding_sequence? sequence? # === coding_sequence?
70
316
  alias cds sequence? # === cds
71
317
 
318
+ # ========================================================================= #
319
+ # === discover_the_corresponding_FASTA_entries_from_this_dataset
320
+ # ========================================================================= #
321
+ def discover_the_corresponding_FASTA_entries_from_this_dataset(i)
322
+ regex_to_use_for_the_id = /VERSION\s*([\.A-Za-z0-9]+)/
323
+ if i.is_a? Array
324
+ i.each {|this_dataset|
325
+ this_dataset =~ regex_to_use_for_the_id
326
+ use_this_id = $1.to_s.dup
327
+ use_this_FASTA_sequence = this_dataset.scan(
328
+ /^\s*\d{1,100}([\sa-zA-Z]+)/
329
+ ).flatten.join(' ').delete(" \n")
330
+ use_this_FASTA_sequence.upcase! if UPCASE_THE_SEQUENCE
331
+ @internal_hash[:dataset_from_all_FASTA_entries_as_a_hash][use_this_id] = use_this_FASTA_sequence
332
+ }
333
+ end
334
+ end
335
+
336
+ # ========================================================================= #
337
+ # === verbose_check_whether_the_file_exists
338
+ # ========================================================================= #
339
+ def verbose_check_whether_the_file_exists
340
+ _ = @internal_hash[:work_on_this_file]
341
+ if _ and File.exist?(_)
342
+ true
343
+ else
344
+ opnn; e 'No file exists at '+sfile(_)
345
+ false
346
+ end
347
+ end
348
+
349
+ # ========================================================================= #
350
+ # === consider_reporting_our_findings_to_the_user (report tag)
351
+ # ========================================================================= #
352
+ def consider_reporting_our_findings_to_the_user
353
+ if report_the_dataset? and dataset? and !dataset?.empty?
354
+ main_dataset?.each_pair {|key, value|
355
+ e steelblue("#{key}:")
356
+ e lightblue(value)
357
+ e
358
+ }
359
+ end
360
+ end; alias report consider_reporting_our_findings_to_the_user # === report
361
+ alias report_the_dataset consider_reporting_our_findings_to_the_user # === report_the_dataset
362
+
363
+ # ========================================================================= #
364
+ # === run (run tag)
365
+ # ========================================================================= #
366
+ def run
367
+ set_work_on_this_file(first_argument?)
368
+ # ======================================================================= #
369
+ # First check whether the given file exists or not:
370
+ # ======================================================================= #
371
+ if verbose_check_whether_the_file_exists
372
+ original_dataset = File.read(@internal_hash[:work_on_this_file]) # Just store it completely.
373
+ if original_dataset.include?('ORIGIN') and original_dataset.include?('VERSION ')
374
+ analyse_this_dataset(original_dataset)
375
+ else
376
+ opnn; e 'No keywords ORIGIN and VERSION were found in this file.'
377
+ end
378
+ end
379
+ end
380
+
381
+ # ========================================================================= #
382
+ # === Bioroebe::GenbankParser[]
383
+ # ========================================================================= #
384
+ def self.[](i = '')
385
+ new(i)
386
+ end
387
+
72
388
  end; end
73
389
 
74
390
  if __FILE__ == $PROGRAM_NAME
75
- _ = Bioroebe::GenbankParser.new(ARGV)
76
- pp _.id
77
- pp _.sequence?
78
- end # genbankparser *genbank
391
+ alias e puts
392
+ genbank_parser = Bioroebe::GenbankParser.new(ARGV)
393
+ # genbank_parser = Bioroebe::GenbankParser.new('/home/Temp/bioroebe/ls_orchid.gbk')
394
+ e genbank_parser.sequence?
395
+ # e _.id
396
+ end # genbankparser *genbank
397
+ # genbankparser
398
+ # genbankparser sample_file.genbank
399
+ # genbankparser --test
@@ -68,11 +68,6 @@ module Parser
68
68
 
69
69
  class GFF < ::Bioroebe::CommandlineApplication # === Bioroebe::Parser::GFF
70
70
 
71
- # ========================================================================= #
72
- # === NAMESPACE
73
- # ========================================================================= #
74
- NAMESPACE = inspect
75
-
76
71
  # ========================================================================= #
77
72
  # === INPUT_FILE
78
73
  #
@@ -112,10 +107,7 @@ class GFF < ::Bioroebe::CommandlineApplication # === Bioroebe::Parser::GFF
112
107
  # ========================================================================= #
113
108
  def reset
114
109
  super()
115
- # ======================================================================= #
116
- # === @namespace
117
- # ======================================================================= #
118
- @namespace = NAMESPACE
110
+ infer_the_namespace
119
111
  # ======================================================================= #
120
112
  # === @input_file
121
113
  # ======================================================================= #
@@ -19,11 +19,6 @@ module Bioroebe
19
19
 
20
20
  class ScanForRepeat < ::Bioroebe::CommandlineApplication # === Bioroebe::ScanForRepeat
21
21
 
22
- # ========================================================================= #
23
- # === NAMESPACE
24
- # ========================================================================= #
25
- NAMESPACE = inspect
26
-
27
22
  # ========================================================================= #
28
23
  # === initialize
29
24
  # ========================================================================= #
@@ -63,6 +58,7 @@ class ScanForRepeat < ::Bioroebe::CommandlineApplication # === Bioroebe::ScanFor
63
58
  # ========================================================================= #
64
59
  def reset
65
60
  super()
61
+ infer_the_namespace
66
62
  # ======================================================================= #
67
63
  # === @n_repeats
68
64
  # ======================================================================= #
@@ -21,11 +21,6 @@ class FetchFastaSequenceFromPdb < ::Bioroebe::CommandlineApplication # === Bioro
21
21
 
22
22
  require 'open-uri'
23
23
 
24
- # ========================================================================= #
25
- # === NAMESPACE
26
- # ========================================================================= #
27
- NAMESPACE = inspect
28
-
29
24
  # ========================================================================= #
30
25
  # === URL_FOR_FASTA_ENTRIES_AT_THE_PDB
31
26
  # ========================================================================= #
@@ -59,10 +54,7 @@ class FetchFastaSequenceFromPdb < ::Bioroebe::CommandlineApplication # === Bioro
59
54
  # ========================================================================= #
60
55
  def reset
61
56
  super()
62
- # ======================================================================= #
63
- # === @namespace
64
- # ======================================================================= #
65
- @namespace = NAMESPACE
57
+ infer_the_namespace
66
58
  # ======================================================================= #
67
59
  # === @result
68
60
  # ======================================================================= #
@@ -13,11 +13,6 @@ module Bioroebe
13
13
 
14
14
  class ParsemmCIFFile < ::Bioroebe::CommandlineApplication # === Bioroebe::ParsemmCIFFile
15
15
 
16
- # ========================================================================= #
17
- # === NAMESPACE
18
- # ========================================================================= #
19
- NAMESPACE = inspect
20
-
21
16
  # ========================================================================= #
22
17
  # === initialize
23
18
  # ========================================================================= #
@@ -44,10 +39,7 @@ class ParsemmCIFFile < ::Bioroebe::CommandlineApplication # === Bioroebe::Parsem
44
39
  # ========================================================================= #
45
40
  def reset
46
41
  super()
47
- # ======================================================================= #
48
- # === @namespace
49
- # ======================================================================= #
50
- @namespace = NAMESPACE
42
+ infer_the_namespace
51
43
  end
52
44
 
53
45
  # ========================================================================= #
@@ -114,11 +114,6 @@ module Bioroebe
114
114
 
115
115
  class ParsePdbFile < ::Bioroebe::CommandlineApplication # === Bioroebe::ParsePdbFile
116
116
 
117
- # ========================================================================= #
118
- # === NAMESPACE
119
- # ========================================================================= #
120
- NAMESPACE = inspect
121
-
122
117
  # ========================================================================= #
123
118
  # === DEFAULT_PDB_FILE
124
119
  # ========================================================================= #
@@ -171,10 +166,7 @@ class ParsePdbFile < ::Bioroebe::CommandlineApplication # === Bioroebe::ParsePdb
171
166
  # ========================================================================= #
172
167
  def reset
173
168
  super()
174
- # ======================================================================= #
175
- # === @namespace
176
- # ======================================================================= #
177
- @namespace = NAMESPACE
169
+ infer_the_namespace
178
170
  # ======================================================================= #
179
171
  # === @do_create_a_fasta_file
180
172
  #