bioroebe 0.10.80 → 0.11.32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of bioroebe might be problematic. Click here for more details.

Files changed (210) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +3209 -2618
  3. data/bin/bioroebe +7 -1
  4. data/bioroebe.gemspec +3 -3
  5. data/doc/README.gen +3208 -2617
  6. data/doc/quality_control/commandline_applications.md +3 -3
  7. data/doc/todo/bioroebe_todo.md +2040 -2615
  8. data/lib/bioroebe/aminoacids/aminoacid_substitution.rb +1 -9
  9. data/lib/bioroebe/aminoacids/codon_percentage.rb +1 -9
  10. data/lib/bioroebe/aminoacids/deduce_aminoacid_sequence.rb +1 -9
  11. data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +1 -0
  12. data/lib/bioroebe/aminoacids/show_hydrophobicity.rb +1 -6
  13. data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +18 -8
  14. data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +13 -11
  15. data/lib/bioroebe/base/commandline_application/misc.rb +26 -9
  16. data/lib/bioroebe/base/commandline_application/opn.rb +8 -8
  17. data/lib/bioroebe/base/commandline_application/reset.rb +3 -2
  18. data/lib/bioroebe/base/misc.rb +35 -0
  19. data/lib/bioroebe/base/prototype/misc.rb +11 -1
  20. data/lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb +2 -10
  21. data/lib/bioroebe/codons/possible_codons_for_this_aminoacid.rb +1 -9
  22. data/lib/bioroebe/codons/show_codon_tables.rb +6 -2
  23. data/lib/bioroebe/codons/show_codon_usage.rb +2 -1
  24. data/lib/bioroebe/constants/aminoacids_and_proteins.rb +1 -0
  25. data/lib/bioroebe/constants/database_constants.rb +1 -1
  26. data/lib/bioroebe/constants/files_and_directories.rb +31 -4
  27. data/lib/bioroebe/constants/misc.rb +20 -0
  28. data/lib/bioroebe/count/count_amount_of_nucleotides.rb +3 -0
  29. data/lib/bioroebe/crystal/README.md +2 -0
  30. data/lib/bioroebe/crystal/to_rna.cr +19 -0
  31. data/lib/bioroebe/data/README.md +11 -8
  32. data/lib/bioroebe/data/electron_microscopy/pos_example.pos +396 -0
  33. data/lib/bioroebe/data/electron_microscopy/test_particles.star +36 -0
  34. data/lib/bioroebe/electron_microscopy/coordinate_analyzer.rb +15 -18
  35. data/lib/bioroebe/{fasta_and_fastq/parse_fasta/run.rb → electron_microscopy/electron_microscopy_module.rb} +16 -8
  36. data/lib/bioroebe/electron_microscopy/fix_pos_file.rb +1 -9
  37. data/lib/bioroebe/electron_microscopy/flipy.rb +83 -0
  38. data/lib/bioroebe/electron_microscopy/parse_coordinates.rb +2 -10
  39. data/lib/bioroebe/electron_microscopy/read_file_xmd.rb +1 -9
  40. data/lib/bioroebe/electron_microscopy/simple_star_file_generator.rb +4 -9
  41. data/lib/bioroebe/fasta_and_fastq/autocorrect_the_name_of_this_fasta_file.rb +1 -9
  42. data/lib/bioroebe/fasta_and_fastq/compact_fasta_file/compact_fasta_file.rb +7 -9
  43. data/lib/bioroebe/fasta_and_fastq/fasta_defline/fasta_defline.rb +1 -5
  44. data/lib/bioroebe/fasta_and_fastq/fasta_to_yaml/fasta_to_yaml.rb +81 -0
  45. data/lib/bioroebe/fasta_and_fastq/parse_fasta/parse_fasta.rb +1460 -7
  46. data/lib/bioroebe/fasta_and_fastq/return_fasta_subsection_of_this_file.rb +11 -2
  47. data/lib/bioroebe/fasta_and_fastq/show_fasta_headers.rb +27 -12
  48. data/lib/bioroebe/fasta_and_fastq/simplify_fasta_header/simplify_fasta_header.rb +1 -5
  49. data/lib/bioroebe/fasta_and_fastq/split_this_fasta_file_into_chromosomes/constants.rb +0 -5
  50. data/lib/bioroebe/genome/README.md +4 -0
  51. data/lib/bioroebe/genome/genome.rb +67 -0
  52. data/lib/bioroebe/genomes/genome_pattern.rb +3 -9
  53. data/lib/bioroebe/gui/gtk +1 -0
  54. data/lib/bioroebe/gui/gtk3/controller/controller.rb +45 -27
  55. data/lib/bioroebe/gui/gtk3/dna_to_aminoacid_widget/dna_to_aminoacid_widget.rb +76 -50
  56. data/lib/bioroebe/gui/gtk3/fasta_table_widget/fasta_table_widget.rb +99 -21
  57. data/lib/bioroebe/gui/gtk3/hamming_distance/hamming_distance.rb +42 -28
  58. data/lib/bioroebe/gui/gtk3/nucleotide_analyser/nucleotide_analyser.rb +119 -71
  59. data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +18 -18
  60. data/lib/bioroebe/gui/gtk3/random_sequence/random_sequence.rb +19 -11
  61. data/lib/bioroebe/gui/libui/three_to_one/three_to_one.rb +8 -6
  62. data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +14 -14
  63. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$1.class +0 -0
  64. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne$CloseListener.class +0 -0
  65. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.class +0 -0
  66. data/lib/bioroebe/gui/swing/three_to_one/ThreeToOne.java +141 -0
  67. data/lib/bioroebe/java/README.md +4 -0
  68. data/lib/bioroebe/java/bioroebe/Sequence.java +25 -1
  69. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Base.class +0 -0
  70. data/lib/bioroebe/java/bioroebe/{Base.java → src/main/java/bioroebe/Base.java} +15 -2
  71. data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.class → src/main/java/bioroebe/BisulfiteTreatment.class} +0 -0
  72. data/lib/bioroebe/java/bioroebe/{BisulfiteTreatment.java → src/main/java/bioroebe/BisulfiteTreatment.java} +0 -0
  73. data/lib/bioroebe/java/bioroebe/{Codons.class → src/main/java/bioroebe/Codons.class} +0 -0
  74. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Codons.java +34 -0
  75. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Commandline.class +0 -0
  76. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/Commandline.java +95 -0
  77. data/lib/bioroebe/java/bioroebe/{Esystem.class → src/main/java/bioroebe/Esystem.class} +0 -0
  78. data/lib/bioroebe/java/bioroebe/{Esystem.java → src/main/java/bioroebe/Esystem.java} +0 -0
  79. data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.class → src/main/java/bioroebe/GenerateRandomDnaSequence.class} +0 -0
  80. data/lib/bioroebe/java/bioroebe/{GenerateRandomDnaSequence.java → src/main/java/bioroebe/GenerateRandomDnaSequence.java} +8 -2
  81. data/lib/bioroebe/java/bioroebe/{IsPalindrome.class → src/main/java/bioroebe/IsPalindrome.class} +0 -0
  82. data/lib/bioroebe/java/bioroebe/{IsPalindrome.java → src/main/java/bioroebe/IsPalindrome.java} +5 -1
  83. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/PartnerNucleotide.class +0 -0
  84. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/PartnerNucleotide.java +56 -0
  85. data/lib/bioroebe/java/bioroebe/{RemoveFile.class → src/main/java/bioroebe/RemoveFile.class} +0 -0
  86. data/lib/bioroebe/java/bioroebe/{RemoveFile.java → src/main/java/bioroebe/RemoveFile.java} +10 -4
  87. data/lib/bioroebe/java/bioroebe/{RemoveNumbers.class → src/main/java/bioroebe/RemoveNumbers.class} +0 -0
  88. data/lib/bioroebe/java/bioroebe/{RemoveNumbers.java → src/main/java/bioroebe/RemoveNumbers.java} +1 -0
  89. data/lib/bioroebe/java/bioroebe/{ToCamelcase.class → src/main/java/bioroebe/ToCamelcase.class} +0 -0
  90. data/lib/bioroebe/java/bioroebe/{ToCamelcase.java → src/main/java/bioroebe/ToCamelcase.java} +3 -3
  91. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/ToRNA.class +0 -0
  92. data/lib/bioroebe/java/bioroebe/src/main/java/bioroebe/ToRNA.java +42 -0
  93. data/lib/bioroebe/java/bioroebe/toplevel_methods/BaseComposition.class +0 -0
  94. data/lib/bioroebe/java/bioroebe/toplevel_methods/BaseComposition.java +73 -0
  95. data/lib/bioroebe/misc/ruler.rb +1 -0
  96. data/lib/bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb +1 -9
  97. data/lib/bioroebe/nucleotides/show_nucleotide_sequence.rb +7 -7
  98. data/lib/bioroebe/parsers/genbank_parser.rb +347 -26
  99. data/lib/bioroebe/parsers/gff.rb +1 -9
  100. data/lib/bioroebe/patterns/scan_for_repeat.rb +1 -5
  101. data/lib/bioroebe/pdb/fetch_fasta_sequence_from_pdb.rb +1 -9
  102. data/lib/bioroebe/pdb/parse_mmCIF_file.rb +1 -9
  103. data/lib/bioroebe/pdb/parse_pdb_file.rb +1 -9
  104. data/lib/bioroebe/project/project.rb +1 -1
  105. data/lib/bioroebe/python/README.md +1 -0
  106. data/lib/bioroebe/python/__pycache__/mymodule.cpython-39.pyc +0 -0
  107. data/lib/bioroebe/python/gui/gtk3/all_in_one.css +4 -0
  108. data/lib/bioroebe/python/gui/gtk3/all_in_one.py +59 -0
  109. data/lib/bioroebe/python/gui/gtk3/widget1.py +20 -0
  110. data/lib/bioroebe/python/gui/tkinter/all_in_one.py +91 -0
  111. data/lib/bioroebe/python/mymodule.py +8 -0
  112. data/lib/bioroebe/python/protein_to_dna.py +33 -0
  113. data/lib/bioroebe/python/shell/shell.py +19 -0
  114. data/lib/bioroebe/python/to_rna.py +14 -0
  115. data/lib/bioroebe/python/toplevel_methods/esystem.py +12 -0
  116. data/lib/bioroebe/python/toplevel_methods/open_in_browser.py +20 -0
  117. data/lib/bioroebe/python/toplevel_methods/palindromes.py +42 -0
  118. data/lib/bioroebe/python/toplevel_methods/rds.py +13 -0
  119. data/lib/bioroebe/python/toplevel_methods/shuffleseq.py +23 -0
  120. data/lib/bioroebe/python/toplevel_methods/three_delimiter.py +37 -0
  121. data/lib/bioroebe/python/toplevel_methods/time_and_date.py +43 -0
  122. data/lib/bioroebe/python/toplevel_methods/to_camelcase.py +21 -0
  123. data/lib/bioroebe/requires/require_the_bioroebe_project.rb +3 -1
  124. data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +28 -25
  125. data/lib/bioroebe/sequence/protein.rb +105 -3
  126. data/lib/bioroebe/sequence/sequence.rb +61 -2
  127. data/lib/bioroebe/shell/menu.rb +3819 -3713
  128. data/lib/bioroebe/shell/misc.rb +51 -4311
  129. data/lib/bioroebe/shell/readline/readline.rb +1 -1
  130. data/lib/bioroebe/shell/shell.rb +11250 -28
  131. data/lib/bioroebe/siRNA/siRNA.rb +81 -1
  132. data/lib/bioroebe/string_matching/find_longest_substring.rb +3 -2
  133. data/lib/bioroebe/string_matching/hamming_distance.rb +1 -9
  134. data/lib/bioroebe/taxonomy/class_methods.rb +3 -8
  135. data/lib/bioroebe/taxonomy/constants.rb +4 -3
  136. data/lib/bioroebe/taxonomy/edit.rb +2 -1
  137. data/lib/bioroebe/taxonomy/help/help.rb +10 -10
  138. data/lib/bioroebe/taxonomy/info/check_available.rb +15 -9
  139. data/lib/bioroebe/taxonomy/info/info.rb +18 -11
  140. data/lib/bioroebe/taxonomy/info/is_dna.rb +46 -36
  141. data/lib/bioroebe/taxonomy/interactive.rb +140 -104
  142. data/lib/bioroebe/taxonomy/menu.rb +27 -18
  143. data/lib/bioroebe/taxonomy/parse_fasta.rb +3 -1
  144. data/lib/bioroebe/taxonomy/shared.rb +1 -0
  145. data/lib/bioroebe/taxonomy/taxonomy.rb +1 -0
  146. data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +31 -24
  147. data/lib/bioroebe/toplevel_methods/colourize_related_methods.rb +164 -0
  148. data/lib/bioroebe/toplevel_methods/databases.rb +1 -1
  149. data/lib/bioroebe/toplevel_methods/fasta_and_fastq.rb +107 -63
  150. data/lib/bioroebe/toplevel_methods/file_and_directory_related_actions.rb +14 -2
  151. data/lib/bioroebe/toplevel_methods/misc.rb +118 -11
  152. data/lib/bioroebe/toplevel_methods/nucleotides.rb +22 -5
  153. data/lib/bioroebe/toplevel_methods/open_in_browser.rb +2 -0
  154. data/lib/bioroebe/toplevel_methods/palindromes.rb +1 -2
  155. data/lib/bioroebe/toplevel_methods/taxonomy.rb +2 -2
  156. data/lib/bioroebe/toplevel_methods/to_camelcase.rb +5 -0
  157. data/lib/bioroebe/utility_scripts/align_open_reading_frames.rb +1 -9
  158. data/lib/bioroebe/utility_scripts/check_for_mismatches/check_for_mismatches.rb +1 -9
  159. data/lib/bioroebe/utility_scripts/compacter.rb +1 -9
  160. data/lib/bioroebe/utility_scripts/compseq/compseq.rb +1 -9
  161. data/lib/bioroebe/utility_scripts/create_batch_entrez_file.rb +1 -9
  162. data/lib/bioroebe/utility_scripts/dot_alignment.rb +1 -9
  163. data/lib/bioroebe/utility_scripts/move_file_to_its_correct_location.rb +1 -4
  164. data/lib/bioroebe/utility_scripts/showorf/constants.rb +0 -5
  165. data/lib/bioroebe/utility_scripts/showorf/reset.rb +1 -4
  166. data/lib/bioroebe/version/version.rb +2 -2
  167. data/lib/bioroebe/www/embeddable_interface.rb +101 -52
  168. data/lib/bioroebe/www/sinatra/sinatra.rb +186 -70
  169. data/lib/bioroebe/yaml/aminoacids/amino_acids_long_name_to_one_letter.yml +2 -2
  170. data/lib/bioroebe/yaml/configuration/browser.yml +1 -1
  171. data/lib/bioroebe/yaml/genomes/README.md +3 -4
  172. data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +3 -3
  173. metadata +69 -64
  174. data/doc/setup.rb +0 -1655
  175. data/lib/bioroebe/fasta_and_fastq/parse_fasta/constants.rb +0 -50
  176. data/lib/bioroebe/fasta_and_fastq/parse_fasta/initialize.rb +0 -86
  177. data/lib/bioroebe/fasta_and_fastq/parse_fasta/menu.rb +0 -117
  178. data/lib/bioroebe/fasta_and_fastq/parse_fasta/misc.rb +0 -981
  179. data/lib/bioroebe/fasta_and_fastq/parse_fasta/report.rb +0 -156
  180. data/lib/bioroebe/fasta_and_fastq/parse_fasta/reset.rb +0 -128
  181. data/lib/bioroebe/genbank/genbank_parser.rb +0 -291
  182. data/lib/bioroebe/java/bioroebe/Base.class +0 -0
  183. data/lib/bioroebe/java/bioroebe/Codons.java +0 -22
  184. data/lib/bioroebe/java/bioroebe/PartnerNucleotide.class +0 -0
  185. data/lib/bioroebe/java/bioroebe/PartnerNucleotide.java +0 -19
  186. data/lib/bioroebe/java/bioroebe.jar +0 -0
  187. data/lib/bioroebe/shell/add.rb +0 -108
  188. data/lib/bioroebe/shell/assign.rb +0 -360
  189. data/lib/bioroebe/shell/chop_and_cut.rb +0 -281
  190. data/lib/bioroebe/shell/constants.rb +0 -166
  191. data/lib/bioroebe/shell/download.rb +0 -335
  192. data/lib/bioroebe/shell/enable_and_disable.rb +0 -158
  193. data/lib/bioroebe/shell/enzymes.rb +0 -310
  194. data/lib/bioroebe/shell/fasta.rb +0 -345
  195. data/lib/bioroebe/shell/gtk.rb +0 -76
  196. data/lib/bioroebe/shell/history.rb +0 -132
  197. data/lib/bioroebe/shell/initialize.rb +0 -217
  198. data/lib/bioroebe/shell/loop.rb +0 -74
  199. data/lib/bioroebe/shell/prompt.rb +0 -107
  200. data/lib/bioroebe/shell/random.rb +0 -289
  201. data/lib/bioroebe/shell/reset.rb +0 -335
  202. data/lib/bioroebe/shell/scan_and_parse.rb +0 -135
  203. data/lib/bioroebe/shell/search.rb +0 -337
  204. data/lib/bioroebe/shell/sequences.rb +0 -200
  205. data/lib/bioroebe/shell/show_report_and_display.rb +0 -2901
  206. data/lib/bioroebe/shell/startup.rb +0 -127
  207. data/lib/bioroebe/shell/taxonomy.rb +0 -14
  208. data/lib/bioroebe/shell/tk.rb +0 -23
  209. data/lib/bioroebe/shell/user_input.rb +0 -88
  210. data/lib/bioroebe/shell/xorg.rb +0 -45
@@ -1,156 +0,0 @@
1
- #!/usr/bin/ruby -w
2
- # Encoding: UTF-8
3
- # frozen_string_literal: true
4
- # =========================================================================== #
5
- # require 'bioroebe/fasta_and_fastq/parse_fasta/report.rb'
6
- # =========================================================================== #
7
- require 'bioroebe/base/commandline_application/commandline_application.rb'
8
-
9
- module Bioroebe
10
-
11
- class ParseFasta < ::Bioroebe::CommandlineApplication # === Bioroebe::ParseFasta
12
-
13
- require 'bioroebe/sequence/dna.rb'
14
-
15
- # ========================================================================= #
16
- # === do_report_the_sequence (report tag)
17
- #
18
- # This method is used to display the main sequence at hand.
19
- # ========================================================================= #
20
- def do_report_the_sequence
21
- _ = main_sequence?
22
- # ======================================================================= #
23
- # Honour the --limit commandline flag next.
24
- # ======================================================================= #
25
- if @internal_hash[:limit_the_display_to_n_nucleotides]
26
- _ = _[0 .. (@internal_hash[:limit_the_display_to_n_nucleotides] - 1)]
27
- end
28
- if @colourize_sequence
29
- if is_polynucleotide?
30
- # =================================================================== #
31
- # Assume this is DNA/RNA input.
32
- # =================================================================== #
33
- _.gsub!(/A/, teal('A')+rev)
34
- _.gsub!(/C/, slateblue('C')+rev)
35
- _.gsub!(/G/, royalblue('G')+rev)
36
- _.gsub!(/T/, steelblue('T')+rev)
37
- _.gsub!(/U/, steelblue('U')+rev)
38
- #else
39
- end
40
- end
41
- if condense_the_sequence_onto_a_single_line?
42
- _ = _.delete("\n")
43
- end
44
- erev colourize_this_nucleotide_sequence(_)
45
- e if condense_the_sequence_onto_a_single_line?
46
- if show_the_translated_protein_sequence?
47
- # ===================================================================== #
48
- # Do show the translated protein sequence next:
49
- # ===================================================================== #
50
- translated_into_aa = Bioroebe.to_aa(_)
51
- translated_into_aa_and_colourized = translated_into_aa.dup
52
- if translated_into_aa.include? '*'
53
- translated_into_aa_and_colourized = translated_into_aa.gsub(/\*/,tomato('*'))
54
- end
55
- erev 'The translated aminoacid sequence of '+
56
- sfancy(translated_into_aa.size.to_s)+rev+
57
- ' aminoacids is:'
58
- e
59
- erev steelblue(" #{translated_into_aa_and_colourized}")
60
- e
61
- end
62
- end; alias display do_report_the_sequence # === display
63
- alias report do_report_the_sequence # === report
64
-
65
- # ========================================================================= #
66
- # === report_the_nucleotide_composition
67
- # ========================================================================= #
68
- def report_the_nucleotide_composition
69
- if is_this_sequence_a_polynucleotide_sequence?
70
- first = @hash.values.first.upcase
71
- total_size = first.size
72
- n_adenines = first.count('A')
73
- n_thymidines = first.count('T')
74
- n_cytodines = first.count('C')
75
- n_guanines = first.count('G')
76
- erev "The nucleotide composition is as follows:"
77
- e " "\
78
- "#{steelblue(n_adenines)}#{rev}x A (#{(n_adenines * 100.0 / total_size).round(2)}%), "\
79
- "#{steelblue(n_thymidines)}#{rev}x T (#{(n_thymidines * 100.0 / total_size).round(2)}%), "\
80
- "#{steelblue(n_cytodines)}#{rev}x C (#{(n_cytodines * 100.0 / total_size).round(2)}%), "\
81
- "#{steelblue(n_guanines)}#{rev}x G (#{(n_guanines * 100.0 / total_size).round(2)}%)"
82
- elsif is_a_protein?
83
- # ===================================================================== #
84
- # Report the composition of the protein:
85
- # ===================================================================== #
86
- sequence = @hash.values.first.delete("\n")
87
- erev "The protein composition (aminoacids) is as follows:"
88
- # e colourize_this_aminoacid_sequence_for_the_commandline(" #{sequence}")
89
- e orchid(" #{sequence}")
90
- end
91
- end; alias report_the_protein_composition report_the_nucleotide_composition # === report_the_protein_composition
92
-
93
- # ========================================================================= #
94
- # === report_how_many_elements_we_have_found
95
- # ========================================================================= #
96
- def report_how_many_elements_we_have_found
97
- if @hash
98
- first = @hash.values.first.delete("\n")
99
- size = first.size.to_s
100
- if be_verbose?
101
- n_start_codons = first.count('ATG')
102
- # =================================================================== #
103
- # We upcase it (as of October 2021), as some FASTA files may
104
- # include the sequence in lowercased characters.
105
- # =================================================================== #
106
- n_start_codons += first.reverse.upcase.count('ATG')
107
- result = "This sequence contains #{simp(size.to_s)}#{rev}"\
108
- " #{nucleotides_or_aminoacids?}".dup
109
- if is_a_nucleotide?
110
- result << " and #{n_start_codons} "\
111
- "ATG codons (on both strands) in total"
112
- end
113
- result << '.'
114
- if size.to_i > 1_000_000
115
- # ================================================================= #
116
- # Format the number with '_' characters.
117
- # ================================================================= #
118
- formatted = size.to_i.to_s.reverse.split(/(.{3})/).reject(&:empty?).join('_').reverse
119
- result = result.dup if result.frozen?
120
- result << ' ('+simp(formatted+' bp')+rev+')'
121
- end
122
- erev result
123
- end
124
- end
125
- end
126
-
127
- # ========================================================================= #
128
- # === report_on_how_many_entries_we_did_work
129
- # ========================================================================= #
130
- def report_on_how_many_entries_we_did_work
131
- if be_verbose?
132
- entry_or_entries = 'entry'
133
- if @hash.keys.size > 1
134
- entry_or_entries = 'entries'
135
- end
136
- erev "We have identified a total of #{orange(@hash.keys.size)}"\
137
- "#{rev} #{entry_or_entries} in this fasta dataset."
138
- e
139
- end
140
- end
141
-
142
- # ========================================================================= #
143
- # === report_the_FASTA_header
144
- # ========================================================================= #
145
- def report_the_FASTA_header
146
- e "#{rev}The header is: #{steelblue(header?)}"
147
- end
148
-
149
- # ========================================================================= #
150
- # === report_the_sequence?
151
- # ========================================================================= #
152
- def report_the_sequence?
153
- @internal_hash[:report_the_sequence]
154
- end
155
-
156
- end; end
@@ -1,128 +0,0 @@
1
- #!/usr/bin/ruby -w
2
- # Encoding: UTF-8
3
- # frozen_string_literal: true
4
- # =========================================================================== #
5
- # require 'bioroebe/fasta_and_fastq/parse_fasta/reset.rb'
6
- # =========================================================================== #
7
- module Bioroebe
8
-
9
- class ParseFasta < ::Bioroebe::CommandlineApplication # === Bioroebe::ParseFasta
10
-
11
- # ========================================================================= #
12
- # === reset (reset tag)
13
- # ========================================================================= #
14
- def reset
15
- super()
16
- # ======================================================================= #
17
- # === @namespace
18
- # ======================================================================= #
19
- @namespace = NAMESPACE
20
- # ======================================================================= #
21
- # === @is_a_genbank_file
22
- # ======================================================================= #
23
- @is_a_genbank_file = false
24
- # ======================================================================= #
25
- # === @input_file
26
- #
27
- # This variable denotes which input file is used to read data from.
28
- #
29
- # It is nil initially because we may skip reading from an existing
30
- # file and e. g. only read from a String or some other non-file
31
- # entity.
32
- # ======================================================================= #
33
- @input_file = nil
34
- # ======================================================================= #
35
- # === @hash
36
- #
37
- # This is the main variable for the class. It will keep entries such
38
- # as this one here:
39
- #
40
- # {
41
- # "ENSMUSG00000020122|ENSMUST08" => "CCCTCC"
42
- # }
43
- #
44
- # ======================================================================= #
45
- @hash = {}
46
- # ======================================================================= #
47
- # === @internal_hash
48
- #
49
- # This Hash exists for internal configuration of the class.
50
- # ======================================================================= #
51
- @internal_hash = {}
52
- # ======================================================================= #
53
- # === :report_the_sequence
54
- # ======================================================================= #
55
- @internal_hash[:report_the_sequence] = false
56
- # ======================================================================= #
57
- # === :overwrite_the_original_file
58
- # ======================================================================= #
59
- @internal_hash[:overwrite_the_original_file] = false
60
- # ======================================================================= #
61
- # === :save_the_file
62
- # ======================================================================= #
63
- @internal_hash[:save_the_file] = false
64
- # ======================================================================= #
65
- # === :remove_numbers_from_input
66
- # ======================================================================= #
67
- @internal_hash[:remove_numbers_from_input] = false
68
- # ======================================================================= #
69
- # === :show_the_translated_protein_sequence
70
- #
71
- # This setting is false initially. If set to true via the commandline
72
- # then report() will show the translated protein sequence as well.
73
- # ======================================================================= #
74
- @internal_hash[:show_the_translated_protein_sequence] = false
75
- # ======================================================================= #
76
- # === :condense_the_sequence_onto_a_single_line
77
- #
78
- # By default the output of this class will include newlines for the
79
- # sequence. If this is not wanted by the user then the following
80
- # variable keeps track of that behaviour. You can use the flag
81
- # called --one-line to enable a condensed output, with newlines
82
- # being removed.
83
- # ======================================================================= #
84
- @internal_hash[:condense_the_sequence_onto_a_single_line] = false
85
- # ======================================================================= #
86
- # === :limit_the_display_to_n_nucleotides
87
- #
88
- # If this variable is a number rather than nil, then it will be used
89
- # to display only a limited number of nucleotides, e. g. "1000" if
90
- # the user passes in 1000.
91
- # ======================================================================= #
92
- @internal_hash[:limit_the_display_to_n_nucleotides] = nil
93
- # ======================================================================= #
94
- # === @may_we_exit
95
- # ======================================================================= #
96
- @may_we_exit = false
97
- # ======================================================================= #
98
- # === @current_key
99
- # ======================================================================= #
100
- @current_key = nil
101
- # ======================================================================= #
102
- # === @use_opn
103
- # ======================================================================= #
104
- @use_opn = ::Bioroebe.use_opn?
105
- # ======================================================================= #
106
- # === @colourize_sequence
107
- # ======================================================================= #
108
- @colourize_sequence = false
109
- # ======================================================================= #
110
- # === @sort_by_size
111
- #
112
- # If the following variable is set to true, then this class will
113
- # run a sizeseq-comparison, that is, it will compare all sequences
114
- # and output them in a size-sorted manner, similar to the EMBOSS
115
- # sizeseq action.
116
- # ======================================================================= #
117
- @sort_by_size = false
118
- # ======================================================================= #
119
- # === @show_the_header
120
- #
121
- # If this variable is true then the header will be shown.
122
- # ======================================================================= #
123
- @show_the_header = false
124
- set_round_to :default
125
- set_be_verbose
126
- end
127
-
128
- end; end
@@ -1,291 +0,0 @@
1
- #!/usr/bin/ruby -w
2
- # Encoding: UTF-8
3
- # frozen_string_literal: true
4
- # =========================================================================== #
5
- # === Bioroebe::GenbankParser
6
- #
7
- # This is similar to the FastaParser, but instead it will only select the
8
- # content between "ORIGIN" and "//" entries.
9
- #
10
- # You can pass the content of a genbank-file to this class, and it can
11
- # report the nucleotide sequence, e. g. the part starting after the
12
- # ORIGIN string.
13
- #
14
- # The reason why this class has been created was because it is sometimes
15
- # necessary to parse a genebank file.
16
- # =========================================================================== #
17
- # require 'bioroebe/genbank/genbank_parser.rb'
18
- # Bioroebe::GenbankParser.new(ARGV)
19
- # =========================================================================== #
20
- require 'bioroebe/base/commandline_application/commandline_application.rb'
21
-
22
- module Bioroebe
23
-
24
- class GenbankParser < ::Bioroebe::CommandlineApplication # === Bioroebe::GenbankParser
25
-
26
- # ========================================================================= #
27
- # === UPCASE_SEQUENCE
28
- # ========================================================================= #
29
- UPCASE_SEQUENCE = true
30
-
31
- # ========================================================================= #
32
- # === TEST_STRING
33
- #
34
- # Our example test-string, to see how such a genbank file usually looks
35
- # like.
36
- # ========================================================================= #
37
- TEST_STRING = ' /note="internal transcribed spacer 2"
38
- ORIGIN
39
- 1 cgtaacaagg tttccgtagg tgaaccttcg gaaggatcat tgttgagacc cccaaaaaaa
40
- 61 cgatcgagtt aatccggagg accggtgtag tttggtctcc caggggcttt ggctactgtg
41
- 121 gtggccgtga atttccgtcg aacctccttg ggagaattct tgatggcaat tgaacccttg
42
- 181 gcccggcgca gtttcgcccc aagtcaaatg agatggaacc ggcggagggc atcgtcctcc
43
- 241 atggaaccgg ggagggccgg cgttcttccg ttccccccat gaattttttt ttgacaactc
44
- 301 tcggcaacgg atatctcggc tctttgcatc cgatgaaaga acccagcgaa atgtgataag
45
- 361 tggtgtgaat tgcagaatcc cgtgaaccat cgagtctttg aacgcaagtt gcgcccgagg
46
- 421 ccatcaggct aagggcacgc ctgcctgggc gttgcgtgct gcatctctct cccattgcta
47
- 481 aggctgaaca ggcatactgt tcggccggcg cggatgagtg tttggcccct tgttcttcgg
48
- 541 tgcgatgggt ccaagacctg ggcttttgac ggccggaaat ccggcaagag gtggacggac
49
- 601 ggtggctgcg acgaagctgt cgtgcgaatg ccctacgctg tcgtatttga tgggccggaa
50
- 661 taaatccctt ttgagcccca ttggaggcac gtcaacccgt gggcggtcga cggccatttg
51
- 721 gatgcaaccc caggtcaggt gagga
52
- //
53
- LOCUS Z78510 750 bp DNA linear PLN 30-NOV-2006
54
- DEFINITION P.caricinum 5.8S rRNA gene and ITS1 and ITS2 DNA.
55
- ACCESSION Z78510
56
- VERSION Z78510.1 GI:2765635
57
- KEYWORDS 5.8S ribosomal RNA; 5.8S rRNA gene; internal transcribed spacer;
58
- ITS1; ITS2.
59
- SOURCE Phragmipedium caricinum
60
- ORGANISM Phragmipedium caricinum
61
- Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
62
- Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae;
63
- Cypripedioideae; Phragmipedium.
64
- REFERENCE 1
65
- AUTHORS Cox,A.V., Pridgeon,A.M., Albert,V.A. and Chase,M.W.
66
- TITLE Phylogenetics of the slipper orchids (Cypripedioideae:
67
- Orchidaceae): nuclear rDNA ITS sequences
68
- JOURNAL Unpublished
69
- REFERENCE 2 (bases 1 to 750)
70
- AUTHORS Cox,A.V.
71
- TITLE Direct Submission
72
- JOURNAL Submitted (19-AUG-1996) Cox A.V., Royal Botanic Gardens, Kew,
73
- Richmond, Surrey TW9 3AB, UK
74
- FEATURES Location/Qualifiers
75
- source 1..750
76
- /organism="Phragmipedium caricinum"
77
- /mol_type="genomic DNA"
78
- /db_xref="taxon:53127"
79
- misc_feature 1..380
80
- /note="internal transcribed spacer 1"
81
- gene 381..550
82
- /gene="5.8S rRNA"
83
- rRNA 381..550
84
- /gene="5.8S rRNA"
85
- /product="5.8S ribosomal RNA"
86
- misc_feature 551..750
87
- /note="internal transcribed spacer 2"
88
- ORIGIN
89
- 1 ctaaccaggg ttccgaggtg accttcggga ggattccttt ttaagccccc gaaaaaacga
90
- 61 tcgaattaaa ccggaggacc ggtttaattt ggtctcccca ggggctttcc ccccttggtg
91
- 121 gccgtgaatt tccatcgaac ccccctggga gaattcttgg tggccaatgg acccttggcc
92
- 181 cggcgcaatt tcccccccaa tcaaatgaga taggaccggc agggggcgtc cccccccatg
93
- 241 gaaccgggga gggccggcat tcttccgttc ccccctcgga ttttttgaca actctcgcaa
94
- 301 cggatatctc gcctctttgc atcggatgga agaacgcagc gaaatgtgat aagtggtgtg
95
- 361 aattgcagaa tcccgtgaac catcgagtct ttgaacgcaa gttgcgcccg aggccatcag
96
- 421 gctaagggca cgcctgcctg ggcgttgcgt gctgcatctc tcccattgct aaggttgaac
97
- 481 gggcatactg ttcggccggc gcggatgaga gattggcccc ttgttcttcg gtgcgatggg
98
- 541 tccaagacct gggcttttga cggtccaaaa tccggcaaga ggtggacgga cggtggctgc
99
- 601 gacaaagctg tcgtgcgaat gccctgcgtt gtcgtttttg atgggccgga ataaatccct
100
- 661 tttgaacccc attggaggca cgtcaaccca tgggcggttg acggccattt ggatgcaacc
101
- 721 ccaggtcagg tgagccaccc gctgagttta
102
- //
103
- LOCUS Z78509 731 bp DNA linear PLN 30-NOV-2006
104
- DEFINITION P.pearcei 5.8S rRNA gene and ITS1 and ITS2 DNA.
105
- ACCESSION Z78509
106
- VERSION Z78509.1 GI:2765634
107
- KEYWORDS 5.8S ribosomal RNA; 5.8S rRNA gene; internal transcribed spacer;
108
- ITS1; ITS2.
109
- SOURCE Phragmipedium pearcei
110
- ORGANISM Phragmipedium pearcei
111
- Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
112
- Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae;
113
- Cypripedioideae; Phragmipedium.
114
- REFERENCE 1
115
- AUTHORS Cox,A.V., Pridgeon,A.M., Albert,V.A. and Chase,M.W.
116
- TITLE Phylogenetics of the slipper orchids (Cypripedioideae:
117
- Orchidaceae): nuclear rDNA ITS sequences
118
- JOURNAL Unpublished
119
- REFERENCE 2 (bases 1 to 731)
120
- AUTHORS Cox,A.V.
121
- TITLE Direct Submission
122
- JOURNAL Submitted (19-AUG-1996) Cox A.V., Royal Botanic Gardens, Kew,
123
- Richmond, Surrey TW9 3AB, UK
124
- FEATURES Location/Qualifiers
125
- source 1..731
126
- /organism="Phragmipedium pearcei"
127
- /mol_type="genomic DNA"
128
- /db_xref="taxon:53135"
129
- misc_feature 1..380
130
- /note="internal transcribed spacer 1"
131
- gene 381..550
132
- /gene="5.8S rRNA"
133
- rRNA 381..550
134
- /gene="5.8S rRNA"
135
- /product="5.8S ribosomal RNA"
136
- misc_feature 551..731
137
- /note="internal transcribed spacer 2"
138
- ORIGIN
139
- 1 cgtaacaagg tttccgtagg tgaacctgcg gaaggatcat tgttgagacc gccaaatata
140
- 61 cgatcgagtt aatccggagg accggtgtag tttggtctcc caggggcttt cgccgctgtg
141
- 121 gtgaccgtga tttgccatcg agcctccttg ggagatttct tgatggcaat tgaacccttg
142
- 181 gcccggcgca gtttcgcgcc aagtcatatg agatagaacc ggcggagggc gtcgtcctcc
143
- 241 atggagcggg gagggccggc atgctccgtg cccccccatg aatttttctg acaactctcg
144
- 301 gcaacggacg taacaaggtt taaatgtgat aagcaggtgt gaattgcaga atcccgtgaa
145
- 361 ccatcgagtc tttgaacgca agttgcgccc gaggccatca ggttaagggc acgcctgcct
146
- 421 gggcgttgcg tgctgcatct ctcccattgc taaggttgaa cgggcatact gttcggccgg
147
- 481 cgcggatgag agtttggccc cttgttcttc ggtgcgatgg gtccaagacc tgggcttttg
148
- 541 acggtccaaa atccggcaag aggtggacgg acggtggctg cgacagagct gtcgtgcgaa
149
- 601 tgccctacgt tgtcgttttt gatgggccag aataaatccc ttttgaaccc cattggaggc
150
- 661 acgtcaaccc aatggggggt gacgggcatt tggttaaccc cggcaagtta aggcacccgt
151
- 721 taattttagg a
152
- //
153
- LOCUS Z78508 741 bp DNA linear PLN 30-NOV-2006'
154
-
155
- # ========================================================================= #
156
- # === initialize
157
- # ========================================================================= #
158
- def initialize(
159
- dataset = ARGV,
160
- run_already = true
161
- )
162
- reset
163
- determine_dataset(
164
- dataset
165
- )
166
- if block_given?
167
- yielded = yield
168
- case yielded
169
- # ===================================================================== #
170
- # === :do_not_report_anything
171
- # ===================================================================== #
172
- when :do_not_report_anything
173
- @report_dataset = false
174
- end
175
- end
176
- run if run_already
177
- end
178
-
179
- # ========================================================================= #
180
- # === reset
181
- # ========================================================================= #
182
- def reset
183
- super()
184
- # ======================================================================= #
185
- # === @dataset
186
- # ======================================================================= #
187
- @dataset = nil
188
- # ======================================================================= #
189
- # === @report_dataset
190
- # ======================================================================= #
191
- @report_dataset = true
192
- end
193
-
194
- # ========================================================================= #
195
- # === determine_dataset
196
- # ========================================================================= #
197
- def determine_dataset(i = nil)
198
- if i.is_a? Array
199
- i = i.first
200
- end
201
- if i.nil?
202
- e 'Please provide some input to this class.'
203
- return
204
- end
205
- # ======================================================================= #
206
- # === Handle existing .gb files here
207
- # ======================================================================= #
208
- if File.exist?(i) and File.file?(i)
209
- i = File.read(i)
210
- end
211
- case i # case tag
212
- # ======================================================================= #
213
- # === --test
214
- #
215
- # This entry point can be used to test the default TEST_STRING.
216
- # ======================================================================= #
217
- when nil,
218
- /^-?-?test$/i,
219
- /^-?-?test(-|_)?string$/i
220
- i = TEST_STRING
221
- end
222
- # ======================================================================= #
223
- # Store the results in the following array.
224
- # ======================================================================= #
225
- array = []
226
- open = false
227
- splitted = i.split(N)
228
- splitted.each {|line|
229
- if line.start_with? '//'
230
- open = false
231
- end
232
- if open
233
- array.last << line.strip.delete(' ').gsub(/\d/,'')
234
- array.flatten!
235
- end
236
- if line.start_with? 'ORIGIN'
237
- open = true
238
- array << [] # Append a new Array.
239
- end
240
- }
241
- string = array.first
242
- string.upcase! if UPCASE_SEQUENCE
243
- @dataset = string # Assign to the main @dataset here.
244
- end
245
-
246
- # ========================================================================= #
247
- # === report_dataset?
248
- # ========================================================================= #
249
- def report_dataset?
250
- @report_dataset
251
- end
252
-
253
- # ========================================================================= #
254
- # === dataset?
255
- # ========================================================================= #
256
- def dataset?
257
- @dataset
258
- end; alias sequence? dataset? # === sequence?
259
-
260
- # ========================================================================= #
261
- # === report_dataset (report tag)
262
- #
263
- # Simply output the dataset without any further processing to be done.
264
- # ========================================================================= #
265
- def report_dataset
266
- if @dataset
267
- e @dataset if report_dataset?
268
- end
269
- end; alias report report_dataset # === report
270
-
271
- # ========================================================================= #
272
- # === run
273
- # ========================================================================= #
274
- def run
275
- report_dataset
276
- end
277
-
278
- # ========================================================================= #
279
- # === Bioroebe::GenbankParser[]
280
- # ========================================================================= #
281
- def self.[](i)
282
- new(i)
283
- end
284
-
285
- end; end
286
-
287
- if __FILE__ == $PROGRAM_NAME
288
- Bioroebe::GenbankParser.new(ARGV) # Report the dataset as well.
289
- end # genbankparser
290
- # genbankparser sample_file.genbank
291
- # genbankparser --test
Binary file
@@ -1,22 +0,0 @@
1
- package bioroebe;
2
-
3
- /*
4
- * class Codons
5
- *
6
- * This class will include codon-related code.
7
- *
8
- */
9
- class Codons extends Base {
10
-
11
- /* Designate the main start codon to use. */
12
- public static String start_codon = "ATG";
13
-
14
- /* new Codons.display_start_codons() */
15
- public static void display_start_codons() {
16
- e(start_codon);
17
- }
18
- public static String return_start_codon() {
19
- return start_codon;
20
- }
21
-
22
- }
@@ -1,19 +0,0 @@
1
- package bioroebe;
2
-
3
- /*
4
- * PartnerNucleotide.partner_nucleotide("A");
5
- *
6
- * This class will simply yield the corresponding nucleotide.
7
- *
8
- */
9
- class PartnerNucleotide extends Base {
10
- public static void partner_nucleotide(String i) {
11
- switch(i) {
12
- case "A": e("T"); break;
13
- case "T": e("A"); break;
14
- case "C": e("G"); break;
15
- case "G": e("C"); break;
16
- default: e("Unhandled input "+i); break;
17
- }
18
- }
19
- }
Binary file