bioroebe 0.10.80 → 0.11.12
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of bioroebe might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +507 -310
- data/bioroebe.gemspec +3 -3
- data/doc/README.gen +506 -309
- data/doc/todo/bioroebe_todo.md +29 -40
- data/lib/bioroebe/aminoacids/display_aminoacid_table.rb +1 -0
- data/lib/bioroebe/base/colours_for_base/colours_for_base.rb +18 -8
- data/lib/bioroebe/base/commandline_application/commandline_arguments.rb +13 -11
- data/lib/bioroebe/base/commandline_application/misc.rb +18 -8
- data/lib/bioroebe/base/prototype/misc.rb +1 -1
- data/lib/bioroebe/codons/show_codon_tables.rb +6 -2
- data/lib/bioroebe/constants/aminoacids_and_proteins.rb +1 -0
- data/lib/bioroebe/constants/files_and_directories.rb +8 -1
- data/lib/bioroebe/count/count_amount_of_nucleotides.rb +3 -0
- data/lib/bioroebe/gui/gtk3/protein_to_DNA/protein_to_DNA.rb +18 -18
- data/lib/bioroebe/gui/shared_code/protein_to_DNA/protein_to_DNA_module.rb +14 -14
- data/lib/bioroebe/parsers/genbank_parser.rb +353 -24
- data/lib/bioroebe/python/README.md +1 -0
- data/lib/bioroebe/python/__pycache__/mymodule.cpython-39.pyc +0 -0
- data/lib/bioroebe/python/gui/gtk3/widget1.py +22 -0
- data/lib/bioroebe/python/mymodule.py +8 -0
- data/lib/bioroebe/python/protein_to_dna.py +30 -0
- data/lib/bioroebe/python/shell/shell.py +19 -0
- data/lib/bioroebe/python/to_rna.py +14 -0
- data/lib/bioroebe/python/toplevel_methods/to_camelcase.py +11 -0
- data/lib/bioroebe/sequence/nucleotide_module/nucleotide_module.rb +28 -25
- data/lib/bioroebe/sequence/sequence.rb +54 -2
- data/lib/bioroebe/shell/menu.rb +3336 -3304
- data/lib/bioroebe/shell/readline/readline.rb +1 -1
- data/lib/bioroebe/shell/shell.rb +11233 -28
- data/lib/bioroebe/siRNA/siRNA.rb +81 -1
- data/lib/bioroebe/string_matching/find_longest_substring.rb +3 -2
- data/lib/bioroebe/toplevel_methods/aminoacids_and_proteins.rb +31 -24
- data/lib/bioroebe/toplevel_methods/nucleotides.rb +22 -5
- data/lib/bioroebe/toplevel_methods/open_in_browser.rb +2 -0
- data/lib/bioroebe/toplevel_methods/to_camelcase.rb +5 -0
- data/lib/bioroebe/version/version.rb +2 -2
- data/lib/bioroebe/yaml/configuration/browser.yml +1 -1
- data/lib/bioroebe/yaml/restriction_enzymes/restriction_enzymes.yml +3 -3
- metadata +17 -36
- data/doc/setup.rb +0 -1655
- data/lib/bioroebe/genbank/genbank_parser.rb +0 -291
- data/lib/bioroebe/shell/add.rb +0 -108
- data/lib/bioroebe/shell/assign.rb +0 -360
- data/lib/bioroebe/shell/chop_and_cut.rb +0 -281
- data/lib/bioroebe/shell/constants.rb +0 -166
- data/lib/bioroebe/shell/download.rb +0 -335
- data/lib/bioroebe/shell/enable_and_disable.rb +0 -158
- data/lib/bioroebe/shell/enzymes.rb +0 -310
- data/lib/bioroebe/shell/fasta.rb +0 -345
- data/lib/bioroebe/shell/gtk.rb +0 -76
- data/lib/bioroebe/shell/history.rb +0 -132
- data/lib/bioroebe/shell/initialize.rb +0 -217
- data/lib/bioroebe/shell/loop.rb +0 -74
- data/lib/bioroebe/shell/misc.rb +0 -4341
- data/lib/bioroebe/shell/prompt.rb +0 -107
- data/lib/bioroebe/shell/random.rb +0 -289
- data/lib/bioroebe/shell/reset.rb +0 -335
- data/lib/bioroebe/shell/scan_and_parse.rb +0 -135
- data/lib/bioroebe/shell/search.rb +0 -337
- data/lib/bioroebe/shell/sequences.rb +0 -200
- data/lib/bioroebe/shell/show_report_and_display.rb +0 -2901
- data/lib/bioroebe/shell/startup.rb +0 -127
- data/lib/bioroebe/shell/taxonomy.rb +0 -14
- data/lib/bioroebe/shell/tk.rb +0 -23
- data/lib/bioroebe/shell/user_input.rb +0 -88
- data/lib/bioroebe/shell/xorg.rb +0 -45
@@ -4,10 +4,29 @@
|
|
4
4
|
# =========================================================================== #
|
5
5
|
# === Bioroebe::GenbankParser
|
6
6
|
#
|
7
|
-
# This class can be used to parse genbank-files
|
8
|
-
#
|
7
|
+
# This class can be used to parse genbank-files (typically stored as .genbank
|
8
|
+
# or .gbk, so their file extension is usually ".gbk").
|
9
|
+
#
|
10
|
+
# As of the rewrite in July 2022 the class can also handle multiple
|
11
|
+
# fasta entries now.
|
12
|
+
#
|
13
|
+
# The class is similar to class FastaParser, but instead it will only
|
14
|
+
# select the content between "ORIGIN" and "VERSION" entries.
|
15
|
+
#
|
16
|
+
# The user can pass the content of a genbank-file to this class, and it
|
17
|
+
# can then report the nucleotide sequence, e. g. the part starting after
|
18
|
+
# the ORIGIN string.
|
19
|
+
#
|
20
|
+
# The reason why this class has been created was because it is sometimes
|
21
|
+
# necessary to parse a genbank file.
|
22
|
+
#
|
23
|
+
# Usage example:
|
24
|
+
#
|
25
|
+
# Bioroebe::GenbankParser.new(ARGV)
|
26
|
+
#
|
9
27
|
# =========================================================================== #
|
10
|
-
# require 'bioroebe/
|
28
|
+
# require 'bioroebe/parsers/genbank_parser.rb'
|
29
|
+
# genbank_parser = Bioroebe::GenbankParser.new(ARGV)
|
11
30
|
# =========================================================================== #
|
12
31
|
require 'bioroebe/base/commandline_application/commandline_application.rb'
|
13
32
|
|
@@ -20,59 +39,369 @@ class GenbankParser < ::Bioroebe::CommandlineApplication # === Bioroebe::Genbank
|
|
20
39
|
# ========================================================================= #
|
21
40
|
NAMESPACE = inspect
|
22
41
|
|
42
|
+
# ========================================================================= #
|
43
|
+
# === UPCASE_THE_SEQUENCE
|
44
|
+
#
|
45
|
+
# Setting this constant to true will cause this class to store the
|
46
|
+
# FASTA sequence in an upcased variant, e. g. "AGCAGCTA" rather
|
47
|
+
# than "acgatcag".
|
48
|
+
# ========================================================================= #
|
49
|
+
UPCASE_THE_SEQUENCE = true
|
50
|
+
|
51
|
+
# ========================================================================= #
|
52
|
+
# === TEST_STRING
|
53
|
+
#
|
54
|
+
# Our example test-string, to show what such a genbank file usually looks
|
55
|
+
# like.
|
56
|
+
#
|
57
|
+
# This will contain two different FASTA sequences.
|
58
|
+
# ========================================================================= #
|
59
|
+
TEST_STRING = ' /note="internal transcribed spacer 2"
|
60
|
+
ORIGIN
|
61
|
+
1 cgtaacaagg tttccgtagg tgaaccttcg gaaggatcat tgttgagacc cccaaaaaaa
|
62
|
+
61 cgatcgagtt aatccggagg accggtgtag tttggtctcc caggggcttt ggctactgtg
|
63
|
+
121 gtggccgtga atttccgtcg aacctccttg ggagaattct tgatggcaat tgaacccttg
|
64
|
+
181 gcccggcgca gtttcgcccc aagtcaaatg agatggaacc ggcggagggc atcgtcctcc
|
65
|
+
241 atggaaccgg ggagggccgg cgttcttccg ttccccccat gaattttttt ttgacaactc
|
66
|
+
301 tcggcaacgg atatctcggc tctttgcatc cgatgaaaga acccagcgaa atgtgataag
|
67
|
+
361 tggtgtgaat tgcagaatcc cgtgaaccat cgagtctttg aacgcaagtt gcgcccgagg
|
68
|
+
421 ccatcaggct aagggcacgc ctgcctgggc gttgcgtgct gcatctctct cccattgcta
|
69
|
+
481 aggctgaaca ggcatactgt tcggccggcg cggatgagtg tttggcccct tgttcttcgg
|
70
|
+
541 tgcgatgggt ccaagacctg ggcttttgac ggccggaaat ccggcaagag gtggacggac
|
71
|
+
601 ggtggctgcg acgaagctgt cgtgcgaatg ccctacgctg tcgtatttga tgggccggaa
|
72
|
+
661 taaatccctt ttgagcccca ttggaggcac gtcaacccgt gggcggtcga cggccatttg
|
73
|
+
721 gatgcaaccc caggtcaggt gagga
|
74
|
+
//
|
75
|
+
LOCUS Z78510 750 bp DNA linear PLN 30-NOV-2006
|
76
|
+
DEFINITION P.caricinum 5.8S rRNA gene and ITS1 and ITS2 DNA.
|
77
|
+
ACCESSION Z78510
|
78
|
+
VERSION Z78510.1 GI:2765635
|
79
|
+
KEYWORDS 5.8S ribosomal RNA; 5.8S rRNA gene; internal transcribed spacer;
|
80
|
+
ITS1; ITS2.
|
81
|
+
SOURCE Phragmipedium caricinum
|
82
|
+
ORGANISM Phragmipedium caricinum
|
83
|
+
Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
|
84
|
+
Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae;
|
85
|
+
Cypripedioideae; Phragmipedium.
|
86
|
+
REFERENCE 1
|
87
|
+
AUTHORS Cox,A.V., Pridgeon,A.M., Albert,V.A. and Chase,M.W.
|
88
|
+
TITLE Phylogenetics of the slipper orchids (Cypripedioideae:
|
89
|
+
Orchidaceae): nuclear rDNA ITS sequences
|
90
|
+
JOURNAL Unpublished
|
91
|
+
REFERENCE 2 (bases 1 to 750)
|
92
|
+
AUTHORS Cox,A.V.
|
93
|
+
TITLE Direct Submission
|
94
|
+
JOURNAL Submitted (19-AUG-1996) Cox A.V., Royal Botanic Gardens, Kew,
|
95
|
+
Richmond, Surrey TW9 3AB, UK
|
96
|
+
FEATURES Location/Qualifiers
|
97
|
+
source 1..750
|
98
|
+
/organism="Phragmipedium caricinum"
|
99
|
+
/mol_type="genomic DNA"
|
100
|
+
/db_xref="taxon:53127"
|
101
|
+
misc_feature 1..380
|
102
|
+
/note="internal transcribed spacer 1"
|
103
|
+
gene 381..550
|
104
|
+
/gene="5.8S rRNA"
|
105
|
+
rRNA 381..550
|
106
|
+
/gene="5.8S rRNA"
|
107
|
+
/product="5.8S ribosomal RNA"
|
108
|
+
misc_feature 551..750
|
109
|
+
/note="internal transcribed spacer 2"
|
110
|
+
ORIGIN
|
111
|
+
1 ctaaccaggg ttccgaggtg accttcggga ggattccttt ttaagccccc gaaaaaacga
|
112
|
+
61 tcgaattaaa ccggaggacc ggtttaattt ggtctcccca ggggctttcc ccccttggtg
|
113
|
+
121 gccgtgaatt tccatcgaac ccccctggga gaattcttgg tggccaatgg acccttggcc
|
114
|
+
181 cggcgcaatt tcccccccaa tcaaatgaga taggaccggc agggggcgtc cccccccatg
|
115
|
+
241 gaaccgggga gggccggcat tcttccgttc ccccctcgga ttttttgaca actctcgcaa
|
116
|
+
301 cggatatctc gcctctttgc atcggatgga agaacgcagc gaaatgtgat aagtggtgtg
|
117
|
+
361 aattgcagaa tcccgtgaac catcgagtct ttgaacgcaa gttgcgcccg aggccatcag
|
118
|
+
421 gctaagggca cgcctgcctg ggcgttgcgt gctgcatctc tcccattgct aaggttgaac
|
119
|
+
481 gggcatactg ttcggccggc gcggatgaga gattggcccc ttgttcttcg gtgcgatggg
|
120
|
+
541 tccaagacct gggcttttga cggtccaaaa tccggcaaga ggtggacgga cggtggctgc
|
121
|
+
601 gacaaagctg tcgtgcgaat gccctgcgtt gtcgtttttg atgggccgga ataaatccct
|
122
|
+
661 tttgaacccc attggaggca cgtcaaccca tgggcggttg acggccattt ggatgcaacc
|
123
|
+
721 ccaggtcagg tgagccaccc gctgagttta
|
124
|
+
//
|
125
|
+
LOCUS Z78509 731 bp DNA linear PLN 30-NOV-2006
|
126
|
+
DEFINITION P.pearcei 5.8S rRNA gene and ITS1 and ITS2 DNA.
|
127
|
+
ACCESSION Z78509
|
128
|
+
VERSION Z78509.1 GI:2765634
|
129
|
+
KEYWORDS 5.8S ribosomal RNA; 5.8S rRNA gene; internal transcribed spacer;
|
130
|
+
ITS1; ITS2.
|
131
|
+
SOURCE Phragmipedium pearcei
|
132
|
+
ORGANISM Phragmipedium pearcei
|
133
|
+
Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
|
134
|
+
Spermatophyta; Magnoliophyta; Liliopsida; Asparagales; Orchidaceae;
|
135
|
+
Cypripedioideae; Phragmipedium.
|
136
|
+
REFERENCE 1
|
137
|
+
AUTHORS Cox,A.V., Pridgeon,A.M., Albert,V.A. and Chase,M.W.
|
138
|
+
TITLE Phylogenetics of the slipper orchids (Cypripedioideae:
|
139
|
+
Orchidaceae): nuclear rDNA ITS sequences
|
140
|
+
JOURNAL Unpublished
|
141
|
+
REFERENCE 2 (bases 1 to 731)
|
142
|
+
AUTHORS Cox,A.V.
|
143
|
+
TITLE Direct Submission
|
144
|
+
JOURNAL Submitted (19-AUG-1996) Cox A.V., Royal Botanic Gardens, Kew,
|
145
|
+
Richmond, Surrey TW9 3AB, UK
|
146
|
+
FEATURES Location/Qualifiers
|
147
|
+
source 1..731
|
148
|
+
/organism="Phragmipedium pearcei"
|
149
|
+
/mol_type="genomic DNA"
|
150
|
+
/db_xref="taxon:53135"
|
151
|
+
misc_feature 1..380
|
152
|
+
/note="internal transcribed spacer 1"
|
153
|
+
gene 381..550
|
154
|
+
/gene="5.8S rRNA"
|
155
|
+
rRNA 381..550
|
156
|
+
/gene="5.8S rRNA"
|
157
|
+
/product="5.8S ribosomal RNA"
|
158
|
+
misc_feature 551..731
|
159
|
+
/note="internal transcribed spacer 2"
|
160
|
+
ORIGIN
|
161
|
+
1 cgtaacaagg tttccgtagg tgaacctgcg gaaggatcat tgttgagacc gccaaatata
|
162
|
+
61 cgatcgagtt aatccggagg accggtgtag tttggtctcc caggggcttt cgccgctgtg
|
163
|
+
121 gtgaccgtga tttgccatcg agcctccttg ggagatttct tgatggcaat tgaacccttg
|
164
|
+
181 gcccggcgca gtttcgcgcc aagtcatatg agatagaacc ggcggagggc gtcgtcctcc
|
165
|
+
241 atggagcggg gagggccggc atgctccgtg cccccccatg aatttttctg acaactctcg
|
166
|
+
301 gcaacggacg taacaaggtt taaatgtgat aagcaggtgt gaattgcaga atcccgtgaa
|
167
|
+
361 ccatcgagtc tttgaacgca agttgcgccc gaggccatca ggttaagggc acgcctgcct
|
168
|
+
421 gggcgttgcg tgctgcatct ctcccattgc taaggttgaa cgggcatact gttcggccgg
|
169
|
+
481 cgcggatgag agtttggccc cttgttcttc ggtgcgatgg gtccaagacc tgggcttttg
|
170
|
+
541 acggtccaaa atccggcaag aggtggacgg acggtggctg cgacagagct gtcgtgcgaa
|
171
|
+
601 tgccctacgt tgtcgttttt gatgggccag aataaatccc ttttgaaccc cattggaggc
|
172
|
+
661 acgtcaaccc aatggggggt gacgggcatt tggttaaccc cggcaagtta aggcacccgt
|
173
|
+
721 taattttagg a
|
174
|
+
//
|
175
|
+
LOCUS Z78508 741 bp DNA linear PLN 30-NOV-2006'
|
176
|
+
|
23
177
|
# ========================================================================= #
|
24
178
|
# === initialize
|
25
179
|
# ========================================================================= #
|
26
180
|
def initialize(
|
27
|
-
commandline_arguments =
|
181
|
+
commandline_arguments = nil,
|
28
182
|
run_already = true
|
29
183
|
)
|
30
184
|
reset
|
31
185
|
set_commandline_arguments(
|
32
186
|
commandline_arguments
|
33
187
|
)
|
188
|
+
menu
|
189
|
+
if block_given?
|
190
|
+
yielded = yield
|
191
|
+
case yielded
|
192
|
+
# ===================================================================== #
|
193
|
+
# === :do_not_report_anything
|
194
|
+
# ===================================================================== #
|
195
|
+
when :do_not_report_anything
|
196
|
+
@internal_hash[:report_the_dataset] = false
|
197
|
+
end
|
198
|
+
end
|
34
199
|
run if run_already
|
35
200
|
end
|
36
201
|
|
37
202
|
# ========================================================================= #
|
38
|
-
# === reset
|
203
|
+
# === reset (reset tag)
|
39
204
|
# ========================================================================= #
|
40
205
|
def reset
|
41
206
|
super()
|
207
|
+
# ======================================================================= #
|
208
|
+
# === @namespace
|
209
|
+
# ======================================================================= #
|
210
|
+
@namespace = NAMESPACE
|
211
|
+
# ======================================================================= #
|
212
|
+
# === @internal_hash
|
213
|
+
# ======================================================================= #
|
214
|
+
@internal_hash = {}
|
215
|
+
# ======================================================================= #
|
216
|
+
# === :work_on_this_file
|
217
|
+
# ======================================================================= #
|
218
|
+
@internal_hash[:work_on_this_file] = nil
|
219
|
+
# ======================================================================= #
|
220
|
+
# === :report_the_dataset
|
221
|
+
# ======================================================================= #
|
222
|
+
@internal_hash[:report_the_dataset] = true
|
223
|
+
# ======================================================================= #
|
224
|
+
# === :n_FASTA_entries_in_the_file
|
225
|
+
#
|
226
|
+
# This variable will keep track of how many FASTA entries are in
|
227
|
+
# the genbank file at hand.
|
228
|
+
# ======================================================================= #
|
229
|
+
@internal_hash[:n_FASTA_entries_in_the_file] = 0
|
230
|
+
# ======================================================================= #
|
231
|
+
# === :dataset_from_all_FASTA_entries_as_a_hash
|
232
|
+
#
|
233
|
+
# This hash will contain all the FASTA sequences in the given
|
234
|
+
# genbank file at hand. This constitutes the main dataset of
|
235
|
+
# this class.
|
236
|
+
# ======================================================================= #
|
237
|
+
@internal_hash[:dataset_from_all_FASTA_entries_as_a_hash] = {}
|
42
238
|
end
|
43
239
|
|
44
240
|
# ========================================================================= #
|
45
|
-
# ===
|
241
|
+
# === menu (menu tag)
|
46
242
|
# ========================================================================= #
|
47
|
-
def
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
243
|
+
def menu(
|
244
|
+
i = commandline_arguments_containing_leading_hyphens?
|
245
|
+
)
|
246
|
+
if i.is_a? Array
|
247
|
+
i.each {|entry| menu(entry) }
|
248
|
+
else
|
249
|
+
case i # (case tag)
|
250
|
+
# ===================================================================== #
|
251
|
+
# === gparser --help
|
252
|
+
# ===================================================================== #
|
253
|
+
when /^-?-?help$/i
|
254
|
+
show_help
|
255
|
+
exit
|
256
|
+
# ===================================================================== #
|
257
|
+
# === gparser --test
|
258
|
+
#
|
259
|
+
# This entry point can be used to test the default TEST_STRING.
|
260
|
+
# ===================================================================== #
|
261
|
+
when /^-?-?test$/i,
|
262
|
+
/^-?-?test(-|_)?string$/i
|
263
|
+
analyse_this_dataset(TEST_STRING)
|
264
|
+
exit
|
59
265
|
end
|
60
|
-
@sequence = dataset
|
61
266
|
end
|
62
267
|
end
|
63
268
|
|
269
|
+
# ========================================================================= #
|
270
|
+
# === work_on_which_file?
|
271
|
+
# ========================================================================= #
|
272
|
+
def work_on_which_file?
|
273
|
+
@internal_hash[:work_on_this_file]
|
274
|
+
end
|
275
|
+
|
276
|
+
# ========================================================================= #
|
277
|
+
# === report_the_dataset?
|
278
|
+
# ========================================================================= #
|
279
|
+
def report_the_dataset?
|
280
|
+
@internal_hash[:report_the_dataset]
|
281
|
+
end
|
282
|
+
|
283
|
+
# ========================================================================= #
|
284
|
+
# === set_work_on_this_file
|
285
|
+
# ========================================================================= #
|
286
|
+
def set_work_on_this_file(
|
287
|
+
i = first_argument?
|
288
|
+
)
|
289
|
+
@internal_hash[:work_on_this_file] = i
|
290
|
+
end
|
291
|
+
|
292
|
+
# ========================================================================= #
|
293
|
+
# === analyse_this_dataset
|
294
|
+
# ========================================================================= #
|
295
|
+
def analyse_this_dataset(dataset)
  # Scan the given String (the content of a genbank file) for every chunk
  # that starts at an "ORIGIN" keyword and ends at the next "VERSION"
  # entry, store how many such chunks were found, hand the chunks over
  # for sequence extraction and finally report the result (if enabled).
  #
  # The quantifier in front of VERSION is lazy, so that each ORIGIN is
  # paired with the nearest following VERSION; a greedy quantifier could
  # swallow several records into one single match whenever no character
  # outside of the character class happens to separate them.
  #
  # NOTE(review): a chunk ends at the VERSION line that FOLLOWS the
  # ORIGIN block, so the id belongs to the next record in the file and a
  # trailing ORIGIN block without a later VERSION is ignored - confirm
  # that this is intended.
  use_this_regex =
    /ORIGIN[\/\-\.\s0-9a-zA-Z]+?VERSION\s*[\.0-9A-Z]+/ # See: https://rubular.com/r/0q7rFIUflX7yzw
  scanned = dataset.scan(use_this_regex)
  # Store the number of entries, not the Array itself: reset() initializes
  # this slot to 0 and documents it as a counter.
  @internal_hash[:n_FASTA_entries_in_the_file] = scanned.size
  discover_the_corresponding_FASTA_entries_from_this_dataset(scanned)
  consider_reporting_our_findings_to_the_user
end; alias determine_dataset analyse_this_dataset # === determine_dataset
|
303
|
+
|
304
|
+
# ========================================================================= #
|
305
|
+
# === dataset?
|
306
|
+
# ========================================================================= #
|
307
|
+
def dataset?
|
308
|
+
@internal_hash[:dataset_from_all_FASTA_entries_as_a_hash]
|
309
|
+
end; alias main_dataset? dataset? # === main_dataset?
|
310
|
+
|
311
|
+
# ========================================================================= #
|
312
|
+
# === sequences?
|
313
|
+
# ========================================================================= #
|
314
|
+
def sequences?
|
315
|
+
dataset?.values
|
316
|
+
end
|
317
|
+
|
64
318
|
# ========================================================================= #
|
65
319
|
# === sequence?
|
66
320
|
# ========================================================================= #
|
67
321
|
def sequence?
|
68
|
-
|
322
|
+
sequences?.first
|
69
323
|
end; alias coding_sequence? sequence? # === coding_sequence?
|
70
324
|
alias cds sequence? # === cds
|
71
325
|
|
326
|
+
# ========================================================================= #
|
327
|
+
# === discover_the_corresponding_FASTA_entries_from_this_dataset
|
328
|
+
# ========================================================================= #
|
329
|
+
def discover_the_corresponding_FASTA_entries_from_this_dataset(i)
  # Fill the main dataset hash from an Array of "ORIGIN ... VERSION id"
  # chunks. For each chunk the id is whatever follows the VERSION keyword,
  # and the sequence is assembled from all lines that consist of a
  # position number followed by groups of letters. Any input that is not
  # an Array is ignored and nil is returned; otherwise the Array itself
  # is returned.
  return unless i.is_a? Array
  regex_to_use_for_the_id = /VERSION\s*([\.A-Za-z0-9]+)/
  # The pattern is matched line by line and its capture never crosses a
  # newline. The prior pattern captured whitespace including the newline
  # and the leading spaces of the following line, after which the ^ anchor
  # could not match that line any more - so every second (indented)
  # sequence line was silently skipped.
  regex_for_one_sequence_line = /^[ \t]*\d+((?:[ \t]+[a-zA-Z]+)+)[ \t\r]*$/
  i.each {|this_dataset|
    use_this_id = this_dataset[regex_to_use_for_the_id, 1].to_s.dup
    use_this_FASTA_sequence = this_dataset.scan(
      regex_for_one_sequence_line
    ).flatten.join.delete(" \t")
    use_this_FASTA_sequence.upcase! if UPCASE_THE_SEQUENCE
    @internal_hash[:dataset_from_all_FASTA_entries_as_a_hash][use_this_id] = use_this_FASTA_sequence
  }
end
|
343
|
+
|
344
|
+
# ========================================================================= #
|
345
|
+
# === verbose_check_whether_the_file_exists
|
346
|
+
# ========================================================================= #
|
347
|
+
def verbose_check_whether_the_file_exists
|
348
|
+
_ = @internal_hash[:work_on_this_file]
|
349
|
+
if _ and File.exist?(_)
|
350
|
+
true
|
351
|
+
else
|
352
|
+
opnn; e 'No file exists at '+sfile(_)
|
353
|
+
false
|
354
|
+
end
|
355
|
+
end
|
356
|
+
|
357
|
+
# ========================================================================= #
|
358
|
+
# === consider_reporting_our_findings_to_the_user (report tag)
|
359
|
+
# ========================================================================= #
|
360
|
+
def consider_reporting_our_findings_to_the_user
|
361
|
+
if report_the_dataset? and dataset? and !dataset?.empty?
|
362
|
+
main_dataset?.each_pair {|key, value|
|
363
|
+
e steelblue("#{key}:")
|
364
|
+
e lightblue(value)
|
365
|
+
e
|
366
|
+
}
|
367
|
+
end
|
368
|
+
end; alias report consider_reporting_our_findings_to_the_user # === report
|
369
|
+
alias report_the_dataset consider_reporting_our_findings_to_the_user # === report_the_dataset
|
370
|
+
|
371
|
+
# ========================================================================= #
|
372
|
+
# === run (run tag)
|
373
|
+
# ========================================================================= #
|
374
|
+
def run
|
375
|
+
set_work_on_this_file(first_argument?)
|
376
|
+
# ======================================================================= #
|
377
|
+
# First check whether the given file exists or not:
|
378
|
+
# ======================================================================= #
|
379
|
+
if verbose_check_whether_the_file_exists
|
380
|
+
original_dataset = File.read(@internal_hash[:work_on_this_file]) # Just store it completely.
|
381
|
+
if original_dataset.include?('ORIGIN') and original_dataset.include?('VERSION ')
|
382
|
+
analyse_this_dataset(original_dataset)
|
383
|
+
else
|
384
|
+
opnn; e 'No keywords ORIGIN and VERSION were found in this file.'
|
385
|
+
end
|
386
|
+
end
|
387
|
+
end
|
388
|
+
|
389
|
+
# ========================================================================= #
|
390
|
+
# === Bioroebe::GenbankParser[]
|
391
|
+
# ========================================================================= #
|
392
|
+
def self.[](i = '')
|
393
|
+
new(i)
|
394
|
+
end
|
395
|
+
|
72
396
|
end; end
|
73
397
|
|
74
398
|
if __FILE__ == $PROGRAM_NAME
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
399
|
+
alias e puts
|
400
|
+
genbank_parser = Bioroebe::GenbankParser.new(ARGV)
|
401
|
+
# genbank_parser = Bioroebe::GenbankParser.new('/home/Temp/bioroebe/ls_orchid.gbk')
|
402
|
+
e genbank_parser.sequence?
|
403
|
+
# e _.id
|
404
|
+
end # genbankparser *genbank
|
405
|
+
# genbankparser
|
406
|
+
# genbankparser sample_file.genbank
|
407
|
+
# genbankparser --test
|
@@ -0,0 +1 @@
|
|
1
|
+
This directory will contain some python-specific code.
|
Binary file
|
@@ -0,0 +1,22 @@
|
|
1
|
+
import gi
|
2
|
+
|
3
|
+
gi.require_version("Gtk", "3.0")
|
4
|
+
from gi.repository import Gtk
|
5
|
+
|
6
|
+
|
7
|
+
class MyWindow(Gtk.Window):
|
8
|
+
def __init__(self):
|
9
|
+
super().__init__(title="Hello World")
|
10
|
+
|
11
|
+
self.button = Gtk.Button(label="Click Here")
|
12
|
+
self.button.connect("clicked", self.on_button_clicked)
|
13
|
+
self.add(self.button)
|
14
|
+
|
15
|
+
def on_button_clicked(self, widget):
|
16
|
+
print("Hello World")
|
17
|
+
|
18
|
+
|
19
|
+
win = MyWindow()
|
20
|
+
win.connect("destroy", Gtk.main_quit)
|
21
|
+
win.show_all()
|
22
|
+
Gtk.main()
|
@@ -0,0 +1,30 @@
|
|
1
|
+
import sys
|
2
|
+
import yaml
|
3
|
+
|
4
|
+
# =========================================================================== #
|
5
|
+
# === protein_to_dna
|
6
|
+
# =========================================================================== #
|
7
|
+
def protein_to_dna(i, codon_table_path=None):
    """Print one DNA codon per amino acid of ``i``, joined by '-'.

    Parameters:
        i: the amino acid sequence in one-letter code, e.g. "KKKLLL".
        codon_table_path: optional path to a YAML codon table that maps
            codons to amino acids. When omitted, the table at
            ../yaml/codon_tables/1.yml relative to this module is used;
            if that file is missing, the historic hard-coded location
            is tried instead.

    Raises:
        KeyError: if an amino acid is not present in the codon table.
        OSError: if the codon table cannot be opened.
    """
    import os  # Local import: only needed to locate the codon table.

    if codon_table_path is None:
        packaged_table = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            os.pardir, 'yaml', 'codon_tables', '1.yml'
        )
        legacy_table = r'/home/x/programming/ruby/src/bioroebe/lib/bioroebe/yaml/codon_tables/1.yml'
        codon_table_path = packaged_table if os.path.exists(packaged_table) else legacy_table

    with open(codon_table_path) as file:
        # The FullLoader parameter handles the conversion from YAML
        # scalar values to the Python dictionary format.
        # NOTE(review): the path may now be supplied by the caller;
        # consider yaml.safe_load if untrusted files can end up here.
        yaml_dataset = yaml.load(file, Loader=yaml.FullLoader)

    # Invert the codon -> amino acid table. When several codons encode the
    # same amino acid, the one listed last in the table wins.
    aminoacid_to_codon = {v: k for k, v in yaml_dataset.items()}

    # Look up one codon per amino acid and separate the codons via '-'.
    output_string = '-'.join(
        aminoacid_to_codon[this_aminoacid] for this_aminoacid in i
    )
    print(output_string)
|
26
|
+
|
27
|
+
if __name__ == '__main__':
|
28
|
+
i = sys.argv[1]
|
29
|
+
protein_to_dna(i)
|
30
|
+
# py protein_to_dna.py KKKLLL
|
@@ -0,0 +1,19 @@
|
|
1
|
+
#!/usr/bin/python
|
2
|
+
import sys
|
3
|
+
|
4
|
+
# =========================================================================== #
|
5
|
+
# === obtain_user_input
|
6
|
+
# =========================================================================== #
|
7
|
+
def obtain_user_input():
    """Show the "> " prompt and return the line typed by the user."""
    user_input = input("> ")
    return user_input

shall_we_continue = True

# Minimal read-print loop: echo every line until the user enters "q".
while shall_we_continue:
    # The function has to be called here. Without the parentheses the
    # function object itself was assigned, so the comparison against "q"
    # could never succeed and the loop would never terminate.
    result = obtain_user_input()
    if result == "q":
        shall_we_continue = False
    else:
        print(result)
|
@@ -0,0 +1,14 @@
|
|
1
|
+
import sys
|
2
|
+
|
3
|
+
# =========================================================================== #
|
4
|
+
# === to_rna
|
5
|
+
#
|
6
|
+
# This method will simply convert the given input, such as a
|
7
|
+
# nucleotide sequence, to RNA.
|
8
|
+
# =========================================================================== #
|
9
|
+
def to_rna(i):
    """Print the RNA variant of the nucleotide sequence ``i``.

    Every thymine is replaced by uracil. The case of the input is kept,
    so 'T' becomes 'U' and 't' becomes 'u'. Nothing is returned; the
    converted sequence is written to standard output.
    """
    converted = i.replace('T', 'U').replace('t', 'u')
    print(converted)

# Only act on the commandline when run as a script, so that importing
# this module neither prints anything nor fails on a missing argument.
if __name__ == '__main__':
    if len(sys.argv) > 1:
        to_rna(sys.argv[1])
    else:
        print('Usage: to_rna.py SEQUENCE', file=sys.stderr)
|
14
|
+
# py to_rna.py ATG
|
@@ -0,0 +1,11 @@
|
|
1
|
+
from re import sub
|
2
|
+
|
3
|
+
# =========================================================================== #
|
4
|
+
# === camel_case
|
5
|
+
# =========================================================================== #
|
6
|
+
def camel_case(i):
    """Return ``i`` converted to CamelCase, e.g. 'foo_bar' -> 'FooBar'.

    Runs of '_' and '-' act as word separators. Each word is
    title-cased (first letter upper case, the rest lower case) and the
    words are then joined without a separator. An empty input, or one
    that consists only of separators, yields the empty string.
    """
    joined = sub(r"(_|-)+", " ", i).title().replace(" ", "")
    if not joined:
        # Nothing left to capitalize; indexing [0] would raise IndexError.
        return joined
    return ''.join([joined[0].upper(), joined[1:]])
|
9
|
+
|
10
|
+
if __name__ == "__main__":
|
11
|
+
print(camel_case('foo_bar'))
|
@@ -72,13 +72,6 @@ module NucleotideModule # === Bioroebe::NucleotideModule
|
|
72
72
|
i.tr('U','T')
|
73
73
|
end
|
74
74
|
|
75
|
-
# ========================================================================= #
|
76
|
-
# === to_dna
|
77
|
-
# ========================================================================= #
|
78
|
-
def to_dna
|
79
|
-
seq?.tr('U','T')
|
80
|
-
end; alias dna to_dna # === dna
|
81
|
-
|
82
75
|
# ========================================================================= #
|
83
76
|
# === n_random_dna
|
84
77
|
#
|
@@ -258,24 +251,6 @@ module NucleotideModule # === Bioroebe::NucleotideModule
|
|
258
251
|
::Bioroebe.codon_to_aminoacid(codon)
|
259
252
|
end; alias translate_aminoacid_into_dna codon_to_aminoacid # === translate_aminoacid_into_dna
|
260
253
|
|
261
|
-
# ========================================================================= #
|
262
|
-
# === to_dna
|
263
|
-
#
|
264
|
-
# This method will convert a RNA sequence into a DNA sequence.
|
265
|
-
#
|
266
|
-
# It will return the translation.
|
267
|
-
#
|
268
|
-
# bio; puts Bioroebe.new.to_dna('actgggcgagagklklklklk')
|
269
|
-
# ========================================================================= #
|
270
|
-
def to_dna(
|
271
|
-
i = sequence?, upcase_me = true
|
272
|
-
)
|
273
|
-
if is_RNA?
|
274
|
-
i = sequence? if i.nil?
|
275
|
-
::Bioroebe.to_dna(i, upcase_me)
|
276
|
-
end
|
277
|
-
end; alias dna to_dna # === dna
|
278
|
-
|
279
254
|
# ========================================================================= #
|
280
255
|
# === random
|
281
256
|
#
|
@@ -397,6 +372,34 @@ module NucleotideModule # === Bioroebe::NucleotideModule
|
|
397
372
|
end
|
398
373
|
end; alias gc_percent gc_percentage # === gc_percent
|
399
374
|
|
375
|
+
# ========================================================================= #
|
376
|
+
# === to_dna
|
377
|
+
#
|
378
|
+
# This method will convert an RNA sequence into a DNA sequence.
|
379
|
+
#
|
380
|
+
# It will return the translation.
|
381
|
+
#
|
382
|
+
# Usage example:
|
383
|
+
#
|
384
|
+
# require 'bioroebe'; puts Bioroebe::DNA.new('actgggcgagaguuuuUUUUUU').to_dna
|
385
|
+
#
|
386
|
+
# ========================================================================= #
|
387
|
+
def to_dna(
|
388
|
+
i = sequence?,
|
389
|
+
upcase_me = true
|
390
|
+
)
|
391
|
+
i = sequence? if i.nil?
|
392
|
+
i = ::Bioroebe.to_dna(i, upcase_me)
|
393
|
+
return i
|
394
|
+
end; alias dna to_dna # === dna
|
395
|
+
|
396
|
+
# ========================================================================= #
|
397
|
+
# === to_dna
|
398
|
+
# ========================================================================= #
|
399
|
+
# def to_dna
|
400
|
+
# seq?.tr('U','T')
|
401
|
+
# end; alias dna to_dna # === dna
|
402
|
+
|
400
403
|
end; end
|
401
404
|
|
402
405
|
if __FILE__ == $PROGRAM_NAME
|