bio 1.2.1 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +3421 -0
- data/KNOWN_ISSUES.rdoc +88 -0
- data/README.rdoc +252 -0
- data/README_DEV.rdoc +285 -0
- data/Rakefile +143 -0
- data/bin/bioruby +0 -0
- data/bin/br_biofetch.rb +0 -0
- data/bin/br_bioflat.rb +12 -1
- data/bin/br_biogetseq.rb +0 -0
- data/bin/br_pmfetch.rb +4 -3
- data/bioruby.gemspec +477 -0
- data/bioruby.gemspec.erb +117 -0
- data/doc/Changes-0.7.rd +7 -0
- data/doc/Changes-1.3.rdoc +239 -0
- data/doc/Tutorial.rd +296 -184
- data/doc/Tutorial.rd.html +1031 -0
- data/doc/Tutorial.rd.ja +111 -45
- data/doc/Tutorial.rd.ja.html +2225 -0
- data/doc/bioruby.css +281 -0
- data/extconf.rb +2 -0
- data/lib/bio.rb +29 -4
- data/lib/bio/appl/blast.rb +306 -121
- data/lib/bio/appl/blast/ddbj.rb +142 -0
- data/lib/bio/appl/blast/format0.rb +35 -25
- data/lib/bio/appl/blast/format8.rb +2 -2
- data/lib/bio/appl/blast/genomenet.rb +263 -0
- data/lib/bio/appl/blast/ncbioptions.rb +220 -0
- data/lib/bio/appl/blast/remote.rb +106 -0
- data/lib/bio/appl/blast/report.rb +260 -9
- data/lib/bio/appl/blast/rexml.rb +12 -5
- data/lib/bio/appl/blast/rpsblast.rb +277 -0
- data/lib/bio/appl/blast/wublast.rb +133 -12
- data/lib/bio/appl/blast/xmlparser.rb +35 -18
- data/lib/bio/appl/blat/report.rb +46 -5
- data/lib/bio/appl/emboss.rb +62 -13
- data/lib/bio/appl/fasta.rb +9 -11
- data/lib/bio/appl/genscan/report.rb +3 -3
- data/lib/bio/appl/hmmer.rb +1 -1
- data/lib/bio/appl/hmmer/report.rb +10 -10
- data/lib/bio/appl/paml/baseml.rb +95 -0
- data/lib/bio/appl/paml/baseml/report.rb +32 -0
- data/lib/bio/appl/paml/codeml.rb +242 -0
- data/lib/bio/appl/paml/codeml/rates.rb +67 -0
- data/lib/bio/appl/paml/codeml/report.rb +67 -0
- data/lib/bio/appl/paml/common.rb +348 -0
- data/lib/bio/appl/paml/common_report.rb +38 -0
- data/lib/bio/appl/paml/yn00.rb +103 -0
- data/lib/bio/appl/paml/yn00/report.rb +32 -0
- data/lib/bio/appl/psort.rb +2 -2
- data/lib/bio/appl/pts1.rb +5 -5
- data/lib/bio/appl/tmhmm/report.rb +10 -1
- data/lib/bio/command.rb +297 -41
- data/lib/bio/compat/features.rb +157 -0
- data/lib/bio/compat/references.rb +128 -0
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
- data/lib/bio/db/biosql/sequence.rb +508 -0
- data/lib/bio/db/embl/common.rb +28 -12
- data/lib/bio/db/embl/embl.rb +107 -9
- data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
- data/lib/bio/db/embl/format_embl.rb +190 -0
- data/lib/bio/db/embl/sptr.rb +15 -16
- data/lib/bio/db/fantom.rb +6 -8
- data/lib/bio/db/fasta.rb +10 -507
- data/lib/bio/db/fasta/defline.rb +532 -0
- data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
- data/lib/bio/db/fasta/format_fasta.rb +97 -0
- data/lib/bio/db/genbank/common.rb +25 -8
- data/lib/bio/db/genbank/format_genbank.rb +187 -0
- data/lib/bio/db/genbank/genbank.rb +36 -1
- data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
- data/lib/bio/db/gff.rb +1791 -119
- data/lib/bio/db/kegg/glycan.rb +2 -6
- data/lib/bio/db/lasergene.rb +3 -3
- data/lib/bio/db/medline.rb +4 -1
- data/lib/bio/db/newick.rb +10 -10
- data/lib/bio/db/pdb/chain.rb +6 -2
- data/lib/bio/db/pdb/pdb.rb +12 -3
- data/lib/bio/db/rebase.rb +7 -8
- data/lib/bio/db/soft.rb +3 -3
- data/lib/bio/feature.rb +1 -88
- data/lib/bio/io/biosql/biodatabase.rb +64 -0
- data/lib/bio/io/biosql/bioentry.rb +29 -0
- data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
- data/lib/bio/io/biosql/bioentry_path.rb +12 -0
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
- data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
- data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
- data/lib/bio/io/biosql/biosequence.rb +11 -0
- data/lib/bio/io/biosql/comment.rb +7 -0
- data/lib/bio/io/biosql/config/database.yml +20 -0
- data/lib/bio/io/biosql/dbxref.rb +13 -0
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
- data/lib/bio/io/biosql/location.rb +32 -0
- data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
- data/lib/bio/io/biosql/ontology.rb +10 -0
- data/lib/bio/io/biosql/reference.rb +9 -0
- data/lib/bio/io/biosql/seqfeature.rb +32 -0
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
- data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
- data/lib/bio/io/biosql/taxon.rb +12 -0
- data/lib/bio/io/biosql/taxon_name.rb +9 -0
- data/lib/bio/io/biosql/term.rb +27 -0
- data/lib/bio/io/biosql/term_dbxref.rb +11 -0
- data/lib/bio/io/biosql/term_path.rb +12 -0
- data/lib/bio/io/biosql/term_relationship.rb +13 -0
- data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
- data/lib/bio/io/biosql/term_synonym.rb +10 -0
- data/lib/bio/io/das.rb +7 -7
- data/lib/bio/io/ddbjxml.rb +57 -0
- data/lib/bio/io/ensembl.rb +2 -2
- data/lib/bio/io/fetch.rb +28 -14
- data/lib/bio/io/flatfile.rb +17 -853
- data/lib/bio/io/flatfile/autodetection.rb +545 -0
- data/lib/bio/io/flatfile/buffer.rb +237 -0
- data/lib/bio/io/flatfile/index.rb +17 -7
- data/lib/bio/io/flatfile/indexer.rb +30 -12
- data/lib/bio/io/flatfile/splitter.rb +297 -0
- data/lib/bio/io/hinv.rb +442 -0
- data/lib/bio/io/keggapi.rb +2 -2
- data/lib/bio/io/ncbirest.rb +733 -0
- data/lib/bio/io/pubmed.rb +34 -80
- data/lib/bio/io/registry.rb +2 -2
- data/lib/bio/io/sql.rb +178 -357
- data/lib/bio/io/togows.rb +458 -0
- data/lib/bio/location.rb +106 -11
- data/lib/bio/pathway.rb +120 -14
- data/lib/bio/reference.rb +115 -101
- data/lib/bio/sequence.rb +164 -183
- data/lib/bio/sequence/adapter.rb +108 -0
- data/lib/bio/sequence/common.rb +22 -45
- data/lib/bio/sequence/compat.rb +2 -2
- data/lib/bio/sequence/dblink.rb +54 -0
- data/lib/bio/sequence/format.rb +254 -77
- data/lib/bio/sequence/format_raw.rb +23 -0
- data/lib/bio/shell.rb +3 -1
- data/lib/bio/shell/core.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +33 -4
- data/lib/bio/shell/plugin/ncbirest.rb +64 -0
- data/lib/bio/shell/plugin/togows.rb +40 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
- data/lib/bio/tree.rb +4 -2
- data/lib/bio/util/color_scheme.rb +2 -2
- data/lib/bio/util/contingency_table.rb +2 -2
- data/lib/bio/util/restriction_enzyme.rb +2 -2
- data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
- data/lib/bio/version.rb +25 -0
- data/rdoc.zsh +8 -0
- data/sample/any2fasta.rb +0 -0
- data/sample/biofetch.rb +0 -0
- data/sample/dbget +0 -0
- data/sample/demo_sequence.rb +158 -0
- data/sample/enzymes.rb +0 -0
- data/sample/fasta2tab.rb +0 -0
- data/sample/fastagrep.rb +72 -0
- data/sample/fastasort.rb +54 -0
- data/sample/fsplit.rb +0 -0
- data/sample/gb2fasta.rb +2 -3
- data/sample/gb2tab.rb +0 -0
- data/sample/gbtab2mysql.rb +0 -0
- data/sample/genes2nuc.rb +0 -0
- data/sample/genes2pep.rb +0 -0
- data/sample/genes2tab.rb +0 -0
- data/sample/genome2rb.rb +0 -0
- data/sample/genome2tab.rb +0 -0
- data/sample/goslim.rb +0 -0
- data/sample/gt2fasta.rb +0 -0
- data/sample/na2aa.rb +34 -0
- data/sample/pmfetch.rb +0 -0
- data/sample/pmsearch.rb +0 -0
- data/sample/ssearch2tab.rb +0 -0
- data/sample/tfastx2tab.rb +0 -0
- data/sample/vs-genes.rb +0 -0
- data/setup.rb +1596 -0
- data/test/data/blast/blastp-multi.m7 +188 -0
- data/test/data/command/echoarg2.bat +1 -0
- data/test/data/paml/codeml/control_file.txt +30 -0
- data/test/data/paml/codeml/output.txt +78 -0
- data/test/data/paml/codeml/rates +217 -0
- data/test/data/rpsblast/misc.rpsblast +193 -0
- data/test/data/soft/GDS100_partial.soft +0 -0
- data/test/data/soft/GSE3457_family_partial.soft +0 -0
- data/test/functional/bio/appl/test_pts1.rb +115 -0
- data/test/functional/bio/io/test_ensembl.rb +123 -80
- data/test/functional/bio/io/test_togows.rb +267 -0
- data/test/functional/bio/sequence/test_output_embl.rb +51 -0
- data/test/functional/bio/test_command.rb +301 -0
- data/test/runner.rb +17 -1
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
- data/test/unit/bio/appl/blast/test_report.rb +753 -35
- data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
- data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
- data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
- data/test/unit/bio/appl/test_blast.rb +135 -4
- data/test/unit/bio/appl/test_fasta.rb +2 -2
- data/test/unit/bio/appl/test_pts1.rb +1 -64
- data/test/unit/bio/db/embl/test_common.rb +15 -15
- data/test/unit/bio/db/embl/test_embl.rb +4 -4
- data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
- data/test/unit/bio/db/embl/test_sptr.rb +38 -1
- data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
- data/test/unit/bio/db/test_gff.rb +1151 -25
- data/test/unit/bio/db/test_medline.rb +127 -0
- data/test/unit/bio/db/test_nexus.rb +5 -1
- data/test/unit/bio/db/test_prosite.rb +4 -4
- data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
- data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
- data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
- data/test/unit/bio/io/test_ddbjxml.rb +8 -3
- data/test/unit/bio/io/test_fastacmd.rb +5 -5
- data/test/unit/bio/io/test_flatfile.rb +357 -106
- data/test/unit/bio/io/test_soapwsdl.rb +2 -2
- data/test/unit/bio/io/test_togows.rb +161 -0
- data/test/unit/bio/sequence/test_common.rb +210 -11
- data/test/unit/bio/sequence/test_compat.rb +3 -3
- data/test/unit/bio/sequence/test_dblink.rb +58 -0
- data/test/unit/bio/sequence/test_na.rb +2 -2
- data/test/unit/bio/test_command.rb +111 -50
- data/test/unit/bio/test_feature.rb +29 -1
- data/test/unit/bio/test_location.rb +566 -6
- data/test/unit/bio/test_pathway.rb +91 -65
- data/test/unit/bio/test_reference.rb +67 -13
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
- metadata +202 -167
- data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
data/lib/bio/db/embl/common.rb
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
# Mitsuteru C. Nakao <n@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: common.rb,v 1.12
|
8
|
+
# $Id: common.rb,v 1.12.2.5 2008/05/07 12:22:10 ngoto Exp $
|
9
9
|
#
|
10
10
|
# == Description
|
11
11
|
#
|
@@ -73,6 +73,7 @@
|
|
73
73
|
|
74
74
|
require 'bio/db'
|
75
75
|
require 'bio/reference'
|
76
|
+
require 'bio/compat/references'
|
76
77
|
|
77
78
|
module Bio
|
78
79
|
class EMBLDB
|
@@ -270,33 +271,48 @@ module Common
|
|
270
271
|
def references
|
271
272
|
unless @data['references']
|
272
273
|
ary = self.ref.map {|ent|
|
273
|
-
hash = Hash.new
|
274
|
+
hash = Hash.new
|
274
275
|
ent.each {|key, value|
|
275
276
|
case key
|
277
|
+
when 'RN'
|
278
|
+
if /\[(\d+)\]/ =~ value.to_s
|
279
|
+
hash['embl_gb_record_number'] = $1.to_i
|
280
|
+
end
|
281
|
+
when 'RC'
|
282
|
+
unless value.to_s.strip.empty?
|
283
|
+
hash['comments'] ||= []
|
284
|
+
hash['comments'].push value
|
285
|
+
end
|
286
|
+
when 'RP'
|
287
|
+
hash['sequence_position'] = value
|
276
288
|
when 'RA'
|
277
|
-
|
289
|
+
a = value.split(/\, /)
|
290
|
+
a.each do |x|
|
291
|
+
x.sub!(/( [^ ]+)\z/, ",\\1")
|
292
|
+
end
|
293
|
+
hash['authors'] = a
|
278
294
|
when 'RT'
|
279
295
|
hash['title'] = value
|
280
296
|
when 'RL'
|
281
|
-
if
|
282
|
-
hash['journal'] = $1
|
297
|
+
if /(.*) (\d+) *(\(([^\)]+)\))?(\, |\:)([a-zA-Z\d]+\-[a-zA-Z\d]+) *\((\d+)\)\.?\z/ =~ value.to_s
|
298
|
+
hash['journal'] = $1.rstrip
|
283
299
|
hash['volume'] = $2
|
284
|
-
hash['issue'] = $
|
285
|
-
hash['pages'] = $
|
286
|
-
hash['year'] = $
|
300
|
+
hash['issue'] = $4
|
301
|
+
hash['pages'] = $6
|
302
|
+
hash['year'] = $7
|
287
303
|
else
|
288
304
|
hash['journal'] = value
|
289
305
|
end
|
290
|
-
when 'RX' # PUBMED,
|
291
|
-
value.split(
|
292
|
-
tag, xref = item.split(
|
306
|
+
when 'RX' # PUBMED, DOI, (AGRICOLA)
|
307
|
+
value.split(/\. /).each {|item|
|
308
|
+
tag, xref = item.split(/\; /).map {|i| i.strip.sub(/\.\z/, '') }
|
293
309
|
hash[ tag.downcase ] = xref
|
294
310
|
}
|
295
311
|
end
|
296
312
|
}
|
297
313
|
Reference.new(hash)
|
298
314
|
}
|
299
|
-
@data['references'] =
|
315
|
+
@data['references'] = ary.extend(Bio::References::BackwardCompatibility)
|
300
316
|
end
|
301
317
|
@data['references']
|
302
318
|
end
|
data/lib/bio/db/embl/embl.rb
CHANGED
@@ -2,10 +2,12 @@
|
|
2
2
|
# = bio/db/embl/embl.rb - EMBL database class
|
3
3
|
#
|
4
4
|
#
|
5
|
-
# Copyright:: Copyright (C) 2001-2007
|
5
|
+
# Copyright:: Copyright (C) 2001-2007
|
6
|
+
# Mitsuteru C. Nakao <n@bioruby.org>
|
7
|
+
# Jan Aerts <jan.aerts@bbsrc.ac.uk>
|
6
8
|
# License:: The Ruby License
|
7
9
|
#
|
8
|
-
# $Id: embl.rb,v 1.29
|
10
|
+
# $Id: embl.rb,v 1.29.2.7 2008/06/17 16:04:36 ngoto Exp $
|
9
11
|
#
|
10
12
|
# == Description
|
11
13
|
#
|
@@ -29,8 +31,13 @@
|
|
29
31
|
# http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html
|
30
32
|
#
|
31
33
|
|
34
|
+
require 'date'
|
32
35
|
require 'bio/db'
|
33
36
|
require 'bio/db/embl/common'
|
37
|
+
require 'bio/compat/features'
|
38
|
+
require 'bio/compat/references'
|
39
|
+
require 'bio/sequence'
|
40
|
+
require 'bio/sequence/dblink'
|
34
41
|
|
35
42
|
module Bio
|
36
43
|
class EMBL < EMBLDB
|
@@ -120,6 +127,14 @@ class EMBL < EMBLDB
|
|
120
127
|
end
|
121
128
|
alias molecule_type molecule
|
122
129
|
|
130
|
+
def data_class
|
131
|
+
id_line('DATA_CLASS')
|
132
|
+
end
|
133
|
+
|
134
|
+
def topology
|
135
|
+
id_line('TOPOLOGY')
|
136
|
+
end
|
137
|
+
|
123
138
|
# returns DIVISION in the ID line.
|
124
139
|
# * Bio::EMBL#division -> String
|
125
140
|
def division
|
@@ -221,8 +236,8 @@ class EMBL < EMBLDB
|
|
221
236
|
# RN RC RP RX RA RT RL
|
222
237
|
#
|
223
238
|
# Bio::EMBLDB#ref
|
224
|
-
|
225
|
-
|
239
|
+
|
240
|
+
|
226
241
|
##
|
227
242
|
# DR Line; database cross-reference (>=0)
|
228
243
|
# "DR database_identifier; primary_identifier; secondary_identifier."
|
@@ -246,7 +261,6 @@ class EMBL < EMBLDB
|
|
246
261
|
# FT Line; feature table data (>=0)
|
247
262
|
def ft
|
248
263
|
unless @data['FT']
|
249
|
-
@data['FT'] = Array.new
|
250
264
|
ary = Array.new
|
251
265
|
in_quote = false
|
252
266
|
@orig['FT'].each_line do |line|
|
@@ -276,7 +290,7 @@ class EMBL < EMBLDB
|
|
276
290
|
parse_qualifiers(subary)
|
277
291
|
end
|
278
292
|
|
279
|
-
@data['FT'] =
|
293
|
+
@data['FT'] = ary.extend(Bio::Features::BackwardCompatibility)
|
280
294
|
end
|
281
295
|
if block_given?
|
282
296
|
@data['FT'].each do |feature|
|
@@ -311,9 +325,9 @@ class EMBL < EMBLDB
|
|
311
325
|
#
|
312
326
|
# CC Line; comments or notes (>=0)
|
313
327
|
def cc
|
314
|
-
get('CC')
|
328
|
+
get('CC').to_s.gsub(/^CC /, '')
|
315
329
|
end
|
316
|
-
|
330
|
+
alias comment cc
|
317
331
|
|
318
332
|
##
|
319
333
|
# XX Line; spacer line (many)
|
@@ -355,13 +369,96 @@ class EMBL < EMBLDB
|
|
355
369
|
# @orig[''] as sequence
|
356
370
|
# bb Line; (blanks) sequence data (>=1)
|
357
371
|
def seq
|
358
|
-
Sequence::NA.new( fetch('').gsub(/ /,'').gsub(/\d+/,'') )
|
372
|
+
Bio::Sequence::NA.new( fetch('').gsub(/ /,'').gsub(/\d+/,'') )
|
359
373
|
end
|
360
374
|
alias naseq seq
|
361
375
|
alias ntseq seq
|
362
376
|
|
377
|
+
#--
|
363
378
|
# // Line; termination line (end; 1/entry)
|
379
|
+
#++
|
380
|
+
|
381
|
+
# modified date. Returns Date object, String or nil.
|
382
|
+
def date_modified
|
383
|
+
parse_date(self.dt['updated'])
|
384
|
+
end
|
385
|
+
|
386
|
+
# created date. Returns Date object, String or nil.
|
387
|
+
def date_created
|
388
|
+
parse_date(self.dt['created'])
|
389
|
+
end
|
390
|
+
|
391
|
+
# release number when last updated
|
392
|
+
def release_modified
|
393
|
+
parse_release_version(self.dt['updated'])[0]
|
394
|
+
end
|
395
|
+
|
396
|
+
# release number when created
|
397
|
+
def release_created
|
398
|
+
parse_release_version(self.dt['created'])[0]
|
399
|
+
end
|
364
400
|
|
401
|
+
# entry version number numbered by EMBL
|
402
|
+
def entry_version
|
403
|
+
parse_release_version(self.dt['updated'])[1]
|
404
|
+
end
|
405
|
+
|
406
|
+
# parse date string. Returns Date object, or the given value unchanged if it cannot be parsed.
|
407
|
+
def parse_date(str)
|
408
|
+
begin
|
409
|
+
Date.parse(str)
|
410
|
+
rescue ArgumentError, TypeError, NoMethodError, NameError
|
411
|
+
str
|
412
|
+
end
|
413
|
+
end
|
414
|
+
private :parse_date
|
415
|
+
|
416
|
+
# extracts release and version numbers from DT line
|
417
|
+
def parse_release_version(str)
|
418
|
+
return [ nil, nil ] unless str
|
419
|
+
a = str.split(/[\(\,\)]/)
|
420
|
+
dstr = a.shift
|
421
|
+
rel = nil
|
422
|
+
ver = nil
|
423
|
+
a.each do |x|
|
424
|
+
case x
|
425
|
+
when /Rel\.\s*(.+)/
|
426
|
+
rel = $1.strip
|
427
|
+
when /Version\s*(.+)/
|
428
|
+
ver = $1.strip
|
429
|
+
end
|
430
|
+
end
|
431
|
+
[ rel, ver ]
|
432
|
+
end
|
433
|
+
private :parse_release_version
|
434
|
+
|
435
|
+
# database references (DR).
|
436
|
+
# Returns an array of Bio::Sequence::DBLink objects.
|
437
|
+
def dblinks
|
438
|
+
get('DR').split(/\n/).collect { |x|
|
439
|
+
Bio::Sequence::DBLink.parse_embl_DR_line(x)
|
440
|
+
}
|
441
|
+
end
|
442
|
+
|
443
|
+
# species
|
444
|
+
def species
|
445
|
+
self.fetch('OS')
|
446
|
+
end
|
447
|
+
|
448
|
+
# taxonomy classification
|
449
|
+
alias classification oc
|
450
|
+
|
451
|
+
# features
|
452
|
+
alias features ft
|
453
|
+
|
454
|
+
|
455
|
+
# converts the entry to Bio::Sequence object
|
456
|
+
# ---
|
457
|
+
# *Arguments*::
|
458
|
+
# *Returns*:: Bio::Sequence object
|
459
|
+
def to_biosequence
|
460
|
+
Bio::Sequence.adapter(self, Bio::Sequence::Adapter::EMBL)
|
461
|
+
end
|
365
462
|
|
366
463
|
### private methods
|
367
464
|
|
@@ -400,3 +497,4 @@ class EMBL < EMBLDB
|
|
400
497
|
end # class EMBL
|
401
498
|
|
402
499
|
end # module Bio
|
500
|
+
|
@@ -0,0 +1,85 @@
|
|
1
|
+
#
|
2
|
+
# = bio/db/embl/embl_to_biosequence.rb - Bio::EMBL to Bio::Sequence adapter module
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008
|
5
|
+
# Naohisa Goto <ng@bioruby.org>,
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
# $Id:$
|
9
|
+
#
|
10
|
+
|
11
|
+
require 'bio/sequence'
|
12
|
+
require 'bio/sequence/adapter'
|
13
|
+
|
14
|
+
# Internal use only. Normal users should not use this module.
|
15
|
+
#
|
16
|
+
# Bio::EMBL to Bio::Sequence adapter module.
|
17
|
+
# It is internally used in Bio::EMBL#to_biosequence.
|
18
|
+
#
|
19
|
+
module Bio::Sequence::Adapter::EMBL
|
20
|
+
|
21
|
+
extend Bio::Sequence::Adapter
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
def_biosequence_adapter :seq
|
26
|
+
|
27
|
+
def_biosequence_adapter :id_namespace do |orig|
|
28
|
+
'EMBL'
|
29
|
+
end
|
30
|
+
|
31
|
+
def_biosequence_adapter :entry_id
|
32
|
+
|
33
|
+
def_biosequence_adapter :primary_accession do |orig|
|
34
|
+
orig.accessions[0]
|
35
|
+
end
|
36
|
+
|
37
|
+
def_biosequence_adapter :secondary_accessions do |orig|
|
38
|
+
orig.accessions[1..-1] || []
|
39
|
+
end
|
40
|
+
|
41
|
+
def_biosequence_adapter :molecule_type
|
42
|
+
|
43
|
+
def_biosequence_adapter :data_class
|
44
|
+
|
45
|
+
def_biosequence_adapter :definition, :description
|
46
|
+
|
47
|
+
def_biosequence_adapter :topology
|
48
|
+
|
49
|
+
def_biosequence_adapter :date_created
|
50
|
+
|
51
|
+
def_biosequence_adapter :date_modified
|
52
|
+
|
53
|
+
def_biosequence_adapter :release_created
|
54
|
+
|
55
|
+
def_biosequence_adapter :release_modified
|
56
|
+
|
57
|
+
def_biosequence_adapter :entry_version
|
58
|
+
|
59
|
+
def_biosequence_adapter :division
|
60
|
+
|
61
|
+
def_biosequence_adapter :sequence_version, :version
|
62
|
+
|
63
|
+
def_biosequence_adapter :keywords
|
64
|
+
|
65
|
+
def_biosequence_adapter :species
|
66
|
+
|
67
|
+
def_biosequence_adapter :classification
|
68
|
+
|
69
|
+
#--
|
70
|
+
# unsupported yet
|
71
|
+
# def_biosequence_adapter :organelle do |orig|
|
72
|
+
# orig.fetch('OG')
|
73
|
+
# end
|
74
|
+
#++
|
75
|
+
|
76
|
+
def_biosequence_adapter :references
|
77
|
+
|
78
|
+
def_biosequence_adapter :features
|
79
|
+
|
80
|
+
def_biosequence_adapter :comments, :cc
|
81
|
+
|
82
|
+
def_biosequence_adapter :dblinks
|
83
|
+
|
84
|
+
end #module Bio::Sequence::Adapter::EMBL
|
85
|
+
|
@@ -0,0 +1,190 @@
|
|
1
|
+
#
|
2
|
+
# = bio/db/embl/format_embl.rb - EMBL format generator
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008
|
5
|
+
# Jan Aerts <jandot@bioruby.org>,
|
6
|
+
# Naohisa Goto <ng@bioruby.org>
|
7
|
+
# License:: The Ruby License
|
8
|
+
#
|
9
|
+
# $Id: format_embl.rb,v 1.1.2.7 2008/06/19 12:45:15 ngoto Exp $
|
10
|
+
#
|
11
|
+
|
12
|
+
require 'bio/sequence/format'
|
13
|
+
|
14
|
+
module Bio::Sequence::Format::NucFormatter
|
15
|
+
|
16
|
+
# INTERNAL USE ONLY, YOU SHOULD NOT USE THIS CLASS.
|
17
|
+
# Embl format output class for Bio::Sequence.
|
18
|
+
class Embl < Bio::Sequence::Format::FormatterBase
|
19
|
+
|
20
|
+
# helper methods
|
21
|
+
include Bio::Sequence::Format::INSDFeatureHelper
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
# wrapping with EMBL style
|
26
|
+
def embl_wrap(prefix, str)
|
27
|
+
wrap(str.to_s, 80, prefix)
|
28
|
+
end
|
29
|
+
|
30
|
+
# The given words (an Array of String) are wrapped in EMBL style.
|
31
|
+
# A word is never split across lines.
|
32
|
+
def embl_wrap_words(prefix, array)
|
33
|
+
width = 80
|
34
|
+
result = []
|
35
|
+
str = nil
|
36
|
+
array.each do |x|
|
37
|
+
if str then
|
38
|
+
if str.length + 1 + x.length > width then
|
39
|
+
str = nil
|
40
|
+
else
|
41
|
+
str.concat ' '
|
42
|
+
str.concat x
|
43
|
+
end
|
44
|
+
end
|
45
|
+
unless str then
|
46
|
+
str = prefix + x
|
47
|
+
result.push str
|
48
|
+
end
|
49
|
+
end
|
50
|
+
result.join("\n")
|
51
|
+
end
|
52
|
+
|
53
|
+
# format reference
|
54
|
+
# ref:: Bio::Reference object
|
55
|
+
# hash:: (optional) a hash for RN (reference number) administration
|
56
|
+
def reference_format_embl(ref, hash = nil)
|
57
|
+
lines = Array.new
|
58
|
+
if ref.embl_gb_record_number or hash then
|
59
|
+
refno = ref.embl_gb_record_number.to_i
|
60
|
+
hash ||= {}
|
61
|
+
if refno <= 0 or hash[refno] then
|
62
|
+
refno = hash.keys.sort[-1].to_i + 1
|
63
|
+
hash[refno] = true
|
64
|
+
end
|
65
|
+
lines << embl_wrap("RN ", "[#{refno}]")
|
66
|
+
end
|
67
|
+
if ref.comments then
|
68
|
+
ref.comments.each do |cmnt|
|
69
|
+
lines << embl_wrap("RC ", cmnt)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
unless ref.sequence_position.to_s.empty? then
|
73
|
+
lines << embl_wrap("RP ", "#{ref.sequence_position}")
|
74
|
+
end
|
75
|
+
unless ref.doi.to_s.empty? then
|
76
|
+
lines << embl_wrap("RX ", "DOI; #{ref.doi}.")
|
77
|
+
end
|
78
|
+
unless ref.pubmed.to_s.empty? then
|
79
|
+
lines << embl_wrap("RX ", "PUBMED; #{ref.pubmed}.")
|
80
|
+
end
|
81
|
+
unless ref.authors.empty? then
|
82
|
+
auth = ref.authors.collect do |x|
|
83
|
+
y = x.to_s.strip.split(/\, *([^\,]+)\z/)
|
84
|
+
y[1].gsub!(/\. +/, '.') if y[1]
|
85
|
+
y.join(' ')
|
86
|
+
end
|
87
|
+
lastauth = auth.pop
|
88
|
+
auth.each { |x| x.concat ',' }
|
89
|
+
auth.push(lastauth.to_s + ';')
|
90
|
+
lines << embl_wrap_words('RA ', auth)
|
91
|
+
end
|
92
|
+
lines << embl_wrap('RT ',
|
93
|
+
(ref.title.to_s.empty? ? '' :
|
94
|
+
"\"#{ref.title}\"") + ';')
|
95
|
+
unless ref.journal.to_s.empty? then
|
96
|
+
volissue = "#{ref.volume.to_s}"
|
97
|
+
volissue = "#{volissue}(#{ref.issue})" unless ref.issue.to_s.empty?
|
98
|
+
rl = "#{ref.journal}"
|
99
|
+
rl += " #{volissue}" unless volissue.empty?
|
100
|
+
rl += ":#{ref.pages}" unless ref.pages.to_s.empty?
|
101
|
+
rl += "(#{ref.year})" unless ref.year.to_s.empty?
|
102
|
+
rl += '.'
|
103
|
+
lines << embl_wrap('RL ', rl)
|
104
|
+
end
|
105
|
+
lines << "XX"
|
106
|
+
return lines.join("\n")
|
107
|
+
end
|
108
|
+
|
109
|
+
def seq_format_embl(seq)
|
110
|
+
counter = 0
|
111
|
+
result = seq.gsub(/.{1,60}/) do |x|
|
112
|
+
counter += x.length
|
113
|
+
x = x.gsub(/.{10}/, '\0 ')
|
114
|
+
sprintf(" %-66s%9d\n", x, counter)
|
115
|
+
end
|
116
|
+
result.chomp!
|
117
|
+
result
|
118
|
+
end
|
119
|
+
|
120
|
+
def seq_composition(seq)
|
121
|
+
{ :a => seq.count('aA'),
|
122
|
+
:c => seq.count('cC'),
|
123
|
+
:g => seq.count('gG'),
|
124
|
+
:t => seq.count('tTuU'),
|
125
|
+
:other => seq.count('^aAcCgGtTuU')
|
126
|
+
}
|
127
|
+
end
|
128
|
+
|
129
|
+
# molecule type
|
130
|
+
def mol_type_embl
|
131
|
+
if mt = molecule_type then
|
132
|
+
mt
|
133
|
+
elsif f = (features or []).find { |f| f.feature == 'source' } and
|
134
|
+
q = f.qualifiers.find { |q| q.qualifier == 'mol_type' } then
|
135
|
+
q.value
|
136
|
+
else
|
137
|
+
'NA'
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
# CC line. Comments.
|
142
|
+
def comments_format_embl(cmnts)
|
143
|
+
return '' if !cmnts or cmnts.empty?
|
144
|
+
cmnts = [ cmnts ] unless cmnts.kind_of?(Array)
|
145
|
+
a = []
|
146
|
+
cmnts.each do |str|
|
147
|
+
a.push embl_wrap('CC ', str)
|
148
|
+
end
|
149
|
+
unless a.empty? then
|
150
|
+
a.push "XX "
|
151
|
+
a.push '' # dummy to put "\n" at the end of the string
|
152
|
+
end
|
153
|
+
a.join("\n")
|
154
|
+
end
|
155
|
+
|
156
|
+
|
157
|
+
# Erb template of EMBL format for Bio::Sequence
|
158
|
+
erb_template <<'__END_OF_TEMPLATE__'
|
159
|
+
ID <%= primary_accession || entry_id %>; SV <%= sequence_version %>; <%= topology %>; <%= mol_type_embl %>; <%= data_class %>; <%= division %>; <%= seq.length %> BP.
|
160
|
+
XX
|
161
|
+
<%= embl_wrap('AC ', accessions.reject{|a| a.nil?}.join('; ') + ';') %>
|
162
|
+
XX
|
163
|
+
DT <%= format_date(date_created || null_date) %> (Rel. <%= release_created || 0 %>, Created)
|
164
|
+
DT <%= format_date(date_modified || null_date) %> (Rel. <%= release_modified || 0 %>, Last updated, Version <%= entry_version || 0 %>)
|
165
|
+
XX
|
166
|
+
<%= embl_wrap('DE ', definition) %>
|
167
|
+
XX
|
168
|
+
<%= embl_wrap('KW ', (keywords || []).join('; ') + '.') %>
|
169
|
+
XX
|
170
|
+
OS <%= species %>
|
171
|
+
<%= embl_wrap('OC ', (classification || []).join('; ') + '.') %>
|
172
|
+
XX
|
173
|
+
<% hash = {}; (references || []).each do |ref| %><%= reference_format_embl(ref, hash) %>
|
174
|
+
<% end %><% (dblinks || []).each do |r|
|
175
|
+
%>DR <%= r.database %>; <%= r.id %><% unless r.secondary_ids.empty? %>; <%= r.secondary_ids[0] %><% end %>.
|
176
|
+
<% end %><% if dblinks and !dblinks.empty? then
|
177
|
+
%>XX
|
178
|
+
<% end %><%= comments_format_embl(comments)
|
179
|
+
%>FH Key Location/Qualifiers
|
180
|
+
FH
|
181
|
+
<%= format_features_embl(features || []) %>XX
|
182
|
+
SQ Sequence <%= seq.length %> BP; <% c = seq_composition(seq) %><%= c[:a] %> A; <%= c[:c] %> C; <%= c[:g] %> G; <%= c[:t] %> T; <%= c[:other] %> other;
|
183
|
+
<%= seq_format_embl(seq) %>
|
184
|
+
//
|
185
|
+
__END_OF_TEMPLATE__
|
186
|
+
|
187
|
+
end #class Embl
|
188
|
+
|
189
|
+
end #module Bio::Sequence::Format::NucFormatter
|
190
|
+
|