bio 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +3421 -0
- data/KNOWN_ISSUES.rdoc +88 -0
- data/README.rdoc +252 -0
- data/README_DEV.rdoc +285 -0
- data/Rakefile +143 -0
- data/bin/bioruby +0 -0
- data/bin/br_biofetch.rb +0 -0
- data/bin/br_bioflat.rb +12 -1
- data/bin/br_biogetseq.rb +0 -0
- data/bin/br_pmfetch.rb +4 -3
- data/bioruby.gemspec +477 -0
- data/bioruby.gemspec.erb +117 -0
- data/doc/Changes-0.7.rd +7 -0
- data/doc/Changes-1.3.rdoc +239 -0
- data/doc/Tutorial.rd +296 -184
- data/doc/Tutorial.rd.html +1031 -0
- data/doc/Tutorial.rd.ja +111 -45
- data/doc/Tutorial.rd.ja.html +2225 -0
- data/doc/bioruby.css +281 -0
- data/extconf.rb +2 -0
- data/lib/bio.rb +29 -4
- data/lib/bio/appl/blast.rb +306 -121
- data/lib/bio/appl/blast/ddbj.rb +142 -0
- data/lib/bio/appl/blast/format0.rb +35 -25
- data/lib/bio/appl/blast/format8.rb +2 -2
- data/lib/bio/appl/blast/genomenet.rb +263 -0
- data/lib/bio/appl/blast/ncbioptions.rb +220 -0
- data/lib/bio/appl/blast/remote.rb +106 -0
- data/lib/bio/appl/blast/report.rb +260 -9
- data/lib/bio/appl/blast/rexml.rb +12 -5
- data/lib/bio/appl/blast/rpsblast.rb +277 -0
- data/lib/bio/appl/blast/wublast.rb +133 -12
- data/lib/bio/appl/blast/xmlparser.rb +35 -18
- data/lib/bio/appl/blat/report.rb +46 -5
- data/lib/bio/appl/emboss.rb +62 -13
- data/lib/bio/appl/fasta.rb +9 -11
- data/lib/bio/appl/genscan/report.rb +3 -3
- data/lib/bio/appl/hmmer.rb +1 -1
- data/lib/bio/appl/hmmer/report.rb +10 -10
- data/lib/bio/appl/paml/baseml.rb +95 -0
- data/lib/bio/appl/paml/baseml/report.rb +32 -0
- data/lib/bio/appl/paml/codeml.rb +242 -0
- data/lib/bio/appl/paml/codeml/rates.rb +67 -0
- data/lib/bio/appl/paml/codeml/report.rb +67 -0
- data/lib/bio/appl/paml/common.rb +348 -0
- data/lib/bio/appl/paml/common_report.rb +38 -0
- data/lib/bio/appl/paml/yn00.rb +103 -0
- data/lib/bio/appl/paml/yn00/report.rb +32 -0
- data/lib/bio/appl/psort.rb +2 -2
- data/lib/bio/appl/pts1.rb +5 -5
- data/lib/bio/appl/tmhmm/report.rb +10 -1
- data/lib/bio/command.rb +297 -41
- data/lib/bio/compat/features.rb +157 -0
- data/lib/bio/compat/references.rb +128 -0
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
- data/lib/bio/db/biosql/sequence.rb +508 -0
- data/lib/bio/db/embl/common.rb +28 -12
- data/lib/bio/db/embl/embl.rb +107 -9
- data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
- data/lib/bio/db/embl/format_embl.rb +190 -0
- data/lib/bio/db/embl/sptr.rb +15 -16
- data/lib/bio/db/fantom.rb +6 -8
- data/lib/bio/db/fasta.rb +10 -507
- data/lib/bio/db/fasta/defline.rb +532 -0
- data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
- data/lib/bio/db/fasta/format_fasta.rb +97 -0
- data/lib/bio/db/genbank/common.rb +25 -8
- data/lib/bio/db/genbank/format_genbank.rb +187 -0
- data/lib/bio/db/genbank/genbank.rb +36 -1
- data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
- data/lib/bio/db/gff.rb +1791 -119
- data/lib/bio/db/kegg/glycan.rb +2 -6
- data/lib/bio/db/lasergene.rb +3 -3
- data/lib/bio/db/medline.rb +4 -1
- data/lib/bio/db/newick.rb +10 -10
- data/lib/bio/db/pdb/chain.rb +6 -2
- data/lib/bio/db/pdb/pdb.rb +12 -3
- data/lib/bio/db/rebase.rb +7 -8
- data/lib/bio/db/soft.rb +3 -3
- data/lib/bio/feature.rb +1 -88
- data/lib/bio/io/biosql/biodatabase.rb +64 -0
- data/lib/bio/io/biosql/bioentry.rb +29 -0
- data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
- data/lib/bio/io/biosql/bioentry_path.rb +12 -0
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
- data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
- data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
- data/lib/bio/io/biosql/biosequence.rb +11 -0
- data/lib/bio/io/biosql/comment.rb +7 -0
- data/lib/bio/io/biosql/config/database.yml +20 -0
- data/lib/bio/io/biosql/dbxref.rb +13 -0
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
- data/lib/bio/io/biosql/location.rb +32 -0
- data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
- data/lib/bio/io/biosql/ontology.rb +10 -0
- data/lib/bio/io/biosql/reference.rb +9 -0
- data/lib/bio/io/biosql/seqfeature.rb +32 -0
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
- data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
- data/lib/bio/io/biosql/taxon.rb +12 -0
- data/lib/bio/io/biosql/taxon_name.rb +9 -0
- data/lib/bio/io/biosql/term.rb +27 -0
- data/lib/bio/io/biosql/term_dbxref.rb +11 -0
- data/lib/bio/io/biosql/term_path.rb +12 -0
- data/lib/bio/io/biosql/term_relationship.rb +13 -0
- data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
- data/lib/bio/io/biosql/term_synonym.rb +10 -0
- data/lib/bio/io/das.rb +7 -7
- data/lib/bio/io/ddbjxml.rb +57 -0
- data/lib/bio/io/ensembl.rb +2 -2
- data/lib/bio/io/fetch.rb +28 -14
- data/lib/bio/io/flatfile.rb +17 -853
- data/lib/bio/io/flatfile/autodetection.rb +545 -0
- data/lib/bio/io/flatfile/buffer.rb +237 -0
- data/lib/bio/io/flatfile/index.rb +17 -7
- data/lib/bio/io/flatfile/indexer.rb +30 -12
- data/lib/bio/io/flatfile/splitter.rb +297 -0
- data/lib/bio/io/hinv.rb +442 -0
- data/lib/bio/io/keggapi.rb +2 -2
- data/lib/bio/io/ncbirest.rb +733 -0
- data/lib/bio/io/pubmed.rb +34 -80
- data/lib/bio/io/registry.rb +2 -2
- data/lib/bio/io/sql.rb +178 -357
- data/lib/bio/io/togows.rb +458 -0
- data/lib/bio/location.rb +106 -11
- data/lib/bio/pathway.rb +120 -14
- data/lib/bio/reference.rb +115 -101
- data/lib/bio/sequence.rb +164 -183
- data/lib/bio/sequence/adapter.rb +108 -0
- data/lib/bio/sequence/common.rb +22 -45
- data/lib/bio/sequence/compat.rb +2 -2
- data/lib/bio/sequence/dblink.rb +54 -0
- data/lib/bio/sequence/format.rb +254 -77
- data/lib/bio/sequence/format_raw.rb +23 -0
- data/lib/bio/shell.rb +3 -1
- data/lib/bio/shell/core.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +33 -4
- data/lib/bio/shell/plugin/ncbirest.rb +64 -0
- data/lib/bio/shell/plugin/togows.rb +40 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
- data/lib/bio/tree.rb +4 -2
- data/lib/bio/util/color_scheme.rb +2 -2
- data/lib/bio/util/contingency_table.rb +2 -2
- data/lib/bio/util/restriction_enzyme.rb +2 -2
- data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
- data/lib/bio/version.rb +25 -0
- data/rdoc.zsh +8 -0
- data/sample/any2fasta.rb +0 -0
- data/sample/biofetch.rb +0 -0
- data/sample/dbget +0 -0
- data/sample/demo_sequence.rb +158 -0
- data/sample/enzymes.rb +0 -0
- data/sample/fasta2tab.rb +0 -0
- data/sample/fastagrep.rb +72 -0
- data/sample/fastasort.rb +54 -0
- data/sample/fsplit.rb +0 -0
- data/sample/gb2fasta.rb +2 -3
- data/sample/gb2tab.rb +0 -0
- data/sample/gbtab2mysql.rb +0 -0
- data/sample/genes2nuc.rb +0 -0
- data/sample/genes2pep.rb +0 -0
- data/sample/genes2tab.rb +0 -0
- data/sample/genome2rb.rb +0 -0
- data/sample/genome2tab.rb +0 -0
- data/sample/goslim.rb +0 -0
- data/sample/gt2fasta.rb +0 -0
- data/sample/na2aa.rb +34 -0
- data/sample/pmfetch.rb +0 -0
- data/sample/pmsearch.rb +0 -0
- data/sample/ssearch2tab.rb +0 -0
- data/sample/tfastx2tab.rb +0 -0
- data/sample/vs-genes.rb +0 -0
- data/setup.rb +1596 -0
- data/test/data/blast/blastp-multi.m7 +188 -0
- data/test/data/command/echoarg2.bat +1 -0
- data/test/data/paml/codeml/control_file.txt +30 -0
- data/test/data/paml/codeml/output.txt +78 -0
- data/test/data/paml/codeml/rates +217 -0
- data/test/data/rpsblast/misc.rpsblast +193 -0
- data/test/data/soft/GDS100_partial.soft +0 -0
- data/test/data/soft/GSE3457_family_partial.soft +0 -0
- data/test/functional/bio/appl/test_pts1.rb +115 -0
- data/test/functional/bio/io/test_ensembl.rb +123 -80
- data/test/functional/bio/io/test_togows.rb +267 -0
- data/test/functional/bio/sequence/test_output_embl.rb +51 -0
- data/test/functional/bio/test_command.rb +301 -0
- data/test/runner.rb +17 -1
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
- data/test/unit/bio/appl/blast/test_report.rb +753 -35
- data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
- data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
- data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
- data/test/unit/bio/appl/test_blast.rb +135 -4
- data/test/unit/bio/appl/test_fasta.rb +2 -2
- data/test/unit/bio/appl/test_pts1.rb +1 -64
- data/test/unit/bio/db/embl/test_common.rb +15 -15
- data/test/unit/bio/db/embl/test_embl.rb +4 -4
- data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
- data/test/unit/bio/db/embl/test_sptr.rb +38 -1
- data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
- data/test/unit/bio/db/test_gff.rb +1151 -25
- data/test/unit/bio/db/test_medline.rb +127 -0
- data/test/unit/bio/db/test_nexus.rb +5 -1
- data/test/unit/bio/db/test_prosite.rb +4 -4
- data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
- data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
- data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
- data/test/unit/bio/io/test_ddbjxml.rb +8 -3
- data/test/unit/bio/io/test_fastacmd.rb +5 -5
- data/test/unit/bio/io/test_flatfile.rb +357 -106
- data/test/unit/bio/io/test_soapwsdl.rb +2 -2
- data/test/unit/bio/io/test_togows.rb +161 -0
- data/test/unit/bio/sequence/test_common.rb +210 -11
- data/test/unit/bio/sequence/test_compat.rb +3 -3
- data/test/unit/bio/sequence/test_dblink.rb +58 -0
- data/test/unit/bio/sequence/test_na.rb +2 -2
- data/test/unit/bio/test_command.rb +111 -50
- data/test/unit/bio/test_feature.rb +29 -1
- data/test/unit/bio/test_location.rb +566 -6
- data/test/unit/bio/test_pathway.rb +91 -65
- data/test/unit/bio/test_reference.rb +67 -13
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
- metadata +202 -167
- data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
data/lib/bio/io/pubmed.rb
CHANGED
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
#
|
|
2
2
|
# = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
|
|
3
3
|
#
|
|
4
|
-
# Copyright:: Copyright (C) 2001, 2007 Toshiaki Katayama <k@bioruby.org>
|
|
4
|
+
# Copyright:: Copyright (C) 2001, 2007, 2008 Toshiaki Katayama <k@bioruby.org>
|
|
5
5
|
# Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
|
|
6
6
|
# License:: The Ruby License
|
|
7
7
|
#
|
|
8
|
-
# $Id
|
|
8
|
+
# $Id:$
|
|
9
9
|
#
|
|
10
10
|
|
|
11
|
+
require 'bio/io/ncbirest'
|
|
11
12
|
require 'bio/command'
|
|
12
|
-
require 'cgi'
|
|
13
|
+
require 'cgi'
|
|
13
14
|
|
|
14
15
|
module Bio
|
|
15
16
|
|
|
@@ -68,29 +69,7 @@ module Bio
|
|
|
68
69
|
# manuscript = Bio::PubMed.query("10592173")
|
|
69
70
|
# medline = Bio::MEDLINE.new(manuscript)
|
|
70
71
|
#
|
|
71
|
-
class PubMed
|
|
72
|
-
|
|
73
|
-
# Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
|
|
74
|
-
# weekdays for any series of more than 100 requests.
|
|
75
|
-
# -> Not implemented yet in BioRuby
|
|
76
|
-
|
|
77
|
-
# Make no more than one request every 3 seconds.
|
|
78
|
-
NCBI_INTERVAL = 3
|
|
79
|
-
@@last_access = nil
|
|
80
|
-
|
|
81
|
-
private
|
|
82
|
-
|
|
83
|
-
def ncbi_access_wait(wait = NCBI_INTERVAL)
|
|
84
|
-
if @@last_access
|
|
85
|
-
duration = Time.now - @@last_access
|
|
86
|
-
if wait > duration
|
|
87
|
-
sleep wait - duration
|
|
88
|
-
end
|
|
89
|
-
end
|
|
90
|
-
@@last_access = Time.now
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
public
|
|
72
|
+
class PubMed < Bio::NCBI::REST
|
|
94
73
|
|
|
95
74
|
# Search the PubMed database by given keywords using E-Utils and returns
|
|
96
75
|
# an array of PubMed IDs.
|
|
@@ -99,39 +78,22 @@ class PubMed
|
|
|
99
78
|
# http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html#PubMed
|
|
100
79
|
# ---
|
|
101
80
|
# *Arguments*:
|
|
102
|
-
# *
|
|
103
|
-
# *
|
|
104
|
-
#
|
|
105
|
-
#
|
|
106
|
-
#
|
|
107
|
-
#
|
|
108
|
-
#
|
|
109
|
-
#
|
|
110
|
-
#
|
|
111
|
-
#
|
|
81
|
+
# * _str_: query string (required)
|
|
82
|
+
# * _hash_: hash of E-Utils options
|
|
83
|
+
# * _retmode_: "xml", "html", ...
|
|
84
|
+
# * _rettype_: "medline", ...
|
|
85
|
+
# * _retmax_: integer (default 100)
|
|
86
|
+
# * _retstart_: integer
|
|
87
|
+
# * _field_
|
|
88
|
+
# * _reldate_
|
|
89
|
+
# * _mindate_
|
|
90
|
+
# * _maxdate_
|
|
91
|
+
# * _datetype_
|
|
112
92
|
# *Returns*:: array of PubMed IDs or a number of results
|
|
113
93
|
def esearch(str, hash = {})
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
|
117
|
-
opts = {
|
|
118
|
-
"retmax" => 100,
|
|
119
|
-
"tool" => "bioruby",
|
|
120
|
-
"db" => "pubmed",
|
|
121
|
-
"term" => str
|
|
122
|
-
}
|
|
94
|
+
opts = { "db" => "pubmed" }
|
|
123
95
|
opts.update(hash)
|
|
124
|
-
|
|
125
|
-
ncbi_access_wait
|
|
126
|
-
|
|
127
|
-
response, = Bio::Command.post_form(serv, opts)
|
|
128
|
-
result = response.body
|
|
129
|
-
if opts['rettype'] == 'count'
|
|
130
|
-
result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
|
|
131
|
-
else
|
|
132
|
-
result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
|
|
133
|
-
end
|
|
134
|
-
return result
|
|
96
|
+
super(str, opts)
|
|
135
97
|
end
|
|
136
98
|
|
|
137
99
|
# Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
|
|
@@ -141,29 +103,21 @@ class PubMed
|
|
|
141
103
|
# ---
|
|
142
104
|
# *Arguments*:
|
|
143
105
|
# * _ids_: list of PubMed IDs (required)
|
|
106
|
+
# * _hash_: hash of E-Utils options
|
|
107
|
+
# * _retmode_: "xml", "html", ...
|
|
108
|
+
# * _rettype_: "medline", ...
|
|
109
|
+
# * _retmax_: integer (default 100)
|
|
110
|
+
# * _retstart_: integer
|
|
111
|
+
# * _field_
|
|
112
|
+
# * _reldate_
|
|
113
|
+
# * _mindate_
|
|
114
|
+
# * _maxdate_
|
|
115
|
+
# * _datetype_
|
|
144
116
|
# *Returns*:: Array of MEDLINE formatted String
|
|
145
117
|
def efetch(ids, hash = {})
|
|
146
|
-
|
|
147
|
-
ids = ids.join(",") if ids === Array
|
|
148
|
-
|
|
149
|
-
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
|
|
150
|
-
opts = {
|
|
151
|
-
"tool" => "bioruby",
|
|
152
|
-
"db" => "pubmed",
|
|
153
|
-
"retmode" => "text",
|
|
154
|
-
"rettype" => "medline",
|
|
155
|
-
"id" => ids,
|
|
156
|
-
}
|
|
118
|
+
opts = { "db" => "pubmed", "rettype" => "medline" }
|
|
157
119
|
opts.update(hash)
|
|
158
|
-
|
|
159
|
-
ncbi_access_wait
|
|
160
|
-
|
|
161
|
-
response, = Bio::Command.post_form(serv, opts)
|
|
162
|
-
result = response.body
|
|
163
|
-
if opts["retmode"] == "text"
|
|
164
|
-
result = result.split(/\n\n+/)
|
|
165
|
-
end
|
|
166
|
-
return result
|
|
120
|
+
super(ids, opts)
|
|
167
121
|
end
|
|
168
122
|
|
|
169
123
|
# Search the PubMed database by given keywords using entrez query and returns
|
|
@@ -180,7 +134,7 @@ class PubMed
|
|
|
180
134
|
ncbi_access_wait
|
|
181
135
|
|
|
182
136
|
http = Bio::Command.new_http(host)
|
|
183
|
-
response
|
|
137
|
+
response = http.get(path + CGI.escape(str))
|
|
184
138
|
result = response.body
|
|
185
139
|
result = result.scan(/value="(\d+)" id="UidCheckBox"/m).flatten
|
|
186
140
|
return result
|
|
@@ -195,12 +149,12 @@ class PubMed
|
|
|
195
149
|
def query(*ids)
|
|
196
150
|
host = "www.ncbi.nlm.nih.gov"
|
|
197
151
|
path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
|
|
198
|
-
list = ids.join(",")
|
|
152
|
+
list = ids.collect { |x| CGI.escape(x.to_s) }.join(",")
|
|
199
153
|
|
|
200
154
|
ncbi_access_wait
|
|
201
155
|
|
|
202
156
|
http = Bio::Command.new_http(host)
|
|
203
|
-
response
|
|
157
|
+
response = http.get(path + list)
|
|
204
158
|
result = response.body
|
|
205
159
|
result = result.scan(/<pre>\s*(.*?)<\/pre>/m).flatten
|
|
206
160
|
|
|
@@ -229,7 +183,7 @@ class PubMed
|
|
|
229
183
|
ncbi_access_wait
|
|
230
184
|
|
|
231
185
|
http = Bio::Command.new_http(host)
|
|
232
|
-
response
|
|
186
|
+
response = http.get(path + CGI.escape(id.to_s))
|
|
233
187
|
result = response.body
|
|
234
188
|
if result =~ /#{id}\s+Error/
|
|
235
189
|
raise( result )
|
data/lib/bio/io/registry.rb
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
|
6
6
|
# License:: The Ruby License
|
|
7
7
|
#
|
|
8
|
-
# $Id
|
|
8
|
+
# $Id:$
|
|
9
9
|
#
|
|
10
10
|
# == Description
|
|
11
11
|
#
|
|
@@ -172,7 +172,7 @@ class Registry
|
|
|
172
172
|
def read_remote(url)
|
|
173
173
|
schema, user, host, port, reg, path, = URI.split(url)
|
|
174
174
|
Bio::Command.start_http(host, port) do |http|
|
|
175
|
-
response
|
|
175
|
+
response = http.get(path)
|
|
176
176
|
parse_stanza(response.body)
|
|
177
177
|
end
|
|
178
178
|
end
|
data/lib/bio/io/sql.rb
CHANGED
|
@@ -1,365 +1,186 @@
|
|
|
1
|
-
#
|
|
2
|
-
# = bio/io/sql.rb - BioSQL access module
|
|
3
|
-
#
|
|
4
|
-
# Copyright:: Copyright (C) 2002 Toshiaki Katayama <k@bioruby.org>
|
|
5
|
-
# Copyright:: Copyright (C) 2006 Raoul Jean Pierre Bonnal <raoul.bonnal@itb.cnr.it>
|
|
6
|
-
# License:: The Ruby License
|
|
7
|
-
#
|
|
8
|
-
# $Id: sql.rb,v 1.8 2007/04/05 23:35:41 trevor Exp $
|
|
9
|
-
#
|
|
10
1
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
require 'bio/sequence'
|
|
16
|
-
require 'bio/feature'
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
module Bio
|
|
20
|
-
|
|
21
|
-
class SQL
|
|
22
|
-
|
|
23
|
-
def initialize(db = 'dbi:Mysql:biosql', user = nil, pass = nil)
|
|
24
|
-
@dbh = DBI.connect(db, user, pass)
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
def close
|
|
28
|
-
@dbh.disconnect
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
# Returns Bio::SQL::Sequence object.
|
|
32
|
-
def fetch(accession) # or display_id for fall back
|
|
33
|
-
query = "select * from bioentry where accession = ?"
|
|
34
|
-
entry = @dbh.execute(query, accession).fetch
|
|
35
|
-
return Sequence.new(@dbh, entry) if entry
|
|
36
|
-
|
|
37
|
-
query = "select * from bioentry where display_id = ?"
|
|
38
|
-
entry = @dbh.execute(query, accession).fetch
|
|
39
|
-
return Sequence.new(@dbh, entry) if entry
|
|
40
|
-
end
|
|
41
|
-
alias get_by_id fetch
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
# for lazy fetching
|
|
45
|
-
|
|
46
|
-
class Sequence
|
|
47
|
-
|
|
48
|
-
def initialize(dbh, entry)
|
|
49
|
-
@dbh = dbh
|
|
50
|
-
@bioentry_id = entry['bioentry_id']
|
|
51
|
-
@database_id = entry['biodatabase_id']
|
|
52
|
-
@entry_id = entry['display_id']
|
|
53
|
-
@accession = entry['accession']
|
|
54
|
-
@version = entry['entry_version']
|
|
55
|
-
@division = entry['division']
|
|
56
|
-
end
|
|
57
|
-
attr_reader :accession, :division, :entry_id, :version
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def to_fasta
|
|
61
|
-
if seq = seq
|
|
62
|
-
return seq.to_fasta(@accession)
|
|
63
|
-
end
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
# Returns Bio::Sequence::NA or AA object.
|
|
67
|
-
def seq
|
|
68
|
-
query = "select * from biosequence where bioentry_id = ?"
|
|
69
|
-
row = @dbh.execute(query, @bioentry_id).fetch
|
|
70
|
-
return unless row
|
|
71
|
-
|
|
72
|
-
mol = row['alphabet']
|
|
73
|
-
seq = row['seq']
|
|
74
|
-
|
|
75
|
-
case mol
|
|
76
|
-
when /.na/i # 'dna' or 'rna'
|
|
77
|
-
Bio::Sequence::NA.new(seq)
|
|
78
|
-
else # 'protein'
|
|
79
|
-
Bio::Sequence::AA.new(seq)
|
|
80
|
-
end
|
|
81
|
-
end
|
|
82
|
-
|
|
83
|
-
# Returns Bio::Sequence::NA or AA object (by lazy fetching).
|
|
84
|
-
def subseq(from, to)
|
|
85
|
-
length = to - from + 1
|
|
86
|
-
query = "select alphabet, substring(seq, ?, ?) as subseq" +
|
|
87
|
-
" from biosequence where bioentry_id = ?"
|
|
88
|
-
row = @dbh.execute(query, from, length, @bioentry_id).fetch
|
|
89
|
-
return unless row
|
|
90
|
-
|
|
91
|
-
mol = row['alphabet']
|
|
92
|
-
seq = row['subseq']
|
|
93
|
-
|
|
94
|
-
case mol
|
|
95
|
-
when /.na/i # 'dna' or 'rna'
|
|
96
|
-
Bio::Sequence::NA.new(seq)
|
|
97
|
-
else # 'protein'
|
|
98
|
-
Bio::Sequence::AA.new(seq)
|
|
99
|
-
end
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
# Returns Bio::Features object.
|
|
104
|
-
def features
|
|
105
|
-
array = []
|
|
106
|
-
query = "select * from seqfeature where bioentry_id = ?"
|
|
107
|
-
@dbh.execute(query, @bioentry_id).fetch_all.each do |row|
|
|
108
|
-
next unless row
|
|
109
|
-
|
|
110
|
-
f_id = row['seqfeature_id']
|
|
111
|
-
k_id = row['type_term_id']
|
|
112
|
-
s_id = row['source_term_id']
|
|
113
|
-
rank = row['rank'].to_i - 1
|
|
114
|
-
|
|
115
|
-
# key : type (gene, CDS, ...)
|
|
116
|
-
type = feature_key(k_id)
|
|
117
|
-
|
|
118
|
-
# source : database (EMBL/GenBank/SwissProt)
|
|
119
|
-
database = feature_source(s_id)
|
|
120
|
-
|
|
121
|
-
# location : position
|
|
122
|
-
locations = feature_locations(f_id)
|
|
123
|
-
|
|
124
|
-
# qualifier
|
|
125
|
-
qualifiers = feature_qualifiers(f_id)
|
|
126
|
-
|
|
127
|
-
# rank
|
|
128
|
-
array[rank] = Bio::Feature.new(type, locations, qualifiers)
|
|
129
|
-
end
|
|
130
|
-
return Bio::Features.new(array)
|
|
131
|
-
end
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
# Returns reference informations in Array of Hash (not Bio::Reference).
|
|
135
|
-
def references
|
|
136
|
-
array = []
|
|
137
|
-
query = <<-END
|
|
138
|
-
select * from bioentry_reference, reference
|
|
139
|
-
where bioentry_id = ? and
|
|
140
|
-
bioentry_reference.reference_id = reference.reference_id
|
|
141
|
-
END
|
|
142
|
-
@dbh.execute(query, @bioentry_id).fetch_all.each do |row|
|
|
143
|
-
next unless row
|
|
144
|
-
|
|
145
|
-
hash = {
|
|
146
|
-
'start' => row['start_pos'],
|
|
147
|
-
'end' => row['end_pos'],
|
|
148
|
-
'journal' => row['location'],
|
|
149
|
-
'title' => row['title'],
|
|
150
|
-
'authors' => row['authors'],
|
|
151
|
-
'medline' => row['crc']
|
|
152
|
-
}
|
|
153
|
-
hash.default = ''
|
|
154
|
-
|
|
155
|
-
rank = row['rank'].to_i - 1
|
|
156
|
-
array[rank] = hash
|
|
157
|
-
end
|
|
158
|
-
return array
|
|
159
|
-
end
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
# Returns the first comment. For complete comments, use comments method.
|
|
163
|
-
def comment
|
|
164
|
-
query = "select * from comment where bioentry_id = ?"
|
|
165
|
-
row = @dbh.execute(query, @bioentry_id).fetch
|
|
166
|
-
row ? row['comment_text'] : ''
|
|
167
|
-
end
|
|
168
|
-
|
|
169
|
-
# Returns comments in an Array of Strings.
|
|
170
|
-
def comments
|
|
171
|
-
array = []
|
|
172
|
-
query = "select * from comment where bioentry_id = ?"
|
|
173
|
-
@dbh.execute(query, @bioentry_id).fetch_all.each do |row|
|
|
174
|
-
next unless row
|
|
175
|
-
rank = row['rank'].to_i - 1
|
|
176
|
-
array[rank] = row['comment_text']
|
|
177
|
-
end
|
|
178
|
-
return array
|
|
179
|
-
end
|
|
180
|
-
|
|
181
|
-
def database
|
|
182
|
-
query = "select * from biodatabase where biodatabase_id = ?"
|
|
183
|
-
row = @dbh.execute(query, @database_id).fetch
|
|
184
|
-
row ? row['name'] : ''
|
|
185
|
-
end
|
|
186
|
-
|
|
187
|
-
def date
|
|
188
|
-
query = "select * from bioentry_date where bioentry_id = ?"
|
|
189
|
-
row = @dbh.execute(query, @bioentry_id).fetch
|
|
190
|
-
row ? row['date'] : ''
|
|
191
|
-
end
|
|
192
|
-
|
|
193
|
-
def dblink
|
|
194
|
-
query = "select * from bioentry_direct_links where source_bioentry_id = ?"
|
|
195
|
-
row = @dbh.execute(query, @bioentry_id).fetch
|
|
196
|
-
row ? [row['dbname'], row['accession']] : []
|
|
197
|
-
end
|
|
198
|
-
|
|
199
|
-
def definition
|
|
200
|
-
query = "select * from bioentry_description where bioentry_id = ?"
|
|
201
|
-
row = @dbh.execute(query, @bioentry_id).fetch
|
|
202
|
-
row ? row['description'] : ''
|
|
203
|
-
end
|
|
204
|
-
|
|
205
|
-
def keyword
|
|
206
|
-
query = "select * from bioentry_keywords where bioentry_id = ?"
|
|
207
|
-
row = @dbh.execute(query, @bioentry_id).fetch
|
|
208
|
-
row ? row['keywords'] : ''
|
|
209
|
-
end
|
|
210
|
-
|
|
211
|
-
# Use lineage, common_name, ncbi_taxa_id methods to extract in detail.
|
|
212
|
-
def taxonomy
|
|
213
|
-
query = <<-END
|
|
214
|
-
select taxon_name.name, taxon.ncbi_taxon_id from bioentry
|
|
215
|
-
join taxon_name using(taxon_id) join taxon using (taxon_id)
|
|
216
|
-
where bioentry_id = ?
|
|
217
|
-
END
|
|
218
|
-
row = @dbh.execute(query, @bioentry_id).fetch
|
|
219
|
-
# @lineage = row ? row['full_lineage'] : ''
|
|
220
|
-
@common_name = row ? row['name'] : ''
|
|
221
|
-
@ncbi_taxa_id = row ? row['ncbi_taxon_id'] : ''
|
|
222
|
-
row ? [@lineage, @common_name, @ncbi_taxa_id] : []
|
|
223
|
-
end
|
|
224
|
-
|
|
225
|
-
def lineage
|
|
226
|
-
taxonomy unless @lineage
|
|
227
|
-
return @lineage
|
|
228
|
-
end
|
|
229
|
-
|
|
230
|
-
def common_name
|
|
231
|
-
taxonomy unless @common_name
|
|
232
|
-
return @common_name
|
|
233
|
-
end
|
|
234
|
-
|
|
235
|
-
def ncbi_taxa_id
|
|
236
|
-
taxonomy unless @ncbi_taxa_id
|
|
237
|
-
return @ncbi_taxa_id
|
|
238
|
-
end
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
private
|
|
242
|
-
|
|
243
|
-
def feature_key(k_id)
|
|
244
|
-
query = "select * from term where term_id= ?"
|
|
245
|
-
row = @dbh.execute(query, k_id).fetch
|
|
246
|
-
row ? row['name'] : ''
|
|
247
|
-
end
|
|
248
|
-
|
|
249
|
-
def feature_source(s_id)
|
|
250
|
-
query = "select * from term where term_id = ?"
|
|
251
|
-
row = @dbh.execute(query, s_id).fetch
|
|
252
|
-
row ? row['name'] : ''
|
|
253
|
-
end
|
|
254
|
-
|
|
255
|
-
def feature_locations(f_id)
|
|
256
|
-
locations = []
|
|
257
|
-
query = "select * from location where seqfeature_id = ?"
|
|
258
|
-
@dbh.execute(query, f_id).fetch_all.each do |row|
|
|
259
|
-
next unless row
|
|
2
|
+
require 'rubygems'
|
|
3
|
+
require 'erb'
|
|
4
|
+
require 'composite_primary_keys'
|
|
5
|
+
# BiosqlPlug
|
|
260
6
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
7
|
+
=begin
|
|
8
|
+
Ok Hilmar gives to me some clarification
|
|
9
|
+
1) "EMBL/GenBank/SwissProt" name in term table, is only a convention assuming data loaded by genbank embl ans swissprot formats.
|
|
10
|
+
If your features come from others ways for example blast or alignment ... whatever.. the user as to take care about the source.
|
|
265
11
|
|
|
266
|
-
xref = feature_locations_remote(row['dbxref_if'])
|
|
267
|
-
location.xref_id = xref.shift unless xref.empty?
|
|
268
|
-
|
|
269
|
-
# just omit fuzzy location for now...
|
|
270
|
-
#feature_locations_qv(row['seqfeature_location_id'])
|
|
271
|
-
|
|
272
|
-
rank = row['rank'].to_i - 1
|
|
273
|
-
locations[rank] = location
|
|
274
|
-
end
|
|
275
|
-
return Bio::Locations.new(locations)
|
|
276
|
-
end
|
|
277
|
-
|
|
278
|
-
def feature_locations_remote(l_id)
|
|
279
|
-
query = "select * from dbxref where dbxref_id = ?"
|
|
280
|
-
row = @dbh.execute(query, l_id).fetch
|
|
281
|
-
row ? [row['accession'], row['version']] : []
|
|
282
|
-
end
|
|
283
|
-
|
|
284
|
-
def feature_locations_qv(l_id)
|
|
285
|
-
query = "select * from location_qualifier_value where location_id = ?"
|
|
286
|
-
row = @dbh.execute(query, l_id).fetch
|
|
287
|
-
row ? [row['value'], row['int_value']] : []
|
|
288
|
-
end
|
|
289
|
-
|
|
290
|
-
def feature_qualifiers(f_id)
|
|
291
|
-
qualifiers = []
|
|
292
|
-
query = "select * from seqfeature_qualifier_value where seqfeature_id = ?"
|
|
293
|
-
@dbh.execute(query, f_id).fetch_all.each do |row|
|
|
294
|
-
next unless row
|
|
295
|
-
|
|
296
|
-
key = feature_qualifiers_key(row['seqfeature_id'])
|
|
297
|
-
value = row['value']
|
|
298
|
-
qualifier = Bio::Feature::Qualifier.new(key, value)
|
|
299
|
-
|
|
300
|
-
rank = row['rank'].to_i - 1
|
|
301
|
-
qualifiers[rank] = qualifier
|
|
302
|
-
end
|
|
303
|
-
return qualifiers.compact # .compact is nasty hack for a while
|
|
304
|
-
end
|
|
305
|
-
|
|
306
|
-
def feature_qualifiers_key(q_id)
|
|
307
|
-
query = <<-END
|
|
308
|
-
select * from seqfeature_qualifier_value
|
|
309
|
-
join term using(term_id) where seqfeature_id = ?
|
|
310
|
-
END
|
|
311
|
-
row = @dbh.execute(query, q_id).fetch
|
|
312
|
-
row ? row['name'] : ''
|
|
313
|
-
end
|
|
314
|
-
end
|
|
315
|
-
|
|
316
|
-
end # SQL
|
|
317
|
-
|
|
318
|
-
end # Bio
|
|
319
12
|
|
|
13
|
+
=end
|
|
14
|
+
=begin
|
|
15
|
+
TODO:
|
|
16
|
+
1) source_term_id => surce_term and check before if the source term is present or not and the level, the root should always be something "EMBL/GenBank/SwissProt" or contestualized.
|
|
17
|
+
2) Into DummyBase class delete connection there and use Bio::ArSQL.establish_connection which reads info from a yml file.
|
|
18
|
+
3) Chk Locations in Biofeatures ArSQL
|
|
19
|
+
=end
|
|
20
|
+
module Bio
|
|
21
|
+
class SQL
|
|
22
|
+
# Registers the connection settings on DummyBase and connects it to the
# database of one environment.
#
# configurations:: Hash shaped like a parsed database.yml, keyed by
#                  environment name ('development', 'production' or
#                  'test'). Keys are checked as Strings, e.g.
#                  {'development' => {'database' => "bio_test", 'adapter' => "postgresql", 'username' => "rails", 'password' => nil}}
# env:: name of the environment to connect to (String or Symbol).
#
# Unknown environment names and unknown setting keys are rejected by
# Hash#assert_valid_keys. Raises ArgumentError when +configurations+ has
# no entry for +env+.
#
# Returns the result of DummyBase.establish_connection.
def self.establish_connection(configurations, env)
  env_name = env.to_s
  configurations.assert_valid_keys('development', 'production', 'test')

  settings = configurations[env_name]
  if settings.nil?
    raise ArgumentError, "No configuration found for environment: #{env_name}"
  end

  # 'hostname' stays in the list for backward compatibility; 'host',
  # 'port', 'socket' and 'encoding' are accepted as well so that a regular
  # database.yml section is not refused.
  # NOTE(review): ActiveRecord adapters presumably read 'host', not
  # 'hostname' -- confirm whether 'hostname' has any effect.
  settings.assert_valid_keys('hostname', 'host', 'port', 'socket', 'encoding',
                             'database', 'adapter', 'username', 'password')

  DummyBase.configurations = configurations
  DummyBase.establish_connection env_name
end
|
|
31
|
+
|
|
32
|
+
# Loads a bioentry by primary key.
#
# id:: handed unchanged to Bioentry.find.
#
# Returns whatever Bioentry.find yields for +id+ (the Bioentry model
# itself, not a Bio::SQL::Sequence wrapper).
def self.fetch_id(id)
  Bioentry.find(id)
end
|
|
35
|
+
|
|
36
|
+
# Fetches the sequence stored under an accession number.
#
# accession:: accession String; it is upcased before the lookup.
#
# Returns a Bio::SQL::Sequence built around the matching Bioentry, or nil
# when no bioentry carries that accession. A single lookup is issued:
# the record itself tells whether the accession exists, so no separate
# existence query is needed.
def self.fetch_accession(accession)
  entry = Bio::SQL::Bioentry.find_by_accession(accession.upcase)
  entry ? Bio::SQL::Sequence.new(:entry => entry) : nil
end
|
|
40
|
+
|
|
41
|
+
# Tells whether a bioentry with the given accession is stored.
#
# accession:: accession String; it is upcased before the lookup.
#
# Returns true or false.
def self.exists_accession(accession)
  !Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil?
end
|
|
44
|
+
|
|
45
|
+
# Tells whether a biodatabase with the given name is stored.
#
# name:: database name, looked up as given (no case folding).
#
# Returns true or false.
def self.exists_database(name)
  !Bio::SQL::Biodatabase.find_by_name(name).nil?
end
|
|
48
|
+
|
|
49
|
+
# Lists every stored bioentry.
#
# Returns an array with one Hash per bioentry, holding its primary key
# under :id and its accession under :accession.
def self.list_entries
  Bio::SQL::Bioentry.find(:all).map do |bioentry|
    { :id => bioentry.bioentry_id, :accession => bioentry.accession }
  end
end
|
|
54
|
+
|
|
55
|
+
# Lists every stored biodatabase.
#
# Returns an array with one Hash per biodatabase, holding its primary key
# under :id and its name under :name.
def self.list_databases
  Bio::SQL::Biodatabase.find(:all).map do |biodatabase|
    { :id => biodatabase.biodatabase_id, :name => biodatabase.name }
  end
end
|
|
60
|
+
|
|
61
|
+
# Deletes a bioentry by primary key.
#
# id:: handed unchanged to Bioentry.delete.
#
# Returns whatever Bioentry.delete returns.
def self.delete_entry_id(id)
  Bio::SQL::Bioentry.delete(id)
end
|
|
64
|
+
|
|
65
|
+
# Deletes the bioentry stored under an accession number.
#
# accession:: accession String; it is upcased before the lookup, the same
#             way fetch_accession and exists_accession treat it, so an
#             entry those methods can see can also be deleted.
#
# Returns 0 without touching the database when no bioentry matches;
# otherwise returns whatever Bioentry.delete returns for the entry's
# primary key (presumably the number of deleted rows -- confirm against
# the ActiveRecord version in use).
def self.delete_entry_accession(accession)
  entry = Bio::SQL::Bioentry.find_by_accession(accession.upcase)
  return 0 if entry.nil?

  # Pass the primary key explicitly instead of the record object.
  Bio::SQL::Bioentry.delete(entry.bioentry_id)
end
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# Abstract ActiveRecord parent class used for the BioSQL connection.
#
# NOTE: this class does not establish the connection automatically;
# Bio::SQL.establish_connection sets the configurations and connects.
# NOTE: with PostgreSQL the sequence name is not set here; the system
# discovers it by default.
class DummyBase < ActiveRecord::Base
  # Primary keys are the table name followed by "_id", which avoids
  # declaring the primary key for every table.
  self.primary_key_prefix_type = :table_name_with_underscore
  # Table names are not pluralized.
  self.pluralize_table_names = false
  # No table of its own.
  self.abstract_class = true

  # Former automatic connection set-up, kept for reference:
  #biosql_configurations=YAML::load(ERB.new(IO.read(File.join(File.dirname(__FILE__),'../config', 'database.yml'))).result)
  #self.configurations=biosql_configurations
  #self.establish_connection "development"
end #DummyBase
|
|
81
|
+
|
|
82
|
+
autoload :Biodatabase, 'bio/io/biosql/biodatabase'
|
|
83
|
+
autoload :Bioentry, 'bio/io/biosql/bioentry'
|
|
84
|
+
autoload :BioentryDbxref, 'bio/io/biosql/bioentry_dbxref'
|
|
85
|
+
autoload :BioentryPath, 'bio/io/biosql/bioentry_path'
|
|
86
|
+
autoload :BioentryQualifierValue, 'bio/io/biosql/bioentry_qualifier_value'
|
|
87
|
+
autoload :BioentryReference, 'bio/io/biosql/bioentry_reference'
|
|
88
|
+
autoload :BioentryRelationship, 'bio/io/biosql/bioentry_relationship'
|
|
89
|
+
autoload :Biosequence, 'bio/io/biosql/biosequence'
|
|
90
|
+
autoload :Comment, 'bio/io/biosql/comment'
|
|
91
|
+
autoload :Dbxref, 'bio/io/biosql/dbxref'
|
|
92
|
+
autoload :DbxrefQualifierValue, 'bio/io/biosql/dbxref_qualifier_value'
|
|
93
|
+
autoload :Location, 'bio/io/biosql/location'
|
|
94
|
+
autoload :LocationQualifierValue, 'bio/io/biosql/location_qualifier_value'
|
|
95
|
+
autoload :Ontology, 'bio/io/biosql/ontology'
|
|
96
|
+
autoload :Reference, 'bio/io/biosql/reference'
|
|
97
|
+
autoload :Seqfeature, 'bio/io/biosql/seqfeature'
|
|
98
|
+
autoload :SeqfeatureDbxref, 'bio/io/biosql/seqfeature_dbxref'
|
|
99
|
+
autoload :SeqfeaturePath, 'bio/io/biosql/seqfeature_path'
|
|
100
|
+
autoload :SeqfeatureQualifierValue, 'bio/io/biosql/seqfeature_qualifier_value'
|
|
101
|
+
autoload :SeqfeatureRelationship, 'bio/io/biosql/seqfeature_relationship'
|
|
102
|
+
autoload :Taxon, 'bio/io/biosql/taxon'
|
|
103
|
+
autoload :TaxonName, 'bio/io/biosql/taxon_name'
|
|
104
|
+
autoload :Term, 'bio/io/biosql/term'
|
|
105
|
+
autoload :TermDbxref, 'bio/io/biosql/term_dbxref'
|
|
106
|
+
autoload :TermPath, 'bio/io/biosql/term_path'
|
|
107
|
+
autoload :TermRelationship, 'bio/io/biosql/term_relationship'
|
|
108
|
+
autoload :TermRelationshipTerm, 'bio/io/biosql/term_relationship_term'
|
|
109
|
+
autoload :Sequence, 'bio/db/biosql/sequence'
|
|
110
|
+
end #biosql
|
|
111
|
+
|
|
112
|
+
end #Bio
|
|
320
113
|
|
|
321
114
|
if __FILE__ == $0
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
115
|
+
require 'rubygems'
|
|
116
|
+
require 'composite_primary_keys'
|
|
117
|
+
require 'bio'
|
|
118
|
+
require 'pp'
|
|
119
|
+
|
|
120
|
+
# pp connection = Bio::SQL.establish_connection('bio/io/biosql/config/database.yml','development')
|
|
121
|
+
connection = Bio::SQL.establish_connection({'development'=>{'database'=>"bio_test", 'adapter'=>"postgresql", 'username'=>"rails", 'password'=>nil}},'development')
|
|
122
|
+
#pp YAML::load(ERB.new(IO.read('bio/io/biosql/config/database.yml')).result)
|
|
123
|
+
if true
|
|
124
|
+
#Bio::SQL.list_entries
|
|
125
|
+
|
|
126
|
+
# biosequence = data.to_biosequence
|
|
127
|
+
# puts biosequence.output(:genbank)
|
|
128
|
+
db=Bio::SQL::Biodatabase.new(:name=>'JEFF', :authority=>'ME', :description=>'YOU')
|
|
129
|
+
db.save!
|
|
130
|
+
|
|
131
|
+
puts "### FileFile.auto"
|
|
132
|
+
if ARGV.size > 0
|
|
133
|
+
#embl = Bio::FlatFile.auto(ARGF.read)
|
|
134
|
+
Bio::FlatFile.auto(ARGF) do |ff|
|
|
135
|
+
ff.each do |data|
|
|
136
|
+
biosequence=data.to_biosequence
|
|
137
|
+
puts biosequence.output(:fasta)
|
|
138
|
+
sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
|
|
139
|
+
sqlseq.save
|
|
140
|
+
sqlseq.to_biosequence.output(:fasta)
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
else
|
|
144
|
+
require 'bio/io/fetch'
|
|
145
|
+
server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
|
|
146
|
+
data = Bio::EMBL.new(server.fetch('embl','AJ224123'))
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
# sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
|
|
151
|
+
# sqlseq.save
|
|
152
|
+
# sqlseq_bioseq=sqlseq.to_biosequence
|
|
153
|
+
# puts sqlseq_bioseq.output(:genbank)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# bioseq = Bio::SQL.fetch_accession('AJ224122')
|
|
158
|
+
# pp bioseq
|
|
159
|
+
# pp bioseq.entry_id
|
|
160
|
+
#TODO create a test only for tables not sequence here
|
|
161
|
+
# pp bioseq.molecule_type
|
|
162
|
+
#pp bioseq.molecule_type.class
|
|
163
|
+
#bioseq.molecule_type_update('dna', 1)
|
|
164
|
+
## pp Bio::SQL::Taxon.find(8121).taxon_names
|
|
165
|
+
|
|
166
|
+
#sqlseq.to_biosequence
|
|
167
|
+
|
|
168
|
+
# sqlseq.delete
|
|
169
|
+
|
|
170
|
+
# db.destroy
|
|
326
171
|
end
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
pp
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
pp ent1.seq.gc
|
|
342
|
-
pp ent1.subseq(1,20)
|
|
343
|
-
|
|
344
|
-
pp ent2.accession
|
|
345
|
-
pp ent2.comment
|
|
346
|
-
pp ent2.comments
|
|
347
|
-
pp ent2.common_name
|
|
348
|
-
pp ent2.database
|
|
349
|
-
pp ent2.date
|
|
350
|
-
pp ent2.dblink
|
|
351
|
-
pp ent2.definition
|
|
352
|
-
pp ent2.division
|
|
353
|
-
pp ent2.entry_id
|
|
354
|
-
pp ent2.features
|
|
355
|
-
pp ent2.keyword
|
|
356
|
-
pp ent2.lineage
|
|
357
|
-
pp ent2.ncbi_taxa_id
|
|
358
|
-
pp ent2.references
|
|
359
|
-
pp ent2.seq
|
|
360
|
-
pp ent2.subseq(1,10)
|
|
361
|
-
pp ent2.taxonomy
|
|
362
|
-
pp ent2.version
|
|
363
|
-
|
|
172
|
+
#pp bioseq.molecule_type
|
|
173
|
+
#term = Bio::SQL::Term.find_by_name('mol_type')
|
|
174
|
+
#pp term
|
|
175
|
+
#pp bioseq.entry.bioentry_qualifier_values.create(:term=>term, :rank=>2, :value=>'pippo')
|
|
176
|
+
#pp bioseq.entry.bioentry_qualifier_values.inspect
|
|
177
|
+
#pp bioseq.entry.bioentry_qualifier_values.find_all_by_term_id(26)
|
|
178
|
+
#pp primo.class
|
|
179
|
+
# pp primo.value='dna'
|
|
180
|
+
# pp primo.save
|
|
181
|
+
#pp bioseq.molecule_type= 'prova'
|
|
182
|
+
|
|
183
|
+
#Bio::SQL::BioentryQualifierValue.delete(delete.bioentry_id,delete.term_id,delete.rank)
|
|
184
|
+
|
|
185
|
+
|
|
364
186
|
end
|
|
365
|
-
|