bio 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +3421 -0
- data/KNOWN_ISSUES.rdoc +88 -0
- data/README.rdoc +252 -0
- data/README_DEV.rdoc +285 -0
- data/Rakefile +143 -0
- data/bin/bioruby +0 -0
- data/bin/br_biofetch.rb +0 -0
- data/bin/br_bioflat.rb +12 -1
- data/bin/br_biogetseq.rb +0 -0
- data/bin/br_pmfetch.rb +4 -3
- data/bioruby.gemspec +477 -0
- data/bioruby.gemspec.erb +117 -0
- data/doc/Changes-0.7.rd +7 -0
- data/doc/Changes-1.3.rdoc +239 -0
- data/doc/Tutorial.rd +296 -184
- data/doc/Tutorial.rd.html +1031 -0
- data/doc/Tutorial.rd.ja +111 -45
- data/doc/Tutorial.rd.ja.html +2225 -0
- data/doc/bioruby.css +281 -0
- data/extconf.rb +2 -0
- data/lib/bio.rb +29 -4
- data/lib/bio/appl/blast.rb +306 -121
- data/lib/bio/appl/blast/ddbj.rb +142 -0
- data/lib/bio/appl/blast/format0.rb +35 -25
- data/lib/bio/appl/blast/format8.rb +2 -2
- data/lib/bio/appl/blast/genomenet.rb +263 -0
- data/lib/bio/appl/blast/ncbioptions.rb +220 -0
- data/lib/bio/appl/blast/remote.rb +106 -0
- data/lib/bio/appl/blast/report.rb +260 -9
- data/lib/bio/appl/blast/rexml.rb +12 -5
- data/lib/bio/appl/blast/rpsblast.rb +277 -0
- data/lib/bio/appl/blast/wublast.rb +133 -12
- data/lib/bio/appl/blast/xmlparser.rb +35 -18
- data/lib/bio/appl/blat/report.rb +46 -5
- data/lib/bio/appl/emboss.rb +62 -13
- data/lib/bio/appl/fasta.rb +9 -11
- data/lib/bio/appl/genscan/report.rb +3 -3
- data/lib/bio/appl/hmmer.rb +1 -1
- data/lib/bio/appl/hmmer/report.rb +10 -10
- data/lib/bio/appl/paml/baseml.rb +95 -0
- data/lib/bio/appl/paml/baseml/report.rb +32 -0
- data/lib/bio/appl/paml/codeml.rb +242 -0
- data/lib/bio/appl/paml/codeml/rates.rb +67 -0
- data/lib/bio/appl/paml/codeml/report.rb +67 -0
- data/lib/bio/appl/paml/common.rb +348 -0
- data/lib/bio/appl/paml/common_report.rb +38 -0
- data/lib/bio/appl/paml/yn00.rb +103 -0
- data/lib/bio/appl/paml/yn00/report.rb +32 -0
- data/lib/bio/appl/psort.rb +2 -2
- data/lib/bio/appl/pts1.rb +5 -5
- data/lib/bio/appl/tmhmm/report.rb +10 -1
- data/lib/bio/command.rb +297 -41
- data/lib/bio/compat/features.rb +157 -0
- data/lib/bio/compat/references.rb +128 -0
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
- data/lib/bio/db/biosql/sequence.rb +508 -0
- data/lib/bio/db/embl/common.rb +28 -12
- data/lib/bio/db/embl/embl.rb +107 -9
- data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
- data/lib/bio/db/embl/format_embl.rb +190 -0
- data/lib/bio/db/embl/sptr.rb +15 -16
- data/lib/bio/db/fantom.rb +6 -8
- data/lib/bio/db/fasta.rb +10 -507
- data/lib/bio/db/fasta/defline.rb +532 -0
- data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
- data/lib/bio/db/fasta/format_fasta.rb +97 -0
- data/lib/bio/db/genbank/common.rb +25 -8
- data/lib/bio/db/genbank/format_genbank.rb +187 -0
- data/lib/bio/db/genbank/genbank.rb +36 -1
- data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
- data/lib/bio/db/gff.rb +1791 -119
- data/lib/bio/db/kegg/glycan.rb +2 -6
- data/lib/bio/db/lasergene.rb +3 -3
- data/lib/bio/db/medline.rb +4 -1
- data/lib/bio/db/newick.rb +10 -10
- data/lib/bio/db/pdb/chain.rb +6 -2
- data/lib/bio/db/pdb/pdb.rb +12 -3
- data/lib/bio/db/rebase.rb +7 -8
- data/lib/bio/db/soft.rb +3 -3
- data/lib/bio/feature.rb +1 -88
- data/lib/bio/io/biosql/biodatabase.rb +64 -0
- data/lib/bio/io/biosql/bioentry.rb +29 -0
- data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
- data/lib/bio/io/biosql/bioentry_path.rb +12 -0
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
- data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
- data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
- data/lib/bio/io/biosql/biosequence.rb +11 -0
- data/lib/bio/io/biosql/comment.rb +7 -0
- data/lib/bio/io/biosql/config/database.yml +20 -0
- data/lib/bio/io/biosql/dbxref.rb +13 -0
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
- data/lib/bio/io/biosql/location.rb +32 -0
- data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
- data/lib/bio/io/biosql/ontology.rb +10 -0
- data/lib/bio/io/biosql/reference.rb +9 -0
- data/lib/bio/io/biosql/seqfeature.rb +32 -0
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
- data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
- data/lib/bio/io/biosql/taxon.rb +12 -0
- data/lib/bio/io/biosql/taxon_name.rb +9 -0
- data/lib/bio/io/biosql/term.rb +27 -0
- data/lib/bio/io/biosql/term_dbxref.rb +11 -0
- data/lib/bio/io/biosql/term_path.rb +12 -0
- data/lib/bio/io/biosql/term_relationship.rb +13 -0
- data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
- data/lib/bio/io/biosql/term_synonym.rb +10 -0
- data/lib/bio/io/das.rb +7 -7
- data/lib/bio/io/ddbjxml.rb +57 -0
- data/lib/bio/io/ensembl.rb +2 -2
- data/lib/bio/io/fetch.rb +28 -14
- data/lib/bio/io/flatfile.rb +17 -853
- data/lib/bio/io/flatfile/autodetection.rb +545 -0
- data/lib/bio/io/flatfile/buffer.rb +237 -0
- data/lib/bio/io/flatfile/index.rb +17 -7
- data/lib/bio/io/flatfile/indexer.rb +30 -12
- data/lib/bio/io/flatfile/splitter.rb +297 -0
- data/lib/bio/io/hinv.rb +442 -0
- data/lib/bio/io/keggapi.rb +2 -2
- data/lib/bio/io/ncbirest.rb +733 -0
- data/lib/bio/io/pubmed.rb +34 -80
- data/lib/bio/io/registry.rb +2 -2
- data/lib/bio/io/sql.rb +178 -357
- data/lib/bio/io/togows.rb +458 -0
- data/lib/bio/location.rb +106 -11
- data/lib/bio/pathway.rb +120 -14
- data/lib/bio/reference.rb +115 -101
- data/lib/bio/sequence.rb +164 -183
- data/lib/bio/sequence/adapter.rb +108 -0
- data/lib/bio/sequence/common.rb +22 -45
- data/lib/bio/sequence/compat.rb +2 -2
- data/lib/bio/sequence/dblink.rb +54 -0
- data/lib/bio/sequence/format.rb +254 -77
- data/lib/bio/sequence/format_raw.rb +23 -0
- data/lib/bio/shell.rb +3 -1
- data/lib/bio/shell/core.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +33 -4
- data/lib/bio/shell/plugin/ncbirest.rb +64 -0
- data/lib/bio/shell/plugin/togows.rb +40 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
- data/lib/bio/tree.rb +4 -2
- data/lib/bio/util/color_scheme.rb +2 -2
- data/lib/bio/util/contingency_table.rb +2 -2
- data/lib/bio/util/restriction_enzyme.rb +2 -2
- data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
- data/lib/bio/version.rb +25 -0
- data/rdoc.zsh +8 -0
- data/sample/any2fasta.rb +0 -0
- data/sample/biofetch.rb +0 -0
- data/sample/dbget +0 -0
- data/sample/demo_sequence.rb +158 -0
- data/sample/enzymes.rb +0 -0
- data/sample/fasta2tab.rb +0 -0
- data/sample/fastagrep.rb +72 -0
- data/sample/fastasort.rb +54 -0
- data/sample/fsplit.rb +0 -0
- data/sample/gb2fasta.rb +2 -3
- data/sample/gb2tab.rb +0 -0
- data/sample/gbtab2mysql.rb +0 -0
- data/sample/genes2nuc.rb +0 -0
- data/sample/genes2pep.rb +0 -0
- data/sample/genes2tab.rb +0 -0
- data/sample/genome2rb.rb +0 -0
- data/sample/genome2tab.rb +0 -0
- data/sample/goslim.rb +0 -0
- data/sample/gt2fasta.rb +0 -0
- data/sample/na2aa.rb +34 -0
- data/sample/pmfetch.rb +0 -0
- data/sample/pmsearch.rb +0 -0
- data/sample/ssearch2tab.rb +0 -0
- data/sample/tfastx2tab.rb +0 -0
- data/sample/vs-genes.rb +0 -0
- data/setup.rb +1596 -0
- data/test/data/blast/blastp-multi.m7 +188 -0
- data/test/data/command/echoarg2.bat +1 -0
- data/test/data/paml/codeml/control_file.txt +30 -0
- data/test/data/paml/codeml/output.txt +78 -0
- data/test/data/paml/codeml/rates +217 -0
- data/test/data/rpsblast/misc.rpsblast +193 -0
- data/test/data/soft/GDS100_partial.soft +0 -0
- data/test/data/soft/GSE3457_family_partial.soft +0 -0
- data/test/functional/bio/appl/test_pts1.rb +115 -0
- data/test/functional/bio/io/test_ensembl.rb +123 -80
- data/test/functional/bio/io/test_togows.rb +267 -0
- data/test/functional/bio/sequence/test_output_embl.rb +51 -0
- data/test/functional/bio/test_command.rb +301 -0
- data/test/runner.rb +17 -1
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
- data/test/unit/bio/appl/blast/test_report.rb +753 -35
- data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
- data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
- data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
- data/test/unit/bio/appl/test_blast.rb +135 -4
- data/test/unit/bio/appl/test_fasta.rb +2 -2
- data/test/unit/bio/appl/test_pts1.rb +1 -64
- data/test/unit/bio/db/embl/test_common.rb +15 -15
- data/test/unit/bio/db/embl/test_embl.rb +4 -4
- data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
- data/test/unit/bio/db/embl/test_sptr.rb +38 -1
- data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
- data/test/unit/bio/db/test_gff.rb +1151 -25
- data/test/unit/bio/db/test_medline.rb +127 -0
- data/test/unit/bio/db/test_nexus.rb +5 -1
- data/test/unit/bio/db/test_prosite.rb +4 -4
- data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
- data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
- data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
- data/test/unit/bio/io/test_ddbjxml.rb +8 -3
- data/test/unit/bio/io/test_fastacmd.rb +5 -5
- data/test/unit/bio/io/test_flatfile.rb +357 -106
- data/test/unit/bio/io/test_soapwsdl.rb +2 -2
- data/test/unit/bio/io/test_togows.rb +161 -0
- data/test/unit/bio/sequence/test_common.rb +210 -11
- data/test/unit/bio/sequence/test_compat.rb +3 -3
- data/test/unit/bio/sequence/test_dblink.rb +58 -0
- data/test/unit/bio/sequence/test_na.rb +2 -2
- data/test/unit/bio/test_command.rb +111 -50
- data/test/unit/bio/test_feature.rb +29 -1
- data/test/unit/bio/test_location.rb +566 -6
- data/test/unit/bio/test_pathway.rb +91 -65
- data/test/unit/bio/test_reference.rb +67 -13
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
- metadata +202 -167
- data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
|
@@ -0,0 +1,733 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/io/ncbirest.rb - NCBI Entrez client module
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2008 Toshiaki Katayama <k@bioruby.org>
|
|
5
|
+
# License:: The Ruby License
|
|
6
|
+
#
|
|
7
|
+
# $Id:$
|
|
8
|
+
#
|
|
9
|
+
|
|
10
|
+
require 'bio/command'
|
|
11
|
+
|
|
12
|
+
module Bio
|
|
13
|
+
|
|
14
|
+
# == Description
|
|
15
|
+
#
|
|
16
|
+
# The Bio::NCBI::REST class provides a REST client for the NCBI E-Utilities.
|
|
17
|
+
#
|
|
18
|
+
# Entrez utilities index:
|
|
19
|
+
#
|
|
20
|
+
# * http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
|
|
21
|
+
#
|
|
22
|
+
class NCBI
|
|
23
|
+
class REST
|
|
24
|
+
|
|
25
|
+
# Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
|
|
26
|
+
# weekdays for any series of more than 100 requests.
|
|
27
|
+
# -> Not implemented yet in BioRuby
|
|
28
|
+
|
|
29
|
+
# Make no more than one request every 3 seconds.
|
|
30
|
+
NCBI_INTERVAL = 3
|
|
31
|
+
@@last_access = nil
|
|
32
|
+
|
|
33
|
+
private
|
|
34
|
+
|
|
35
|
+
# Throttles access to NCBI: if a previous access time has been recorded and
# fewer than +wait+ seconds have passed since then, sleeps for the remainder.
# The current time is then stored as the latest access time.
def ncbi_access_wait(wait = NCBI_INTERVAL)
  previous = @@last_access
  if previous
    elapsed = Time.now - previous
    sleep(wait - elapsed) if elapsed < wait
  end
  @@last_access = Time.now
end
|
|
44
|
+
|
|
45
|
+
public
|
|
46
|
+
|
|
47
|
+
# List the NCBI database names using the E-Utils (einfo) service
|
|
48
|
+
#
|
|
49
|
+
# * http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi
|
|
50
|
+
#
|
|
51
|
+
# pubmed protein nucleotide nuccore nucgss nucest structure genome
|
|
52
|
+
# books cancerchromosomes cdd gap domains gene genomeprj gensat geo
|
|
53
|
+
# gds homologene journals mesh ncbisearch nlmcatalog omia omim pmc
|
|
54
|
+
# popset probe proteinclusters pcassay pccompound pcsubstance snp
|
|
55
|
+
# taxonomy toolkit unigene unists
|
|
56
|
+
#
|
|
57
|
+
# == Usage
|
|
58
|
+
#
|
|
59
|
+
# ncbi = Bio::NCBI::REST.new
|
|
60
|
+
# ncbi.einfo
|
|
61
|
+
#
|
|
62
|
+
# Bio::NCBI::REST.einfo
|
|
63
|
+
#
|
|
64
|
+
# ---
|
|
65
|
+
# *Returns*:: array of string (database names)
|
|
66
|
+
# Posts an empty form to the E-Utils einfo service and collects the text of
# every <DbName> element found in the response body.
#
# *Returns*:: array of string (database names)
def einfo
  endpoint = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi"
  body = Bio::Command.post_form(endpoint, {}).body
  body.scan(/<DbName>(.*?)<\/DbName>/m).flatten
end
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# Search the NCBI database by given keywords using E-Utils (esearch) service
|
|
77
|
+
# and returns an array of entry IDs.
|
|
78
|
+
#
|
|
79
|
+
# For information on the possible arguments, see
|
|
80
|
+
#
|
|
81
|
+
# * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html
|
|
82
|
+
# * http://www.ncbi.nlm.nih.gov/books/bv.fcgi?rid=helppubmed.section.pubmedhelp.Search_Field_Descrip
|
|
83
|
+
#
|
|
84
|
+
# == Usage
|
|
85
|
+
#
|
|
86
|
+
# ncbi = Bio::NCBI::REST.new
|
|
87
|
+
# ncbi.esearch("tardigrada", {"db"=>"nucleotide", "rettype"=>"count"})
|
|
88
|
+
# ncbi.esearch("tardigrada", {"db"=>"nucleotide", "rettype"=>"gb"})
|
|
89
|
+
# ncbi.esearch("yeast kinase", {"db"=>"nuccore", "rettype"=>"gb", "retmax"=>5})
|
|
90
|
+
#
|
|
91
|
+
# Bio::NCBI::REST.esearch("tardigrada", {"db"=>"nucleotide", "rettype"=>"count"})
|
|
92
|
+
# Bio::NCBI::REST.esearch("tardigrada", {"db"=>"nucleotide", "rettype"=>"gb"})
|
|
93
|
+
# Bio::NCBI::REST.esearch("yeast kinase", {"db"=>"nuccore", "rettype"=>"gb", "retmax"=>5})
|
|
94
|
+
#
|
|
95
|
+
# ---
|
|
96
|
+
# *Arguments*:
|
|
97
|
+
# * _str_: query string (required)
|
|
98
|
+
# * _hash_: hash of E-Utils option {"db" => "nuccore", "rettype" => "gb"}
|
|
99
|
+
# * _db_: "sequences", "nucleotide", "protein", "pubmed", "taxonomy", ...
|
|
100
|
+
# * _retmode_: "text", "xml", "html", ...
|
|
101
|
+
# * _rettype_: "gb", "medline", "count", ...
|
|
102
|
+
# * _retmax_: integer (default 100)
|
|
103
|
+
# * _retstart_: integer
|
|
104
|
+
# * _field_:
|
|
105
|
+
# * "titl": Title [TI]
|
|
106
|
+
# * "tiab": Title/Abstract [TIAB]
|
|
107
|
+
# * "word": Text words [TW]
|
|
108
|
+
# * "auth": Author [AU]
|
|
109
|
+
# * "affl": Affiliation [AD]
|
|
110
|
+
# * "jour": Journal [TA]
|
|
111
|
+
# * "vol": Volume [VI]
|
|
112
|
+
# * "iss": Issue [IP]
|
|
113
|
+
# * "page": First page [PG]
|
|
114
|
+
# * "pdat": Publication date [DP]
|
|
115
|
+
# * "ptyp": Publication type [PT]
|
|
116
|
+
# * "lang": Language [LA]
|
|
117
|
+
# * "mesh": MeSH term [MH]
|
|
118
|
+
# * "majr": MeSH major topic [MAJR]
|
|
119
|
+
# * "subh": MeSH subheadings [SH]
|
|
120
|
+
# * "mhda": MeSH date [MHDA]
|
|
121
|
+
# * "ecno": EC/RN Number [rn]
|
|
122
|
+
# * "si": Secondary source ID [SI]
|
|
123
|
+
# * "uid": PubMed ID (PMID) [UI]
|
|
124
|
+
# * "fltr": Filter [FILTER] [SB]
|
|
125
|
+
# * "subs": Subset [SB]
|
|
126
|
+
# * _reldate_: 365
|
|
127
|
+
# * _mindate_: 2001
|
|
128
|
+
# * _maxdate_: 2002/01/01
|
|
129
|
+
# * _datetype_: "edat"
|
|
130
|
+
# * _limit_: maximum number of entries to be returned (0 for unlimited)
|
|
131
|
+
# * _step_: maximum number of entries retrieved at a time
|
|
132
|
+
# *Returns*:: array of entry IDs or a number of results
|
|
133
|
+
# Searches an NCBI database through the E-Utils esearch service.
#
# The options in +hash+ are merged over the defaults ("tool" and "term").
# When the merged "rettype" is "count", only the number of hits is returned
# (delegated to esearch_count). Otherwise entry IDs are collected page by
# page, at most +step+ IDs per request, until +limit+ IDs have been asked
# for. A +limit+ of 0 means "unlimited": the hit count reported by
# esearch_count is used as the limit.
#
# *Arguments*:
# * _str_: query string (required)
# * _hash_: hash of E-Utils options
# * _limit_: maximum number of entries to be returned (0 for unlimited)
# * _step_: maximum number of entries retrieved at a time
# *Returns*:: array of entry IDs, or a number of results when counting
def esearch(str, hash = {}, limit = 100, step = 10000)
  serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
  opts = {
    "tool" => "bioruby",
    "term" => str,
  }
  opts.update(hash)

  return esearch_count(str, opts) if opts["rettype"] == "count"

  limit = esearch_count(str, opts) if limit == 0   # unlimited

  list = []
  # Integer#step includes its end point, so iterate up to limit - 1: this
  # avoids a useless trailing request with retmax = 0 (and the rate-limit
  # wait that precedes it) when limit is 0 or an exact multiple of step.
  0.step(limit - 1, step) do |i|
    retmax = [step, limit - i].min
    opts.update("retmax" => retmax, "retstart" => i)
    ncbi_access_wait
    response = Bio::Command.post_form(serv, opts)
    list += response.body.scan(/<Id>(.*?)<\/Id>/m).flatten
  end
  list
end
|
|
160
|
+
|
|
161
|
+
# *Arguments*:: same as esearch method
|
|
162
|
+
# *Returns*:: a number of results (Integer)
|
|
163
|
+
# Asks the E-Utils esearch service only for the number of hits: the options
# in +hash+ are merged over the defaults, "rettype" is then forced to
# "count", and the first <Count> element of the response is converted with
# to_i.
def esearch_count(str, hash = {})
  endpoint = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
  params = { "tool" => "bioruby", "term" => str }
  params.update(hash)
  params["rettype"] = "count"
  # NOTE: the access wait is left disabled for this request.
  #ncbi_access_wait
  xml = Bio::Command.post_form(endpoint, params).body
  xml.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
end
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
# Retrieve database entries by given IDs using the E-Utils (efetch) service.
|
|
180
|
+
#
|
|
181
|
+
# For information on the possible arguments, see
|
|
182
|
+
#
|
|
183
|
+
# * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetch_help.html
|
|
184
|
+
#
|
|
185
|
+
# == Usage
|
|
186
|
+
#
|
|
187
|
+
# ncbi = Bio::NCBI::REST.new
|
|
188
|
+
# ncbi.efetch("185041", {"db"=>"nucleotide", "rettype"=>"gb", "retmode" => "xml"})
|
|
189
|
+
# ncbi.efetch("J00231", {"db"=>"nuccore", "rettype"=>"gb", "retmode"=>"xml"})
|
|
190
|
+
# ncbi.efetch("AAA52805", {"db"=>"protein", "rettype"=>"gb"})
|
|
191
|
+
#
|
|
192
|
+
# Bio::NCBI::REST.efetch("185041", {"db"=>"nucleotide", "rettype"=>"gb", "retmode" => "xml"})
|
|
193
|
+
# Bio::NCBI::REST.efetch("J00231", {"db"=>"nuccore", "rettype"=>"gb"})
|
|
194
|
+
# Bio::NCBI::REST.efetch("AAA52805", {"db"=>"protein", "rettype"=>"gb"})
|
|
195
|
+
#
|
|
196
|
+
# ---
|
|
197
|
+
# *Arguments*:
|
|
198
|
+
# * _ids_: list of NCBI entry IDs (required)
|
|
199
|
+
# * _hash_: hash of E-Utils option {"db" => "nuccore", "rettype" => "gb"}
|
|
200
|
+
# * _db_: "sequences", "nucleotide", "protein", "pubmed", "omim", ...
|
|
201
|
+
# * _retmode_: "text", "xml", "html", ...
|
|
202
|
+
# * _rettype_: "gb", "gbc", "medline", "count",...
|
|
203
|
+
# * _step_: maximum number of entries retrieved at a time
|
|
204
|
+
# *Returns*:: String
|
|
205
|
+
# Fetches entries from the E-Utils efetch service in batches.
#
# +ids+ may be an Array or anything whose to_s is a comma-separated list.
# The IDs are sent +step+ at a time (joined with commas), with
# ncbi_access_wait called before every request; empty batches are skipped.
# The response bodies are concatenated and the result is stripped.
#
# *Returns*:: String
def efetch(ids, hash = {}, step = 100)
  serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
  opts = { "tool" => "bioruby", "retmode" => "text" }.update(hash)
  list = ids.is_a?(Array) ? ids : ids.to_s.split(/\s*,\s*/)

  result = ""
  0.step(list.size, step) do |offset|
    opts["id"] = list[offset, step].join(',')
    next if opts["id"].empty?
    ncbi_access_wait
    result += Bio::Command.post_form(serv, opts).body
  end
  result.strip
  #return result.strip.split(/\n\n+/)
end
|
|
232
|
+
|
|
233
|
+
# Class-level shortcut: calls #einfo on a newly created instance.
def self.einfo
  new.einfo
end
|
|
236
|
+
|
|
237
|
+
# Class-level shortcut: forwards all arguments to #esearch on a newly
# created instance.
def self.esearch(*args)
  new.esearch(*args)
end
|
|
240
|
+
|
|
241
|
+
# Class-level shortcut: forwards all arguments to #esearch_count on a newly
# created instance.
def self.esearch_count(*args)
  new.esearch_count(*args)
end
|
|
244
|
+
|
|
245
|
+
# Class-level shortcut: forwards all arguments to #efetch on a newly
# created instance.
def self.efetch(*args)
  new.efetch(*args)
end
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
# Shortcut methods for the ESearch service
|
|
251
|
+
class ESearch
|
|
252
|
+
|
|
253
|
+
# Search database entries by given keywords using E-Utils (esearch).
|
|
254
|
+
#
|
|
255
|
+
# * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html
|
|
256
|
+
#
|
|
257
|
+
# sequences = gene + genome + nucleotide + protein + popset + snp
|
|
258
|
+
# nucleotide = nuccore + nucest + nucgss
|
|
259
|
+
#
|
|
260
|
+
# * http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi
|
|
261
|
+
#
|
|
262
|
+
# pubmed protein nucleotide nuccore nucgss nucest structure genome
|
|
263
|
+
# books cancerchromosomes cdd gap domains gene genomeprj gensat geo
|
|
264
|
+
# gds homologene journals mesh ncbisearch nlmcatalog omia omim pmc
|
|
265
|
+
# popset probe proteinclusters pcassay pccompound pcsubstance snp
|
|
266
|
+
# taxonomy toolkit unigene unists
|
|
267
|
+
#
|
|
268
|
+
# == Usage
|
|
269
|
+
#
|
|
270
|
+
# Bio::NCBI::REST::ESearch.search("nucleotide", "tardigrada")
|
|
271
|
+
# Bio::NCBI::REST::ESearch.count("nucleotide", "tardigrada")
|
|
272
|
+
#
|
|
273
|
+
# Bio::NCBI::REST::ESearch.nucleotide("tardigrada")
|
|
274
|
+
# Bio::NCBI::REST::ESearch.popset("aldh2")
|
|
275
|
+
# Bio::NCBI::REST::ESearch.taxonomy("tardigrada")
|
|
276
|
+
# Bio::NCBI::REST::ESearch.pubmed("tardigrada", "reldate" => 365)
|
|
277
|
+
# Bio::NCBI::REST::ESearch.pubmed("mammoth mitochondrial genome")
|
|
278
|
+
# Bio::NCBI::REST::ESearch.pmc("Indonesian coelacanth genome Latimeria menadoensis")
|
|
279
|
+
# Bio::NCBI::REST::ESearch.journal("bmc bioinformatics")
|
|
280
|
+
#
|
|
281
|
+
# ncbi = Bio::NCBI::REST::ESearch.new
|
|
282
|
+
# ncbi.search("nucleotide", "tardigrada")
|
|
283
|
+
# ncbi.count("nucleotide", "tardigrada")
|
|
284
|
+
#
|
|
285
|
+
# ncbi.nucleotide("tardigrada")
|
|
286
|
+
# ncbi.popset("aldh2")
|
|
287
|
+
# ncbi.taxonomy("tardigrada")
|
|
288
|
+
# ncbi.pubmed("tardigrada", "reldate" => 365)
|
|
289
|
+
# ncbi.pubmed("mammoth mitochondrial genome")
|
|
290
|
+
# ncbi.pmc("Indonesian coelacanth genome Latimeria menadoensis")
|
|
291
|
+
# ncbi.journal("bmc bioinformatics")
|
|
292
|
+
#
|
|
293
|
+
# ---
|
|
294
|
+
#
|
|
295
|
+
# *Arguments*:
|
|
296
|
+
# * _term_: search keywords (required)
|
|
297
|
+
# * _limit_: maximum number of entries to be returned (0 for unlimited)
|
|
298
|
+
# * _hash_: hash of E-Utils option
|
|
299
|
+
# *Returns*:: array of entry IDs or a number of results
|
|
300
|
+
module Methods
|
|
301
|
+
|
|
302
|
+
# search("nucleotide", "tardigrada")
|
|
303
|
+
# search("nucleotide", "tardigrada", 0) # unlimited
|
|
304
|
+
# search("pubmed", "tardigrada")
|
|
305
|
+
# search("pubmed", "tardigrada", 5) # first five
|
|
306
|
+
# search("pubmed", "tardigrada", "reldate" => 365) # within a year
|
|
307
|
+
# search("pubmed", "tardigrada", 5, "reldate" => 365) # combination
|
|
308
|
+
# search("pubmed", "tardigrada", {"reldate" => 365}, 5) # combination 2
|
|
309
|
+
# search("journals", "bmc", 10)
|
|
310
|
+
# Runs Bio::NCBI::REST.esearch for +term+ against the database +db+.
#
# Extra arguments may come in any order: every Hash is merged into the
# E-Utils options, and every other value is converted with to_i and used
# as the limit (the last such value wins; the default limit is 100).
def search(db, term, *args)
  limit = 100
  extra = {}
  args.each do |arg|
    if arg.is_a?(Hash)
      extra.update(arg)
    else
      limit = arg.to_i
    end
  end
  Bio::NCBI::REST.esearch(term, { "db" => db }.update(extra), limit)
end
|
|
325
|
+
|
|
326
|
+
# count("nucleotide", "tardigrada")
|
|
327
|
+
# count("pubmed", "tardigrada")
|
|
328
|
+
# count("journals", "bmc")
|
|
329
|
+
# Returns the result of Bio::NCBI::REST.esearch_count for +term+, with the
# options in +hash+ merged over { "db" => db }.
def count(db, term, hash = {})
  Bio::NCBI::REST.esearch_count(term, { "db" => db }.update(hash))
end
|
|
334
|
+
|
|
335
|
+
# nucleotide("tardigrada")
|
|
336
|
+
# nucleotide("tardigrada", 0)
|
|
337
|
+
# pubmed("tardigrada")
|
|
338
|
+
# pubmed("tardigrada", 5)
|
|
339
|
+
# pubmed("tardigrada", "reldate" => 365)
|
|
340
|
+
# pubmed("tardigrada", 5, "reldate" => 365)
|
|
341
|
+
# pubmed("tardigrada", {"reldate" => 365}, 5)
|
|
342
|
+
# Forwards any undefined method to #search, so the called method name
# (passed by Ruby as the first element of +args+) is used as the database
# argument, e.g. pubmed("tardigrada") becomes search(:pubmed, "tardigrada").
def method_missing(*args)
  search(*args)
end
|
|
345
|
+
|
|
346
|
+
# alias for journals
|
|
347
|
+
# Searches the "journals" database; remaining arguments go to #search.
def journal(*args)
  search("journals", *args)
end
|
|
350
|
+
|
|
351
|
+
# alias for "nucest"
|
|
352
|
+
# Searches the "nucest" database; remaining arguments go to #search.
def est(*args)
  search("nucest", *args)
end
|
|
355
|
+
|
|
356
|
+
# alias for "nucgss"
|
|
357
|
+
# Searches the "nucgss" database; remaining arguments go to #search.
def gss(*args)
  search("nucgss", *args)
end
|
|
360
|
+
|
|
361
|
+
end # Methods
|
|
362
|
+
|
|
363
|
+
include Methods
|
|
364
|
+
extend Methods
|
|
365
|
+
|
|
366
|
+
end # ESearch
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
# Shortcut methods for the EFetch service
|
|
370
|
+
class EFetch
|
|
371
|
+
|
|
372
|
+
module Methods
|
|
373
|
+
|
|
374
|
+
# Retrieve sequence entries by given IDs using E-Utils (efetch).
|
|
375
|
+
#
|
|
376
|
+
# * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchseq_help.html
|
|
377
|
+
#
|
|
378
|
+
# sequences = gene + genome + nucleotide + protein + popset + snp
|
|
379
|
+
# nucleotide = nuccore + nucest + nucgss
|
|
380
|
+
#
|
|
381
|
+
# format (rettype):
|
|
382
|
+
# * native all but Gene Default format for viewing sequences
|
|
383
|
+
# * fasta all sequence FASTA view of a sequence
|
|
384
|
+
# * gb NA sequence GenBank view for sequences
|
|
385
|
+
# * gbc NA sequence INSDSeq structured flat file
|
|
386
|
+
# * gbwithparts NA sequence GenBank CON division with sequences
|
|
387
|
+
# * est dbEST sequence EST Report
|
|
388
|
+
# * gss dbGSS sequence GSS Report
|
|
389
|
+
# * gp AA sequence GenPept view
|
|
390
|
+
# * gpc AA sequence INSDSeq structured flat file
|
|
391
|
+
# * seqid all sequence Convert GIs into seqids
|
|
392
|
+
# * acc all sequence Convert GIs into accessions
|
|
393
|
+
# * chr dbSNP only SNP Chromosome Report
|
|
394
|
+
# * flt dbSNP only SNP Flat File report
|
|
395
|
+
# * rsr dbSNP only SNP RS Cluster report
|
|
396
|
+
# * brief dbSNP only SNP ID list
|
|
397
|
+
# * docset dbSNP only SNP RS summary
|
|
398
|
+
#
|
|
399
|
+
# == Usage
|
|
400
|
+
#
|
|
401
|
+
# Bio::NCBI::REST::EFetch.sequence("123,U12345,U12345.1,gb|U12345|")
|
|
402
|
+
#
|
|
403
|
+
# list = [123, "U12345.1", "gb|U12345|"]
|
|
404
|
+
# Bio::NCBI::REST::EFetch.sequence(list)
|
|
405
|
+
# Bio::NCBI::REST::EFetch.sequence(list, "fasta")
|
|
406
|
+
# Bio::NCBI::REST::EFetch.sequence(list, "acc")
|
|
407
|
+
# Bio::NCBI::REST::EFetch.sequence(list, "xml")
|
|
408
|
+
#
|
|
409
|
+
# Bio::NCBI::REST::EFetch.sequence("AE009950")
|
|
410
|
+
# Bio::NCBI::REST::EFetch.sequence("AE009950", "gbwithparts")
|
|
411
|
+
#
|
|
412
|
+
# ncbi = Bio::NCBI::REST::EFetch.new
|
|
413
|
+
# ncbi.sequence("123,U12345,U12345.1,gb|U12345|")
|
|
414
|
+
# ncbi.sequence(list)
|
|
415
|
+
# ncbi.sequence(list, "fasta")
|
|
416
|
+
# ncbi.sequence(list, "acc")
|
|
417
|
+
# ncbi.sequence(list, "xml")
|
|
418
|
+
# ncbi.sequence("AE009950")
|
|
419
|
+
# ncbi.sequence("AE009950", "gbwithparts")
|
|
420
|
+
#
|
|
421
|
+
# ---
|
|
422
|
+
#
|
|
423
|
+
# *Arguments*:
|
|
424
|
+
# * _ids_: list of NCBI entry IDs (required)
|
|
425
|
+
# * _format_: "gb", "gbc", "fasta", "acc", "xml" etc.
|
|
426
|
+
# *Returns*:: String
|
|
427
|
+
# Fetches sequence entries via Bio::NCBI::REST.efetch from the "sequences"
# database. The format "xml" is sent as rettype "gbc"; any other format is
# sent unchanged. Options in +hash+ are merged last and override these.
#
# *Returns*:: the result of Bio::NCBI::REST.efetch
def sequence(ids, format = "gb", hash = {})
  rettype = (format == "xml") ? "gbc" : format
  opts = { "db" => "sequences", "rettype" => rettype }
  Bio::NCBI::REST.efetch(ids, opts.update(hash))
end
|
|
436
|
+
|
|
437
|
+
# Retrieve PubMed entries by given IDs using E-Utils (efetch).
|
|
438
|
+
#
|
|
439
|
+
# * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchlit_help.html
|
|
440
|
+
#
|
|
441
|
+
# == Usage
|
|
442
|
+
#
|
|
443
|
+
# Bio::NCBI::REST::EFetch.pubmed(15496913)
|
|
444
|
+
# Bio::NCBI::REST::EFetch.pubmed("15496913,11181995")
|
|
445
|
+
#
|
|
446
|
+
# list = [15496913, 11181995]
|
|
447
|
+
# Bio::NCBI::REST::EFetch.pubmed(list)
|
|
448
|
+
# Bio::NCBI::REST::EFetch.pubmed(list, "abstract")
|
|
449
|
+
# Bio::NCBI::REST::EFetch.pubmed(list, "citation")
|
|
450
|
+
# Bio::NCBI::REST::EFetch.pubmed(list, "medline")
|
|
451
|
+
# Bio::NCBI::REST::EFetch.pubmed(list, "xml")
|
|
452
|
+
#
|
|
453
|
+
# ncbi = Bio::NCBI::REST::EFetch.new
|
|
454
|
+
# ncbi.pubmed(list)
|
|
455
|
+
# ncbi.pubmed(list, "abstract")
|
|
456
|
+
# ncbi.pubmed(list, "citation")
|
|
457
|
+
# ncbi.pubmed(list, "medline")
|
|
458
|
+
# ncbi.pubmed(list, "xml")
|
|
459
|
+
#
|
|
460
|
+
# ---
|
|
461
|
+
#
|
|
462
|
+
# *Arguments*:
|
|
463
|
+
# * _ids_: list of PubMed entry IDs (required)
|
|
464
|
+
# * _format_: "abstract", "citation", "medline", "xml"
|
|
465
|
+
# *Returns*:: String
|
|
466
|
+
def pubmed(ids, format = "medline", hash = {})
  # "xml" is not forwarded as a rettype: it selects "medline" records
  # delivered with retmode "xml". Every other format uses retmode "text".
  if "xml" == format
    rettype, retmode = "medline", "xml"
  else
    rettype, retmode = format, "text"
  end
  params = { "db" => "pubmed", "rettype" => rettype, "retmode" => retmode }
  # Caller-supplied options win over the defaults built above.
  Bio::NCBI::REST.efetch(ids, params.merge(hash))
end
|
|
478
|
+
|
|
479
|
+
# Retrieve PubMed Central entries by given IDs using E-Utils (efetch).
|
|
480
|
+
#
|
|
481
|
+
# * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchlit_help.html
|
|
482
|
+
#
|
|
483
|
+
# == Usage
|
|
484
|
+
#
|
|
485
|
+
# Bio::NCBI::REST::EFetch.pmc(1360101)
|
|
486
|
+
# Bio::NCBI::REST::EFetch.pmc("1360101,534663")
|
|
487
|
+
#
|
|
488
|
+
# list = [1360101, 534663]
|
|
489
|
+
# Bio::NCBI::REST::EFetch.pmc(list)
|
|
490
|
+
# Bio::NCBI::REST::EFetch.pmc(list, "xml")
|
|
491
|
+
#
|
|
492
|
+
# ncbi = Bio::NCBI::REST::EFetch.new
|
|
493
|
+
# ncbi.pmc(list)
|
|
494
|
+
# ncbi.pmc(list, "xml")
|
|
495
|
+
#
|
|
496
|
+
# ---
|
|
497
|
+
#
|
|
498
|
+
# *Arguments*:
|
|
499
|
+
# * _ids_: list of PubMed Central entry IDs (required)
|
|
500
|
+
# * _format_: "docsum", "xml"
|
|
501
|
+
# *Returns*:: String
|
|
502
|
+
def pmc(ids, format = "docsum", hash = {})
  # Translate the user-facing format into the rettype/retmode pair sent to efetch.
  case format
  when "xml"
    # NOTE(review): "medline" matches the mapping used by pubmed in this
    # module; confirm it is the intended rettype for db=pmc.
    format = "medline"
    mode = "xml"
  else
    mode = "text"
  end
  opts = { "db" => "pmc", "rettype" => format, "retmode" => mode }
  # Merge caller-supplied options last so they override the defaults, the
  # same way sequence/pubmed/journal/omim do. Without this the hash
  # argument would be accepted but silently ignored.
  opts.update(hash)
  Bio::NCBI::REST.efetch(ids, opts)
end
|
|
513
|
+
|
|
514
|
+
# Retrieve journal entries by given IDs using E-Utils (efetch).
|
|
515
|
+
#
|
|
516
|
+
# * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchlit_help.html
|
|
517
|
+
#
|
|
518
|
+
# == Usage
|
|
519
|
+
#
|
|
520
|
+
# Bio::NCBI::REST::EFetch.journal(21854)
|
|
521
|
+
#
|
|
522
|
+
# list = [21854, 21855]
|
|
523
|
+
# Bio::NCBI::REST::EFetch.journal(list)
|
|
524
|
+
# Bio::NCBI::REST::EFetch.journal(list, "xml")
|
|
525
|
+
#
|
|
526
|
+
# ncbi = Bio::NCBI::REST::EFetch.new
|
|
527
|
+
# ncbi.journal(list)
|
|
528
|
+
# ncbi.journal(list, "xml")
|
|
529
|
+
#
|
|
530
|
+
# ---
|
|
531
|
+
#
|
|
532
|
+
# *Arguments*:
|
|
533
|
+
# * _ids_: list of journal entry IDs (required)
|
|
534
|
+
# * _format_: "full", "xml"
|
|
535
|
+
# *Returns*:: String
|
|
536
|
+
def journal(ids, format = "full", hash = {})
  # "xml" means: fetch the "full" record in XML retmode. Any other format
  # is forwarded unchanged with retmode "text".
  wants_xml = ("xml" == format)
  params = {
    "db"      => "journals",
    "rettype" => (wants_xml ? "full" : format),
    "retmode" => (wants_xml ? "xml" : "text")
  }
  # Caller-supplied options override the defaults.
  params.update(hash)
  Bio::NCBI::REST.efetch(ids, params)
end
|
|
548
|
+
|
|
549
|
+
# Retrieve OMIM entries by given IDs using E-Utils (efetch).
|
|
550
|
+
#
|
|
551
|
+
# * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchlit_help.html
|
|
552
|
+
#
|
|
553
|
+
# == Usage
|
|
554
|
+
#
|
|
555
|
+
# Bio::NCBI::REST::EFetch.omim(143100)
|
|
556
|
+
#
|
|
557
|
+
# list = [143100, 602260]
|
|
558
|
+
# Bio::NCBI::REST::EFetch.omim(list)
|
|
559
|
+
# Bio::NCBI::REST::EFetch.omim(list, "xml")
|
|
560
|
+
#
|
|
561
|
+
# ncbi = Bio::NCBI::REST::EFetch.new
|
|
562
|
+
# ncbi.omim(list)
|
|
563
|
+
# ncbi.omim(list, "xml")
|
|
564
|
+
#
|
|
565
|
+
# ---
|
|
566
|
+
#
|
|
567
|
+
# *Arguments*:
|
|
568
|
+
# * _ids_: list of OMIM entry IDs (required)
|
|
569
|
+
# * _format_: "docsum", "synopsis", "variants", "detailed", "linkout", "xml"
|
|
570
|
+
# *Returns*:: String
|
|
571
|
+
def omim(ids, format = "detailed", hash = {})
  # Map the user-facing format name to the rettype/retmode pair sent to
  # efetch. "xml" and "linkout" are aliases; everything else is passed
  # through as the rettype with retmode "text".
  rettype, retmode =
    case format
    when "xml"     then ["full", "xml"]
    when "linkout" then ["ExternalLink", "text"]
    else                [format, "text"]
    end
  params = { "db" => "omim", "rettype" => rettype, "retmode" => retmode }
  # Caller-supplied options override the defaults.
  Bio::NCBI::REST.efetch(ids, params.merge(hash))
end
|
|
586
|
+
|
|
587
|
+
# Retrieve taxonomy entries by given IDs using E-Utils (efetch).
|
|
588
|
+
#
|
|
589
|
+
# * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetchtax_help.html
|
|
590
|
+
#
|
|
591
|
+
# == Usage
|
|
592
|
+
#
|
|
593
|
+
# Bio::NCBI::REST::EFetch.taxonomy(42241)
|
|
594
|
+
#
|
|
595
|
+
# list = [232323, 290179, 286681]
|
|
596
|
+
# Bio::NCBI::REST::EFetch.taxonomy(list)
|
|
597
|
+
# Bio::NCBI::REST::EFetch.taxonomy(list, "xml")
|
|
598
|
+
#
|
|
599
|
+
# ncbi = Bio::NCBI::REST::EFetch.new
|
|
600
|
+
# ncbi.taxonomy(list)
|
|
601
|
+
# ncbi.taxonomy(list, "xml")
|
|
602
|
+
#
|
|
603
|
+
# ---
|
|
604
|
+
#
|
|
605
|
+
# *Arguments*:
|
|
606
|
+
# * _ids_: list of Taxonomy entry IDs (required)
|
|
607
|
+
# * _format_: "brief", "docsum", "xml"
|
|
608
|
+
# *Returns*:: String
|
|
609
|
+
def taxonomy(ids, format = "docsum", hash = {})
  # Translate the user-facing format into the rettype/retmode pair sent to
  # efetch: "xml" requests the "full" record in XML, anything else is
  # forwarded as the rettype in text mode.
  case format
  when "xml"
    format = "full"
    mode = "xml"
  else
    mode = "text"
  end
  opts = { "db" => "taxonomy", "rettype" => format, "retmode" => mode }
  # Merge caller-supplied options last so they override the defaults, the
  # same way sequence/pubmed/journal/omim do. Without this the hash
  # argument would be accepted but silently ignored.
  opts.update(hash)
  Bio::NCBI::REST.efetch(ids, opts)
end
|
|
620
|
+
|
|
621
|
+
end # Methods
|
|
622
|
+
|
|
623
|
+
include Methods
|
|
624
|
+
extend Methods
|
|
625
|
+
|
|
626
|
+
end # EFetch
|
|
627
|
+
|
|
628
|
+
|
|
629
|
+
end # REST
|
|
630
|
+
end # NCBI
|
|
631
|
+
end # Bio
|
|
632
|
+
|
|
633
|
+
|
|
634
|
+
if __FILE__ == $PROGRAM_NAME

  # Manual smoke test, run only when this file is executed directly.
  # It calls esearch/efetch through both the class-level and the
  # instance-level interfaces and prints a timestamp before each request.

  # Reusable option fragments, combined per request with Hash#merge.
  nuccore_gb     = {"db"=>"nuccore", "rettype"=>"gb"}
  pubmed_medline = {"db"=>"pubmed", "rettype"=>"medline"}
  count_only     = {"rettype" => "count"}
  as_xml         = {"retmode"=>"xml"}
  first_five     = {"retmax"=>5}

  rest = Bio::NCBI::REST

  puts "=== class methods ==="

  puts "--- Search NCBI by E-Utils ---"

  puts Time.now, "# count of 'tardigrada' in nuccore"
  puts rest.esearch("tardigrada", nuccore_gb.merge(count_only))

  puts Time.now, "# max 5 'tardigrada' entries in nuccore"
  puts rest.esearch("tardigrada", nuccore_gb.merge(first_five))

  puts Time.now, "# count of 'yeast kinase' in nuccore"
  puts rest.esearch("yeast kinase", nuccore_gb.merge(count_only))

  puts Time.now, "# max 5 'yeast kinase' entries in nuccore (XML)"
  puts rest.esearch("yeast kinase", nuccore_gb.merge(as_xml).merge(first_five))

  puts Time.now, "# count of 'genome&analysis|bioinformatics' in pubmed"
  puts rest.esearch("(genome AND analysis) OR bioinformatics", pubmed_medline.merge(count_only))

  puts Time.now, "# max 5 'genome&analysis|bioinformatics' entries in pubmed (XML)"
  puts rest.esearch("(genome AND analysis) OR bioinformatics", pubmed_medline.merge(as_xml).merge(first_five))

  puts Time.now
  rest.esearch("(genome AND analysis) OR bioinformatics", pubmed_medline.merge(first_five)).each { |entry|
    puts "# each of 5 'genome&analysis|bioinformatics' entries in pubmed", entry
  }

  puts "--- Retrieve NCBI entry by E-Utils ---"

  puts Time.now, "# '185041' entry in nuccore"
  puts rest.efetch("185041", nuccore_gb)

  puts Time.now, "# 'J00231' entry in nuccore (XML)"
  puts rest.efetch("J00231", nuccore_gb.merge(as_xml))

  puts Time.now, "# 16381885 entry in pubmed"
  puts rest.efetch(16381885, pubmed_medline)

  puts Time.now, "# '16381885' entry in pubmed"
  puts rest.efetch("16381885", pubmed_medline)

  puts Time.now, "# [10592173,14693808] entries in pubmed"
  puts rest.efetch([10592173, 14693808], pubmed_medline)

  puts Time.now, "# [10592173,14693808] entries in pubmed (XML)"
  puts rest.efetch([10592173, 14693808], pubmed_medline.merge(as_xml))


  puts "=== instance methods ==="

  client = Bio::NCBI::REST.new

  puts "--- Search NCBI by E-Utils ---"

  puts Time.now, "# count of 'genome&analysis|bioinformatics' in pubmed"
  puts client.esearch("(genome AND analysis) OR bioinformatics", pubmed_medline.merge(count_only))

  puts Time.now, "# max 5 'genome&analysis|bioinformatics' entries in pubmed"
  puts client.esearch("(genome AND analysis) OR bioinformatics", pubmed_medline.merge(first_five))

  puts Time.now
  client.esearch("(genome AND analysis) OR bioinformatics", pubmed_medline).each { |entry|
    puts "# each 'genome&analysis|bioinformatics' entries in pubmed", entry
  }

  puts "--- Retrieve NCBI entry by E-Utils ---"

  puts Time.now, "# 16381885 entry in pubmed"
  puts client.efetch(16381885, pubmed_medline)

  puts Time.now, "# [10592173,14693808] entries in pubmed"
  puts client.efetch([10592173, 14693808], pubmed_medline)

end
|