bio 1.3.1 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +2105 -3728
- data/KNOWN_ISSUES.rdoc +35 -3
- data/README.rdoc +8 -2
- data/RELEASE_NOTES.rdoc +166 -0
- data/bin/bioruby +4 -1
- data/bioruby.gemspec +146 -1
- data/bioruby.gemspec.erb +3 -1
- data/doc/ChangeLog-before-1.3.1 +3961 -0
- data/doc/Tutorial.rd +154 -22
- data/doc/Tutorial.rd.html +125 -68
- data/lib/bio.rb +21 -6
- data/lib/bio/appl/bl2seq/report.rb +11 -202
- data/lib/bio/appl/blast/format0.rb +0 -193
- data/lib/bio/appl/blast/report.rb +2 -147
- data/lib/bio/appl/blast/wublast.rb +0 -208
- data/lib/bio/appl/fasta.rb +4 -19
- data/lib/bio/appl/fasta/format10.rb +0 -14
- data/lib/bio/appl/genscan/report.rb +0 -176
- data/lib/bio/appl/hmmer.rb +1 -15
- data/lib/bio/appl/hmmer/report.rb +0 -100
- data/lib/bio/appl/meme/mast.rb +156 -0
- data/lib/bio/appl/meme/mast/report.rb +91 -0
- data/lib/bio/appl/meme/motif.rb +48 -0
- data/lib/bio/appl/psort.rb +0 -111
- data/lib/bio/appl/psort/report.rb +1 -45
- data/lib/bio/appl/pts1.rb +2 -4
- data/lib/bio/appl/sosui/report.rb +5 -54
- data/lib/bio/appl/targetp/report.rb +1 -104
- data/lib/bio/appl/tmhmm/report.rb +0 -36
- data/lib/bio/command.rb +94 -10
- data/lib/bio/data/aa.rb +1 -77
- data/lib/bio/data/codontable.rb +1 -95
- data/lib/bio/data/na.rb +1 -26
- data/lib/bio/db/aaindex.rb +1 -38
- data/lib/bio/db/fasta.rb +1 -134
- data/lib/bio/db/fasta/format_qual.rb +204 -0
- data/lib/bio/db/fasta/qual.rb +102 -0
- data/lib/bio/db/fastq.rb +645 -0
- data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
- data/lib/bio/db/fastq/format_fastq.rb +175 -0
- data/lib/bio/db/genbank/genbank.rb +1 -86
- data/lib/bio/db/gff.rb +0 -17
- data/lib/bio/db/go.rb +4 -72
- data/lib/bio/db/kegg/common.rb +112 -0
- data/lib/bio/db/kegg/compound.rb +29 -20
- data/lib/bio/db/kegg/drug.rb +74 -34
- data/lib/bio/db/kegg/enzyme.rb +26 -5
- data/lib/bio/db/kegg/genes.rb +128 -15
- data/lib/bio/db/kegg/genome.rb +3 -41
- data/lib/bio/db/kegg/glycan.rb +19 -24
- data/lib/bio/db/kegg/orthology.rb +16 -56
- data/lib/bio/db/kegg/reaction.rb +81 -28
- data/lib/bio/db/kegg/taxonomy.rb +1 -52
- data/lib/bio/db/litdb.rb +1 -16
- data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
- data/lib/bio/db/prosite.rb +2 -95
- data/lib/bio/db/rebase.rb +5 -6
- data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
- data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
- data/lib/bio/io/das.rb +0 -44
- data/lib/bio/io/ddbjxml.rb +1 -181
- data/lib/bio/io/flatfile.rb +1 -7
- data/lib/bio/io/flatfile/autodetection.rb +6 -0
- data/lib/bio/io/keggapi.rb +0 -442
- data/lib/bio/io/ncbirest.rb +130 -132
- data/lib/bio/io/ncbisoap.rb +2 -1
- data/lib/bio/io/pubmed.rb +0 -88
- data/lib/bio/location.rb +0 -73
- data/lib/bio/pathway.rb +0 -171
- data/lib/bio/sequence.rb +18 -1
- data/lib/bio/sequence/adapter.rb +3 -0
- data/lib/bio/sequence/format.rb +16 -0
- data/lib/bio/sequence/quality_score.rb +205 -0
- data/lib/bio/tree.rb +70 -5
- data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
- data/lib/bio/util/sirna.rb +1 -23
- data/lib/bio/version.rb +1 -1
- data/sample/demo_aaindex.rb +67 -0
- data/sample/demo_aminoacid.rb +101 -0
- data/sample/demo_bl2seq_report.rb +220 -0
- data/sample/demo_blast_report.rb +285 -0
- data/sample/demo_codontable.rb +119 -0
- data/sample/demo_das.rb +105 -0
- data/sample/demo_ddbjxml.rb +212 -0
- data/sample/demo_fasta_remote.rb +51 -0
- data/sample/demo_fastaformat.rb +105 -0
- data/sample/demo_genbank.rb +132 -0
- data/sample/demo_genscan_report.rb +202 -0
- data/sample/demo_gff1.rb +49 -0
- data/sample/demo_go.rb +98 -0
- data/sample/demo_hmmer_report.rb +149 -0
- data/sample/demo_kegg_compound.rb +57 -0
- data/sample/demo_kegg_drug.rb +65 -0
- data/sample/demo_kegg_genome.rb +74 -0
- data/sample/demo_kegg_glycan.rb +72 -0
- data/sample/demo_kegg_orthology.rb +62 -0
- data/sample/demo_kegg_reaction.rb +66 -0
- data/sample/demo_kegg_taxonomy.rb +92 -0
- data/sample/demo_keggapi.rb +502 -0
- data/sample/demo_litdb.rb +42 -0
- data/sample/demo_locations.rb +99 -0
- data/sample/demo_ncbi_rest.rb +130 -0
- data/sample/demo_nucleicacid.rb +49 -0
- data/sample/demo_pathway.rb +196 -0
- data/sample/demo_prosite.rb +120 -0
- data/sample/demo_psort.rb +138 -0
- data/sample/demo_psort_report.rb +70 -0
- data/sample/demo_pubmed.rb +118 -0
- data/sample/demo_sirna.rb +63 -0
- data/sample/demo_sosui_report.rb +89 -0
- data/sample/demo_targetp_report.rb +135 -0
- data/sample/demo_tmhmm_report.rb +68 -0
- data/sample/pmfetch.rb +13 -4
- data/sample/pmsearch.rb +15 -4
- data/sample/test_phyloxml_big.rb +205 -0
- data/test/bioruby_test_helper.rb +61 -0
- data/test/data/KEGG/1.1.1.1.enzyme +935 -0
- data/test/data/KEGG/C00025.compound +102 -0
- data/test/data/KEGG/D00063.drug +104 -0
- data/test/data/KEGG/G00024.glycan +47 -0
- data/test/data/KEGG/G01366.glycan +18 -0
- data/test/data/KEGG/K02338.orthology +902 -0
- data/test/data/KEGG/R00006.reaction +14 -0
- data/test/data/fastq/README.txt +109 -0
- data/test/data/fastq/error_diff_ids.fastq +20 -0
- data/test/data/fastq/error_double_qual.fastq +22 -0
- data/test/data/fastq/error_double_seq.fastq +22 -0
- data/test/data/fastq/error_long_qual.fastq +20 -0
- data/test/data/fastq/error_no_qual.fastq +20 -0
- data/test/data/fastq/error_qual_del.fastq +20 -0
- data/test/data/fastq/error_qual_escape.fastq +20 -0
- data/test/data/fastq/error_qual_null.fastq +0 -0
- data/test/data/fastq/error_qual_space.fastq +21 -0
- data/test/data/fastq/error_qual_tab.fastq +21 -0
- data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
- data/test/data/fastq/error_qual_vtab.fastq +20 -0
- data/test/data/fastq/error_short_qual.fastq +20 -0
- data/test/data/fastq/error_spaces.fastq +20 -0
- data/test/data/fastq/error_tabs.fastq +21 -0
- data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
- data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
- data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_title.fastq +17 -0
- data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
- data/test/data/fastq/longreads_as_illumina.fastq +40 -0
- data/test/data/fastq/longreads_as_sanger.fastq +40 -0
- data/test/data/fastq/longreads_as_solexa.fastq +40 -0
- data/test/data/fastq/longreads_original_sanger.fastq +120 -0
- data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
- data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
- data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
- data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
- data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
- data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
- data/test/data/meme/db +0 -0
- data/test/data/meme/mast +0 -0
- data/test/data/meme/mast.out +13 -0
- data/test/data/meme/meme.out +3 -0
- data/test/data/phyloxml/apaf.xml +666 -0
- data/test/data/phyloxml/bcl_2.xml +2097 -0
- data/test/data/phyloxml/made_up.xml +144 -0
- data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
- data/test/data/phyloxml/phyloxml_examples.xml +415 -0
- data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
- data/test/functional/bio/appl/test_pts1.rb +7 -5
- data/test/functional/bio/io/test_ensembl.rb +4 -3
- data/test/functional/bio/io/test_pubmed.rb +9 -3
- data/test/functional/bio/io/test_soapwsdl.rb +5 -4
- data/test/functional/bio/io/test_togows.rb +5 -4
- data/test/functional/bio/sequence/test_output_embl.rb +6 -4
- data/test/functional/bio/test_command.rb +54 -5
- data/test/runner.rb +5 -3
- data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
- data/test/unit/bio/appl/blast/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
- data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
- data/test/unit/bio/appl/genscan/test_report.rb +8 -9
- data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
- data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
- data/test/unit/bio/appl/mafft/test_report.rb +6 -5
- data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
- data/test/unit/bio/appl/meme/test_mast.rb +103 -0
- data/test/unit/bio/appl/meme/test_motif.rb +38 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
- data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
- data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
- data/test/unit/bio/appl/sim4/test_report.rb +5 -4
- data/test/unit/bio/appl/sosui/test_report.rb +6 -5
- data/test/unit/bio/appl/targetp/test_report.rb +5 -3
- data/test/unit/bio/appl/test_blast.rb +5 -4
- data/test/unit/bio/appl/test_fasta.rb +4 -2
- data/test/unit/bio/appl/test_pts1.rb +4 -2
- data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
- data/test/unit/bio/data/test_aa.rb +5 -3
- data/test/unit/bio/data/test_codontable.rb +5 -4
- data/test/unit/bio/data/test_na.rb +5 -3
- data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
- data/test/unit/bio/db/embl/test_common.rb +4 -2
- data/test/unit/bio/db/embl/test_embl.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
- data/test/unit/bio/db/embl/test_sptr.rb +6 -8
- data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
- data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
- data/test/unit/bio/db/kegg/test_compound.rb +146 -0
- data/test/unit/bio/db/kegg/test_drug.rb +194 -0
- data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
- data/test/unit/bio/db/kegg/test_genes.rb +32 -4
- data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
- data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
- data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
- data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
- data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
- data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
- data/test/unit/bio/db/test_aaindex.rb +6 -6
- data/test/unit/bio/db/test_fasta.rb +5 -46
- data/test/unit/bio/db/test_fastq.rb +829 -0
- data/test/unit/bio/db/test_gff.rb +4 -2
- data/test/unit/bio/db/test_lasergene.rb +7 -5
- data/test/unit/bio/db/test_medline.rb +4 -2
- data/test/unit/bio/db/test_newick.rb +6 -6
- data/test/unit/bio/db/test_nexus.rb +4 -2
- data/test/unit/bio/db/test_phyloxml.rb +769 -0
- data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
- data/test/unit/bio/db/test_prosite.rb +6 -5
- data/test/unit/bio/db/test_qual.rb +63 -0
- data/test/unit/bio/db/test_rebase.rb +5 -3
- data/test/unit/bio/db/test_soft.rb +7 -6
- data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
- data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
- data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
- data/test/unit/bio/io/test_ddbjxml.rb +4 -3
- data/test/unit/bio/io/test_ensembl.rb +5 -3
- data/test/unit/bio/io/test_fastacmd.rb +4 -3
- data/test/unit/bio/io/test_flatfile.rb +6 -5
- data/test/unit/bio/io/test_soapwsdl.rb +4 -3
- data/test/unit/bio/io/test_togows.rb +4 -2
- data/test/unit/bio/sequence/test_aa.rb +5 -3
- data/test/unit/bio/sequence/test_common.rb +4 -2
- data/test/unit/bio/sequence/test_compat.rb +4 -2
- data/test/unit/bio/sequence/test_dblink.rb +5 -3
- data/test/unit/bio/sequence/test_na.rb +4 -2
- data/test/unit/bio/sequence/test_quality_score.rb +330 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
- data/test/unit/bio/test_alignment.rb +5 -3
- data/test/unit/bio/test_command.rb +4 -3
- data/test/unit/bio/test_db.rb +5 -3
- data/test/unit/bio/test_feature.rb +4 -2
- data/test/unit/bio/test_location.rb +4 -2
- data/test/unit/bio/test_map.rb +5 -3
- data/test/unit/bio/test_pathway.rb +4 -2
- data/test/unit/bio/test_reference.rb +4 -2
- data/test/unit/bio/test_sequence.rb +5 -3
- data/test/unit/bio/test_shell.rb +5 -3
- data/test/unit/bio/test_tree.rb +6 -6
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
- data/test/unit/bio/util/test_color_scheme.rb +5 -3
- data/test/unit/bio/util/test_contingency_table.rb +5 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
- data/test/unit/bio/util/test_sirna.rb +6 -4
- metadata +147 -2
data/lib/bio/io/ncbirest.rb
CHANGED
@@ -7,10 +7,69 @@
|
|
7
7
|
# $Id:$
|
8
8
|
#
|
9
9
|
|
10
|
+
require 'thread'
|
10
11
|
require 'bio/command'
|
12
|
+
require 'bio/version'
|
11
13
|
|
12
14
|
module Bio
|
13
15
|
|
16
|
+
class NCBI
|
17
|
+
|
18
|
+
autoload :SOAP, 'bio/io/ncbisoap'
|
19
|
+
|
20
|
+
# (Hash) Default parameters for Entrez (eUtils).
|
21
|
+
# They may also be used for other NCBI services.
|
22
|
+
ENTREZ_DEFAULT_PARAMETERS = {
|
23
|
+
'tool' => "#{$0} (bioruby/#{Bio::BIORUBY_VERSION_ID})",
|
24
|
+
'email' => nil,
|
25
|
+
}
|
26
|
+
|
27
|
+
# Resets Entrez (eUtils) default parameters.
|
28
|
+
# ---
|
29
|
+
# *Returns*:: (Hash) default parameters
|
30
|
+
def self.reset_entrez_default_parameters
|
31
|
+
h = {
|
32
|
+
'tool' => "#{$0} (bioruby/#{Bio::BIORUBY_VERSION_ID})",
|
33
|
+
'email' => nil,
|
34
|
+
}
|
35
|
+
ENTREZ_DEFAULT_PARAMETERS.clear
|
36
|
+
ENTREZ_DEFAULT_PARAMETERS.update(h)
|
37
|
+
end
|
38
|
+
|
39
|
+
# Gets default email address for Entrez (eUtils).
|
40
|
+
# ---
|
41
|
+
# *Returns*:: String or nil
|
42
|
+
def self.default_email
|
43
|
+
ENTREZ_DEFAULT_PARAMETERS['email']
|
44
|
+
end
|
45
|
+
|
46
|
+
# Sets default email address used for Entrez (eUtils).
|
47
|
+
# It may also be used for other NCBI services.
|
48
|
+
# ---
|
49
|
+
# *Arguments*:
|
50
|
+
# * (required) _str_: (String) email address
|
51
|
+
# *Returns*:: same as given argument
|
52
|
+
def self.default_email=(str)
|
53
|
+
ENTREZ_DEFAULT_PARAMETERS['email'] = str
|
54
|
+
end
|
55
|
+
|
56
|
+
# Gets default tool name for Entrez (eUtils).
|
57
|
+
# ---
|
58
|
+
# *Returns*:: String or nil
|
59
|
+
def self.default_tool
|
60
|
+
ENTREZ_DEFAULT_PARAMETERS['tool']
|
61
|
+
end
|
62
|
+
|
63
|
+
# Sets default tool name for Entrez (eUtils).
|
64
|
+
# It may also be used for other NCBI services.
|
65
|
+
# ---
|
66
|
+
# *Arguments*:
|
67
|
+
# * (required) _str_: (String) tool name
|
68
|
+
# *Returns*:: same as given argument
|
69
|
+
def self.default_tool=(str)
|
70
|
+
ENTREZ_DEFAULT_PARAMETERS['tool'] = str
|
71
|
+
end
|
72
|
+
|
14
73
|
# == Description
|
15
74
|
#
|
16
75
|
# The Bio::NCBI::REST class provides REST client for the NCBI E-Utilities
|
@@ -19,29 +78,81 @@ module Bio
|
|
19
78
|
#
|
20
79
|
# * http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
|
21
80
|
#
|
22
|
-
class NCBI
|
23
81
|
class REST
|
24
82
|
|
25
83
|
# Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
|
26
84
|
# weekdays for any series of more than 100 requests.
|
27
85
|
# -> Not implemented yet in BioRuby
|
28
|
-
|
29
|
-
#
|
30
|
-
#
|
31
|
-
|
32
|
-
NCBI_INTERVAL = 1
|
86
|
+
#
|
87
|
+
# Wait for 1/3 second between requests.
|
88
|
+
# NCBI's restriction is: "Make no more than 3 requests every 1 second."
|
89
|
+
NCBI_INTERVAL = 1.0 / 3.0
|
33
90
|
@@last_access = nil
|
91
|
+
@@last_access_mutex = nil
|
34
92
|
|
35
93
|
private
|
36
94
|
|
95
|
+
# (Private) Sleeps until allowed to access.
|
96
|
+
# ---
|
97
|
+
# *Arguments*:
|
98
|
+
# * (optional) _wait_: minimum interval in seconds between accesses (default: NCBI_INTERVAL)
|
99
|
+
# *Returns*:: (undefined)
|
37
100
|
def ncbi_access_wait(wait = NCBI_INTERVAL)
|
38
|
-
|
39
|
-
|
40
|
-
if
|
41
|
-
|
101
|
+
@@last_access_mutex ||= Mutex.new
|
102
|
+
@@last_access_mutex.synchronize {
|
103
|
+
if @@last_access
|
104
|
+
duration = Time.now - @@last_access
|
105
|
+
if wait > duration
|
106
|
+
sleep wait - duration
|
107
|
+
end
|
42
108
|
end
|
109
|
+
@@last_access = Time.now
|
110
|
+
}
|
111
|
+
nil
|
112
|
+
end
|
113
|
+
|
114
|
+
# (Private) default parameters
|
115
|
+
# ---
|
116
|
+
# *Returns*:: Hash
|
117
|
+
def default_parameters
|
118
|
+
Bio::NCBI::ENTREZ_DEFAULT_PARAMETERS
|
119
|
+
end
|
120
|
+
|
121
|
+
# (Private) Sends query to NCBI.
|
122
|
+
# ---
|
123
|
+
# *Arguments*:
|
124
|
+
# * (required) _serv_: (String) server URI string
|
125
|
+
# * (required) _opts_: (Hash) parameters
|
126
|
+
# *Returns*:: the response object returned by Bio::Command.post_form
|
127
|
+
def ncbi_post_form(serv, opts)
|
128
|
+
ncbi_check_parameters(opts)
|
129
|
+
ncbi_access_wait
|
130
|
+
response = Bio::Command.post_form(serv, opts)
|
131
|
+
response
|
132
|
+
end
|
133
|
+
|
134
|
+
# (Private) Checks parameters as NCBI requires.
|
135
|
+
# If no email or tool parameter, raises an error.
|
136
|
+
#
|
137
|
+
# NCBI announces that "Effective on
|
138
|
+
# June 1, 2010, all E-utility requests, either using standard URLs or
|
139
|
+
# SOAP, must contain non-null values for both the &tool and &email
|
140
|
+
# parameters. Any E-utility request made after June 1, 2010 that does
|
141
|
+
# not contain values for both parameters will return an error explaining
|
142
|
+
# that these parameters must be included in E-utility requests."
|
143
|
+
# ---
|
144
|
+
# *Arguments*:
|
145
|
+
# * (required) _opts_: Hash containing parameters
|
146
|
+
# *Returns*:: (undefined)
|
147
|
+
def ncbi_check_parameters(opts)
|
148
|
+
#return if Time.now < Time.gm(2010,5,31)
|
149
|
+
if opts['email'].to_s.empty? then
|
150
|
+
raise 'Set email parameter for the query, or set Bio::NCBI.default_email = "(your email address)"'
|
151
|
+
end
|
152
|
+
if opts['tool'].to_s.empty? then
|
153
|
+
raise 'Set tool parameter for the query, or set Bio::NCBI.default_tool = "(your tool name)"'
|
43
154
|
end
|
44
|
-
|
155
|
+
nil
|
45
156
|
end
|
46
157
|
|
47
158
|
public
|
@@ -67,8 +178,8 @@ class REST
|
|
67
178
|
# *Returns*:: array of string (database names)
|
68
179
|
def einfo
|
69
180
|
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi"
|
70
|
-
opts = {}
|
71
|
-
response =
|
181
|
+
opts = default_parameters.merge({})
|
182
|
+
response = ncbi_post_form(serv, opts)
|
72
183
|
result = response.body
|
73
184
|
list = result.scan(/<DbName>(.*?)<\/DbName>/m).flatten
|
74
185
|
return list
|
@@ -134,10 +245,7 @@ class REST
|
|
134
245
|
# *Returns*:: array of entry IDs or a number of results
|
135
246
|
def esearch(str, hash = {}, limit = nil, step = 10000)
|
136
247
|
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
137
|
-
opts = {
|
138
|
-
"tool" => "bioruby",
|
139
|
-
"term" => str,
|
140
|
-
}
|
248
|
+
opts = default_parameters.merge({ "term" => str })
|
141
249
|
opts.update(hash)
|
142
250
|
|
143
251
|
case opts["rettype"]
|
@@ -156,8 +264,7 @@ class REST
|
|
156
264
|
0.step(limit, step) do |i|
|
157
265
|
retmax = [step, limit - i].min
|
158
266
|
opts.update("retmax" => retmax, "retstart" => i + retstart)
|
159
|
-
|
160
|
-
response = Bio::Command.post_form(serv, opts)
|
267
|
+
response = ncbi_post_form(serv, opts)
|
161
268
|
result = response.body
|
162
269
|
list += result.scan(/<Id>(.*?)<\/Id>/m).flatten
|
163
270
|
end
|
@@ -169,14 +276,10 @@ class REST
|
|
169
276
|
# *Returns*:: (Integer) number of results
|
170
277
|
def esearch_count(str, hash = {})
|
171
278
|
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
172
|
-
opts = {
|
173
|
-
"tool" => "bioruby",
|
174
|
-
"term" => str,
|
175
|
-
}
|
279
|
+
opts = default_parameters.merge({ "term" => str })
|
176
280
|
opts.update(hash)
|
177
281
|
opts.update("rettype" => "count")
|
178
|
-
|
179
|
-
response = Bio::Command.post_form(serv, opts)
|
282
|
+
response = ncbi_post_form(serv, opts)
|
180
283
|
result = response.body
|
181
284
|
count = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
|
182
285
|
return count
|
@@ -211,10 +314,7 @@ class REST
|
|
211
314
|
# *Returns*:: String
|
212
315
|
def efetch(ids, hash = {}, step = 100)
|
213
316
|
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
|
214
|
-
opts = {
|
215
|
-
"tool" => "bioruby",
|
216
|
-
"retmode" => "text",
|
217
|
-
}
|
317
|
+
opts = default_parameters.merge({ "retmode" => "text" })
|
218
318
|
opts.update(hash)
|
219
319
|
|
220
320
|
case ids
|
@@ -228,8 +328,7 @@ class REST
|
|
228
328
|
0.step(list.size, step) do |i|
|
229
329
|
opts["id"] = list[i, step].join(',')
|
230
330
|
unless opts["id"].empty?
|
231
|
-
|
232
|
-
response = Bio::Command.post_form(serv, opts)
|
331
|
+
response = ncbi_post_form(serv, opts)
|
233
332
|
result += response.body
|
234
333
|
end
|
235
334
|
end
|
@@ -637,104 +736,3 @@ end # REST
|
|
637
736
|
end # NCBI
|
638
737
|
end # Bio
|
639
738
|
|
640
|
-
|
641
|
-
if __FILE__ == $0
|
642
|
-
|
643
|
-
gbopts = {"db"=>"nuccore", "rettype"=>"gb"}
|
644
|
-
pmopts = {"db"=>"pubmed", "rettype"=>"medline"}
|
645
|
-
count = {"rettype" => "count"}
|
646
|
-
xml = {"retmode"=>"xml"}
|
647
|
-
max = {"retmax"=>5}
|
648
|
-
|
649
|
-
puts "=== class methods ==="
|
650
|
-
|
651
|
-
puts "--- Search NCBI by E-Utils ---"
|
652
|
-
|
653
|
-
puts Time.now
|
654
|
-
puts "# count of 'tardigrada' in nuccore"
|
655
|
-
puts Bio::NCBI::REST.esearch("tardigrada", gbopts.merge(count))
|
656
|
-
|
657
|
-
puts Time.now
|
658
|
-
puts "# max 5 'tardigrada' entries in nuccore"
|
659
|
-
puts Bio::NCBI::REST.esearch("tardigrada", gbopts.merge(max))
|
660
|
-
|
661
|
-
puts Time.now
|
662
|
-
puts "# count of 'yeast kinase' in nuccore"
|
663
|
-
puts Bio::NCBI::REST.esearch("yeast kinase", gbopts.merge(count))
|
664
|
-
|
665
|
-
puts Time.now
|
666
|
-
puts "# max 5 'yeast kinase' entries in nuccore (XML)"
|
667
|
-
puts Bio::NCBI::REST.esearch("yeast kinase", gbopts.merge(xml).merge(max))
|
668
|
-
|
669
|
-
puts Time.now
|
670
|
-
puts "# count of 'genome&analysis|bioinformatics' in pubmed"
|
671
|
-
puts Bio::NCBI::REST.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(count))
|
672
|
-
|
673
|
-
puts Time.now
|
674
|
-
puts "# max 5 'genome&analysis|bioinformatics' entries in pubmed (XML)"
|
675
|
-
puts Bio::NCBI::REST.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(xml).merge(max))
|
676
|
-
|
677
|
-
puts Time.now
|
678
|
-
Bio::NCBI::REST.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(max)).each do |x|
|
679
|
-
puts "# each of 5 'genome&analysis|bioinformatics' entries in pubmed"
|
680
|
-
puts x
|
681
|
-
end
|
682
|
-
|
683
|
-
puts "--- Retrieve NCBI entry by E-Utils ---"
|
684
|
-
|
685
|
-
puts Time.now
|
686
|
-
puts "# '185041' entry in nuccore"
|
687
|
-
puts Bio::NCBI::REST.efetch("185041", gbopts)
|
688
|
-
|
689
|
-
puts Time.now
|
690
|
-
puts "# 'J00231' entry in nuccore (XML)"
|
691
|
-
puts Bio::NCBI::REST.efetch("J00231", gbopts.merge(xml))
|
692
|
-
|
693
|
-
puts Time.now
|
694
|
-
puts "# 16381885 entry in pubmed"
|
695
|
-
puts Bio::NCBI::REST.efetch(16381885, pmopts)
|
696
|
-
|
697
|
-
puts Time.now
|
698
|
-
puts "# '16381885' entry in pubmed"
|
699
|
-
puts Bio::NCBI::REST.efetch("16381885", pmopts)
|
700
|
-
|
701
|
-
puts Time.now
|
702
|
-
puts "# [10592173,14693808] entries in pubmed"
|
703
|
-
puts Bio::NCBI::REST.efetch([10592173, 14693808], pmopts)
|
704
|
-
|
705
|
-
puts Time.now
|
706
|
-
puts "# [10592173,14693808] entries in pubmed (XML)"
|
707
|
-
puts Bio::NCBI::REST.efetch([10592173, 14693808], pmopts.merge(xml))
|
708
|
-
|
709
|
-
|
710
|
-
puts "=== instance methods ==="
|
711
|
-
|
712
|
-
ncbi = Bio::NCBI::REST.new
|
713
|
-
|
714
|
-
puts "--- Search NCBI by E-Utils ---"
|
715
|
-
|
716
|
-
puts Time.now
|
717
|
-
puts "# count of 'genome&analysis|bioinformatics' in pubmed"
|
718
|
-
puts ncbi.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(count))
|
719
|
-
|
720
|
-
puts Time.now
|
721
|
-
puts "# max 5 'genome&analysis|bioinformatics' entries in pubmed"
|
722
|
-
puts ncbi.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(max))
|
723
|
-
|
724
|
-
puts Time.now
|
725
|
-
ncbi.esearch("(genome AND analysis) OR bioinformatics", pmopts).each do |x|
|
726
|
-
puts "# each 'genome&analysis|bioinformatics' entries in pubmed"
|
727
|
-
puts x
|
728
|
-
end
|
729
|
-
|
730
|
-
puts "--- Retrieve NCBI entry by E-Utils ---"
|
731
|
-
|
732
|
-
puts Time.now
|
733
|
-
puts "# 16381885 entry in pubmed"
|
734
|
-
puts ncbi.efetch(16381885, pmopts)
|
735
|
-
|
736
|
-
puts Time.now
|
737
|
-
puts "# [10592173,14693808] entries in pubmed"
|
738
|
-
puts ncbi.efetch([10592173, 14693808], pmopts)
|
739
|
-
|
740
|
-
end
|
data/lib/bio/io/ncbisoap.rb
CHANGED
data/lib/bio/io/pubmed.rb
CHANGED
@@ -221,91 +221,3 @@ end # PubMed
|
|
221
221
|
|
222
222
|
end # Bio
|
223
223
|
|
224
|
-
|
225
|
-
if __FILE__ == $0
|
226
|
-
|
227
|
-
puts "=== instance methods ==="
|
228
|
-
|
229
|
-
pubmed = Bio::PubMed.new
|
230
|
-
|
231
|
-
puts "--- Search PubMed by E-Utils ---"
|
232
|
-
opts = {"rettype" => "count"}
|
233
|
-
puts Time.now
|
234
|
-
puts pubmed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
235
|
-
puts Time.now
|
236
|
-
puts pubmed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
237
|
-
puts Time.now
|
238
|
-
puts pubmed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
239
|
-
puts Time.now
|
240
|
-
pubmed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
|
241
|
-
puts x
|
242
|
-
end
|
243
|
-
|
244
|
-
puts "--- Retrieve PubMed entry by E-Utils ---"
|
245
|
-
puts Time.now
|
246
|
-
puts pubmed.efetch(16381885)
|
247
|
-
puts Time.now
|
248
|
-
puts pubmed.efetch("16381885")
|
249
|
-
puts Time.now
|
250
|
-
puts pubmed.efetch("16381885")
|
251
|
-
puts Time.now
|
252
|
-
opts = {"retmode" => "xml"}
|
253
|
-
puts pubmed.efetch([10592173, 14693808], opts)
|
254
|
-
puts Time.now
|
255
|
-
puts pubmed.efetch(["10592173", "14693808"], opts)
|
256
|
-
|
257
|
-
puts "--- Search PubMed by Entrez CGI ---"
|
258
|
-
pubmed.search("(genome AND analysis) OR bioinformatics").each do |x|
|
259
|
-
p x
|
260
|
-
end
|
261
|
-
|
262
|
-
puts "--- Retrieve PubMed entry by Entrez CGI ---"
|
263
|
-
puts pubmed.query("16381885")
|
264
|
-
|
265
|
-
|
266
|
-
puts "--- Retrieve PubMed entry by PMfetch ---"
|
267
|
-
puts pubmed.pmfetch("16381885")
|
268
|
-
|
269
|
-
|
270
|
-
puts "=== class methods ==="
|
271
|
-
|
272
|
-
|
273
|
-
puts "--- Search PubMed by E-Utils ---"
|
274
|
-
opts = {"rettype" => "count"}
|
275
|
-
puts Time.now
|
276
|
-
puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
277
|
-
puts Time.now
|
278
|
-
puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
279
|
-
puts Time.now
|
280
|
-
puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
281
|
-
puts Time.now
|
282
|
-
Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
|
283
|
-
puts x
|
284
|
-
end
|
285
|
-
|
286
|
-
puts "--- Retrieve PubMed entry by E-Utils ---"
|
287
|
-
puts Time.now
|
288
|
-
puts Bio::PubMed.efetch(16381885)
|
289
|
-
puts Time.now
|
290
|
-
puts Bio::PubMed.efetch("16381885")
|
291
|
-
puts Time.now
|
292
|
-
puts Bio::PubMed.efetch("16381885")
|
293
|
-
puts Time.now
|
294
|
-
opts = {"retmode" => "xml"}
|
295
|
-
puts Bio::PubMed.efetch([10592173, 14693808], opts)
|
296
|
-
puts Time.now
|
297
|
-
puts Bio::PubMed.efetch(["10592173", "14693808"], opts)
|
298
|
-
|
299
|
-
puts "--- Search PubMed by Entrez CGI ---"
|
300
|
-
Bio::PubMed.search("(genome AND analysis) OR bioinformatics").each do |x|
|
301
|
-
p x
|
302
|
-
end
|
303
|
-
|
304
|
-
puts "--- Retrieve PubMed entry by Entrez CGI ---"
|
305
|
-
puts Bio::PubMed.query("16381885")
|
306
|
-
|
307
|
-
|
308
|
-
puts "--- Retrieve PubMed entry by PMfetch ---"
|
309
|
-
puts Bio::PubMed.pmfetch("16381885")
|
310
|
-
|
311
|
-
end
|
data/lib/bio/location.rb
CHANGED
@@ -792,76 +792,3 @@ end # Bio
|
|
792
792
|
# * [RATDYIIAAB] replace(1306..1443,"aagaacatccacggagtcagaactgggctcttcacgccggatttggcgttcgaggccattgtgaaaaagcaggcaatgcaccagcaagctcagttcctacccctgcgtggacctggttatccaggagctaatcagtacagttaggtggtcaagctgaaagagccctgtctgaaa")
|
793
793
|
#
|
794
794
|
|
795
|
-
if __FILE__ == $0
|
796
|
-
puts "Test new & span methods"
|
797
|
-
[
|
798
|
-
'450',
|
799
|
-
'500..600',
|
800
|
-
'join(500..550, 600..625)',
|
801
|
-
'complement(join(500..550, 600..625))',
|
802
|
-
'join(complement(500..550), 600..625)',
|
803
|
-
'754^755',
|
804
|
-
'complement(53^54)',
|
805
|
-
'replace(4792^4793,"a")',
|
806
|
-
'replace(1905^1906,"acaaagacaccgccctacgcc")',
|
807
|
-
'157..(800.806)',
|
808
|
-
'(67.68)..(699.703)',
|
809
|
-
'(45934.45974)..46135',
|
810
|
-
'<180..(731.761)',
|
811
|
-
'(88.89)..>1122',
|
812
|
-
'complement((1700.1708)..(1715.1721))',
|
813
|
-
'complement(<22..(255.275))',
|
814
|
-
'complement((64.74)..1525)',
|
815
|
-
'join((8298.8300)..10206,1..855)',
|
816
|
-
'replace((651.655)..(651.655),"")',
|
817
|
-
'one-of(898,900)..983',
|
818
|
-
'one-of(5971..6308,5971..6309)',
|
819
|
-
'8050..one-of(10731,10758,10905,11242)',
|
820
|
-
'one-of(623,627,632)..one-of(628,633,637)',
|
821
|
-
'one-of(845,953,963,1078,1104)..1354',
|
822
|
-
'join(2035..2050,complement(1775..1818),13..345,414..992,1232..1253,1024..1157)',
|
823
|
-
'join(complement(1..61),complement(AP000007.1:252907..253505))',
|
824
|
-
'complement(join(71606..71829,75327..75446,76039..76203))',
|
825
|
-
'order(3..26,complement(964..987))',
|
826
|
-
'order(L44135.1:(454.445)..>538,<1..181)',
|
827
|
-
'<200001..<318389',
|
828
|
-
].each do |pos|
|
829
|
-
p pos
|
830
|
-
# p Bio::Locations.new(pos)
|
831
|
-
# p Bio::Locations.new(pos).span
|
832
|
-
# p Bio::Locations.new(pos).range
|
833
|
-
Bio::Locations.new(pos).each do |location|
|
834
|
-
puts "class=" + location.class.to_s
|
835
|
-
puts "start=" + location.from.to_s + "\tend=" + location.to.to_s + "\tstrand=" + location.strand.to_s
|
836
|
-
end
|
837
|
-
|
838
|
-
end
|
839
|
-
|
840
|
-
puts "Test rel2abs/abs2rel method"
|
841
|
-
[
|
842
|
-
'6..15',
|
843
|
-
'join(6..10,16..30)',
|
844
|
-
'complement(join(6..10,16..30))',
|
845
|
-
'join(complement(6..10),complement(16..30))',
|
846
|
-
'join(6..10,complement(16..30))',
|
847
|
-
].each do |pos|
|
848
|
-
loc = Bio::Locations.new(pos)
|
849
|
-
p pos
|
850
|
-
# p loc
|
851
|
-
(1..21).each do |x|
|
852
|
-
print "absolute(#{x}) #=> ", y = loc.absolute(x), "\n"
|
853
|
-
print "relative(#{y}) #=> ", y ? loc.relative(y) : y, "\n"
|
854
|
-
print "absolute(#{x}, :aa) #=> ", y = loc.absolute(x, :aa), "\n"
|
855
|
-
print "relative(#{y}, :aa) #=> ", y ? loc.relative(y, :aa) : y, "\n"
|
856
|
-
end
|
857
|
-
end
|
858
|
-
|
859
|
-
pos = 'join(complement(6..10),complement(16..30))'
|
860
|
-
loc = Bio::Locations.new(pos)
|
861
|
-
print "pos : "; p pos
|
862
|
-
print "`- loc[1] : "; p loc[1]
|
863
|
-
print " `- range : "; p loc[1].range
|
864
|
-
|
865
|
-
puts Bio::Location.new('5').<=>(Bio::Location.new('3'))
|
866
|
-
end
|
867
|
-
|