bio 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +2105 -3728
- data/KNOWN_ISSUES.rdoc +35 -3
- data/README.rdoc +8 -2
- data/RELEASE_NOTES.rdoc +166 -0
- data/bin/bioruby +4 -1
- data/bioruby.gemspec +146 -1
- data/bioruby.gemspec.erb +3 -1
- data/doc/ChangeLog-before-1.3.1 +3961 -0
- data/doc/Tutorial.rd +154 -22
- data/doc/Tutorial.rd.html +125 -68
- data/lib/bio.rb +21 -6
- data/lib/bio/appl/bl2seq/report.rb +11 -202
- data/lib/bio/appl/blast/format0.rb +0 -193
- data/lib/bio/appl/blast/report.rb +2 -147
- data/lib/bio/appl/blast/wublast.rb +0 -208
- data/lib/bio/appl/fasta.rb +4 -19
- data/lib/bio/appl/fasta/format10.rb +0 -14
- data/lib/bio/appl/genscan/report.rb +0 -176
- data/lib/bio/appl/hmmer.rb +1 -15
- data/lib/bio/appl/hmmer/report.rb +0 -100
- data/lib/bio/appl/meme/mast.rb +156 -0
- data/lib/bio/appl/meme/mast/report.rb +91 -0
- data/lib/bio/appl/meme/motif.rb +48 -0
- data/lib/bio/appl/psort.rb +0 -111
- data/lib/bio/appl/psort/report.rb +1 -45
- data/lib/bio/appl/pts1.rb +2 -4
- data/lib/bio/appl/sosui/report.rb +5 -54
- data/lib/bio/appl/targetp/report.rb +1 -104
- data/lib/bio/appl/tmhmm/report.rb +0 -36
- data/lib/bio/command.rb +94 -10
- data/lib/bio/data/aa.rb +1 -77
- data/lib/bio/data/codontable.rb +1 -95
- data/lib/bio/data/na.rb +1 -26
- data/lib/bio/db/aaindex.rb +1 -38
- data/lib/bio/db/fasta.rb +1 -134
- data/lib/bio/db/fasta/format_qual.rb +204 -0
- data/lib/bio/db/fasta/qual.rb +102 -0
- data/lib/bio/db/fastq.rb +645 -0
- data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
- data/lib/bio/db/fastq/format_fastq.rb +175 -0
- data/lib/bio/db/genbank/genbank.rb +1 -86
- data/lib/bio/db/gff.rb +0 -17
- data/lib/bio/db/go.rb +4 -72
- data/lib/bio/db/kegg/common.rb +112 -0
- data/lib/bio/db/kegg/compound.rb +29 -20
- data/lib/bio/db/kegg/drug.rb +74 -34
- data/lib/bio/db/kegg/enzyme.rb +26 -5
- data/lib/bio/db/kegg/genes.rb +128 -15
- data/lib/bio/db/kegg/genome.rb +3 -41
- data/lib/bio/db/kegg/glycan.rb +19 -24
- data/lib/bio/db/kegg/orthology.rb +16 -56
- data/lib/bio/db/kegg/reaction.rb +81 -28
- data/lib/bio/db/kegg/taxonomy.rb +1 -52
- data/lib/bio/db/litdb.rb +1 -16
- data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
- data/lib/bio/db/prosite.rb +2 -95
- data/lib/bio/db/rebase.rb +5 -6
- data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
- data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
- data/lib/bio/io/das.rb +0 -44
- data/lib/bio/io/ddbjxml.rb +1 -181
- data/lib/bio/io/flatfile.rb +1 -7
- data/lib/bio/io/flatfile/autodetection.rb +6 -0
- data/lib/bio/io/keggapi.rb +0 -442
- data/lib/bio/io/ncbirest.rb +130 -132
- data/lib/bio/io/ncbisoap.rb +2 -1
- data/lib/bio/io/pubmed.rb +0 -88
- data/lib/bio/location.rb +0 -73
- data/lib/bio/pathway.rb +0 -171
- data/lib/bio/sequence.rb +18 -1
- data/lib/bio/sequence/adapter.rb +3 -0
- data/lib/bio/sequence/format.rb +16 -0
- data/lib/bio/sequence/quality_score.rb +205 -0
- data/lib/bio/tree.rb +70 -5
- data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
- data/lib/bio/util/sirna.rb +1 -23
- data/lib/bio/version.rb +1 -1
- data/sample/demo_aaindex.rb +67 -0
- data/sample/demo_aminoacid.rb +101 -0
- data/sample/demo_bl2seq_report.rb +220 -0
- data/sample/demo_blast_report.rb +285 -0
- data/sample/demo_codontable.rb +119 -0
- data/sample/demo_das.rb +105 -0
- data/sample/demo_ddbjxml.rb +212 -0
- data/sample/demo_fasta_remote.rb +51 -0
- data/sample/demo_fastaformat.rb +105 -0
- data/sample/demo_genbank.rb +132 -0
- data/sample/demo_genscan_report.rb +202 -0
- data/sample/demo_gff1.rb +49 -0
- data/sample/demo_go.rb +98 -0
- data/sample/demo_hmmer_report.rb +149 -0
- data/sample/demo_kegg_compound.rb +57 -0
- data/sample/demo_kegg_drug.rb +65 -0
- data/sample/demo_kegg_genome.rb +74 -0
- data/sample/demo_kegg_glycan.rb +72 -0
- data/sample/demo_kegg_orthology.rb +62 -0
- data/sample/demo_kegg_reaction.rb +66 -0
- data/sample/demo_kegg_taxonomy.rb +92 -0
- data/sample/demo_keggapi.rb +502 -0
- data/sample/demo_litdb.rb +42 -0
- data/sample/demo_locations.rb +99 -0
- data/sample/demo_ncbi_rest.rb +130 -0
- data/sample/demo_nucleicacid.rb +49 -0
- data/sample/demo_pathway.rb +196 -0
- data/sample/demo_prosite.rb +120 -0
- data/sample/demo_psort.rb +138 -0
- data/sample/demo_psort_report.rb +70 -0
- data/sample/demo_pubmed.rb +118 -0
- data/sample/demo_sirna.rb +63 -0
- data/sample/demo_sosui_report.rb +89 -0
- data/sample/demo_targetp_report.rb +135 -0
- data/sample/demo_tmhmm_report.rb +68 -0
- data/sample/pmfetch.rb +13 -4
- data/sample/pmsearch.rb +15 -4
- data/sample/test_phyloxml_big.rb +205 -0
- data/test/bioruby_test_helper.rb +61 -0
- data/test/data/KEGG/1.1.1.1.enzyme +935 -0
- data/test/data/KEGG/C00025.compound +102 -0
- data/test/data/KEGG/D00063.drug +104 -0
- data/test/data/KEGG/G00024.glycan +47 -0
- data/test/data/KEGG/G01366.glycan +18 -0
- data/test/data/KEGG/K02338.orthology +902 -0
- data/test/data/KEGG/R00006.reaction +14 -0
- data/test/data/fastq/README.txt +109 -0
- data/test/data/fastq/error_diff_ids.fastq +20 -0
- data/test/data/fastq/error_double_qual.fastq +22 -0
- data/test/data/fastq/error_double_seq.fastq +22 -0
- data/test/data/fastq/error_long_qual.fastq +20 -0
- data/test/data/fastq/error_no_qual.fastq +20 -0
- data/test/data/fastq/error_qual_del.fastq +20 -0
- data/test/data/fastq/error_qual_escape.fastq +20 -0
- data/test/data/fastq/error_qual_null.fastq +0 -0
- data/test/data/fastq/error_qual_space.fastq +21 -0
- data/test/data/fastq/error_qual_tab.fastq +21 -0
- data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
- data/test/data/fastq/error_qual_vtab.fastq +20 -0
- data/test/data/fastq/error_short_qual.fastq +20 -0
- data/test/data/fastq/error_spaces.fastq +20 -0
- data/test/data/fastq/error_tabs.fastq +21 -0
- data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
- data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
- data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_title.fastq +17 -0
- data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
- data/test/data/fastq/longreads_as_illumina.fastq +40 -0
- data/test/data/fastq/longreads_as_sanger.fastq +40 -0
- data/test/data/fastq/longreads_as_solexa.fastq +40 -0
- data/test/data/fastq/longreads_original_sanger.fastq +120 -0
- data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
- data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
- data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
- data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
- data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
- data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
- data/test/data/meme/db +0 -0
- data/test/data/meme/mast +0 -0
- data/test/data/meme/mast.out +13 -0
- data/test/data/meme/meme.out +3 -0
- data/test/data/phyloxml/apaf.xml +666 -0
- data/test/data/phyloxml/bcl_2.xml +2097 -0
- data/test/data/phyloxml/made_up.xml +144 -0
- data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
- data/test/data/phyloxml/phyloxml_examples.xml +415 -0
- data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
- data/test/functional/bio/appl/test_pts1.rb +7 -5
- data/test/functional/bio/io/test_ensembl.rb +4 -3
- data/test/functional/bio/io/test_pubmed.rb +9 -3
- data/test/functional/bio/io/test_soapwsdl.rb +5 -4
- data/test/functional/bio/io/test_togows.rb +5 -4
- data/test/functional/bio/sequence/test_output_embl.rb +6 -4
- data/test/functional/bio/test_command.rb +54 -5
- data/test/runner.rb +5 -3
- data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
- data/test/unit/bio/appl/blast/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
- data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
- data/test/unit/bio/appl/genscan/test_report.rb +8 -9
- data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
- data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
- data/test/unit/bio/appl/mafft/test_report.rb +6 -5
- data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
- data/test/unit/bio/appl/meme/test_mast.rb +103 -0
- data/test/unit/bio/appl/meme/test_motif.rb +38 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
- data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
- data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
- data/test/unit/bio/appl/sim4/test_report.rb +5 -4
- data/test/unit/bio/appl/sosui/test_report.rb +6 -5
- data/test/unit/bio/appl/targetp/test_report.rb +5 -3
- data/test/unit/bio/appl/test_blast.rb +5 -4
- data/test/unit/bio/appl/test_fasta.rb +4 -2
- data/test/unit/bio/appl/test_pts1.rb +4 -2
- data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
- data/test/unit/bio/data/test_aa.rb +5 -3
- data/test/unit/bio/data/test_codontable.rb +5 -4
- data/test/unit/bio/data/test_na.rb +5 -3
- data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
- data/test/unit/bio/db/embl/test_common.rb +4 -2
- data/test/unit/bio/db/embl/test_embl.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
- data/test/unit/bio/db/embl/test_sptr.rb +6 -8
- data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
- data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
- data/test/unit/bio/db/kegg/test_compound.rb +146 -0
- data/test/unit/bio/db/kegg/test_drug.rb +194 -0
- data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
- data/test/unit/bio/db/kegg/test_genes.rb +32 -4
- data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
- data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
- data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
- data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
- data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
- data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
- data/test/unit/bio/db/test_aaindex.rb +6 -6
- data/test/unit/bio/db/test_fasta.rb +5 -46
- data/test/unit/bio/db/test_fastq.rb +829 -0
- data/test/unit/bio/db/test_gff.rb +4 -2
- data/test/unit/bio/db/test_lasergene.rb +7 -5
- data/test/unit/bio/db/test_medline.rb +4 -2
- data/test/unit/bio/db/test_newick.rb +6 -6
- data/test/unit/bio/db/test_nexus.rb +4 -2
- data/test/unit/bio/db/test_phyloxml.rb +769 -0
- data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
- data/test/unit/bio/db/test_prosite.rb +6 -5
- data/test/unit/bio/db/test_qual.rb +63 -0
- data/test/unit/bio/db/test_rebase.rb +5 -3
- data/test/unit/bio/db/test_soft.rb +7 -6
- data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
- data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
- data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
- data/test/unit/bio/io/test_ddbjxml.rb +4 -3
- data/test/unit/bio/io/test_ensembl.rb +5 -3
- data/test/unit/bio/io/test_fastacmd.rb +4 -3
- data/test/unit/bio/io/test_flatfile.rb +6 -5
- data/test/unit/bio/io/test_soapwsdl.rb +4 -3
- data/test/unit/bio/io/test_togows.rb +4 -2
- data/test/unit/bio/sequence/test_aa.rb +5 -3
- data/test/unit/bio/sequence/test_common.rb +4 -2
- data/test/unit/bio/sequence/test_compat.rb +4 -2
- data/test/unit/bio/sequence/test_dblink.rb +5 -3
- data/test/unit/bio/sequence/test_na.rb +4 -2
- data/test/unit/bio/sequence/test_quality_score.rb +330 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
- data/test/unit/bio/test_alignment.rb +5 -3
- data/test/unit/bio/test_command.rb +4 -3
- data/test/unit/bio/test_db.rb +5 -3
- data/test/unit/bio/test_feature.rb +4 -2
- data/test/unit/bio/test_location.rb +4 -2
- data/test/unit/bio/test_map.rb +5 -3
- data/test/unit/bio/test_pathway.rb +4 -2
- data/test/unit/bio/test_reference.rb +4 -2
- data/test/unit/bio/test_sequence.rb +5 -3
- data/test/unit/bio/test_shell.rb +5 -3
- data/test/unit/bio/test_tree.rb +6 -6
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
- data/test/unit/bio/util/test_color_scheme.rb +5 -3
- data/test/unit/bio/util/test_contingency_table.rb +5 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
- data/test/unit/bio/util/test_sirna.rb +6 -4
- metadata +147 -2
data/lib/bio/io/ncbirest.rb
CHANGED
|
@@ -7,10 +7,69 @@
|
|
|
7
7
|
# $Id:$
|
|
8
8
|
#
|
|
9
9
|
|
|
10
|
+
require 'thread'
|
|
10
11
|
require 'bio/command'
|
|
12
|
+
require 'bio/version'
|
|
11
13
|
|
|
12
14
|
module Bio
|
|
13
15
|
|
|
16
|
+
class NCBI
|
|
17
|
+
|
|
18
|
+
autoload :SOAP, 'bio/io/ncbisoap'
|
|
19
|
+
|
|
20
|
+
# (Hash) Default parameters for Entrez (eUtils).
|
|
21
|
+
# They may also be used for other NCBI services.
|
|
22
|
+
ENTREZ_DEFAULT_PARAMETERS = {
|
|
23
|
+
'tool' => "#{$0} (bioruby/#{Bio::BIORUBY_VERSION_ID})",
|
|
24
|
+
'email' => nil,
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
# Resets Entrez (eUtils) default parameters.
|
|
28
|
+
# ---
|
|
29
|
+
# *Returns*:: (Hash) default parameters
|
|
30
|
+
def self.reset_entrez_default_parameters
|
|
31
|
+
h = {
|
|
32
|
+
'tool' => "#{$0} (bioruby/#{Bio::BIORUBY_VERSION_ID})",
|
|
33
|
+
'email' => nil,
|
|
34
|
+
}
|
|
35
|
+
ENTREZ_DEFAULT_PARAMETERS.clear
|
|
36
|
+
ENTREZ_DEFAULT_PARAMETERS.update(h)
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
# Gets default email address for Entrez (eUtils).
|
|
40
|
+
# ---
|
|
41
|
+
# *Returns*:: String or nil
|
|
42
|
+
def self.default_email
|
|
43
|
+
ENTREZ_DEFAULT_PARAMETERS['email']
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# Sets default email address used for Entrez (eUtils).
|
|
47
|
+
# It may also be used for other NCBI services.
|
|
48
|
+
# ---
|
|
49
|
+
# *Arguments*:
|
|
50
|
+
# * (required) _str_: (String) email address
|
|
51
|
+
# *Returns*:: same as given argument
|
|
52
|
+
def self.default_email=(str)
|
|
53
|
+
ENTREZ_DEFAULT_PARAMETERS['email'] = str
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Gets default tool name for Entrez (eUtils).
|
|
57
|
+
# ---
|
|
58
|
+
# *Returns*:: String or nil
|
|
59
|
+
def self.default_tool
|
|
60
|
+
ENTREZ_DEFAULT_PARAMETERS['tool']
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
# Sets default tool name for Entrez (eUtils).
|
|
64
|
+
# It may also be used for other NCBI services.
|
|
65
|
+
# ---
|
|
66
|
+
# *Arguments*:
|
|
67
|
+
# * (required) _str_: (String) tool name
|
|
68
|
+
# *Returns*:: same as given argument
|
|
69
|
+
def self.default_tool=(str)
|
|
70
|
+
ENTREZ_DEFAULT_PARAMETERS['tool'] = str
|
|
71
|
+
end
|
|
72
|
+
|
|
14
73
|
# == Description
|
|
15
74
|
#
|
|
16
75
|
# The Bio::NCBI::REST class provides REST client for the NCBI E-Utilities
|
|
@@ -19,29 +78,81 @@ module Bio
|
|
|
19
78
|
#
|
|
20
79
|
# * http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
|
|
21
80
|
#
|
|
22
|
-
class NCBI
|
|
23
81
|
class REST
|
|
24
82
|
|
|
25
83
|
# Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
|
|
26
84
|
# weekdays for any series of more than 100 requests.
|
|
27
85
|
# -> Not implemented yet in BioRuby
|
|
28
|
-
|
|
29
|
-
#
|
|
30
|
-
#
|
|
31
|
-
|
|
32
|
-
NCBI_INTERVAL = 1
|
|
86
|
+
#
|
|
87
|
+
# Minimum interval between accesses: 1/3 second.
|
|
88
|
+
# NCBI's restriction is: "Make no more than 3 requests every 1 second.".
|
|
89
|
+
NCBI_INTERVAL = 1.0 / 3.0
|
|
33
90
|
@@last_access = nil
|
|
91
|
+
@@last_access_mutex = nil
|
|
34
92
|
|
|
35
93
|
private
|
|
36
94
|
|
|
95
|
+
# (Private) Sleeps until allowed to access.
|
|
96
|
+
# ---
|
|
97
|
+
# *Arguments*:
|
|
98
|
+
# * (optional) _wait_: minimum interval in seconds since the last access (default: NCBI_INTERVAL)
|
|
99
|
+
# *Returns*:: (undefined)
|
|
37
100
|
def ncbi_access_wait(wait = NCBI_INTERVAL)
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
if
|
|
41
|
-
|
|
101
|
+
@@last_access_mutex ||= Mutex.new
|
|
102
|
+
@@last_access_mutex.synchronize {
|
|
103
|
+
if @@last_access
|
|
104
|
+
duration = Time.now - @@last_access
|
|
105
|
+
if wait > duration
|
|
106
|
+
sleep wait - duration
|
|
107
|
+
end
|
|
42
108
|
end
|
|
109
|
+
@@last_access = Time.now
|
|
110
|
+
}
|
|
111
|
+
nil
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
# (Private) default parameters
|
|
115
|
+
# ---
|
|
116
|
+
# *Returns*:: Hash
|
|
117
|
+
def default_parameters
|
|
118
|
+
Bio::NCBI::ENTREZ_DEFAULT_PARAMETERS
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
# (Private) Sends query to NCBI.
|
|
122
|
+
# ---
|
|
123
|
+
# *Arguments*:
|
|
124
|
+
# * (required) _serv_: (String) server URI string
|
|
125
|
+
# * (required) _opts_: (Hash) parameters
|
|
126
|
+
# *Returns*:: the response returned by Bio::Command.post_form
|
|
127
|
+
def ncbi_post_form(serv, opts)
|
|
128
|
+
ncbi_check_parameters(opts)
|
|
129
|
+
ncbi_access_wait
|
|
130
|
+
response = Bio::Command.post_form(serv, opts)
|
|
131
|
+
response
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
# (Private) Checks parameters as NCBI requires.
|
|
135
|
+
# Raises an error if the email or tool parameter is missing or empty.
|
|
136
|
+
#
|
|
137
|
+
# NCBI announces that "Effective on
|
|
138
|
+
# June 1, 2010, all E-utility requests, either using standard URLs or
|
|
139
|
+
# SOAP, must contain non-null values for both the &tool and &email
|
|
140
|
+
# parameters. Any E-utility request made after June 1, 2010 that does
|
|
141
|
+
# not contain values for both parameters will return an error explaining
|
|
142
|
+
# that these parameters must be included in E-utility requests."
|
|
143
|
+
# ---
|
|
144
|
+
# *Arguments*:
|
|
145
|
+
# * (required) _opts_: Hash containing parameters
|
|
146
|
+
# *Returns*:: (undefined)
|
|
147
|
+
def ncbi_check_parameters(opts)
|
|
148
|
+
#return if Time.now < Time.gm(2010,5,31)
|
|
149
|
+
if opts['email'].to_s.empty? then
|
|
150
|
+
raise 'Set email parameter for the query, or set Bio::NCBI.default_email = "(your email address)"'
|
|
151
|
+
end
|
|
152
|
+
if opts['tool'].to_s.empty? then
|
|
153
|
+
raise 'Set tool parameter for the query, or set Bio::NCBI.default_tool = "(your tool name)"'
|
|
43
154
|
end
|
|
44
|
-
|
|
155
|
+
nil
|
|
45
156
|
end
|
|
46
157
|
|
|
47
158
|
public
|
|
@@ -67,8 +178,8 @@ class REST
|
|
|
67
178
|
# *Returns*:: array of string (database names)
|
|
68
179
|
def einfo
|
|
69
180
|
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi"
|
|
70
|
-
opts = {}
|
|
71
|
-
response =
|
|
181
|
+
opts = default_parameters.merge({})
|
|
182
|
+
response = ncbi_post_form(serv, opts)
|
|
72
183
|
result = response.body
|
|
73
184
|
list = result.scan(/<DbName>(.*?)<\/DbName>/m).flatten
|
|
74
185
|
return list
|
|
@@ -134,10 +245,7 @@ class REST
|
|
|
134
245
|
# *Returns*:: array of entry IDs or a number of results
|
|
135
246
|
def esearch(str, hash = {}, limit = nil, step = 10000)
|
|
136
247
|
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
|
137
|
-
opts = {
|
|
138
|
-
"tool" => "bioruby",
|
|
139
|
-
"term" => str,
|
|
140
|
-
}
|
|
248
|
+
opts = default_parameters.merge({ "term" => str })
|
|
141
249
|
opts.update(hash)
|
|
142
250
|
|
|
143
251
|
case opts["rettype"]
|
|
@@ -156,8 +264,7 @@ class REST
|
|
|
156
264
|
0.step(limit, step) do |i|
|
|
157
265
|
retmax = [step, limit - i].min
|
|
158
266
|
opts.update("retmax" => retmax, "retstart" => i + retstart)
|
|
159
|
-
|
|
160
|
-
response = Bio::Command.post_form(serv, opts)
|
|
267
|
+
response = ncbi_post_form(serv, opts)
|
|
161
268
|
result = response.body
|
|
162
269
|
list += result.scan(/<Id>(.*?)<\/Id>/m).flatten
|
|
163
270
|
end
|
|
@@ -169,14 +276,10 @@ class REST
|
|
|
169
276
|
# *Returns*:: (Integer) number of results
|
|
170
277
|
def esearch_count(str, hash = {})
|
|
171
278
|
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
|
|
172
|
-
opts = {
|
|
173
|
-
"tool" => "bioruby",
|
|
174
|
-
"term" => str,
|
|
175
|
-
}
|
|
279
|
+
opts = default_parameters.merge({ "term" => str })
|
|
176
280
|
opts.update(hash)
|
|
177
281
|
opts.update("rettype" => "count")
|
|
178
|
-
|
|
179
|
-
response = Bio::Command.post_form(serv, opts)
|
|
282
|
+
response = ncbi_post_form(serv, opts)
|
|
180
283
|
result = response.body
|
|
181
284
|
count = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
|
|
182
285
|
return count
|
|
@@ -211,10 +314,7 @@ class REST
|
|
|
211
314
|
# *Returns*:: String
|
|
212
315
|
def efetch(ids, hash = {}, step = 100)
|
|
213
316
|
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
|
|
214
|
-
opts = {
|
|
215
|
-
"tool" => "bioruby",
|
|
216
|
-
"retmode" => "text",
|
|
217
|
-
}
|
|
317
|
+
opts = default_parameters.merge({ "retmode" => "text" })
|
|
218
318
|
opts.update(hash)
|
|
219
319
|
|
|
220
320
|
case ids
|
|
@@ -228,8 +328,7 @@ class REST
|
|
|
228
328
|
0.step(list.size, step) do |i|
|
|
229
329
|
opts["id"] = list[i, step].join(',')
|
|
230
330
|
unless opts["id"].empty?
|
|
231
|
-
|
|
232
|
-
response = Bio::Command.post_form(serv, opts)
|
|
331
|
+
response = ncbi_post_form(serv, opts)
|
|
233
332
|
result += response.body
|
|
234
333
|
end
|
|
235
334
|
end
|
|
@@ -637,104 +736,3 @@ end # REST
|
|
|
637
736
|
end # NCBI
|
|
638
737
|
end # Bio
|
|
639
738
|
|
|
640
|
-
|
|
641
|
-
if __FILE__ == $0
|
|
642
|
-
|
|
643
|
-
gbopts = {"db"=>"nuccore", "rettype"=>"gb"}
|
|
644
|
-
pmopts = {"db"=>"pubmed", "rettype"=>"medline"}
|
|
645
|
-
count = {"rettype" => "count"}
|
|
646
|
-
xml = {"retmode"=>"xml"}
|
|
647
|
-
max = {"retmax"=>5}
|
|
648
|
-
|
|
649
|
-
puts "=== class methods ==="
|
|
650
|
-
|
|
651
|
-
puts "--- Search NCBI by E-Utils ---"
|
|
652
|
-
|
|
653
|
-
puts Time.now
|
|
654
|
-
puts "# count of 'tardigrada' in nuccore"
|
|
655
|
-
puts Bio::NCBI::REST.esearch("tardigrada", gbopts.merge(count))
|
|
656
|
-
|
|
657
|
-
puts Time.now
|
|
658
|
-
puts "# max 5 'tardigrada' entries in nuccore"
|
|
659
|
-
puts Bio::NCBI::REST.esearch("tardigrada", gbopts.merge(max))
|
|
660
|
-
|
|
661
|
-
puts Time.now
|
|
662
|
-
puts "# count of 'yeast kinase' in nuccore"
|
|
663
|
-
puts Bio::NCBI::REST.esearch("yeast kinase", gbopts.merge(count))
|
|
664
|
-
|
|
665
|
-
puts Time.now
|
|
666
|
-
puts "# max 5 'yeast kinase' entries in nuccore (XML)"
|
|
667
|
-
puts Bio::NCBI::REST.esearch("yeast kinase", gbopts.merge(xml).merge(max))
|
|
668
|
-
|
|
669
|
-
puts Time.now
|
|
670
|
-
puts "# count of 'genome&analysis|bioinformatics' in pubmed"
|
|
671
|
-
puts Bio::NCBI::REST.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(count))
|
|
672
|
-
|
|
673
|
-
puts Time.now
|
|
674
|
-
puts "# max 5 'genome&analysis|bioinformatics' entries in pubmed (XML)"
|
|
675
|
-
puts Bio::NCBI::REST.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(xml).merge(max))
|
|
676
|
-
|
|
677
|
-
puts Time.now
|
|
678
|
-
Bio::NCBI::REST.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(max)).each do |x|
|
|
679
|
-
puts "# each of 5 'genome&analysis|bioinformatics' entries in pubmed"
|
|
680
|
-
puts x
|
|
681
|
-
end
|
|
682
|
-
|
|
683
|
-
puts "--- Retrieve NCBI entry by E-Utils ---"
|
|
684
|
-
|
|
685
|
-
puts Time.now
|
|
686
|
-
puts "# '185041' entry in nuccore"
|
|
687
|
-
puts Bio::NCBI::REST.efetch("185041", gbopts)
|
|
688
|
-
|
|
689
|
-
puts Time.now
|
|
690
|
-
puts "# 'J00231' entry in nuccore (XML)"
|
|
691
|
-
puts Bio::NCBI::REST.efetch("J00231", gbopts.merge(xml))
|
|
692
|
-
|
|
693
|
-
puts Time.now
|
|
694
|
-
puts "# 16381885 entry in pubmed"
|
|
695
|
-
puts Bio::NCBI::REST.efetch(16381885, pmopts)
|
|
696
|
-
|
|
697
|
-
puts Time.now
|
|
698
|
-
puts "# '16381885' entry in pubmed"
|
|
699
|
-
puts Bio::NCBI::REST.efetch("16381885", pmopts)
|
|
700
|
-
|
|
701
|
-
puts Time.now
|
|
702
|
-
puts "# [10592173,14693808] entries in pubmed"
|
|
703
|
-
puts Bio::NCBI::REST.efetch([10592173, 14693808], pmopts)
|
|
704
|
-
|
|
705
|
-
puts Time.now
|
|
706
|
-
puts "# [10592173,14693808] entries in pubmed (XML)"
|
|
707
|
-
puts Bio::NCBI::REST.efetch([10592173, 14693808], pmopts.merge(xml))
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
puts "=== instance methods ==="
|
|
711
|
-
|
|
712
|
-
ncbi = Bio::NCBI::REST.new
|
|
713
|
-
|
|
714
|
-
puts "--- Search NCBI by E-Utils ---"
|
|
715
|
-
|
|
716
|
-
puts Time.now
|
|
717
|
-
puts "# count of 'genome&analysis|bioinformatics' in pubmed"
|
|
718
|
-
puts ncbi.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(count))
|
|
719
|
-
|
|
720
|
-
puts Time.now
|
|
721
|
-
puts "# max 5 'genome&analysis|bioinformatics' entries in pubmed"
|
|
722
|
-
puts ncbi.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(max))
|
|
723
|
-
|
|
724
|
-
puts Time.now
|
|
725
|
-
ncbi.esearch("(genome AND analysis) OR bioinformatics", pmopts).each do |x|
|
|
726
|
-
puts "# each 'genome&analysis|bioinformatics' entries in pubmed"
|
|
727
|
-
puts x
|
|
728
|
-
end
|
|
729
|
-
|
|
730
|
-
puts "--- Retrieve NCBI entry by E-Utils ---"
|
|
731
|
-
|
|
732
|
-
puts Time.now
|
|
733
|
-
puts "# 16381885 entry in pubmed"
|
|
734
|
-
puts ncbi.efetch(16381885, pmopts)
|
|
735
|
-
|
|
736
|
-
puts Time.now
|
|
737
|
-
puts "# [10592173,14693808] entries in pubmed"
|
|
738
|
-
puts ncbi.efetch([10592173, 14693808], pmopts)
|
|
739
|
-
|
|
740
|
-
end
|
data/lib/bio/io/ncbisoap.rb
CHANGED
data/lib/bio/io/pubmed.rb
CHANGED
|
@@ -221,91 +221,3 @@ end # PubMed
|
|
|
221
221
|
|
|
222
222
|
end # Bio
|
|
223
223
|
|
|
224
|
-
|
|
225
|
-
if __FILE__ == $0
|
|
226
|
-
|
|
227
|
-
puts "=== instance methods ==="
|
|
228
|
-
|
|
229
|
-
pubmed = Bio::PubMed.new
|
|
230
|
-
|
|
231
|
-
puts "--- Search PubMed by E-Utils ---"
|
|
232
|
-
opts = {"rettype" => "count"}
|
|
233
|
-
puts Time.now
|
|
234
|
-
puts pubmed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
|
235
|
-
puts Time.now
|
|
236
|
-
puts pubmed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
|
237
|
-
puts Time.now
|
|
238
|
-
puts pubmed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
|
239
|
-
puts Time.now
|
|
240
|
-
pubmed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
|
|
241
|
-
puts x
|
|
242
|
-
end
|
|
243
|
-
|
|
244
|
-
puts "--- Retrieve PubMed entry by E-Utils ---"
|
|
245
|
-
puts Time.now
|
|
246
|
-
puts pubmed.efetch(16381885)
|
|
247
|
-
puts Time.now
|
|
248
|
-
puts pubmed.efetch("16381885")
|
|
249
|
-
puts Time.now
|
|
250
|
-
puts pubmed.efetch("16381885")
|
|
251
|
-
puts Time.now
|
|
252
|
-
opts = {"retmode" => "xml"}
|
|
253
|
-
puts pubmed.efetch([10592173, 14693808], opts)
|
|
254
|
-
puts Time.now
|
|
255
|
-
puts pubmed.efetch(["10592173", "14693808"], opts)
|
|
256
|
-
|
|
257
|
-
puts "--- Search PubMed by Entrez CGI ---"
|
|
258
|
-
pubmed.search("(genome AND analysis) OR bioinformatics").each do |x|
|
|
259
|
-
p x
|
|
260
|
-
end
|
|
261
|
-
|
|
262
|
-
puts "--- Retrieve PubMed entry by Entrez CGI ---"
|
|
263
|
-
puts pubmed.query("16381885")
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
puts "--- Retrieve PubMed entry by PMfetch ---"
|
|
267
|
-
puts pubmed.pmfetch("16381885")
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
puts "=== class methods ==="
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
puts "--- Search PubMed by E-Utils ---"
|
|
274
|
-
opts = {"rettype" => "count"}
|
|
275
|
-
puts Time.now
|
|
276
|
-
puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
|
277
|
-
puts Time.now
|
|
278
|
-
puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
|
279
|
-
puts Time.now
|
|
280
|
-
puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics", opts)
|
|
281
|
-
puts Time.now
|
|
282
|
-
Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
|
|
283
|
-
puts x
|
|
284
|
-
end
|
|
285
|
-
|
|
286
|
-
puts "--- Retrieve PubMed entry by E-Utils ---"
|
|
287
|
-
puts Time.now
|
|
288
|
-
puts Bio::PubMed.efetch(16381885)
|
|
289
|
-
puts Time.now
|
|
290
|
-
puts Bio::PubMed.efetch("16381885")
|
|
291
|
-
puts Time.now
|
|
292
|
-
puts Bio::PubMed.efetch("16381885")
|
|
293
|
-
puts Time.now
|
|
294
|
-
opts = {"retmode" => "xml"}
|
|
295
|
-
puts Bio::PubMed.efetch([10592173, 14693808], opts)
|
|
296
|
-
puts Time.now
|
|
297
|
-
puts Bio::PubMed.efetch(["10592173", "14693808"], opts)
|
|
298
|
-
|
|
299
|
-
puts "--- Search PubMed by Entrez CGI ---"
|
|
300
|
-
Bio::PubMed.search("(genome AND analysis) OR bioinformatics").each do |x|
|
|
301
|
-
p x
|
|
302
|
-
end
|
|
303
|
-
|
|
304
|
-
puts "--- Retrieve PubMed entry by Entrez CGI ---"
|
|
305
|
-
puts Bio::PubMed.query("16381885")
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
puts "--- Retrieve PubMed entry by PMfetch ---"
|
|
309
|
-
puts Bio::PubMed.pmfetch("16381885")
|
|
310
|
-
|
|
311
|
-
end
|
data/lib/bio/location.rb
CHANGED
|
@@ -792,76 +792,3 @@ end # Bio
|
|
|
792
792
|
# * [RATDYIIAAB] replace(1306..1443,"aagaacatccacggagtcagaactgggctcttcacgccggatttggcgttcgaggccattgtgaaaaagcaggcaatgcaccagcaagctcagttcctacccctgcgtggacctggttatccaggagctaatcagtacagttaggtggtcaagctgaaagagccctgtctgaaa")
|
|
793
793
|
#
|
|
794
794
|
|
|
795
|
-
if __FILE__ == $0
|
|
796
|
-
puts "Test new & span methods"
|
|
797
|
-
[
|
|
798
|
-
'450',
|
|
799
|
-
'500..600',
|
|
800
|
-
'join(500..550, 600..625)',
|
|
801
|
-
'complement(join(500..550, 600..625))',
|
|
802
|
-
'join(complement(500..550), 600..625)',
|
|
803
|
-
'754^755',
|
|
804
|
-
'complement(53^54)',
|
|
805
|
-
'replace(4792^4793,"a")',
|
|
806
|
-
'replace(1905^1906,"acaaagacaccgccctacgcc")',
|
|
807
|
-
'157..(800.806)',
|
|
808
|
-
'(67.68)..(699.703)',
|
|
809
|
-
'(45934.45974)..46135',
|
|
810
|
-
'<180..(731.761)',
|
|
811
|
-
'(88.89)..>1122',
|
|
812
|
-
'complement((1700.1708)..(1715.1721))',
|
|
813
|
-
'complement(<22..(255.275))',
|
|
814
|
-
'complement((64.74)..1525)',
|
|
815
|
-
'join((8298.8300)..10206,1..855)',
|
|
816
|
-
'replace((651.655)..(651.655),"")',
|
|
817
|
-
'one-of(898,900)..983',
|
|
818
|
-
'one-of(5971..6308,5971..6309)',
|
|
819
|
-
'8050..one-of(10731,10758,10905,11242)',
|
|
820
|
-
'one-of(623,627,632)..one-of(628,633,637)',
|
|
821
|
-
'one-of(845,953,963,1078,1104)..1354',
|
|
822
|
-
'join(2035..2050,complement(1775..1818),13..345,414..992,1232..1253,1024..1157)',
|
|
823
|
-
'join(complement(1..61),complement(AP000007.1:252907..253505))',
|
|
824
|
-
'complement(join(71606..71829,75327..75446,76039..76203))',
|
|
825
|
-
'order(3..26,complement(964..987))',
|
|
826
|
-
'order(L44135.1:(454.445)..>538,<1..181)',
|
|
827
|
-
'<200001..<318389',
|
|
828
|
-
].each do |pos|
|
|
829
|
-
p pos
|
|
830
|
-
# p Bio::Locations.new(pos)
|
|
831
|
-
# p Bio::Locations.new(pos).span
|
|
832
|
-
# p Bio::Locations.new(pos).range
|
|
833
|
-
Bio::Locations.new(pos).each do |location|
|
|
834
|
-
puts "class=" + location.class.to_s
|
|
835
|
-
puts "start=" + location.from.to_s + "\tend=" + location.to.to_s + "\tstrand=" + location.strand.to_s
|
|
836
|
-
end
|
|
837
|
-
|
|
838
|
-
end
|
|
839
|
-
|
|
840
|
-
puts "Test rel2abs/abs2rel method"
|
|
841
|
-
[
|
|
842
|
-
'6..15',
|
|
843
|
-
'join(6..10,16..30)',
|
|
844
|
-
'complement(join(6..10,16..30))',
|
|
845
|
-
'join(complement(6..10),complement(16..30))',
|
|
846
|
-
'join(6..10,complement(16..30))',
|
|
847
|
-
].each do |pos|
|
|
848
|
-
loc = Bio::Locations.new(pos)
|
|
849
|
-
p pos
|
|
850
|
-
# p loc
|
|
851
|
-
(1..21).each do |x|
|
|
852
|
-
print "absolute(#{x}) #=> ", y = loc.absolute(x), "\n"
|
|
853
|
-
print "relative(#{y}) #=> ", y ? loc.relative(y) : y, "\n"
|
|
854
|
-
print "absolute(#{x}, :aa) #=> ", y = loc.absolute(x, :aa), "\n"
|
|
855
|
-
print "relative(#{y}, :aa) #=> ", y ? loc.relative(y, :aa) : y, "\n"
|
|
856
|
-
end
|
|
857
|
-
end
|
|
858
|
-
|
|
859
|
-
pos = 'join(complement(6..10),complement(16..30))'
|
|
860
|
-
loc = Bio::Locations.new(pos)
|
|
861
|
-
print "pos : "; p pos
|
|
862
|
-
print "`- loc[1] : "; p loc[1]
|
|
863
|
-
print " `- range : "; p loc[1].range
|
|
864
|
-
|
|
865
|
-
puts Bio::Location.new('5').<=>(Bio::Location.new('3'))
|
|
866
|
-
end
|
|
867
|
-
|