bio 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (303)
  1. data/ChangeLog +2105 -3728
  2. data/KNOWN_ISSUES.rdoc +35 -3
  3. data/README.rdoc +8 -2
  4. data/RELEASE_NOTES.rdoc +166 -0
  5. data/bin/bioruby +4 -1
  6. data/bioruby.gemspec +146 -1
  7. data/bioruby.gemspec.erb +3 -1
  8. data/doc/ChangeLog-before-1.3.1 +3961 -0
  9. data/doc/Tutorial.rd +154 -22
  10. data/doc/Tutorial.rd.html +125 -68
  11. data/lib/bio.rb +21 -6
  12. data/lib/bio/appl/bl2seq/report.rb +11 -202
  13. data/lib/bio/appl/blast/format0.rb +0 -193
  14. data/lib/bio/appl/blast/report.rb +2 -147
  15. data/lib/bio/appl/blast/wublast.rb +0 -208
  16. data/lib/bio/appl/fasta.rb +4 -19
  17. data/lib/bio/appl/fasta/format10.rb +0 -14
  18. data/lib/bio/appl/genscan/report.rb +0 -176
  19. data/lib/bio/appl/hmmer.rb +1 -15
  20. data/lib/bio/appl/hmmer/report.rb +0 -100
  21. data/lib/bio/appl/meme/mast.rb +156 -0
  22. data/lib/bio/appl/meme/mast/report.rb +91 -0
  23. data/lib/bio/appl/meme/motif.rb +48 -0
  24. data/lib/bio/appl/psort.rb +0 -111
  25. data/lib/bio/appl/psort/report.rb +1 -45
  26. data/lib/bio/appl/pts1.rb +2 -4
  27. data/lib/bio/appl/sosui/report.rb +5 -54
  28. data/lib/bio/appl/targetp/report.rb +1 -104
  29. data/lib/bio/appl/tmhmm/report.rb +0 -36
  30. data/lib/bio/command.rb +94 -10
  31. data/lib/bio/data/aa.rb +1 -77
  32. data/lib/bio/data/codontable.rb +1 -95
  33. data/lib/bio/data/na.rb +1 -26
  34. data/lib/bio/db/aaindex.rb +1 -38
  35. data/lib/bio/db/fasta.rb +1 -134
  36. data/lib/bio/db/fasta/format_qual.rb +204 -0
  37. data/lib/bio/db/fasta/qual.rb +102 -0
  38. data/lib/bio/db/fastq.rb +645 -0
  39. data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
  40. data/lib/bio/db/fastq/format_fastq.rb +175 -0
  41. data/lib/bio/db/genbank/genbank.rb +1 -86
  42. data/lib/bio/db/gff.rb +0 -17
  43. data/lib/bio/db/go.rb +4 -72
  44. data/lib/bio/db/kegg/common.rb +112 -0
  45. data/lib/bio/db/kegg/compound.rb +29 -20
  46. data/lib/bio/db/kegg/drug.rb +74 -34
  47. data/lib/bio/db/kegg/enzyme.rb +26 -5
  48. data/lib/bio/db/kegg/genes.rb +128 -15
  49. data/lib/bio/db/kegg/genome.rb +3 -41
  50. data/lib/bio/db/kegg/glycan.rb +19 -24
  51. data/lib/bio/db/kegg/orthology.rb +16 -56
  52. data/lib/bio/db/kegg/reaction.rb +81 -28
  53. data/lib/bio/db/kegg/taxonomy.rb +1 -52
  54. data/lib/bio/db/litdb.rb +1 -16
  55. data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
  56. data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
  57. data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
  58. data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
  59. data/lib/bio/db/prosite.rb +2 -95
  60. data/lib/bio/db/rebase.rb +5 -6
  61. data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
  62. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
  63. data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
  64. data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
  65. data/lib/bio/io/das.rb +0 -44
  66. data/lib/bio/io/ddbjxml.rb +1 -181
  67. data/lib/bio/io/flatfile.rb +1 -7
  68. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  69. data/lib/bio/io/keggapi.rb +0 -442
  70. data/lib/bio/io/ncbirest.rb +130 -132
  71. data/lib/bio/io/ncbisoap.rb +2 -1
  72. data/lib/bio/io/pubmed.rb +0 -88
  73. data/lib/bio/location.rb +0 -73
  74. data/lib/bio/pathway.rb +0 -171
  75. data/lib/bio/sequence.rb +18 -1
  76. data/lib/bio/sequence/adapter.rb +3 -0
  77. data/lib/bio/sequence/format.rb +16 -0
  78. data/lib/bio/sequence/quality_score.rb +205 -0
  79. data/lib/bio/tree.rb +70 -5
  80. data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
  81. data/lib/bio/util/sirna.rb +1 -23
  82. data/lib/bio/version.rb +1 -1
  83. data/sample/demo_aaindex.rb +67 -0
  84. data/sample/demo_aminoacid.rb +101 -0
  85. data/sample/demo_bl2seq_report.rb +220 -0
  86. data/sample/demo_blast_report.rb +285 -0
  87. data/sample/demo_codontable.rb +119 -0
  88. data/sample/demo_das.rb +105 -0
  89. data/sample/demo_ddbjxml.rb +212 -0
  90. data/sample/demo_fasta_remote.rb +51 -0
  91. data/sample/demo_fastaformat.rb +105 -0
  92. data/sample/demo_genbank.rb +132 -0
  93. data/sample/demo_genscan_report.rb +202 -0
  94. data/sample/demo_gff1.rb +49 -0
  95. data/sample/demo_go.rb +98 -0
  96. data/sample/demo_hmmer_report.rb +149 -0
  97. data/sample/demo_kegg_compound.rb +57 -0
  98. data/sample/demo_kegg_drug.rb +65 -0
  99. data/sample/demo_kegg_genome.rb +74 -0
  100. data/sample/demo_kegg_glycan.rb +72 -0
  101. data/sample/demo_kegg_orthology.rb +62 -0
  102. data/sample/demo_kegg_reaction.rb +66 -0
  103. data/sample/demo_kegg_taxonomy.rb +92 -0
  104. data/sample/demo_keggapi.rb +502 -0
  105. data/sample/demo_litdb.rb +42 -0
  106. data/sample/demo_locations.rb +99 -0
  107. data/sample/demo_ncbi_rest.rb +130 -0
  108. data/sample/demo_nucleicacid.rb +49 -0
  109. data/sample/demo_pathway.rb +196 -0
  110. data/sample/demo_prosite.rb +120 -0
  111. data/sample/demo_psort.rb +138 -0
  112. data/sample/demo_psort_report.rb +70 -0
  113. data/sample/demo_pubmed.rb +118 -0
  114. data/sample/demo_sirna.rb +63 -0
  115. data/sample/demo_sosui_report.rb +89 -0
  116. data/sample/demo_targetp_report.rb +135 -0
  117. data/sample/demo_tmhmm_report.rb +68 -0
  118. data/sample/pmfetch.rb +13 -4
  119. data/sample/pmsearch.rb +15 -4
  120. data/sample/test_phyloxml_big.rb +205 -0
  121. data/test/bioruby_test_helper.rb +61 -0
  122. data/test/data/KEGG/1.1.1.1.enzyme +935 -0
  123. data/test/data/KEGG/C00025.compound +102 -0
  124. data/test/data/KEGG/D00063.drug +104 -0
  125. data/test/data/KEGG/G00024.glycan +47 -0
  126. data/test/data/KEGG/G01366.glycan +18 -0
  127. data/test/data/KEGG/K02338.orthology +902 -0
  128. data/test/data/KEGG/R00006.reaction +14 -0
  129. data/test/data/fastq/README.txt +109 -0
  130. data/test/data/fastq/error_diff_ids.fastq +20 -0
  131. data/test/data/fastq/error_double_qual.fastq +22 -0
  132. data/test/data/fastq/error_double_seq.fastq +22 -0
  133. data/test/data/fastq/error_long_qual.fastq +20 -0
  134. data/test/data/fastq/error_no_qual.fastq +20 -0
  135. data/test/data/fastq/error_qual_del.fastq +20 -0
  136. data/test/data/fastq/error_qual_escape.fastq +20 -0
  137. data/test/data/fastq/error_qual_null.fastq +0 -0
  138. data/test/data/fastq/error_qual_space.fastq +21 -0
  139. data/test/data/fastq/error_qual_tab.fastq +21 -0
  140. data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
  141. data/test/data/fastq/error_qual_vtab.fastq +20 -0
  142. data/test/data/fastq/error_short_qual.fastq +20 -0
  143. data/test/data/fastq/error_spaces.fastq +20 -0
  144. data/test/data/fastq/error_tabs.fastq +21 -0
  145. data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
  146. data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
  147. data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
  148. data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
  149. data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
  150. data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
  151. data/test/data/fastq/error_trunc_in_title.fastq +17 -0
  152. data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
  153. data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
  154. data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
  155. data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
  156. data/test/data/fastq/longreads_as_illumina.fastq +40 -0
  157. data/test/data/fastq/longreads_as_sanger.fastq +40 -0
  158. data/test/data/fastq/longreads_as_solexa.fastq +40 -0
  159. data/test/data/fastq/longreads_original_sanger.fastq +120 -0
  160. data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
  161. data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
  162. data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
  163. data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
  164. data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
  165. data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
  166. data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
  167. data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
  168. data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
  169. data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
  170. data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
  171. data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
  172. data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
  173. data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
  174. data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
  175. data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
  176. data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
  177. data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
  178. data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
  179. data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
  180. data/test/data/meme/db +0 -0
  181. data/test/data/meme/mast +0 -0
  182. data/test/data/meme/mast.out +13 -0
  183. data/test/data/meme/meme.out +3 -0
  184. data/test/data/phyloxml/apaf.xml +666 -0
  185. data/test/data/phyloxml/bcl_2.xml +2097 -0
  186. data/test/data/phyloxml/made_up.xml +144 -0
  187. data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
  188. data/test/data/phyloxml/phyloxml_examples.xml +415 -0
  189. data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
  190. data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
  191. data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
  192. data/test/functional/bio/appl/test_pts1.rb +7 -5
  193. data/test/functional/bio/io/test_ensembl.rb +4 -3
  194. data/test/functional/bio/io/test_pubmed.rb +9 -3
  195. data/test/functional/bio/io/test_soapwsdl.rb +5 -4
  196. data/test/functional/bio/io/test_togows.rb +5 -4
  197. data/test/functional/bio/sequence/test_output_embl.rb +6 -4
  198. data/test/functional/bio/test_command.rb +54 -5
  199. data/test/runner.rb +5 -3
  200. data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
  201. data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
  202. data/test/unit/bio/appl/blast/test_report.rb +5 -4
  203. data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
  204. data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
  205. data/test/unit/bio/appl/genscan/test_report.rb +8 -9
  206. data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
  207. data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
  208. data/test/unit/bio/appl/mafft/test_report.rb +6 -5
  209. data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
  210. data/test/unit/bio/appl/meme/test_mast.rb +103 -0
  211. data/test/unit/bio/appl/meme/test_motif.rb +38 -0
  212. data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
  213. data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
  214. data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
  215. data/test/unit/bio/appl/sim4/test_report.rb +5 -4
  216. data/test/unit/bio/appl/sosui/test_report.rb +6 -5
  217. data/test/unit/bio/appl/targetp/test_report.rb +5 -3
  218. data/test/unit/bio/appl/test_blast.rb +5 -4
  219. data/test/unit/bio/appl/test_fasta.rb +4 -2
  220. data/test/unit/bio/appl/test_pts1.rb +4 -2
  221. data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
  222. data/test/unit/bio/data/test_aa.rb +5 -3
  223. data/test/unit/bio/data/test_codontable.rb +5 -4
  224. data/test/unit/bio/data/test_na.rb +5 -3
  225. data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
  226. data/test/unit/bio/db/embl/test_common.rb +4 -2
  227. data/test/unit/bio/db/embl/test_embl.rb +6 -6
  228. data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
  229. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
  230. data/test/unit/bio/db/embl/test_sptr.rb +6 -8
  231. data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
  232. data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
  233. data/test/unit/bio/db/kegg/test_compound.rb +146 -0
  234. data/test/unit/bio/db/kegg/test_drug.rb +194 -0
  235. data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
  236. data/test/unit/bio/db/kegg/test_genes.rb +32 -4
  237. data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
  238. data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
  239. data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
  240. data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
  241. data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
  242. data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
  243. data/test/unit/bio/db/test_aaindex.rb +6 -6
  244. data/test/unit/bio/db/test_fasta.rb +5 -46
  245. data/test/unit/bio/db/test_fastq.rb +829 -0
  246. data/test/unit/bio/db/test_gff.rb +4 -2
  247. data/test/unit/bio/db/test_lasergene.rb +7 -5
  248. data/test/unit/bio/db/test_medline.rb +4 -2
  249. data/test/unit/bio/db/test_newick.rb +6 -6
  250. data/test/unit/bio/db/test_nexus.rb +4 -2
  251. data/test/unit/bio/db/test_phyloxml.rb +769 -0
  252. data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
  253. data/test/unit/bio/db/test_prosite.rb +6 -5
  254. data/test/unit/bio/db/test_qual.rb +63 -0
  255. data/test/unit/bio/db/test_rebase.rb +5 -3
  256. data/test/unit/bio/db/test_soft.rb +7 -6
  257. data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
  258. data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
  259. data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
  260. data/test/unit/bio/io/test_ddbjxml.rb +4 -3
  261. data/test/unit/bio/io/test_ensembl.rb +5 -3
  262. data/test/unit/bio/io/test_fastacmd.rb +4 -3
  263. data/test/unit/bio/io/test_flatfile.rb +6 -5
  264. data/test/unit/bio/io/test_soapwsdl.rb +4 -3
  265. data/test/unit/bio/io/test_togows.rb +4 -2
  266. data/test/unit/bio/sequence/test_aa.rb +5 -3
  267. data/test/unit/bio/sequence/test_common.rb +4 -2
  268. data/test/unit/bio/sequence/test_compat.rb +4 -2
  269. data/test/unit/bio/sequence/test_dblink.rb +5 -3
  270. data/test/unit/bio/sequence/test_na.rb +4 -2
  271. data/test/unit/bio/sequence/test_quality_score.rb +330 -0
  272. data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
  273. data/test/unit/bio/test_alignment.rb +5 -3
  274. data/test/unit/bio/test_command.rb +4 -3
  275. data/test/unit/bio/test_db.rb +5 -3
  276. data/test/unit/bio/test_feature.rb +4 -2
  277. data/test/unit/bio/test_location.rb +4 -2
  278. data/test/unit/bio/test_map.rb +5 -3
  279. data/test/unit/bio/test_pathway.rb +4 -2
  280. data/test/unit/bio/test_reference.rb +4 -2
  281. data/test/unit/bio/test_sequence.rb +5 -3
  282. data/test/unit/bio/test_shell.rb +5 -3
  283. data/test/unit/bio/test_tree.rb +6 -6
  284. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
  285. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
  286. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
  287. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
  288. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
  289. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
  290. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
  291. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
  292. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
  293. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
  294. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
  295. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
  296. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
  297. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
  298. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
  299. data/test/unit/bio/util/test_color_scheme.rb +5 -3
  300. data/test/unit/bio/util/test_contingency_table.rb +5 -3
  301. data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
  302. data/test/unit/bio/util/test_sirna.rb +6 -4
  303. metadata +147 -2
@@ -7,10 +7,69 @@
7
7
  # $Id:$
8
8
  #
9
9
 
10
+ require 'thread'
10
11
  require 'bio/command'
12
+ require 'bio/version'
11
13
 
12
14
  module Bio
13
15
 
16
+ class NCBI
17
+
18
+ autoload :SOAP, 'bio/io/ncbisoap'
19
+
20
+ # (Hash) Default parameters for Entrez (eUtils).
21
+ # They may also be used for other NCBI services.
22
+ ENTREZ_DEFAULT_PARAMETERS = {
23
+ 'tool' => "#{$0} (bioruby/#{Bio::BIORUBY_VERSION_ID})",
24
+ 'email' => nil,
25
+ }
26
+
27
+ # Resets Entrez (eUtils) default parameters.
28
+ # ---
29
+ # *Returns*:: (Hash) default parameters
30
+ def self.reset_entrez_default_parameters
31
+ h = {
32
+ 'tool' => "#{$0} (bioruby/#{Bio::BIORUBY_VERSION_ID})",
33
+ 'email' => nil,
34
+ }
35
+ ENTREZ_DEFAULT_PARAMETERS.clear
36
+ ENTREZ_DEFAULT_PARAMETERS.update(h)
37
+ end
38
+
39
+ # Gets default email address for Entrez (eUtils).
40
+ # ---
41
+ # *Returns*:: String or nil
42
+ def self.default_email
43
+ ENTREZ_DEFAULT_PARAMETERS['email']
44
+ end
45
+
46
+ # Sets default email address used for Entrez (eUtils).
47
+ # It may also be used for other NCBI services.
48
+ # ---
49
+ # *Arguments*:
50
+ # * (required) _str_: (String) email address
51
+ # *Returns*:: same as given argument
52
+ def self.default_email=(str)
53
+ ENTREZ_DEFAULT_PARAMETERS['email'] = str
54
+ end
55
+
56
+ # Gets default tool name for Entrez (eUtils).
57
+ # ---
58
+ # *Returns*:: String or nil
59
+ def self.default_tool
60
+ ENTREZ_DEFAULT_PARAMETERS['tool']
61
+ end
62
+
63
+ # Sets default tool name for Entrez (eUtils).
64
+ # It may also be used for other NCBI services.
65
+ # ---
66
+ # *Arguments*:
67
+ # * (required) _str_: (String) tool name
68
+ # *Returns*:: same as given argument
69
+ def self.default_tool=(str)
70
+ ENTREZ_DEFAULT_PARAMETERS['tool'] = str
71
+ end
72
+
14
73
  # == Description
15
74
  #
16
75
  # The Bio::NCBI::REST class provides REST client for the NCBI E-Utilities
@@ -19,29 +78,81 @@ module Bio
19
78
  #
20
79
  # * http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
21
80
  #
22
- class NCBI
23
81
  class REST
24
82
 
25
83
  # Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
26
84
  # weekdays for any series of more than 100 requests.
27
85
  # -> Not implemented yet in BioRuby
28
-
29
- # Make no more than one request every 1 seconds.
30
- # (NCBI's restriction is "Make no more than 3 requests every 1 second.",
31
- # but limited to 1/sec partly because of keeping the value in integer.)
32
- NCBI_INTERVAL = 1
86
+ #
87
+ # Wait for 1/3 seconds.
88
+ # NCBI's restriction is: "Make no more than 3 requests every 1 second.".
89
+ NCBI_INTERVAL = 1.0 / 3.0
33
90
  @@last_access = nil
91
+ @@last_access_mutex = nil
34
92
 
35
93
  private
36
94
 
95
+ # (Private) Sleeps until allowed to access.
96
+ # ---
97
+ # *Arguments*:
98
+ # * (required) _wait_: wait unit time
99
+ # *Returns*:: (undefined)
37
100
  def ncbi_access_wait(wait = NCBI_INTERVAL)
38
- if @@last_access
39
- duration = Time.now - @@last_access
40
- if wait > duration
41
- sleep wait - duration
101
+ @@last_access_mutex ||= Mutex.new
102
+ @@last_access_mutex.synchronize {
103
+ if @@last_access
104
+ duration = Time.now - @@last_access
105
+ if wait > duration
106
+ sleep wait - duration
107
+ end
42
108
  end
109
+ @@last_access = Time.now
110
+ }
111
+ nil
112
+ end
113
+
114
+ # (Private) default parameters
115
+ # ---
116
+ # *Returns*:: Hash
117
+ def default_parameters
118
+ Bio::NCBI::ENTREZ_DEFAULT_PARAMETERS
119
+ end
120
+
121
+ # (Private) Sends query to NCBI.
122
+ # ---
123
+ # *Arguments*:
124
+ # * (required) _serv_: (String) server URI string
125
+ # * (required) _opts_: (Hash) parameters
126
+ # *Returns*:: nil
127
+ def ncbi_post_form(serv, opts)
128
+ ncbi_check_parameters(opts)
129
+ ncbi_access_wait
130
+ response = Bio::Command.post_form(serv, opts)
131
+ response
132
+ end
133
+
134
+ # (Private) Checks parameters as NCBI requires.
135
+ # If no email or tool parameter, raises an error.
136
+ #
137
+ # NCBI announces that "Effective on
138
+ # June 1, 2010, all E-utility requests, either using standard URLs or
139
+ # SOAP, must contain non-null values for both the &tool and &email
140
+ # parameters. Any E-utility request made after June 1, 2010 that does
141
+ # not contain values for both parameters will return an error explaining
142
+ # that these parameters must be included in E-utility requests."
143
+ # ---
144
+ # *Arguments*:
145
+ # * (required) _opts_: Hash containing parameters
146
+ # *Returns*:: (undefined)
147
+ def ncbi_check_parameters(opts)
148
+ #return if Time.now < Time.gm(2010,5,31)
149
+ if opts['email'].to_s.empty? then
150
+ raise 'Set email parameter for the query, or set Bio::NCBI.default_email = "(your email address)"'
151
+ end
152
+ if opts['tool'].to_s.empty? then
153
+ raise 'Set tool parameter for the query, or set Bio::NCBI.default_tool = "(your tool name)"'
43
154
  end
44
- @@last_access = Time.now
155
+ nil
45
156
  end
46
157
 
47
158
  public
@@ -67,8 +178,8 @@ class REST
67
178
  # *Returns*:: array of string (database names)
68
179
  def einfo
69
180
  serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi"
70
- opts = {}
71
- response = Bio::Command.post_form(serv, opts)
181
+ opts = default_parameters.merge({})
182
+ response = ncbi_post_form(serv, opts)
72
183
  result = response.body
73
184
  list = result.scan(/<DbName>(.*?)<\/DbName>/m).flatten
74
185
  return list
@@ -134,10 +245,7 @@ class REST
134
245
  # *Returns*:: array of entry IDs or a number of results
135
246
  def esearch(str, hash = {}, limit = nil, step = 10000)
136
247
  serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
137
- opts = {
138
- "tool" => "bioruby",
139
- "term" => str,
140
- }
248
+ opts = default_parameters.merge({ "term" => str })
141
249
  opts.update(hash)
142
250
 
143
251
  case opts["rettype"]
@@ -156,8 +264,7 @@ class REST
156
264
  0.step(limit, step) do |i|
157
265
  retmax = [step, limit - i].min
158
266
  opts.update("retmax" => retmax, "retstart" => i + retstart)
159
- ncbi_access_wait
160
- response = Bio::Command.post_form(serv, opts)
267
+ response = ncbi_post_form(serv, opts)
161
268
  result = response.body
162
269
  list += result.scan(/<Id>(.*?)<\/Id>/m).flatten
163
270
  end
@@ -169,14 +276,10 @@ class REST
169
276
  # *Returns*:: array of entry IDs or a number of results
170
277
  def esearch_count(str, hash = {})
171
278
  serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
172
- opts = {
173
- "tool" => "bioruby",
174
- "term" => str,
175
- }
279
+ opts = default_parameters.merge({ "term" => str })
176
280
  opts.update(hash)
177
281
  opts.update("rettype" => "count")
178
- #ncbi_access_wait
179
- response = Bio::Command.post_form(serv, opts)
282
+ response = ncbi_post_form(serv, opts)
180
283
  result = response.body
181
284
  count = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
182
285
  return count
@@ -211,10 +314,7 @@ class REST
211
314
  # *Returns*:: String
212
315
  def efetch(ids, hash = {}, step = 100)
213
316
  serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
214
- opts = {
215
- "tool" => "bioruby",
216
- "retmode" => "text",
217
- }
317
+ opts = default_parameters.merge({ "retmode" => "text" })
218
318
  opts.update(hash)
219
319
 
220
320
  case ids
@@ -228,8 +328,7 @@ class REST
228
328
  0.step(list.size, step) do |i|
229
329
  opts["id"] = list[i, step].join(',')
230
330
  unless opts["id"].empty?
231
- ncbi_access_wait
232
- response = Bio::Command.post_form(serv, opts)
331
+ response = ncbi_post_form(serv, opts)
233
332
  result += response.body
234
333
  end
235
334
  end
@@ -637,104 +736,3 @@ end # REST
637
736
  end # NCBI
638
737
  end # Bio
639
738
 
640
-
641
- if __FILE__ == $0
642
-
643
- gbopts = {"db"=>"nuccore", "rettype"=>"gb"}
644
- pmopts = {"db"=>"pubmed", "rettype"=>"medline"}
645
- count = {"rettype" => "count"}
646
- xml = {"retmode"=>"xml"}
647
- max = {"retmax"=>5}
648
-
649
- puts "=== class methods ==="
650
-
651
- puts "--- Search NCBI by E-Utils ---"
652
-
653
- puts Time.now
654
- puts "# count of 'tardigrada' in nuccore"
655
- puts Bio::NCBI::REST.esearch("tardigrada", gbopts.merge(count))
656
-
657
- puts Time.now
658
- puts "# max 5 'tardigrada' entries in nuccore"
659
- puts Bio::NCBI::REST.esearch("tardigrada", gbopts.merge(max))
660
-
661
- puts Time.now
662
- puts "# count of 'yeast kinase' in nuccore"
663
- puts Bio::NCBI::REST.esearch("yeast kinase", gbopts.merge(count))
664
-
665
- puts Time.now
666
- puts "# max 5 'yeast kinase' entries in nuccore (XML)"
667
- puts Bio::NCBI::REST.esearch("yeast kinase", gbopts.merge(xml).merge(max))
668
-
669
- puts Time.now
670
- puts "# count of 'genome&analysis|bioinformatics' in pubmed"
671
- puts Bio::NCBI::REST.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(count))
672
-
673
- puts Time.now
674
- puts "# max 5 'genome&analysis|bioinformatics' entries in pubmed (XML)"
675
- puts Bio::NCBI::REST.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(xml).merge(max))
676
-
677
- puts Time.now
678
- Bio::NCBI::REST.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(max)).each do |x|
679
- puts "# each of 5 'genome&analysis|bioinformatics' entries in pubmed"
680
- puts x
681
- end
682
-
683
- puts "--- Retrieve NCBI entry by E-Utils ---"
684
-
685
- puts Time.now
686
- puts "# '185041' entry in nuccore"
687
- puts Bio::NCBI::REST.efetch("185041", gbopts)
688
-
689
- puts Time.now
690
- puts "# 'J00231' entry in nuccore (XML)"
691
- puts Bio::NCBI::REST.efetch("J00231", gbopts.merge(xml))
692
-
693
- puts Time.now
694
- puts "# 16381885 entry in pubmed"
695
- puts Bio::NCBI::REST.efetch(16381885, pmopts)
696
-
697
- puts Time.now
698
- puts "# '16381885' entry in pubmed"
699
- puts Bio::NCBI::REST.efetch("16381885", pmopts)
700
-
701
- puts Time.now
702
- puts "# [10592173,14693808] entries in pubmed"
703
- puts Bio::NCBI::REST.efetch([10592173, 14693808], pmopts)
704
-
705
- puts Time.now
706
- puts "# [10592173,14693808] entries in pubmed (XML)"
707
- puts Bio::NCBI::REST.efetch([10592173, 14693808], pmopts.merge(xml))
708
-
709
-
710
- puts "=== instance methods ==="
711
-
712
- ncbi = Bio::NCBI::REST.new
713
-
714
- puts "--- Search NCBI by E-Utils ---"
715
-
716
- puts Time.now
717
- puts "# count of 'genome&analysis|bioinformatics' in pubmed"
718
- puts ncbi.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(count))
719
-
720
- puts Time.now
721
- puts "# max 5 'genome&analysis|bioinformatics' entries in pubmed"
722
- puts ncbi.esearch("(genome AND analysis) OR bioinformatics", pmopts.merge(max))
723
-
724
- puts Time.now
725
- ncbi.esearch("(genome AND analysis) OR bioinformatics", pmopts).each do |x|
726
- puts "# each 'genome&analysis|bioinformatics' entries in pubmed"
727
- puts x
728
- end
729
-
730
- puts "--- Retrieve NCBI entry by E-Utils ---"
731
-
732
- puts Time.now
733
- puts "# 16381885 entry in pubmed"
734
- puts ncbi.efetch(16381885, pmopts)
735
-
736
- puts Time.now
737
- puts "# [10592173,14693808] entries in pubmed"
738
- puts ncbi.efetch([10592173, 14693808], pmopts)
739
-
740
- end
@@ -5,9 +5,10 @@
5
5
  # Toshiaki Katayama <k@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: ncbisoap.rb,v 1.3 2007/04/05 23:35:41 trevor Exp $
8
+ # $Id:$
9
9
  #
10
10
 
11
+ require 'bio/io/ncbirest'
11
12
  require 'bio/io/soapwsdl'
12
13
 
13
14
  module Bio
@@ -221,91 +221,3 @@ end # PubMed
221
221
 
222
222
  end # Bio
223
223
 
224
-
225
- if __FILE__ == $0
226
-
227
- puts "=== instance methods ==="
228
-
229
- pubmed = Bio::PubMed.new
230
-
231
- puts "--- Search PubMed by E-Utils ---"
232
- opts = {"rettype" => "count"}
233
- puts Time.now
234
- puts pubmed.esearch("(genome AND analysis) OR bioinformatics", opts)
235
- puts Time.now
236
- puts pubmed.esearch("(genome AND analysis) OR bioinformatics", opts)
237
- puts Time.now
238
- puts pubmed.esearch("(genome AND analysis) OR bioinformatics", opts)
239
- puts Time.now
240
- pubmed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
241
- puts x
242
- end
243
-
244
- puts "--- Retrieve PubMed entry by E-Utils ---"
245
- puts Time.now
246
- puts pubmed.efetch(16381885)
247
- puts Time.now
248
- puts pubmed.efetch("16381885")
249
- puts Time.now
250
- puts pubmed.efetch("16381885")
251
- puts Time.now
252
- opts = {"retmode" => "xml"}
253
- puts pubmed.efetch([10592173, 14693808], opts)
254
- puts Time.now
255
- puts pubmed.efetch(["10592173", "14693808"], opts)
256
-
257
- puts "--- Search PubMed by Entrez CGI ---"
258
- pubmed.search("(genome AND analysis) OR bioinformatics").each do |x|
259
- p x
260
- end
261
-
262
- puts "--- Retrieve PubMed entry by Entrez CGI ---"
263
- puts pubmed.query("16381885")
264
-
265
-
266
- puts "--- Retrieve PubMed entry by PMfetch ---"
267
- puts pubmed.pmfetch("16381885")
268
-
269
-
270
- puts "=== class methods ==="
271
-
272
-
273
- puts "--- Search PubMed by E-Utils ---"
274
- opts = {"rettype" => "count"}
275
- puts Time.now
276
- puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics", opts)
277
- puts Time.now
278
- puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics", opts)
279
- puts Time.now
280
- puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics", opts)
281
- puts Time.now
282
- Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
283
- puts x
284
- end
285
-
286
- puts "--- Retrieve PubMed entry by E-Utils ---"
287
- puts Time.now
288
- puts Bio::PubMed.efetch(16381885)
289
- puts Time.now
290
- puts Bio::PubMed.efetch("16381885")
291
- puts Time.now
292
- puts Bio::PubMed.efetch("16381885")
293
- puts Time.now
294
- opts = {"retmode" => "xml"}
295
- puts Bio::PubMed.efetch([10592173, 14693808], opts)
296
- puts Time.now
297
- puts Bio::PubMed.efetch(["10592173", "14693808"], opts)
298
-
299
- puts "--- Search PubMed by Entrez CGI ---"
300
- Bio::PubMed.search("(genome AND analysis) OR bioinformatics").each do |x|
301
- p x
302
- end
303
-
304
- puts "--- Retrieve PubMed entry by Entrez CGI ---"
305
- puts Bio::PubMed.query("16381885")
306
-
307
-
308
- puts "--- Retrieve PubMed entry by PMfetch ---"
309
- puts Bio::PubMed.pmfetch("16381885")
310
-
311
- end
@@ -792,76 +792,3 @@ end # Bio
792
792
  # * [RATDYIIAAB] replace(1306..1443,"aagaacatccacggagtcagaactgggctcttcacgccggatttggcgttcgaggccattgtgaaaaagcaggcaatgcaccagcaagctcagttcctacccctgcgtggacctggttatccaggagctaatcagtacagttaggtggtcaagctgaaagagccctgtctgaaa")
793
793
  #
794
794
 
795
- if __FILE__ == $0
796
- puts "Test new & span methods"
797
- [
798
- '450',
799
- '500..600',
800
- 'join(500..550, 600..625)',
801
- 'complement(join(500..550, 600..625))',
802
- 'join(complement(500..550), 600..625)',
803
- '754^755',
804
- 'complement(53^54)',
805
- 'replace(4792^4793,"a")',
806
- 'replace(1905^1906,"acaaagacaccgccctacgcc")',
807
- '157..(800.806)',
808
- '(67.68)..(699.703)',
809
- '(45934.45974)..46135',
810
- '<180..(731.761)',
811
- '(88.89)..>1122',
812
- 'complement((1700.1708)..(1715.1721))',
813
- 'complement(<22..(255.275))',
814
- 'complement((64.74)..1525)',
815
- 'join((8298.8300)..10206,1..855)',
816
- 'replace((651.655)..(651.655),"")',
817
- 'one-of(898,900)..983',
818
- 'one-of(5971..6308,5971..6309)',
819
- '8050..one-of(10731,10758,10905,11242)',
820
- 'one-of(623,627,632)..one-of(628,633,637)',
821
- 'one-of(845,953,963,1078,1104)..1354',
822
- 'join(2035..2050,complement(1775..1818),13..345,414..992,1232..1253,1024..1157)',
823
- 'join(complement(1..61),complement(AP000007.1:252907..253505))',
824
- 'complement(join(71606..71829,75327..75446,76039..76203))',
825
- 'order(3..26,complement(964..987))',
826
- 'order(L44135.1:(454.445)..>538,<1..181)',
827
- '<200001..<318389',
828
- ].each do |pos|
829
- p pos
830
- # p Bio::Locations.new(pos)
831
- # p Bio::Locations.new(pos).span
832
- # p Bio::Locations.new(pos).range
833
- Bio::Locations.new(pos).each do |location|
834
- puts "class=" + location.class.to_s
835
- puts "start=" + location.from.to_s + "\tend=" + location.to.to_s + "\tstrand=" + location.strand.to_s
836
- end
837
-
838
- end
839
-
840
- puts "Test rel2abs/abs2rel method"
841
- [
842
- '6..15',
843
- 'join(6..10,16..30)',
844
- 'complement(join(6..10,16..30))',
845
- 'join(complement(6..10),complement(16..30))',
846
- 'join(6..10,complement(16..30))',
847
- ].each do |pos|
848
- loc = Bio::Locations.new(pos)
849
- p pos
850
- # p loc
851
- (1..21).each do |x|
852
- print "absolute(#{x}) #=> ", y = loc.absolute(x), "\n"
853
- print "relative(#{y}) #=> ", y ? loc.relative(y) : y, "\n"
854
- print "absolute(#{x}, :aa) #=> ", y = loc.absolute(x, :aa), "\n"
855
- print "relative(#{y}, :aa) #=> ", y ? loc.relative(y, :aa) : y, "\n"
856
- end
857
- end
858
-
859
- pos = 'join(complement(6..10),complement(16..30))'
860
- loc = Bio::Locations.new(pos)
861
- print "pos : "; p pos
862
- print "`- loc[1] : "; p loc[1]
863
- print " `- range : "; p loc[1].range
864
-
865
- puts Bio::Location.new('5').<=>(Bio::Location.new('3'))
866
- end
867
-