bio 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -1,15 +1,16 @@
1
1
  #
2
2
  # = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
3
3
  #
4
- # Copyright:: Copyright (C) 2001, 2007 Toshiaki Katayama <k@bioruby.org>
4
+ # Copyright:: Copyright (C) 2001, 2007, 2008 Toshiaki Katayama <k@bioruby.org>
5
5
  # Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: pubmed.rb,v 1.23 2007/12/12 13:53:26 k Exp $
8
+ # $Id:$
9
9
  #
10
10
 
11
+ require 'bio/io/ncbirest'
11
12
  require 'bio/command'
12
- require 'cgi' unless defined?(CGI)
13
+ require 'cgi'
13
14
 
14
15
  module Bio
15
16
 
@@ -68,29 +69,7 @@ module Bio
68
69
  # manuscript = Bio::PubMed.query("10592173")
69
70
  # medline = Bio::MEDLINE.new(manuscript)
70
71
  #
71
- class PubMed
72
-
73
- # Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
74
- # weekdays for any series of more than 100 requests.
75
- # -> Not implemented yet in BioRuby
76
-
77
- # Make no more than one request every 3 seconds.
78
- NCBI_INTERVAL = 3
79
- @@last_access = nil
80
-
81
- private
82
-
83
- def ncbi_access_wait(wait = NCBI_INTERVAL)
84
- if @@last_access
85
- duration = Time.now - @@last_access
86
- if wait > duration
87
- sleep wait - duration
88
- end
89
- end
90
- @@last_access = Time.now
91
- end
92
-
93
- public
72
+ class PubMed < Bio::NCBI::REST
94
73
 
95
74
  # Search the PubMed database by given keywords using E-Utils and returns
96
75
  # an array of PubMed IDs.
@@ -99,39 +78,22 @@ class PubMed
99
78
  # http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html#PubMed
100
79
  # ---
101
80
  # *Arguments*:
102
- # * _id_: query string (required)
103
- # * _field_
104
- # * _reldate_
105
- # * _mindate_
106
- # * _maxdate_
107
- # * _datetype_
108
- # * _retstart_
109
- # * _retmax_ (default 100)
110
- # * _retmode_
111
- # * _rettype_
81
+ # * _str_: query string (required)
82
+ # * _hash_: hash of E-Utils options
83
+ # * _retmode_: "xml", "html", ...
84
+ # * _rettype_: "medline", ...
85
+ # * _retmax_: integer (default 100)
86
+ # * _retstart_: integer
87
+ # * _field_
88
+ # * _reldate_
89
+ # * _mindate_
90
+ # * _maxdate_
91
+ # * _datetype_
112
92
  # *Returns*:: array of PubMed IDs or a number of results
113
93
  def esearch(str, hash = {})
114
- return nil if str.empty?
115
-
116
- serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
117
- opts = {
118
- "retmax" => 100,
119
- "tool" => "bioruby",
120
- "db" => "pubmed",
121
- "term" => str
122
- }
94
+ opts = { "db" => "pubmed" }
123
95
  opts.update(hash)
124
-
125
- ncbi_access_wait
126
-
127
- response, = Bio::Command.post_form(serv, opts)
128
- result = response.body
129
- if opts['rettype'] == 'count'
130
- result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
131
- else
132
- result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
133
- end
134
- return result
96
+ super(str, opts)
135
97
  end
136
98
 
137
99
  # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
@@ -141,29 +103,21 @@ class PubMed
141
103
  # ---
142
104
  # *Arguments*:
143
105
  # * _ids_: list of PubMed IDs (required)
106
+ # * _hash_: hash of E-Utils options
107
+ # * _retmode_: "xml", "html", ...
108
+ # * _rettype_: "medline", ...
109
+ # * _retmax_: integer (default 100)
110
+ # * _retstart_: integer
111
+ # * _field_
112
+ # * _reldate_
113
+ # * _mindate_
114
+ # * _maxdate_
115
+ # * _datetype_
144
116
  # *Returns*:: Array of MEDLINE formatted String
145
117
  def efetch(ids, hash = {})
146
- return nil if ids.to_s.empty?
147
- ids = ids.join(",") if ids === Array
148
-
149
- serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
150
- opts = {
151
- "tool" => "bioruby",
152
- "db" => "pubmed",
153
- "retmode" => "text",
154
- "rettype" => "medline",
155
- "id" => ids,
156
- }
118
+ opts = { "db" => "pubmed", "rettype" => "medline" }
157
119
  opts.update(hash)
158
-
159
- ncbi_access_wait
160
-
161
- response, = Bio::Command.post_form(serv, opts)
162
- result = response.body
163
- if opts["retmode"] == "text"
164
- result = result.split(/\n\n+/)
165
- end
166
- return result
120
+ super(ids, opts)
167
121
  end
168
122
 
169
123
  # Search the PubMed database by given keywords using entrez query and returns
@@ -180,7 +134,7 @@ class PubMed
180
134
  ncbi_access_wait
181
135
 
182
136
  http = Bio::Command.new_http(host)
183
- response, = http.get(path + CGI.escape(str))
137
+ response = http.get(path + CGI.escape(str))
184
138
  result = response.body
185
139
  result = result.scan(/value="(\d+)" id="UidCheckBox"/m).flatten
186
140
  return result
@@ -195,12 +149,12 @@ class PubMed
195
149
  def query(*ids)
196
150
  host = "www.ncbi.nlm.nih.gov"
197
151
  path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
198
- list = ids.join(",")
152
+ list = ids.collect { |x| CGI.escape(x.to_s) }.join(",")
199
153
 
200
154
  ncbi_access_wait
201
155
 
202
156
  http = Bio::Command.new_http(host)
203
- response, = http.get(path + list)
157
+ response = http.get(path + list)
204
158
  result = response.body
205
159
  result = result.scan(/<pre>\s*(.*?)<\/pre>/m).flatten
206
160
 
@@ -229,7 +183,7 @@ class PubMed
229
183
  ncbi_access_wait
230
184
 
231
185
  http = Bio::Command.new_http(host)
232
- response, = http.get(path + id.to_s)
186
+ response = http.get(path + CGI.escape(id.to_s))
233
187
  result = response.body
234
188
  if result =~ /#{id}\s+Error/
235
189
  raise( result )
@@ -5,7 +5,7 @@
5
5
  # Toshiaki Katayama <k@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: registry.rb,v 1.19 2007/04/05 23:35:41 trevor Exp $
8
+ # $Id:$
9
9
  #
10
10
  # == Description
11
11
  #
@@ -172,7 +172,7 @@ class Registry
172
172
  def read_remote(url)
173
173
  schema, user, host, port, reg, path, = URI.split(url)
174
174
  Bio::Command.start_http(host, port) do |http|
175
- response, = http.get(path)
175
+ response = http.get(path)
176
176
  parse_stanza(response.body)
177
177
  end
178
178
  end
@@ -1,365 +1,186 @@
1
- #
2
- # = bio/io/sql.rb - BioSQL access module
3
- #
4
- # Copyright:: Copyright (C) 2002 Toshiaki Katayama <k@bioruby.org>
5
- # Copyright:: Copyright (C) 2006 Raoul Jean Pierre Bonnal <raoul.bonnal@itb.cnr.it>
6
- # License:: The Ruby License
7
- #
8
- # $Id: sql.rb,v 1.8 2007/04/05 23:35:41 trevor Exp $
9
- #
10
1
 
11
- begin
12
- require 'dbi'
13
- rescue LoadError
14
- end
15
- require 'bio/sequence'
16
- require 'bio/feature'
17
-
18
-
19
- module Bio
20
-
21
- class SQL
22
-
23
- def initialize(db = 'dbi:Mysql:biosql', user = nil, pass = nil)
24
- @dbh = DBI.connect(db, user, pass)
25
- end
26
-
27
- def close
28
- @dbh.disconnect
29
- end
30
-
31
- # Returns Bio::SQL::Sequence object.
32
- def fetch(accession) # or display_id for fall back
33
- query = "select * from bioentry where accession = ?"
34
- entry = @dbh.execute(query, accession).fetch
35
- return Sequence.new(@dbh, entry) if entry
36
-
37
- query = "select * from bioentry where display_id = ?"
38
- entry = @dbh.execute(query, accession).fetch
39
- return Sequence.new(@dbh, entry) if entry
40
- end
41
- alias get_by_id fetch
42
-
43
-
44
- # for lazy fetching
45
-
46
- class Sequence
47
-
48
- def initialize(dbh, entry)
49
- @dbh = dbh
50
- @bioentry_id = entry['bioentry_id']
51
- @database_id = entry['biodatabase_id']
52
- @entry_id = entry['display_id']
53
- @accession = entry['accession']
54
- @version = entry['entry_version']
55
- @division = entry['division']
56
- end
57
- attr_reader :accession, :division, :entry_id, :version
58
-
59
-
60
- def to_fasta
61
- if seq = seq
62
- return seq.to_fasta(@accession)
63
- end
64
- end
65
-
66
- # Returns Bio::Sequence::NA or AA object.
67
- def seq
68
- query = "select * from biosequence where bioentry_id = ?"
69
- row = @dbh.execute(query, @bioentry_id).fetch
70
- return unless row
71
-
72
- mol = row['alphabet']
73
- seq = row['seq']
74
-
75
- case mol
76
- when /.na/i # 'dna' or 'rna'
77
- Bio::Sequence::NA.new(seq)
78
- else # 'protein'
79
- Bio::Sequence::AA.new(seq)
80
- end
81
- end
82
-
83
- # Returns Bio::Sequence::NA or AA object (by lazy fetching).
84
- def subseq(from, to)
85
- length = to - from + 1
86
- query = "select alphabet, substring(seq, ?, ?) as subseq" +
87
- " from biosequence where bioentry_id = ?"
88
- row = @dbh.execute(query, from, length, @bioentry_id).fetch
89
- return unless row
90
-
91
- mol = row['alphabet']
92
- seq = row['subseq']
93
-
94
- case mol
95
- when /.na/i # 'dna' or 'rna'
96
- Bio::Sequence::NA.new(seq)
97
- else # 'protein'
98
- Bio::Sequence::AA.new(seq)
99
- end
100
- end
101
-
102
-
103
- # Returns Bio::Features object.
104
- def features
105
- array = []
106
- query = "select * from seqfeature where bioentry_id = ?"
107
- @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
108
- next unless row
109
-
110
- f_id = row['seqfeature_id']
111
- k_id = row['type_term_id']
112
- s_id = row['source_term_id']
113
- rank = row['rank'].to_i - 1
114
-
115
- # key : type (gene, CDS, ...)
116
- type = feature_key(k_id)
117
-
118
- # source : database (EMBL/GenBank/SwissProt)
119
- database = feature_source(s_id)
120
-
121
- # location : position
122
- locations = feature_locations(f_id)
123
-
124
- # qualifier
125
- qualifiers = feature_qualifiers(f_id)
126
-
127
- # rank
128
- array[rank] = Bio::Feature.new(type, locations, qualifiers)
129
- end
130
- return Bio::Features.new(array)
131
- end
132
-
133
-
134
- # Returns reference informations in Array of Hash (not Bio::Reference).
135
- def references
136
- array = []
137
- query = <<-END
138
- select * from bioentry_reference, reference
139
- where bioentry_id = ? and
140
- bioentry_reference.reference_id = reference.reference_id
141
- END
142
- @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
143
- next unless row
144
-
145
- hash = {
146
- 'start' => row['start_pos'],
147
- 'end' => row['end_pos'],
148
- 'journal' => row['location'],
149
- 'title' => row['title'],
150
- 'authors' => row['authors'],
151
- 'medline' => row['crc']
152
- }
153
- hash.default = ''
154
-
155
- rank = row['rank'].to_i - 1
156
- array[rank] = hash
157
- end
158
- return array
159
- end
160
-
161
-
162
- # Returns the first comment. For complete comments, use comments method.
163
- def comment
164
- query = "select * from comment where bioentry_id = ?"
165
- row = @dbh.execute(query, @bioentry_id).fetch
166
- row ? row['comment_text'] : ''
167
- end
168
-
169
- # Returns comments in an Array of Strings.
170
- def comments
171
- array = []
172
- query = "select * from comment where bioentry_id = ?"
173
- @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
174
- next unless row
175
- rank = row['rank'].to_i - 1
176
- array[rank] = row['comment_text']
177
- end
178
- return array
179
- end
180
-
181
- def database
182
- query = "select * from biodatabase where biodatabase_id = ?"
183
- row = @dbh.execute(query, @database_id).fetch
184
- row ? row['name'] : ''
185
- end
186
-
187
- def date
188
- query = "select * from bioentry_date where bioentry_id = ?"
189
- row = @dbh.execute(query, @bioentry_id).fetch
190
- row ? row['date'] : ''
191
- end
192
-
193
- def dblink
194
- query = "select * from bioentry_direct_links where source_bioentry_id = ?"
195
- row = @dbh.execute(query, @bioentry_id).fetch
196
- row ? [row['dbname'], row['accession']] : []
197
- end
198
-
199
- def definition
200
- query = "select * from bioentry_description where bioentry_id = ?"
201
- row = @dbh.execute(query, @bioentry_id).fetch
202
- row ? row['description'] : ''
203
- end
204
-
205
- def keyword
206
- query = "select * from bioentry_keywords where bioentry_id = ?"
207
- row = @dbh.execute(query, @bioentry_id).fetch
208
- row ? row['keywords'] : ''
209
- end
210
-
211
- # Use lineage, common_name, ncbi_taxa_id methods to extract in detail.
212
- def taxonomy
213
- query = <<-END
214
- select taxon_name.name, taxon.ncbi_taxon_id from bioentry
215
- join taxon_name using(taxon_id) join taxon using (taxon_id)
216
- where bioentry_id = ?
217
- END
218
- row = @dbh.execute(query, @bioentry_id).fetch
219
- # @lineage = row ? row['full_lineage'] : ''
220
- @common_name = row ? row['name'] : ''
221
- @ncbi_taxa_id = row ? row['ncbi_taxon_id'] : ''
222
- row ? [@lineage, @common_name, @ncbi_taxa_id] : []
223
- end
224
-
225
- def lineage
226
- taxonomy unless @lineage
227
- return @lineage
228
- end
229
-
230
- def common_name
231
- taxonomy unless @common_name
232
- return @common_name
233
- end
234
-
235
- def ncbi_taxa_id
236
- taxonomy unless @ncbi_taxa_id
237
- return @ncbi_taxa_id
238
- end
239
-
240
-
241
- private
242
-
243
- def feature_key(k_id)
244
- query = "select * from term where term_id= ?"
245
- row = @dbh.execute(query, k_id).fetch
246
- row ? row['name'] : ''
247
- end
248
-
249
- def feature_source(s_id)
250
- query = "select * from term where term_id = ?"
251
- row = @dbh.execute(query, s_id).fetch
252
- row ? row['name'] : ''
253
- end
254
-
255
- def feature_locations(f_id)
256
- locations = []
257
- query = "select * from location where seqfeature_id = ?"
258
- @dbh.execute(query, f_id).fetch_all.each do |row|
259
- next unless row
2
+ require 'rubygems'
3
+ require 'erb'
4
+ require 'composite_primary_keys'
5
+ # BiosqlPlug
260
6
 
261
- location = Bio::Location.new
262
- location.strand = row['strand']
263
- location.from = row['start_pos']
264
- location.to = row['end_pos']
7
+ =begin
8
+ Ok Hilmar gives to me some clarification
9
+ 1) "EMBL/GenBank/SwissProt" name in term table, is only a convention assuming data loaded by genbank embl ans swissprot formats.
10
+ If your features come from others ways for example blast or alignment ... whatever.. the user as to take care about the source.
265
11
 
266
- xref = feature_locations_remote(row['dbxref_if'])
267
- location.xref_id = xref.shift unless xref.empty?
268
-
269
- # just omit fuzzy location for now...
270
- #feature_locations_qv(row['seqfeature_location_id'])
271
-
272
- rank = row['rank'].to_i - 1
273
- locations[rank] = location
274
- end
275
- return Bio::Locations.new(locations)
276
- end
277
-
278
- def feature_locations_remote(l_id)
279
- query = "select * from dbxref where dbxref_id = ?"
280
- row = @dbh.execute(query, l_id).fetch
281
- row ? [row['accession'], row['version']] : []
282
- end
283
-
284
- def feature_locations_qv(l_id)
285
- query = "select * from location_qualifier_value where location_id = ?"
286
- row = @dbh.execute(query, l_id).fetch
287
- row ? [row['value'], row['int_value']] : []
288
- end
289
-
290
- def feature_qualifiers(f_id)
291
- qualifiers = []
292
- query = "select * from seqfeature_qualifier_value where seqfeature_id = ?"
293
- @dbh.execute(query, f_id).fetch_all.each do |row|
294
- next unless row
295
-
296
- key = feature_qualifiers_key(row['seqfeature_id'])
297
- value = row['value']
298
- qualifier = Bio::Feature::Qualifier.new(key, value)
299
-
300
- rank = row['rank'].to_i - 1
301
- qualifiers[rank] = qualifier
302
- end
303
- return qualifiers.compact # .compact is nasty hack for a while
304
- end
305
-
306
- def feature_qualifiers_key(q_id)
307
- query = <<-END
308
- select * from seqfeature_qualifier_value
309
- join term using(term_id) where seqfeature_id = ?
310
- END
311
- row = @dbh.execute(query, q_id).fetch
312
- row ? row['name'] : ''
313
- end
314
- end
315
-
316
- end # SQL
317
-
318
- end # Bio
319
12
 
13
+ =end
14
+ =begin
15
+ TODO:
16
+ 1) source_term_id => surce_term and check before if the source term is present or not and the level, the root should always be something "EMBL/GenBank/SwissProt" or contestualized.
17
+ 2) Into DummyBase class delete connection there and use Bio::ArSQL.establish_connection which reads info from a yml file.
18
+ 3) Chk Locations in Biofeatures ArSQL
19
+ =end
20
+ module Bio
21
+ class SQL
22
+ #no check is made
23
+ def self.establish_connection(configurations, env)
24
+ #configurations is an hash similar what YAML returns.
25
+ #{:database=>"biorails_development", :adapter=>"postgresql", :username=>"rails", :password=>nil}
26
+ configurations.assert_valid_keys('development', 'production','test')
27
+ configurations[env].assert_valid_keys('hostname','database','adapter','username','password')
28
+ DummyBase.configurations = configurations
29
+ DummyBase.establish_connection "#{env}"
30
+ end
31
+
32
+ def self.fetch_id(id)
33
+ Bio::SQL::Bioentry.find(id)
34
+ end
35
+
36
+ def self.fetch_accession(accession)
37
+ accession = accession.upcase
38
+ Bio::SQL::Bioentry.exists?(:accession => accession) ? Bio::SQL::Sequence.new(:entry=>Bio::SQL::Bioentry.find_by_accession(accession)) : nil
39
+ end
40
+
41
+ def self.exists_accession(accession)
42
+ Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil? ? false : true
43
+ end
44
+
45
+ def self.exists_database(name)
46
+ Bio::SQL::Biodatabase.find_by_name(name).nil? ? false : true
47
+ end
48
+
49
+ def self.list_entries
50
+ Bio::SQL::Bioentry.find(:all).collect{|entry|
51
+ {:id=>entry.bioentry_id, :accession=>entry.accession}
52
+ }
53
+ end
54
+
55
+ def self.list_databases
56
+ Bio::SQL::Biodatabase.find(:all).collect{|entry|
57
+ {:id=>entry.biodatabase_id, :name => entry.name}
58
+ }
59
+ end
60
+
61
+ def self.delete_entry_id(id)
62
+ Bioentry.delete(id)
63
+ end
64
+
65
+ def self.delete_entry_accession(accession)
66
+ Bioentry.delete(Bioentry.find_by_accession(accession))
67
+ end
68
+
69
+
70
+ class DummyBase < ActiveRecord::Base
71
+ #NOTE: Using postgresql, not setting sequence name, system will discover the name by default.
72
+ #NOTE: this class will not establish the connection automatically
73
+ self.abstract_class = true
74
+ self.pluralize_table_names = false
75
+ #prepend the table name to the usual id, to avoid specifying the primary id for every table
76
+ self.primary_key_prefix_type = :table_name_with_underscore
77
+ #biosql_configurations=YAML::load(ERB.new(IO.read(File.join(File.dirname(__FILE__),'../config', 'database.yml'))).result)
78
+ #self.configurations=biosql_configurations
79
+ #self.establish_connection "development"
80
+ end #DummyBase
81
+
82
+ autoload :Biodatabase, 'bio/io/biosql/biodatabase'
83
+ autoload :Bioentry, 'bio/io/biosql/bioentry'
84
+ autoload :BioentryDbxref, 'bio/io/biosql/bioentry_dbxref'
85
+ autoload :BioentryPath, 'bio/io/biosql/bioentry_path'
86
+ autoload :BioentryQualifierValue, 'bio/io/biosql/bioentry_qualifier_value'
87
+ autoload :BioentryReference, 'bio/io/biosql/bioentry_reference'
88
+ autoload :BioentryRelationship, 'bio/io/biosql/bioentry_relationship'
89
+ autoload :Biosequence, 'bio/io/biosql/biosequence'
90
+ autoload :Comment, 'bio/io/biosql/comment'
91
+ autoload :Dbxref, 'bio/io/biosql/dbxref'
92
+ autoload :DbxrefQualifierValue, 'bio/io/biosql/dbxref_qualifier_value'
93
+ autoload :Location, 'bio/io/biosql/location'
94
+ autoload :LocationQualifierValue, 'bio/io/biosql/location_qualifier_value'
95
+ autoload :Ontology, 'bio/io/biosql/ontology'
96
+ autoload :Reference, 'bio/io/biosql/reference'
97
+ autoload :Seqfeature, 'bio/io/biosql/seqfeature'
98
+ autoload :SeqfeatureDbxref, 'bio/io/biosql/seqfeature_dbxref'
99
+ autoload :SeqfeaturePath, 'bio/io/biosql/seqfeature_path'
100
+ autoload :SeqfeatureQualifierValue, 'bio/io/biosql/seqfeature_qualifier_value'
101
+ autoload :SeqfeatureRelationship, 'bio/io/biosql/seqfeature_relationship'
102
+ autoload :Taxon, 'bio/io/biosql/taxon'
103
+ autoload :TaxonName, 'bio/io/biosql/taxon_name'
104
+ autoload :Term, 'bio/io/biosql/term'
105
+ autoload :TermDbxref, 'bio/io/biosql/term_dbxref'
106
+ autoload :TermPath, 'bio/io/biosql/term_path'
107
+ autoload :TermRelationship, 'bio/io/biosql/term_relationship'
108
+ autoload :TermRelationshipTerm, 'bio/io/biosql/term_relationship_term'
109
+ autoload :Sequence, 'bio/db/biosql/sequence'
110
+ end #biosql
111
+
112
+ end #Bio
320
113
 
321
114
  if __FILE__ == $0
322
- begin
323
- require 'pp'
324
- alias p pp
325
- rescue LoadError
115
+ require 'rubygems'
116
+ require 'composite_primary_keys'
117
+ require 'bio'
118
+ require 'pp'
119
+
120
+ # pp connection = Bio::SQL.establish_connection('bio/io/biosql/config/database.yml','development')
121
+ connection = Bio::SQL.establish_connection({'development'=>{'database'=>"bio_test", 'adapter'=>"postgresql", 'username'=>"rails", 'password'=>nil}},'development')
122
+ #pp YAML::load(ERB.new(IO.read('bio/io/biosql/config/database.yml')).result)
123
+ if true
124
+ #Bio::SQL.list_entries
125
+
126
+ # biosequence = data.to_biosequence
127
+ # puts biosequence.output(:genbank)
128
+ db=Bio::SQL::Biodatabase.new(:name=>'JEFF', :authority=>'ME', :description=>'YOU')
129
+ db.save!
130
+
131
+ puts "### FileFile.auto"
132
+ if ARGV.size > 0
133
+ #embl = Bio::FlatFile.auto(ARGF.read)
134
+ Bio::FlatFile.auto(ARGF) do |ff|
135
+ ff.each do |data|
136
+ biosequence=data.to_biosequence
137
+ puts biosequence.output(:fasta)
138
+ sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
139
+ sqlseq.save
140
+ sqlseq.to_biosequence.output(:fasta)
141
+ end
142
+ end
143
+ else
144
+ require 'bio/io/fetch'
145
+ server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
146
+ data = Bio::EMBL.new(server.fetch('embl','AJ224123'))
147
+ end
148
+
149
+
150
+ # sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
151
+ # sqlseq.save
152
+ # sqlseq_bioseq=sqlseq.to_biosequence
153
+ # puts sqlseq_bioseq.output(:genbank)
154
+
155
+
156
+
157
+ # bioseq = Bio::SQL.fetch_accession('AJ224122')
158
+ # pp bioseq
159
+ # pp bioseq.entry_id
160
+ #TODO create a test only for tables, not for sequences, here
161
+ # pp bioseq.molecule_type
162
+ #pp bioseq.molecule_type.class
163
+ #bioseq.molecule_type_update('dna', 1)
164
+ ## pp Bio::SQL::Taxon.find(8121).taxon_names
165
+
166
+ #sqlseq.to_biosequence
167
+
168
+ # sqlseq.delete
169
+
170
+ # db.destroy
326
171
  end
327
-
328
- db = ARGV.empty? ? 'dbi:Mysql:database=biosql;host=localhost' : ARGV.shift
329
- serv = Bio::SQL.new(db, 'root')
330
-
331
- ent0 = serv.fetch('X76706')
332
- ent0 = serv.fetch('A15H9FIB')
333
- ent1 = serv.fetch('J01902')
334
- ent2 = serv.fetch('X04311')
335
-
336
- pp ent0.features
337
- pp ent0.references
338
-
339
- pp ent1.seq
340
- pp ent1.seq.translate
341
- pp ent1.seq.gc
342
- pp ent1.subseq(1,20)
343
-
344
- pp ent2.accession
345
- pp ent2.comment
346
- pp ent2.comments
347
- pp ent2.common_name
348
- pp ent2.database
349
- pp ent2.date
350
- pp ent2.dblink
351
- pp ent2.definition
352
- pp ent2.division
353
- pp ent2.entry_id
354
- pp ent2.features
355
- pp ent2.keyword
356
- pp ent2.lineage
357
- pp ent2.ncbi_taxa_id
358
- pp ent2.references
359
- pp ent2.seq
360
- pp ent2.subseq(1,10)
361
- pp ent2.taxonomy
362
- pp ent2.version
363
-
172
+ #pp bioseq.molecule_type
173
+ #term = Bio::SQL::Term.find_by_name('mol_type')
174
+ #pp term
175
+ #pp bioseq.entry.bioentry_qualifier_values.create(:term=>term, :rank=>2, :value=>'pippo')
176
+ #pp bioseq.entry.bioentry_qualifier_values.inspect
177
+ #pp bioseq.entry.bioentry_qualifier_values.find_all_by_term_id(26)
178
+ #pp primo.class
179
+ # pp primo.value='dna'
180
+ # pp primo.save
181
+ #pp bioseq.molecule_type= 'prova'
182
+
183
+ #Bio::SQL::BioentryQualifierValue.delete(delete.bioentry_id,delete.term_id,delete.rank)
184
+
185
+
364
186
  end
365
-