bio 1.2.1 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -1,15 +1,16 @@
1
1
  #
2
2
  # = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
3
3
  #
4
- # Copyright:: Copyright (C) 2001, 2007 Toshiaki Katayama <k@bioruby.org>
4
+ # Copyright:: Copyright (C) 2001, 2007, 2008 Toshiaki Katayama <k@bioruby.org>
5
5
  # Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: pubmed.rb,v 1.23 2007/12/12 13:53:26 k Exp $
8
+ # $Id:$
9
9
  #
10
10
 
11
+ require 'bio/io/ncbirest'
11
12
  require 'bio/command'
12
- require 'cgi' unless defined?(CGI)
13
+ require 'cgi'
13
14
 
14
15
  module Bio
15
16
 
@@ -68,29 +69,7 @@ module Bio
68
69
  # manuscript = Bio::PubMed.query("10592173")
69
70
  # medline = Bio::MEDLINE.new(manuscript)
70
71
  #
71
- class PubMed
72
-
73
- # Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
74
- # weekdays for any series of more than 100 requests.
75
- # -> Not implemented yet in BioRuby
76
-
77
- # Make no more than one request every 3 seconds.
78
- NCBI_INTERVAL = 3
79
- @@last_access = nil
80
-
81
- private
82
-
83
- def ncbi_access_wait(wait = NCBI_INTERVAL)
84
- if @@last_access
85
- duration = Time.now - @@last_access
86
- if wait > duration
87
- sleep wait - duration
88
- end
89
- end
90
- @@last_access = Time.now
91
- end
92
-
93
- public
72
+ class PubMed < Bio::NCBI::REST
94
73
 
95
74
  # Search the PubMed database by given keywords using E-Utils and returns
96
75
  # an array of PubMed IDs.
@@ -99,39 +78,22 @@ class PubMed
99
78
  # http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html#PubMed
100
79
  # ---
101
80
  # *Arguments*:
102
- # * _id_: query string (required)
103
- # * _field_
104
- # * _reldate_
105
- # * _mindate_
106
- # * _maxdate_
107
- # * _datetype_
108
- # * _retstart_
109
- # * _retmax_ (default 100)
110
- # * _retmode_
111
- # * _rettype_
81
+ # * _str_: query string (required)
82
+ # * _hash_: hash of E-Utils options
83
+ # * _retmode_: "xml", "html", ...
84
+ # * _rettype_: "medline", ...
85
+ # * _retmax_: integer (default 100)
86
+ # * _retstart_: integer
87
+ # * _field_
88
+ # * _reldate_
89
+ # * _mindate_
90
+ # * _maxdate_
91
+ # * _datetype_
112
92
  # *Returns*:: array of PubMed IDs or a number of results
113
93
  def esearch(str, hash = {})
114
- return nil if str.empty?
115
-
116
- serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
117
- opts = {
118
- "retmax" => 100,
119
- "tool" => "bioruby",
120
- "db" => "pubmed",
121
- "term" => str
122
- }
94
+ opts = { "db" => "pubmed" }
123
95
  opts.update(hash)
124
-
125
- ncbi_access_wait
126
-
127
- response, = Bio::Command.post_form(serv, opts)
128
- result = response.body
129
- if opts['rettype'] == 'count'
130
- result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
131
- else
132
- result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
133
- end
134
- return result
96
+ super(str, opts)
135
97
  end
136
98
 
137
99
  # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
@@ -141,29 +103,21 @@ class PubMed
141
103
  # ---
142
104
  # *Arguments*:
143
105
  # * _ids_: list of PubMed IDs (required)
106
+ # * _hash_: hash of E-Utils options
107
+ # * _retmode_: "xml", "html", ...
108
+ # * _rettype_: "medline", ...
109
+ # * _retmax_: integer (default 100)
110
+ # * _retstart_: integer
111
+ # * _field_
112
+ # * _reldate_
113
+ # * _mindate_
114
+ # * _maxdate_
115
+ # * _datetype_
144
116
  # *Returns*:: Array of MEDLINE formatted String
145
117
  def efetch(ids, hash = {})
146
- return nil if ids.to_s.empty?
147
- ids = ids.join(",") if ids === Array
148
-
149
- serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
150
- opts = {
151
- "tool" => "bioruby",
152
- "db" => "pubmed",
153
- "retmode" => "text",
154
- "rettype" => "medline",
155
- "id" => ids,
156
- }
118
+ opts = { "db" => "pubmed", "rettype" => "medline" }
157
119
  opts.update(hash)
158
-
159
- ncbi_access_wait
160
-
161
- response, = Bio::Command.post_form(serv, opts)
162
- result = response.body
163
- if opts["retmode"] == "text"
164
- result = result.split(/\n\n+/)
165
- end
166
- return result
120
+ super(ids, opts)
167
121
  end
168
122
 
169
123
  # Search the PubMed database by given keywords using entrez query and returns
@@ -180,7 +134,7 @@ class PubMed
180
134
  ncbi_access_wait
181
135
 
182
136
  http = Bio::Command.new_http(host)
183
- response, = http.get(path + CGI.escape(str))
137
+ response = http.get(path + CGI.escape(str))
184
138
  result = response.body
185
139
  result = result.scan(/value="(\d+)" id="UidCheckBox"/m).flatten
186
140
  return result
@@ -195,12 +149,12 @@ class PubMed
195
149
  def query(*ids)
196
150
  host = "www.ncbi.nlm.nih.gov"
197
151
  path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
198
- list = ids.join(",")
152
+ list = ids.collect { |x| CGI.escape(x.to_s) }.join(",")
199
153
 
200
154
  ncbi_access_wait
201
155
 
202
156
  http = Bio::Command.new_http(host)
203
- response, = http.get(path + list)
157
+ response = http.get(path + list)
204
158
  result = response.body
205
159
  result = result.scan(/<pre>\s*(.*?)<\/pre>/m).flatten
206
160
 
@@ -229,7 +183,7 @@ class PubMed
229
183
  ncbi_access_wait
230
184
 
231
185
  http = Bio::Command.new_http(host)
232
- response, = http.get(path + id.to_s)
186
+ response = http.get(path + CGI.escape(id.to_s))
233
187
  result = response.body
234
188
  if result =~ /#{id}\s+Error/
235
189
  raise( result )
@@ -5,7 +5,7 @@
5
5
  # Toshiaki Katayama <k@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: registry.rb,v 1.19 2007/04/05 23:35:41 trevor Exp $
8
+ # $Id:$
9
9
  #
10
10
  # == Description
11
11
  #
@@ -172,7 +172,7 @@ class Registry
172
172
  def read_remote(url)
173
173
  schema, user, host, port, reg, path, = URI.split(url)
174
174
  Bio::Command.start_http(host, port) do |http|
175
- response, = http.get(path)
175
+ response = http.get(path)
176
176
  parse_stanza(response.body)
177
177
  end
178
178
  end
@@ -1,365 +1,186 @@
1
- #
2
- # = bio/io/sql.rb - BioSQL access module
3
- #
4
- # Copyright:: Copyright (C) 2002 Toshiaki Katayama <k@bioruby.org>
5
- # Copyright:: Copyright (C) 2006 Raoul Jean Pierre Bonnal <raoul.bonnal@itb.cnr.it>
6
- # License:: The Ruby License
7
- #
8
- # $Id: sql.rb,v 1.8 2007/04/05 23:35:41 trevor Exp $
9
- #
10
1
 
11
- begin
12
- require 'dbi'
13
- rescue LoadError
14
- end
15
- require 'bio/sequence'
16
- require 'bio/feature'
17
-
18
-
19
- module Bio
20
-
21
- class SQL
22
-
23
- def initialize(db = 'dbi:Mysql:biosql', user = nil, pass = nil)
24
- @dbh = DBI.connect(db, user, pass)
25
- end
26
-
27
- def close
28
- @dbh.disconnect
29
- end
30
-
31
- # Returns Bio::SQL::Sequence object.
32
- def fetch(accession) # or display_id for fall back
33
- query = "select * from bioentry where accession = ?"
34
- entry = @dbh.execute(query, accession).fetch
35
- return Sequence.new(@dbh, entry) if entry
36
-
37
- query = "select * from bioentry where display_id = ?"
38
- entry = @dbh.execute(query, accession).fetch
39
- return Sequence.new(@dbh, entry) if entry
40
- end
41
- alias get_by_id fetch
42
-
43
-
44
- # for lazy fetching
45
-
46
- class Sequence
47
-
48
- def initialize(dbh, entry)
49
- @dbh = dbh
50
- @bioentry_id = entry['bioentry_id']
51
- @database_id = entry['biodatabase_id']
52
- @entry_id = entry['display_id']
53
- @accession = entry['accession']
54
- @version = entry['entry_version']
55
- @division = entry['division']
56
- end
57
- attr_reader :accession, :division, :entry_id, :version
58
-
59
-
60
- def to_fasta
61
- if seq = seq
62
- return seq.to_fasta(@accession)
63
- end
64
- end
65
-
66
- # Returns Bio::Sequence::NA or AA object.
67
- def seq
68
- query = "select * from biosequence where bioentry_id = ?"
69
- row = @dbh.execute(query, @bioentry_id).fetch
70
- return unless row
71
-
72
- mol = row['alphabet']
73
- seq = row['seq']
74
-
75
- case mol
76
- when /.na/i # 'dna' or 'rna'
77
- Bio::Sequence::NA.new(seq)
78
- else # 'protein'
79
- Bio::Sequence::AA.new(seq)
80
- end
81
- end
82
-
83
- # Returns Bio::Sequence::NA or AA object (by lazy fetching).
84
- def subseq(from, to)
85
- length = to - from + 1
86
- query = "select alphabet, substring(seq, ?, ?) as subseq" +
87
- " from biosequence where bioentry_id = ?"
88
- row = @dbh.execute(query, from, length, @bioentry_id).fetch
89
- return unless row
90
-
91
- mol = row['alphabet']
92
- seq = row['subseq']
93
-
94
- case mol
95
- when /.na/i # 'dna' or 'rna'
96
- Bio::Sequence::NA.new(seq)
97
- else # 'protein'
98
- Bio::Sequence::AA.new(seq)
99
- end
100
- end
101
-
102
-
103
- # Returns Bio::Features object.
104
- def features
105
- array = []
106
- query = "select * from seqfeature where bioentry_id = ?"
107
- @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
108
- next unless row
109
-
110
- f_id = row['seqfeature_id']
111
- k_id = row['type_term_id']
112
- s_id = row['source_term_id']
113
- rank = row['rank'].to_i - 1
114
-
115
- # key : type (gene, CDS, ...)
116
- type = feature_key(k_id)
117
-
118
- # source : database (EMBL/GenBank/SwissProt)
119
- database = feature_source(s_id)
120
-
121
- # location : position
122
- locations = feature_locations(f_id)
123
-
124
- # qualifier
125
- qualifiers = feature_qualifiers(f_id)
126
-
127
- # rank
128
- array[rank] = Bio::Feature.new(type, locations, qualifiers)
129
- end
130
- return Bio::Features.new(array)
131
- end
132
-
133
-
134
- # Returns reference informations in Array of Hash (not Bio::Reference).
135
- def references
136
- array = []
137
- query = <<-END
138
- select * from bioentry_reference, reference
139
- where bioentry_id = ? and
140
- bioentry_reference.reference_id = reference.reference_id
141
- END
142
- @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
143
- next unless row
144
-
145
- hash = {
146
- 'start' => row['start_pos'],
147
- 'end' => row['end_pos'],
148
- 'journal' => row['location'],
149
- 'title' => row['title'],
150
- 'authors' => row['authors'],
151
- 'medline' => row['crc']
152
- }
153
- hash.default = ''
154
-
155
- rank = row['rank'].to_i - 1
156
- array[rank] = hash
157
- end
158
- return array
159
- end
160
-
161
-
162
- # Returns the first comment. For complete comments, use comments method.
163
- def comment
164
- query = "select * from comment where bioentry_id = ?"
165
- row = @dbh.execute(query, @bioentry_id).fetch
166
- row ? row['comment_text'] : ''
167
- end
168
-
169
- # Returns comments in an Array of Strings.
170
- def comments
171
- array = []
172
- query = "select * from comment where bioentry_id = ?"
173
- @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
174
- next unless row
175
- rank = row['rank'].to_i - 1
176
- array[rank] = row['comment_text']
177
- end
178
- return array
179
- end
180
-
181
- def database
182
- query = "select * from biodatabase where biodatabase_id = ?"
183
- row = @dbh.execute(query, @database_id).fetch
184
- row ? row['name'] : ''
185
- end
186
-
187
- def date
188
- query = "select * from bioentry_date where bioentry_id = ?"
189
- row = @dbh.execute(query, @bioentry_id).fetch
190
- row ? row['date'] : ''
191
- end
192
-
193
- def dblink
194
- query = "select * from bioentry_direct_links where source_bioentry_id = ?"
195
- row = @dbh.execute(query, @bioentry_id).fetch
196
- row ? [row['dbname'], row['accession']] : []
197
- end
198
-
199
- def definition
200
- query = "select * from bioentry_description where bioentry_id = ?"
201
- row = @dbh.execute(query, @bioentry_id).fetch
202
- row ? row['description'] : ''
203
- end
204
-
205
- def keyword
206
- query = "select * from bioentry_keywords where bioentry_id = ?"
207
- row = @dbh.execute(query, @bioentry_id).fetch
208
- row ? row['keywords'] : ''
209
- end
210
-
211
- # Use lineage, common_name, ncbi_taxa_id methods to extract in detail.
212
- def taxonomy
213
- query = <<-END
214
- select taxon_name.name, taxon.ncbi_taxon_id from bioentry
215
- join taxon_name using(taxon_id) join taxon using (taxon_id)
216
- where bioentry_id = ?
217
- END
218
- row = @dbh.execute(query, @bioentry_id).fetch
219
- # @lineage = row ? row['full_lineage'] : ''
220
- @common_name = row ? row['name'] : ''
221
- @ncbi_taxa_id = row ? row['ncbi_taxon_id'] : ''
222
- row ? [@lineage, @common_name, @ncbi_taxa_id] : []
223
- end
224
-
225
- def lineage
226
- taxonomy unless @lineage
227
- return @lineage
228
- end
229
-
230
- def common_name
231
- taxonomy unless @common_name
232
- return @common_name
233
- end
234
-
235
- def ncbi_taxa_id
236
- taxonomy unless @ncbi_taxa_id
237
- return @ncbi_taxa_id
238
- end
239
-
240
-
241
- private
242
-
243
- def feature_key(k_id)
244
- query = "select * from term where term_id= ?"
245
- row = @dbh.execute(query, k_id).fetch
246
- row ? row['name'] : ''
247
- end
248
-
249
- def feature_source(s_id)
250
- query = "select * from term where term_id = ?"
251
- row = @dbh.execute(query, s_id).fetch
252
- row ? row['name'] : ''
253
- end
254
-
255
- def feature_locations(f_id)
256
- locations = []
257
- query = "select * from location where seqfeature_id = ?"
258
- @dbh.execute(query, f_id).fetch_all.each do |row|
259
- next unless row
2
+ require 'rubygems'
3
+ require 'erb'
4
+ require 'composite_primary_keys'
5
+ # BiosqlPlug
260
6
 
261
- location = Bio::Location.new
262
- location.strand = row['strand']
263
- location.from = row['start_pos']
264
- location.to = row['end_pos']
7
+ =begin
8
+ Hilmar gave me some clarification:
9
+ 1) The "EMBL/GenBank/SwissProt" name in the term table is only a convention that assumes the data were loaded from GenBank, EMBL and SwissProt formats.
10
+ If your features come from other sources, for example BLAST or an alignment, the user has to take care of the source.
265
11
 
266
- xref = feature_locations_remote(row['dbxref_if'])
267
- location.xref_id = xref.shift unless xref.empty?
268
-
269
- # just omit fuzzy location for now...
270
- #feature_locations_qv(row['seqfeature_location_id'])
271
-
272
- rank = row['rank'].to_i - 1
273
- locations[rank] = location
274
- end
275
- return Bio::Locations.new(locations)
276
- end
277
-
278
- def feature_locations_remote(l_id)
279
- query = "select * from dbxref where dbxref_id = ?"
280
- row = @dbh.execute(query, l_id).fetch
281
- row ? [row['accession'], row['version']] : []
282
- end
283
-
284
- def feature_locations_qv(l_id)
285
- query = "select * from location_qualifier_value where location_id = ?"
286
- row = @dbh.execute(query, l_id).fetch
287
- row ? [row['value'], row['int_value']] : []
288
- end
289
-
290
- def feature_qualifiers(f_id)
291
- qualifiers = []
292
- query = "select * from seqfeature_qualifier_value where seqfeature_id = ?"
293
- @dbh.execute(query, f_id).fetch_all.each do |row|
294
- next unless row
295
-
296
- key = feature_qualifiers_key(row['seqfeature_id'])
297
- value = row['value']
298
- qualifier = Bio::Feature::Qualifier.new(key, value)
299
-
300
- rank = row['rank'].to_i - 1
301
- qualifiers[rank] = qualifier
302
- end
303
- return qualifiers.compact # .compact is nasty hack for a while
304
- end
305
-
306
- def feature_qualifiers_key(q_id)
307
- query = <<-END
308
- select * from seqfeature_qualifier_value
309
- join term using(term_id) where seqfeature_id = ?
310
- END
311
- row = @dbh.execute(query, q_id).fetch
312
- row ? row['name'] : ''
313
- end
314
- end
315
-
316
- end # SQL
317
-
318
- end # Bio
319
12
 
13
+ =end
14
+ =begin
15
+ TODO:
16
+ 1) source_term_id => source_term, and check beforehand whether the source term is present and at which level; the root should always be something like "EMBL/GenBank/SwissProt" or contextualized.
17
+ 2) In the DummyBase class, delete the connection there and use Bio::ArSQL.establish_connection, which reads info from a yml file.
18
+ 3) Check Locations in Biofeatures ArSQL
19
+ =end
20
+ module Bio
21
+ class SQL
22
+ #no check is made
23
+ def self.establish_connection(configurations, env)
24
+ #configurations is a hash similar to what YAML returns.
25
+ #{:database=>"biorails_development", :adapter=>"postgresql", :username=>"rails", :password=>nil}
26
+ configurations.assert_valid_keys('development', 'production','test')
27
+ configurations[env].assert_valid_keys('hostname','database','adapter','username','password')
28
+ DummyBase.configurations = configurations
29
+ DummyBase.establish_connection "#{env}"
30
+ end
31
+
32
+ def self.fetch_id(id)
33
+ Bio::SQL::Bioentry.find(id)
34
+ end
35
+
36
+ def self.fetch_accession(accession)
37
+ accession = accession.upcase
38
+ Bio::SQL::Bioentry.exists?(:accession => accession) ? Bio::SQL::Sequence.new(:entry=>Bio::SQL::Bioentry.find_by_accession(accession)) : nil
39
+ end
40
+
41
+ def self.exists_accession(accession)
42
+ Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil? ? false : true
43
+ end
44
+
45
+ def self.exists_database(name)
46
+ Bio::SQL::Biodatabase.find_by_name(name).nil? ? false : true
47
+ end
48
+
49
+ def self.list_entries
50
+ Bio::SQL::Bioentry.find(:all).collect{|entry|
51
+ {:id=>entry.bioentry_id, :accession=>entry.accession}
52
+ }
53
+ end
54
+
55
+ def self.list_databases
56
+ Bio::SQL::Biodatabase.find(:all).collect{|entry|
57
+ {:id=>entry.biodatabase_id, :name => entry.name}
58
+ }
59
+ end
60
+
61
+ def self.delete_entry_id(id)
62
+ Bioentry.delete(id)
63
+ end
64
+
65
+ def self.delete_entry_accession(accession)
66
+ Bioentry.delete(Bioentry.find_by_accession(accession))
67
+ end
68
+
69
+
70
+ class DummyBase < ActiveRecord::Base
71
+ #NOTE: Using postgresql, not setting sequence name, system will discover the name by default.
72
+ #NOTE: this class will not establish the connection automatically
73
+ self.abstract_class = true
74
+ self.pluralize_table_names = false
75
+ #prepend the table name to the usual id, to avoid specifying the primary key for every table
76
+ self.primary_key_prefix_type = :table_name_with_underscore
77
+ #biosql_configurations=YAML::load(ERB.new(IO.read(File.join(File.dirname(__FILE__),'../config', 'database.yml'))).result)
78
+ #self.configurations=biosql_configurations
79
+ #self.establish_connection "development"
80
+ end #DummyBase
81
+
82
+ autoload :Biodatabase, 'bio/io/biosql/biodatabase' # each constant below is registered for lazy loading from the given path
83
+ autoload :Bioentry, 'bio/io/biosql/bioentry'
84
+ autoload :BioentryDbxref, 'bio/io/biosql/bioentry_dbxref'
85
+ autoload :BioentryPath, 'bio/io/biosql/bioentry_path'
86
+ autoload :BioentryQualifierValue, 'bio/io/biosql/bioentry_qualifier_value'
87
+ autoload :BioentryReference, 'bio/io/biosql/bioentry_reference'
88
+ autoload :BioentryRelationship, 'bio/io/biosql/bioentry_relationship'
89
+ autoload :Biosequence, 'bio/io/biosql/biosequence'
90
+ autoload :Comment, 'bio/io/biosql/comment'
91
+ autoload :Dbxref, 'bio/io/biosql/dbxref'
92
+ autoload :DbxrefQualifierValue, 'bio/io/biosql/dbxref_qualifier_value'
93
+ autoload :Location, 'bio/io/biosql/location'
94
+ autoload :LocationQualifierValue, 'bio/io/biosql/location_qualifier_value'
95
+ autoload :Ontology, 'bio/io/biosql/ontology'
96
+ autoload :Reference, 'bio/io/biosql/reference'
97
+ autoload :Seqfeature, 'bio/io/biosql/seqfeature'
98
+ autoload :SeqfeatureDbxref, 'bio/io/biosql/seqfeature_dbxref'
99
+ autoload :SeqfeaturePath, 'bio/io/biosql/seqfeature_path'
100
+ autoload :SeqfeatureQualifierValue, 'bio/io/biosql/seqfeature_qualifier_value'
101
+ autoload :SeqfeatureRelationship, 'bio/io/biosql/seqfeature_relationship'
102
+ autoload :Taxon, 'bio/io/biosql/taxon'
103
+ autoload :TaxonName, 'bio/io/biosql/taxon_name'
104
+ autoload :Term, 'bio/io/biosql/term'
105
+ autoload :TermDbxref, 'bio/io/biosql/term_dbxref'
106
+ autoload :TermPath, 'bio/io/biosql/term_path'
107
+ autoload :TermRelationship, 'bio/io/biosql/term_relationship'
108
+ autoload :TermRelationshipTerm, 'bio/io/biosql/term_relationship_term'
109
+ autoload :Sequence, 'bio/db/biosql/sequence' # note: this one lives under bio/db, not bio/io
110
+ end #biosql
111
+
112
+ end #Bio
320
113
 
321
114
  if __FILE__ == $0
322
- begin
323
- require 'pp'
324
- alias p pp
325
- rescue LoadError
115
+ require 'rubygems'
116
+ require 'composite_primary_keys'
117
+ require 'bio'
118
+ require 'pp'
119
+
120
+ # pp connection = Bio::SQL.establish_connection('bio/io/biosql/config/database.yml','development')
121
+ connection = Bio::SQL.establish_connection({'development'=>{'database'=>"bio_test", 'adapter'=>"postgresql", 'username'=>"rails", 'password'=>nil}},'development')
122
+ #pp YAML::load(ERB.new(IO.read('bio/io/biosql/config/database.yml')).result)
123
+ if true
124
+ #Bio::SQL.list_entries
125
+
126
+ # biosequence = data.to_biosequence
127
+ # puts biosequence.output(:genbank)
128
+ db=Bio::SQL::Biodatabase.new(:name=>'JEFF', :authority=>'ME', :description=>'YOU')
129
+ db.save!
130
+
131
+ puts "### FileFile.auto"
132
+ if ARGV.size > 0
133
+ #embl = Bio::FlatFile.auto(ARGF.read)
134
+ Bio::FlatFile.auto(ARGF) do |ff|
135
+ ff.each do |data|
136
+ biosequence=data.to_biosequence
137
+ puts biosequence.output(:fasta)
138
+ sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
139
+ sqlseq.save
140
+ sqlseq.to_biosequence.output(:fasta)
141
+ end
142
+ end
143
+ else
144
+ require 'bio/io/fetch'
145
+ server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
146
+ data = Bio::EMBL.new(server.fetch('embl','AJ224123'))
147
+ end
148
+
149
+
150
+ # sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
151
+ # sqlseq.save
152
+ # sqlseq_bioseq=sqlseq.to_biosequence
153
+ # puts sqlseq_bioseq.output(:genbank)
154
+
155
+
156
+
157
+ # bioseq = Bio::SQL.fetch_accession('AJ224122')
158
+ # pp bioseq
159
+ # pp bioseq.entry_id
160
+ #TODO create a test only for tables not sequence here
161
+ # pp bioseq.molecule_type
162
+ #pp bioseq.molecule_type.class
163
+ #bioseq.molecule_type_update('dna', 1)
164
+ ## pp Bio::SQL::Taxon.find(8121).taxon_names
165
+
166
+ #sqlseq.to_biosequence
167
+
168
+ # sqlseq.delete
169
+
170
+ # db.destroy
326
171
  end
327
-
328
- db = ARGV.empty? ? 'dbi:Mysql:database=biosql;host=localhost' : ARGV.shift
329
- serv = Bio::SQL.new(db, 'root')
330
-
331
- ent0 = serv.fetch('X76706')
332
- ent0 = serv.fetch('A15H9FIB')
333
- ent1 = serv.fetch('J01902')
334
- ent2 = serv.fetch('X04311')
335
-
336
- pp ent0.features
337
- pp ent0.references
338
-
339
- pp ent1.seq
340
- pp ent1.seq.translate
341
- pp ent1.seq.gc
342
- pp ent1.subseq(1,20)
343
-
344
- pp ent2.accession
345
- pp ent2.comment
346
- pp ent2.comments
347
- pp ent2.common_name
348
- pp ent2.database
349
- pp ent2.date
350
- pp ent2.dblink
351
- pp ent2.definition
352
- pp ent2.division
353
- pp ent2.entry_id
354
- pp ent2.features
355
- pp ent2.keyword
356
- pp ent2.lineage
357
- pp ent2.ncbi_taxa_id
358
- pp ent2.references
359
- pp ent2.seq
360
- pp ent2.subseq(1,10)
361
- pp ent2.taxonomy
362
- pp ent2.version
363
-
172
+ #pp bioseq.molecule_type
173
+ #term = Bio::SQL::Term.find_by_name('mol_type')
174
+ #pp term
175
+ #pp bioseq.entry.bioentry_qualifier_values.create(:term=>term, :rank=>2, :value=>'pippo')
176
+ #pp bioseq.entry.bioentry_qualifier_values.inspect
177
+ #pp bioseq.entry.bioentry_qualifier_values.find_all_by_term_id(26)
178
+ #pp primo.class
179
+ # pp primo.value='dna'
180
+ # pp primo.save
181
+ #pp bioseq.molecule_type= 'prova'
182
+
183
+ #Bio::SQL::BioentryQualifierValue.delete(delete.bioentry_id,delete.term_id,delete.rank)
184
+
185
+
364
186
  end
365
-