bio 1.2.1 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -333,6 +333,63 @@ class XML < Bio::SOAPWSDL
333
333
  SERVER_URI = BASE_URI + "PML.wsdl"
334
334
  end
335
335
 
336
+ # === RequestManager
337
+ #
338
+ # Sequence Retrieving System
339
+ #
340
+ # * http://xml.nig.ac.jp/doc/RequestManager.txt
341
+ #
342
+ # === Examples
343
+ #
344
+ # serv = Bio::DDBJ::XML::RequestManager.new
345
+ # puts serv.getAsyncResult('20070420102828140')
346
+ #
347
+ # === WSDL Methods
348
+ #
349
+ # * getAsyncResult( requestId )
350
+ # * getAsyncResultMime( requestId )
351
+ #
352
+ # === Examples
353
+ #
354
+ # * http://xml.nig.ac.jp/doc/RequestManager.txt
355
+ #
356
+ class RequestManager < XML
357
+ SERVER_URI = BASE_URI + "RequestManager.wsdl"
358
+
359
+ # RequestManager using DDBJ REST interface
360
+ class REST
361
+ require 'bio/command'
362
+
363
+ Uri = 'http://xml.nig.ac.jp/rest/Invoke'
364
+ Service = 'RequestManager'
365
+
366
+ def getAsyncResult(requestId)
367
+ params = {
368
+ 'service' => Service,
369
+ 'method' => 'getAsyncResult',
370
+ 'requestId' => requestId.to_s
371
+ }
372
+ r = Bio::Command.post_form(Uri, params)
373
+ r.body
374
+ end
375
+ end #class REST
376
+
377
+ unless defined? new_orig then
378
+ class << RequestManager
379
+ alias new_orig new
380
+ private :new_orig
381
+ end
382
+ end
383
+
384
+ # creates a new driver
385
+ def self.new(wsdl = nil)
386
+ begin
387
+ new_orig(wsdl)
388
+ rescue RuntimeError
389
+ REST.new
390
+ end
391
+ end
392
+ end #class RequestManager
336
393
 
337
394
  # === SRS
338
395
  #
@@ -5,7 +5,7 @@
5
5
  # Mitsuteru C. Nakao <n@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: ensembl.rb,v 1.11 2007/04/05 23:35:41 trevor Exp $
8
+ # $Id:$
9
9
  #
10
10
  # == Description
11
11
  #
@@ -185,7 +185,7 @@ class Ensembl
185
185
 
186
186
  params = defaults.update(options)
187
187
 
188
- result, = Bio::Command.post_form("#{@uri}/exportview", params)
188
+ result = Bio::Command.post_form("#{@uri}/exportview", params)
189
189
 
190
190
  return result.body
191
191
  end
@@ -5,7 +5,7 @@
5
5
  # Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: fetch.rb,v 1.10 2007/04/05 23:35:41 trevor Exp $
8
+ # $Id:$
9
9
  #
10
10
  # == DESCRIPTION
11
11
  #
@@ -26,6 +26,7 @@
26
26
  #
27
27
 
28
28
  require 'uri'
29
+ require 'cgi'
29
30
  require 'bio/command'
30
31
 
31
32
  module Bio
@@ -102,11 +103,12 @@ module Bio
102
103
  # * _style_: [raw|html] (default = 'raw')
103
104
  # * _format_: name of output format (see Bio::Fetch#formats)
104
105
  def fetch(db, id, style = 'raw', format = nil)
105
- query = [ "db=#{db}", "id=#{id}", "style=#{style}" ]
106
- query.push("format=#{format}") if format
107
- query = query.join('&')
106
+ query = [ [ 'db', db ],
107
+ [ 'id', id ],
108
+ [ 'style', style ] ]
109
+ query.push([ 'format', format ]) if format
108
110
 
109
- Bio::Command.read_uri(@url + '?' + URI.escape(query))
111
+ _get(query)
110
112
  end
111
113
 
112
114
  # Shortcut for using BioRuby's BioFetch server. You can fetch an entry
@@ -139,9 +141,7 @@ module Bio
139
141
  # ---
140
142
  # *Returns*:: array of database names
141
143
  def databases
142
- query = "info=dbs"
143
-
144
- Bio::Command.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
144
+ _get_single('info', 'dbs').strip.split(/\s+/)
145
145
  end
146
146
 
147
147
  # Lists the formats that are available for a given database. Like the
@@ -156,9 +156,9 @@ module Bio
156
156
  # *Returns*:: array of formats
157
157
  def formats(database = @database)
158
158
  if database
159
- query = "info=formats;db=#{database}"
160
-
161
- Bio::Command.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
159
+ query = [ [ 'info', 'formats' ],
160
+ [ 'db', database ] ]
161
+ _get(query).strip.split(/\s+/)
162
162
  end
163
163
  end
164
164
 
@@ -170,11 +170,25 @@ module Bio
170
170
  # *Arguments*: none
171
171
  # *Returns*:: number
172
172
  def maxids
173
- query = "info=maxids"
173
+ _get_single('info', 'maxids').to_i
174
+ end
174
175
 
175
- Bio::Command.read_uri(@url + '?' + URI.escape(query)).to_i
176
+ private
177
+ # (private) query to the server.
178
+ # ary must be nested array, e.g. [ [ key0, val0 ], [ key1, val1 ], ... ]
179
+ def _get(ary)
180
+ query = ary.collect do |a|
181
+ "#{CGI.escape(a[0])}=#{CGI.escape(a[1])}"
182
+ end.join('&')
183
+ Bio::Command.read_uri(@url + '?' + query)
176
184
  end
177
-
185
+
186
+ # (private) query with single parameter
187
+ def _get_single(key, val)
188
+ query = "#{CGI.escape(key)}=#{CGI.escape(val)}"
189
+ Bio::Command.read_uri(@url + '?' + query)
190
+ end
191
+
178
192
  end
179
193
 
180
194
  end # module Bio
@@ -13,7 +13,6 @@
13
13
  # It can automatically detect data format, and users do not need to tell
14
14
  # the class what the data is.
15
15
  #
16
- require 'tsort'
17
16
 
18
17
  module Bio
19
18
 
@@ -23,333 +22,11 @@ module Bio
23
22
  # the class what the data is.
24
23
  class FlatFile
25
24
 
26
- include Enumerable
27
-
28
- # Wrapper for a IO (or IO-like) object.
29
- # It can input with a buffer.
30
- class BufferedInputStream
31
- # Creates a new input stream wrapper
32
- def initialize(io, path)
33
- @io = io
34
- @path = path
35
- # initialize prefetch buffer
36
- @buffer = ''
37
- end
38
-
39
- # Creates a new input stream wrapper from the given IO object.
40
- def self.for_io(io)
41
- begin
42
- path = io.path
43
- rescue NameError
44
- path = nil
45
- end
46
- self.new(io, path)
47
- end
48
-
49
- # Creates a new input stream wrapper to open file _filename_
50
- # by using File.open.
51
- # *arg is passed to File.open.
52
- #
53
- # Like File.open, a block can be accepted.
54
- def self.open_file(filename, *arg)
55
- if block_given? then
56
- File.open(filename, *arg) do |fobj|
57
- yield self.new(fobj, filename)
58
- end
59
- else
60
- fobj = File.open(filename, *arg)
61
- self.new(fobj, filename)
62
- end
63
- end
64
-
65
- # Creates a new input stream wrapper from URI specified as _uri_.
66
- # by using OpenURI.open_uri or URI#open.
67
- # _uri_ must be a String or URI object.
68
- # *arg is passed to OpenURI.open_uri or URI#open.
69
- #
70
- # Like OpenURI.open_uri, it can accept a block.
71
- def self.open_uri(uri, *arg)
72
- if uri.kind_of?(URI)
73
- if block_given?
74
- uri.open(*arg) do |fobj|
75
- yield self.new(fobj, uri.to_s)
76
- end
77
- else
78
- fobj = uri.open(*arg)
79
- self.new(fobj, uri.to_s)
80
- end
81
- else
82
- if block_given?
83
- OpenURI.open_uri(uri, *arg) do |fobj|
84
- yield self.new(fobj, uri)
85
- end
86
- else
87
- fobj = OpenURI.open_uri(uri, *arg)
88
- self.new(fobj, uri)
89
- end
90
- end
91
- end
92
-
93
- # Pathname, filename or URI to open the object.
94
- # Like File#path, returned value isn't normalized.
95
- attr_reader :path
96
-
97
- # Converts to IO object if possible
98
- def to_io
99
- @io.to_io
100
- end
101
-
102
- # Closes the IO object if possible
103
- def close
104
- @io.close
105
- end
106
-
107
- # Rewinds the IO object if possible
108
- # Internal buffer in this wrapper is cleared.
109
- def rewind
110
- r = @io.rewind
111
- @buffer = ''
112
- r
113
- end
114
-
115
- # Returns current file position
116
- def pos
117
- @io.pos - @buffer.size
118
- end
119
-
120
- # Sets current file position if possible
121
- # Internal buffer in this wrapper is cleared.
122
- def pos=(p)
123
- r = (@io.pos = p)
124
- @buffer = ''
125
- r
126
- end
127
-
128
- # Returns true if end-of-file. Otherwise, returns false.
129
- #
130
- # Note that it returns false if internal buffer is this wrapper
131
- # is not empty,
132
- def eof?
133
- if @buffer.size > 0
134
- false
135
- else
136
- @io.eof?
137
- end
138
- end
139
-
140
- # Same as IO#gets.
141
- def gets(io_rs = $/)
142
- if @buffer.size > 0
143
- if io_rs == nil then
144
- r = @buffer + @io.gets(nil).to_s
145
- @buffer = ''
146
- else
147
- if io_rs == '' then
148
- sp_rs = /\n\n/n
149
- sp_rs_orig = "\n\n"
150
- else
151
- sp_rs = Regexp.new(Regexp.escape(io_rs, 'n'), 0, 'n')
152
- sp_rs_orig = io_rs
153
- end
154
- a = @buffer.split(sp_rs, 2)
155
- if a.size > 1 then
156
- r = a[0] + sp_rs_orig
157
- @buffer = a[1]
158
- else
159
- @buffer << @io.gets(io_rs).to_s
160
- a = @buffer.split(sp_rs, 2)
161
- if a.size > 1 then
162
- r = a[0] + sp_rs_orig
163
- @buffer = a[1].to_s
164
- else
165
- r = @buffer
166
- @buffer = ''
167
- end
168
- end
169
- end
170
- r
171
- else
172
- @io.gets(io_rs)
173
- end
174
- end
175
-
176
- # Pushes back given str to the internal buffer.
177
- # Returns nil.
178
- # str must be read previously with the wrapper object.
179
- #
180
- # Note that in current implementation, the str can be everything,
181
- # but please don't depend on it.
182
- #
183
- def ungets(str)
184
- @buffer = str + @buffer
185
- nil
186
- end
187
-
188
- # Same as IO#getc.
189
- def getc
190
- if @buffer.size > 0 then
191
- r = @buffer[0]
192
- @buffer = @buffer[1..-1]
193
- else
194
- r = @io.getc
195
- end
196
- r
197
- end
198
-
199
- # Pushes back one character into the internal buffer.
200
- # Unlike IO#getc, it can be called more than one time.
201
- def ungetc(c)
202
- @buffer = sprintf("%c", c) + @buffer
203
- nil
204
- end
25
+ autoload :AutoDetect, 'bio/io/flatfile/autodetection'
26
+ autoload :Splitter, 'bio/io/flatfile/splitter'
27
+ autoload :BufferedInputStream, 'bio/io/flatfile/buffer'
205
28
 
206
- # Gets current prefetch buffer
207
- def prefetch_buffer
208
- @buffer
209
- end
210
-
211
- # It does @io.gets, and addes returned string
212
- # to the internal buffer, and returns the string.
213
- def prefetch_gets(*arg)
214
- r = @io.gets(*arg)
215
- @buffer << r if r
216
- r
217
- end
218
-
219
- # It does @io.readpartial, and addes returned string
220
- # to the internal buffer, and returns the string.
221
- def prefetch_readpartial(*arg)
222
- r = @io.readpartial(*arg)
223
- @buffer << r if r
224
- r
225
- end
226
-
227
- # Skips space characters in the stream.
228
- # returns nil.
229
- def skip_spaces
230
- ws = { ?\s => true, ?\n => true, ?\r => true, ?\t => true }
231
- while r = self.getc
232
- unless ws[r] then
233
- self.ungetc(r)
234
- break
235
- end
236
- end
237
- nil
238
- end
239
- end #class BufferedInputStream
240
-
241
- # Splitter is a class to get entries from a buffered input stream.
242
- module Splitter
243
- # This is a template of splitter.
244
- class Template
245
- # Creates a new splitter.
246
- def initialize(klass, bstream)
247
- @stream = bstream
248
- raise NotImplementedError
249
- end
250
-
251
- # skips leader of the entry.
252
- def skip_leader
253
- raise NotImplementedError
254
- end
255
-
256
- # Gets entry as a string
257
- def get_entry
258
- raise NotImplementedError
259
- end
260
-
261
- # the last entry read from the stream
262
- attr_reader :entry
263
-
264
- # a flag to write down entry start and end positions
265
- attr_accessor :entry_pos_flag
266
-
267
- # start position of the entry
268
- attr_reader :entry_start_pos
269
-
270
- # (end position of the entry) + 1
271
- attr_reader :entry_ended_pos
272
- end
273
-
274
- # Default splitter.
275
- # It sees following constants in the given class.
276
- # DELIMITER:: (String) delimiter indicates the end of a entry.
277
- # FLATFILE_HEADER:: (String) start of a entry, located on head of a line.
278
- # DELIMITER_OVERRUN:: (Integer) excess read size included in DELIMITER.
279
- #
280
- class Default < Template
281
- # Creates a new splitter.
282
- # klass:: database class
283
- # bstream:: input stream. It must be a BufferedInputStream object.
284
- def initialize(klass, bstream)
285
- @stream = bstream
286
- @delimiter = klass::DELIMITER rescue nil
287
- @header = klass::FLATFILE_HEADER rescue nil
288
- # for specific classes' benefit
289
- unless header
290
- if klass == Bio::GenBank or klass == Bio::GenPept
291
- @header = 'LOCUS '
292
- end
293
- end
294
- @delimiter_overrun = klass::DELIMITER_OVERRUN rescue nil
295
- @entry_pos_flag = nil
296
- end
297
-
298
- # (String) delimiter indicates the end of a entry.
299
- attr_accessor :delimiter
300
-
301
- # (String) start of a entry, located on head of a line.
302
- attr_accessor :header
303
-
304
- # (Integer) excess read data size included in delimiter.
305
- attr_accessor :delimiter_overrun
306
-
307
- # Skips leader of the entry.
308
- #
309
- # If @header is not nil, it reads till the contents of @header
310
- # comes at the head of a line.
311
- # If correct FLATFILE_HEADER is found, returns true.
312
- # Otherwise, returns nil.
313
- def skip_leader
314
- if @header then
315
- data = ''
316
- while s = @stream.gets(@header)
317
- data << s
318
- if data.split(/[\r\n]+/)[-1] == @header then
319
- @stream.ungets(@header)
320
- return true
321
- end
322
- end
323
- # @header was not found. For safety,
324
- # pushes back data with removing white spaces in the head.
325
- data.sub(/\A\s+/, '')
326
- @stream.ungets(data)
327
- return nil
328
- else
329
- @stream.skip_spaces
330
- return nil
331
- end
332
- end
333
-
334
- # gets a entry
335
- def get_entry
336
- p0 = @entry_pos_flag ? @stream.pos : nil
337
- e = @stream.gets(@delimiter)
338
- if e and @delimiter_overrun then
339
- if e[-@delimiter.size, @delimiter.size ] == @delimiter then
340
- overrun = e[-@delimiter_overrun, @delimiter_overrun]
341
- e[-@delimiter_overrun, @delimiter_overrun] = ''
342
- @stream.ungets(overrun)
343
- end
344
- end
345
- p1 = @entry_pos_flag ? @stream.pos : nil
346
- @entry_start_pos = p0
347
- @entry = e
348
- @entry_ended_pos = p1
349
- @entry
350
- end
351
- end #class Defalult
352
- end #module Splitter
29
+ include Enumerable
353
30
 
354
31
  #
355
32
  # Bio::FlatFile.open(file, *arg)
@@ -605,13 +282,17 @@ module Bio
605
282
  @skip_leader_mode == :everytime)
606
283
  @splitter.skip_leader
607
284
  end
608
- r = @splitter.get_entry
285
+ if raw then
286
+ r = @splitter.get_entry
287
+ else
288
+ r = @splitter.get_parsed_entry
289
+ end
609
290
  @firsttime_flag = false
610
291
  return nil unless r
611
292
  if raw then
612
293
  r
613
294
  else
614
- @entry = @dbclass.new(r)
295
+ @entry = r
615
296
  @entry
616
297
  end
617
298
  end
@@ -660,7 +341,7 @@ module Bio
660
341
  # Resets file pointer to the start of the flatfile.
661
342
  # (similar to IO#rewind)
662
343
  def rewind
663
- r = @stream.rewind
344
+ r = (@splitter || @stream).rewind
664
345
  @firsttime_flag = true
665
346
  r
666
347
  end
@@ -722,7 +403,12 @@ module Bio
722
403
  begin
723
404
  @splitter = @dbclass.flatfile_splitter(@dbclass, @stream)
724
405
  rescue NameError, NoMethodError
725
- @splitter = Splitter::Default.new(klass, @stream)
406
+ begin
407
+ splitter_class = @dbclass::FLATFILE_SPLITTER
408
+ rescue NameError
409
+ splitter_class = Splitter::Default
410
+ end
411
+ @splitter = splitter_class.new(klass, @stream)
726
412
  end
727
413
  else
728
414
  @dbclass = nil
@@ -775,528 +461,6 @@ module Bio
775
461
  AutoDetect.default.autodetect(text)
776
462
  end
777
463
 
778
-
779
- # AutoDetect automatically determines database class of given data.
780
- class AutoDetect
781
-
782
- include TSort
783
-
784
- # Array to store autodetection rules.
785
- # This is defined only for inspect.
786
- class RulesArray < Array
787
- # visualize contents
788
- def inspect
789
- "[#{self.collect { |e| e.name.inspect }.join(' ')}]"
790
- end
791
- end #class RulesArray
792
-
793
- # Template of a single rule of autodetection
794
- class RuleTemplate
795
- # Creates a new element.
796
- def self.[](*arg)
797
- self.new(*arg)
798
- end
799
-
800
- # Creates a new element.
801
- def initialize
802
- @higher_priority_elements = RulesArray.new
803
- @lower_priority_elements = RulesArray.new
804
- @name = nil
805
- end
806
-
807
- # self is prior to the _elem_.
808
- def is_prior_to(elem)
809
- return nil if self == elem
810
- elem.higher_priority_elements << self
811
- self.lower_priority_elements << elem
812
- true
813
- end
814
-
815
- # higher priority elements
816
- attr_reader :higher_priority_elements
817
- # lower priority elements
818
- attr_reader :lower_priority_elements
819
-
820
- # database classes
821
- attr_reader :dbclasses
822
-
823
- # unique name of the element
824
- attr_accessor :name
825
-
826
- # If given text (and/or meta information) is known, returns
827
- # the database class.
828
- # Otherwise, returns nil or false.
829
- #
830
- # _text_ will be a String.
831
- # _meta_ will be a Hash.
832
- # _meta_ may contain following keys.
833
- # :path => pathname, filename or uri.
834
- def guess(text, meta)
835
- nil
836
- end
837
-
838
- private
839
- # Gets constant from constant name given as a string.
840
- def str2const(str)
841
- const = Object
842
- str.split(/\:\:/).each do |x|
843
- const = const.const_get(x)
844
- end
845
- const
846
- end
847
-
848
- # Gets database class from given object.
849
- # Current implementation is:
850
- # if _obj_ is kind of String, regarded as a constant.
851
- # Otherwise, returns _obj_ as is.
852
- def get_dbclass(obj)
853
- obj.kind_of?(String) ? str2const(obj) : obj
854
- end
855
- end #class Rule_Template
856
-
857
- # RuleDebug is a class for debugging autodetect classes/methods
858
- class RuleDebug < RuleTemplate
859
- # Creates a new instance.
860
- def initialize(name)
861
- super()
862
- @name = name
863
- end
864
-
865
- # prints information to the $stderr.
866
- def guess(text, meta)
867
- $stderr.puts @name
868
- $stderr.puts text.inspect
869
- $stderr.puts meta.inspect
870
- nil
871
- end
872
- end #class RuleDebug
873
-
874
- # Special element that is always top or bottom priority.
875
- class RuleSpecial < RuleTemplate
876
- def initialize(name)
877
- #super()
878
- @name = name
879
- end
880
- # modification of @name is inhibited.
881
- def name=(x)
882
- raise 'cannot modify name'
883
- end
884
-
885
- # always returns void array
886
- def higher_priority_elements
887
- []
888
- end
889
- # always returns void array
890
- def lower_priority_elements
891
- []
892
- end
893
- end #class RuleSpecial
894
-
895
- # Special element that is always top priority.
896
- TopRule = RuleSpecial.new('top')
897
- # Special element that is always bottom priority.
898
- BottomRule = RuleSpecial.new('bottom')
899
-
900
- # A autodetection rule to use a regular expression
901
- class RuleRegexp < RuleTemplate
902
- # Creates a new instance.
903
- def initialize(dbclass, re)
904
- super()
905
- @re = re
906
- @name = dbclass.to_s
907
- @dbclass = nil
908
- @dbclass_lazy = dbclass
909
- end
910
-
911
- # database class (lazy evaluation)
912
- def dbclass
913
- unless @dbclass
914
- @dbclass = get_dbclass(@dbclass_lazy)
915
- end
916
- @dbclass
917
- end
918
- private :dbclass
919
-
920
- # returns database classes
921
- def dbclasses
922
- [ dbclass ]
923
- end
924
-
925
- # If given text matches the regexp, returns the database class.
926
- # Otherwise, returns nil or false.
927
- # _meta_ is ignored.
928
- def guess(text, meta)
929
- @re =~ text ? dbclass : nil
930
- end
931
- end #class RuleRegexp
932
-
933
- # A autodetection rule to use more than two regular expressions.
934
- # If given string matches one of the regular expressions,
935
- # returns the database class.
936
- class RuleRegexp2 < RuleRegexp
937
- # Creates a new instance.
938
- def initialize(dbclass, *regexps)
939
- super(dbclass, nil)
940
- @regexps = regexps
941
- end
942
-
943
- # If given text matches one of the regexp, returns the database class.
944
- # Otherwise, returns nil or false.
945
- # _meta_ is ignored.
946
- def guess(text, meta)
947
- @regexps.each do |re|
948
- return dbclass if re =~ text
949
- end
950
- nil
951
- end
952
- end #class RuleRegexp
953
-
954
- # A autodetection rule that passes data to the proc object.
955
- class RuleProc < RuleTemplate
956
- # Creates a new instance.
957
- def initialize(*dbclasses, &proc)
958
- super()
959
- @proc = proc
960
- @dbclasses = nil
961
- @dbclasses_lazy = dbclasses
962
- @name = dbclasses.collect { |x| x.to_s }.join('|')
963
- end
964
-
965
- # database classes (lazy evaluation)
966
- def dbclasses
967
- unless @dbclasses
968
- @dbclasses = @dbclasses_lazy.collect { |x| get_dbclass(x) }
969
- end
970
- @dbclasses
971
- end
972
-
973
- # If given text (and/or meta information) is known, returns
974
- # the database class.
975
- # Otherwise, returns nil or false.
976
- #
977
- # Refer RuleTemplate#guess for _meta_.
978
- def guess(text, meta)
979
- @proc.call(text)
980
- end
981
- end #class RuleProc
982
-
983
- # Creates a new Autodetect object
984
- def initialize
985
- # stores autodetection rules.
986
- @rules = Hash.new
987
- # stores elements (cache)
988
- @elements = nil
989
- self.add(TopRule)
990
- self.add(BottomRule)
991
- end
992
-
993
- # Adds a new element.
994
- # Returns _elem_.
995
- def add(elem)
996
- raise 'element name conflicts' if @rules[elem.name]
997
- @elements = nil
998
- @rules[elem.name] = elem
999
- elem
1000
- end
1001
-
1002
- # (required by TSort.)
1003
- # For all elements, yields each element.
1004
- def tsort_each_node(&x)
1005
- @rules.each_value(&x)
1006
- end
1007
-
1008
- # (required by TSort.)
1009
- # For a given element, yields each child
1010
- # (= lower priority elements) of the element.
1011
- def tsort_each_child(elem)
1012
- if elem == TopRule then
1013
- @rules.each_value do |e|
1014
- yield e unless e == TopRule or
1015
- e.lower_priority_elements.index(TopRule)
1016
- end
1017
- elsif elem == BottomRule then
1018
- @rules.each_value do |e|
1019
- yield e if e.higher_priority_elements.index(BottomRule)
1020
- end
1021
- else
1022
- elem.lower_priority_elements.each do |e|
1023
- yield e if e != BottomRule
1024
- end
1025
- unless elem.higher_priority_elements.index(BottomRule)
1026
- yield BottomRule
1027
- end
1028
- end
1029
- end
1030
-
1031
- # Returns current elements as an array
1032
- # whose order fulfills all elements' priorities.
1033
- def elements
1034
- unless @elements
1035
- ary = tsort
1036
- ary.reverse!
1037
- @elements = ary
1038
- end
1039
- @elements
1040
- end
1041
-
1042
- # rebuilds the object and clears internal cache.
1043
- def rehash
1044
- @rules.rehash
1045
- @elements = nil
1046
- end
1047
-
1048
- # visualizes the object (mainly for debug)
1049
- def inspect
1050
- "<#{self.class.to_s} " +
1051
- self.elements.collect { |e| e.name.inspect }.join(' ') +
1052
- ">"
1053
- end
1054
-
1055
- # Iterates over each element.
1056
- def each_rule(&x) #:yields: elem
1057
- elements.each(&x)
1058
- end
1059
-
1060
- # Autodetect from the text.
1061
- # Returns a database class if succeeded.
1062
- # Returns nil if failed.
1063
- def autodetect(text, meta = {})
1064
- r = nil
1065
- elements.each do |e|
1066
- #$stderr.puts e.name
1067
- r = e.guess(text, meta)
1068
- break if r
1069
- end
1070
- r
1071
- end
1072
-
1073
- # autodetect from the FlatFile object.
1074
- # Returns a database class if succeeded.
1075
- # Returns nil if failed.
1076
- def autodetect_flatfile(ff, lines = 31)
1077
- meta = {}
1078
- stream = ff.instance_eval { @stream }
1079
- begin
1080
- path = stream.path
1081
- rescue NameError
1082
- end
1083
- if path then
1084
- meta[:path] = path
1085
- # call autodetect onece with meta and without any read action
1086
- if r = self.autodetect(stream.prefetch_buffer, meta)
1087
- return r
1088
- end
1089
- end
1090
- # reading stream
1091
- 1.upto(lines) do |x|
1092
- break unless line = stream.prefetch_gets
1093
- if line.strip.size > 0 then
1094
- if r = self.autodetect(stream.prefetch_buffer, meta)
1095
- return r
1096
- end
1097
- end
1098
- end
1099
- return nil
1100
- end
1101
-
1102
- # default autodetect object for class method
1103
- @default = nil
1104
-
1105
- # returns the default autodetect object
1106
- def self.default
1107
- unless @default then
1108
- @default = self.make_default
1109
- end
1110
- @default
1111
- end
1112
-
1113
- # sets the default autodetect object.
1114
- def self.default=(ad)
1115
- @default = ad
1116
- end
1117
-
1118
- # make a new autodetect object
1119
- def self.[](*arg)
1120
- a = self.new
1121
- arg.each { |e| a.add(e) }
1122
- a
1123
- end
1124
-
1125
- # make a default of default autodetect object
1126
- def self.make_default
1127
- a = self[
1128
- genbank = RuleRegexp[ 'Bio::GenBank',
1129
- /^LOCUS .+ bp .*[a-z]*[DR]?NA/ ],
1130
- genpept = RuleRegexp[ 'Bio::GenPept',
1131
- /^LOCUS .+ aa .+/ ],
1132
- medline = RuleRegexp[ 'Bio::MEDLINE',
1133
- /^PMID\- [0-9]+$/ ],
1134
- embl = RuleRegexp[ 'Bio::EMBL',
1135
- /^ID .+\; .*(DNA|RNA|XXX)\;/ ],
1136
- sptr = RuleRegexp2[ 'Bio::SPTR',
1137
- /^ID .+\; *PRT\;/,
1138
- /^ID [-A-Za-z0-9_\.]+ .+\; *[0-9]+ *AA\./ ],
1139
- prosite = RuleRegexp[ 'Bio::PROSITE',
1140
- /^ID [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
1141
- transfac = RuleRegexp[ 'Bio::TRANSFAC',
1142
- /^AC [-A-Za-z0-9_\.]+$/ ],
1143
-
1144
- aaindex = RuleProc.new('Bio::AAindex1', 'Bio::AAindex2') do |text|
1145
- if /^H [-A-Z0-9_\.]+$/ =~ text then
1146
- if text =~ /^M [rc]/ then
1147
- Bio::AAindex2
1148
- elsif text =~ /^I A\/L/ then
1149
- Bio::AAindex1
1150
- else
1151
- false #fail to determine
1152
- end
1153
- else
1154
- nil
1155
- end
1156
- end,
1157
-
1158
- litdb = RuleRegexp[ 'Bio::LITDB',
1159
- /^CODE [0-9]+$/ ],
1160
- brite = RuleRegexp[ 'Bio::KEGG::BRITE',
1161
- /^Entry [A-Z0-9]+/ ],
1162
- orthology = RuleRegexp[ 'Bio::KEGG::ORTHOLOGY',
1163
- /^ENTRY .+ KO\s*/ ],
1164
- drug = RuleRegexp[ 'Bio::KEGG::DRUG',
1165
- /^ENTRY .+ Drug\s*/ ],
1166
- glycan = RuleRegexp[ 'Bio::KEGG::GLYCAN',
1167
- /^ENTRY .+ Glycan\s*/ ],
1168
- enzyme = RuleRegexp2[ 'Bio::KEGG::ENZYME',
1169
- /^ENTRY EC [0-9\.]+$/,
1170
- /^ENTRY .+ Enzyme\s*/
1171
- ],
1172
- compound = RuleRegexp2[ 'Bio::KEGG::COMPOUND',
1173
- /^ENTRY C[A-Za-z0-9\._]+$/,
1174
- /^ENTRY .+ Compound\s*/
1175
- ],
1176
- reaction = RuleRegexp2[ 'Bio::KEGG::REACTION',
1177
- /^ENTRY R[A-Za-z0-9\._]+$/,
1178
- /^ENTRY .+ Reaction\s*/
1179
- ],
1180
- genes = RuleRegexp[ 'Bio::KEGG::GENES',
1181
- /^ENTRY .+ (CDS|gene|.*RNA|Contig) / ],
1182
- genome = RuleRegexp[ 'Bio::KEGG::GENOME',
1183
- /^ENTRY [a-z]+$/ ],
1184
-
1185
- fantom = RuleProc.new('Bio::FANTOM::MaXML::Cluster',
1186
- 'Bio::FANTOM::MaXML::Sequence') do |text|
1187
- if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
1188
- case $1
1189
- when 'clusters'
1190
- Bio::FANTOM::MaXML::Cluster
1191
- when 'sequences'
1192
- Bio::FANTOM::MaXML::Sequence
1193
- else
1194
- nil #unknown
1195
- end
1196
- else
1197
- nil
1198
- end
1199
- end,
1200
-
1201
- pdb = RuleRegexp[ 'Bio::PDB',
1202
- /^HEADER .{40}\d\d\-[A-Z]{3}\-\d\d [0-9A-Z]{4}/ ],
1203
- het = RuleRegexp[ 'Bio::PDB::ChemicalComponent',
1204
- /^RESIDUE +.+ +\d+\s*$/ ],
1205
-
1206
- clustal = RuleRegexp2[ 'Bio::ClustalW::Report',
1207
- /^CLUSTAL .*\(.*\).*sequence +alignment/,
1208
- /^CLUSTAL FORMAT for T-COFFEE/ ],
1209
-
1210
- gcg_msf = RuleRegexp[ 'Bio::GCG::Msf',
1211
- /^!!(N|A)A_MULTIPLE_ALIGNMENT .+/ ],
1212
-
1213
- gcg_seq = RuleRegexp[ 'Bio::GCG::Seq',
1214
- /^!!(N|A)A_SEQUENCE .+/ ],
1215
-
1216
- blastxml = RuleRegexp[ 'Bio::Blast::Report',
1217
- /\<\!DOCTYPE BlastOutput PUBLIC / ],
1218
- wublast = RuleRegexp[ 'Bio::Blast::WU::Report',
1219
- /^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
1220
- wutblast = RuleRegexp[ 'Bio::Blast::WU::Report_TBlast',
1221
- /^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
1222
- blast = RuleRegexp[ 'Bio::Blast::Default::Report',
1223
- /^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
1224
- tblast = RuleRegexp[ 'Bio::Blast::Default::Report_TBlast',
1225
- /^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
1226
-
1227
- blat = RuleRegexp[ 'Bio::Blat::Report',
1228
- /^psLayout version \d+/ ],
1229
- spidey = RuleRegexp[ 'Bio::Spidey::Report',
1230
- /^\-\-SPIDEY version .+\-\-$/ ],
1231
- hmmer = RuleRegexp[ 'Bio::HMMER::Report',
1232
- /^HMMER +\d+\./ ],
1233
- sim4 = RuleRegexp[ 'Bio::Sim4::Report',
1234
- /^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
1235
-
1236
- fastaformat = RuleProc.new('Bio::FastaFormat',
1237
- 'Bio::NBRF',
1238
- 'Bio::FastaNumericFormat') do |text|
1239
- if /^>.+$/ =~ text
1240
- case text
1241
- when /^>([PF]1|[DR][LC]|N[13]|XX)\;.+/
1242
- Bio::NBRF
1243
- when /^>.+$\s+(^\#.*$\s*)*^\s*\d*\s*[-a-zA-Z_\.\[\]\(\)\*\+\$]+/
1244
- Bio::FastaFormat
1245
- when /^>.+$\s+^\s*\d+(\s+\d+)*\s*$/
1246
- Bio::FastaNumericFormat
1247
- else
1248
- false
1249
- end
1250
- else
1251
- nil
1252
- end
1253
- end
1254
- ]
1255
-
1256
- # dependencies
1257
- # NCBI
1258
- genbank.is_prior_to genpept
1259
- # EMBL/UniProt
1260
- embl.is_prior_to sptr
1261
- sptr.is_prior_to prosite
1262
- prosite.is_prior_to transfac
1263
- # KEGG
1264
- #aaindex.is_prior_to litdb
1265
- #litdb.is_prior_to brite
1266
- brite.is_prior_to orthology
1267
- orthology.is_prior_to drug
1268
- drug.is_prior_to glycan
1269
- glycan.is_prior_to enzyme
1270
- enzyme.is_prior_to compound
1271
- compound.is_prior_to reaction
1272
- reaction.is_prior_to genes
1273
- genes.is_prior_to genome
1274
- # PDB
1275
- pdb.is_prior_to het
1276
- # BLAST
1277
- wublast.is_prior_to wutblast
1278
- wutblast.is_prior_to blast
1279
- blast.is_prior_to tblast
1280
- # FastaFormat
1281
- BottomRule.is_prior_to(fastaformat)
1282
-
1283
- # for debug
1284
- #debug_first = RuleDebug.new('debug_first')
1285
- #a.add(debug_first)
1286
- #debug_first.is_prior_to(TopRule)
1287
-
1288
- ## for debug
1289
- #debug_last = RuleDebug.new('debug_last')
1290
- #a.add(debug_last)
1291
- #BottomRule.is_prior_to(debug_last)
1292
- #fastaformat.is_prior_to(debug_last)
1293
-
1294
- a.rehash
1295
- return a
1296
- end
1297
-
1298
- end #class AutoDetect
1299
-
1300
464
  end #class FlatFile
1301
465
 
1302
466
  end #module Bio