bio 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259)
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -333,6 +333,63 @@ class XML < Bio::SOAPWSDL
333
333
  SERVER_URI = BASE_URI + "PML.wsdl"
334
334
  end
335
335
 
336
+ # === RequestManager
337
+ #
338
+ # Sequence Retrieving System
339
+ #
340
+ # * http://xml.nig.ac.jp/doc/RequestManager.txt
341
+ #
342
+ # === Examples
343
+ #
344
+ # serv = Bio::DDBJ::XML::RequestManager.new
345
+ # puts serv.getAsyncResult('20070420102828140')
346
+ #
347
+ # === WSDL Methods
348
+ #
349
+ # * getAsyncResult( requestId )
350
+ # * getAsyncResultMime( requestId )
351
+ #
352
+ # === References
353
+ #
354
+ # * http://xml.nig.ac.jp/doc/RequestManager.txt
355
+ #
356
+ class RequestManager < XML
357
+ SERVER_URI = BASE_URI + "RequestManager.wsdl"
358
+
359
+ # RequestManager using DDBJ REST interface
360
+ class REST
361
+ require 'bio/command'
362
+
363
+ Uri = 'http://xml.nig.ac.jp/rest/Invoke'
364
+ Service = 'RequestManager'
365
+
366
+ def getAsyncResult(requestId)
367
+ params = {
368
+ 'service' => Service,
369
+ 'method' => 'getAsyncResult',
370
+ 'requestId' => requestId.to_s
371
+ }
372
+ r = Bio::Command.post_form(Uri, params)
373
+ r.body
374
+ end
375
+ end #class REST
376
+
377
+ unless defined? new_orig then
378
+ class << RequestManager
379
+ alias new_orig new
380
+ private :new_orig
381
+ end
382
+ end
383
+
384
+ # creates a new driver
385
+ def self.new(wsdl = nil)
386
+ begin
387
+ new_orig(wsdl)
388
+ rescue RuntimeError
389
+ REST.new
390
+ end
391
+ end
392
+ end #class RequestManager
336
393
 
337
394
  # === SRS
338
395
  #
@@ -5,7 +5,7 @@
5
5
  # Mitsuteru C. Nakao <n@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: ensembl.rb,v 1.11 2007/04/05 23:35:41 trevor Exp $
8
+ # $Id:$
9
9
  #
10
10
  # == Description
11
11
  #
@@ -185,7 +185,7 @@ class Ensembl
185
185
 
186
186
  params = defaults.update(options)
187
187
 
188
- result, = Bio::Command.post_form("#{@uri}/exportview", params)
188
+ result = Bio::Command.post_form("#{@uri}/exportview", params)
189
189
 
190
190
  return result.body
191
191
  end
@@ -5,7 +5,7 @@
5
5
  # Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: fetch.rb,v 1.10 2007/04/05 23:35:41 trevor Exp $
8
+ # $Id:$
9
9
  #
10
10
  # == DESCRIPTION
11
11
  #
@@ -26,6 +26,7 @@
26
26
  #
27
27
 
28
28
  require 'uri'
29
+ require 'cgi'
29
30
  require 'bio/command'
30
31
 
31
32
  module Bio
@@ -102,11 +103,12 @@ module Bio
102
103
  # * _style_: [raw|html] (default = 'raw')
103
104
  # * _format_: name of output format (see Bio::Fetch#formats)
104
105
  def fetch(db, id, style = 'raw', format = nil)
105
- query = [ "db=#{db}", "id=#{id}", "style=#{style}" ]
106
- query.push("format=#{format}") if format
107
- query = query.join('&')
106
+ query = [ [ 'db', db ],
107
+ [ 'id', id ],
108
+ [ 'style', style ] ]
109
+ query.push([ 'format', format ]) if format
108
110
 
109
- Bio::Command.read_uri(@url + '?' + URI.escape(query))
111
+ _get(query)
110
112
  end
111
113
 
112
114
  # Shortcut for using BioRuby's BioFetch server. You can fetch an entry
@@ -139,9 +141,7 @@ module Bio
139
141
  # ---
140
142
  # *Returns*:: array of database names
141
143
  def databases
142
- query = "info=dbs"
143
-
144
- Bio::Command.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
144
+ _get_single('info', 'dbs').strip.split(/\s+/)
145
145
  end
146
146
 
147
147
  # Lists the formats that are available for a given database. Like the
@@ -156,9 +156,9 @@ module Bio
156
156
  # *Returns*:: array of formats
157
157
  def formats(database = @database)
158
158
  if database
159
- query = "info=formats;db=#{database}"
160
-
161
- Bio::Command.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
159
+ query = [ [ 'info', 'formats' ],
160
+ [ 'db', database ] ]
161
+ _get(query).strip.split(/\s+/)
162
162
  end
163
163
  end
164
164
 
@@ -170,11 +170,25 @@ module Bio
170
170
  # *Arguments*: none
171
171
  # *Returns*:: number
172
172
  def maxids
173
- query = "info=maxids"
173
+ _get_single('info', 'maxids').to_i
174
+ end
174
175
 
175
- Bio::Command.read_uri(@url + '?' + URI.escape(query)).to_i
176
+ private
177
+ # (private) query to the server.
178
+ # ary must be nested array, e.g. [ [ key0, val0 ], [ key1, val1 ], ... ]
179
+ def _get(ary)
180
+ query = ary.collect do |a|
181
+ "#{CGI.escape(a[0])}=#{CGI.escape(a[1])}"
182
+ end.join('&')
183
+ Bio::Command.read_uri(@url + '?' + query)
176
184
  end
177
-
185
+
186
+ # (private) query with single parameter
187
+ def _get_single(key, val)
188
+ query = "#{CGI.escape(key)}=#{CGI.escape(val)}"
189
+ Bio::Command.read_uri(@url + '?' + query)
190
+ end
191
+
178
192
  end
179
193
 
180
194
  end # module Bio
@@ -13,7 +13,6 @@
13
13
  # It can automatically detect data format, and users do not need to tell
14
14
  # the class what the data is.
15
15
  #
16
- require 'tsort'
17
16
 
18
17
  module Bio
19
18
 
@@ -23,333 +22,11 @@ module Bio
23
22
  # the class what the data is.
24
23
  class FlatFile
25
24
 
26
- include Enumerable
27
-
28
- # Wrapper for a IO (or IO-like) object.
29
- # It can input with a buffer.
30
- class BufferedInputStream
31
- # Creates a new input stream wrapper
32
- def initialize(io, path)
33
- @io = io
34
- @path = path
35
- # initialize prefetch buffer
36
- @buffer = ''
37
- end
38
-
39
- # Creates a new input stream wrapper from the given IO object.
40
- def self.for_io(io)
41
- begin
42
- path = io.path
43
- rescue NameError
44
- path = nil
45
- end
46
- self.new(io, path)
47
- end
48
-
49
- # Creates a new input stream wrapper to open file _filename_
50
- # by using File.open.
51
- # *arg is passed to File.open.
52
- #
53
- # Like File.open, a block can be accepted.
54
- def self.open_file(filename, *arg)
55
- if block_given? then
56
- File.open(filename, *arg) do |fobj|
57
- yield self.new(fobj, filename)
58
- end
59
- else
60
- fobj = File.open(filename, *arg)
61
- self.new(fobj, filename)
62
- end
63
- end
64
-
65
- # Creates a new input stream wrapper from URI specified as _uri_.
66
- # by using OpenURI.open_uri or URI#open.
67
- # _uri_ must be a String or URI object.
68
- # *arg is passed to OpenURI.open_uri or URI#open.
69
- #
70
- # Like OpenURI.open_uri, it can accept a block.
71
- def self.open_uri(uri, *arg)
72
- if uri.kind_of?(URI)
73
- if block_given?
74
- uri.open(*arg) do |fobj|
75
- yield self.new(fobj, uri.to_s)
76
- end
77
- else
78
- fobj = uri.open(*arg)
79
- self.new(fobj, uri.to_s)
80
- end
81
- else
82
- if block_given?
83
- OpenURI.open_uri(uri, *arg) do |fobj|
84
- yield self.new(fobj, uri)
85
- end
86
- else
87
- fobj = OpenURI.open_uri(uri, *arg)
88
- self.new(fobj, uri)
89
- end
90
- end
91
- end
92
-
93
- # Pathname, filename or URI to open the object.
94
- # Like File#path, returned value isn't normalized.
95
- attr_reader :path
96
-
97
- # Converts to IO object if possible
98
- def to_io
99
- @io.to_io
100
- end
101
-
102
- # Closes the IO object if possible
103
- def close
104
- @io.close
105
- end
106
-
107
- # Rewinds the IO object if possible
108
- # Internal buffer in this wrapper is cleared.
109
- def rewind
110
- r = @io.rewind
111
- @buffer = ''
112
- r
113
- end
114
-
115
- # Returns current file position
116
- def pos
117
- @io.pos - @buffer.size
118
- end
119
-
120
- # Sets current file position if possible
121
- # Internal buffer in this wrapper is cleared.
122
- def pos=(p)
123
- r = (@io.pos = p)
124
- @buffer = ''
125
- r
126
- end
127
-
128
- # Returns true if end-of-file. Otherwise, returns false.
129
- #
130
- # Note that it returns false if the internal buffer in this wrapper
131
- # is not empty.
132
- def eof?
133
- if @buffer.size > 0
134
- false
135
- else
136
- @io.eof?
137
- end
138
- end
139
-
140
- # Same as IO#gets.
141
- def gets(io_rs = $/)
142
- if @buffer.size > 0
143
- if io_rs == nil then
144
- r = @buffer + @io.gets(nil).to_s
145
- @buffer = ''
146
- else
147
- if io_rs == '' then
148
- sp_rs = /\n\n/n
149
- sp_rs_orig = "\n\n"
150
- else
151
- sp_rs = Regexp.new(Regexp.escape(io_rs, 'n'), 0, 'n')
152
- sp_rs_orig = io_rs
153
- end
154
- a = @buffer.split(sp_rs, 2)
155
- if a.size > 1 then
156
- r = a[0] + sp_rs_orig
157
- @buffer = a[1]
158
- else
159
- @buffer << @io.gets(io_rs).to_s
160
- a = @buffer.split(sp_rs, 2)
161
- if a.size > 1 then
162
- r = a[0] + sp_rs_orig
163
- @buffer = a[1].to_s
164
- else
165
- r = @buffer
166
- @buffer = ''
167
- end
168
- end
169
- end
170
- r
171
- else
172
- @io.gets(io_rs)
173
- end
174
- end
175
-
176
- # Pushes back given str to the internal buffer.
177
- # Returns nil.
178
- # str must be read previously with the wrapper object.
179
- #
180
- # Note that in current implementation, the str can be everything,
181
- # but please don't depend on it.
182
- #
183
- def ungets(str)
184
- @buffer = str + @buffer
185
- nil
186
- end
187
-
188
- # Same as IO#getc.
189
- def getc
190
- if @buffer.size > 0 then
191
- r = @buffer[0]
192
- @buffer = @buffer[1..-1]
193
- else
194
- r = @io.getc
195
- end
196
- r
197
- end
198
-
199
- # Pushes back one character into the internal buffer.
200
- # Unlike IO#ungetc, it can be called more than one time.
201
- def ungetc(c)
202
- @buffer = sprintf("%c", c) + @buffer
203
- nil
204
- end
25
+ autoload :AutoDetect, 'bio/io/flatfile/autodetection'
26
+ autoload :Splitter, 'bio/io/flatfile/splitter'
27
+ autoload :BufferedInputStream, 'bio/io/flatfile/buffer'
205
28
 
206
- # Gets current prefetch buffer
207
- def prefetch_buffer
208
- @buffer
209
- end
210
-
211
- # It does @io.gets, and adds the returned string
212
- # to the internal buffer, and returns the string.
213
- def prefetch_gets(*arg)
214
- r = @io.gets(*arg)
215
- @buffer << r if r
216
- r
217
- end
218
-
219
- # It does @io.readpartial, and adds the returned string
220
- # to the internal buffer, and returns the string.
221
- def prefetch_readpartial(*arg)
222
- r = @io.readpartial(*arg)
223
- @buffer << r if r
224
- r
225
- end
226
-
227
- # Skips space characters in the stream.
228
- # returns nil.
229
- def skip_spaces
230
- ws = { ?\s => true, ?\n => true, ?\r => true, ?\t => true }
231
- while r = self.getc
232
- unless ws[r] then
233
- self.ungetc(r)
234
- break
235
- end
236
- end
237
- nil
238
- end
239
- end #class BufferedInputStream
240
-
241
- # Splitter is a class to get entries from a buffered input stream.
242
- module Splitter
243
- # This is a template of splitter.
244
- class Template
245
- # Creates a new splitter.
246
- def initialize(klass, bstream)
247
- @stream = bstream
248
- raise NotImplementedError
249
- end
250
-
251
- # skips leader of the entry.
252
- def skip_leader
253
- raise NotImplementedError
254
- end
255
-
256
- # Gets entry as a string
257
- def get_entry
258
- raise NotImplementedError
259
- end
260
-
261
- # the last entry read from the stream
262
- attr_reader :entry
263
-
264
- # a flag to write down entry start and end positions
265
- attr_accessor :entry_pos_flag
266
-
267
- # start position of the entry
268
- attr_reader :entry_start_pos
269
-
270
- # (end position of the entry) + 1
271
- attr_reader :entry_ended_pos
272
- end
273
-
274
- # Default splitter.
275
- # It sees following constants in the given class.
276
- # DELIMITER:: (String) delimiter indicates the end of a entry.
277
- # FLATFILE_HEADER:: (String) start of a entry, located on head of a line.
278
- # DELIMITER_OVERRUN:: (Integer) excess read size included in DELIMITER.
279
- #
280
- class Default < Template
281
- # Creates a new splitter.
282
- # klass:: database class
283
- # bstream:: input stream. It must be a BufferedInputStream object.
284
- def initialize(klass, bstream)
285
- @stream = bstream
286
- @delimiter = klass::DELIMITER rescue nil
287
- @header = klass::FLATFILE_HEADER rescue nil
288
- # for specific classes' benefit
289
- unless header
290
- if klass == Bio::GenBank or klass == Bio::GenPept
291
- @header = 'LOCUS '
292
- end
293
- end
294
- @delimiter_overrun = klass::DELIMITER_OVERRUN rescue nil
295
- @entry_pos_flag = nil
296
- end
297
-
298
- # (String) delimiter indicates the end of a entry.
299
- attr_accessor :delimiter
300
-
301
- # (String) start of a entry, located on head of a line.
302
- attr_accessor :header
303
-
304
- # (Integer) excess read data size included in delimiter.
305
- attr_accessor :delimiter_overrun
306
-
307
- # Skips leader of the entry.
308
- #
309
- # If @header is not nil, it reads till the contents of @header
310
- # comes at the head of a line.
311
- # If correct FLATFILE_HEADER is found, returns true.
312
- # Otherwise, returns nil.
313
- def skip_leader
314
- if @header then
315
- data = ''
316
- while s = @stream.gets(@header)
317
- data << s
318
- if data.split(/[\r\n]+/)[-1] == @header then
319
- @stream.ungets(@header)
320
- return true
321
- end
322
- end
323
- # @header was not found. For safety,
324
- # pushes back data with removing white spaces in the head.
325
- data.sub(/\A\s+/, '')
326
- @stream.ungets(data)
327
- return nil
328
- else
329
- @stream.skip_spaces
330
- return nil
331
- end
332
- end
333
-
334
- # gets a entry
335
- def get_entry
336
- p0 = @entry_pos_flag ? @stream.pos : nil
337
- e = @stream.gets(@delimiter)
338
- if e and @delimiter_overrun then
339
- if e[-@delimiter.size, @delimiter.size ] == @delimiter then
340
- overrun = e[-@delimiter_overrun, @delimiter_overrun]
341
- e[-@delimiter_overrun, @delimiter_overrun] = ''
342
- @stream.ungets(overrun)
343
- end
344
- end
345
- p1 = @entry_pos_flag ? @stream.pos : nil
346
- @entry_start_pos = p0
347
- @entry = e
348
- @entry_ended_pos = p1
349
- @entry
350
- end
351
- end #class Defalult
352
- end #module Splitter
29
+ include Enumerable
353
30
 
354
31
  #
355
32
  # Bio::FlatFile.open(file, *arg)
@@ -605,13 +282,17 @@ module Bio
605
282
  @skip_leader_mode == :everytime)
606
283
  @splitter.skip_leader
607
284
  end
608
- r = @splitter.get_entry
285
+ if raw then
286
+ r = @splitter.get_entry
287
+ else
288
+ r = @splitter.get_parsed_entry
289
+ end
609
290
  @firsttime_flag = false
610
291
  return nil unless r
611
292
  if raw then
612
293
  r
613
294
  else
614
- @entry = @dbclass.new(r)
295
+ @entry = r
615
296
  @entry
616
297
  end
617
298
  end
@@ -660,7 +341,7 @@ module Bio
660
341
  # Resets file pointer to the start of the flatfile.
661
342
  # (similar to IO#rewind)
662
343
  def rewind
663
- r = @stream.rewind
344
+ r = (@splitter || @stream).rewind
664
345
  @firsttime_flag = true
665
346
  r
666
347
  end
@@ -722,7 +403,12 @@ module Bio
722
403
  begin
723
404
  @splitter = @dbclass.flatfile_splitter(@dbclass, @stream)
724
405
  rescue NameError, NoMethodError
725
- @splitter = Splitter::Default.new(klass, @stream)
406
+ begin
407
+ splitter_class = @dbclass::FLATFILE_SPLITTER
408
+ rescue NameError
409
+ splitter_class = Splitter::Default
410
+ end
411
+ @splitter = splitter_class.new(klass, @stream)
726
412
  end
727
413
  else
728
414
  @dbclass = nil
@@ -775,528 +461,6 @@ module Bio
775
461
  AutoDetect.default.autodetect(text)
776
462
  end
777
463
 
778
-
779
- # AutoDetect automatically determines database class of given data.
780
- class AutoDetect
781
-
782
- include TSort
783
-
784
- # Array to store autodetection rules.
785
- # This is defined only for inspect.
786
- class RulesArray < Array
787
- # visualize contents
788
- def inspect
789
- "[#{self.collect { |e| e.name.inspect }.join(' ')}]"
790
- end
791
- end #class RulesArray
792
-
793
- # Template of a single rule of autodetection
794
- class RuleTemplate
795
- # Creates a new element.
796
- def self.[](*arg)
797
- self.new(*arg)
798
- end
799
-
800
- # Creates a new element.
801
- def initialize
802
- @higher_priority_elements = RulesArray.new
803
- @lower_priority_elements = RulesArray.new
804
- @name = nil
805
- end
806
-
807
- # self is prior to the _elem_.
808
- def is_prior_to(elem)
809
- return nil if self == elem
810
- elem.higher_priority_elements << self
811
- self.lower_priority_elements << elem
812
- true
813
- end
814
-
815
- # higher priority elements
816
- attr_reader :higher_priority_elements
817
- # lower priority elements
818
- attr_reader :lower_priority_elements
819
-
820
- # database classes
821
- attr_reader :dbclasses
822
-
823
- # unique name of the element
824
- attr_accessor :name
825
-
826
- # If given text (and/or meta information) is known, returns
827
- # the database class.
828
- # Otherwise, returns nil or false.
829
- #
830
- # _text_ will be a String.
831
- # _meta_ will be a Hash.
832
- # _meta_ may contain following keys.
833
- # :path => pathname, filename or uri.
834
- def guess(text, meta)
835
- nil
836
- end
837
-
838
- private
839
- # Gets constant from constant name given as a string.
840
- def str2const(str)
841
- const = Object
842
- str.split(/\:\:/).each do |x|
843
- const = const.const_get(x)
844
- end
845
- const
846
- end
847
-
848
- # Gets database class from given object.
849
- # Current implementation is:
850
- # if _obj_ is kind of String, regarded as a constant.
851
- # Otherwise, returns _obj_ as is.
852
- def get_dbclass(obj)
853
- obj.kind_of?(String) ? str2const(obj) : obj
854
- end
855
- end #class RuleTemplate
856
-
857
- # RuleDebug is a class for debugging autodetect classes/methods
858
- class RuleDebug < RuleTemplate
859
- # Creates a new instance.
860
- def initialize(name)
861
- super()
862
- @name = name
863
- end
864
-
865
- # prints information to the $stderr.
866
- def guess(text, meta)
867
- $stderr.puts @name
868
- $stderr.puts text.inspect
869
- $stderr.puts meta.inspect
870
- nil
871
- end
872
- end #class RuleDebug
873
-
874
- # Special element that is always top or bottom priority.
875
- class RuleSpecial < RuleTemplate
876
- def initialize(name)
877
- #super()
878
- @name = name
879
- end
880
- # modification of @name is inhibited.
881
- def name=(x)
882
- raise 'cannot modify name'
883
- end
884
-
885
- # always returns void array
886
- def higher_priority_elements
887
- []
888
- end
889
- # always returns void array
890
- def lower_priority_elements
891
- []
892
- end
893
- end #class RuleSpecial
894
-
895
- # Special element that is always top priority.
896
- TopRule = RuleSpecial.new('top')
897
- # Special element that is always bottom priority.
898
- BottomRule = RuleSpecial.new('bottom')
899
-
900
- # An autodetection rule that uses a regular expression
901
- class RuleRegexp < RuleTemplate
902
- # Creates a new instance.
903
- def initialize(dbclass, re)
904
- super()
905
- @re = re
906
- @name = dbclass.to_s
907
- @dbclass = nil
908
- @dbclass_lazy = dbclass
909
- end
910
-
911
- # database class (lazy evaluation)
912
- def dbclass
913
- unless @dbclass
914
- @dbclass = get_dbclass(@dbclass_lazy)
915
- end
916
- @dbclass
917
- end
918
- private :dbclass
919
-
920
- # returns database classes
921
- def dbclasses
922
- [ dbclass ]
923
- end
924
-
925
- # If given text matches the regexp, returns the database class.
926
- # Otherwise, returns nil or false.
927
- # _meta_ is ignored.
928
- def guess(text, meta)
929
- @re =~ text ? dbclass : nil
930
- end
931
- end #class RuleRegexp
932
-
933
- # An autodetection rule that uses one or more regular expressions.
934
- # If given string matches one of the regular expressions,
935
- # returns the database class.
936
- class RuleRegexp2 < RuleRegexp
937
- # Creates a new instance.
938
- def initialize(dbclass, *regexps)
939
- super(dbclass, nil)
940
- @regexps = regexps
941
- end
942
-
943
- # If given text matches one of the regexp, returns the database class.
944
- # Otherwise, returns nil or false.
945
- # _meta_ is ignored.
946
- def guess(text, meta)
947
- @regexps.each do |re|
948
- return dbclass if re =~ text
949
- end
950
- nil
951
- end
952
- end #class RuleRegexp2
953
-
954
- # An autodetection rule that passes data to the proc object.
955
- class RuleProc < RuleTemplate
956
- # Creates a new instance.
957
- def initialize(*dbclasses, &proc)
958
- super()
959
- @proc = proc
960
- @dbclasses = nil
961
- @dbclasses_lazy = dbclasses
962
- @name = dbclasses.collect { |x| x.to_s }.join('|')
963
- end
964
-
965
- # database classes (lazy evaluation)
966
- def dbclasses
967
- unless @dbclasses
968
- @dbclasses = @dbclasses_lazy.collect { |x| get_dbclass(x) }
969
- end
970
- @dbclasses
971
- end
972
-
973
- # If given text (and/or meta information) is known, returns
974
- # the database class.
975
- # Otherwise, returns nil or false.
976
- #
977
- # Refer RuleTemplate#guess for _meta_.
978
- def guess(text, meta)
979
- @proc.call(text)
980
- end
981
- end #class RuleProc
982
-
983
- # Creates a new Autodetect object
984
- def initialize
985
- # stores autodetection rules.
986
- @rules = Hash.new
987
- # stores elements (cache)
988
- @elements = nil
989
- self.add(TopRule)
990
- self.add(BottomRule)
991
- end
992
-
993
- # Adds a new element.
994
- # Returns _elem_.
995
- def add(elem)
996
- raise 'element name conflicts' if @rules[elem.name]
997
- @elements = nil
998
- @rules[elem.name] = elem
999
- elem
1000
- end
1001
-
1002
- # (required by TSort.)
1003
- # For all elements, yields each element.
1004
- def tsort_each_node(&x)
1005
- @rules.each_value(&x)
1006
- end
1007
-
1008
- # (required by TSort.)
1009
- # For a given element, yields each child
1010
- # (= lower priority elements) of the element.
1011
- def tsort_each_child(elem)
1012
- if elem == TopRule then
1013
- @rules.each_value do |e|
1014
- yield e unless e == TopRule or
1015
- e.lower_priority_elements.index(TopRule)
1016
- end
1017
- elsif elem == BottomRule then
1018
- @rules.each_value do |e|
1019
- yield e if e.higher_priority_elements.index(BottomRule)
1020
- end
1021
- else
1022
- elem.lower_priority_elements.each do |e|
1023
- yield e if e != BottomRule
1024
- end
1025
- unless elem.higher_priority_elements.index(BottomRule)
1026
- yield BottomRule
1027
- end
1028
- end
1029
- end
1030
-
1031
- # Returns current elements as an array
1032
- # whose order fulfills all elements' priorities.
1033
- def elements
1034
- unless @elements
1035
- ary = tsort
1036
- ary.reverse!
1037
- @elements = ary
1038
- end
1039
- @elements
1040
- end
1041
-
1042
- # rebuilds the object and clears internal cache.
1043
- def rehash
1044
- @rules.rehash
1045
- @elements = nil
1046
- end
1047
-
1048
- # visualizes the object (mainly for debug)
1049
- def inspect
1050
- "<#{self.class.to_s} " +
1051
- self.elements.collect { |e| e.name.inspect }.join(' ') +
1052
- ">"
1053
- end
1054
-
1055
- # Iterates over each element.
1056
- def each_rule(&x) #:yields: elem
1057
- elements.each(&x)
1058
- end
1059
-
1060
- # Autodetect from the text.
1061
- # Returns a database class if succeeded.
1062
- # Returns nil if failed.
1063
- def autodetect(text, meta = {})
1064
- r = nil
1065
- elements.each do |e|
1066
- #$stderr.puts e.name
1067
- r = e.guess(text, meta)
1068
- break if r
1069
- end
1070
- r
1071
- end
1072
-
1073
- # autodetect from the FlatFile object.
1074
- # Returns a database class if succeeded.
1075
- # Returns nil if failed.
1076
- def autodetect_flatfile(ff, lines = 31)
1077
- meta = {}
1078
- stream = ff.instance_eval { @stream }
1079
- begin
1080
- path = stream.path
1081
- rescue NameError
1082
- end
1083
- if path then
1084
- meta[:path] = path
1085
- # call autodetect once with meta and without any read action
1086
- if r = self.autodetect(stream.prefetch_buffer, meta)
1087
- return r
1088
- end
1089
- end
1090
- # reading stream
1091
- 1.upto(lines) do |x|
1092
- break unless line = stream.prefetch_gets
1093
- if line.strip.size > 0 then
1094
- if r = self.autodetect(stream.prefetch_buffer, meta)
1095
- return r
1096
- end
1097
- end
1098
- end
1099
- return nil
1100
- end
1101
-
1102
- # default autodetect object for class method
1103
- @default = nil
1104
-
1105
- # returns the default autodetect object
1106
- def self.default
1107
- unless @default then
1108
- @default = self.make_default
1109
- end
1110
- @default
1111
- end
1112
-
1113
- # sets the default autodetect object.
1114
- def self.default=(ad)
1115
- @default = ad
1116
- end
1117
-
1118
- # make a new autodetect object
1119
- def self.[](*arg)
1120
- a = self.new
1121
- arg.each { |e| a.add(e) }
1122
- a
1123
- end
1124
-
1125
- # make a default of default autodetect object
1126
- def self.make_default
1127
- a = self[
1128
- genbank = RuleRegexp[ 'Bio::GenBank',
1129
- /^LOCUS .+ bp .*[a-z]*[DR]?NA/ ],
1130
- genpept = RuleRegexp[ 'Bio::GenPept',
1131
- /^LOCUS .+ aa .+/ ],
1132
- medline = RuleRegexp[ 'Bio::MEDLINE',
1133
- /^PMID\- [0-9]+$/ ],
1134
- embl = RuleRegexp[ 'Bio::EMBL',
1135
- /^ID .+\; .*(DNA|RNA|XXX)\;/ ],
1136
- sptr = RuleRegexp2[ 'Bio::SPTR',
1137
- /^ID .+\; *PRT\;/,
1138
- /^ID [-A-Za-z0-9_\.]+ .+\; *[0-9]+ *AA\./ ],
1139
- prosite = RuleRegexp[ 'Bio::PROSITE',
1140
- /^ID [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
1141
- transfac = RuleRegexp[ 'Bio::TRANSFAC',
1142
- /^AC [-A-Za-z0-9_\.]+$/ ],
1143
-
1144
- aaindex = RuleProc.new('Bio::AAindex1', 'Bio::AAindex2') do |text|
1145
- if /^H [-A-Z0-9_\.]+$/ =~ text then
1146
- if text =~ /^M [rc]/ then
1147
- Bio::AAindex2
1148
- elsif text =~ /^I A\/L/ then
1149
- Bio::AAindex1
1150
- else
1151
- false #fail to determine
1152
- end
1153
- else
1154
- nil
1155
- end
1156
- end,
1157
-
1158
- litdb = RuleRegexp[ 'Bio::LITDB',
1159
- /^CODE [0-9]+$/ ],
1160
- brite = RuleRegexp[ 'Bio::KEGG::BRITE',
1161
- /^Entry [A-Z0-9]+/ ],
1162
- orthology = RuleRegexp[ 'Bio::KEGG::ORTHOLOGY',
1163
- /^ENTRY .+ KO\s*/ ],
1164
- drug = RuleRegexp[ 'Bio::KEGG::DRUG',
1165
- /^ENTRY .+ Drug\s*/ ],
1166
- glycan = RuleRegexp[ 'Bio::KEGG::GLYCAN',
1167
- /^ENTRY .+ Glycan\s*/ ],
1168
- enzyme = RuleRegexp2[ 'Bio::KEGG::ENZYME',
1169
- /^ENTRY EC [0-9\.]+$/,
1170
- /^ENTRY .+ Enzyme\s*/
1171
- ],
1172
- compound = RuleRegexp2[ 'Bio::KEGG::COMPOUND',
1173
- /^ENTRY C[A-Za-z0-9\._]+$/,
1174
- /^ENTRY .+ Compound\s*/
1175
- ],
1176
- reaction = RuleRegexp2[ 'Bio::KEGG::REACTION',
1177
- /^ENTRY R[A-Za-z0-9\._]+$/,
1178
- /^ENTRY .+ Reaction\s*/
1179
- ],
1180
- genes = RuleRegexp[ 'Bio::KEGG::GENES',
1181
- /^ENTRY .+ (CDS|gene|.*RNA|Contig) / ],
1182
- genome = RuleRegexp[ 'Bio::KEGG::GENOME',
1183
- /^ENTRY [a-z]+$/ ],
1184
-
1185
- fantom = RuleProc.new('Bio::FANTOM::MaXML::Cluster',
1186
- 'Bio::FANTOM::MaXML::Sequence') do |text|
1187
- if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
1188
- case $1
1189
- when 'clusters'
1190
- Bio::FANTOM::MaXML::Cluster
1191
- when 'sequences'
1192
- Bio::FANTOM::MaXML::Sequence
1193
- else
1194
- nil #unknown
1195
- end
1196
- else
1197
- nil
1198
- end
1199
- end,
1200
-
1201
- pdb = RuleRegexp[ 'Bio::PDB',
1202
- /^HEADER .{40}\d\d\-[A-Z]{3}\-\d\d [0-9A-Z]{4}/ ],
1203
- het = RuleRegexp[ 'Bio::PDB::ChemicalComponent',
1204
- /^RESIDUE +.+ +\d+\s*$/ ],
1205
-
1206
- clustal = RuleRegexp2[ 'Bio::ClustalW::Report',
1207
- /^CLUSTAL .*\(.*\).*sequence +alignment/,
1208
- /^CLUSTAL FORMAT for T-COFFEE/ ],
1209
-
1210
- gcg_msf = RuleRegexp[ 'Bio::GCG::Msf',
1211
- /^!!(N|A)A_MULTIPLE_ALIGNMENT .+/ ],
1212
-
1213
- gcg_seq = RuleRegexp[ 'Bio::GCG::Seq',
1214
- /^!!(N|A)A_SEQUENCE .+/ ],
1215
-
1216
- blastxml = RuleRegexp[ 'Bio::Blast::Report',
1217
- /\<\!DOCTYPE BlastOutput PUBLIC / ],
1218
- wublast = RuleRegexp[ 'Bio::Blast::WU::Report',
1219
- /^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
1220
- wutblast = RuleRegexp[ 'Bio::Blast::WU::Report_TBlast',
1221
- /^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
1222
- blast = RuleRegexp[ 'Bio::Blast::Default::Report',
1223
- /^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
1224
- tblast = RuleRegexp[ 'Bio::Blast::Default::Report_TBlast',
1225
- /^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
1226
-
1227
- blat = RuleRegexp[ 'Bio::Blat::Report',
1228
- /^psLayout version \d+/ ],
1229
- spidey = RuleRegexp[ 'Bio::Spidey::Report',
1230
- /^\-\-SPIDEY version .+\-\-$/ ],
1231
- hmmer = RuleRegexp[ 'Bio::HMMER::Report',
1232
- /^HMMER +\d+\./ ],
1233
- sim4 = RuleRegexp[ 'Bio::Sim4::Report',
1234
- /^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
1235
-
1236
- fastaformat = RuleProc.new('Bio::FastaFormat',
1237
- 'Bio::NBRF',
1238
- 'Bio::FastaNumericFormat') do |text|
1239
- if /^>.+$/ =~ text
1240
- case text
1241
- when /^>([PF]1|[DR][LC]|N[13]|XX)\;.+/
1242
- Bio::NBRF
1243
- when /^>.+$\s+(^\#.*$\s*)*^\s*\d*\s*[-a-zA-Z_\.\[\]\(\)\*\+\$]+/
1244
- Bio::FastaFormat
1245
- when /^>.+$\s+^\s*\d+(\s+\d+)*\s*$/
1246
- Bio::FastaNumericFormat
1247
- else
1248
- false
1249
- end
1250
- else
1251
- nil
1252
- end
1253
- end
1254
- ]
1255
-
1256
- # dependencies
1257
- # NCBI
1258
- genbank.is_prior_to genpept
1259
- # EMBL/UniProt
1260
- embl.is_prior_to sptr
1261
- sptr.is_prior_to prosite
1262
- prosite.is_prior_to transfac
1263
- # KEGG
1264
- #aaindex.is_prior_to litdb
1265
- #litdb.is_prior_to brite
1266
- brite.is_prior_to orthology
1267
- orthology.is_prior_to drug
1268
- drug.is_prior_to glycan
1269
- glycan.is_prior_to enzyme
1270
- enzyme.is_prior_to compound
1271
- compound.is_prior_to reaction
1272
- reaction.is_prior_to genes
1273
- genes.is_prior_to genome
1274
- # PDB
1275
- pdb.is_prior_to het
1276
- # BLAST
1277
- wublast.is_prior_to wutblast
1278
- wutblast.is_prior_to blast
1279
- blast.is_prior_to tblast
1280
- # FastaFormat
1281
- BottomRule.is_prior_to(fastaformat)
1282
-
1283
- # for debug
1284
- #debug_first = RuleDebug.new('debug_first')
1285
- #a.add(debug_first)
1286
- #debug_first.is_prior_to(TopRule)
1287
-
1288
- ## for debug
1289
- #debug_last = RuleDebug.new('debug_last')
1290
- #a.add(debug_last)
1291
- #BottomRule.is_prior_to(debug_last)
1292
- #fastaformat.is_prior_to(debug_last)
1293
-
1294
- a.rehash
1295
- return a
1296
- end
1297
-
1298
- end #class AutoDetect
1299
-
1300
464
  end #class FlatFile
1301
465
 
1302
466
  end #module Bio