bio 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (340) hide show
  1. data/bin/bioruby +14 -122
  2. data/bin/br_biofetch.rb +2 -2
  3. data/bin/br_bioflat.rb +2 -2
  4. data/bin/br_biogetseq.rb +2 -2
  5. data/bin/br_pmfetch.rb +3 -3
  6. data/doc/Changes-0.7.rd +77 -0
  7. data/doc/KEGG_API.rd +523 -232
  8. data/doc/KEGG_API.rd.ja +529 -207
  9. data/doc/Tutorial.rd +48 -11
  10. data/lib/bio.rb +59 -6
  11. data/lib/bio/alignment.rb +713 -103
  12. data/lib/bio/appl/bl2seq/report.rb +2 -18
  13. data/lib/bio/appl/blast.rb +108 -91
  14. data/lib/bio/appl/blast/format0.rb +33 -18
  15. data/lib/bio/appl/blast/format8.rb +6 -20
  16. data/lib/bio/appl/blast/report.rb +293 -429
  17. data/lib/bio/appl/blast/rexml.rb +8 -22
  18. data/lib/bio/appl/blast/wublast.rb +21 -12
  19. data/lib/bio/appl/blast/xmlparser.rb +180 -183
  20. data/lib/bio/appl/blat/report.rb +127 -30
  21. data/lib/bio/appl/clustalw.rb +87 -59
  22. data/lib/bio/appl/clustalw/report.rb +20 -22
  23. data/lib/bio/appl/emboss.rb +113 -20
  24. data/lib/bio/appl/fasta.rb +173 -198
  25. data/lib/bio/appl/fasta/format10.rb +244 -347
  26. data/lib/bio/appl/gcg/msf.rb +212 -0
  27. data/lib/bio/appl/gcg/seq.rb +195 -0
  28. data/lib/bio/appl/genscan/report.rb +5 -23
  29. data/lib/bio/appl/hmmer.rb +8 -45
  30. data/lib/bio/appl/hmmer/report.rb +2 -20
  31. data/lib/bio/appl/iprscan/report.rb +374 -0
  32. data/lib/bio/appl/mafft.rb +87 -50
  33. data/lib/bio/appl/mafft/report.rb +151 -44
  34. data/lib/bio/appl/muscle.rb +52 -0
  35. data/lib/bio/appl/phylip/alignment.rb +129 -0
  36. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  37. data/lib/bio/appl/probcons.rb +41 -0
  38. data/lib/bio/appl/psort.rb +89 -96
  39. data/lib/bio/appl/psort/report.rb +6 -22
  40. data/lib/bio/appl/pts1.rb +263 -0
  41. data/lib/bio/appl/sim4.rb +26 -36
  42. data/lib/bio/appl/sim4/report.rb +2 -18
  43. data/lib/bio/appl/sosui/report.rb +5 -20
  44. data/lib/bio/appl/spidey/report.rb +2 -2
  45. data/lib/bio/appl/targetp/report.rb +4 -20
  46. data/lib/bio/appl/tcoffee.rb +55 -0
  47. data/lib/bio/appl/tmhmm/report.rb +4 -20
  48. data/lib/bio/command.rb +235 -64
  49. data/lib/bio/data/aa.rb +21 -26
  50. data/lib/bio/data/codontable.rb +2 -20
  51. data/lib/bio/data/na.rb +19 -4
  52. data/lib/bio/db.rb +27 -12
  53. data/lib/bio/db/aaindex.rb +2 -20
  54. data/lib/bio/db/embl/common.rb +4 -21
  55. data/lib/bio/db/embl/embl.rb +33 -85
  56. data/lib/bio/db/embl/sptr.rb +612 -302
  57. data/lib/bio/db/embl/swissprot.rb +10 -29
  58. data/lib/bio/db/embl/trembl.rb +10 -29
  59. data/lib/bio/db/embl/uniprot.rb +10 -29
  60. data/lib/bio/db/fantom.rb +15 -20
  61. data/lib/bio/db/fasta.rb +3 -3
  62. data/lib/bio/db/genbank/common.rb +37 -46
  63. data/lib/bio/db/genbank/ddbj.rb +6 -18
  64. data/lib/bio/db/genbank/genbank.rb +47 -186
  65. data/lib/bio/db/genbank/genpept.rb +4 -17
  66. data/lib/bio/db/genbank/refseq.rb +4 -17
  67. data/lib/bio/db/gff.rb +103 -35
  68. data/lib/bio/db/go.rb +4 -20
  69. data/lib/bio/db/kegg/brite.rb +26 -36
  70. data/lib/bio/db/kegg/compound.rb +81 -85
  71. data/lib/bio/db/kegg/drug.rb +98 -0
  72. data/lib/bio/db/kegg/enzyme.rb +133 -110
  73. data/lib/bio/db/kegg/expression.rb +2 -20
  74. data/lib/bio/db/kegg/genes.rb +208 -238
  75. data/lib/bio/db/kegg/genome.rb +164 -285
  76. data/lib/bio/db/kegg/glycan.rb +114 -157
  77. data/lib/bio/db/kegg/keggtab.rb +242 -303
  78. data/lib/bio/db/kegg/kgml.rb +117 -160
  79. data/lib/bio/db/kegg/orthology.rb +112 -0
  80. data/lib/bio/db/kegg/reaction.rb +54 -69
  81. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  82. data/lib/bio/db/lasergene.rb +209 -0
  83. data/lib/bio/db/litdb.rb +3 -27
  84. data/lib/bio/db/medline.rb +228 -249
  85. data/lib/bio/db/nbrf.rb +3 -3
  86. data/lib/bio/db/newick.rb +510 -0
  87. data/lib/bio/db/nexus.rb +1854 -0
  88. data/lib/bio/db/pdb.rb +5 -17
  89. data/lib/bio/db/pdb/atom.rb +2 -18
  90. data/lib/bio/db/pdb/chain.rb +2 -18
  91. data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
  92. data/lib/bio/db/pdb/model.rb +2 -18
  93. data/lib/bio/db/pdb/pdb.rb +73 -34
  94. data/lib/bio/db/pdb/residue.rb +4 -20
  95. data/lib/bio/db/pdb/utils.rb +2 -18
  96. data/lib/bio/db/prosite.rb +403 -422
  97. data/lib/bio/db/rebase.rb +84 -40
  98. data/lib/bio/db/soft.rb +404 -0
  99. data/lib/bio/db/transfac.rb +5 -17
  100. data/lib/bio/feature.rb +106 -52
  101. data/lib/bio/io/das.rb +32 -42
  102. data/lib/bio/io/dbget.rb +2 -20
  103. data/lib/bio/io/ddbjxml.rb +77 -138
  104. data/lib/bio/io/ebisoap.rb +158 -0
  105. data/lib/bio/io/ensembl.rb +229 -0
  106. data/lib/bio/io/fastacmd.rb +89 -82
  107. data/lib/bio/io/fetch.rb +163 -96
  108. data/lib/bio/io/flatfile.rb +170 -73
  109. data/lib/bio/io/flatfile/bdb.rb +3 -16
  110. data/lib/bio/io/flatfile/index.rb +2 -2
  111. data/lib/bio/io/flatfile/indexer.rb +3 -2
  112. data/lib/bio/io/higet.rb +12 -31
  113. data/lib/bio/io/keggapi.rb +210 -269
  114. data/lib/bio/io/ncbisoap.rb +155 -0
  115. data/lib/bio/io/pubmed.rb +169 -147
  116. data/lib/bio/io/registry.rb +4 -20
  117. data/lib/bio/io/soapwsdl.rb +43 -38
  118. data/lib/bio/io/sql.rb +242 -305
  119. data/lib/bio/location.rb +407 -285
  120. data/lib/bio/map.rb +410 -0
  121. data/lib/bio/pathway.rb +558 -695
  122. data/lib/bio/reference.rb +272 -75
  123. data/lib/bio/sequence.rb +255 -13
  124. data/lib/bio/sequence/aa.rb +71 -10
  125. data/lib/bio/sequence/common.rb +187 -33
  126. data/lib/bio/sequence/compat.rb +59 -4
  127. data/lib/bio/sequence/format.rb +54 -7
  128. data/lib/bio/sequence/generic.rb +3 -3
  129. data/lib/bio/sequence/na.rb +328 -26
  130. data/lib/bio/shell.rb +11 -4
  131. data/lib/bio/shell/core.rb +221 -160
  132. data/lib/bio/shell/demo.rb +18 -15
  133. data/lib/bio/shell/interface.rb +14 -12
  134. data/lib/bio/shell/irb.rb +95 -0
  135. data/lib/bio/shell/object.rb +45 -26
  136. data/lib/bio/shell/plugin/blast.rb +42 -0
  137. data/lib/bio/shell/plugin/codon.rb +22 -14
  138. data/lib/bio/shell/plugin/das.rb +58 -0
  139. data/lib/bio/shell/plugin/emboss.rb +2 -2
  140. data/lib/bio/shell/plugin/entry.rb +22 -11
  141. data/lib/bio/shell/plugin/flatfile.rb +2 -2
  142. data/lib/bio/shell/plugin/keggapi.rb +13 -6
  143. data/lib/bio/shell/plugin/midi.rb +4 -4
  144. data/lib/bio/shell/plugin/obda.rb +2 -2
  145. data/lib/bio/shell/plugin/psort.rb +56 -0
  146. data/lib/bio/shell/plugin/seq.rb +35 -8
  147. data/lib/bio/shell/plugin/soap.rb +87 -0
  148. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
  149. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
  150. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
  151. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
  152. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
  153. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
  154. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
  156. data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
  159. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
  160. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
  161. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
  162. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
  163. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
  164. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
  165. data/lib/bio/shell/script.rb +25 -0
  166. data/lib/bio/shell/setup.rb +109 -0
  167. data/lib/bio/shell/web.rb +70 -58
  168. data/lib/bio/tree.rb +850 -0
  169. data/lib/bio/util/color_scheme.rb +84 -107
  170. data/lib/bio/util/color_scheme/buried.rb +5 -24
  171. data/lib/bio/util/color_scheme/helix.rb +5 -24
  172. data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
  173. data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
  174. data/lib/bio/util/color_scheme/strand.rb +5 -24
  175. data/lib/bio/util/color_scheme/taylor.rb +5 -24
  176. data/lib/bio/util/color_scheme/turn.rb +5 -24
  177. data/lib/bio/util/color_scheme/zappo.rb +5 -24
  178. data/lib/bio/util/contingency_table.rb +70 -43
  179. data/lib/bio/util/restriction_enzyme.rb +228 -0
  180. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  181. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  182. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  183. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  184. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  185. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  186. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  187. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  188. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  189. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  190. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  191. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  192. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  193. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  194. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  195. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  196. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  197. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  198. data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
  199. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  200. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  201. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  202. data/lib/bio/util/sirna.rb +4 -22
  203. data/sample/color_scheme_na.rb +4 -12
  204. data/sample/enzymes.rb +78 -0
  205. data/sample/goslim.rb +5 -13
  206. data/sample/psortplot_html.rb +4 -12
  207. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  208. data/test/data/embl/AB090716.embl.rel89 +63 -0
  209. data/test/data/fasta/example1.txt +75 -0
  210. data/test/data/fasta/example2.txt +21 -0
  211. data/test/data/iprscan/merged.raw +32 -0
  212. data/test/data/iprscan/merged.txt +74 -0
  213. data/test/data/soft/GDS100_partial.soft +92 -0
  214. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  215. data/test/functional/bio/io/test_ensembl.rb +103 -0
  216. data/test/functional/bio/io/test_soapwsdl.rb +5 -17
  217. data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
  218. data/test/unit/bio/appl/blast/test_report.rb +3 -16
  219. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
  220. data/test/unit/bio/appl/genscan/test_report.rb +3 -16
  221. data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
  222. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  223. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  224. data/test/unit/bio/appl/sosui/test_report.rb +3 -16
  225. data/test/unit/bio/appl/targetp/test_report.rb +3 -16
  226. data/test/unit/bio/appl/test_blast.rb +3 -16
  227. data/test/unit/bio/appl/test_fasta.rb +4 -16
  228. data/test/unit/bio/appl/test_pts1.rb +140 -0
  229. data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
  230. data/test/unit/bio/data/test_aa.rb +4 -17
  231. data/test/unit/bio/data/test_codontable.rb +3 -16
  232. data/test/unit/bio/data/test_na.rb +3 -3
  233. data/test/unit/bio/db/embl/test_common.rb +3 -16
  234. data/test/unit/bio/db/embl/test_embl.rb +3 -16
  235. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  236. data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
  237. data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
  238. data/test/unit/bio/db/kegg/test_genes.rb +3 -16
  239. data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
  240. data/test/unit/bio/db/test_aaindex.rb +2 -2
  241. data/test/unit/bio/db/test_fasta.rb +3 -16
  242. data/test/unit/bio/db/test_gff.rb +3 -16
  243. data/test/unit/bio/db/test_lasergene.rb +95 -0
  244. data/test/unit/bio/db/test_newick.rb +56 -0
  245. data/test/unit/bio/db/test_nexus.rb +360 -0
  246. data/test/unit/bio/db/test_prosite.rb +5 -18
  247. data/test/unit/bio/db/test_rebase.rb +11 -25
  248. data/test/unit/bio/db/test_soft.rb +138 -0
  249. data/test/unit/bio/io/test_ddbjxml.rb +5 -17
  250. data/test/unit/bio/io/test_ensembl.rb +109 -0
  251. data/test/unit/bio/io/test_fastacmd.rb +3 -16
  252. data/test/unit/bio/io/test_flatfile.rb +237 -0
  253. data/test/unit/bio/io/test_soapwsdl.rb +4 -17
  254. data/test/unit/bio/sequence/test_aa.rb +3 -3
  255. data/test/unit/bio/sequence/test_common.rb +3 -16
  256. data/test/unit/bio/sequence/test_compat.rb +3 -16
  257. data/test/unit/bio/sequence/test_na.rb +29 -3
  258. data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
  259. data/test/unit/bio/test_alignment.rb +16 -27
  260. data/test/unit/bio/test_command.rb +242 -25
  261. data/test/unit/bio/test_db.rb +3 -16
  262. data/test/unit/bio/test_feature.rb +4 -16
  263. data/test/unit/bio/test_location.rb +4 -16
  264. data/test/unit/bio/test_map.rb +230 -0
  265. data/test/unit/bio/test_pathway.rb +4 -16
  266. data/test/unit/bio/test_reference.rb +2 -2
  267. data/test/unit/bio/test_sequence.rb +7 -19
  268. data/test/unit/bio/test_shell.rb +3 -16
  269. data/test/unit/bio/test_tree.rb +593 -0
  270. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  271. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  272. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  273. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
  274. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  275. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  276. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  277. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  278. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  279. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  280. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  281. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  282. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  283. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  284. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  285. data/test/unit/bio/util/test_color_scheme.rb +6 -18
  286. data/test/unit/bio/util/test_contingency_table.rb +6 -18
  287. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  288. data/test/unit/bio/util/test_sirna.rb +3 -16
  289. metadata +228 -169
  290. data/doc/BioRuby.rd.ja +0 -225
  291. data/doc/Design.rd.ja +0 -341
  292. data/doc/TODO.rd.ja +0 -138
  293. data/lib/bio/appl/fasta/format6.rb +0 -37
  294. data/lib/bio/db/kegg/cell.rb +0 -88
  295. data/lib/bio/db/kegg/ko.rb +0 -178
  296. data/lib/bio/shell/rails/Rakefile +0 -10
  297. data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
  298. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
  299. data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
  300. data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
  301. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
  302. data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
  303. data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
  304. data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
  305. data/lib/bio/shell/rails/config/boot.rb +0 -19
  306. data/lib/bio/shell/rails/config/database.yml +0 -85
  307. data/lib/bio/shell/rails/config/environment.rb +0 -53
  308. data/lib/bio/shell/rails/config/environments/development.rb +0 -19
  309. data/lib/bio/shell/rails/config/environments/production.rb +0 -19
  310. data/lib/bio/shell/rails/config/environments/test.rb +0 -19
  311. data/lib/bio/shell/rails/config/routes.rb +0 -19
  312. data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
  313. data/lib/bio/shell/rails/public/404.html +0 -8
  314. data/lib/bio/shell/rails/public/500.html +0 -8
  315. data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
  316. data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
  317. data/lib/bio/shell/rails/public/dispatch.rb +0 -10
  318. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  319. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  320. data/lib/bio/shell/rails/public/index.html +0 -277
  321. data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
  322. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
  323. data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
  324. data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
  325. data/lib/bio/shell/rails/public/robots.txt +0 -1
  326. data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
  327. data/lib/bio/shell/rails/script/about +0 -3
  328. data/lib/bio/shell/rails/script/breakpointer +0 -3
  329. data/lib/bio/shell/rails/script/console +0 -3
  330. data/lib/bio/shell/rails/script/destroy +0 -3
  331. data/lib/bio/shell/rails/script/generate +0 -3
  332. data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
  333. data/lib/bio/shell/rails/script/performance/profiler +0 -3
  334. data/lib/bio/shell/rails/script/plugin +0 -3
  335. data/lib/bio/shell/rails/script/process/reaper +0 -3
  336. data/lib/bio/shell/rails/script/process/spawner +0 -3
  337. data/lib/bio/shell/rails/script/process/spinner +0 -3
  338. data/lib/bio/shell/rails/script/runner +0 -3
  339. data/lib/bio/shell/rails/script/server +0 -42
  340. data/lib/bio/shell/rails/test/test_helper.rb +0 -28
@@ -1,114 +1,181 @@
1
1
  #
2
2
  # = bio/io/biofetch.rb - BioFetch access module
3
3
  #
4
- # Copyright:: Copyright (C) 2002, 2005
5
- # Toshiaki Katayama <k@bioruby.org>
6
- # License:: LGPL
4
+ # Copyright:: Copyright (C) 2002, 2005 Toshiaki Katayama <k@bioruby.org>,
5
+ # Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
6
+ # License:: The Ruby License
7
7
  #
8
- # $Id: fetch.rb,v 1.4 2005/12/18 15:58:42 k Exp $
8
+ # $Id: fetch.rb,v 1.10 2007/04/05 23:35:41 trevor Exp $
9
9
  #
10
- #--
10
+ # == DESCRIPTION
11
11
  #
12
- # This library is free software; you can redistribute it and/or
13
- # modify it under the terms of the GNU Lesser General Public
14
- # License as published by the Free Software Foundation; either
15
- # version 2 of the License, or (at your option) any later version.
12
+ # Using BioRuby BioFetch server
16
13
  #
17
- # This library is distributed in the hope that it will be useful,
18
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
- # Lesser General Public License for more details.
14
+ # br_server = Bio::Fetch.new()
15
+ # puts br_server.databases
16
+ # puts br_server.formats('embl')
17
+ # puts br_server.maxids
21
18
  #
22
- # You should have received a copy of the GNU Lesser General Public
23
- # License along with this library; if not, write to the Free Software
24
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
- #
26
- #++
19
+ # Using EBI BioFetch server
27
20
  #
21
+ # ebi_server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
22
+ # puts ebi_server.fetch('embl', 'J00231', 'raw')
23
+ # puts ebi_server.fetch('embl', 'J00231', 'html')
24
+ # puts Bio::Fetch.query('genbank', 'J00231')
25
+ # puts Bio::Fetch.query('genbank', 'J00231', 'raw', 'fasta')
26
+ #
28
27
 
29
28
  require 'uri'
30
- require 'net/http'
29
+ require 'bio/command'
31
30
 
32
31
  module Bio
33
-
34
- class Fetch
35
-
36
- # Create a new Bio::Fetch server object.
37
- # Use Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch') to connect
38
- # to EBI BioFetch server.
39
- def initialize(url = 'http://bioruby.org/cgi-bin/biofetch.rb')
40
- schema, user, @host, @port, reg, @path, = URI.split(url)
41
- end
42
-
43
- # Set default database to dbname (prepare for get_by_id).
44
- attr_accessor :database
45
-
46
- # Get raw database entry by id (mainly used by Bio::Registry).
47
- def get_by_id(id)
48
- fetch(@database, id)
49
- end
50
-
51
- # Fetch a database entry as specified by database (db), entry id (id),
52
- # 'raw' text or 'html' (style), and format. When using BioRuby's
53
- # BioFetch server, value for the format should not be set.
54
- def fetch(db, id, style = 'raw', format = nil)
55
- data = [ "db=#{db}", "id=#{id}", "style=#{style}" ]
56
- data.push("format=#{format}") if format
57
- data = data.join('&')
58
-
59
- responce, result = Net::HTTP.new(@host, @port).post(@path, data)
60
- return result
61
- end
62
-
63
- # Short cut for using BioRuby's BioFetch server. You can fetch an entry
64
- # without creating instance of BioFetch server.
65
- def self.query(*args)
66
- self.new.fetch(*args)
67
- end
68
-
69
- # What databases are available?
70
- def databases
71
- query = "info=dbs"
72
- responce, result = Net::HTTP.new(@host, @port).post(@path, query)
73
- return result
74
- end
75
-
76
- # What formats does the database X have?
77
- def formats(database = @database)
78
- if database
79
- query = "info=formats;db=#{database}"
80
- responce, result = Net::HTTP.new(@host, @port).post(@path, query)
81
- return result
32
+ # = DESCRIPTION
33
+ # The Bio::Fetch class provides an interface to dbfetch servers. Given
34
+ # a database name and an accession number, these servers return the associated
35
+ # record. For example, for the embl database on the EBI, that would be a
36
+ # nucleic or amino acid sequence.
37
+ #
38
+ # Possible dbfetch servers include:
39
+ # * http://bioruby.org/cgi-bin/biofetch.rb (default)
40
+ # * http://www.ebi.ac.uk/cgi-bin/dbfetch
41
+ #
42
+ # If you're behind a proxy server, be sure to set your HTTP_PROXY
43
+ # environment variable accordingly.
44
+ #
45
+ # = USAGE
46
+ # require 'bio'
47
+ #
48
+ # # Retrieve the sequence of accession number M33388 from the EMBL
49
+ # # database.
50
+ # server = Bio::Fetch.new() #uses default server
51
+ # puts server.fetch('embl','M33388')
52
+ #
53
+ # # Do the same thing without creating a Bio::Fetch object. This method always
54
+ # # uses the default dbfetch server: http://bioruby.org/cgi-bin/biofetch.rb
55
+ # puts Bio::Fetch.query('embl','M33388')
56
+ #
57
+ # # To know what databases are available on the bioruby dbfetch server:
58
+ # server = Bio::Fetch.new()
59
+ # puts server.databases
60
+ #
61
+ # # Some databases provide their data in different formats (e.g. 'fasta',
62
+ # # 'genbank' or 'embl'). To check which formats are supported by a given
63
+ # # database:
64
+ # puts server.formats('embl')
65
+ #
66
+ class Fetch
67
+
68
+ # Create a new Bio::Fetch server object that can subsequently be queried
69
+ # using the Bio::Fetch#fetch method
70
+ # ---
71
+ # *Arguments*:
72
+ # * _url_: URL of dbfetch server (default = 'http://bioruby.org/cgi-bin/biofetch.rb')
73
+ # *Returns*:: Bio::Fetch object
74
+ def initialize(url = 'http://bioruby.org/cgi-bin/biofetch.rb')
75
+ @url = url
76
+ schema, user, @host, @port, reg, @path, = URI.split(@url)
82
77
  end
78
+
79
+ # The default database to query
80
+ #--
81
+ # This will be used by the get_by_id method
82
+ #++
83
+ attr_accessor :database
84
+
85
+ # Get raw database entry by id. This method lets the Bio::Registry class
86
+ # use Bio::Fetch objects.
87
+ def get_by_id(id)
88
+ fetch(@database, id)
89
+ end
90
+
91
+ # Fetch a database entry as specified by database (db), entry id (id),
92
+ # 'raw' text or 'html' (style), and format. When using BioRuby's
93
+ # BioFetch server, value for the format should not be set.
94
+ # Examples:
95
+ # server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
96
+ # puts server.fetch('embl','M33388','raw','fasta')
97
+ # puts server.fetch('refseq','NM_12345','html','embl')
98
+ # ---
99
+ # *Arguments*:
100
+ # * _database_: name of database to query (see Bio::Fetch#databases to get list of supported databases)
101
+ # * _id_: single ID or ID list separated by commas or white space
102
+ # * _style_: [raw|html] (default = 'raw')
103
+ # * _format_: name of output format (see Bio::Fetch#formats)
104
+ def fetch(db, id, style = 'raw', format = nil)
105
+ query = [ "db=#{db}", "id=#{id}", "style=#{style}" ]
106
+ query.push("format=#{format}") if format
107
+ query = query.join('&')
108
+
109
+ Bio::Command.read_uri(@url + '?' + URI.escape(query))
110
+ end
111
+
112
+ # Shortcut for using BioRuby's BioFetch server. You can fetch an entry
113
+ # without creating an instance of BioFetch server. This method uses the
114
+ # default dbfetch server, which is http://bioruby.org/cgi-bin/biofetch.rb
115
+ #
116
+ # Example:
117
+ # puts Bio::Fetch.query('refseq','NM_12345')
118
+ #
119
+ # ---
120
+ # *Arguments*:
121
+ # * _database_: name of database to query (see Bio::Fetch#databases to get list of supported databases)
122
+ # * _id_: single ID or ID list separated by commas or white space
123
+ # * _style_: [raw|html] (default = 'raw')
124
+ # * _format_: name of output format (see Bio::Fetch#formats)
125
+ def self.query(*args)
126
+ self.new.fetch(*args)
127
+ end
128
+
129
+ # Using this method, the user can ask a dbfetch server what databases
130
+ # it supports. This would normally be the first step you'd take when
131
+ # you use a dbfetch server for the first time.
132
+ # Example:
133
+ # server = Bio::Fetch.new()
134
+ # puts server.databases # returns "aa aax bl cpd dgenes dr ec eg emb ..."
135
+ #
136
+ # This method only works for the bioruby dbfetch server. For a list
137
+ # of databases available from the EBI, see the EBI website at
138
+ # http://www.ebi.ac.uk/cgi-bin/dbfetch/
139
+ # ---
140
+ # *Returns*:: array of database names
141
+ def databases
142
+ query = "info=dbs"
143
+
144
+ Bio::Command.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
145
+ end
146
+
147
+ # Lists the formats that are available for a given database. Like the
148
+ # Bio::Fetch#databases method, this method is only available on
149
+ # the bioruby dbfetch server.
150
+ # Example:
151
+ # server = Bio::Fetch.new()
152
+ # puts server.formats('embl') # returns "default fasta"
153
+ # ---
154
+ # *Arguments*:
155
+ # * _database_:: name of database you want the supported formats for
156
+ # *Returns*:: array of formats
157
+ def formats(database = @database)
158
+ if database
159
+ query = "info=formats;db=#{database}"
160
+
161
+ Bio::Command.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
162
+ end
163
+ end
164
+
165
+ # A dbfetch server will only return entries up to a given maximum number.
166
+ # This method retrieves that number from the server. As for the databases
167
+ # and formats methods, the maxids method only works for the bioruby
168
+ # dbfetch server.
169
+ # ---
170
+ # *Arguments*: none
171
+ # *Returns*:: number
172
+ def maxids
173
+ query = "info=maxids"
174
+
175
+ Bio::Command.read_uri(@url + '?' + URI.escape(query)).to_i
176
+ end
177
+
83
178
  end
84
179
 
85
- # How many entries can be retrieved simultaneously?
86
- def maxids
87
- query = "info=maxids"
88
- responce, result = Net::HTTP.new(@host, @port).post(@path, query)
89
- return result
90
- end
91
-
92
- end
93
-
94
180
  end # module Bio
95
181
 
96
-
97
-
98
- if __FILE__ == $0
99
-
100
- # bfserv = Bio::Fetch.new('http://www.ebi.ac.uk:80/cgi-bin/dbfetch')
101
- bfserv = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
102
- puts "# test 1"
103
- puts bfserv.fetch('embl', 'J00231', 'raw')
104
- puts "# test 2"
105
- puts bfserv.fetch('embl', 'J00231', 'html')
106
-
107
- puts "# test 3"
108
- puts Bio::Fetch.query('genbank', 'J00231')
109
- puts "# test 4"
110
- puts Bio::Fetch.query('genbank', 'J00231', 'raw', 'fasta')
111
-
112
- end
113
-
114
-
@@ -3,9 +3,9 @@
3
3
  #
4
4
  # Copyright (C) 2001-2006 Naohisa Goto <ng@bioruby.org>
5
5
  #
6
- # License:: Ruby's
6
+ # License:: The Ruby License
7
7
  #
8
- # $Id: flatfile.rb,v 1.46 2006/02/22 10:01:27 ngoto Exp $
8
+ # $Id: flatfile.rb,v 1.60 2007/07/09 14:08:34 ngoto Exp $
9
9
  #
10
10
  #
11
11
  # Bio::FlatFile is a helper and wrapper class to read a biological data file.
@@ -34,7 +34,6 @@ module Bio
34
34
  @path = path
35
35
  # initialize prefetch buffer
36
36
  @buffer = ''
37
- @path = path
38
37
  end
39
38
 
40
39
  # Creates a new input stream wrapper from the given IO object.
@@ -262,6 +261,9 @@ module Bio
262
261
  # the last entry read from the stream
263
262
  attr_reader :entry
264
263
 
264
+ # a flag to write down entry start and end positions
265
+ attr_accessor :entry_pos_flag
266
+
265
267
  # start position of the entry
266
268
  attr_reader :entry_start_pos
267
269
 
@@ -290,6 +292,7 @@ module Bio
290
292
  end
291
293
  end
292
294
  @delimiter_overrun = klass::DELIMITER_OVERRUN rescue nil
295
+ @entry_pos_flag = nil
293
296
  end
294
297
 
295
298
  # (String) delimiter indicates the end of an entry.
@@ -330,7 +333,7 @@ module Bio
330
333
 
331
334
  # gets an entry
332
335
  def get_entry
333
- p0 = @stream.pos
336
+ p0 = @entry_pos_flag ? @stream.pos : nil
334
337
  e = @stream.gets(@delimiter)
335
338
  if e and @delimiter_overrun then
336
339
  if e[-@delimiter.size, @delimiter.size ] == @delimiter then
@@ -339,7 +342,7 @@ module Bio
339
342
  @stream.ungets(overrun)
340
343
  end
341
344
  end
342
- p1 = @stream.pos
345
+ p1 = @entry_pos_flag ? @stream.pos : nil
343
346
  @entry_start_pos = p0
344
347
  @entry = e
345
348
  @entry_ended_pos = p1
@@ -422,7 +425,7 @@ module Bio
422
425
  # check if file is filename or IO object
423
426
  unless file.respond_to?(:gets)
424
427
  # 'file' is a filename
425
- self.open_file(file, *arg, &block)
428
+ _open_file(dbclass, file, *arg, &block)
426
429
  else
427
430
  # 'file' is an IO object
428
431
  ff = self.new(dbclass, file)
@@ -462,15 +465,27 @@ module Bio
462
465
  # Otherwise, it returns a new FlatFile object.
463
466
  #
464
467
  def self.open_file(filename, *arg)
468
+ _open_file(nil, filename, *arg)
469
+ end
470
+
471
+ # Same as FlatFile.open(dbclass, filename, *arg),
472
+ # except that it only accepts a filename and doesn't accept an IO object.
473
+ #
474
+ # It can accept a block.
475
+ # If a block is given, it returns the block's return value.
476
+ # Otherwise, it returns a new FlatFile object.
477
+ #
478
+ def self._open_file(dbclass, filename, *arg)
465
479
  if block_given? then
466
480
  BufferedInputStream.open_file(filename, *arg) do |stream|
467
- yield self.new(nil, stream)
481
+ yield self.new(dbclass, stream)
468
482
  end
469
483
  else
470
484
  stream = BufferedInputStream.open_file(filename, *arg)
471
- self.new(nil, stream)
485
+ self.new(dbclass, stream)
472
486
  end
473
487
  end
488
+ private_class_method :_open_file
474
489
 
475
490
  # Opens URI specified as _uri_.
476
491
  # _uri_ must be a String or URI object.
@@ -493,6 +508,20 @@ module Bio
493
508
  end
494
509
  end
495
510
 
511
+ # Executes the block for every entry in the stream.
512
+ # Same as FlatFile.open(*arg) { |ff| ff.each { |entry| ... }}.
513
+ #
514
+ # * Example
515
+ # Bio::FlatFile.foreach('test.fst') { |e| puts e.definition }
516
+ #
517
+ def self.foreach(*arg)
518
+ self.open(*arg) do |flatfileobj|
519
+ flatfileobj.each do |entry|
520
+ yield entry
521
+ end
522
+ end
523
+ end
524
+
496
525
  # Same as FlatFile.open, except that 'stream' should be an opened
497
526
  # stream object (IO, File, ..., which has the 'gets' method).
498
527
  #
@@ -518,13 +547,11 @@ module Bio
518
547
  #
519
548
  def initialize(dbclass, stream)
520
549
  # 2nd arg: IO object
521
- if @stream.kind_of?(BufferedInputStream)
550
+ if stream.kind_of?(BufferedInputStream)
522
551
  @stream = stream
523
552
  else
524
553
  @stream = BufferedInputStream.for_io(stream)
525
554
  end
526
- # default is raw mode
527
- self.raw = false
528
555
  # 1st arg: database class (or file format autodetection)
529
556
  if dbclass then
530
557
  self.dbclass = dbclass
@@ -534,6 +561,8 @@ module Bio
534
561
  #
535
562
  @skip_leader_mode = :firsttime
536
563
  @firsttime_flag = true
564
+ # default raw mode is false
565
+ self.raw = false
537
566
  end
538
567
 
539
568
  # The mode how to skip leader of the data.
@@ -563,8 +592,14 @@ module Bio
563
592
  @stream.path
564
593
  end
565
594
 
595
+ # Exception class to be raised when data format hasn't been specified.
596
+ class UnknownDataFormatError < IOError
597
+ end
598
+
566
599
  # Get next entry.
567
600
  def next_entry
601
+ raise UnknownDataFormatError,
602
+ 'file format auto-detection failed?' unless @dbclass
568
603
  if @skip_leader_mode and
569
604
  ((@firsttime_flag and @skip_leader_mode == :firsttime) or
570
605
  @skip_leader_mode == :everytime)
@@ -587,6 +622,16 @@ module Bio
587
622
  @splitter.entry
588
623
  end
589
624
 
625
+ # a flag to write down entry start and end positions
626
+ def entry_pos_flag
627
+ @splitter.entry_pos_flag
628
+ end
629
+
630
+ # Sets flag to write down entry start and end positions
631
+ def entry_pos_flag=(x)
632
+ @splitter.entry_pos_flag = x
633
+ end
634
+
590
635
  # start position of the last entry
591
636
  def entry_start_pos
592
637
  @splitter.entry_start_pos
@@ -736,21 +781,26 @@ module Bio
736
781
 
737
782
  include TSort
738
783
 
784
+ # Array to store autodetection rules.
785
+ # This is defined only for inspect.
786
+ class RulesArray < Array
787
+ # visualize contents
788
+ def inspect
789
+ "[#{self.collect { |e| e.name.inspect }.join(' ')}]"
790
+ end
791
+ end #class RulesArray
792
+
739
793
  # Template of a single rule of autodetection
740
794
  class RuleTemplate
741
795
  # Creates a new element.
742
796
  def self.[](*arg)
743
797
  self.new(*arg)
744
798
  end
745
-
799
+
746
800
  # Creates a new element.
747
801
  def initialize
748
- a = Array.new
749
- def a.inspect
750
- "[#{self.collect { |e| e.name.inspect }.join(' ')}]"
751
- end
752
- @higher_priority_elements = a.clone
753
- @lower_priority_elements = a.clone
802
+ @higher_priority_elements = RulesArray.new
803
+ @lower_priority_elements = RulesArray.new
754
804
  @name = nil
755
805
  end
756
806
 
@@ -784,6 +834,24 @@ module Bio
784
834
  def guess(text, meta)
785
835
  nil
786
836
  end
837
+
838
+ private
839
+ # Gets constant from constant name given as a string.
840
+ def str2const(str)
841
+ const = Object
842
+ str.split(/\:\:/).each do |x|
843
+ const = const.const_get(x)
844
+ end
845
+ const
846
+ end
847
+
848
+ # Gets database class from given object.
849
+ # Current implementation is:
850
+ # if _obj_ is kind of String, regarded as a constant.
851
+ # Otherwise, returns _obj_ as is.
852
+ def get_dbclass(obj)
853
+ obj.kind_of?(String) ? str2const(obj) : obj
854
+ end
787
855
  end #class RuleTemplate
788
856
 
789
857
  # RuleDebug is a class for debugging autodetect classes/methods
@@ -835,40 +903,49 @@ module Bio
835
903
  def initialize(dbclass, re)
836
904
  super()
837
905
  @re = re
838
- @dbclass = dbclass
839
- @dbclasses = [ dbclass ]
840
906
  @name = dbclass.to_s
907
+ @dbclass = nil
908
+ @dbclass_lazy = dbclass
909
+ end
910
+
911
+ # database class (lazy evaluation)
912
+ def dbclass
913
+ unless @dbclass
914
+ @dbclass = get_dbclass(@dbclass_lazy)
915
+ end
916
+ @dbclass
917
+ end
918
+ private :dbclass
919
+
920
+ # returns database classes
921
+ def dbclasses
922
+ [ dbclass ]
841
923
  end
842
924
 
843
925
  # If given text matches the regexp, returns the database class.
844
926
  # Otherwise, returns nil or false.
845
927
  # _meta_ is ignored.
846
928
  def guess(text, meta)
847
- @re =~ text ? @dbclass : nil
929
+ @re =~ text ? dbclass : nil
848
930
  end
849
931
  end #class RuleRegexp
850
932
 
851
933
  # An autodetection rule that uses multiple regular expressions.
852
- class RuleRegexp2 < RuleTemplate
934
+ # If given string matches one of the regular expressions,
935
+ # returns the database class.
936
+ class RuleRegexp2 < RuleRegexp
853
937
  # Creates a new instance.
854
938
  def initialize(dbclass, *regexps)
855
- super()
939
+ super(dbclass, nil)
856
940
  @regexps = regexps
857
- @dbclass = dbclass
858
- @dbclasses = [ dbclass ]
859
- if name
860
- @name = name
861
- else
862
- @name = @dbclass.to_s
863
- end
864
941
  end
865
942
 
866
- # If given text matches the regexp, returns the database class.
943
+ # If given text matches one of the regexps, returns the database class.
867
944
  # Otherwise, returns nil or false.
868
945
  # _meta_ is ignored.
869
946
  def guess(text, meta)
870
947
  @regexps.each do |re|
871
- return @dbclass if re =~ text
948
+ return dbclass if re =~ text
872
949
  end
873
950
  nil
874
951
  end
@@ -880,10 +957,19 @@ module Bio
880
957
  def initialize(*dbclasses, &proc)
881
958
  super()
882
959
  @proc = proc
883
- @dbclasses = dbclasses
960
+ @dbclasses = nil
961
+ @dbclasses_lazy = dbclasses
884
962
  @name = dbclasses.collect { |x| x.to_s }.join('|')
885
963
  end
886
964
 
965
+ # database classes (lazy evaluation)
966
+ def dbclasses
967
+ unless @dbclasses
968
+ @dbclasses = @dbclasses_lazy.collect { |x| get_dbclass(x) }
969
+ end
970
+ @dbclasses
971
+ end
972
+
887
973
  # If given text (and/or meta information) is known, returns
888
974
  # the database class.
889
975
  # Otherwise, returns nil or false.
@@ -1039,22 +1125,23 @@ module Bio
1039
1125
  # make a default of default autodetect object
1040
1126
  def self.make_default
1041
1127
  a = self[
1042
- genbank = RuleRegexp[ Bio::GenBank,
1128
+ genbank = RuleRegexp[ 'Bio::GenBank',
1043
1129
  /^LOCUS .+ bp .*[a-z]*[DR]?NA/ ],
1044
- genpept = RuleRegexp[ Bio::GenPept,
1130
+ genpept = RuleRegexp[ 'Bio::GenPept',
1045
1131
  /^LOCUS .+ aa .+/ ],
1046
- medline = RuleRegexp[ Bio::MEDLINE,
1132
+ medline = RuleRegexp[ 'Bio::MEDLINE',
1047
1133
  /^UI \- [0-9]+$/ ],
1048
- embl = RuleRegexp[ Bio::EMBL,
1134
+ embl = RuleRegexp[ 'Bio::EMBL',
1049
1135
  /^ID .+\; .*(DNA|RNA|XXX)\;/ ],
1050
- sptr = RuleRegexp[ Bio::SPTR,
1051
- /^ID .+\; *PRT\;/ ],
1052
- prosite = RuleRegexp[ Bio::PROSITE,
1136
+ sptr = RuleRegexp2[ 'Bio::SPTR',
1137
+ /^ID .+\; *PRT\;/,
1138
+ /^ID [-A-Za-z0-9_\.]+ .+\; *[0-9]+ *AA\./ ],
1139
+ prosite = RuleRegexp[ 'Bio::PROSITE',
1053
1140
  /^ID [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
1054
- transfac = RuleRegexp[ Bio::TRANSFAC,
1141
+ transfac = RuleRegexp[ 'Bio::TRANSFAC',
1055
1142
  /^AC [-A-Za-z0-9_\.]+$/ ],
1056
1143
 
1057
- aaindex = RuleProc.new(Bio::AAindex1, Bio::AAindex2) do |text|
1144
+ aaindex = RuleProc.new('Bio::AAindex1', 'Bio::AAindex2') do |text|
1058
1145
  if /^H [-A-Z0-9_\.]+$/ =~ text then
1059
1146
  if text =~ /^M [rc]/ then
1060
1147
  Bio::AAindex2
@@ -1068,33 +1155,35 @@ module Bio
1068
1155
  end
1069
1156
  end,
1070
1157
 
1071
- litdb = RuleRegexp[ Bio::LITDB,
1158
+ litdb = RuleRegexp[ 'Bio::LITDB',
1072
1159
  /^CODE [0-9]+$/ ],
1073
- brite = RuleRegexp[ Bio::KEGG::BRITE,
1160
+ brite = RuleRegexp[ 'Bio::KEGG::BRITE',
1074
1161
  /^Entry [A-Z0-9]+/ ],
1075
- ko = RuleRegexp[ Bio::KEGG::KO,
1162
+ orthology = RuleRegexp[ 'Bio::KEGG::ORTHOLOGY',
1076
1163
  /^ENTRY .+ KO\s*/ ],
1077
- glycan = RuleRegexp[ Bio::KEGG::GLYCAN,
1164
+ drug = RuleRegexp[ 'Bio::KEGG::DRUG',
1165
+ /^ENTRY .+ Drug\s*/ ],
1166
+ glycan = RuleRegexp[ 'Bio::KEGG::GLYCAN',
1078
1167
  /^ENTRY .+ Glycan\s*/ ],
1079
- enzyme = RuleRegexp2[ Bio::KEGG::ENZYME,
1168
+ enzyme = RuleRegexp2[ 'Bio::KEGG::ENZYME',
1080
1169
  /^ENTRY EC [0-9\.]+$/,
1081
1170
  /^ENTRY .+ Enzyme\s*/
1082
1171
  ],
1083
- compound = RuleRegexp2[ Bio::KEGG::COMPOUND,
1172
+ compound = RuleRegexp2[ 'Bio::KEGG::COMPOUND',
1084
1173
  /^ENTRY C[A-Za-z0-9\._]+$/,
1085
1174
  /^ENTRY .+ Compound\s*/
1086
1175
  ],
1087
- reaction = RuleRegexp2[ Bio::KEGG::REACTION,
1176
+ reaction = RuleRegexp2[ 'Bio::KEGG::REACTION',
1088
1177
  /^ENTRY R[A-Za-z0-9\._]+$/,
1089
1178
  /^ENTRY .+ Reaction\s*/
1090
1179
  ],
1091
- genes = RuleRegexp[ Bio::KEGG::GENES,
1092
- /^ENTRY .+ (CDS|gene|.*RNA) / ],
1093
- genome = RuleRegexp[ Bio::KEGG::GENOME,
1180
+ genes = RuleRegexp[ 'Bio::KEGG::GENES',
1181
+ /^ENTRY .+ (CDS|gene|.*RNA|Contig) / ],
1182
+ genome = RuleRegexp[ 'Bio::KEGG::GENOME',
1094
1183
  /^ENTRY [a-z]+$/ ],
1095
1184
 
1096
- fantom = RuleProc.new(Bio::FANTOM::MaXML::Cluster,
1097
- Bio::FANTOM::MaXML::Sequence) do |text|
1185
+ fantom = RuleProc.new('Bio::FANTOM::MaXML::Cluster',
1186
+ 'Bio::FANTOM::MaXML::Sequence') do |text|
1098
1187
  if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
1099
1188
  case $1
1100
1189
  when 'clusters'
@@ -1109,37 +1198,44 @@ module Bio
1109
1198
  end
1110
1199
  end,
1111
1200
 
1112
- pdb = RuleRegexp[ Bio::PDB,
1201
+ pdb = RuleRegexp[ 'Bio::PDB',
1113
1202
  /^HEADER .{40}\d\d\-[A-Z]{3}\-\d\d [0-9A-Z]{4}/ ],
1114
- het = RuleRegexp[ Bio::PDB::ChemicalComponent,
1203
+ het = RuleRegexp[ 'Bio::PDB::ChemicalComponent',
1115
1204
  /^RESIDUE +.+ +\d+\s*$/ ],
1116
1205
 
1117
- clustal = RuleRegexp[ Bio::ClustalW::Report,
1118
- /^CLUSTAL .*\(.*\).*sequence +alignment/ ],
1206
+ clustal = RuleRegexp2[ 'Bio::ClustalW::Report',
1207
+ /^CLUSTAL .*\(.*\).*sequence +alignment/,
1208
+ /^CLUSTAL FORMAT for T-COFFEE/ ],
1209
+
1210
+ gcg_msf = RuleRegexp[ 'Bio::GCG::Msf',
1211
+ /^!!(N|A)A_MULTIPLE_ALIGNMENT .+/ ],
1212
+
1213
+ gcg_seq = RuleRegexp[ 'Bio::GCG::Seq',
1214
+ /^!!(N|A)A_SEQUENCE .+/ ],
1119
1215
 
1120
- blastxml = RuleRegexp[ Bio::Blast::Report,
1216
+ blastxml = RuleRegexp[ 'Bio::Blast::Report',
1121
1217
  /\<\!DOCTYPE BlastOutput PUBLIC / ],
1122
- wublast = RuleRegexp[ Bio::Blast::WU::Report,
1218
+ wublast = RuleRegexp[ 'Bio::Blast::WU::Report',
1123
1219
  /^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
1124
- wutblast = RuleRegexp[ Bio::Blast::WU::Report_TBlast,
1220
+ wutblast = RuleRegexp[ 'Bio::Blast::WU::Report_TBlast',
1125
1221
  /^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
1126
- blast = RuleRegexp[ Bio::Blast::Default::Report,
1222
+ blast = RuleRegexp[ 'Bio::Blast::Default::Report',
1127
1223
  /^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
1128
- tblast = RuleRegexp[ Bio::Blast::Default::Report_TBlast,
1224
+ tblast = RuleRegexp[ 'Bio::Blast::Default::Report_TBlast',
1129
1225
  /^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
1130
1226
 
1131
- blat = RuleRegexp[ Bio::Blat::Report,
1132
- /^psLayout version \d+\s*$/ ],
1133
- spidey = RuleRegexp[ Bio::Spidey::Report,
1227
+ blat = RuleRegexp[ 'Bio::Blat::Report',
1228
+ /^psLayout version \d+/ ],
1229
+ spidey = RuleRegexp[ 'Bio::Spidey::Report',
1134
1230
  /^\-\-SPIDEY version .+\-\-$/ ],
1135
- hmmer = RuleRegexp[ Bio::HMMER::Report,
1231
+ hmmer = RuleRegexp[ 'Bio::HMMER::Report',
1136
1232
  /^HMMER +\d+\./ ],
1137
- sim4 = RuleRegexp[ Bio::Sim4::Report,
1233
+ sim4 = RuleRegexp[ 'Bio::Sim4::Report',
1138
1234
  /^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
1139
1235
 
1140
- fastaformat = RuleProc.new(Bio::FastaFormat,
1141
- Bio::NBRF,
1142
- Bio::FastaNumericFormat) do |text|
1236
+ fastaformat = RuleProc.new('Bio::FastaFormat',
1237
+ 'Bio::NBRF',
1238
+ 'Bio::FastaNumericFormat') do |text|
1143
1239
  if /^>.+$/ =~ text
1144
1240
  case text
1145
1241
  when /^>([PF]1|[DR][LC]|N[13]|XX)\;.+/
@@ -1167,8 +1263,9 @@ module Bio
1167
1263
  # KEGG
1168
1264
  #aaindex.is_prior_to litdb
1169
1265
  #litdb.is_prior_to brite
1170
- brite.is_prior_to ko
1171
- ko.is_prior_to glycan
1266
+ brite.is_prior_to orthology
1267
+ orthology.is_prior_to drug
1268
+ drug.is_prior_to glycan
1172
1269
  glycan.is_prior_to enzyme
1173
1270
  enzyme.is_prior_to compound
1174
1271
  compound.is_prior_to reaction