bio 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (340) hide show
  1. data/bin/bioruby +14 -122
  2. data/bin/br_biofetch.rb +2 -2
  3. data/bin/br_bioflat.rb +2 -2
  4. data/bin/br_biogetseq.rb +2 -2
  5. data/bin/br_pmfetch.rb +3 -3
  6. data/doc/Changes-0.7.rd +77 -0
  7. data/doc/KEGG_API.rd +523 -232
  8. data/doc/KEGG_API.rd.ja +529 -207
  9. data/doc/Tutorial.rd +48 -11
  10. data/lib/bio.rb +59 -6
  11. data/lib/bio/alignment.rb +713 -103
  12. data/lib/bio/appl/bl2seq/report.rb +2 -18
  13. data/lib/bio/appl/blast.rb +108 -91
  14. data/lib/bio/appl/blast/format0.rb +33 -18
  15. data/lib/bio/appl/blast/format8.rb +6 -20
  16. data/lib/bio/appl/blast/report.rb +293 -429
  17. data/lib/bio/appl/blast/rexml.rb +8 -22
  18. data/lib/bio/appl/blast/wublast.rb +21 -12
  19. data/lib/bio/appl/blast/xmlparser.rb +180 -183
  20. data/lib/bio/appl/blat/report.rb +127 -30
  21. data/lib/bio/appl/clustalw.rb +87 -59
  22. data/lib/bio/appl/clustalw/report.rb +20 -22
  23. data/lib/bio/appl/emboss.rb +113 -20
  24. data/lib/bio/appl/fasta.rb +173 -198
  25. data/lib/bio/appl/fasta/format10.rb +244 -347
  26. data/lib/bio/appl/gcg/msf.rb +212 -0
  27. data/lib/bio/appl/gcg/seq.rb +195 -0
  28. data/lib/bio/appl/genscan/report.rb +5 -23
  29. data/lib/bio/appl/hmmer.rb +8 -45
  30. data/lib/bio/appl/hmmer/report.rb +2 -20
  31. data/lib/bio/appl/iprscan/report.rb +374 -0
  32. data/lib/bio/appl/mafft.rb +87 -50
  33. data/lib/bio/appl/mafft/report.rb +151 -44
  34. data/lib/bio/appl/muscle.rb +52 -0
  35. data/lib/bio/appl/phylip/alignment.rb +129 -0
  36. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  37. data/lib/bio/appl/probcons.rb +41 -0
  38. data/lib/bio/appl/psort.rb +89 -96
  39. data/lib/bio/appl/psort/report.rb +6 -22
  40. data/lib/bio/appl/pts1.rb +263 -0
  41. data/lib/bio/appl/sim4.rb +26 -36
  42. data/lib/bio/appl/sim4/report.rb +2 -18
  43. data/lib/bio/appl/sosui/report.rb +5 -20
  44. data/lib/bio/appl/spidey/report.rb +2 -2
  45. data/lib/bio/appl/targetp/report.rb +4 -20
  46. data/lib/bio/appl/tcoffee.rb +55 -0
  47. data/lib/bio/appl/tmhmm/report.rb +4 -20
  48. data/lib/bio/command.rb +235 -64
  49. data/lib/bio/data/aa.rb +21 -26
  50. data/lib/bio/data/codontable.rb +2 -20
  51. data/lib/bio/data/na.rb +19 -4
  52. data/lib/bio/db.rb +27 -12
  53. data/lib/bio/db/aaindex.rb +2 -20
  54. data/lib/bio/db/embl/common.rb +4 -21
  55. data/lib/bio/db/embl/embl.rb +33 -85
  56. data/lib/bio/db/embl/sptr.rb +612 -302
  57. data/lib/bio/db/embl/swissprot.rb +10 -29
  58. data/lib/bio/db/embl/trembl.rb +10 -29
  59. data/lib/bio/db/embl/uniprot.rb +10 -29
  60. data/lib/bio/db/fantom.rb +15 -20
  61. data/lib/bio/db/fasta.rb +3 -3
  62. data/lib/bio/db/genbank/common.rb +37 -46
  63. data/lib/bio/db/genbank/ddbj.rb +6 -18
  64. data/lib/bio/db/genbank/genbank.rb +47 -186
  65. data/lib/bio/db/genbank/genpept.rb +4 -17
  66. data/lib/bio/db/genbank/refseq.rb +4 -17
  67. data/lib/bio/db/gff.rb +103 -35
  68. data/lib/bio/db/go.rb +4 -20
  69. data/lib/bio/db/kegg/brite.rb +26 -36
  70. data/lib/bio/db/kegg/compound.rb +81 -85
  71. data/lib/bio/db/kegg/drug.rb +98 -0
  72. data/lib/bio/db/kegg/enzyme.rb +133 -110
  73. data/lib/bio/db/kegg/expression.rb +2 -20
  74. data/lib/bio/db/kegg/genes.rb +208 -238
  75. data/lib/bio/db/kegg/genome.rb +164 -285
  76. data/lib/bio/db/kegg/glycan.rb +114 -157
  77. data/lib/bio/db/kegg/keggtab.rb +242 -303
  78. data/lib/bio/db/kegg/kgml.rb +117 -160
  79. data/lib/bio/db/kegg/orthology.rb +112 -0
  80. data/lib/bio/db/kegg/reaction.rb +54 -69
  81. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  82. data/lib/bio/db/lasergene.rb +209 -0
  83. data/lib/bio/db/litdb.rb +3 -27
  84. data/lib/bio/db/medline.rb +228 -249
  85. data/lib/bio/db/nbrf.rb +3 -3
  86. data/lib/bio/db/newick.rb +510 -0
  87. data/lib/bio/db/nexus.rb +1854 -0
  88. data/lib/bio/db/pdb.rb +5 -17
  89. data/lib/bio/db/pdb/atom.rb +2 -18
  90. data/lib/bio/db/pdb/chain.rb +2 -18
  91. data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
  92. data/lib/bio/db/pdb/model.rb +2 -18
  93. data/lib/bio/db/pdb/pdb.rb +73 -34
  94. data/lib/bio/db/pdb/residue.rb +4 -20
  95. data/lib/bio/db/pdb/utils.rb +2 -18
  96. data/lib/bio/db/prosite.rb +403 -422
  97. data/lib/bio/db/rebase.rb +84 -40
  98. data/lib/bio/db/soft.rb +404 -0
  99. data/lib/bio/db/transfac.rb +5 -17
  100. data/lib/bio/feature.rb +106 -52
  101. data/lib/bio/io/das.rb +32 -42
  102. data/lib/bio/io/dbget.rb +2 -20
  103. data/lib/bio/io/ddbjxml.rb +77 -138
  104. data/lib/bio/io/ebisoap.rb +158 -0
  105. data/lib/bio/io/ensembl.rb +229 -0
  106. data/lib/bio/io/fastacmd.rb +89 -82
  107. data/lib/bio/io/fetch.rb +163 -96
  108. data/lib/bio/io/flatfile.rb +170 -73
  109. data/lib/bio/io/flatfile/bdb.rb +3 -16
  110. data/lib/bio/io/flatfile/index.rb +2 -2
  111. data/lib/bio/io/flatfile/indexer.rb +3 -2
  112. data/lib/bio/io/higet.rb +12 -31
  113. data/lib/bio/io/keggapi.rb +210 -269
  114. data/lib/bio/io/ncbisoap.rb +155 -0
  115. data/lib/bio/io/pubmed.rb +169 -147
  116. data/lib/bio/io/registry.rb +4 -20
  117. data/lib/bio/io/soapwsdl.rb +43 -38
  118. data/lib/bio/io/sql.rb +242 -305
  119. data/lib/bio/location.rb +407 -285
  120. data/lib/bio/map.rb +410 -0
  121. data/lib/bio/pathway.rb +558 -695
  122. data/lib/bio/reference.rb +272 -75
  123. data/lib/bio/sequence.rb +255 -13
  124. data/lib/bio/sequence/aa.rb +71 -10
  125. data/lib/bio/sequence/common.rb +187 -33
  126. data/lib/bio/sequence/compat.rb +59 -4
  127. data/lib/bio/sequence/format.rb +54 -7
  128. data/lib/bio/sequence/generic.rb +3 -3
  129. data/lib/bio/sequence/na.rb +328 -26
  130. data/lib/bio/shell.rb +11 -4
  131. data/lib/bio/shell/core.rb +221 -160
  132. data/lib/bio/shell/demo.rb +18 -15
  133. data/lib/bio/shell/interface.rb +14 -12
  134. data/lib/bio/shell/irb.rb +95 -0
  135. data/lib/bio/shell/object.rb +45 -26
  136. data/lib/bio/shell/plugin/blast.rb +42 -0
  137. data/lib/bio/shell/plugin/codon.rb +22 -14
  138. data/lib/bio/shell/plugin/das.rb +58 -0
  139. data/lib/bio/shell/plugin/emboss.rb +2 -2
  140. data/lib/bio/shell/plugin/entry.rb +22 -11
  141. data/lib/bio/shell/plugin/flatfile.rb +2 -2
  142. data/lib/bio/shell/plugin/keggapi.rb +13 -6
  143. data/lib/bio/shell/plugin/midi.rb +4 -4
  144. data/lib/bio/shell/plugin/obda.rb +2 -2
  145. data/lib/bio/shell/plugin/psort.rb +56 -0
  146. data/lib/bio/shell/plugin/seq.rb +35 -8
  147. data/lib/bio/shell/plugin/soap.rb +87 -0
  148. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
  149. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
  150. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
  151. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
  152. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
  153. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
  154. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
  156. data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
  159. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
  160. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
  161. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
  162. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
  163. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
  164. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
  165. data/lib/bio/shell/script.rb +25 -0
  166. data/lib/bio/shell/setup.rb +109 -0
  167. data/lib/bio/shell/web.rb +70 -58
  168. data/lib/bio/tree.rb +850 -0
  169. data/lib/bio/util/color_scheme.rb +84 -107
  170. data/lib/bio/util/color_scheme/buried.rb +5 -24
  171. data/lib/bio/util/color_scheme/helix.rb +5 -24
  172. data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
  173. data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
  174. data/lib/bio/util/color_scheme/strand.rb +5 -24
  175. data/lib/bio/util/color_scheme/taylor.rb +5 -24
  176. data/lib/bio/util/color_scheme/turn.rb +5 -24
  177. data/lib/bio/util/color_scheme/zappo.rb +5 -24
  178. data/lib/bio/util/contingency_table.rb +70 -43
  179. data/lib/bio/util/restriction_enzyme.rb +228 -0
  180. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  181. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  182. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  183. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  184. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  185. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  186. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  187. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  188. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  189. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  190. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  191. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  192. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  193. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  194. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  195. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  196. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  197. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  198. data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
  199. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  200. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  201. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  202. data/lib/bio/util/sirna.rb +4 -22
  203. data/sample/color_scheme_na.rb +4 -12
  204. data/sample/enzymes.rb +78 -0
  205. data/sample/goslim.rb +5 -13
  206. data/sample/psortplot_html.rb +4 -12
  207. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  208. data/test/data/embl/AB090716.embl.rel89 +63 -0
  209. data/test/data/fasta/example1.txt +75 -0
  210. data/test/data/fasta/example2.txt +21 -0
  211. data/test/data/iprscan/merged.raw +32 -0
  212. data/test/data/iprscan/merged.txt +74 -0
  213. data/test/data/soft/GDS100_partial.soft +92 -0
  214. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  215. data/test/functional/bio/io/test_ensembl.rb +103 -0
  216. data/test/functional/bio/io/test_soapwsdl.rb +5 -17
  217. data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
  218. data/test/unit/bio/appl/blast/test_report.rb +3 -16
  219. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
  220. data/test/unit/bio/appl/genscan/test_report.rb +3 -16
  221. data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
  222. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  223. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  224. data/test/unit/bio/appl/sosui/test_report.rb +3 -16
  225. data/test/unit/bio/appl/targetp/test_report.rb +3 -16
  226. data/test/unit/bio/appl/test_blast.rb +3 -16
  227. data/test/unit/bio/appl/test_fasta.rb +4 -16
  228. data/test/unit/bio/appl/test_pts1.rb +140 -0
  229. data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
  230. data/test/unit/bio/data/test_aa.rb +4 -17
  231. data/test/unit/bio/data/test_codontable.rb +3 -16
  232. data/test/unit/bio/data/test_na.rb +3 -3
  233. data/test/unit/bio/db/embl/test_common.rb +3 -16
  234. data/test/unit/bio/db/embl/test_embl.rb +3 -16
  235. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  236. data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
  237. data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
  238. data/test/unit/bio/db/kegg/test_genes.rb +3 -16
  239. data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
  240. data/test/unit/bio/db/test_aaindex.rb +2 -2
  241. data/test/unit/bio/db/test_fasta.rb +3 -16
  242. data/test/unit/bio/db/test_gff.rb +3 -16
  243. data/test/unit/bio/db/test_lasergene.rb +95 -0
  244. data/test/unit/bio/db/test_newick.rb +56 -0
  245. data/test/unit/bio/db/test_nexus.rb +360 -0
  246. data/test/unit/bio/db/test_prosite.rb +5 -18
  247. data/test/unit/bio/db/test_rebase.rb +11 -25
  248. data/test/unit/bio/db/test_soft.rb +138 -0
  249. data/test/unit/bio/io/test_ddbjxml.rb +5 -17
  250. data/test/unit/bio/io/test_ensembl.rb +109 -0
  251. data/test/unit/bio/io/test_fastacmd.rb +3 -16
  252. data/test/unit/bio/io/test_flatfile.rb +237 -0
  253. data/test/unit/bio/io/test_soapwsdl.rb +4 -17
  254. data/test/unit/bio/sequence/test_aa.rb +3 -3
  255. data/test/unit/bio/sequence/test_common.rb +3 -16
  256. data/test/unit/bio/sequence/test_compat.rb +3 -16
  257. data/test/unit/bio/sequence/test_na.rb +29 -3
  258. data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
  259. data/test/unit/bio/test_alignment.rb +16 -27
  260. data/test/unit/bio/test_command.rb +242 -25
  261. data/test/unit/bio/test_db.rb +3 -16
  262. data/test/unit/bio/test_feature.rb +4 -16
  263. data/test/unit/bio/test_location.rb +4 -16
  264. data/test/unit/bio/test_map.rb +230 -0
  265. data/test/unit/bio/test_pathway.rb +4 -16
  266. data/test/unit/bio/test_reference.rb +2 -2
  267. data/test/unit/bio/test_sequence.rb +7 -19
  268. data/test/unit/bio/test_shell.rb +3 -16
  269. data/test/unit/bio/test_tree.rb +593 -0
  270. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  271. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  272. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  273. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
  274. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  275. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  276. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  277. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  278. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  279. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  280. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  281. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  282. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  283. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  284. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  285. data/test/unit/bio/util/test_color_scheme.rb +6 -18
  286. data/test/unit/bio/util/test_contingency_table.rb +6 -18
  287. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  288. data/test/unit/bio/util/test_sirna.rb +3 -16
  289. metadata +228 -169
  290. data/doc/BioRuby.rd.ja +0 -225
  291. data/doc/Design.rd.ja +0 -341
  292. data/doc/TODO.rd.ja +0 -138
  293. data/lib/bio/appl/fasta/format6.rb +0 -37
  294. data/lib/bio/db/kegg/cell.rb +0 -88
  295. data/lib/bio/db/kegg/ko.rb +0 -178
  296. data/lib/bio/shell/rails/Rakefile +0 -10
  297. data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
  298. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
  299. data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
  300. data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
  301. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
  302. data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
  303. data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
  304. data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
  305. data/lib/bio/shell/rails/config/boot.rb +0 -19
  306. data/lib/bio/shell/rails/config/database.yml +0 -85
  307. data/lib/bio/shell/rails/config/environment.rb +0 -53
  308. data/lib/bio/shell/rails/config/environments/development.rb +0 -19
  309. data/lib/bio/shell/rails/config/environments/production.rb +0 -19
  310. data/lib/bio/shell/rails/config/environments/test.rb +0 -19
  311. data/lib/bio/shell/rails/config/routes.rb +0 -19
  312. data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
  313. data/lib/bio/shell/rails/public/404.html +0 -8
  314. data/lib/bio/shell/rails/public/500.html +0 -8
  315. data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
  316. data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
  317. data/lib/bio/shell/rails/public/dispatch.rb +0 -10
  318. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  319. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  320. data/lib/bio/shell/rails/public/index.html +0 -277
  321. data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
  322. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
  323. data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
  324. data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
  325. data/lib/bio/shell/rails/public/robots.txt +0 -1
  326. data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
  327. data/lib/bio/shell/rails/script/about +0 -3
  328. data/lib/bio/shell/rails/script/breakpointer +0 -3
  329. data/lib/bio/shell/rails/script/console +0 -3
  330. data/lib/bio/shell/rails/script/destroy +0 -3
  331. data/lib/bio/shell/rails/script/generate +0 -3
  332. data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
  333. data/lib/bio/shell/rails/script/performance/profiler +0 -3
  334. data/lib/bio/shell/rails/script/plugin +0 -3
  335. data/lib/bio/shell/rails/script/process/reaper +0 -3
  336. data/lib/bio/shell/rails/script/process/spawner +0 -3
  337. data/lib/bio/shell/rails/script/process/spinner +0 -3
  338. data/lib/bio/shell/rails/script/runner +0 -3
  339. data/lib/bio/shell/rails/script/server +0 -42
  340. data/lib/bio/shell/rails/test/test_helper.rb +0 -28
@@ -1,114 +1,181 @@
1
1
  #
2
2
  # = bio/io/biofetch.rb - BioFetch access module
3
3
  #
4
- # Copyright:: Copyright (C) 2002, 2005
5
- # Toshiaki Katayama <k@bioruby.org>
6
- # License:: LGPL
4
+ # Copyright:: Copyright (C) 2002, 2005 Toshiaki Katayama <k@bioruby.org>,
5
+ # Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
6
+ # License:: The Ruby License
7
7
  #
8
- # $Id: fetch.rb,v 1.4 2005/12/18 15:58:42 k Exp $
8
+ # $Id: fetch.rb,v 1.10 2007/04/05 23:35:41 trevor Exp $
9
9
  #
10
- #--
10
+ # == DESCRIPTION
11
11
  #
12
- # This library is free software; you can redistribute it and/or
13
- # modify it under the terms of the GNU Lesser General Public
14
- # License as published by the Free Software Foundation; either
15
- # version 2 of the License, or (at your option) any later version.
12
+ # Using BioRuby BioFetch server
16
13
  #
17
- # This library is distributed in the hope that it will be useful,
18
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
19
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
- # Lesser General Public License for more details.
14
+ # br_server = Bio::Fetch.new()
15
+ # puts br_server.databases
16
+ # puts br_server.formats('embl')
17
+ # puts br_server.maxids
21
18
  #
22
- # You should have received a copy of the GNU Lesser General Public
23
- # License along with this library; if not, write to the Free Software
24
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
- #
26
- #++
19
+ # Using EBI BioFetch server
27
20
  #
21
+ # ebi_server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
22
+ # puts ebi_server.fetch('embl', 'J00231', 'raw')
23
+ # puts ebi_server.fetch('embl', 'J00231', 'html')
24
+ # puts Bio::Fetch.query('genbank', 'J00231')
25
+ # puts Bio::Fetch.query('genbank', 'J00231', 'raw', 'fasta')
26
+ #
28
27
 
29
28
  require 'uri'
30
- require 'net/http'
29
+ require 'bio/command'
31
30
 
32
31
  module Bio
33
-
34
- class Fetch
35
-
36
- # Create a new Bio::Fetch server object.
37
- # Use Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch') to connect
38
- # to EBI BioFetch server.
39
- def initialize(url = 'http://bioruby.org/cgi-bin/biofetch.rb')
40
- schema, user, @host, @port, reg, @path, = URI.split(url)
41
- end
42
-
43
- # Set default database to dbname (prepare for get_by_id).
44
- attr_accessor :database
45
-
46
- # Get raw database entry by id (mainly used by Bio::Registry).
47
- def get_by_id(id)
48
- fetch(@database, id)
49
- end
50
-
51
- # Fetch a database entry as specified by database (db), entry id (id),
52
- # 'raw' text or 'html' (style), and format. When using BioRuby's
53
- # BioFetch server, value for the format should not be set.
54
- def fetch(db, id, style = 'raw', format = nil)
55
- data = [ "db=#{db}", "id=#{id}", "style=#{style}" ]
56
- data.push("format=#{format}") if format
57
- data = data.join('&')
58
-
59
- responce, result = Net::HTTP.new(@host, @port).post(@path, data)
60
- return result
61
- end
62
-
63
- # Short cut for using BioRuby's BioFetch server. You can fetch an entry
64
- # without creating instance of BioFetch server.
65
- def self.query(*args)
66
- self.new.fetch(*args)
67
- end
68
-
69
- # What databases are available?
70
- def databases
71
- query = "info=dbs"
72
- responce, result = Net::HTTP.new(@host, @port).post(@path, query)
73
- return result
74
- end
75
-
76
- # What formats does the database X have?
77
- def formats(database = @database)
78
- if database
79
- query = "info=formats;db=#{database}"
80
- responce, result = Net::HTTP.new(@host, @port).post(@path, query)
81
- return result
32
+ # = DESCRIPTION
33
+ # The Bio::Fetch class provides an interface to dbfetch servers. Given
34
+ # a database name and an accession number, these servers return the associated
35
+ # record. For example, for the embl database on the EBI, that would be a
36
+ # nucleic or amino acid sequence.
37
+ #
38
+ # Possible dbfetch servers include:
39
+ # * http://bioruby.org/cgi-bin/biofetch.rb (default)
40
+ # * http://www.ebi.ac.uk/cgi-bin/dbfetch
41
+ #
42
+ # If you're behind a proxy server, be sure to set your HTTP_PROXY
43
+ # environment variable accordingly.
44
+ #
45
+ # = USAGE
46
+ # require 'bio'
47
+ #
48
+ # # Retrieve the sequence of accession number M33388 from the EMBL
49
+ # # database.
50
+ # server = Bio::Fetch.new() #uses default server
51
+ # puts server.fetch('embl','M33388')
52
+ #
53
+ # # Do the same thing without creating a Bio::Fetch object. This method always
54
+ # # uses the default dbfetch server: http://bioruby.org/cgi-bin/biofetch.rb
55
+ # puts Bio::Fetch.query('embl','M33388')
56
+ #
57
+ # # To know what databases are available on the bioruby dbfetch server:
58
+ # server = Bio::Fetch.new()
59
+ # puts server.databases
60
+ #
61
+ # # Some databases provide their data in different formats (e.g. 'fasta',
62
+ # # 'genbank' or 'embl'). To check which formats are supported by a given
63
+ # # database:
64
+ # puts server.formats('embl')
65
+ #
66
+ class Fetch
67
+
68
+ # Create a new Bio::Fetch server object that can subsequently be queried
69
+ # using the Bio::Fetch#fetch method
70
+ # ---
71
+ # *Arguments*:
72
+ # * _url_: URL of dbfetch server (default = 'http://bioruby.org/cgi-bin/biofetch.rb')
73
+ # *Returns*:: Bio::Fetch object
74
+ def initialize(url = 'http://bioruby.org/cgi-bin/biofetch.rb')
75
+ @url = url
76
+ schema, user, @host, @port, reg, @path, = URI.split(@url)
82
77
  end
78
+
79
+ # The default database to query
80
+ #--
81
+ # This will be used by the get_by_id method
82
+ #++
83
+ attr_accessor :database
84
+
85
+ # Get raw database entry by id. This method lets the Bio::Registry class
86
+ # use Bio::Fetch objects.
87
+ def get_by_id(id)
88
+ fetch(@database, id)
89
+ end
90
+
91
+ # Fetch a database entry as specified by database (db), entry id (id),
92
+ # 'raw' text or 'html' (style), and format. When using BioRuby's
93
+ # BioFetch server, value for the format should not be set.
94
+ # Examples:
95
+ # server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
96
+ # puts server.fetch('embl','M33388','raw','fasta')
97
+ # puts server.fetch('refseq','NM_12345','html','embl')
98
+ # ---
99
+ # *Arguments*:
100
+ # * _database_: name of database to query (see Bio::Fetch#databases to get list of supported databases)
101
+ # * _id_: single ID or ID list separated by commas or white space
102
+ # * _style_: [raw|html] (default = 'raw')
103
+ # * _format_: name of output format (see Bio::Fetch#formats)
104
+ def fetch(db, id, style = 'raw', format = nil)
105
+ query = [ "db=#{db}", "id=#{id}", "style=#{style}" ]
106
+ query.push("format=#{format}") if format
107
+ query = query.join('&')
108
+
109
+ Bio::Command.read_uri(@url + '?' + URI.escape(query))
110
+ end
111
+
112
+ # Shortcut for using BioRuby's BioFetch server. You can fetch an entry
113
+ # without creating an instance of BioFetch server. This method uses the
114
+ # default dbfetch server, which is http://bioruby.org/cgi-bin/biofetch.rb
115
+ #
116
+ # Example:
117
+ # puts Bio::Fetch.query('refseq','NM_12345')
118
+ #
119
+ # ---
120
+ # *Arguments*:
121
+ # * _database_: name of database to query (see Bio::Fetch#databases to get list of supported databases)
122
+ # * _id_: single ID or ID list separated by commas or white space
123
+ # * _style_: [raw|html] (default = 'raw')
124
+ # * _format_: name of output format (see Bio::Fetch#formats)
125
+ def self.query(*args)
126
+ self.new.fetch(*args)
127
+ end
128
+
129
+ # Using this method, the user can ask a dbfetch server what databases
130
+ # it supports. This would normally be the first step you'd take when
131
+ # you use a dbfetch server for the first time.
132
+ # Example:
133
+ # server = Bio::Fetch.new()
134
+ # puts server.databases # returns "aa aax bl cpd dgenes dr ec eg emb ..."
135
+ #
136
+ # This method only works for the bioruby dbfetch server. For a list
137
+ # of databases available from the EBI, see the EBI website at
138
+ # http://www.ebi.ac.uk/cgi-bin/dbfetch/
139
+ # ---
140
+ # *Returns*:: array of database names
141
+ def databases
142
+ query = "info=dbs"
143
+
144
+ Bio::Command.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
145
+ end
146
+
147
+ # Lists the formats that are available for a given database. Like the
148
+ # Bio::Fetch#databases method, this method is only available on
149
+ # the bioruby dbfetch server.
150
+ # Example:
151
+ # server = Bio::Fetch.new()
152
+ # puts server.formats('embl') # returns "default fasta"
153
+ # ---
154
+ # *Arguments*:
155
+ # * _database_:: name of database you want the supported formats for
156
+ # *Returns*:: array of formats
157
+ def formats(database = @database)
158
+ if database
159
+ query = "info=formats;db=#{database}"
160
+
161
+ Bio::Command.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
162
+ end
163
+ end
164
+
165
+ # A dbfetch server will only return entries up to a given maximum number.
166
+ # This method retrieves that number from the server. As for the databases
167
+ # and formats methods, the maxids method only works for the bioruby
168
+ # dbfetch server.
169
+ # ---
170
+ # *Arguments*: none
171
+ # *Returns*:: number
172
+ def maxids
173
+ query = "info=maxids"
174
+
175
+ Bio::Command.read_uri(@url + '?' + URI.escape(query)).to_i
176
+ end
177
+
83
178
  end
84
179
 
85
- # How many entries can be retrieved simultaneously?
86
- def maxids
87
- query = "info=maxids"
88
- responce, result = Net::HTTP.new(@host, @port).post(@path, query)
89
- return result
90
- end
91
-
92
- end
93
-
94
180
  end # module Bio
95
181
 
96
-
97
-
98
- if __FILE__ == $0
99
-
100
- # bfserv = Bio::Fetch.new('http://www.ebi.ac.uk:80/cgi-bin/dbfetch')
101
- bfserv = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
102
- puts "# test 1"
103
- puts bfserv.fetch('embl', 'J00231', 'raw')
104
- puts "# test 2"
105
- puts bfserv.fetch('embl', 'J00231', 'html')
106
-
107
- puts "# test 3"
108
- puts Bio::Fetch.query('genbank', 'J00231')
109
- puts "# test 4"
110
- puts Bio::Fetch.query('genbank', 'J00231', 'raw', 'fasta')
111
-
112
- end
113
-
114
-
@@ -3,9 +3,9 @@
3
3
  #
4
4
  # Copyright (C) 2001-2006 Naohisa Goto <ng@bioruby.org>
5
5
  #
6
- # License:: Ruby's
6
+ # License:: The Ruby License
7
7
  #
8
- # $Id: flatfile.rb,v 1.46 2006/02/22 10:01:27 ngoto Exp $
8
+ # $Id: flatfile.rb,v 1.60 2007/07/09 14:08:34 ngoto Exp $
9
9
  #
10
10
  #
11
11
  # Bio::FlatFile is a helper and wrapper class to read a biological data file.
@@ -34,7 +34,6 @@ module Bio
34
34
  @path = path
35
35
  # initialize prefetch buffer
36
36
  @buffer = ''
37
- @path = path
38
37
  end
39
38
 
40
39
  # Creates a new input stream wrapper from the given IO object.
@@ -262,6 +261,9 @@ module Bio
262
261
  # the last entry read from the stream
263
262
  attr_reader :entry
264
263
 
264
+ # a flag to write down entry start and end positions
265
+ attr_accessor :entry_pos_flag
266
+
265
267
  # start position of the entry
266
268
  attr_reader :entry_start_pos
267
269
 
@@ -290,6 +292,7 @@ module Bio
290
292
  end
291
293
  end
292
294
  @delimiter_overrun = klass::DELIMITER_OVERRUN rescue nil
295
+ @entry_pos_flag = nil
293
296
  end
294
297
 
295
298
  # (String) delimiter indicates the end of a entry.
@@ -330,7 +333,7 @@ module Bio
330
333
 
331
334
  # gets a entry
332
335
  def get_entry
333
- p0 = @stream.pos
336
+ p0 = @entry_pos_flag ? @stream.pos : nil
334
337
  e = @stream.gets(@delimiter)
335
338
  if e and @delimiter_overrun then
336
339
  if e[-@delimiter.size, @delimiter.size ] == @delimiter then
@@ -339,7 +342,7 @@ module Bio
339
342
  @stream.ungets(overrun)
340
343
  end
341
344
  end
342
- p1 = @stream.pos
345
+ p1 = @entry_pos_flag ? @stream.pos : nil
343
346
  @entry_start_pos = p0
344
347
  @entry = e
345
348
  @entry_ended_pos = p1
@@ -422,7 +425,7 @@ module Bio
422
425
  # check if file is filename or IO object
423
426
  unless file.respond_to?(:gets)
424
427
  # 'file' is a filename
425
- self.open_file(file, *arg, &block)
428
+ _open_file(dbclass, file, *arg, &block)
426
429
  else
427
430
  # 'file' is a IO object
428
431
  ff = self.new(dbclass, file)
@@ -462,15 +465,27 @@ module Bio
462
465
  # Otherwise, it returns a new FlatFile object.
463
466
  #
464
467
  def self.open_file(filename, *arg)
468
+ _open_file(nil, filename, *arg)
469
+ end
470
+
471
+ # Same as FlatFile.open(dbclass, filename, *arg),
472
+ # except that it only accept filename and doesn't accept IO object.
473
+ #
474
+ # It can accept a block.
475
+ # If a block is given, it returns the block's return value.
476
+ # Otherwise, it returns a new FlatFile object.
477
+ #
478
+ def self._open_file(dbclass, filename, *arg)
465
479
  if block_given? then
466
480
  BufferedInputStream.open_file(filename, *arg) do |stream|
467
- yield self.new(nil, stream)
481
+ yield self.new(dbclass, stream)
468
482
  end
469
483
  else
470
484
  stream = BufferedInputStream.open_file(filename, *arg)
471
- self.new(nil, stream)
485
+ self.new(dbclass, stream)
472
486
  end
473
487
  end
488
+ private_class_method :_open_file
474
489
 
475
490
  # Opens URI specified as _uri_.
476
491
  # _uri_ must be a String or URI object.
@@ -493,6 +508,20 @@ module Bio
493
508
  end
494
509
  end
495
510
 
511
+ # Executes the block for every entry in the stream.
512
+ # Same as FlatFile.open(*arg) { |ff| ff.each { |entry| ... }}.
513
+ #
514
+ # * Example
515
+ # Bio::FlatFile.foreach('test.fst') { |e| puts e.definition }
516
+ #
517
+ def self.foreach(*arg)
518
+ self.open(*arg) do |flatfileobj|
519
+ flatfileobj.each do |entry|
520
+ yield entry
521
+ end
522
+ end
523
+ end
524
+
496
525
  # Same as FlatFile.open, except that 'stream' should be an opened
497
526
  # stream object (IO, File, ..., anything that has the 'gets' method).
498
527
  #
@@ -518,13 +547,11 @@ module Bio
518
547
  #
519
548
  def initialize(dbclass, stream)
520
549
  # 2nd arg: IO object
521
- if @stream.kind_of?(BufferedInputStream)
550
+ if stream.kind_of?(BufferedInputStream)
522
551
  @stream = stream
523
552
  else
524
553
  @stream = BufferedInputStream.for_io(stream)
525
554
  end
526
- # default is raw mode
527
- self.raw = false
528
555
  # 1st arg: database class (or file format autodetection)
529
556
  if dbclass then
530
557
  self.dbclass = dbclass
@@ -534,6 +561,8 @@ module Bio
534
561
  #
535
562
  @skip_leader_mode = :firsttime
536
563
  @firsttime_flag = true
564
+ # default raw mode is false
565
+ self.raw = false
537
566
  end
538
567
 
539
568
  # The mode how to skip leader of the data.
@@ -563,8 +592,14 @@ module Bio
563
592
  @stream.path
564
593
  end
565
594
 
595
+ # Exception class to be raised when data format hasn't been specified.
596
+ class UnknownDataFormatError < IOError
597
+ end
598
+
566
599
  # Get next entry.
567
600
  def next_entry
601
+ raise UnknownDataFormatError,
602
+ 'file format auto-detection failed?' unless @dbclass
568
603
  if @skip_leader_mode and
569
604
  ((@firsttime_flag and @skip_leader_mode == :firsttime) or
570
605
  @skip_leader_mode == :everytime)
@@ -587,6 +622,16 @@ module Bio
587
622
  @splitter.entry
588
623
  end
589
624
 
625
+ # a flag to write down entry start and end positions
626
+ def entry_pos_flag
627
+ @splitter.entry_pos_flag
628
+ end
629
+
630
+ # Sets flag to write down entry start and end positions
631
+ def entry_pos_flag=(x)
632
+ @splitter.entry_pos_flag = x
633
+ end
634
+
590
635
  # start position of the last entry
591
636
  def entry_start_pos
592
637
  @splitter.entry_start_pos
@@ -736,21 +781,26 @@ module Bio
736
781
 
737
782
  include TSort
738
783
 
784
+ # Array to store autodetection rules.
785
+ # This is defined only for inspect.
786
+ class RulesArray < Array
787
+ # visualize contents
788
+ def inspect
789
+ "[#{self.collect { |e| e.name.inspect }.join(' ')}]"
790
+ end
791
+ end #class RulesArray
792
+
739
793
  # Template of a single rule of autodetection
740
794
  class RuleTemplate
741
795
  # Creates a new element.
742
796
  def self.[](*arg)
743
797
  self.new(*arg)
744
798
  end
745
-
799
+
746
800
  # Creates a new element.
747
801
  def initialize
748
- a = Array.new
749
- def a.inspect
750
- "[#{self.collect { |e| e.name.inspect }.join(' ')}]"
751
- end
752
- @higher_priority_elements = a.clone
753
- @lower_priority_elements = a.clone
802
+ @higher_priority_elements = RulesArray.new
803
+ @lower_priority_elements = RulesArray.new
754
804
  @name = nil
755
805
  end
756
806
 
@@ -784,6 +834,24 @@ module Bio
784
834
  def guess(text, meta)
785
835
  nil
786
836
  end
837
+
838
+ private
839
+ # Gets constant from constant name given as a string.
840
+ def str2const(str)
841
+ const = Object
842
+ str.split(/\:\:/).each do |x|
843
+ const = const.const_get(x)
844
+ end
845
+ const
846
+ end
847
+
848
+ # Gets database class from given object.
849
+ # Current implementation is:
850
+ # if _obj_ is a kind of String, it is regarded as a constant name.
851
+ # Otherwise, returns _obj_ as is.
852
+ def get_dbclass(obj)
853
+ obj.kind_of?(String) ? str2const(obj) : obj
854
+ end
787
855
  end #class Rule_Template
788
856
 
789
857
  # RuleDebug is a class for debugging autodetect classes/methods
@@ -835,40 +903,49 @@ module Bio
835
903
  def initialize(dbclass, re)
836
904
  super()
837
905
  @re = re
838
- @dbclass = dbclass
839
- @dbclasses = [ dbclass ]
840
906
  @name = dbclass.to_s
907
+ @dbclass = nil
908
+ @dbclass_lazy = dbclass
909
+ end
910
+
911
+ # database class (lazy evaluation)
912
+ def dbclass
913
+ unless @dbclass
914
+ @dbclass = get_dbclass(@dbclass_lazy)
915
+ end
916
+ @dbclass
917
+ end
918
+ private :dbclass
919
+
920
+ # returns database classes
921
+ def dbclasses
922
+ [ dbclass ]
841
923
  end
842
924
 
843
925
  # If given text matches the regexp, returns the database class.
844
926
  # Otherwise, returns nil or false.
845
927
  # _meta_ is ignored.
846
928
  def guess(text, meta)
847
- @re =~ text ? @dbclass : nil
929
+ @re =~ text ? dbclass : nil
848
930
  end
849
931
  end #class RuleRegexp
850
932
 
851
933
  # An autodetection rule that uses multiple regular expressions.
852
- class RuleRegexp2 < RuleTemplate
934
+ # If given string matches one of the regular expressions,
935
+ # returns the database class.
936
+ class RuleRegexp2 < RuleRegexp
853
937
  # Creates a new instance.
854
938
  def initialize(dbclass, *regexps)
855
- super()
939
+ super(dbclass, nil)
856
940
  @regexps = regexps
857
- @dbclass = dbclass
858
- @dbclasses = [ dbclass ]
859
- if name
860
- @name = name
861
- else
862
- @name = @dbclass.to_s
863
- end
864
941
  end
865
942
 
866
- # If given text matches the regexp, returns the database class.
943
+ # If given text matches one of the regexps, returns the database class.
867
944
  # Otherwise, returns nil or false.
868
945
  # _meta_ is ignored.
869
946
  def guess(text, meta)
870
947
  @regexps.each do |re|
871
- return @dbclass if re =~ text
948
+ return dbclass if re =~ text
872
949
  end
873
950
  nil
874
951
  end
@@ -880,10 +957,19 @@ module Bio
880
957
  def initialize(*dbclasses, &proc)
881
958
  super()
882
959
  @proc = proc
883
- @dbclasses = dbclasses
960
+ @dbclasses = nil
961
+ @dbclasses_lazy = dbclasses
884
962
  @name = dbclasses.collect { |x| x.to_s }.join('|')
885
963
  end
886
964
 
965
+ # database classes (lazy evaluation)
966
+ def dbclasses
967
+ unless @dbclasses
968
+ @dbclasses = @dbclasses_lazy.collect { |x| get_dbclass(x) }
969
+ end
970
+ @dbclasses
971
+ end
972
+
887
973
  # If given text (and/or meta information) is known, returns
888
974
  # the database class.
889
975
  # Otherwise, returns nil or false.
@@ -1039,22 +1125,23 @@ module Bio
1039
1125
  # make a default of default autodetect object
1040
1126
  def self.make_default
1041
1127
  a = self[
1042
- genbank = RuleRegexp[ Bio::GenBank,
1128
+ genbank = RuleRegexp[ 'Bio::GenBank',
1043
1129
  /^LOCUS .+ bp .*[a-z]*[DR]?NA/ ],
1044
- genpept = RuleRegexp[ Bio::GenPept,
1130
+ genpept = RuleRegexp[ 'Bio::GenPept',
1045
1131
  /^LOCUS .+ aa .+/ ],
1046
- medline = RuleRegexp[ Bio::MEDLINE,
1132
+ medline = RuleRegexp[ 'Bio::MEDLINE',
1047
1133
  /^UI \- [0-9]+$/ ],
1048
- embl = RuleRegexp[ Bio::EMBL,
1134
+ embl = RuleRegexp[ 'Bio::EMBL',
1049
1135
  /^ID .+\; .*(DNA|RNA|XXX)\;/ ],
1050
- sptr = RuleRegexp[ Bio::SPTR,
1051
- /^ID .+\; *PRT\;/ ],
1052
- prosite = RuleRegexp[ Bio::PROSITE,
1136
+ sptr = RuleRegexp2[ 'Bio::SPTR',
1137
+ /^ID .+\; *PRT\;/,
1138
+ /^ID [-A-Za-z0-9_\.]+ .+\; *[0-9]+ *AA\./ ],
1139
+ prosite = RuleRegexp[ 'Bio::PROSITE',
1053
1140
  /^ID [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
1054
- transfac = RuleRegexp[ Bio::TRANSFAC,
1141
+ transfac = RuleRegexp[ 'Bio::TRANSFAC',
1055
1142
  /^AC [-A-Za-z0-9_\.]+$/ ],
1056
1143
 
1057
- aaindex = RuleProc.new(Bio::AAindex1, Bio::AAindex2) do |text|
1144
+ aaindex = RuleProc.new('Bio::AAindex1', 'Bio::AAindex2') do |text|
1058
1145
  if /^H [-A-Z0-9_\.]+$/ =~ text then
1059
1146
  if text =~ /^M [rc]/ then
1060
1147
  Bio::AAindex2
@@ -1068,33 +1155,35 @@ module Bio
1068
1155
  end
1069
1156
  end,
1070
1157
 
1071
- litdb = RuleRegexp[ Bio::LITDB,
1158
+ litdb = RuleRegexp[ 'Bio::LITDB',
1072
1159
  /^CODE [0-9]+$/ ],
1073
- brite = RuleRegexp[ Bio::KEGG::BRITE,
1160
+ brite = RuleRegexp[ 'Bio::KEGG::BRITE',
1074
1161
  /^Entry [A-Z0-9]+/ ],
1075
- ko = RuleRegexp[ Bio::KEGG::KO,
1162
+ orthology = RuleRegexp[ 'Bio::KEGG::ORTHOLOGY',
1076
1163
  /^ENTRY .+ KO\s*/ ],
1077
- glycan = RuleRegexp[ Bio::KEGG::GLYCAN,
1164
+ drug = RuleRegexp[ 'Bio::KEGG::DRUG',
1165
+ /^ENTRY .+ Drug\s*/ ],
1166
+ glycan = RuleRegexp[ 'Bio::KEGG::GLYCAN',
1078
1167
  /^ENTRY .+ Glycan\s*/ ],
1079
- enzyme = RuleRegexp2[ Bio::KEGG::ENZYME,
1168
+ enzyme = RuleRegexp2[ 'Bio::KEGG::ENZYME',
1080
1169
  /^ENTRY EC [0-9\.]+$/,
1081
1170
  /^ENTRY .+ Enzyme\s*/
1082
1171
  ],
1083
- compound = RuleRegexp2[ Bio::KEGG::COMPOUND,
1172
+ compound = RuleRegexp2[ 'Bio::KEGG::COMPOUND',
1084
1173
  /^ENTRY C[A-Za-z0-9\._]+$/,
1085
1174
  /^ENTRY .+ Compound\s*/
1086
1175
  ],
1087
- reaction = RuleRegexp2[ Bio::KEGG::REACTION,
1176
+ reaction = RuleRegexp2[ 'Bio::KEGG::REACTION',
1088
1177
  /^ENTRY R[A-Za-z0-9\._]+$/,
1089
1178
  /^ENTRY .+ Reaction\s*/
1090
1179
  ],
1091
- genes = RuleRegexp[ Bio::KEGG::GENES,
1092
- /^ENTRY .+ (CDS|gene|.*RNA) / ],
1093
- genome = RuleRegexp[ Bio::KEGG::GENOME,
1180
+ genes = RuleRegexp[ 'Bio::KEGG::GENES',
1181
+ /^ENTRY .+ (CDS|gene|.*RNA|Contig) / ],
1182
+ genome = RuleRegexp[ 'Bio::KEGG::GENOME',
1094
1183
  /^ENTRY [a-z]+$/ ],
1095
1184
 
1096
- fantom = RuleProc.new(Bio::FANTOM::MaXML::Cluster,
1097
- Bio::FANTOM::MaXML::Sequence) do |text|
1185
+ fantom = RuleProc.new('Bio::FANTOM::MaXML::Cluster',
1186
+ 'Bio::FANTOM::MaXML::Sequence') do |text|
1098
1187
  if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
1099
1188
  case $1
1100
1189
  when 'clusters'
@@ -1109,37 +1198,44 @@ module Bio
1109
1198
  end
1110
1199
  end,
1111
1200
 
1112
- pdb = RuleRegexp[ Bio::PDB,
1201
+ pdb = RuleRegexp[ 'Bio::PDB',
1113
1202
  /^HEADER .{40}\d\d\-[A-Z]{3}\-\d\d [0-9A-Z]{4}/ ],
1114
- het = RuleRegexp[ Bio::PDB::ChemicalComponent,
1203
+ het = RuleRegexp[ 'Bio::PDB::ChemicalComponent',
1115
1204
  /^RESIDUE +.+ +\d+\s*$/ ],
1116
1205
 
1117
- clustal = RuleRegexp[ Bio::ClustalW::Report,
1118
- /^CLUSTAL .*\(.*\).*sequence +alignment/ ],
1206
+ clustal = RuleRegexp2[ 'Bio::ClustalW::Report',
1207
+ /^CLUSTAL .*\(.*\).*sequence +alignment/,
1208
+ /^CLUSTAL FORMAT for T-COFFEE/ ],
1209
+
1210
+ gcg_msf = RuleRegexp[ 'Bio::GCG::Msf',
1211
+ /^!!(N|A)A_MULTIPLE_ALIGNMENT .+/ ],
1212
+
1213
+ gcg_seq = RuleRegexp[ 'Bio::GCG::Seq',
1214
+ /^!!(N|A)A_SEQUENCE .+/ ],
1119
1215
 
1120
- blastxml = RuleRegexp[ Bio::Blast::Report,
1216
+ blastxml = RuleRegexp[ 'Bio::Blast::Report',
1121
1217
  /\<\!DOCTYPE BlastOutput PUBLIC / ],
1122
- wublast = RuleRegexp[ Bio::Blast::WU::Report,
1218
+ wublast = RuleRegexp[ 'Bio::Blast::WU::Report',
1123
1219
  /^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
1124
- wutblast = RuleRegexp[ Bio::Blast::WU::Report_TBlast,
1220
+ wutblast = RuleRegexp[ 'Bio::Blast::WU::Report_TBlast',
1125
1221
  /^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
1126
- blast = RuleRegexp[ Bio::Blast::Default::Report,
1222
+ blast = RuleRegexp[ 'Bio::Blast::Default::Report',
1127
1223
  /^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
1128
- tblast = RuleRegexp[ Bio::Blast::Default::Report_TBlast,
1224
+ tblast = RuleRegexp[ 'Bio::Blast::Default::Report_TBlast',
1129
1225
  /^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
1130
1226
 
1131
- blat = RuleRegexp[ Bio::Blat::Report,
1132
- /^psLayout version \d+\s*$/ ],
1133
- spidey = RuleRegexp[ Bio::Spidey::Report,
1227
+ blat = RuleRegexp[ 'Bio::Blat::Report',
1228
+ /^psLayout version \d+/ ],
1229
+ spidey = RuleRegexp[ 'Bio::Spidey::Report',
1134
1230
  /^\-\-SPIDEY version .+\-\-$/ ],
1135
- hmmer = RuleRegexp[ Bio::HMMER::Report,
1231
+ hmmer = RuleRegexp[ 'Bio::HMMER::Report',
1136
1232
  /^HMMER +\d+\./ ],
1137
- sim4 = RuleRegexp[ Bio::Sim4::Report,
1233
+ sim4 = RuleRegexp[ 'Bio::Sim4::Report',
1138
1234
  /^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
1139
1235
 
1140
- fastaformat = RuleProc.new(Bio::FastaFormat,
1141
- Bio::NBRF,
1142
- Bio::FastaNumericFormat) do |text|
1236
+ fastaformat = RuleProc.new('Bio::FastaFormat',
1237
+ 'Bio::NBRF',
1238
+ 'Bio::FastaNumericFormat') do |text|
1143
1239
  if /^>.+$/ =~ text
1144
1240
  case text
1145
1241
  when /^>([PF]1|[DR][LC]|N[13]|XX)\;.+/
@@ -1167,8 +1263,9 @@ module Bio
1167
1263
  # KEGG
1168
1264
  #aaindex.is_prior_to litdb
1169
1265
  #litdb.is_prior_to brite
1170
- brite.is_prior_to ko
1171
- ko.is_prior_to glycan
1266
+ brite.is_prior_to orthology
1267
+ orthology.is_prior_to drug
1268
+ drug.is_prior_to glycan
1172
1269
  glycan.is_prior_to enzyme
1173
1270
  enzyme.is_prior_to compound
1174
1271
  compound.is_prior_to reaction