bio 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (340)
  1. data/bin/bioruby +14 -122
  2. data/bin/br_biofetch.rb +2 -2
  3. data/bin/br_bioflat.rb +2 -2
  4. data/bin/br_biogetseq.rb +2 -2
  5. data/bin/br_pmfetch.rb +3 -3
  6. data/doc/Changes-0.7.rd +77 -0
  7. data/doc/KEGG_API.rd +523 -232
  8. data/doc/KEGG_API.rd.ja +529 -207
  9. data/doc/Tutorial.rd +48 -11
  10. data/lib/bio.rb +59 -6
  11. data/lib/bio/alignment.rb +713 -103
  12. data/lib/bio/appl/bl2seq/report.rb +2 -18
  13. data/lib/bio/appl/blast.rb +108 -91
  14. data/lib/bio/appl/blast/format0.rb +33 -18
  15. data/lib/bio/appl/blast/format8.rb +6 -20
  16. data/lib/bio/appl/blast/report.rb +293 -429
  17. data/lib/bio/appl/blast/rexml.rb +8 -22
  18. data/lib/bio/appl/blast/wublast.rb +21 -12
  19. data/lib/bio/appl/blast/xmlparser.rb +180 -183
  20. data/lib/bio/appl/blat/report.rb +127 -30
  21. data/lib/bio/appl/clustalw.rb +87 -59
  22. data/lib/bio/appl/clustalw/report.rb +20 -22
  23. data/lib/bio/appl/emboss.rb +113 -20
  24. data/lib/bio/appl/fasta.rb +173 -198
  25. data/lib/bio/appl/fasta/format10.rb +244 -347
  26. data/lib/bio/appl/gcg/msf.rb +212 -0
  27. data/lib/bio/appl/gcg/seq.rb +195 -0
  28. data/lib/bio/appl/genscan/report.rb +5 -23
  29. data/lib/bio/appl/hmmer.rb +8 -45
  30. data/lib/bio/appl/hmmer/report.rb +2 -20
  31. data/lib/bio/appl/iprscan/report.rb +374 -0
  32. data/lib/bio/appl/mafft.rb +87 -50
  33. data/lib/bio/appl/mafft/report.rb +151 -44
  34. data/lib/bio/appl/muscle.rb +52 -0
  35. data/lib/bio/appl/phylip/alignment.rb +129 -0
  36. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  37. data/lib/bio/appl/probcons.rb +41 -0
  38. data/lib/bio/appl/psort.rb +89 -96
  39. data/lib/bio/appl/psort/report.rb +6 -22
  40. data/lib/bio/appl/pts1.rb +263 -0
  41. data/lib/bio/appl/sim4.rb +26 -36
  42. data/lib/bio/appl/sim4/report.rb +2 -18
  43. data/lib/bio/appl/sosui/report.rb +5 -20
  44. data/lib/bio/appl/spidey/report.rb +2 -2
  45. data/lib/bio/appl/targetp/report.rb +4 -20
  46. data/lib/bio/appl/tcoffee.rb +55 -0
  47. data/lib/bio/appl/tmhmm/report.rb +4 -20
  48. data/lib/bio/command.rb +235 -64
  49. data/lib/bio/data/aa.rb +21 -26
  50. data/lib/bio/data/codontable.rb +2 -20
  51. data/lib/bio/data/na.rb +19 -4
  52. data/lib/bio/db.rb +27 -12
  53. data/lib/bio/db/aaindex.rb +2 -20
  54. data/lib/bio/db/embl/common.rb +4 -21
  55. data/lib/bio/db/embl/embl.rb +33 -85
  56. data/lib/bio/db/embl/sptr.rb +612 -302
  57. data/lib/bio/db/embl/swissprot.rb +10 -29
  58. data/lib/bio/db/embl/trembl.rb +10 -29
  59. data/lib/bio/db/embl/uniprot.rb +10 -29
  60. data/lib/bio/db/fantom.rb +15 -20
  61. data/lib/bio/db/fasta.rb +3 -3
  62. data/lib/bio/db/genbank/common.rb +37 -46
  63. data/lib/bio/db/genbank/ddbj.rb +6 -18
  64. data/lib/bio/db/genbank/genbank.rb +47 -186
  65. data/lib/bio/db/genbank/genpept.rb +4 -17
  66. data/lib/bio/db/genbank/refseq.rb +4 -17
  67. data/lib/bio/db/gff.rb +103 -35
  68. data/lib/bio/db/go.rb +4 -20
  69. data/lib/bio/db/kegg/brite.rb +26 -36
  70. data/lib/bio/db/kegg/compound.rb +81 -85
  71. data/lib/bio/db/kegg/drug.rb +98 -0
  72. data/lib/bio/db/kegg/enzyme.rb +133 -110
  73. data/lib/bio/db/kegg/expression.rb +2 -20
  74. data/lib/bio/db/kegg/genes.rb +208 -238
  75. data/lib/bio/db/kegg/genome.rb +164 -285
  76. data/lib/bio/db/kegg/glycan.rb +114 -157
  77. data/lib/bio/db/kegg/keggtab.rb +242 -303
  78. data/lib/bio/db/kegg/kgml.rb +117 -160
  79. data/lib/bio/db/kegg/orthology.rb +112 -0
  80. data/lib/bio/db/kegg/reaction.rb +54 -69
  81. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  82. data/lib/bio/db/lasergene.rb +209 -0
  83. data/lib/bio/db/litdb.rb +3 -27
  84. data/lib/bio/db/medline.rb +228 -249
  85. data/lib/bio/db/nbrf.rb +3 -3
  86. data/lib/bio/db/newick.rb +510 -0
  87. data/lib/bio/db/nexus.rb +1854 -0
  88. data/lib/bio/db/pdb.rb +5 -17
  89. data/lib/bio/db/pdb/atom.rb +2 -18
  90. data/lib/bio/db/pdb/chain.rb +2 -18
  91. data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
  92. data/lib/bio/db/pdb/model.rb +2 -18
  93. data/lib/bio/db/pdb/pdb.rb +73 -34
  94. data/lib/bio/db/pdb/residue.rb +4 -20
  95. data/lib/bio/db/pdb/utils.rb +2 -18
  96. data/lib/bio/db/prosite.rb +403 -422
  97. data/lib/bio/db/rebase.rb +84 -40
  98. data/lib/bio/db/soft.rb +404 -0
  99. data/lib/bio/db/transfac.rb +5 -17
  100. data/lib/bio/feature.rb +106 -52
  101. data/lib/bio/io/das.rb +32 -42
  102. data/lib/bio/io/dbget.rb +2 -20
  103. data/lib/bio/io/ddbjxml.rb +77 -138
  104. data/lib/bio/io/ebisoap.rb +158 -0
  105. data/lib/bio/io/ensembl.rb +229 -0
  106. data/lib/bio/io/fastacmd.rb +89 -82
  107. data/lib/bio/io/fetch.rb +163 -96
  108. data/lib/bio/io/flatfile.rb +170 -73
  109. data/lib/bio/io/flatfile/bdb.rb +3 -16
  110. data/lib/bio/io/flatfile/index.rb +2 -2
  111. data/lib/bio/io/flatfile/indexer.rb +3 -2
  112. data/lib/bio/io/higet.rb +12 -31
  113. data/lib/bio/io/keggapi.rb +210 -269
  114. data/lib/bio/io/ncbisoap.rb +155 -0
  115. data/lib/bio/io/pubmed.rb +169 -147
  116. data/lib/bio/io/registry.rb +4 -20
  117. data/lib/bio/io/soapwsdl.rb +43 -38
  118. data/lib/bio/io/sql.rb +242 -305
  119. data/lib/bio/location.rb +407 -285
  120. data/lib/bio/map.rb +410 -0
  121. data/lib/bio/pathway.rb +558 -695
  122. data/lib/bio/reference.rb +272 -75
  123. data/lib/bio/sequence.rb +255 -13
  124. data/lib/bio/sequence/aa.rb +71 -10
  125. data/lib/bio/sequence/common.rb +187 -33
  126. data/lib/bio/sequence/compat.rb +59 -4
  127. data/lib/bio/sequence/format.rb +54 -7
  128. data/lib/bio/sequence/generic.rb +3 -3
  129. data/lib/bio/sequence/na.rb +328 -26
  130. data/lib/bio/shell.rb +11 -4
  131. data/lib/bio/shell/core.rb +221 -160
  132. data/lib/bio/shell/demo.rb +18 -15
  133. data/lib/bio/shell/interface.rb +14 -12
  134. data/lib/bio/shell/irb.rb +95 -0
  135. data/lib/bio/shell/object.rb +45 -26
  136. data/lib/bio/shell/plugin/blast.rb +42 -0
  137. data/lib/bio/shell/plugin/codon.rb +22 -14
  138. data/lib/bio/shell/plugin/das.rb +58 -0
  139. data/lib/bio/shell/plugin/emboss.rb +2 -2
  140. data/lib/bio/shell/plugin/entry.rb +22 -11
  141. data/lib/bio/shell/plugin/flatfile.rb +2 -2
  142. data/lib/bio/shell/plugin/keggapi.rb +13 -6
  143. data/lib/bio/shell/plugin/midi.rb +4 -4
  144. data/lib/bio/shell/plugin/obda.rb +2 -2
  145. data/lib/bio/shell/plugin/psort.rb +56 -0
  146. data/lib/bio/shell/plugin/seq.rb +35 -8
  147. data/lib/bio/shell/plugin/soap.rb +87 -0
  148. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
  149. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
  150. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
  151. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
  152. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
  153. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
  154. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
  156. data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
  159. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
  160. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
  161. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
  162. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
  163. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
  164. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
  165. data/lib/bio/shell/script.rb +25 -0
  166. data/lib/bio/shell/setup.rb +109 -0
  167. data/lib/bio/shell/web.rb +70 -58
  168. data/lib/bio/tree.rb +850 -0
  169. data/lib/bio/util/color_scheme.rb +84 -107
  170. data/lib/bio/util/color_scheme/buried.rb +5 -24
  171. data/lib/bio/util/color_scheme/helix.rb +5 -24
  172. data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
  173. data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
  174. data/lib/bio/util/color_scheme/strand.rb +5 -24
  175. data/lib/bio/util/color_scheme/taylor.rb +5 -24
  176. data/lib/bio/util/color_scheme/turn.rb +5 -24
  177. data/lib/bio/util/color_scheme/zappo.rb +5 -24
  178. data/lib/bio/util/contingency_table.rb +70 -43
  179. data/lib/bio/util/restriction_enzyme.rb +228 -0
  180. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  181. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  182. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  183. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  184. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  185. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  186. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  187. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  188. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  189. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  190. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  191. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  192. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  193. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  194. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  195. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  196. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  197. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  198. data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
  199. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  200. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  201. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  202. data/lib/bio/util/sirna.rb +4 -22
  203. data/sample/color_scheme_na.rb +4 -12
  204. data/sample/enzymes.rb +78 -0
  205. data/sample/goslim.rb +5 -13
  206. data/sample/psortplot_html.rb +4 -12
  207. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  208. data/test/data/embl/AB090716.embl.rel89 +63 -0
  209. data/test/data/fasta/example1.txt +75 -0
  210. data/test/data/fasta/example2.txt +21 -0
  211. data/test/data/iprscan/merged.raw +32 -0
  212. data/test/data/iprscan/merged.txt +74 -0
  213. data/test/data/soft/GDS100_partial.soft +92 -0
  214. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  215. data/test/functional/bio/io/test_ensembl.rb +103 -0
  216. data/test/functional/bio/io/test_soapwsdl.rb +5 -17
  217. data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
  218. data/test/unit/bio/appl/blast/test_report.rb +3 -16
  219. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
  220. data/test/unit/bio/appl/genscan/test_report.rb +3 -16
  221. data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
  222. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  223. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  224. data/test/unit/bio/appl/sosui/test_report.rb +3 -16
  225. data/test/unit/bio/appl/targetp/test_report.rb +3 -16
  226. data/test/unit/bio/appl/test_blast.rb +3 -16
  227. data/test/unit/bio/appl/test_fasta.rb +4 -16
  228. data/test/unit/bio/appl/test_pts1.rb +140 -0
  229. data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
  230. data/test/unit/bio/data/test_aa.rb +4 -17
  231. data/test/unit/bio/data/test_codontable.rb +3 -16
  232. data/test/unit/bio/data/test_na.rb +3 -3
  233. data/test/unit/bio/db/embl/test_common.rb +3 -16
  234. data/test/unit/bio/db/embl/test_embl.rb +3 -16
  235. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  236. data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
  237. data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
  238. data/test/unit/bio/db/kegg/test_genes.rb +3 -16
  239. data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
  240. data/test/unit/bio/db/test_aaindex.rb +2 -2
  241. data/test/unit/bio/db/test_fasta.rb +3 -16
  242. data/test/unit/bio/db/test_gff.rb +3 -16
  243. data/test/unit/bio/db/test_lasergene.rb +95 -0
  244. data/test/unit/bio/db/test_newick.rb +56 -0
  245. data/test/unit/bio/db/test_nexus.rb +360 -0
  246. data/test/unit/bio/db/test_prosite.rb +5 -18
  247. data/test/unit/bio/db/test_rebase.rb +11 -25
  248. data/test/unit/bio/db/test_soft.rb +138 -0
  249. data/test/unit/bio/io/test_ddbjxml.rb +5 -17
  250. data/test/unit/bio/io/test_ensembl.rb +109 -0
  251. data/test/unit/bio/io/test_fastacmd.rb +3 -16
  252. data/test/unit/bio/io/test_flatfile.rb +237 -0
  253. data/test/unit/bio/io/test_soapwsdl.rb +4 -17
  254. data/test/unit/bio/sequence/test_aa.rb +3 -3
  255. data/test/unit/bio/sequence/test_common.rb +3 -16
  256. data/test/unit/bio/sequence/test_compat.rb +3 -16
  257. data/test/unit/bio/sequence/test_na.rb +29 -3
  258. data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
  259. data/test/unit/bio/test_alignment.rb +16 -27
  260. data/test/unit/bio/test_command.rb +242 -25
  261. data/test/unit/bio/test_db.rb +3 -16
  262. data/test/unit/bio/test_feature.rb +4 -16
  263. data/test/unit/bio/test_location.rb +4 -16
  264. data/test/unit/bio/test_map.rb +230 -0
  265. data/test/unit/bio/test_pathway.rb +4 -16
  266. data/test/unit/bio/test_reference.rb +2 -2
  267. data/test/unit/bio/test_sequence.rb +7 -19
  268. data/test/unit/bio/test_shell.rb +3 -16
  269. data/test/unit/bio/test_tree.rb +593 -0
  270. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  271. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  272. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  273. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
  274. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  275. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  276. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  277. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  278. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  279. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  280. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  281. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  282. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  283. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  284. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  285. data/test/unit/bio/util/test_color_scheme.rb +6 -18
  286. data/test/unit/bio/util/test_contingency_table.rb +6 -18
  287. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  288. data/test/unit/bio/util/test_sirna.rb +3 -16
  289. metadata +228 -169
  290. data/doc/BioRuby.rd.ja +0 -225
  291. data/doc/Design.rd.ja +0 -341
  292. data/doc/TODO.rd.ja +0 -138
  293. data/lib/bio/appl/fasta/format6.rb +0 -37
  294. data/lib/bio/db/kegg/cell.rb +0 -88
  295. data/lib/bio/db/kegg/ko.rb +0 -178
  296. data/lib/bio/shell/rails/Rakefile +0 -10
  297. data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
  298. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
  299. data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
  300. data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
  301. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
  302. data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
  303. data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
  304. data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
  305. data/lib/bio/shell/rails/config/boot.rb +0 -19
  306. data/lib/bio/shell/rails/config/database.yml +0 -85
  307. data/lib/bio/shell/rails/config/environment.rb +0 -53
  308. data/lib/bio/shell/rails/config/environments/development.rb +0 -19
  309. data/lib/bio/shell/rails/config/environments/production.rb +0 -19
  310. data/lib/bio/shell/rails/config/environments/test.rb +0 -19
  311. data/lib/bio/shell/rails/config/routes.rb +0 -19
  312. data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
  313. data/lib/bio/shell/rails/public/404.html +0 -8
  314. data/lib/bio/shell/rails/public/500.html +0 -8
  315. data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
  316. data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
  317. data/lib/bio/shell/rails/public/dispatch.rb +0 -10
  318. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  319. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  320. data/lib/bio/shell/rails/public/index.html +0 -277
  321. data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
  322. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
  323. data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
  324. data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
  325. data/lib/bio/shell/rails/public/robots.txt +0 -1
  326. data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
  327. data/lib/bio/shell/rails/script/about +0 -3
  328. data/lib/bio/shell/rails/script/breakpointer +0 -3
  329. data/lib/bio/shell/rails/script/console +0 -3
  330. data/lib/bio/shell/rails/script/destroy +0 -3
  331. data/lib/bio/shell/rails/script/generate +0 -3
  332. data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
  333. data/lib/bio/shell/rails/script/performance/profiler +0 -3
  334. data/lib/bio/shell/rails/script/plugin +0 -3
  335. data/lib/bio/shell/rails/script/process/reaper +0 -3
  336. data/lib/bio/shell/rails/script/process/spawner +0 -3
  337. data/lib/bio/shell/rails/script/process/spinner +0 -3
  338. data/lib/bio/shell/rails/script/runner +0 -3
  339. data/lib/bio/shell/rails/script/server +0 -42
  340. data/lib/bio/shell/rails/test/test_helper.rb +0 -28
@@ -1,10 +1,10 @@
1
1
  #
2
2
  # = bio/db/embl/sptr.rb - UniProt/SwissProt and TrEMBL database class
3
3
  #
4
- # Copyright:: Copyright (C) 2001-2005 Mitsuteru C. Nakao <n@bioruby.org>
5
- # License:: LGPL
4
+ # Copyright:: Copyright (C) 2001-2006 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # License:: The Ruby License
6
6
  #
7
- # $Id: sptr.rb,v 1.30 2006/01/28 06:40:38 nakao Exp $
7
+ # $Id: sptr.rb,v 1.36 2007/04/05 23:35:40 trevor Exp $
8
8
  #
9
9
  # == Description
10
10
  #
@@ -31,24 +31,7 @@
31
31
  # * The UniProtKB/SwissProt/TrEMBL User Manual
32
32
  # http://www.expasy.org/sprot/userman.html
33
33
  #
34
- #--
35
- #
36
- # This library is free software; you can redistribute it and/or
37
- # modify it under the terms of the GNU Lesser General Public
38
- # License as published by the Free Software Foundation; either
39
- # version 2 of the License, or (at your option) any later version.
40
- #
41
- # This library is distributed in the hope that it will be useful,
42
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
43
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
44
- # Lesser General Public License for more details.
45
- #
46
- # You should have received a copy of the GNU Lesser General Public
47
- # License along with this library; if not, write to the Free Software
48
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
49
- #
50
- #++
51
- #
34
+
52
35
 
53
36
  require 'bio/db'
54
37
  require 'bio/db/embl/common'
@@ -62,7 +45,6 @@ class SPTR < EMBLDB
62
45
  @@entry_regrexp = /[A-Z0-9]{1,4}_[A-Z0-9]{1,5}/
63
46
  @@data_class = ["STANDARD", "PRELIMINARY"]
64
47
 
65
-
66
48
  # returns a Hash of the ID line.
67
49
  #
68
50
  # returns the content (Int or String) of the ID line for a given key.
@@ -73,30 +55,25 @@ class SPTR < EMBLDB
73
55
  # #"ID #{ENTRY_NAME} #{DATA_CLASS}; #{MOLECULE_TYPE}; #{SEQUENCE_LENGTH}."
74
56
  #
75
57
  # === Examples
76
- # obj.id_line #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"STANDARD", "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>"PRT"}
58
+ # obj.id_line #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"STANDARD",
59
+ # "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>"PRT"}
77
60
  #
78
61
  # obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
79
62
  #
80
63
  def id_line(key = nil)
81
- unless @data['ID']
82
- tmp = Hash.new
83
- a = @orig['ID'].split(/ +/)
84
- tmp['ENTRY_NAME'] = a[1]
85
- tmp['DATA_CLASS'] = a[2].sub(/;/,'')
86
- tmp['MOLECULE_TYPE'] = a[3].sub(/;/,'')
87
- tmp['SEQUENCE_LENGTH'] = a[4].to_i
88
- @data['ID'] = tmp
89
- end
90
-
91
- if key
92
- @data['ID'][key] # String/Int
93
- else
94
- @data['ID'] # Hash
95
- end
64
+ return id_line[key] if key
65
+ return @data['ID'] if @data['ID']
66
+
67
+ part = @orig['ID'].split(/ +/)
68
+ @data['ID'] = {
69
+ 'ENTRY_NAME' => part[1],
70
+ 'DATA_CLASS' => part[2].sub(/;/,''),
71
+ 'MOLECULE_TYPE' => part[3].sub(/;/,''),
72
+ 'SEQUENCE_LENGTH' => part[4].to_i
73
+ }
96
74
  end
97
75
 
98
76
 
99
-
100
77
  # returns a ENTRY_NAME in the ID line.
101
78
  #
102
79
  def entry_id
@@ -144,20 +121,15 @@ class SPTR < EMBLDB
144
121
  # DT DD-MMM-YYYY (rel. NN, Last sequence update)
145
122
  # DT DD-MMM-YYYY (rel. NN, Last annotation update)
146
123
  def dt(key = nil)
147
- unless @data['DT']
148
- tmp = Hash.new
149
- a = self.get('DT').split(/\n/)
150
- tmp['created'] = a[0].sub(/\w{2} /,'').strip
151
- tmp['sequence'] = a[1].sub(/\w{2} /,'').strip
152
- tmp['annotation'] = a[2].sub(/\w{2} /,'').strip
153
- @data['DT'] = tmp
154
- end
155
-
156
- if key
157
- @data['DT'][key]
158
- else
159
- @data['DT']
160
- end
124
+ return dt[key] if key
125
+ return @data['DT'] if @data['DT']
126
+
127
+ part = self.get('DT').split(/\n/)
128
+ @data['DT'] = {
129
+ 'created' => part[0].sub(/\w{2} /,'').strip,
130
+ 'sequence' => part[1].sub(/\w{2} /,'').strip,
131
+ 'annotation' => part[2].sub(/\w{2} /,'').strip
132
+ }
161
133
  end
162
134
 
163
135
 
@@ -214,16 +186,18 @@ class SPTR < EMBLDB
214
186
  #
215
187
  # === GN Line: Gene name(s) (>=0, optional)
216
188
  def gn
217
- return @data['GN'] if @data['GN']
218
-
219
- case fetch('GN')
220
- when /Name=/ then
221
- return gn_uniprot_parser
222
- else
223
- return gn_old_parser
189
+ unless @data['GN']
190
+ case fetch('GN')
191
+ when /Name=/,/ORFNames=/
192
+ @data['GN'] = gn_uniprot_parser
193
+ else
194
+ @data['GN'] = gn_old_parser
195
+ end
224
196
  end
197
+ @data['GN']
225
198
  end
226
199
 
200
+
227
201
  # returns contents in the old style GN line.
228
202
  # === GN Line: Gene name(s) (>=0, optional)
229
203
  # GN HNS OR DRDX OR OSMZ OR BGLY.
@@ -245,7 +219,7 @@ class SPTR < EMBLDB
245
219
  }
246
220
  }
247
221
  end
248
- return @data['GN'] = names
222
+ @data['GN'] = names
249
223
  end
250
224
  private :gn_old_parser
251
225
 
@@ -318,11 +292,13 @@ class SPTR < EMBLDB
318
292
  # OS Genus species (name0) (name1).
319
293
  # OS Genus species (name0), G s0 (name0), and G s (name0) (name1).
320
294
  # OS Homo sapiens (Human), and Rattus norvegicus (Rat)
295
+ # OS Hippotis sp. Clark and Watts 825.
296
+ # OS unknown cyperaceous sp.
321
297
  def os(num = nil)
322
298
  unless @data['OS']
323
299
  os = Array.new
324
300
  fetch('OS').split(/, and|, /).each do |tmp|
325
- if tmp =~ /([A-Z][a-z]* *[\w\d \:\'\+\-]+[\w\d])/
301
+ if tmp =~ /(\w+ *[\w\d \:\'\+\-\.]+[\w\d\.])/
326
302
  org = $1
327
303
  tmp =~ /(\(.+\))/
328
304
  os.push({'name' => $1, 'os' => org})
@@ -375,17 +351,224 @@ class SPTR < EMBLDB
375
351
  return @data['OX']
376
352
  end
377
353
 
354
+ # === The OH Line;
355
+ #
356
+ # OH NCBI_TaxID=TaxID; HostName.
357
+ # http://br.expasy.org/sprot/userman.html#OH_line
358
+ def oh
359
+ unless @data['OH']
360
+ @data['OH'] = fetch('OH').split("\. ").map {|x|
361
+ if x =~ /NCBI_TaxID=(\d+);/
362
+ taxid = $1
363
+ else
364
+ raise ArgumentError, ["Error: Invalid OH line format (#{self.entry_id}):",
365
+ $!, "\n", get('OH'), "\n"].join
366
+
367
+ end
368
+ if x =~ /NCBI_TaxID=\d+; (.+)/
369
+ host_name = $1
370
+ host_name.sub!(/\.$/, '')
371
+ else
372
+ host_name = nil
373
+ end
374
+ {'NCBI_TaxID' => taxid, 'HostName' => host_name}
375
+ }
376
+ end
377
+ @data['OH']
378
+ end
379
+
380
+
378
381
 
379
382
  # Bio::EMBLDB::Common#ref -> Array
380
383
  # R Lines
381
384
  # RN RC RP RX RA RT RL
382
385
 
386
+ # returns contents in the R lines.
387
+ # * Bio::EMBLDB::Common#ref -> [ <reference information Hash>* ]
388
+ # where <reference information Hash> is:
389
+ # {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
390
+ # 'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
391
+ #
392
+ # R Lines
393
+ # * RN RC RP RX RA RT RL RG
394
+ def ref
395
+ unless @data['R']
396
+ @data['R'] = [get('R').split(/\nRN /)].flatten.map { |str|
397
+ hash = {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
398
+ 'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
399
+ str = 'RN ' + str unless /^RN / =~ str
400
+
401
+ str.split("\n").each do |line|
402
+ if /^(R[NPXARLCTG]) (.+)/ =~ line
403
+ hash[$1] += $2 + ' '
404
+ else
405
+ raise "Invalid format in R lines, \n[#{line}]\n"
406
+ end
407
+ end
408
+
409
+ hash['RN'] = set_RN(hash['RN'])
410
+ hash['RC'] = set_RC(hash['RC'])
411
+ hash['RP'] = set_RP(hash['RP'])
412
+ hash['RX'] = set_RX(hash['RX'])
413
+ hash['RA'] = set_RA(hash['RA'])
414
+ hash['RT'] = set_RT(hash['RT'])
415
+ hash['RL'] = set_RL(hash['RL'])
416
+ hash['RG'] = set_RG(hash['RG'])
417
+
418
+ hash
419
+ }
420
+
421
+ end
422
+ @data['R']
423
+ end
424
+
425
+ def set_RN(data)
426
+ data.strip
427
+ end
428
+
429
+ def set_RC(data)
430
+ data.scan(/([STP]\w+)=(.+);/).map { |comment|
431
+ [comment[1].split(/, and |, /)].flatten.map { |text|
432
+ {'Token' => comment[0], 'Text' => text}
433
+ }
434
+ }.flatten
435
+ end
436
+ private :set_RC
437
+
438
+ def set_RP(data)
439
+ data = data.strip
440
+ data = data.sub(/\.$/, '')
441
+ data.split(/, AND |, /i).map {|x|
442
+ x = x.strip
443
+ x = x.gsub(' ', ' ')
444
+ }
445
+ end
446
+ private :set_RP
383
447
 
384
- @@cc_topics = ['ALTERNATIVE PRODUCTS','CATALYTIC ACTIVITY','CAUTION',
385
- 'COFACTOR','DATABASE','DEVELOPMENTAL STAGE','DISEASE','DOMAIN',
386
- 'ENZYME REGULATION','FUNCTION','INDUCTION','MASS SPECTROMETRY',
387
- 'MISCELLANEOUS','PATHWAY','PHARMACEUTICAL','POLYMORPHISM','PTM',
388
- 'SIMILARITY','SUBCELLULAR LOCATION','SUBUNIT','TISSUE SPECIFICITY']
448
+ def set_RX(data)
449
+ rx = {'MEDLINE' => nil, 'PubMed' => nil, 'DOI' => nil}
450
+ if data =~ /MEDLINE=(.+?);/
451
+ rx['MEDLINE'] = $1
452
+ end
453
+ if data =~ /PubMed=(.+?);/
454
+ rx['PubMed'] = $1
455
+ end
456
+ if data =~ /DOI=(.+?);/
457
+ rx['DOI'] = $1
458
+ end
459
+ rx
460
+ end
461
+ private :set_RX
462
+
463
+ def set_RA(data)
464
+ data = data.sub(/; *$/, '')
465
+ end
466
+ private :set_RA
467
+
468
+ def set_RT(data)
469
+ data = data.sub(/; *$/, '')
470
+ data = data.gsub(/(^"|"$)/, '')
471
+ end
472
+ private :set_RT
473
+
474
+ def set_RL(data)
475
+ data = data.strip
476
+ end
477
+ private :set_RL
478
+
479
+ def set_RG(data)
480
+ data = data.split('; ')
481
+ end
482
+ private :set_RG
483
+
484
+
485
+
486
+ # returns a Bio::References object built from Bio::EMBLDB::Common#ref.
487
+ # * Bio::EMBLDB::Common#ref -> Bio::References
488
+ def references
489
+ unless @data['references']
490
+ ary = self.ref.map {|ent|
491
+ hash = Hash.new('')
492
+ ent.each {|key, value|
493
+ case key
494
+ when 'RA'
495
+ hash['authors'] = value.split(/, /)
496
+ when 'RT'
497
+ hash['title'] = value
498
+ when 'RL'
499
+ if value =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
500
+ hash['journal'] = $1
501
+ hash['volume'] = $2
502
+ hash['issue'] = $3
503
+ hash['pages'] = $4
504
+ hash['year'] = $5
505
+ else
506
+ hash['journal'] = value
507
+ end
508
+ when 'RX' # PUBMED, MEDLINE
509
+ value.split('.').each {|item|
510
+ tag, xref = item.split(/; /).map {|i| i.strip }
511
+ hash[ tag.downcase ] = xref
512
+ }
513
+ end
514
+ }
515
+ Reference.new(hash)
516
+ }
517
+ @data['references'] = References.new(ary)
518
+ end
519
+ @data['references']
520
+ end
521
+
522
+
523
+
524
+
525
+
526
+
527
+ # === The HI line
528
+ # Bio::SPTR#hi #=> Array of Hash (keys: 'Category', 'Keywords', 'Keyword')
529
+ def hi
530
+ unless @data['HI']
531
+ @data['HI'] = []
532
+ fetch('HI').split(/\. /).each do |hlist|
533
+ hash = {'Category' => '', 'Keywords' => [], 'Keyword' => ''}
534
+ hash['Category'], hash['Keywords'] = hlist.split(': ')
535
+ hash['Keywords'] = hash['Keywords'].split('; ')
536
+ hash['Keyword'] = hash['Keywords'].pop
537
+ hash['Keyword'].sub!(/\.$/, '')
538
+ @data['HI'] << hash
539
+ end
540
+ end
541
+ @data['HI']
542
+ end
543
+
544
+
545
+ @@cc_topics = ['PHARMACEUTICAL',
546
+ 'BIOTECHNOLOGY',
547
+ 'TOXIC DOSE',
548
+ 'ALLERGEN',
549
+ 'RNA EDITING',
550
+ 'POLYMORPHISM',
551
+ 'BIOPHYSICOCHEMICAL PROPERTIES',
552
+ 'MASS SPECTROMETRY',
553
+ 'WEB RESOURCE',
554
+ 'ENZYME REGULATION',
555
+ 'DISEASE',
556
+ 'INTERACTION',
557
+ 'DEVELOPMENTAL STAGE',
558
+ 'INDUCTION',
559
+ 'CAUTION',
560
+ 'ALTERNATIVE PRODUCTS',
561
+ 'DOMAIN',
562
+ 'PTM',
563
+ 'MISCELLANEOUS',
564
+ 'TISSUE SPECIFICITY',
565
+ 'COFACTOR',
566
+ 'PATHWAY',
567
+ 'SUBUNIT',
568
+ 'CATALYTIC ACTIVITY',
569
+ 'SUBCELLULAR LOCATION',
570
+ 'FUNCTION',
571
+ 'SIMILARITY']
389
572
  # returns contents in the CC lines.
390
573
  # * Bio::SPTR#cc -> Hash
391
574
  #
@@ -425,27 +608,44 @@ class SPTR < EMBLDB
425
608
  # CC -!- TOPIC: FIRST LINE OF A COMMENT BLOCK;
426
609
  # CC SECOND AND SUBSEQUENT LINES OF A COMMENT BLOCK.
427
610
  #
428
- def cc(tag = nil)
611
+ # See also http://www.expasy.org/sprot/userman.html#CC_line
612
+ #
613
+ def cc(topic = nil)
429
614
  unless @data['CC']
430
615
  cc = Hash.new
431
- cmt = '-' * (77 - 4 + 1)
616
+ comment_border= '-' * (77 - 4 + 1)
432
617
  dlm = /-!- /
433
618
 
434
- return cc if get('CC').size == 0 # 12KD_MYCSM has no CC lines.
619
+ # 12KD_MYCSM has no CC lines.
620
+ return cc if get('CC').size == 0
621
+
622
+ cc_raw = fetch('CC')
623
+
624
+ # Removing the copyright statement.
625
+ cc_raw.sub!(/ *---.+---/m, '')
626
+
627
+ # No CC lines remain once the copyright statement is removed.
628
+ return cc if cc_raw == ''
435
629
 
436
630
  begin
437
- fetch('CC').split(/#{cmt}/)[0].sub(dlm,'').split(dlm).each do |tmp|
631
+ cc_raw, copyright = cc_raw.split(/#{comment_border}/)[0]
632
+ cc_raw = cc_raw.sub(dlm,'')
633
+ cc_raw.split(dlm).each do |tmp|
634
+ tmp = tmp.strip
635
+
438
636
  if /(^[A-Z ]+[A-Z]): (.+)/ =~ tmp
439
637
  key = $1
440
- body = $2.gsub(/- (?!AND)/,'-')
638
+ body = $2
639
+ body.gsub!(/- (?!AND)/,'-')
640
+ body.strip!
441
641
  unless cc[key]
442
642
  cc[key] = [body]
443
643
  else
444
644
  cc[key].push(body)
445
645
  end
446
646
  else
447
- raise ["Error: [#{entry_id}]: CC Lines", '',
448
- tmp, '', '', fetch('CC'),''].join("\n")
647
+ raise ["Error: [#{entry_id}]: CC Lines", '"', tmp, '"',
648
+ '', get('CC'),''].join("\n")
449
649
  end
450
650
  end
451
651
  rescue NameError
@@ -461,29 +661,62 @@ class SPTR < EMBLDB
461
661
  @data['CC'] = cc
462
662
  end
463
663
 
464
- case tag
465
- when 'ALTERNATIVE PRODUCTS'
466
- ap = @data['CC']['ALTERNATIVE PRODUCTS'].to_s
467
- return ap unless ap
468
-
469
- # Event, Named isoforms, Comment, [Name, Synonyms, IsoId, Sequnce]+
470
- tmp = {'Event' => nil, 'Named isoforms' => nil, 'Comment' => nil, 'Variants' => []}
471
-
472
- if /Event=(.+?);/ =~ ap
473
- tmp['Event'] = $1
474
- end
475
- if /Named isoforms=(\S+?);/ =~ ap
476
- tmp['Named isoforms'] = $1
477
- end
478
- if /Comment=(.+?);/m =~ ap
479
- tmp['Comment'] = $1
480
- end
481
- ap.scan(/Name=.+?Sequence=.+?;/).each do |ent|
482
- tmp['Variants'] << cc_ap_variants_parse(ent)
483
- end
484
- return tmp
485
-
486
664
 
665
+ case topic
666
+ when 'ALLERGEN'
667
+ return @data['CC'][topic]
668
+ when 'ALTERNATIVE PRODUCTS'
669
+ return cc_alternative_products(@data['CC'][topic])
670
+ when 'BIOPHYSICOCHEMICAL PROPERTIES'
671
+ return cc_biophysiochemical_properties(@data['CC'][topic])
672
+ when 'BIOTECHNOLOGY'
673
+ return @data['CC'][topic]
674
+ when 'CATALITIC ACTIVITY'
675
+ return cc_catalytic_activity(@data['CC'][topic])
676
+ when 'CAUTION'
677
+ return cc_caution(@data['CC'][topic])
678
+ when 'COFACTOR'
679
+ return @data['CC'][topic]
680
+ when 'DEVELOPMENTAL STAGE'
681
+ return @data['CC'][topic].to_s
682
+ when 'DISEASE'
683
+ return @data['CC'][topic].to_s
684
+ when 'DOMAIN'
685
+ return @data['CC'][topic]
686
+ when 'ENZYME REGULATION'
687
+ return @data['CC'][topic].to_s
688
+ when 'FUNCTION'
689
+ return @data['CC'][topic].to_s
690
+ when 'INDUCTION'
691
+ return @data['CC'][topic].to_s
692
+ when 'INTERACTION'
693
+ return cc_interaction(@data['CC'][topic])
694
+ when 'MASS SPECTROMETRY'
695
+ return cc_mass_spectrometry(@data['CC'][topic])
696
+ when 'MISCELLANEOUS'
697
+ return @data['CC'][topic]
698
+ when 'PATHWAY'
699
+ return cc_pathway(@data['CC'][topic])
700
+ when 'PHARMACEUTICAL'
701
+ return @data['CC'][topic]
702
+ when 'POLYMORPHISM'
703
+ return @data['CC'][topic]
704
+ when 'PTM'
705
+ return @data['CC'][topic]
706
+ when 'RNA EDITING'
707
+ return cc_rna_editing(@data['CC'][topic])
708
+ when 'SIMILARITY'
709
+ return @data['CC'][topic]
710
+ when 'SUBCELLULAR LOCATION'
711
+ return cc_subcellular_location(@data['CC'][topic])
712
+ when 'SUBUNIT'
713
+ return @data['CC'][topic]
714
+ when 'TISSUE SPECIFICITY'
715
+ return @data['CC'][topic]
716
+ when 'TOXIC DOSE'
717
+ return @data['CC'][topic]
718
+ when 'WEB RESOURCE'
719
+ return cc_web_resource(@data['CC'][topic])
487
720
  when 'DATABASE'
488
721
  # DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
489
722
  tmp = Array.new
@@ -507,73 +740,208 @@ class SPTR < EMBLDB
507
740
  tmp.push(db)
508
741
  end
509
742
  return tmp
510
-
511
- when 'MASS SPECTOROMETRY'
512
- # MASS SPECTROMETRY: MW=XXX[; MW_ERR=XX][; METHOD=XX][;RANGE=XX-XX].
513
- tmp = Array.new
514
- ms = @data['CC']['MASS SPECTOROMETRY']
515
- return ms unless ms
516
-
517
- ms.each do |m|
518
- mass = {'MW'=>nil,'MW_ERR'=>nil,'METHOD'=>nil,'RANGE'=>nil}
519
- m.sub(/.$/,'').split(/;/).each do |line|
520
- case line
521
- when /MW=(.+)/
522
- mass['MW'] = $1.to_f
523
- when /MW_ERR=(.+)/
524
- mass['MW_ERR'] = $1.to_f
525
- when /METHOD="(.+)"/
526
- mass['METHOD'] = $1.to_s
527
- when /RANGE="(\d+-\d+)"/
528
- mass['RANGE'] = $1 # RANGE class ?
529
- end
530
- end
531
- tmp.push(mass)
532
- end
533
- return tmp
534
-
535
- when 'INTERACTION'
536
- return cc_interaction_parse(@data['CC']['INTERACTION'].to_s)
537
-
538
743
  when nil
539
744
  return @data['CC']
540
-
541
745
  else
542
- return @data['CC'][tag]
746
+ return @data['CC'][topic]
543
747
  end
544
748
  end
545
749
 
546
750
 
751
# Parses the 'ALTERNATIVE PRODUCTS' topic of the CC lines.
#
# +data+ is the list of comment strings stored for the topic (nil when
# the entry has no such topic).
#
# Returns a Hash with the keys 'Event' (Array of event names once an
# Event= field is found, "" otherwise), 'Named isoforms', 'Comment' and
# 'Variants' (one Hash per "Name=...Sequence=...;" group, built by
# cc_alternative_products_variants).
def cc_alternative_products(data)
  # Array#to_s is an alias of Array#inspect since Ruby 1.9, which would
  # leak brackets, quotes and backslash escapes into the parsed values.
  # Joining the elements gives the plain concatenated text instead.
  text = Array(data).join

  # Event, Named isoforms, Comment, [Name, Synonyms, IsoId, Sequence]+
  result = {'Event' => "", 'Named isoforms' => "", 'Comment' => "",
            'Variants' => []}
  if /Event=(.+?);/ =~ text
    result['Event'] = $1.sub(/;/, '').split(/, /)
  end
  if /Named isoforms=(\S+?);/ =~ text
    result['Named isoforms'] = $1
  end
  if /Comment=(.+?);/m =~ text
    result['Comment'] = $1
  end
  text.scan(/Name=.+?Sequence=.+?;/).each do |ent|
    result['Variants'] << cc_alternative_products_variants(ent)
  end
  result
end
773
+ private :cc_alternative_products
547
774
 
548
- def cc_ap_variants_parse(ent)
549
- hsh = {}
550
- ent.split(/; /).map {|e| e.split(/=/) }.each do |e|
775
# Parses one "Name=...; Synonyms=...; IsoId=...; Sequence=...;" group of
# the ALTERNATIVE PRODUCTS comment into a Hash.
#
# The 'Sequence', 'Synonyms' and 'IsoId' values are split on ", " into
# Arrays; every other field is stored as the raw String after the "=".
def cc_alternative_products_variants(data)
  list_fields = ['Sequence', 'Synonyms', 'IsoId']
  variant = {'Name' => '', 'Synonyms' => [], 'IsoId' => [], 'Sequence' => []}

  data.split(/; /).each do |field|
    name, value = field.split(/=/)
    # Only the list-valued fields are stripped of ';' and broken apart.
    value = value.sub(/;/, '').split(/, /) if list_fields.include?(name)
    variant[name] = value
  end

  variant
end
786
+ private :cc_alternative_products_variants
787
+
788
+
789
# Parses the 'BIOPHYSICOCHEMICAL PROPERTIES' topic of the CC lines.
#
# +data+ is the list of comment strings stored for the topic; only the
# first element is examined. A nil +data+ (topic absent) is treated as
# empty text instead of raising NoMethodError.
#
# Returns a Hash with the keys 'Absorption' and 'Kinetic parameters'
# (Hashes that are filled only with the sub-fields found) and
# 'pH dependence', 'Redox potential', 'Temperature dependence'
# (Strings, "" when not found).
def cc_biophysiochemical_properties(data)
  text = Array(data).first.to_s

  hash = {'Absorption' => {},
          'Kinetic parameters' => {},
          'pH dependence' => "",
          'Redox potential' => "",
          'Temperature dependence' => ""}
  if text =~ /Absorption: Abs\(max\)=(.+?);/
    hash['Absorption']['Abs(max)'] = $1
  end
  if text =~ /Absorption: Abs\(max\)=.+; Note=(.+?);/
    hash['Absorption']['Note'] = $1
  end
  if text =~ /Kinetic parameters: KM=(.+?); Vmax=(.+?);/
    hash['Kinetic parameters']['KM'] = $1
    hash['Kinetic parameters']['Vmax'] = $2
  end
  if text =~ /Kinetic parameters: KM=.+; Vmax=.+; Note=(.+?);/
    hash['Kinetic parameters']['Note'] = $1
  end
  if text =~ /pH dependence: (.+?);/
    hash['pH dependence'] = $1
  end
  if text =~ /Redox potential: (.+?);/
    hash['Redox potential'] = $1
  end
  if text =~ /Temperature dependence: (.+?);/
    hash['Temperature dependence'] = $1
  end
  hash
end
821
+ private :cc_biophysiochemical_properties
822
+
823
+
824
# Returns the 'CAUTION' topic of the CC lines as a single String.
#
# +data+ is the list of comment strings stored for the topic; nil
# (topic absent) yields "". The elements are joined explicitly because
# Array#to_s returns the inspect form (brackets and quotes) on
# Ruby 1.9 and later.
def cc_caution(data)
  Array(data).join
end
559
- private :cc_ap_variants_parse
827
+ private :cc_caution
560
828
 
561
829
 
562
830
  # returns contents in a line of the CC INTERACTION section.
563
831
  #
564
832
  # CC P46527:CDKN1B; NbExp=1; IntAct=EBI-359815, EBI-519280;
565
- def cc_interaction_parse(str)
833
# Parses the 'INTERACTION' topic of the CC lines.
#
# Each "partner; NbExp=n; IntAct=acc1, acc2;" group found in +data+
# becomes one Hash with the keys 'SP_Ac', 'identifier', 'NbExp',
# 'IntAct' (Array of accessions) and 'optional_identifier'.
# For a "Self" partner both 'SP_Ac' and 'identifier' are set to
# this entry's entry_id. Returns an Array of those Hashes ([] when
# nothing matches, including nil +data+).
def cc_interaction(data)
  # Join instead of Array#to_s: on Ruby 1.9+ to_s is inspect, which
  # would prepend '["' to the first partner and corrupt the last
  # accession.
  text = Array(data).join

  text.scan(/(.+?); NbExp=(.+?); IntAct=(.+?);/).map { |fields|
    partner, nbexp, intact = fields.map { |x| x.strip }

    spac = spid = optid = nil
    if partner =~ /^(.+):(.+)/
      spac = $1
      # The identifier is the first word after the colon.
      spid = $2.split(' ')[0]
    elsif partner =~ /Self/
      spac = entry_id
      spid = entry_id
    end
    # Anything after the last blank following "ac:id" is kept as the
    # optional identifier.
    optid = $1 if partner =~ /^.+:.+ (.+)/

    {'SP_Ac' => spac,
     'identifier' => spid,
     'NbExp' => nbexp,
     'IntAct' => intact.split(', '),
     'optional_identifier' => optid}
  }
end
858
+ private :cc_interaction
859
+
860
+
861
# Parses the 'MASS SPECTROMETRY' topic of the CC lines.
#
# MASS SPECTROMETRY: MW=XXX[; MW_ERR=XX][; METHOD=XX][;RANGE=XX-XX].
#
# +data+ is the list of comment strings stored for the topic; it is
# returned unchanged when it is nil or false. Otherwise each string
# becomes a Hash with the keys 'MW', 'MW_ERR', 'METHOD', 'RANGE' and
# 'NOTE' (String values, nil for fields that are not present).
def cc_mass_spectrometry(data)
  return data unless data

  # Checked in this order; the first pattern that matches a field wins.
  field_patterns = [['MW', /MW=(.+)/],
                    ['MW_ERR', /MW_ERR=(.+)/],
                    ['METHOD', /METHOD=(.+)/],
                    ['RANGE', /RANGE=(\d+-\d+)/],   # RANGE class ?
                    ['NOTE', /NOTE=(.+)/]]

  data.map do |record|
    mass = {'MW' => nil, 'MW_ERR' => nil, 'METHOD' => nil, 'RANGE' => nil,
            'NOTE' => nil}
    # Drop the final character of the record before splitting on ';'.
    record.sub(/.$/,'').split(/;/).each do |field|
      field_patterns.each do |key, pattern|
        found = pattern.match(field)
        next unless found
        mass[key] = found[1]
        break
      end
    end
    mass
  end
end
885
+ private :cc_mass_spectrometry
886
+
887
+
888
# Parses the 'PATHWAY' topic of the CC lines.
#
# +data+ is the list of comment strings stored for the topic; only the
# first element is used. Its trailing '.' is removed and the text is
# split on "; ", " and " and ": ".
#
# Returns an Array of Strings, or nil when the entry has no PATHWAY
# comment (previously a nil +data+ raised NoMethodError).
def cc_pathway(data)
  first = Array(data).first
  return nil unless first

  first.sub(/\.$/, '').split(/; | and |: /)
end
893
+ private :cc_pathway
894
+
895
+
896
# Parses the 'RNA EDITING' topic of the CC lines.
#
# Returns a Hash with 'Modified_positions' (Array of Strings split on
# ", ") and 'Note' (String, "" when there is no Note= field).
#
# Raises ArgumentError when no Modified_positions= field can be found
# in +data+ (this includes a nil +data+); the message carries the entry
# id and the raw CC lines.
def cc_rna_editing(data)
  # Join instead of Array#to_s: on Ruby 1.9+ to_s is inspect, which
  # would append '"]' to the captured Note.
  text = Array(data).join

  entry = {'Modified_positions' => [], 'Note' => ""}
  if text =~ /Modified_positions=(.+?)(\.|;)/
    entry['Modified_positions'] = $1.sub(/\.$/, '').split(', ')
  else
    raise ArgumentError,
          "Invalid CC RNA Editing lines (#{entry_id}):\n#{get('CC')}"
  end
  if text =~ /Note=(.+)/
    entry['Note'] = $1
  end
  entry
end
909
+ private :cc_rna_editing
910
+
911
+
912
# Parses the 'SUBCELLULAR LOCATION' topic of the CC lines.
#
# +data+ is the list of comment strings stored for the topic; only the
# first element is used. The text is split into sentences on ". ", each
# sentence is split on "; ", and a trailing '.' is removed from every
# part.
#
# Returns an Array of Arrays of Strings, or nil when the entry has no
# such comment (previously a nil +data+ raised NoMethodError).
def cc_subcellular_location(data)
  first = Array(data).first
  return nil unless first

  first.split('. ').map { |sentence|
    sentence.split('; ').map { |part| part.sub(/\.$/, '') }
  }
end
921
+ private :cc_subcellular_location
922
+
923
+
924
+ # CC -!- WEB RESOURCE: NAME=ResourceName[; NOTE=FreeText][; URL=WWWAddress].
925
# Parses the 'WEB RESOURCE' topic of the CC lines.
#
# +data+ is the list of comment strings stored for the topic. Each
# string is split on ';' and turned into a Hash with the keys 'NAME',
# 'NOTE' and 'URL' (nil for fields that are not present). The URL is
# only recognised when it is enclosed in double quotes.
#
# Returns an Array of Hashes; a nil +data+ (topic absent) is returned
# as is instead of raising NoMethodError.
def cc_web_resource(data)
  return data unless data

  data.map { |record|
    entry = {'NAME' => nil, 'NOTE' => nil, 'URL' => nil}
    record.split(';').each do |field|
      case field
      when /NAME=(.+)/
        entry['NAME'] = $1.strip
      when /NOTE=(.+)/
        entry['NOTE'] = $1.strip
      when /URL="(.+)"/
        entry['URL'] = $1.strip
      end
    end
    entry
  }
end
573
- private :cc_interaction_parse
941
+
574
942
 
575
943
  # returns databases cross-references in the DR lines.
576
- # * Bio::EMBLDB#dr -> Hash w/in Array
944
+ # * Bio::SPTR#dr -> Hash w/in Array
577
945
  #
578
946
  # === DR Line; database cross-references (>=0)
579
947
  # DR database_identifier; primary_identifier; secondary_identifier.
@@ -585,6 +953,24 @@ class SPTR < EMBLDB
585
953
  'PROSITE','REBASE','AARHUS/GHENT-2DPAGE','SGD','STYGENE','SUBTILIST',
586
954
  'SWISS-2DPAGE','TIGR','TRANSFAC','TUBERCULIST','WORMPEP','YEPD','ZFIN']
587
955
 
956
+ # Backup Bio::EMBLDB#dr as embl_dr
957
+ alias :embl_dr :dr
958
+
959
+ # Bio::SPTR#dr
960
# Bio::SPTR#dr
#
# Without an argument, returns whatever embl_dr (the aliased
# Bio::EMBLDB#dr) returns. With +key+, looks that key up in the embl_dr
# result and maps each record (indexed by position) to a Hash with the
# keys 'Accession', 'Version', ' ' and 'Molecular Type'.
#
# A +key+ with no records yields an empty Array rather than a
# NoMethodError on nil.
def dr(key = nil)
  return embl_dr unless key

  (embl_dr[key] || []).map { |x|
    {'Accession' => x[0],
     'Version' => x[1],
     # NOTE(review): the blank key is kept as found; the meaning of
     # the third field is not documented here -- confirm.
     ' ' => x[2],
     'Molecular Type' => x[3]}
  }
end
972
+
973
+
588
974
  # Bio::EMBLDB::Common#kw - Array
589
975
  # #keywords -> Array
590
976
  #
@@ -592,10 +978,29 @@ class SPTR < EMBLDB
592
978
  # KW [Keyword;]+
593
979
 
594
980
 
595
- # returns conteins in the feature table.
981
+ # returns contents in the feature table.
982
+ #
983
+ # == Examples
984
+ #
985
+ # sp = Bio::SPTR.new(entry)
986
+ # ft = sp.ft
987
+ # ft.class #=> Hash
988
+ # ft.keys.each do |feature_key|
989
+ # ft[feature_key].each do |feature|
990
+ # feature['From'] #=> '1'
991
+ # feature['To'] #=> '21'
992
+ # feature['Description'] #=> ''
993
+ # feature['FTId'] #=> ''
994
+ # feature['diff'] #=> []
995
+ # feature['original'] #=> [feature_key, '1', '21', '', '']
996
+ # end
997
+ # end
998
+ #
596
999
  # * Bio::SPTR#ft -> Hash
597
- # {'feature_name' => [{'From' => str, 'To' => str,
598
- # 'Description' => str, 'FTId' => str}],...}
1000
+ # {FEATURE_KEY => [{'From' => int, 'To' => int,
1001
+ # 'Description' => aStr, 'FTId' => aStr,
1002
+ # 'diff' => [original_residues, changed_residues],
1003
+ # 'original' => aAry }],...}
599
1004
  #
600
1005
  # returns an Array of the information about the feature_name in the feature table.
601
1006
  # * Bio::SPTR#ft(feature_name) -> Array of Hash
@@ -611,106 +1016,87 @@ class SPTR < EMBLDB
611
1016
  # 22-27 `TO' endpoint
612
1017
  # 35-75 Description (>=0 per key)
613
1018
  # ----- -----------------
614
- def ft(feature_name = nil)
615
- unless @data['FT']
616
- table = Hash.new()
617
- last_feature = nil
618
-
619
- begin
620
- get('FT').split(/\n/).each {|line|
621
-
622
- feature = line[5..12].strip
1019
+ #
1020
+ # Note: 'FROM' and 'TO' endpoints are allowed to use non-numerical characters
1021
+ # including '<', '>' or '?'. (c.f. '<1', '?42')
1022
+ #
1023
+ # See also http://www.expasy.org/sprot/userman.html#FT_line
1024
+ #
1025
+ def ft(feature_key = nil)
1026
+ return ft[feature_key] if feature_key
1027
+ return @data['FT'] if @data['FT']
623
1028
 
624
- if feature == '' and line[34..74]
625
- tmp = ' ' + line[34..74].strip
626
- table[last_feature].last['Description'] << tmp
627
-
628
- next unless /\.$/ =~ line
629
- else
630
- from = line[14..19].strip
631
- to = line[21..26].strip
632
- desc = line[34..74].strip if line[34..74]
633
-
634
- table[feature] = [] unless table[feature]
635
- table[feature] << {
636
- 'From' => from.to_i,
637
- 'To' => to.to_i,
638
- 'Description' => desc,
639
- 'diff' => [],
640
- 'FTId' => nil }
641
- last_feature = feature
642
- next
643
- end
1029
+ table = []
1030
+ begin
1031
+ get('FT').split("\n").each do |line|
1032
+ if line =~ /^FT \w/
1033
+ feature = line.chomp.ljust(74)
1034
+ table << [feature[ 5..12].strip, # Feature Name
1035
+ feature[14..19].strip, # From
1036
+ feature[21..26].strip, # To
1037
+ feature[34..74].strip ] # Description
1038
+ else
1039
+ table.last << line.chomp.sub!(/^FT +/, '')
1040
+ end
1041
+ end
644
1042
 
645
- case last_feature
646
- when 'VARSPLIC', 'VARIANT', 'CONFLICT'
647
- if /FTId=(.+?)\./ =~ line # version 41 >
648
- ftid = $1
649
- table[last_feature].last['FTId'] = ftid
650
- table[last_feature].last['Description'].sub!(/ \/FTId=#{ftid}./,'')
651
- end
1043
+ # Joining Description lines
1044
+ table = table.map { |feature|
1045
+ ftid = feature.pop if feature.last =~ /FTId=/
1046
+ if feature.size > 4
1047
+ feature = [feature[0],
1048
+ feature[1],
1049
+ feature[2],
1050
+ feature[3, feature.size - 3].join(" ")]
1051
+ end
1052
+ feature << if ftid then ftid else '' end
1053
+ }
652
1054
 
653
- case table[last_feature].last['Description']
654
- when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
655
- original = $1
656
- swap = $2
657
- original = original.gsub(/ /,'').strip
658
- swap = swap.gsub(/ /,'').strip
659
- when /Missing/i
660
- original = seq.subseq(table[last_feature].last['From'],
661
- table[last_feature].last['To'])
662
- swap = ''
663
- else
664
- raise line
665
- end
666
- table[last_feature].last['diff'] = [original, swap]
667
- end
1055
+ hash = {}
1056
+ table.each do |feature|
1057
+ hash[feature[0]] = [] unless hash[feature[0]]
1058
+ hash[feature[0]] << {
1059
+ # Removing '<', '>' or '?' in FROM/TO endopoint.
1060
+ 'From' => feature[1].sub(/\D/, '').to_i,
1061
+ 'To' => feature[2].sub(/\D/, '').to_i,
1062
+ 'Description' => feature[3],
1063
+ 'FTId' => feature[4].to_s.sub(/\/FTId=/, '').sub(/\.$/, ''),
1064
+ 'diff' => [],
1065
+ 'original' => feature
668
1066
  }
669
1067
 
670
- rescue
671
- raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n" +
672
- "'#{self.get('FT')}'\n"
673
- end
674
-
675
- table.each_key do |k|
676
- table[k].each do |e|
677
- if / -> / =~ e['Description']
678
- pattern = /([A-Z][A-Z ]*[A-Z]*) -> ([A-Z][A-Z ]*[A-Z]*)/
679
- e['Description'].sub!(pattern) {
680
- a = $1
681
- b = $2
682
- a.gsub(/ /,'') + " -> " + b.gsub(/ /,'')
683
- }
684
- end
685
- if /- [\w\d]/ =~ e['Description']
686
- e['Description'].gsub!(/([\w\d]- [\w\d]+)/) {
687
- a = $1
688
- if /- AND/ =~ a
689
- a
690
- else
691
- a.sub(/ /,'')
692
- end
693
- }
1068
+ case feature[0]
1069
+ when 'VARSPLIC', 'VARIANT', 'VAR_SEQ', 'CONFLICT'
1070
+ case hash[feature[0]].last['Description']
1071
+ when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
1072
+ original_res = $1
1073
+ changed_res = $2
1074
+ original_res = original_res.gsub(/ /,'').strip
1075
+ chenged_res = changed_res.gsub(/ /,'').strip
1076
+ when /Missing/i
1077
+ original_res = seq.subseq(hash[feature[0]].last['From'],
1078
+ hash[feature[0]].last['To'])
1079
+ changed_res = ''
694
1080
  end
1081
+ hash[feature[0]].last['diff'] = [original_res, chenged_res]
695
1082
  end
696
1083
  end
697
- @data['FT'] = table
1084
+ rescue
1085
+ raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n'#{self.get('FT')}'\n"
698
1086
  end
699
1087
 
700
- if feature_name
701
- @data['FT'][feature_name]
702
- else
703
- @data['FT']
704
- end
1088
+ @data['FT'] = hash
705
1089
  end
706
1090
 
707
1091
 
1092
+
708
1093
  # returns a Hash of contents in the SQ lines.
709
1094
  # * Bio::SPTRL#sq -> hsh
710
1095
  #
711
1096
  # returns a value of a key given in the SQ lines.
712
1097
  # * Bio::SPTRL#sq(key) -> int or str
713
- # * Keys: ['MW', 'mw', 'molecular', 'weight', 'aalen', 'len', 'length', 'CRC64']
1098
+ # * Keys: ['MW', 'mw', 'molecular', 'weight', 'aalen', 'len', 'length',
1099
+ # 'CRC64']
714
1100
  #
715
1101
  # === SQ Line; sequence header (1/entry)
716
1102
  # SQ SEQUENCE 233 AA; 25630 MW; 146A1B48A1475C86 CRC64;
@@ -759,84 +1145,6 @@ end # class SPTR
759
1145
  end # module Bio
760
1146
 
761
1147
 
762
- if __FILE__ == $0
763
- # Usage: ruby __FILE__ uniprot_sprot.dat
764
- # Usage: ruby __FILE__ uniprot_sprot.dat | egrep '^RuntimeError'
765
-
766
- begin
767
- require 'pp'
768
- alias pp p
769
- rescue LoadError
770
- end
771
-
772
- def cmd(cmd, tag = nil, ent = $ent)
773
- puts " ==> #{cmd} "
774
- puts Bio::SPTR.new(ent).get(tag) if tag
775
- begin
776
- p eval(cmd)
777
- rescue RuntimeError
778
- puts "RuntimeError(#{Bio::SPTR.new($ent).entry_id})}: #{$!} "
779
- end
780
- puts
781
- end
782
-
783
-
784
- while $ent = $<.gets(Bio::SPTR::RS)
785
-
786
- cmd "Bio::SPTR.new($ent).entry_id"
787
-
788
- cmd "Bio::SPTR.new($ent).id_line", 'ID'
789
- cmd "Bio::SPTR.new($ent).entry"
790
- cmd "Bio::SPTR.new($ent).entry_name"
791
- cmd "Bio::SPTR.new($ent).molecule"
792
- cmd "Bio::SPTR.new($ent).sequence_length"
793
-
794
- cmd "Bio::SPTR.new($ent).ac", 'AC'
795
- cmd "Bio::SPTR.new($ent).accession"
796
-
797
-
798
- cmd "Bio::SPTR.new($ent).gn", 'GN'
799
- cmd "Bio::SPTR.new($ent).gene_name"
800
- cmd "Bio::SPTR.new($ent).gene_names"
801
-
802
- cmd "Bio::SPTR.new($ent).dt", "DT"
803
- ['created','annotation','sequence'].each do |key|
804
- cmd "Bio::SPTR.new($ent).dt('#{key}')"
805
- end
806
-
807
- cmd "Bio::SPTR.new($ent).de", 'DE'
808
- cmd "Bio::SPTR.new($ent).definition"
809
- cmd "Bio::SPTR.new($ent).protein_name"
810
- cmd "Bio::SPTR.new($ent).synonyms"
811
-
812
- cmd "Bio::SPTR.new($ent).kw", 'KW'
813
-
814
- cmd "Bio::SPTR.new($ent).os", 'OS'
815
-
816
- cmd "Bio::SPTR.new($ent).oc", 'OC'
817
-
818
- cmd "Bio::SPTR.new($ent).og", 'OG'
819
-
820
- cmd "Bio::SPTR.new($ent).ox", 'OX'
821
-
822
- cmd "Bio::SPTR.new($ent).ref", 'R'
823
-
824
- cmd "Bio::SPTR.new($ent).cc", 'CC'
825
- cmd "Bio::SPTR.new($ent).cc('ALTERNATIVE PRODUCTS')"
826
- cmd "Bio::SPTR.new($ent).cc('DATABASE')"
827
- cmd "Bio::SPTR.new($ent).cc('MASS SPECTOMETRY')"
828
-
829
- cmd "Bio::SPTR.new($ent).dr", 'DR'
830
-
831
- cmd "Bio::SPTR.new($ent).ft", 'FT'
832
- cmd "Bio::SPTR.new($ent).ft['DOMAIN']"
833
-
834
- cmd "Bio::SPTR.new($ent).sq", "SQ"
835
- cmd "Bio::SPTR.new($ent).seq"
836
- end
837
-
838
- end
839
-
840
1148
 
841
1149
  =begin
842
1150
 
@@ -955,6 +1263,7 @@ Class for a entry in the SWISS-PROT/TrEMBL database.
955
1263
  # OG - organelle (0 or 1 per entry; optional)
956
1264
  # OC - organism classification (>=1 per entry)
957
1265
  # OX - organism taxonomy x-ref (>=1 per entry)
1266
+ # OH - Organism Host
958
1267
  # RN - reference number (>=1 per entry)
959
1268
  # RP - reference positions (>=1 per entry)
960
1269
  # RC - reference comment(s) (>=0 per entry; optional)
@@ -962,6 +1271,7 @@ Class for a entry in the SWISS-PROT/TrEMBL database.
962
1271
  # RA - reference author(s) (>=1 per entry)
963
1272
  # RT - reference title (>=0 per entry; optional)
964
1273
  # RL - reference location (>=1 per entry)
1274
+ # RG - reference group(s)
965
1275
  # CC - comments or notes (>=0 per entry; optional)
966
1276
  # DR - database cross-references (>=0 per entry; optional)
967
1277
  # KW - keywords (>=1 per entry)