bio 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (340) hide show
  1. data/bin/bioruby +14 -122
  2. data/bin/br_biofetch.rb +2 -2
  3. data/bin/br_bioflat.rb +2 -2
  4. data/bin/br_biogetseq.rb +2 -2
  5. data/bin/br_pmfetch.rb +3 -3
  6. data/doc/Changes-0.7.rd +77 -0
  7. data/doc/KEGG_API.rd +523 -232
  8. data/doc/KEGG_API.rd.ja +529 -207
  9. data/doc/Tutorial.rd +48 -11
  10. data/lib/bio.rb +59 -6
  11. data/lib/bio/alignment.rb +713 -103
  12. data/lib/bio/appl/bl2seq/report.rb +2 -18
  13. data/lib/bio/appl/blast.rb +108 -91
  14. data/lib/bio/appl/blast/format0.rb +33 -18
  15. data/lib/bio/appl/blast/format8.rb +6 -20
  16. data/lib/bio/appl/blast/report.rb +293 -429
  17. data/lib/bio/appl/blast/rexml.rb +8 -22
  18. data/lib/bio/appl/blast/wublast.rb +21 -12
  19. data/lib/bio/appl/blast/xmlparser.rb +180 -183
  20. data/lib/bio/appl/blat/report.rb +127 -30
  21. data/lib/bio/appl/clustalw.rb +87 -59
  22. data/lib/bio/appl/clustalw/report.rb +20 -22
  23. data/lib/bio/appl/emboss.rb +113 -20
  24. data/lib/bio/appl/fasta.rb +173 -198
  25. data/lib/bio/appl/fasta/format10.rb +244 -347
  26. data/lib/bio/appl/gcg/msf.rb +212 -0
  27. data/lib/bio/appl/gcg/seq.rb +195 -0
  28. data/lib/bio/appl/genscan/report.rb +5 -23
  29. data/lib/bio/appl/hmmer.rb +8 -45
  30. data/lib/bio/appl/hmmer/report.rb +2 -20
  31. data/lib/bio/appl/iprscan/report.rb +374 -0
  32. data/lib/bio/appl/mafft.rb +87 -50
  33. data/lib/bio/appl/mafft/report.rb +151 -44
  34. data/lib/bio/appl/muscle.rb +52 -0
  35. data/lib/bio/appl/phylip/alignment.rb +129 -0
  36. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  37. data/lib/bio/appl/probcons.rb +41 -0
  38. data/lib/bio/appl/psort.rb +89 -96
  39. data/lib/bio/appl/psort/report.rb +6 -22
  40. data/lib/bio/appl/pts1.rb +263 -0
  41. data/lib/bio/appl/sim4.rb +26 -36
  42. data/lib/bio/appl/sim4/report.rb +2 -18
  43. data/lib/bio/appl/sosui/report.rb +5 -20
  44. data/lib/bio/appl/spidey/report.rb +2 -2
  45. data/lib/bio/appl/targetp/report.rb +4 -20
  46. data/lib/bio/appl/tcoffee.rb +55 -0
  47. data/lib/bio/appl/tmhmm/report.rb +4 -20
  48. data/lib/bio/command.rb +235 -64
  49. data/lib/bio/data/aa.rb +21 -26
  50. data/lib/bio/data/codontable.rb +2 -20
  51. data/lib/bio/data/na.rb +19 -4
  52. data/lib/bio/db.rb +27 -12
  53. data/lib/bio/db/aaindex.rb +2 -20
  54. data/lib/bio/db/embl/common.rb +4 -21
  55. data/lib/bio/db/embl/embl.rb +33 -85
  56. data/lib/bio/db/embl/sptr.rb +612 -302
  57. data/lib/bio/db/embl/swissprot.rb +10 -29
  58. data/lib/bio/db/embl/trembl.rb +10 -29
  59. data/lib/bio/db/embl/uniprot.rb +10 -29
  60. data/lib/bio/db/fantom.rb +15 -20
  61. data/lib/bio/db/fasta.rb +3 -3
  62. data/lib/bio/db/genbank/common.rb +37 -46
  63. data/lib/bio/db/genbank/ddbj.rb +6 -18
  64. data/lib/bio/db/genbank/genbank.rb +47 -186
  65. data/lib/bio/db/genbank/genpept.rb +4 -17
  66. data/lib/bio/db/genbank/refseq.rb +4 -17
  67. data/lib/bio/db/gff.rb +103 -35
  68. data/lib/bio/db/go.rb +4 -20
  69. data/lib/bio/db/kegg/brite.rb +26 -36
  70. data/lib/bio/db/kegg/compound.rb +81 -85
  71. data/lib/bio/db/kegg/drug.rb +98 -0
  72. data/lib/bio/db/kegg/enzyme.rb +133 -110
  73. data/lib/bio/db/kegg/expression.rb +2 -20
  74. data/lib/bio/db/kegg/genes.rb +208 -238
  75. data/lib/bio/db/kegg/genome.rb +164 -285
  76. data/lib/bio/db/kegg/glycan.rb +114 -157
  77. data/lib/bio/db/kegg/keggtab.rb +242 -303
  78. data/lib/bio/db/kegg/kgml.rb +117 -160
  79. data/lib/bio/db/kegg/orthology.rb +112 -0
  80. data/lib/bio/db/kegg/reaction.rb +54 -69
  81. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  82. data/lib/bio/db/lasergene.rb +209 -0
  83. data/lib/bio/db/litdb.rb +3 -27
  84. data/lib/bio/db/medline.rb +228 -249
  85. data/lib/bio/db/nbrf.rb +3 -3
  86. data/lib/bio/db/newick.rb +510 -0
  87. data/lib/bio/db/nexus.rb +1854 -0
  88. data/lib/bio/db/pdb.rb +5 -17
  89. data/lib/bio/db/pdb/atom.rb +2 -18
  90. data/lib/bio/db/pdb/chain.rb +2 -18
  91. data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
  92. data/lib/bio/db/pdb/model.rb +2 -18
  93. data/lib/bio/db/pdb/pdb.rb +73 -34
  94. data/lib/bio/db/pdb/residue.rb +4 -20
  95. data/lib/bio/db/pdb/utils.rb +2 -18
  96. data/lib/bio/db/prosite.rb +403 -422
  97. data/lib/bio/db/rebase.rb +84 -40
  98. data/lib/bio/db/soft.rb +404 -0
  99. data/lib/bio/db/transfac.rb +5 -17
  100. data/lib/bio/feature.rb +106 -52
  101. data/lib/bio/io/das.rb +32 -42
  102. data/lib/bio/io/dbget.rb +2 -20
  103. data/lib/bio/io/ddbjxml.rb +77 -138
  104. data/lib/bio/io/ebisoap.rb +158 -0
  105. data/lib/bio/io/ensembl.rb +229 -0
  106. data/lib/bio/io/fastacmd.rb +89 -82
  107. data/lib/bio/io/fetch.rb +163 -96
  108. data/lib/bio/io/flatfile.rb +170 -73
  109. data/lib/bio/io/flatfile/bdb.rb +3 -16
  110. data/lib/bio/io/flatfile/index.rb +2 -2
  111. data/lib/bio/io/flatfile/indexer.rb +3 -2
  112. data/lib/bio/io/higet.rb +12 -31
  113. data/lib/bio/io/keggapi.rb +210 -269
  114. data/lib/bio/io/ncbisoap.rb +155 -0
  115. data/lib/bio/io/pubmed.rb +169 -147
  116. data/lib/bio/io/registry.rb +4 -20
  117. data/lib/bio/io/soapwsdl.rb +43 -38
  118. data/lib/bio/io/sql.rb +242 -305
  119. data/lib/bio/location.rb +407 -285
  120. data/lib/bio/map.rb +410 -0
  121. data/lib/bio/pathway.rb +558 -695
  122. data/lib/bio/reference.rb +272 -75
  123. data/lib/bio/sequence.rb +255 -13
  124. data/lib/bio/sequence/aa.rb +71 -10
  125. data/lib/bio/sequence/common.rb +187 -33
  126. data/lib/bio/sequence/compat.rb +59 -4
  127. data/lib/bio/sequence/format.rb +54 -7
  128. data/lib/bio/sequence/generic.rb +3 -3
  129. data/lib/bio/sequence/na.rb +328 -26
  130. data/lib/bio/shell.rb +11 -4
  131. data/lib/bio/shell/core.rb +221 -160
  132. data/lib/bio/shell/demo.rb +18 -15
  133. data/lib/bio/shell/interface.rb +14 -12
  134. data/lib/bio/shell/irb.rb +95 -0
  135. data/lib/bio/shell/object.rb +45 -26
  136. data/lib/bio/shell/plugin/blast.rb +42 -0
  137. data/lib/bio/shell/plugin/codon.rb +22 -14
  138. data/lib/bio/shell/plugin/das.rb +58 -0
  139. data/lib/bio/shell/plugin/emboss.rb +2 -2
  140. data/lib/bio/shell/plugin/entry.rb +22 -11
  141. data/lib/bio/shell/plugin/flatfile.rb +2 -2
  142. data/lib/bio/shell/plugin/keggapi.rb +13 -6
  143. data/lib/bio/shell/plugin/midi.rb +4 -4
  144. data/lib/bio/shell/plugin/obda.rb +2 -2
  145. data/lib/bio/shell/plugin/psort.rb +56 -0
  146. data/lib/bio/shell/plugin/seq.rb +35 -8
  147. data/lib/bio/shell/plugin/soap.rb +87 -0
  148. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
  149. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
  150. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
  151. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
  152. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
  153. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
  154. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
  156. data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
  159. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
  160. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
  161. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
  162. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
  163. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
  164. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
  165. data/lib/bio/shell/script.rb +25 -0
  166. data/lib/bio/shell/setup.rb +109 -0
  167. data/lib/bio/shell/web.rb +70 -58
  168. data/lib/bio/tree.rb +850 -0
  169. data/lib/bio/util/color_scheme.rb +84 -107
  170. data/lib/bio/util/color_scheme/buried.rb +5 -24
  171. data/lib/bio/util/color_scheme/helix.rb +5 -24
  172. data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
  173. data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
  174. data/lib/bio/util/color_scheme/strand.rb +5 -24
  175. data/lib/bio/util/color_scheme/taylor.rb +5 -24
  176. data/lib/bio/util/color_scheme/turn.rb +5 -24
  177. data/lib/bio/util/color_scheme/zappo.rb +5 -24
  178. data/lib/bio/util/contingency_table.rb +70 -43
  179. data/lib/bio/util/restriction_enzyme.rb +228 -0
  180. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  181. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  182. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  183. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  184. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  185. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  186. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  187. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  188. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  189. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  190. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  191. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  192. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  193. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  194. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  195. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  196. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  197. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  198. data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
  199. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  200. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  201. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  202. data/lib/bio/util/sirna.rb +4 -22
  203. data/sample/color_scheme_na.rb +4 -12
  204. data/sample/enzymes.rb +78 -0
  205. data/sample/goslim.rb +5 -13
  206. data/sample/psortplot_html.rb +4 -12
  207. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  208. data/test/data/embl/AB090716.embl.rel89 +63 -0
  209. data/test/data/fasta/example1.txt +75 -0
  210. data/test/data/fasta/example2.txt +21 -0
  211. data/test/data/iprscan/merged.raw +32 -0
  212. data/test/data/iprscan/merged.txt +74 -0
  213. data/test/data/soft/GDS100_partial.soft +92 -0
  214. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  215. data/test/functional/bio/io/test_ensembl.rb +103 -0
  216. data/test/functional/bio/io/test_soapwsdl.rb +5 -17
  217. data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
  218. data/test/unit/bio/appl/blast/test_report.rb +3 -16
  219. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
  220. data/test/unit/bio/appl/genscan/test_report.rb +3 -16
  221. data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
  222. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  223. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  224. data/test/unit/bio/appl/sosui/test_report.rb +3 -16
  225. data/test/unit/bio/appl/targetp/test_report.rb +3 -16
  226. data/test/unit/bio/appl/test_blast.rb +3 -16
  227. data/test/unit/bio/appl/test_fasta.rb +4 -16
  228. data/test/unit/bio/appl/test_pts1.rb +140 -0
  229. data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
  230. data/test/unit/bio/data/test_aa.rb +4 -17
  231. data/test/unit/bio/data/test_codontable.rb +3 -16
  232. data/test/unit/bio/data/test_na.rb +3 -3
  233. data/test/unit/bio/db/embl/test_common.rb +3 -16
  234. data/test/unit/bio/db/embl/test_embl.rb +3 -16
  235. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  236. data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
  237. data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
  238. data/test/unit/bio/db/kegg/test_genes.rb +3 -16
  239. data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
  240. data/test/unit/bio/db/test_aaindex.rb +2 -2
  241. data/test/unit/bio/db/test_fasta.rb +3 -16
  242. data/test/unit/bio/db/test_gff.rb +3 -16
  243. data/test/unit/bio/db/test_lasergene.rb +95 -0
  244. data/test/unit/bio/db/test_newick.rb +56 -0
  245. data/test/unit/bio/db/test_nexus.rb +360 -0
  246. data/test/unit/bio/db/test_prosite.rb +5 -18
  247. data/test/unit/bio/db/test_rebase.rb +11 -25
  248. data/test/unit/bio/db/test_soft.rb +138 -0
  249. data/test/unit/bio/io/test_ddbjxml.rb +5 -17
  250. data/test/unit/bio/io/test_ensembl.rb +109 -0
  251. data/test/unit/bio/io/test_fastacmd.rb +3 -16
  252. data/test/unit/bio/io/test_flatfile.rb +237 -0
  253. data/test/unit/bio/io/test_soapwsdl.rb +4 -17
  254. data/test/unit/bio/sequence/test_aa.rb +3 -3
  255. data/test/unit/bio/sequence/test_common.rb +3 -16
  256. data/test/unit/bio/sequence/test_compat.rb +3 -16
  257. data/test/unit/bio/sequence/test_na.rb +29 -3
  258. data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
  259. data/test/unit/bio/test_alignment.rb +16 -27
  260. data/test/unit/bio/test_command.rb +242 -25
  261. data/test/unit/bio/test_db.rb +3 -16
  262. data/test/unit/bio/test_feature.rb +4 -16
  263. data/test/unit/bio/test_location.rb +4 -16
  264. data/test/unit/bio/test_map.rb +230 -0
  265. data/test/unit/bio/test_pathway.rb +4 -16
  266. data/test/unit/bio/test_reference.rb +2 -2
  267. data/test/unit/bio/test_sequence.rb +7 -19
  268. data/test/unit/bio/test_shell.rb +3 -16
  269. data/test/unit/bio/test_tree.rb +593 -0
  270. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  271. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  272. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  273. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
  274. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  275. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  276. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  277. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  278. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  279. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  280. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  281. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  282. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  283. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  284. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  285. data/test/unit/bio/util/test_color_scheme.rb +6 -18
  286. data/test/unit/bio/util/test_contingency_table.rb +6 -18
  287. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  288. data/test/unit/bio/util/test_sirna.rb +3 -16
  289. metadata +228 -169
  290. data/doc/BioRuby.rd.ja +0 -225
  291. data/doc/Design.rd.ja +0 -341
  292. data/doc/TODO.rd.ja +0 -138
  293. data/lib/bio/appl/fasta/format6.rb +0 -37
  294. data/lib/bio/db/kegg/cell.rb +0 -88
  295. data/lib/bio/db/kegg/ko.rb +0 -178
  296. data/lib/bio/shell/rails/Rakefile +0 -10
  297. data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
  298. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
  299. data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
  300. data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
  301. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
  302. data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
  303. data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
  304. data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
  305. data/lib/bio/shell/rails/config/boot.rb +0 -19
  306. data/lib/bio/shell/rails/config/database.yml +0 -85
  307. data/lib/bio/shell/rails/config/environment.rb +0 -53
  308. data/lib/bio/shell/rails/config/environments/development.rb +0 -19
  309. data/lib/bio/shell/rails/config/environments/production.rb +0 -19
  310. data/lib/bio/shell/rails/config/environments/test.rb +0 -19
  311. data/lib/bio/shell/rails/config/routes.rb +0 -19
  312. data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
  313. data/lib/bio/shell/rails/public/404.html +0 -8
  314. data/lib/bio/shell/rails/public/500.html +0 -8
  315. data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
  316. data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
  317. data/lib/bio/shell/rails/public/dispatch.rb +0 -10
  318. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  319. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  320. data/lib/bio/shell/rails/public/index.html +0 -277
  321. data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
  322. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
  323. data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
  324. data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
  325. data/lib/bio/shell/rails/public/robots.txt +0 -1
  326. data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
  327. data/lib/bio/shell/rails/script/about +0 -3
  328. data/lib/bio/shell/rails/script/breakpointer +0 -3
  329. data/lib/bio/shell/rails/script/console +0 -3
  330. data/lib/bio/shell/rails/script/destroy +0 -3
  331. data/lib/bio/shell/rails/script/generate +0 -3
  332. data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
  333. data/lib/bio/shell/rails/script/performance/profiler +0 -3
  334. data/lib/bio/shell/rails/script/plugin +0 -3
  335. data/lib/bio/shell/rails/script/process/reaper +0 -3
  336. data/lib/bio/shell/rails/script/process/spawner +0 -3
  337. data/lib/bio/shell/rails/script/process/spinner +0 -3
  338. data/lib/bio/shell/rails/script/runner +0 -3
  339. data/lib/bio/shell/rails/script/server +0 -42
  340. data/lib/bio/shell/rails/test/test_helper.rb +0 -28
@@ -1,262 +1,46 @@
1
1
  #
2
2
  # = bio/location.rb - Locations/Location class (GenBank location format)
3
3
  #
4
- # Copyright:: Copyright (C) 2001, 2005
5
- # KATAYAMA Toshiaki <k@bioruby.org>
6
- # License:: LGPL
4
+ # Copyright:: Copyright (C) 2001, 2005 Toshiaki Katayama <k@bioruby.org>
5
+ # Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
6
+ # License:: The Ruby License
7
7
  #
8
- # $Id: location.rb,v 0.22 2005/12/18 15:50:06 k Exp $
8
+ # $Id: location.rb,v 0.28 2007/04/05 23:35:39 trevor Exp $
9
9
  #
10
- # == Appendix : GenBank location descriptor classification
11
- #
12
- # === Definition of the position notation of the GenBank location format
13
- #
14
- # According to the GenBank manual 'gbrel.txt', I classified position notations
15
- # into 10 patterns - (A) to (J).
16
- #
17
- # 3.4.12.2 Feature Location
18
- #
19
- # The second column of the feature descriptor line designates the
20
- # location of the feature in the sequence. The location descriptor
21
- # begins at position 22. Several conventions are used to indicate
22
- # sequence location.
23
- #
24
- # Base numbers in location descriptors refer to numbering in the entry,
25
- # which is not necessarily the same as the numbering scheme used in the
26
- # published report. The first base in the presented sequence is numbered
27
- # base 1. Sequences are presented in the 5 to 3 direction.
28
- #
29
- # Location descriptors can be one of the following:
30
- #
31
- # (A) 1. A single base;
32
- #
33
- # (B) 2. A contiguous span of bases;
34
- #
35
- # (C) 3. A site between two bases;
36
- #
37
- # (D) 4. A single base chosen from a range of bases;
38
- #
39
- # (E) 5. A single base chosen from among two or more specified bases;
40
- #
41
- # (F) 6. A joining of sequence spans;
42
- #
43
- # (G) 7. A reference to an entry other than the one to which the feature
44
- # belongs (i.e., a remote entry), followed by a location descriptor
45
- # referring to the remote sequence;
46
- #
47
- # (H) 8. A literal sequence (a string of bases enclosed in quotation marks).
48
- #
49
- #
50
- # (C) A site between two residues, such as an endonuclease cleavage site, is
51
- # indicated by listing the two bases separated by a carat (e.g., 23^24).
52
- #
53
- # (D) A single residue chosen from a range of residues is indicated by the
54
- # number of the first and last bases in the range separated by a single
55
- # period (e.g., 23.79). The symbols < and > indicate that the end point
56
- # (I) of the range is beyond the specified base number.
57
- #
58
- # (B) A contiguous span of bases is indicated by the number of the first and
59
- # last bases in the range separated by two periods (e.g., 23..79). The
60
- # (I) symbols < and > indicate that the end point of the range is beyond the
61
- # specified base number. Starting and ending positions can be indicated
62
- # by base number or by one of the operators described below.
63
- #
64
- # Operators are prefixes that specify what must be done to the indicated
65
- # sequence to locate the feature. The following are the operators
66
- # available, along with their most common format and a description.
67
- #
68
- # (J) complement (location): The feature is complementary to the location
69
- # indicated. Complementary strands are read 5 to 3.
70
- #
71
- # (F) join (location, location, .. location): The indicated elements should
72
- # be placed end to end to form one contiguous sequence.
73
- #
74
- # (F) order (location, location, .. location): The elements are found in the
75
- # specified order in the 5 to 3 direction, but nothing is implied about
76
- # the rationality of joining them.
77
- #
78
- # (F) group (location, location, .. location): The elements are related and
79
- # should be grouped together, but no order is implied.
80
- #
81
- # (E) one-of (location, location, .. location): The element can be any one,
82
- # but only one, of the items listed.
83
- #
84
- # === Reduction strategy of the position notations
85
- #
86
- # (A) Location n
87
- #
88
- # (B) Location n..m
89
- #
90
- # (C) Location n^m
91
- #
92
- # (D) (n.m) => Location n
93
- #
94
- # (E) one-of(n,m,..) => Location n
95
- # one-of(n..m,..) => Location n..m
96
- #
97
- # (F) order(loc,loc,..) => join(loc, loc,..)
98
- # group(loc,loc,..) => join(loc, loc,..)
99
- # join(loc,loc,..) => Sequence
100
- #
101
- # (G) ID:loc => Location with ID
102
- #
103
- # (H) "atgc" => Location only with Sequence
104
- #
105
- # (I) <n => Location n with lt flag
106
- # >n => Location n with gt flag
107
- # <n..m => Location n..m with lt flag
108
- # n..>m => Location n..m with gt flag
109
- # <n..>m => Location n..m with lt, gt flag
110
- #
111
- # (J) complement(loc) => Sequence
112
- #
113
- # (K) replace(loc, str) => Location with replacement Sequence
114
- #
115
- # === GenBank location examples
116
- #
117
- # (C) n^m
118
- #
119
- # * [AB015179] 754^755
120
- # * [AF179299] complement(53^54)
121
- # * [CELXOL1ES] replace(4480^4481,"")
122
- # * [ECOUW87] replace(4792^4793,"a")
123
- # * [APLPCII] replace(1905^1906,"acaaagacaccgccctacgcc")
124
- #
125
- # (D) (n.m)
126
- #
127
- # * [HACSODA] 157..(800.806)
128
- # * [HALSODB] (67.68)..(699.703)
129
- # * [AP001918] (45934.45974)..46135
130
- # * [BACSPOJ] <180..(731.761)
131
- # * [BBU17998] (88.89)..>1122
132
- # * [ECHTGA] complement((1700.1708)..(1715.1721))
133
- # * [ECPAP17] complement(<22..(255.275))
134
- # * [LPATOVGNS] complement((64.74)..1525)
135
- # * [PIP404CG] join((8298.8300)..10206,1..855)
136
- # * [BOVMHDQBY4] join(M30006.1:(392.467)..575,M30005.1:415..681,M30004.1:129..410,M30004.1:907..1017,521..534)
137
- # * [HUMMIC2A] replace((651.655)..(651.655),"")
138
- # * [HUMSOD102] order(L44135.1:(454.445)..>538,<1..181)
139
- #
140
- # (E) one-of
141
- #
142
- # * [ECU17136] one-of(898,900)..983
143
- # * [CELCYT1A] one-of(5971..6308,5971..6309)
144
- # * [DMU17742] 8050..one-of(10731,10758,10905,11242)
145
- # * [PFU27807] one-of(623,627,632)..one-of(628,633,637)
146
- # * [BTBAINH1] one-of(845,953,963,1078,1104)..1354
147
- # * [ATU39449] join(one-of(969..1094,970..1094,995..1094,1018..1094),1518..1587,1726..2119,2220..2833,2945..3215)
148
- #
149
- # (F) join, order, group
150
- #
151
- # * [AB037374S2] join(AB037374.1:1..177,1..807)
152
- # * [AP000001] join(complement(1..61),complement(AP000007.1:252907..253505))
153
- # * [ASNOS11] join(AF130124.1:<2563..2964,AF130125.1:21..157,AF130126.1:12..174,AF130127.1:21..112,AF130128.1:21..162,AF130128.1:281..595,AF130128.1:661..842,AF130128.1:916..1030,AF130129.1:21..115,AF130130.1:21..165,AF130131.1:21..125,AF130132.1:21..428,AF130132.1:492..746,AF130133.1:21..168,AF130133.1:232..401,AF130133.1:475..906,AF130133.1:970..1107,AF130133.1:1176..1367,21..>128)
154
- #
155
- # * [AARPOB2] order(AF194507.1:<1..510,1..>871)
156
- # * [AF006691] order(912..1918,20410..21416)
157
- # * [AF024666] order(complement(18919..19224),complement(13965..14892))
158
- # * [AF264948] order(27066..27076,27089..27099,27283..27314,27330..27352)
159
- # * [D63363] order(3..26,complement(964..987))
160
- # * [ECOCURLI2] order(complement(1009..>1260),complement(AF081827.1:<1..177))
161
- # * [S72388S2] order(join(S72388.1:757..911,S72388.1:609..1542),1..>139)
162
- # * [HEYRRE07] order(complement(1..38),complement(M82666.1:1..140),complement(M82665.1:1..176),complement(M82664.1:1..215),complement(M82663.1:1..185),complement(M82662.1:1..49),complement(M82661.1:1..133))
163
- # * [COL11A1G34] order(AF101079.1:558..1307,AF101080.1:1..749,AF101081.1:1..898,AF101082.1:1..486,AF101083.1:1..942,AF101084.1:1..1734,AF101085.1:1..2385,AF101086.1:1..1813,AF101087.1:1..2287,AF101088.1:1..1073,AF101089.1:1..989,AF101090.1:1..5017,AF101091.1:1..3401,AF101092.1:1..1225,AF101093.1:1..1072,AF101094.1:1..989,AF101095.1:1..1669,AF101096.1:1..918,AF101097.1:1..1114,AF101098.1:1..1074,AF101099.1:1..1709,AF101100.1:1..986,AF101101.1:1..1934,AF101102.1:1..1699,AF101103.1:1..940,AF101104.1:1..2330,AF101105.1:1..4467,AF101106.1:1..1876,AF101107.1:1..2465,AF101108.1:1..1150,AF101109.1:1..1170,AF101110.1:1..1158,AF101111.1:1..1193,1..611)
164
- #
165
- # group() are found in the COMMENT field only (in GenBank 122.0)
166
- #
167
- # gbpat2.seq: FT repeat_region group(598..606,611..619)
168
- # gbpat2.seq: FT repeat_region group(8..16,1457..1464).
169
- # gbpat2.seq: FT variation group(t1,t2)
170
- # gbpat2.seq: FT variation group(t1,t3)
171
- # gbpat2.seq: FT variation group(t1,t2,t3)
172
- # gbpat2.seq: FT repeat_region group(11..202,203..394)
173
- # gbpri9.seq:COMMENT Residues reported = 'group(1..2145);'.
174
- #
175
- # (G) ID:location
176
- #
177
- # * [AARPOB2] order(AF194507.1:<1..510,1..>871)
178
- # * [AF178221S4] join(AF178221.1:<1..60,AF178222.1:1..63,AF178223.1:1..42,1..>90)
179
- # * [BOVMHDQBY4] join(M30006.1:(392.467)..575,M30005.1:415..681,M30004.1:129..410,M30004.1:907..1017,521..534)
180
- # * [HUMSOD102] order(L44135.1:(454.445)..>538,<1..181)
181
- # * [SL16SRRN1] order(<1..>267,X67092.1:<1..>249,X67093.1:<1..>233)
182
- #
183
- # (I) <, >
184
- #
185
- # * [A5U48871] <1..>318
186
- # * [AA23SRRNP] <1..388
187
- # * [AA23SRRNP] 503..>1010
188
- # * [AAM5961] complement(<1..229)
189
- # * [AAM5961] complement(5231..>5598)
190
- # * [AF043934] join(<1,60..99,161..241,302..370,436..594,676..887,993..1141,1209..1329,1387..1559,1626..1646,1708..>1843)
191
- # * [BACSPOJ] <180..(731.761)
192
- # * [BBU17998] (88.89)..>1122
193
- # * [AARPOB2] order(AF194507.1:<1..510,1..>871)
194
- # * [SL16SRRN1] order(<1..>267,X67092.1:<1..>249,X67093.1:<1..>233)
195
- #
196
- # (J) complement
197
- #
198
- # * [AF179299] complement(53^54) <= hoge insertion site etc.
199
- # * [AP000001] join(complement(1..61),complement(AP000007.1:252907..253505))
200
- # * [AF209868S2] order(complement(1..>308),complement(AF209868.1:75..336))
201
- # * [AP000001] join(complement(1..61),complement(AP000007.1:252907..253505))
202
- # * [CPPLCG] complement(<1..(1093.1098))
203
- # * [D63363] order(3..26,complement(964..987))
204
- # * [ECHTGA] complement((1700.1708)..(1715.1721))
205
- # * [ECOUXW] order(complement(1658..1663),complement(1636..1641))
206
- # * [LPATOVGNS] complement((64.74)..1525)
207
- # * [AF129075] complement(join(71606..71829,75327..75446,76039..76203,76282..76353,76914..77029,77114..77201,77276..77342,78138..78316,79755..79892,81501..81562,81676..81856,82341..82490,84208..84287,85032..85122,88316..88403))
208
- # * [ZFDYST2] join(AF137145.1:<1..18,complement(<1..99))
209
- #
210
- # (K) replace
211
- #
212
- # * [CSU27710] replace(64,"A")
213
- # * [CELXOL1ES] replace(5256,"t")
214
- # * [ANICPC] replace(1..468,"")
215
- # * [CSU27710] replace(67..68,"GC")
216
- # * [CELXOL1ES] replace(4480^4481,"") <= ? only one case in GenBank 122.0
217
- # * [ECOUW87] replace(4792^4793,"a")
218
- # * [CEU34893] replace(1..22,"ggttttaacccagttactcaag")
219
- # * [APLPCII] replace(1905^1906,"acaaagacaccgccctacgcc")
220
- # * [MBDR3S1] replace(1400..>9281,"")
221
- # * [HUMMHDPB1F] replace(complement(36..37),"ttc")
222
- # * [HUMMIC2A] replace((651.655)..(651.655),"")
223
- # * [LEIMDRPGP] replace(1..1554,"L01572")
224
- # * [TRBND3] replace(376..395,"atttgtgtgtggtaatta")
225
- # * [TRBND3] replace(376..395,"atttgtgtgggtaatttta")
226
- # * [TRBND3] replace(376..395,"attttgttgttgttttgttttgaatta")
227
- # * [TRBND3] replace(376..395,"atgtgtggtgaatta")
228
- # * [TRBND3] replace(376..395,"atgtgtgtggtaatta")
229
- # * [TRBND3] replace(376..395,"gatttgttgtggtaatttta")
230
- # * [MSU09460] replace(193, <= replace(193, "t")
231
- # * [HUMMAGE12X] replace(3002..3003, <= replace(3002..3003, "GC")
232
- # * [ADR40FIB] replace(510..520, <= replace(510..520, "taatcctaccg")
233
- # * [RATDYIIAAB] replace(1306..1443,"aagaacatccacggagtcagaactgggctcttcacgccggatttggcgttcgaggccattgtgaaaaagcaggcaatgcaccagcaagctcagttcctacccctgcgtggacctggttatccaggagctaatcagtacagttaggtggtcaagctgaaagagccctgtctgaaa")
234
- #
235
- #--
10
+
11
+ module Bio
12
+
13
+ # == Description
236
14
  #
237
- # This library is free software; you can redistribute it and/or
238
- # modify it under the terms of the GNU Lesser General Public
239
- # License as published by the Free Software Foundation; either
240
- # version 2 of the License, or (at your option) any later version.
15
+ # The Bio::Location class describes the position of a genomic locus.
16
+ # Typically, Bio::Location objects are created automatically when the
17
+ # user creates a Bio::Locations object, instead of initialized directly.
241
18
  #
242
- # This library is distributed in the hope that it will be useful,
243
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
244
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
245
- # Lesser General Public License for more details.
19
+ # == Usage
246
20
  #
247
- # You should have received a copy of the GNU Lesser General Public
248
- # License along with this library; if not, write to the Free Software
249
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
+ # location = Bio::Location.new('500..550')
22
+ # puts "start=" + location.from.to_s + ";end=" + location.to.to_s
250
23
  #
251
- #++
24
+ # #, or better: through Bio::Locations
25
+ # locations = Bio::Locations.new('500..550')
26
+ # locations.each do |location|
27
+ # puts "start=" + location.from.to_s + ";end=" + location.to.to_s
28
+ # end
252
29
  #
253
-
254
- module Bio
255
-
256
30
  class Location
257
31
 
258
- # Pass a range of the 'location' segment. The 'location' segment can be
259
- # 'ID:' + ('n' or 'n..m' or 'n^m' or "seq") with '<' or '>'.
32
+ include Comparable
33
+
34
+ # Parses a 'location' segment, which can be 'ID:' + ('n' or 'n..m' or 'n^m'
35
+ # or "seq") with '<' or '>', and returns a Bio::Location object.
36
+ #
37
+ # location = Bio::Location.new('500..550')
38
+ #
39
+ # ---
40
+ # *Arguments*:
41
+ # * (required) _str_: GenBank style position string (see Bio::Locations
42
+ # documentation)
43
+ # *Returns*:: the Bio::Location object
260
44
  def initialize(location = nil)
261
45
 
262
46
  if location
@@ -273,7 +57,7 @@ class Location
273
57
 
274
58
  # s : start base, e : end base => from, to
275
59
  case location
276
- when /^[<>]?(\d+)$/ # (A, I) n
60
+ when /^[<>]?(\d+)$/ # (A, I) n
277
61
  s = e = $1.to_i
278
62
  when /^[<>]?(\d+)\.\.[<>]?(\d+)$/ # (B, I) n..m
279
63
  s = $1.to_i
@@ -310,66 +94,252 @@ class Location
310
94
 
311
95
  attr_accessor :from, :to, :strand, :sequence, :lt, :gt, :xref_id
312
96
 
313
- # Complement the sequence from outside.
97
+ # Complements the sequence (i.e. alternates the strand).
98
+ # ---
99
+ # *Returns*:: the Bio::Location object
314
100
  def complement
315
101
  @strand *= -1
316
102
  self # return Location object
317
103
  end
318
104
 
319
- # Replace the sequence from outside.
105
+ # Replaces the sequence of the location.
106
+ # ---
107
+ # *Arguments*:
108
+ # * (required) _sequence_: sequence to be used to replace the sequence
109
+ # at the location
110
+ # *Returns*:: the Bio::Location object
320
111
  def replace(sequence)
321
- @sequence = sequence.downcase
112
+ @sequence = sequence.downcase
322
113
  self # return Location object
323
114
  end
324
115
 
325
- # Returns a range (from..to) of the segment as a Range object.
116
+ # Returns the range (from..to) of the location as a Range object.
326
117
  def range
327
118
  @from..@to
328
119
  end
329
120
 
330
- end # class location
121
+ # Check where a Bio::Location object is located compared to another
122
+ # Bio::Location object (mainly to facilitate the use of Comparable).
123
+ # A location A is upstream of location B if the start position of
124
+ # location A is smaller than the start position of location B. If
125
+ # they're the same, the end positions are checked.
126
+ # ---
127
+ # *Arguments*:
128
+ # * (required) _other location_: a Bio::Location object
129
+ # *Returns*::
130
+ # * -1 if self < other location
131
+ # * 1 if self > other location
132
+ # * 0 if both locations are the same
133
+ # * nil if the argument is not a Bio::Location object
134
+ def <=>(other)
135
+ if ! other.kind_of?(Bio::Location)
136
+ return nil
137
+ end
331
138
 
139
+ if @from.to_f < other.from.to_f
140
+ return -1
141
+ elsif @from.to_f > other.from.to_f
142
+ return 1
143
+ end
144
+
145
+ if @to.to_f < other.to.to_f
146
+ return -1
147
+ elsif @to.to_f > other.to.to_f
148
+ return 1
149
+ end
150
+ return 0
151
+ end
332
152
 
153
+ end # Location
154
+
155
+ # == Description
156
+ #
157
+ # The Bio::Locations class is a container for Bio::Location objects:
158
+ # creating a Bio::Locations object (based on a GenBank style position string)
159
+ # will spawn an array of Bio::Location objects.
160
+ #
161
+ # == Usage
162
+ #
163
+ # locations = Bio::Locations.new('join(complement(500..550), 600..625)')
164
+ # locations.each do |loc|
165
+ # puts "class = " + loc.class.to_s
166
+ # puts "range = #{loc.from}..#{loc.to} (strand = #{loc.strand})"
167
+ # end
168
+ # # Output would be:
169
+ # # class = Bio::Location
170
+ # # range = 500..550 (strand = -1)
171
+ # # class = Bio::Location
172
+ # # range = 600..625 (strand = 1)
173
+ #
174
+ # # For the following three location strings, print the span and range
175
+ # ['one-of(898,900)..983',
176
+ # 'one-of(5971..6308,5971..6309)',
177
+ # '8050..one-of(10731,10758,10905,11242)'].each do |loc|
178
+ # location = Bio::Locations.new(loc)
179
+ # puts location.span
180
+ # puts location.range
181
+ # end
182
+ #
183
+ # === GenBank location descriptor classification
184
+ #
185
+ # ==== Definition of the position notation of the GenBank location format
186
+ #
187
+ # According to the GenBank manual 'gbrel.txt', position notations were
188
+ # classified into 10 patterns - (A) to (J).
189
+ #
190
+ # 3.4.12.2 Feature Location
191
+ #
192
+ # The second column of the feature descriptor line designates the
193
+ # location of the feature in the sequence. The location descriptor
194
+ # begins at position 22. Several conventions are used to indicate
195
+ # sequence location.
196
+ #
197
+ # Base numbers in location descriptors refer to numbering in the entry,
198
+ # which is not necessarily the same as the numbering scheme used in the
199
+ # published report. The first base in the presented sequence is numbered
200
+ # base 1. Sequences are presented in the 5 to 3 direction.
201
+ #
202
+ # Location descriptors can be one of the following:
203
+ #
204
+ # (A) 1. A single base;
205
+ #
206
+ # (B) 2. A contiguous span of bases;
207
+ #
208
+ # (C) 3. A site between two bases;
209
+ #
210
+ # (D) 4. A single base chosen from a range of bases;
211
+ #
212
+ # (E) 5. A single base chosen from among two or more specified bases;
213
+ #
214
+ # (F) 6. A joining of sequence spans;
215
+ #
216
+ # (G) 7. A reference to an entry other than the one to which the feature
217
+ # belongs (i.e., a remote entry), followed by a location descriptor
218
+ # referring to the remote sequence;
219
+ #
220
+ # (H) 8. A literal sequence (a string of bases enclosed in quotation marks).
221
+ #
222
+ # ==== Description commented with pattern IDs.
223
+ #
224
+ # (C) A site between two residues, such as an endonuclease cleavage site, is
225
+ # indicated by listing the two bases separated by a carat (e.g., 23^24).
226
+ #
227
+ # (D) A single residue chosen from a range of residues is indicated by the
228
+ # number of the first and last bases in the range separated by a single
229
+ # period (e.g., 23.79). The symbols < and > indicate that the end point
230
+ # (I) of the range is beyond the specified base number.
231
+ #
232
+ # (B) A contiguous span of bases is indicated by the number of the first and
233
+ # last bases in the range separated by two periods (e.g., 23..79). The
234
+ # (I) symbols < and > indicate that the end point of the range is beyond the
235
+ # specified base number. Starting and ending positions can be indicated
236
+ # by base number or by one of the operators described below.
237
+ #
238
+ # Operators are prefixes that specify what must be done to the indicated
239
+ # sequence to locate the feature. The following are the operators
240
+ # available, along with their most common format and a description.
241
+ #
242
+ # (J) complement (location): The feature is complementary to the location
243
+ # indicated. Complementary strands are read 5 to 3.
244
+ #
245
+ # (F) join (location, location, .. location): The indicated elements should
246
+ # be placed end to end to form one contiguous sequence.
247
+ #
248
+ # (F) order (location, location, .. location): The elements are found in the
249
+ # specified order in the 5 to 3 direction, but nothing is implied about
250
+ # the rationality of joining them.
251
+ #
252
+ # (F) group (location, location, .. location): The elements are related and
253
+ # should be grouped together, but no order is implied.
254
+ #
255
+ # (E) one-of (location, location, .. location): The element can be any one,
256
+ # but only one, of the items listed.
257
+ #
258
+ # === Reduction strategy of the position notations
259
+ #
260
+ # * (A) Location n
261
+ # * (B) Location n..m
262
+ # * (C) Location n^m
263
+ # * (D) (n.m) => Location n
264
+ # * (E)
265
+ # * one-of(n,m,..) => Location n
266
+ # * one-of(n..m,..) => Location n..m
267
+ # * (F)
268
+ # * order(loc,loc,..) => join(loc, loc,..)
269
+ # * group(loc,loc,..) => join(loc, loc,..)
270
+ # * join(loc,loc,..) => Sequence
271
+ # * (G) ID:loc => Location with ID
272
+ # * (H) "atgc" => Location only with Sequence
273
+ # * (I)
274
+ # * <n => Location n with lt flag
275
+ # * >n => Location n with gt flag
276
+ # * <n..m => Location n..m with lt flag
277
+ # * n..>m => Location n..m with gt flag
278
+ # * <n..>m => Location n..m with lt, gt flag
279
+ # * (J) complement(loc) => Sequence
280
+ # * (K) replace(loc, str) => Location with replacement Sequence
281
+ #
333
282
  class Locations
334
283
 
335
284
  include Enumerable
336
285
 
337
- # Parse a GenBank style position string and returns a Locations object,
338
- # which contains a list of Location objects.
286
+ # Parses a GenBank style position string and returns a Bio::Locations
287
+ # object, which contains a list of Bio::Location objects.
288
+ #
289
+ # locations = Bio::Locations.new('join(complement(500..550), 600..625)')
290
+ #
291
+ # ---
292
+ # *Arguments*:
293
+ # * (required) _str_: GenBank style position string
294
+ # *Returns*:: Bio::Locations object
339
295
  def initialize(position)
340
296
  if position.is_a? Array
341
297
  @locations = position
342
298
  else
343
299
  position = gbl_cleanup(position) # preprocessing
344
- @locations = gbl_pos2loc(position) # create an Array of Location
300
+ @locations = gbl_pos2loc(position) # create an Array of Bio::Location objects
345
301
  end
346
302
  end
303
+
304
+ # An Array of Bio::Location objects
347
305
  attr_accessor :locations
348
306
 
349
- # Iterates on each Location object.
307
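+ # Evaluates equality of two Bio::Locations objects by comparing their sorted Bio::Location lists; returns nil if the argument is not a Bio::Locations object.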
308
+ def equals?(other)
309
+ if ! other.kind_of?(Bio::Locations)
310
+ return nil
311
+ end
312
+ if self.sort == other.sort
313
+ return true
314
+ else
315
+ return false
316
+ end
317
+ end
318
+
319
+ # Iterates on each Bio::Location object.
350
320
  def each
351
321
  @locations.each do |x|
352
322
  yield(x)
353
323
  end
354
324
  end
355
325
 
356
- # Returns nth Location object.
326
+ # Returns nth Bio::Location object.
357
327
  def [](n)
358
328
  @locations[n]
359
329
  end
360
330
 
361
- # Returns first Location object.
331
+ # Returns first Bio::Location object.
362
332
  def first
363
333
  @locations.first
364
334
  end
365
335
 
366
- # Returns last Location object.
336
+ # Returns last Bio::Location object.
367
337
  def last
368
338
  @locations.last
369
339
  end
370
340
 
371
341
  # Returns an Array containing overall min and max position [min, max]
372
- # of this Locations object.
342
+ # of this Bio::Locations object.
373
343
  def span
374
344
  span_min = @locations.min { |a,b| a.from <=> b.from }
375
345
  span_max = @locations.max { |a,b| a.to <=> b.to }
@@ -396,9 +366,22 @@ class Locations
396
366
  end
397
367
  alias size length
398
368
 
399
- # Convert absolute position in DNA (na) to relative position in RNA (na).
400
- # If type == :aa,
401
- # convert absolute position in DNA (na) to relative position in Protein (aa).
369
+ # Converts absolute position in the whole of the DNA sequence to relative
370
+ # position in the locus.
371
+ #
372
+ # This method can for example be used to relate positions in a DNA-sequence
373
+ # with those in RNA. In this use, the optional ':aa'-flag returns the
374
+ # position of the associated amino-acid rather than the nucleotide.
375
+ #
376
+ # loc = Bio::Locations.new('complement(12838..13533)')
377
+ # puts loc.relative(13524) # => 10
378
+ # puts loc.relative(13506, :aa) # => 10
379
+ #
380
+ # ---
381
+ # *Arguments*:
382
+ # * (required) _position_: nucleotide position within whole of the sequence
383
+ # * _:aa_: flag that lets method return position in amino acid coordinates
384
+ # *Returns*:: position within the location
402
385
  def relative(n, type = nil)
403
386
  case type
404
387
  when :location
@@ -414,18 +397,23 @@ class Locations
414
397
  end
415
398
  end
416
399
 
417
- # Convert relative position in RNA (na) to absolute position in DNA (na).
418
- # If type == :aa,
419
- # convert relative position in Protein (aa) -> absolute position in DNA (na).
420
- #
421
- # * Examples
400
+ # Converts relative position in the locus to position in the whole of the
401
+ # DNA sequence.
402
+ #
403
+ # This method can for example be used to relate positions in a DNA-sequence
404
+ # with those in RNA. In this use, the optional ':aa'-flag indicates that the
405
+ # given position is an amino-acid position rather than a nucleotide position.
422
406
  #
423
- # loc = Bio::Locations.new('complement(12838..13533)')
424
- # loc.absolute(10) #=> 13524 (rel2abs)
425
- # loc.relative(13524) #=> 10 (abs2rel)
426
- # loc.absolute(10, :aa) #=> 13506 (rel2abs)
427
- # loc.relative(13506, :aa) #=> 10 (abs2rel)
407
+ # loc = Bio::Locations.new('complement(12838..13533)')
408
+ # puts loc.absolute(10) # => 13524
409
+ # puts loc.absolute(10, :aa) # => 13506
428
410
  #
411
+ # ---
412
+ # *Arguments*:
413
+ # * (required) _position_: nucleotide position within locus
414
+ # * _:aa_: flag to be used if _position_ is an amino acid position rather than
415
+ # a nucleotide position
416
+ # *Returns*:: position within the whole of the sequence
429
417
  def absolute(n, type = nil)
430
418
  case type
431
419
  when :location
@@ -452,9 +440,9 @@ class Locations
452
440
  # <match> $1 ( $2 $3 not )
453
441
  position.gsub!(/(\.{2})?\(?([<>\d]+)\.([<>\d]+)(?!:)\)?/) do |match|
454
442
  if $1
455
- $1 + $3 # ..(n.m) => ..m
443
+ $1 + $3 # ..(n.m) => ..m
456
444
  else
457
- $2 # (?n.m)? => n
445
+ $2 # (?n.m)? => n
458
446
  end
459
447
  end
460
448
 
@@ -462,9 +450,9 @@ class Locations
462
450
  # <match> .. one-of ($2 ,$3 )
463
451
  position.gsub!(/(\.{2})?one-of\(([^,]+),([^)]+)\)/) do |match|
464
452
  if $1
465
- $1 + $3.gsub(/.*,(.*)/, '\1') # ..one-of(n,m) => ..m
453
+ $1 + $3.gsub(/.*,(.*)/, '\1') # ..one-of(n,m) => ..m
466
454
  else
467
- $2 # one-of(n,m) => n
455
+ $2 # one-of(n,m) => n
468
456
  end
469
457
  end
470
458
 
@@ -514,7 +502,7 @@ class Locations
514
502
  ary << gbl_pos2loc(position)
515
503
  end
516
504
 
517
- when /^complement\((.*)\)$/ # (J) complement()
505
+ when /^complement\((.*)\)$/ # (J) complement()
518
506
  position = $1
519
507
  gbl_pos2loc(position).reverse_each do |location|
520
508
  ary << location.complement
@@ -579,17 +567,144 @@ class Locations
579
567
  end
580
568
  end
581
569
  end
582
- return nil # out of range
570
+ return nil # out of range
583
571
  end
584
572
 
585
- end # class Locations
573
+ end # Locations
586
574
 
587
- end # module Bio
575
+ end # Bio
588
576
 
589
577
 
578
+
579
+ # === GenBank location examples
580
+ #
581
+ # (C) n^m
582
+ #
583
+ # * [AB015179] 754^755
584
+ # * [AF179299] complement(53^54)
585
+ # * [CELXOL1ES] replace(4480^4481,"")
586
+ # * [ECOUW87] replace(4792^4793,"a")
587
+ # * [APLPCII] replace(1905^1906,"acaaagacaccgccctacgcc")
588
+ #
589
+ # (D) (n.m)
590
+ #
591
+ # * [HACSODA] 157..(800.806)
592
+ # * [HALSODB] (67.68)..(699.703)
593
+ # * [AP001918] (45934.45974)..46135
594
+ # * [BACSPOJ] <180..(731.761)
595
+ # * [BBU17998] (88.89)..>1122
596
+ # * [ECHTGA] complement((1700.1708)..(1715.1721))
597
+ # * [ECPAP17] complement(<22..(255.275))
598
+ # * [LPATOVGNS] complement((64.74)..1525)
599
+ # * [PIP404CG] join((8298.8300)..10206,1..855)
600
+ # * [BOVMHDQBY4] join(M30006.1:(392.467)..575,M30005.1:415..681,M30004.1:129..410,M30004.1:907..1017,521..534)
601
+ # * [HUMMIC2A] replace((651.655)..(651.655),"")
602
+ # * [HUMSOD102] order(L44135.1:(454.445)..>538,<1..181)
603
+ #
604
+ # (E) one-of
605
+ #
606
+ # * [ECU17136] one-of(898,900)..983
607
+ # * [CELCYT1A] one-of(5971..6308,5971..6309)
608
+ # * [DMU17742] 8050..one-of(10731,10758,10905,11242)
609
+ # * [PFU27807] one-of(623,627,632)..one-of(628,633,637)
610
+ # * [BTBAINH1] one-of(845,953,963,1078,1104)..1354
611
+ # * [ATU39449] join(one-of(969..1094,970..1094,995..1094,1018..1094),1518..1587,1726..2119,2220..2833,2945..3215)
612
+ #
613
+ # (F) join, order, group
614
+ #
615
+ # * [AB037374S2] join(AB037374.1:1..177,1..807)
616
+ # * [AP000001] join(complement(1..61),complement(AP000007.1:252907..253505))
617
+ # * [ASNOS11] join(AF130124.1:<2563..2964,AF130125.1:21..157,AF130126.1:12..174,AF130127.1:21..112,AF130128.1:21..162,AF130128.1:281..595,AF130128.1:661..842,AF130128.1:916..1030,AF130129.1:21..115,AF130130.1:21..165,AF130131.1:21..125,AF130132.1:21..428,AF130132.1:492..746,AF130133.1:21..168,AF130133.1:232..401,AF130133.1:475..906,AF130133.1:970..1107,AF130133.1:1176..1367,21..>128)
618
+ #
619
+ # * [AARPOB2] order(AF194507.1:<1..510,1..>871)
620
+ # * [AF006691] order(912..1918,20410..21416)
621
+ # * [AF024666] order(complement(18919..19224),complement(13965..14892))
622
+ # * [AF264948] order(27066..27076,27089..27099,27283..27314,27330..27352)
623
+ # * [D63363] order(3..26,complement(964..987))
624
+ # * [ECOCURLI2] order(complement(1009..>1260),complement(AF081827.1:<1..177))
625
+ # * [S72388S2] order(join(S72388.1:757..911,S72388.1:609..1542),1..>139)
626
+ # * [HEYRRE07] order(complement(1..38),complement(M82666.1:1..140),complement(M82665.1:1..176),complement(M82664.1:1..215),complement(M82663.1:1..185),complement(M82662.1:1..49),complement(M82661.1:1..133))
627
+ # * [COL11A1G34] order(AF101079.1:558..1307,AF101080.1:1..749,AF101081.1:1..898,AF101082.1:1..486,AF101083.1:1..942,AF101084.1:1..1734,AF101085.1:1..2385,AF101086.1:1..1813,AF101087.1:1..2287,AF101088.1:1..1073,AF101089.1:1..989,AF101090.1:1..5017,AF101091.1:1..3401,AF101092.1:1..1225,AF101093.1:1..1072,AF101094.1:1..989,AF101095.1:1..1669,AF101096.1:1..918,AF101097.1:1..1114,AF101098.1:1..1074,AF101099.1:1..1709,AF101100.1:1..986,AF101101.1:1..1934,AF101102.1:1..1699,AF101103.1:1..940,AF101104.1:1..2330,AF101105.1:1..4467,AF101106.1:1..1876,AF101107.1:1..2465,AF101108.1:1..1150,AF101109.1:1..1170,AF101110.1:1..1158,AF101111.1:1..1193,1..611)
628
+ #
629
+ # group() are found in the COMMENT field only (in GenBank 122.0)
630
+ #
631
+ # gbpat2.seq: FT repeat_region group(598..606,611..619)
632
+ # gbpat2.seq: FT repeat_region group(8..16,1457..1464).
633
+ # gbpat2.seq: FT variation group(t1,t2)
634
+ # gbpat2.seq: FT variation group(t1,t3)
635
+ # gbpat2.seq: FT variation group(t1,t2,t3)
636
+ # gbpat2.seq: FT repeat_region group(11..202,203..394)
637
+ # gbpri9.seq:COMMENT Residues reported = 'group(1..2145);'.
638
+ #
639
+ # (G) ID:location
640
+ #
641
+ # * [AARPOB2] order(AF194507.1:<1..510,1..>871)
642
+ # * [AF178221S4] join(AF178221.1:<1..60,AF178222.1:1..63,AF178223.1:1..42,1..>90)
643
+ # * [BOVMHDQBY4] join(M30006.1:(392.467)..575,M30005.1:415..681,M30004.1:129..410,M30004.1:907..1017,521..534)
644
+ # * [HUMSOD102] order(L44135.1:(454.445)..>538,<1..181)
645
+ # * [SL16SRRN1] order(<1..>267,X67092.1:<1..>249,X67093.1:<1..>233)
646
+ #
647
+ # (I) <, >
648
+ #
649
+ # * [A5U48871] <1..>318
650
+ # * [AA23SRRNP] <1..388
651
+ # * [AA23SRRNP] 503..>1010
652
+ # * [AAM5961] complement(<1..229)
653
+ # * [AAM5961] complement(5231..>5598)
654
+ # * [AF043934] join(<1,60..99,161..241,302..370,436..594,676..887,993..1141,1209..1329,1387..1559,1626..1646,1708..>1843)
655
+ # * [BACSPOJ] <180..(731.761)
656
+ # * [BBU17998] (88.89)..>1122
657
+ # * [AARPOB2] order(AF194507.1:<1..510,1..>871)
658
+ # * [SL16SRRN1] order(<1..>267,X67092.1:<1..>249,X67093.1:<1..>233)
659
+ #
660
+ # (J) complement
661
+ #
662
+ # * [AF179299] complement(53^54) <= hoge insertion site etc.
663
+ # * [AP000001] join(complement(1..61),complement(AP000007.1:252907..253505))
664
+ # * [AF209868S2] order(complement(1..>308),complement(AF209868.1:75..336))
665
+ # * [AP000001] join(complement(1..61),complement(AP000007.1:252907..253505))
666
+ # * [CPPLCG] complement(<1..(1093.1098))
667
+ # * [D63363] order(3..26,complement(964..987))
668
+ # * [ECHTGA] complement((1700.1708)..(1715.1721))
669
+ # * [ECOUXW] order(complement(1658..1663),complement(1636..1641))
670
+ # * [LPATOVGNS] complement((64.74)..1525)
671
+ # * [AF129075] complement(join(71606..71829,75327..75446,76039..76203,76282..76353,76914..77029,77114..77201,77276..77342,78138..78316,79755..79892,81501..81562,81676..81856,82341..82490,84208..84287,85032..85122,88316..88403))
672
+ # * [ZFDYST2] join(AF137145.1:<1..18,complement(<1..99))
673
+ #
674
+ # (K) replace
675
+ #
676
+ # * [CSU27710] replace(64,"A")
677
+ # * [CELXOL1ES] replace(5256,"t")
678
+ # * [ANICPC] replace(1..468,"")
679
+ # * [CSU27710] replace(67..68,"GC")
680
+ # * [CELXOL1ES] replace(4480^4481,"") <= ? only one case in GenBank 122.0
681
+ # * [ECOUW87] replace(4792^4793,"a")
682
+ # * [CEU34893] replace(1..22,"ggttttaacccagttactcaag")
683
+ # * [APLPCII] replace(1905^1906,"acaaagacaccgccctacgcc")
684
+ # * [MBDR3S1] replace(1400..>9281,"")
685
+ # * [HUMMHDPB1F] replace(complement(36..37),"ttc")
686
+ # * [HUMMIC2A] replace((651.655)..(651.655),"")
687
+ # * [LEIMDRPGP] replace(1..1554,"L01572")
688
+ # * [TRBND3] replace(376..395,"atttgtgtgtggtaatta")
689
+ # * [TRBND3] replace(376..395,"atttgtgtgggtaatttta")
690
+ # * [TRBND3] replace(376..395,"attttgttgttgttttgttttgaatta")
691
+ # * [TRBND3] replace(376..395,"atgtgtggtgaatta")
692
+ # * [TRBND3] replace(376..395,"atgtgtgtggtaatta")
693
+ # * [TRBND3] replace(376..395,"gatttgttgtggtaatttta")
694
+ # * [MSU09460] replace(193, <= replace(193, "t")
695
+ # * [HUMMAGE12X] replace(3002..3003, <= replace(3002..3003, "GC")
696
+ # * [ADR40FIB] replace(510..520, <= replace(510..520, "taatcctaccg")
697
+ # * [RATDYIIAAB] replace(1306..1443,"aagaacatccacggagtcagaactgggctcttcacgccggatttggcgttcgaggccattgtgaaaaagcaggcaatgcaccagcaagctcagttcctacccctgcgtggacctggttatccaggagctaatcagtacagttaggtggtcaagctgaaagagccctgtctgaaa")
698
+ #
699
+
590
700
  if __FILE__ == $0
591
701
  puts "Test new & span methods"
592
702
  [
703
+ '450',
704
+ '500..600',
705
+ 'join(500..550, 600..625)',
706
+ 'complement(join(500..550, 600..625))',
707
+ 'join(complement(500..550), 600..625)',
593
708
  '754^755',
594
709
  'complement(53^54)',
595
710
  'replace(4792^4793,"a")',
@@ -617,9 +732,14 @@ if __FILE__ == $0
617
732
  '<200001..<318389',
618
733
  ].each do |pos|
619
734
  p pos
620
- p Bio::Locations.new(pos).span
621
- p Bio::Locations.new(pos).range
622
- p Bio::Locations.new(pos)
735
+ # p Bio::Locations.new(pos)
736
+ # p Bio::Locations.new(pos).span
737
+ # p Bio::Locations.new(pos).range
738
+ Bio::Locations.new(pos).each do |location|
739
+ puts "class=" + location.class.to_s
740
+ puts "start=" + location.from.to_s + "\tend=" + location.to.to_s + "\tstrand=" + location.strand.to_s
741
+ end
742
+
623
743
  end
624
744
 
625
745
  puts "Test rel2abs/abs2rel method"
@@ -646,5 +766,7 @@ if __FILE__ == $0
646
766
  print "pos : "; p pos
647
767
  print "`- loc[1] : "; p loc[1]
648
768
  print " `- range : "; p loc[1].range
769
+
770
+ puts Bio::Location.new('5').<=>(Bio::Location.new('3'))
649
771
  end
650
772