bio 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (340) hide show
  1. data/bin/bioruby +14 -122
  2. data/bin/br_biofetch.rb +2 -2
  3. data/bin/br_bioflat.rb +2 -2
  4. data/bin/br_biogetseq.rb +2 -2
  5. data/bin/br_pmfetch.rb +3 -3
  6. data/doc/Changes-0.7.rd +77 -0
  7. data/doc/KEGG_API.rd +523 -232
  8. data/doc/KEGG_API.rd.ja +529 -207
  9. data/doc/Tutorial.rd +48 -11
  10. data/lib/bio.rb +59 -6
  11. data/lib/bio/alignment.rb +713 -103
  12. data/lib/bio/appl/bl2seq/report.rb +2 -18
  13. data/lib/bio/appl/blast.rb +108 -91
  14. data/lib/bio/appl/blast/format0.rb +33 -18
  15. data/lib/bio/appl/blast/format8.rb +6 -20
  16. data/lib/bio/appl/blast/report.rb +293 -429
  17. data/lib/bio/appl/blast/rexml.rb +8 -22
  18. data/lib/bio/appl/blast/wublast.rb +21 -12
  19. data/lib/bio/appl/blast/xmlparser.rb +180 -183
  20. data/lib/bio/appl/blat/report.rb +127 -30
  21. data/lib/bio/appl/clustalw.rb +87 -59
  22. data/lib/bio/appl/clustalw/report.rb +20 -22
  23. data/lib/bio/appl/emboss.rb +113 -20
  24. data/lib/bio/appl/fasta.rb +173 -198
  25. data/lib/bio/appl/fasta/format10.rb +244 -347
  26. data/lib/bio/appl/gcg/msf.rb +212 -0
  27. data/lib/bio/appl/gcg/seq.rb +195 -0
  28. data/lib/bio/appl/genscan/report.rb +5 -23
  29. data/lib/bio/appl/hmmer.rb +8 -45
  30. data/lib/bio/appl/hmmer/report.rb +2 -20
  31. data/lib/bio/appl/iprscan/report.rb +374 -0
  32. data/lib/bio/appl/mafft.rb +87 -50
  33. data/lib/bio/appl/mafft/report.rb +151 -44
  34. data/lib/bio/appl/muscle.rb +52 -0
  35. data/lib/bio/appl/phylip/alignment.rb +129 -0
  36. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  37. data/lib/bio/appl/probcons.rb +41 -0
  38. data/lib/bio/appl/psort.rb +89 -96
  39. data/lib/bio/appl/psort/report.rb +6 -22
  40. data/lib/bio/appl/pts1.rb +263 -0
  41. data/lib/bio/appl/sim4.rb +26 -36
  42. data/lib/bio/appl/sim4/report.rb +2 -18
  43. data/lib/bio/appl/sosui/report.rb +5 -20
  44. data/lib/bio/appl/spidey/report.rb +2 -2
  45. data/lib/bio/appl/targetp/report.rb +4 -20
  46. data/lib/bio/appl/tcoffee.rb +55 -0
  47. data/lib/bio/appl/tmhmm/report.rb +4 -20
  48. data/lib/bio/command.rb +235 -64
  49. data/lib/bio/data/aa.rb +21 -26
  50. data/lib/bio/data/codontable.rb +2 -20
  51. data/lib/bio/data/na.rb +19 -4
  52. data/lib/bio/db.rb +27 -12
  53. data/lib/bio/db/aaindex.rb +2 -20
  54. data/lib/bio/db/embl/common.rb +4 -21
  55. data/lib/bio/db/embl/embl.rb +33 -85
  56. data/lib/bio/db/embl/sptr.rb +612 -302
  57. data/lib/bio/db/embl/swissprot.rb +10 -29
  58. data/lib/bio/db/embl/trembl.rb +10 -29
  59. data/lib/bio/db/embl/uniprot.rb +10 -29
  60. data/lib/bio/db/fantom.rb +15 -20
  61. data/lib/bio/db/fasta.rb +3 -3
  62. data/lib/bio/db/genbank/common.rb +37 -46
  63. data/lib/bio/db/genbank/ddbj.rb +6 -18
  64. data/lib/bio/db/genbank/genbank.rb +47 -186
  65. data/lib/bio/db/genbank/genpept.rb +4 -17
  66. data/lib/bio/db/genbank/refseq.rb +4 -17
  67. data/lib/bio/db/gff.rb +103 -35
  68. data/lib/bio/db/go.rb +4 -20
  69. data/lib/bio/db/kegg/brite.rb +26 -36
  70. data/lib/bio/db/kegg/compound.rb +81 -85
  71. data/lib/bio/db/kegg/drug.rb +98 -0
  72. data/lib/bio/db/kegg/enzyme.rb +133 -110
  73. data/lib/bio/db/kegg/expression.rb +2 -20
  74. data/lib/bio/db/kegg/genes.rb +208 -238
  75. data/lib/bio/db/kegg/genome.rb +164 -285
  76. data/lib/bio/db/kegg/glycan.rb +114 -157
  77. data/lib/bio/db/kegg/keggtab.rb +242 -303
  78. data/lib/bio/db/kegg/kgml.rb +117 -160
  79. data/lib/bio/db/kegg/orthology.rb +112 -0
  80. data/lib/bio/db/kegg/reaction.rb +54 -69
  81. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  82. data/lib/bio/db/lasergene.rb +209 -0
  83. data/lib/bio/db/litdb.rb +3 -27
  84. data/lib/bio/db/medline.rb +228 -249
  85. data/lib/bio/db/nbrf.rb +3 -3
  86. data/lib/bio/db/newick.rb +510 -0
  87. data/lib/bio/db/nexus.rb +1854 -0
  88. data/lib/bio/db/pdb.rb +5 -17
  89. data/lib/bio/db/pdb/atom.rb +2 -18
  90. data/lib/bio/db/pdb/chain.rb +2 -18
  91. data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
  92. data/lib/bio/db/pdb/model.rb +2 -18
  93. data/lib/bio/db/pdb/pdb.rb +73 -34
  94. data/lib/bio/db/pdb/residue.rb +4 -20
  95. data/lib/bio/db/pdb/utils.rb +2 -18
  96. data/lib/bio/db/prosite.rb +403 -422
  97. data/lib/bio/db/rebase.rb +84 -40
  98. data/lib/bio/db/soft.rb +404 -0
  99. data/lib/bio/db/transfac.rb +5 -17
  100. data/lib/bio/feature.rb +106 -52
  101. data/lib/bio/io/das.rb +32 -42
  102. data/lib/bio/io/dbget.rb +2 -20
  103. data/lib/bio/io/ddbjxml.rb +77 -138
  104. data/lib/bio/io/ebisoap.rb +158 -0
  105. data/lib/bio/io/ensembl.rb +229 -0
  106. data/lib/bio/io/fastacmd.rb +89 -82
  107. data/lib/bio/io/fetch.rb +163 -96
  108. data/lib/bio/io/flatfile.rb +170 -73
  109. data/lib/bio/io/flatfile/bdb.rb +3 -16
  110. data/lib/bio/io/flatfile/index.rb +2 -2
  111. data/lib/bio/io/flatfile/indexer.rb +3 -2
  112. data/lib/bio/io/higet.rb +12 -31
  113. data/lib/bio/io/keggapi.rb +210 -269
  114. data/lib/bio/io/ncbisoap.rb +155 -0
  115. data/lib/bio/io/pubmed.rb +169 -147
  116. data/lib/bio/io/registry.rb +4 -20
  117. data/lib/bio/io/soapwsdl.rb +43 -38
  118. data/lib/bio/io/sql.rb +242 -305
  119. data/lib/bio/location.rb +407 -285
  120. data/lib/bio/map.rb +410 -0
  121. data/lib/bio/pathway.rb +558 -695
  122. data/lib/bio/reference.rb +272 -75
  123. data/lib/bio/sequence.rb +255 -13
  124. data/lib/bio/sequence/aa.rb +71 -10
  125. data/lib/bio/sequence/common.rb +187 -33
  126. data/lib/bio/sequence/compat.rb +59 -4
  127. data/lib/bio/sequence/format.rb +54 -7
  128. data/lib/bio/sequence/generic.rb +3 -3
  129. data/lib/bio/sequence/na.rb +328 -26
  130. data/lib/bio/shell.rb +11 -4
  131. data/lib/bio/shell/core.rb +221 -160
  132. data/lib/bio/shell/demo.rb +18 -15
  133. data/lib/bio/shell/interface.rb +14 -12
  134. data/lib/bio/shell/irb.rb +95 -0
  135. data/lib/bio/shell/object.rb +45 -26
  136. data/lib/bio/shell/plugin/blast.rb +42 -0
  137. data/lib/bio/shell/plugin/codon.rb +22 -14
  138. data/lib/bio/shell/plugin/das.rb +58 -0
  139. data/lib/bio/shell/plugin/emboss.rb +2 -2
  140. data/lib/bio/shell/plugin/entry.rb +22 -11
  141. data/lib/bio/shell/plugin/flatfile.rb +2 -2
  142. data/lib/bio/shell/plugin/keggapi.rb +13 -6
  143. data/lib/bio/shell/plugin/midi.rb +4 -4
  144. data/lib/bio/shell/plugin/obda.rb +2 -2
  145. data/lib/bio/shell/plugin/psort.rb +56 -0
  146. data/lib/bio/shell/plugin/seq.rb +35 -8
  147. data/lib/bio/shell/plugin/soap.rb +87 -0
  148. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
  149. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
  150. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
  151. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
  152. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
  153. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
  154. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
  156. data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
  159. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
  160. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
  161. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
  162. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
  163. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
  164. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
  165. data/lib/bio/shell/script.rb +25 -0
  166. data/lib/bio/shell/setup.rb +109 -0
  167. data/lib/bio/shell/web.rb +70 -58
  168. data/lib/bio/tree.rb +850 -0
  169. data/lib/bio/util/color_scheme.rb +84 -107
  170. data/lib/bio/util/color_scheme/buried.rb +5 -24
  171. data/lib/bio/util/color_scheme/helix.rb +5 -24
  172. data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
  173. data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
  174. data/lib/bio/util/color_scheme/strand.rb +5 -24
  175. data/lib/bio/util/color_scheme/taylor.rb +5 -24
  176. data/lib/bio/util/color_scheme/turn.rb +5 -24
  177. data/lib/bio/util/color_scheme/zappo.rb +5 -24
  178. data/lib/bio/util/contingency_table.rb +70 -43
  179. data/lib/bio/util/restriction_enzyme.rb +228 -0
  180. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  181. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  182. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  183. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  184. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  185. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  186. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  187. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  188. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  189. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  190. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  191. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  192. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  193. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  194. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  195. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  196. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  197. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  198. data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
  199. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  200. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  201. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  202. data/lib/bio/util/sirna.rb +4 -22
  203. data/sample/color_scheme_na.rb +4 -12
  204. data/sample/enzymes.rb +78 -0
  205. data/sample/goslim.rb +5 -13
  206. data/sample/psortplot_html.rb +4 -12
  207. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  208. data/test/data/embl/AB090716.embl.rel89 +63 -0
  209. data/test/data/fasta/example1.txt +75 -0
  210. data/test/data/fasta/example2.txt +21 -0
  211. data/test/data/iprscan/merged.raw +32 -0
  212. data/test/data/iprscan/merged.txt +74 -0
  213. data/test/data/soft/GDS100_partial.soft +92 -0
  214. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  215. data/test/functional/bio/io/test_ensembl.rb +103 -0
  216. data/test/functional/bio/io/test_soapwsdl.rb +5 -17
  217. data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
  218. data/test/unit/bio/appl/blast/test_report.rb +3 -16
  219. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
  220. data/test/unit/bio/appl/genscan/test_report.rb +3 -16
  221. data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
  222. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  223. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  224. data/test/unit/bio/appl/sosui/test_report.rb +3 -16
  225. data/test/unit/bio/appl/targetp/test_report.rb +3 -16
  226. data/test/unit/bio/appl/test_blast.rb +3 -16
  227. data/test/unit/bio/appl/test_fasta.rb +4 -16
  228. data/test/unit/bio/appl/test_pts1.rb +140 -0
  229. data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
  230. data/test/unit/bio/data/test_aa.rb +4 -17
  231. data/test/unit/bio/data/test_codontable.rb +3 -16
  232. data/test/unit/bio/data/test_na.rb +3 -3
  233. data/test/unit/bio/db/embl/test_common.rb +3 -16
  234. data/test/unit/bio/db/embl/test_embl.rb +3 -16
  235. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  236. data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
  237. data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
  238. data/test/unit/bio/db/kegg/test_genes.rb +3 -16
  239. data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
  240. data/test/unit/bio/db/test_aaindex.rb +2 -2
  241. data/test/unit/bio/db/test_fasta.rb +3 -16
  242. data/test/unit/bio/db/test_gff.rb +3 -16
  243. data/test/unit/bio/db/test_lasergene.rb +95 -0
  244. data/test/unit/bio/db/test_newick.rb +56 -0
  245. data/test/unit/bio/db/test_nexus.rb +360 -0
  246. data/test/unit/bio/db/test_prosite.rb +5 -18
  247. data/test/unit/bio/db/test_rebase.rb +11 -25
  248. data/test/unit/bio/db/test_soft.rb +138 -0
  249. data/test/unit/bio/io/test_ddbjxml.rb +5 -17
  250. data/test/unit/bio/io/test_ensembl.rb +109 -0
  251. data/test/unit/bio/io/test_fastacmd.rb +3 -16
  252. data/test/unit/bio/io/test_flatfile.rb +237 -0
  253. data/test/unit/bio/io/test_soapwsdl.rb +4 -17
  254. data/test/unit/bio/sequence/test_aa.rb +3 -3
  255. data/test/unit/bio/sequence/test_common.rb +3 -16
  256. data/test/unit/bio/sequence/test_compat.rb +3 -16
  257. data/test/unit/bio/sequence/test_na.rb +29 -3
  258. data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
  259. data/test/unit/bio/test_alignment.rb +16 -27
  260. data/test/unit/bio/test_command.rb +242 -25
  261. data/test/unit/bio/test_db.rb +3 -16
  262. data/test/unit/bio/test_feature.rb +4 -16
  263. data/test/unit/bio/test_location.rb +4 -16
  264. data/test/unit/bio/test_map.rb +230 -0
  265. data/test/unit/bio/test_pathway.rb +4 -16
  266. data/test/unit/bio/test_reference.rb +2 -2
  267. data/test/unit/bio/test_sequence.rb +7 -19
  268. data/test/unit/bio/test_shell.rb +3 -16
  269. data/test/unit/bio/test_tree.rb +593 -0
  270. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  271. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  272. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  273. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
  274. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  275. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  276. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  277. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  278. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  279. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  280. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  281. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  282. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  283. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  284. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  285. data/test/unit/bio/util/test_color_scheme.rb +6 -18
  286. data/test/unit/bio/util/test_contingency_table.rb +6 -18
  287. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  288. data/test/unit/bio/util/test_sirna.rb +3 -16
  289. metadata +228 -169
  290. data/doc/BioRuby.rd.ja +0 -225
  291. data/doc/Design.rd.ja +0 -341
  292. data/doc/TODO.rd.ja +0 -138
  293. data/lib/bio/appl/fasta/format6.rb +0 -37
  294. data/lib/bio/db/kegg/cell.rb +0 -88
  295. data/lib/bio/db/kegg/ko.rb +0 -178
  296. data/lib/bio/shell/rails/Rakefile +0 -10
  297. data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
  298. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
  299. data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
  300. data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
  301. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
  302. data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
  303. data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
  304. data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
  305. data/lib/bio/shell/rails/config/boot.rb +0 -19
  306. data/lib/bio/shell/rails/config/database.yml +0 -85
  307. data/lib/bio/shell/rails/config/environment.rb +0 -53
  308. data/lib/bio/shell/rails/config/environments/development.rb +0 -19
  309. data/lib/bio/shell/rails/config/environments/production.rb +0 -19
  310. data/lib/bio/shell/rails/config/environments/test.rb +0 -19
  311. data/lib/bio/shell/rails/config/routes.rb +0 -19
  312. data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
  313. data/lib/bio/shell/rails/public/404.html +0 -8
  314. data/lib/bio/shell/rails/public/500.html +0 -8
  315. data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
  316. data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
  317. data/lib/bio/shell/rails/public/dispatch.rb +0 -10
  318. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  319. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  320. data/lib/bio/shell/rails/public/index.html +0 -277
  321. data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
  322. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
  323. data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
  324. data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
  325. data/lib/bio/shell/rails/public/robots.txt +0 -1
  326. data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
  327. data/lib/bio/shell/rails/script/about +0 -3
  328. data/lib/bio/shell/rails/script/breakpointer +0 -3
  329. data/lib/bio/shell/rails/script/console +0 -3
  330. data/lib/bio/shell/rails/script/destroy +0 -3
  331. data/lib/bio/shell/rails/script/generate +0 -3
  332. data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
  333. data/lib/bio/shell/rails/script/performance/profiler +0 -3
  334. data/lib/bio/shell/rails/script/plugin +0 -3
  335. data/lib/bio/shell/rails/script/process/reaper +0 -3
  336. data/lib/bio/shell/rails/script/process/spawner +0 -3
  337. data/lib/bio/shell/rails/script/process/spinner +0 -3
  338. data/lib/bio/shell/rails/script/runner +0 -3
  339. data/lib/bio/shell/rails/script/server +0 -42
  340. data/lib/bio/shell/rails/test/test_helper.rb +0 -28
@@ -1,19 +1,33 @@
1
1
  #
2
- # = bio/db/rebase.rb - Interface for EMBOSS formatted REBASE files
2
+ # bio/db/rebase.rb - Interface for EMBOSS formatted REBASE files
3
3
  #
4
- # Copyright:: Copyright (C) 2005 Trevor Wennblom <trevor@corevx.com>
5
- # License:: LGPL
4
+ # Author:: Trevor Wennblom <mailto:trevor@corevx.com>
5
+ # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
6
+ # License:: The Ruby License
6
7
  #
7
- # $Id: rebase.rb,v 1.3 2006/02/27 13:22:05 k Exp $
8
+ # $Id: rebase.rb,v 1.8 2007/04/05 23:35:40 trevor Exp $
8
9
  #
10
+
11
+ autoload :YAML, 'yaml'
12
+
13
+ module Bio #:nodoc:
14
+
15
+ autoload :Reference, 'bio/reference'
16
+
17
+ #
18
+ # bio/db/rebase.rb - Interface for EMBOSS formatted REBASE files
9
19
  #
10
- # == Synopsis
20
+ # Author:: Trevor Wennblom <mailto:trevor@corevx.com>
21
+ # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
22
+ # License:: The Ruby License
23
+ #
24
+ #
25
+ # = Description
11
26
  #
12
27
  # Bio::REBASE provides utilities for interacting with REBASE data in EMBOSS
13
28
  # format. REBASE is the Restriction Enzyme Database, more information
14
29
  # can be found here:
15
30
  #
16
-
17
31
  # * http://rebase.neb.com
18
32
  #
19
33
  # EMBOSS formatted files located at:
@@ -30,9 +44,9 @@
30
44
  # % wget ftp://ftp.neb.com/pub/rebase/emboss*
31
45
  #
32
46
  #
33
- # == Usage
47
+ # = Usage
34
48
  #
35
- # require 'bio/db/rebase'
49
+ # require 'bio'
36
50
  # require 'pp'
37
51
  #
38
52
  # enz = File.read('emboss_e')
@@ -65,6 +79,7 @@
65
79
  # rebase = Bio::REBASE.load_yaml( 'enz.yaml', 'ref.yaml', 'sup.yaml' )
66
80
  #
67
81
  # pp rebase.enzymes[0..4] # ["AarI", "AasI", "AatI", "AatII", "Acc16I"]
82
+ # pp rebase.enzyme_name?('aasi') # true
68
83
  # pp rebase['AarI'].pattern # "CACCTGC"
69
84
  # pp rebase['AarI'].blunt? # false
70
85
  # pp rebase['AarI'].organism # "Arthrobacter aurescens SS2-322"
@@ -92,37 +107,11 @@
92
107
  # rebase.each do |name, info|
93
108
  # pp "#{name}: #{info.methylation}" unless info.methylation.empty?
94
109
  # end
95
- #
96
- #
97
- #--
98
- #
99
- # This library is free software; you can redistribute it and/or
100
- # modify it under the terms of the GNU Lesser General Public
101
- # License as published by the Free Software Foundation; either
102
- # version 2 of the License, or (at your option) any later version.
103
110
  #
104
- # This library is distributed in the hope that it will be useful,
105
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
106
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
107
- # Lesser General Public License for more details.
108
- #
109
- # You should have received a copy of the GNU Lesser General Public
110
- # License along with this library; if not, write to the Free Software
111
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
112
- #
113
- #++
114
- #
115
-
116
- autoload :YAML, 'yaml'
117
-
118
- module Bio
119
-
120
- autoload :Reference, 'reference'
121
-
122
111
 
123
112
  class REBASE
124
113
 
125
- class DynamicMethod_Hash < Hash
114
+ class DynamicMethod_Hash < Hash #:nodoc:
126
115
  # Define a writer or reader
127
116
  # * Allows hash[:key]= to be accessed like hash.key=
128
117
  # * Allows hash[:key] to be accessed like hash.key
@@ -142,7 +131,7 @@ class REBASE
142
131
  end
143
132
  end
144
133
 
145
- class EnzymeEntry < DynamicMethod_Hash
134
+ class EnzymeEntry < DynamicMethod_Hash #:nodoc:
146
135
  @@supplier_data = {}
147
136
  def self.supplier_data=(d); @@supplier_data = d; end
148
137
 
@@ -153,23 +142,39 @@ class REBASE
153
142
  end
154
143
  end
155
144
 
145
+ # Calls _block_ once for each element in <tt>@data</tt> hash, passing that element as a parameter.
146
+ #
147
+ # ---
148
+ # *Arguments*
149
+ # * Accepts a block
150
+ # *Returns*:: results of _block_ operations
156
151
  def each
157
- @data.each { |v| yield v }
152
+ @data.each { |item| yield item }
158
153
  end
159
154
 
160
155
  # Make the instantiated class act like a Hash on @data
161
156
  # Does the equivalent and more of this:
162
157
  # def []( key ); @data[ key ]; end
163
158
  # def size; @data.size; end
164
- def method_missing(method_id, *args)
159
+ def method_missing(method_id, *args) #:nodoc:
165
160
  self.class.class_eval do
166
161
  define_method(method_id) { |a| Hash.instance_method(method_id).bind(@data).call(a) }
167
162
  end
168
163
  Hash.instance_method(method_id).bind(@data).call(*args)
169
164
  end
170
165
 
171
- # All your REBASE are belong to us.
166
+ # Constructor
167
+ #
168
+ # ---
169
+ # *Arguments*
170
+ # * +enzyme_lines+: (_required_) contents of EMBOSS formatted enzymes file
171
+ # * +reference_lines+: (_optional_) contents of EMBOSS formatted references file
172
+ # * +supplier_lines+: (_optional_) contents of EMBOSS formatted suppliers file
173
+ # * +yaml+: (_optional_, _default_ +false+) enzyme_lines, reference_lines, and supplier_lines are read as YAML if set to true
174
+ # *Returns*:: Bio::REBASE
172
175
  def initialize( enzyme_lines, reference_lines = nil, supplier_lines = nil, yaml = false )
176
+ # All your REBASE are belong to us.
177
+
173
178
  if yaml
174
179
  @enzyme_data = enzyme_lines
175
180
  @reference_data = reference_lines
@@ -185,24 +190,57 @@ class REBASE
185
190
  end
186
191
 
187
192
  # List the enzymes available
193
+ #
194
+ # ---
195
+ # *Arguments*
196
+ # * _none_
197
+ # *Returns*:: +Array+ sorted enzyme names
188
198
  def enzymes
189
199
  @data.keys.sort
190
200
  end
201
+
202
+ # Check if supplied name is the name of an available enzyme
203
+ #
204
+ # ---
205
+ # *Arguments*
206
+ # * +name+: Enzyme name
207
+ # *Returns*:: +true/false+
208
+ def enzyme_name?(name)
209
+ enzymes.each do |e|
210
+ return true if e.downcase == name.downcase
211
+ end
212
+ return false
213
+ end
191
214
 
192
215
  # Save the current data
193
216
  # rebase.save_yaml( 'enz.yaml' )
194
217
  # rebase.save_yaml( 'enz.yaml', 'ref.yaml' )
195
218
  # rebase.save_yaml( 'enz.yaml', 'ref.yaml', 'sup.yaml' )
219
+ #
220
+ # ---
221
+ # *Arguments*
222
+ # * +f_enzyme+: (_required_) Filename to save YAML formatted output of enzyme data
223
+ # * +f_reference+: (_optional_) Filename to save YAML formatted output of reference data
224
+ # * +f_supplier+: (_optional_) Filename to save YAML formatted output of supplier data
225
+ # *Returns*:: nothing
196
226
  def save_yaml( f_enzyme, f_reference=nil, f_supplier=nil )
197
227
  File.open(f_enzyme, 'w') { |f| f.puts YAML.dump(@enzyme_data) }
198
228
  File.open(f_reference, 'w') { |f| f.puts YAML.dump(@reference_data) } if f_reference
199
229
  File.open(f_supplier, 'w') { |f| f.puts YAML.dump(@supplier_data) } if f_supplier
230
+ return
200
231
  end
201
232
 
202
233
  # Read REBASE EMBOSS-formatted files
203
234
  # rebase = Bio::REBASE.read( 'emboss_e' )
204
235
  # rebase = Bio::REBASE.read( 'emboss_e', 'emboss_r' )
205
236
  # rebase = Bio::REBASE.read( 'emboss_e', 'emboss_r', 'emboss_s' )
237
+ #
238
+ # ---
239
+ # *Arguments*
240
+ # * +f_enzyme+: (_required_) Filename to read enzyme data
241
+ # * +f_reference+: (_optional_) Filename to read reference data
242
+ # * +f_supplier+: (_optional_) Filename to read supplier data
243
+ # *Returns*:: Bio::REBASE object
206
244
  def self.read( f_enzyme, f_reference=nil, f_supplier=nil )
207
245
  e = IO.readlines(f_enzyme)
208
246
  r = f_reference ? IO.readlines(f_reference) : nil
@@ -214,6 +252,13 @@ class REBASE
214
252
  # rebase = Bio::REBASE.load_yaml( 'enz.yaml' )
215
253
  # rebase = Bio::REBASE.load_yaml( 'enz.yaml', 'ref.yaml' )
216
254
  # rebase = Bio::REBASE.load_yaml( 'enz.yaml', 'ref.yaml', 'sup.yaml' )
255
+ #
256
+ # ---
257
+ # *Arguments*
258
+ # * +f_enzyme+: (_required_) Filename to read YAML-formatted enzyme data
259
+ # * +f_reference+: (_optional_) Filename to read YAML-formatted reference data
260
+ # * +f_supplier+: (_optional_) Filename to read YAML-formatted supplier data
261
+ # *Returns*:: Bio::REBASE object
217
262
  def self.load_yaml( f_enzyme, f_reference=nil, f_supplier=nil )
218
263
  e = YAML.load_file(f_enzyme)
219
264
  r = f_reference ? YAML.load_file(f_reference) : nil
@@ -409,5 +454,4 @@ class REBASE
409
454
  end
410
455
 
411
456
  end # REBASE
412
-
413
457
  end # Bio
@@ -0,0 +1,404 @@
1
+ #
2
+ # bio/db/soft.rb - Interface for SOFT formatted files
3
+ #
4
+ # Author:: Trevor Wennblom <mailto:trevor@corevx.com>
5
+ # Copyright:: Copyright (c) 2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id: soft.rb,v 1.2 2007/04/05 23:35:40 trevor Exp $
9
+ #
10
+
11
+ module Bio #:nodoc:
12
+
13
+ #
14
+ # bio/db/soft.rb - Interface for SOFT formatted files
15
+ #
16
+ # Author:: Trevor Wennblom <mailto:trevor@corevx.com>
17
+ # Copyright:: Copyright (c) 2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
18
+ # License:: The Ruby License
19
+ #
20
+ #
21
+ # = Description
22
+ #
23
+ # "SOFT (Simple Omnibus in Text Format) is a compact, simple, line-based,
24
+ # ASCII text format that incorporates experimental data and metadata."
25
+ # -- <em>GEO, National Center for Biotechnology Information</em>
26
+ #
27
+ # The Bio::SOFT class reads SOFT Series or Platform formatted files that
28
+ # contain information
29
+ # describing one database, one series, one platform, and many samples (GEO
30
+ # accessions). The data from the file can then be viewed with Ruby methods.
31
+ #
32
+ # Bio::SOFT also supports the reading of SOFT DataSet files which contain
33
+ # one database, one dataset, and many subsets.
34
+ #
35
+ # Format specification is located here:
36
+ # * http://www.ncbi.nlm.nih.gov/projects/geo/info/soft2.html#SOFTformat
37
+ #
38
+ # SOFT data files may be directly downloaded here:
39
+ # * ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SOFT
40
+ #
41
+ # NCBI's Gene Expression Omnibus (GEO) is here:
42
+ # * http://www.ncbi.nlm.nih.gov/geo
43
+ #
44
+ # = Usage
45
+ #
46
+ # If an attribute has more than one value then the values are stored in an
47
+ # Array of String objects. Otherwise the attribute is stored as a String.
48
+ #
49
+ # The platform and each sample may contain a table of data. A dataset from a
50
+ # DataSet file may also contain a table.
51
+ #
52
+ # Attributes are dynamically created based on the data in the file.
53
+ # Predefined keys have not been created in advance due to the variability of
54
+ # SOFT files in-the-wild.
55
+ #
56
+ # Keys are generally stored as Symbols. Keys for samples and table headings
57
+ # may alternatively be accessed with Strings.
58
+ # The names of samples (geo accessions) are case sensitive. Table headers
59
+ # are case insensitive.
60
+ #
61
+ # require 'bio'
62
+ #
63
+ # lines = IO.readlines('GSE3457_family.soft')
64
+ # soft = Bio::SOFT.new(lines)
65
+ #
66
+ # soft.platform[:geo_accession] # => "GPL2092"
67
+ # soft.platform[:organism] # => "Populus"
68
+ # soft.platform[:contributor] # => ["Jingyi,,Li", "Olga,,Shevchenko", "Steve,H,Strauss", "Amy,M,Brunner"]
69
+ # soft.platform[:data_row_count] # => "240"
70
+ # soft.platform.keys.sort {|a,b| a.to_s <=> b.to_s}[0..2] # => [:contact_address, :contact_city, :contact_country]
71
+ # soft.platform[:"contact_zip/postal_code"] # => "97331"
72
+ # soft.platform[:table].header # => ["ID", "GB_ACC", "SPOT_ID", "Function/Family", "ORGANISM", "SEQUENCE"]
73
+ # soft.platform[:table].header_description # => {"ORGANISM"=>"sequence sources", "SEQUENCE"=>"oligo sequence used", "Function/Family"=>"gene functions and family", "ID"=>"", "SPOT_ID"=>"", "GB_ACC"=>"Gene bank accession number"}
74
+ # soft.platform[:table].rows.size # => 240
75
+ # soft.platform[:table].rows[5] # => ["A039P68U", "AI163321", "", "TF, flowering protein CONSTANS", "P. tremula x P. tremuloides", "AGAAAATTCGATATACTGTCCGTAAAGAGGTAGCACTTAGAATGCAACGGAATAAAGGGCAGTTCACCTC"]
76
+ # soft.platform[:table].rows[5][4] # => "P. tremula x P. tremuloides"
77
+ # soft.platform[:table].rows[5][:organism] # => "P. tremula x P. tremuloides"
78
+ # soft.platform[:table].rows[5]['ORGANISM'] # => "P. tremula x P. tremuloides"
79
+ #
80
+ # soft.series[:geo_accession] # => "GSE3457"
81
+ # soft.series[:contributor] # => ["Jingyi,,Li", "Olga,,Shevchenko", "Ove,,Nilsson", "Steve,H,Strauss", "Amy,M,Brunner"]
82
+ # soft.series[:platform_id] # => "GPL2092"
83
+ # soft.series[:sample_id].size # => 74
84
+ # soft.series[:sample_id][0..4] # => ["GSM77557", "GSM77558", "GSM77559", "GSM77560", "GSM77561"]
85
+ #
86
+ # soft.database[:name] # => "Gene Expression Omnibus (GEO)"
87
+ # soft.database[:ref] # => "Nucleic Acids Res. 2005 Jan 1;33 Database Issue:D562-6"
88
+ # soft.database[:institute] # => "NCBI NLM NIH"
89
+ #
90
+ # soft.samples.size # => 74
91
+ # soft.samples[:GSM77600][:series_id] # => "GSE3457"
92
+ # soft.samples['GSM77600'][:series_id] # => "GSE3457"
93
+ # soft.samples[:GSM77600][:platform_id] # => "GPL2092"
94
+ # soft.samples[:GSM77600][:type] # => "RNA"
95
+ # soft.samples[:GSM77600][:title] # => "jst2b2"
96
+ # soft.samples[:GSM77600][:table].header # => ["ID_REF", "VALUE"]
97
+ # soft.samples[:GSM77600][:table].header_description # => {"ID_REF"=>"", "VALUE"=>"normalized signal intensities"}
98
+ # soft.samples[:GSM77600][:table].rows.size # => 217
99
+ # soft.samples[:GSM77600][:table].rows[5] # => ["A039P68U", "8.19"]
100
+ # soft.samples[:GSM77600][:table].rows[5][0] # => "A039P68U"
101
+ # soft.samples[:GSM77600][:table].rows[5][:id_ref] # => "A039P68U"
102
+ # soft.samples[:GSM77600][:table].rows[5]['ID_REF'] # => "A039P68U"
103
+ #
104
+ #
105
+ # lines = IO.readlines('GDS100.soft')
106
+ # soft = Bio::SOFT.new(lines)
107
+ #
108
+ # soft.database[:name] # => "Gene Expression Omnibus (GEO)"
109
+ # soft.database[:ref] # => "Nucleic Acids Res. 2005 Jan 1;33 Database Issue:D562-6"
110
+ # soft.database[:institute] # => "NCBI NLM NIH"
111
+ #
112
+ # soft.subsets.size # => 8
113
+ # soft.subsets.keys # => ["GDS100_1", "GDS100_2", "GDS100_3", "GDS100_4", "GDS100_5", "GDS100_6", "GDS100_7", "GDS100_8"]
114
+ # soft.subsets[:GDS100_7] # => {:dataset_id=>"GDS100", :type=>"time", :sample_id=>"GSM548,GSM543", :description=>"60 minute"}
115
+ # soft.subsets['GDS100_7'][:sample_id] # => "GSM548,GSM543"
116
+ # soft.subsets[:GDS100_7][:sample_id] # => "GSM548,GSM543"
117
+ # soft.subsets[:GDS100_7][:dataset_id] # => "GDS100"
118
+ #
119
+ # soft.dataset[:order] # => "none"
120
+ # soft.dataset[:sample_organism] # => "Escherichia coli"
121
+ # soft.dataset[:table].header # => ["ID_REF", "IDENTIFIER", "GSM549", "GSM542", "GSM543", "GSM547", "GSM544", "GSM545", "GSM546", "GSM548"]
122
+ # soft.dataset[:table].rows.size # => 5764
123
+ # soft.dataset[:table].rows[5] # => ["6", "EMPTY", "0.097", "0.217", "0.242", "0.067", "0.104", "0.162", "0.104", "0.154"]
124
+ # soft.dataset[:table].rows[5][4] # => "0.242"
125
+ # soft.dataset[:table].rows[5][:gsm549] # => "0.097"
126
+ # soft.dataset[:table].rows[5][:GSM549] # => "0.097"
127
+ # soft.dataset[:table].rows[5]['GSM549'] # => "0.097"
128
+ #
129
class SOFT
  # Entities filled in while parsing. Each starts out empty and is populated
  # only if the corresponding "^ENTITY" section occurs in the input.
  attr_accessor :database
  attr_accessor :series, :platform, :samples
  attr_accessor :dataset, :subsets

  LINE_TYPE_ENTITY_INDICATOR = '^'
  LINE_TYPE_ENTITY_ATTRIBUTE = '!'
  LINE_TYPE_TABLE_HEADER     = '#'
  # data table row defined by absence of line type character

  TABLE_COLUMN_DELIMITER = "\t"

  # Constructor
  #
  # ---
  # *Arguments*
  # * +lines+: (_optional_) contents of SOFT formatted file, either as a
  #   collection of lines (e.g. the result of IO.readlines), or as any object
  #   responding to +each_line+ (a String or an IO).  The input is not
  #   modified.  When omitted (or +nil+) an empty object is created.
  # *Returns*:: Bio::SOFT
  def initialize(lines=nil)
    @database = Database.new

    @series   = Series.new
    @platform = Platform.new
    @samples  = Samples.new

    @dataset  = Dataset.new
    @subsets  = Subsets.new

    process(lines)
  end

  # Classes for Platform and Series files

  # Hash keyed by String; a Symbol passed to #[] is converted to a String
  # before the lookup, so samples[:GSM1] and samples['GSM1'] are the same.
  class Samples < Hash #:nodoc:
    def [](x)
      x = x.to_s if x.kind_of?( Symbol )
      super(x)
    end
  end

  class Entity < Hash #:nodoc:
  end

  class Sample < Entity #:nodoc:
  end

  class Platform < Entity #:nodoc:
  end

  class Series < Entity #:nodoc:
  end

  # Classes for DataSet files

  class Subsets < Samples #:nodoc:
  end

  class Subset < Entity #:nodoc:
  end

  class Dataset < Entity #:nodoc:
  end

  # Classes important for all types

  class Database < Entity #:nodoc:
  end

  # Data table attached to an entity under the :table key.
  class Table #:nodoc:
    attr_accessor :header
    attr_accessor :header_description
    attr_accessor :rows

    class Header < Array #:nodoc:
      # @column_index contains column name => numerical index of column
      attr_accessor :column_index

      def initialize
        super()
        @column_index = {}
      end
    end

    class Row < Array #:nodoc:
      attr_accessor :header_object

      def initialize( n, header_object=nil )
        @header_object = header_object
        super(n)
      end

      # Fetch a cell either by numerical index or by column name.  Column
      # names may be given as a String or a Symbol and are case insensitive.
      #
      # Raises IndexError when the name is not a column of the header, and
      # NoMethodError when the row has no header to resolve names against.
      def [](x)
        # Integer (not Fixnum): the Fixnum constant no longer exists in
        # current Ruby versions, and Integer covers it on old ones too.
        return super(x) if x.kind_of?( Integer )

        if @header_object.nil?
          raise NoMethodError, "Table::Row @header_object undefined!"
        end

        name = x.to_s.downcase.to_sym
        z = @header_object.column_index[name]
        unless z.kind_of?( Integer )
          raise IndexError, "#{name.inspect} is not a valid index. Contents of @header_object.column_index: #{@header_object.column_index.inspect}"
        end
        super(z)
      end
    end

    def initialize()
      @header_description = {}
      @header = Header.new
      @rows = []
    end

    # Define the column names from a delimited line and index them by their
    # lower-cased Symbol form.
    def add_header( line )
      raise "Can only define one header" unless @header.empty?
      @header = @header.concat( parse_row( line ) ) # beware of clobbering this into an Array
      @header.each_with_index do |key, i|
        @header.column_index[key.downcase.to_sym] = i
      end
    end

    def add_row( line )
      @rows << Row.new( parse_row( line ), @header )
    end

    # The first line handed to a table is its header, every later one a row.
    def add_header_or_row( line )
      @header.empty? ? add_header( line ) : add_row( line )
    end

    protected
    def parse_row( line )
      line.split( TABLE_COLUMN_DELIMITER )
    end
  end

  #########
  protected
  #########

  # Parse the input line by line and fill the entity objects.
  #
  # Each line is classified by its first character after surrounding
  # whitespace has been removed: entity indicator, entity attribute, table
  # column description, or (no marker) a row of the current table.
  # Blank lines are skipped.  Raises RuntimeError on malformed input.
  def process(lines)
    return if lines.nil?

    current_indicator = nil
    current_class_accessor = nil
    in_table = false

    # Strings and IO objects are walked line by line; anything else is
    # expected to yield one line per element.
    source = lines.respond_to?( :each_line ) ? lines.each_line : lines

    source.each_with_index do |raw_line, line_number|
      # Work on a stripped copy so the caller's strings are left untouched
      # (and frozen strings are accepted).
      line = raw_line.to_s.strip
      next if line.empty?

      case line[0, 1]
      when LINE_TYPE_ENTITY_INDICATOR
        current_indicator, value = split_label_value_in( line[1..-1] )

        case current_indicator
        when 'DATABASE'
          current_class_accessor = @database
        when 'DATASET'
          current_class_accessor = @dataset
        when 'PLATFORM'
          current_class_accessor = @platform
        when 'SERIES'
          current_class_accessor = @series
        when 'SAMPLE'
          @samples[value] = Sample.new
          current_class_accessor = @samples[value]
        when 'SUBSET'
          @subsets[value] = Subset.new
          current_class_accessor = @subsets[value]
        else
          custom_raise( line_number, error_msg(40, line) )
        end

      when LINE_TYPE_ENTITY_ATTRIBUTE
        if current_indicator.nil?
          custom_raise( line_number, error_msg(30) )
        end

        # Handle lines such as '!platform_table_begin' and '!platform_table_end'.
        # The patterns are anchored so that an ordinary attribute whose value
        # merely mentions these words is not mistaken for a table marker.
        if line =~ %r{\A!\w*table_begin\z}i
          # A table may start without any preceding '#' description lines.
          table_for( current_class_accessor, current_indicator, line_number )
          in_table = true
          next
        elsif line =~ %r{\A!\w*table_end\z}i
          in_table = false
          next
        end

        key, value = split_label_value_in( line, true )
        key_s = key.to_sym

        if current_class_accessor.include?( key_s )
          # A repeated attribute is collected into an Array of values.
          unless current_class_accessor[ key_s ].kind_of?( Array )
            current_class_accessor[ key_s ] = [ current_class_accessor[ key_s ] ]
          end
          current_class_accessor[ key_s ] << value
        else
          current_class_accessor[ key_s ] = value
        end

      when LINE_TYPE_TABLE_HEADER
        # We only expect one table per platform, sample or dataset
        table = table_for( current_class_accessor, current_indicator, line_number )
        in_table = true

        key, value = split_label_value_in( line )
        # key[1..-1] -- Remove first character which is the LINE_TYPE_TABLE_HEADER
        table.header_description[ key[1..-1] ] = value

      else
        # Type: No line type - should be a row in a table.
        table = current_class_accessor && current_class_accessor[:table]
        if current_indicator.nil? || !in_table || table.nil?
          custom_raise( line_number, error_msg(10) )
        end
        table.add_header_or_row( line )
      end
    end
  end

  # Return the table of +entity+, creating it on first use.  Raises (via
  # custom_raise) when +indicator+ names an entity type that may not carry a
  # table.
  def table_for( entity, indicator, line_number )
    unless ['SAMPLE', 'PLATFORM', 'DATASET'].include?( indicator )
      custom_raise( line_number, error_msg(20, indicator.inspect) )
    end
    entity[:table] ||= Table.new
  end

  # Build the text of error number +i+; +extra_info+ is interpolated into
  # the messages that make use of it.  Raises IndexError for unknown numbers.
  def error_msg( i, extra_info=nil )
    case i
    when 10
      x = ["Lines without line-type characters are rows in a table, but",
           "a line containing an entity indicator such as",
           "\"#{LINE_TYPE_ENTITY_INDICATOR}SAMPLE\",",
           "\"#{LINE_TYPE_ENTITY_INDICATOR}PLATFORM\",",
           "or \"#{LINE_TYPE_ENTITY_INDICATOR}DATASET\" has not been",
           "previously encountered or it does not appear that this line is",
           "in a table."]
    when 20
      # tables are allowed inside samples, platforms and datasets
      x = ["Tables are only allowed inside SAMPLE, PLATFORM, and DATASET.",
           "Current table information found inside #{extra_info}."]
    when 30
      x = ["Entity attribute line (\"#{LINE_TYPE_ENTITY_ATTRIBUTE}\")",
           "found before entity indicator line (\"#{LINE_TYPE_ENTITY_INDICATOR}\")"]
    when 40
      x = ["Unknown entity indicator. Must be DATABASE, SAMPLE, PLATFORM,",
           "SERIES, DATASET, or SUBSET."]
      x << "Found: #{extra_info.inspect}." if extra_info
    else
      raise IndexError, "Unknown error message requested."
    end

    x.join(" ")
  end

  # Raise a RuntimeError whose message starts with the 1-based number of the
  # offending input line.
  def custom_raise( line_number_with_0_based_indexing, msg )
    raise ["Error processing input line: #{line_number_with_0_based_indexing+1}",
           msg].join("\t")
  end

  # Split "label = value" at the first equals sign, discarding the whitespace
  # around it.  With +shift_key+ the label is additionally cut down to what
  # follows its first underscore ("!Sample_title" becomes "title").
  #
  # Returns [label, value]; raises RuntimeError when the line has no equals
  # sign, or when +shift_key+ is set and the label has no underscore.
  def split_label_value_in( line, shift_key=false )
    # Explicit MatchData instead of $` and $', which could still hold the
    # result of an earlier, unrelated match.
    separator = %r{\s*=\s*}.match( line.to_s )
    key   = separator ? separator.pre_match  : nil
    value = separator ? separator.post_match : nil

    if shift_key && key
      underscore = %r{_}.match( key )
      key = underscore ? underscore.post_match : nil
    end

    if key.nil? || value.nil?
      raise "Unable to split #{line.inspect} into a label and a value."
    end

    [key, value]
  end

end # SOFT
404
+ end # Bio