bio 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (303) hide show
  1. data/ChangeLog +2105 -3728
  2. data/KNOWN_ISSUES.rdoc +35 -3
  3. data/README.rdoc +8 -2
  4. data/RELEASE_NOTES.rdoc +166 -0
  5. data/bin/bioruby +4 -1
  6. data/bioruby.gemspec +146 -1
  7. data/bioruby.gemspec.erb +3 -1
  8. data/doc/ChangeLog-before-1.3.1 +3961 -0
  9. data/doc/Tutorial.rd +154 -22
  10. data/doc/Tutorial.rd.html +125 -68
  11. data/lib/bio.rb +21 -6
  12. data/lib/bio/appl/bl2seq/report.rb +11 -202
  13. data/lib/bio/appl/blast/format0.rb +0 -193
  14. data/lib/bio/appl/blast/report.rb +2 -147
  15. data/lib/bio/appl/blast/wublast.rb +0 -208
  16. data/lib/bio/appl/fasta.rb +4 -19
  17. data/lib/bio/appl/fasta/format10.rb +0 -14
  18. data/lib/bio/appl/genscan/report.rb +0 -176
  19. data/lib/bio/appl/hmmer.rb +1 -15
  20. data/lib/bio/appl/hmmer/report.rb +0 -100
  21. data/lib/bio/appl/meme/mast.rb +156 -0
  22. data/lib/bio/appl/meme/mast/report.rb +91 -0
  23. data/lib/bio/appl/meme/motif.rb +48 -0
  24. data/lib/bio/appl/psort.rb +0 -111
  25. data/lib/bio/appl/psort/report.rb +1 -45
  26. data/lib/bio/appl/pts1.rb +2 -4
  27. data/lib/bio/appl/sosui/report.rb +5 -54
  28. data/lib/bio/appl/targetp/report.rb +1 -104
  29. data/lib/bio/appl/tmhmm/report.rb +0 -36
  30. data/lib/bio/command.rb +94 -10
  31. data/lib/bio/data/aa.rb +1 -77
  32. data/lib/bio/data/codontable.rb +1 -95
  33. data/lib/bio/data/na.rb +1 -26
  34. data/lib/bio/db/aaindex.rb +1 -38
  35. data/lib/bio/db/fasta.rb +1 -134
  36. data/lib/bio/db/fasta/format_qual.rb +204 -0
  37. data/lib/bio/db/fasta/qual.rb +102 -0
  38. data/lib/bio/db/fastq.rb +645 -0
  39. data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
  40. data/lib/bio/db/fastq/format_fastq.rb +175 -0
  41. data/lib/bio/db/genbank/genbank.rb +1 -86
  42. data/lib/bio/db/gff.rb +0 -17
  43. data/lib/bio/db/go.rb +4 -72
  44. data/lib/bio/db/kegg/common.rb +112 -0
  45. data/lib/bio/db/kegg/compound.rb +29 -20
  46. data/lib/bio/db/kegg/drug.rb +74 -34
  47. data/lib/bio/db/kegg/enzyme.rb +26 -5
  48. data/lib/bio/db/kegg/genes.rb +128 -15
  49. data/lib/bio/db/kegg/genome.rb +3 -41
  50. data/lib/bio/db/kegg/glycan.rb +19 -24
  51. data/lib/bio/db/kegg/orthology.rb +16 -56
  52. data/lib/bio/db/kegg/reaction.rb +81 -28
  53. data/lib/bio/db/kegg/taxonomy.rb +1 -52
  54. data/lib/bio/db/litdb.rb +1 -16
  55. data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
  56. data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
  57. data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
  58. data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
  59. data/lib/bio/db/prosite.rb +2 -95
  60. data/lib/bio/db/rebase.rb +5 -6
  61. data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
  62. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
  63. data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
  64. data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
  65. data/lib/bio/io/das.rb +0 -44
  66. data/lib/bio/io/ddbjxml.rb +1 -181
  67. data/lib/bio/io/flatfile.rb +1 -7
  68. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  69. data/lib/bio/io/keggapi.rb +0 -442
  70. data/lib/bio/io/ncbirest.rb +130 -132
  71. data/lib/bio/io/ncbisoap.rb +2 -1
  72. data/lib/bio/io/pubmed.rb +0 -88
  73. data/lib/bio/location.rb +0 -73
  74. data/lib/bio/pathway.rb +0 -171
  75. data/lib/bio/sequence.rb +18 -1
  76. data/lib/bio/sequence/adapter.rb +3 -0
  77. data/lib/bio/sequence/format.rb +16 -0
  78. data/lib/bio/sequence/quality_score.rb +205 -0
  79. data/lib/bio/tree.rb +70 -5
  80. data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
  81. data/lib/bio/util/sirna.rb +1 -23
  82. data/lib/bio/version.rb +1 -1
  83. data/sample/demo_aaindex.rb +67 -0
  84. data/sample/demo_aminoacid.rb +101 -0
  85. data/sample/demo_bl2seq_report.rb +220 -0
  86. data/sample/demo_blast_report.rb +285 -0
  87. data/sample/demo_codontable.rb +119 -0
  88. data/sample/demo_das.rb +105 -0
  89. data/sample/demo_ddbjxml.rb +212 -0
  90. data/sample/demo_fasta_remote.rb +51 -0
  91. data/sample/demo_fastaformat.rb +105 -0
  92. data/sample/demo_genbank.rb +132 -0
  93. data/sample/demo_genscan_report.rb +202 -0
  94. data/sample/demo_gff1.rb +49 -0
  95. data/sample/demo_go.rb +98 -0
  96. data/sample/demo_hmmer_report.rb +149 -0
  97. data/sample/demo_kegg_compound.rb +57 -0
  98. data/sample/demo_kegg_drug.rb +65 -0
  99. data/sample/demo_kegg_genome.rb +74 -0
  100. data/sample/demo_kegg_glycan.rb +72 -0
  101. data/sample/demo_kegg_orthology.rb +62 -0
  102. data/sample/demo_kegg_reaction.rb +66 -0
  103. data/sample/demo_kegg_taxonomy.rb +92 -0
  104. data/sample/demo_keggapi.rb +502 -0
  105. data/sample/demo_litdb.rb +42 -0
  106. data/sample/demo_locations.rb +99 -0
  107. data/sample/demo_ncbi_rest.rb +130 -0
  108. data/sample/demo_nucleicacid.rb +49 -0
  109. data/sample/demo_pathway.rb +196 -0
  110. data/sample/demo_prosite.rb +120 -0
  111. data/sample/demo_psort.rb +138 -0
  112. data/sample/demo_psort_report.rb +70 -0
  113. data/sample/demo_pubmed.rb +118 -0
  114. data/sample/demo_sirna.rb +63 -0
  115. data/sample/demo_sosui_report.rb +89 -0
  116. data/sample/demo_targetp_report.rb +135 -0
  117. data/sample/demo_tmhmm_report.rb +68 -0
  118. data/sample/pmfetch.rb +13 -4
  119. data/sample/pmsearch.rb +15 -4
  120. data/sample/test_phyloxml_big.rb +205 -0
  121. data/test/bioruby_test_helper.rb +61 -0
  122. data/test/data/KEGG/1.1.1.1.enzyme +935 -0
  123. data/test/data/KEGG/C00025.compound +102 -0
  124. data/test/data/KEGG/D00063.drug +104 -0
  125. data/test/data/KEGG/G00024.glycan +47 -0
  126. data/test/data/KEGG/G01366.glycan +18 -0
  127. data/test/data/KEGG/K02338.orthology +902 -0
  128. data/test/data/KEGG/R00006.reaction +14 -0
  129. data/test/data/fastq/README.txt +109 -0
  130. data/test/data/fastq/error_diff_ids.fastq +20 -0
  131. data/test/data/fastq/error_double_qual.fastq +22 -0
  132. data/test/data/fastq/error_double_seq.fastq +22 -0
  133. data/test/data/fastq/error_long_qual.fastq +20 -0
  134. data/test/data/fastq/error_no_qual.fastq +20 -0
  135. data/test/data/fastq/error_qual_del.fastq +20 -0
  136. data/test/data/fastq/error_qual_escape.fastq +20 -0
  137. data/test/data/fastq/error_qual_null.fastq +0 -0
  138. data/test/data/fastq/error_qual_space.fastq +21 -0
  139. data/test/data/fastq/error_qual_tab.fastq +21 -0
  140. data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
  141. data/test/data/fastq/error_qual_vtab.fastq +20 -0
  142. data/test/data/fastq/error_short_qual.fastq +20 -0
  143. data/test/data/fastq/error_spaces.fastq +20 -0
  144. data/test/data/fastq/error_tabs.fastq +21 -0
  145. data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
  146. data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
  147. data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
  148. data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
  149. data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
  150. data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
  151. data/test/data/fastq/error_trunc_in_title.fastq +17 -0
  152. data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
  153. data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
  154. data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
  155. data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
  156. data/test/data/fastq/longreads_as_illumina.fastq +40 -0
  157. data/test/data/fastq/longreads_as_sanger.fastq +40 -0
  158. data/test/data/fastq/longreads_as_solexa.fastq +40 -0
  159. data/test/data/fastq/longreads_original_sanger.fastq +120 -0
  160. data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
  161. data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
  162. data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
  163. data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
  164. data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
  165. data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
  166. data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
  167. data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
  168. data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
  169. data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
  170. data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
  171. data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
  172. data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
  173. data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
  174. data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
  175. data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
  176. data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
  177. data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
  178. data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
  179. data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
  180. data/test/data/meme/db +0 -0
  181. data/test/data/meme/mast +0 -0
  182. data/test/data/meme/mast.out +13 -0
  183. data/test/data/meme/meme.out +3 -0
  184. data/test/data/phyloxml/apaf.xml +666 -0
  185. data/test/data/phyloxml/bcl_2.xml +2097 -0
  186. data/test/data/phyloxml/made_up.xml +144 -0
  187. data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
  188. data/test/data/phyloxml/phyloxml_examples.xml +415 -0
  189. data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
  190. data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
  191. data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
  192. data/test/functional/bio/appl/test_pts1.rb +7 -5
  193. data/test/functional/bio/io/test_ensembl.rb +4 -3
  194. data/test/functional/bio/io/test_pubmed.rb +9 -3
  195. data/test/functional/bio/io/test_soapwsdl.rb +5 -4
  196. data/test/functional/bio/io/test_togows.rb +5 -4
  197. data/test/functional/bio/sequence/test_output_embl.rb +6 -4
  198. data/test/functional/bio/test_command.rb +54 -5
  199. data/test/runner.rb +5 -3
  200. data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
  201. data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
  202. data/test/unit/bio/appl/blast/test_report.rb +5 -4
  203. data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
  204. data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
  205. data/test/unit/bio/appl/genscan/test_report.rb +8 -9
  206. data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
  207. data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
  208. data/test/unit/bio/appl/mafft/test_report.rb +6 -5
  209. data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
  210. data/test/unit/bio/appl/meme/test_mast.rb +103 -0
  211. data/test/unit/bio/appl/meme/test_motif.rb +38 -0
  212. data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
  213. data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
  214. data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
  215. data/test/unit/bio/appl/sim4/test_report.rb +5 -4
  216. data/test/unit/bio/appl/sosui/test_report.rb +6 -5
  217. data/test/unit/bio/appl/targetp/test_report.rb +5 -3
  218. data/test/unit/bio/appl/test_blast.rb +5 -4
  219. data/test/unit/bio/appl/test_fasta.rb +4 -2
  220. data/test/unit/bio/appl/test_pts1.rb +4 -2
  221. data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
  222. data/test/unit/bio/data/test_aa.rb +5 -3
  223. data/test/unit/bio/data/test_codontable.rb +5 -4
  224. data/test/unit/bio/data/test_na.rb +5 -3
  225. data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
  226. data/test/unit/bio/db/embl/test_common.rb +4 -2
  227. data/test/unit/bio/db/embl/test_embl.rb +6 -6
  228. data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
  229. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
  230. data/test/unit/bio/db/embl/test_sptr.rb +6 -8
  231. data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
  232. data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
  233. data/test/unit/bio/db/kegg/test_compound.rb +146 -0
  234. data/test/unit/bio/db/kegg/test_drug.rb +194 -0
  235. data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
  236. data/test/unit/bio/db/kegg/test_genes.rb +32 -4
  237. data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
  238. data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
  239. data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
  240. data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
  241. data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
  242. data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
  243. data/test/unit/bio/db/test_aaindex.rb +6 -6
  244. data/test/unit/bio/db/test_fasta.rb +5 -46
  245. data/test/unit/bio/db/test_fastq.rb +829 -0
  246. data/test/unit/bio/db/test_gff.rb +4 -2
  247. data/test/unit/bio/db/test_lasergene.rb +7 -5
  248. data/test/unit/bio/db/test_medline.rb +4 -2
  249. data/test/unit/bio/db/test_newick.rb +6 -6
  250. data/test/unit/bio/db/test_nexus.rb +4 -2
  251. data/test/unit/bio/db/test_phyloxml.rb +769 -0
  252. data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
  253. data/test/unit/bio/db/test_prosite.rb +6 -5
  254. data/test/unit/bio/db/test_qual.rb +63 -0
  255. data/test/unit/bio/db/test_rebase.rb +5 -3
  256. data/test/unit/bio/db/test_soft.rb +7 -6
  257. data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
  258. data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
  259. data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
  260. data/test/unit/bio/io/test_ddbjxml.rb +4 -3
  261. data/test/unit/bio/io/test_ensembl.rb +5 -3
  262. data/test/unit/bio/io/test_fastacmd.rb +4 -3
  263. data/test/unit/bio/io/test_flatfile.rb +6 -5
  264. data/test/unit/bio/io/test_soapwsdl.rb +4 -3
  265. data/test/unit/bio/io/test_togows.rb +4 -2
  266. data/test/unit/bio/sequence/test_aa.rb +5 -3
  267. data/test/unit/bio/sequence/test_common.rb +4 -2
  268. data/test/unit/bio/sequence/test_compat.rb +4 -2
  269. data/test/unit/bio/sequence/test_dblink.rb +5 -3
  270. data/test/unit/bio/sequence/test_na.rb +4 -2
  271. data/test/unit/bio/sequence/test_quality_score.rb +330 -0
  272. data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
  273. data/test/unit/bio/test_alignment.rb +5 -3
  274. data/test/unit/bio/test_command.rb +4 -3
  275. data/test/unit/bio/test_db.rb +5 -3
  276. data/test/unit/bio/test_feature.rb +4 -2
  277. data/test/unit/bio/test_location.rb +4 -2
  278. data/test/unit/bio/test_map.rb +5 -3
  279. data/test/unit/bio/test_pathway.rb +4 -2
  280. data/test/unit/bio/test_reference.rb +4 -2
  281. data/test/unit/bio/test_sequence.rb +5 -3
  282. data/test/unit/bio/test_shell.rb +5 -3
  283. data/test/unit/bio/test_tree.rb +6 -6
  284. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
  285. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
  286. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
  287. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
  288. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
  289. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
  290. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
  291. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
  292. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
  293. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
  294. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
  295. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
  296. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
  297. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
  298. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
  299. data/test/unit/bio/util/test_color_scheme.rb +5 -3
  300. data/test/unit/bio/util/test_contingency_table.rb +5 -3
  301. data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
  302. data/test/unit/bio/util/test_sirna.rb +6 -4
  303. metadata +147 -2
@@ -4,19 +4,44 @@
4
4
  # Copyright:: Copyright (C) 2001, 2002, 2004, 2007 Toshiaki Katayama <k@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: compound.rb,v 0.17 2007/11/27 07:09:43 k Exp $
7
+ # $Id:$
8
8
  #
9
9
 
10
10
  require 'bio/db'
11
+ require 'bio/db/kegg/common'
11
12
 
12
13
  module Bio
13
14
  class KEGG
14
15
 
16
+ # == Description
17
+ #
18
+ # Bio::KEGG::COMPOUND is a parser class for the KEGG COMPOUND database entry.
19
+ # KEGG COMPOUND is a chemical structure database.
20
+ #
21
+ # == References
22
+ #
23
+ # * http://www.genome.jp/kegg/compound/
24
+ #
15
25
  class COMPOUND < KEGGDB
16
26
 
17
27
  DELIMITER = RS = "\n///\n"
18
28
  TAGSIZE = 12
19
29
 
30
+ include Common::DblinksAsHash
31
+ # Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field.
32
+ def dblinks_as_hash; super; end if false #dummy for RDoc
33
+ alias dblinks dblinks_as_hash
34
+
35
+ include Common::PathwaysAsHash
36
+ # Returns a Hash of the pathway ID and name in PATHWAY field.
37
+ def pathways_as_hash; super; end if false #dummy for RDoc
38
+ alias pathways pathways_as_hash
39
+
40
+ # Creates a new Bio::KEGG::COMPOUND object.
41
+ # ---
42
+ # *Arguments*:
43
+ # * (required) _entry_: (String) single entry as a string
44
+ # *Returns*:: Bio::KEGG::COMPOUND object
20
45
  def initialize(entry)
21
46
  super(entry, TAGSIZE)
22
47
  end
@@ -31,6 +56,7 @@ class COMPOUND < KEGGDB
31
56
  field_fetch('NAME').split(/\s*;\s*/)
32
57
  end
33
58
 
59
+ # The first name recorded in the NAME field.
34
60
  def name
35
61
  names.first
36
62
  end
@@ -75,7 +101,7 @@ class COMPOUND < KEGGDB
75
101
  end
76
102
 
77
103
  # PATHWAY
78
- def pathways
104
+ def pathways_as_strings
79
105
  lines_fetch('PATHWAY')
80
106
  end
81
107
 
@@ -93,7 +119,7 @@ class COMPOUND < KEGGDB
93
119
  end
94
120
 
95
121
  # DBLINKS
96
- def dblinks
122
+ def dblinks_as_strings
97
123
  lines_fetch('DBLINKS')
98
124
  end
99
125
 
@@ -112,20 +138,3 @@ end # COMPOUND
112
138
  end # KEGG
113
139
  end # Bio
114
140
 
115
-
116
- if __FILE__ == $0
117
- entry = ARGF.read
118
- cpd = Bio::KEGG::COMPOUND.new(entry)
119
- p cpd.entry_id
120
- p cpd.names
121
- p cpd.name
122
- p cpd.formula
123
- p cpd.mass
124
- p cpd.reactions
125
- p cpd.rpairs
126
- p cpd.pathways
127
- p cpd.enzymes
128
- p cpd.dblinks
129
- p cpd.kcf
130
- end
131
-
@@ -4,95 +4,135 @@
4
4
  # Copyright:: Copyright (C) 2007 Toshiaki Katayama <k@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: drug.rb,v 1.3 2007/06/28 11:27:24 k Exp $
7
+ # $Id:$
8
8
  #
9
9
 
10
10
  require 'bio/db'
11
+ require 'bio/db/kegg/common'
11
12
 
12
13
  module Bio
13
14
  class KEGG
14
15
 
16
+ # == Description
17
+ #
18
+ # Bio::KEGG::DRUG is a parser class for the KEGG DRUG database entry.
19
+ # KEGG DRUG is a drug information database.
20
+ #
21
+ # == References
22
+ #
23
+ # * http://www.genome.jp/kegg/drug/
24
+ #
15
25
  class DRUG < KEGGDB
16
26
 
17
27
  DELIMITER = RS = "\n///\n"
18
28
  TAGSIZE = 12
19
29
 
30
+ include Common::DblinksAsHash
31
+ # Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field.
32
+ def dblinks_as_hash; super; end if false #dummy for RDoc
33
+ alias dblinks dblinks_as_hash
34
+
35
+ include Common::PathwaysAsHash
36
+ # Returns a Hash of the pathway ID and name in PATHWAY field.
37
+ def pathways_as_hash; super; end if false #dummy for RDoc
38
+ alias pathways pathways_as_hash
39
+
40
+ # Creates a new Bio::KEGG::DRUG object.
41
+ # ---
42
+ # *Arguments*:
43
+ # * (required) _entry_: (String) single entry as a string
44
+ # *Returns*:: Bio::KEGG::DRUG object
20
45
  def initialize(entry)
21
46
  super(entry, TAGSIZE)
22
47
  end
23
48
 
24
- # ENTRY
49
+ # ID of the entry, described in the ENTRY line.
50
+ # ---
51
+ # *Returns*:: String
25
52
  def entry_id
26
53
  field_fetch('ENTRY')[/\S+/]
27
54
  end
28
55
 
29
- # NAME
56
+ # Names described in the NAME line.
57
+ # ---
58
+ # *Returns*:: Array containing String objects
30
59
  def names
31
60
  field_fetch('NAME').split(/\s*;\s*/)
32
61
  end
33
62
 
63
+ # The first name recorded in the NAME field.
64
+ # ---
65
+ # *Returns*:: String
34
66
  def name
35
67
  names.first
36
68
  end
37
69
 
38
- # FORMULA
70
+ # Chemical formula described in the FORMULA line.
71
+ # ---
72
+ # *Returns*:: String
39
73
  def formula
40
74
  field_fetch('FORMULA')
41
75
  end
42
76
 
43
- # MASS
77
+ # Molecular weight described in the MASS line.
78
+ # ---
79
+ # *Returns*:: Float
44
80
  def mass
45
81
  field_fetch('MASS').to_f
46
82
  end
47
83
 
48
- # ACTIVITY
84
+ # Biological or chemical activity described in the ACTIVITY line.
85
+ # ---
86
+ # *Returns*:: String
49
87
  def activity
50
88
  field_fetch('ACTIVITY')
51
89
  end
52
90
 
53
- # REMARK
91
+ # REMARK lines.
92
+ # ---
93
+ # *Returns*:: String
54
94
  def remark
55
95
  field_fetch('REMARK')
56
96
  end
57
97
 
58
- # COMMENT
59
- def comment
60
- field_fetch('COMMENT')
61
- end
62
-
63
- # PATHWAY
64
- def pathways
65
- lines_fetch('DBLINKS')
98
+ # List of KEGG Pathway IDs with short descriptions,
99
+ # described in the PATHWAY lines.
100
+ # ---
101
+ # *Returns*:: Array containing String objects
102
+ def pathways_as_strings
103
+ lines_fetch('PATHWAY')
66
104
  end
67
105
 
68
- # DBLINKS
69
- def dblinks
106
+ # List of database names and IDs, described in the DBLINKS lines.
107
+ # ---
108
+ # *Returns*:: Array containing String objects
109
+ def dblinks_as_strings
70
110
  lines_fetch('DBLINKS')
71
111
  end
72
112
 
73
- # ATOM, BOND
113
+ # ATOM, BOND lines.
114
+ # ---
115
+ # *Returns*:: String
74
116
  def kcf
75
117
  return "#{get('ATOM')}#{get('BOND')}"
76
118
  end
77
119
 
120
+ # COMMENT lines.
121
+ # ---
122
+ # *Returns*:: String
123
+ def comment
124
+ field_fetch('COMMENT')
125
+ end
126
+
127
+ # Product names described in the PRODUCTS lines.
128
+ # ---
129
+ # *Returns*:: Array containing String objects
130
+ def products
131
+ lines_fetch('PRODUCTS')
132
+ end
133
+
78
134
  end # DRUG
79
135
 
80
136
  end # KEGG
81
137
  end # Bio
82
138
 
83
-
84
- if __FILE__ == $0
85
- entry = ARGF.read # dr:D00001
86
- dr = Bio::KEGG::DRUG.new(entry)
87
- p dr.entry_id
88
- p dr.names
89
- p dr.name
90
- p dr.formula
91
- p dr.mass
92
- p dr.activity
93
- p dr.remark
94
- p dr.comment
95
- p dr.dblinks
96
- p dr.kcf
97
- end
98
-
@@ -4,10 +4,11 @@
4
4
  # Copyright:: Copyright (C) 2001, 2002, 2007 Toshiaki Katayama <k@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: enzyme.rb,v 0.12 2007/12/14 16:20:38 k Exp $
7
+ # $Id:$
8
8
  #
9
9
 
10
10
  require 'bio/db'
11
+ require 'bio/db/kegg/common'
11
12
 
12
13
  module Bio
13
14
  class KEGG
@@ -17,6 +18,26 @@ class ENZYME < KEGGDB
17
18
  DELIMITER = RS = "\n///\n"
18
19
  TAGSIZE = 12
19
20
 
21
+ include Common::DblinksAsHash
22
+ # Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field.
23
+ def dblinks_as_hash; super; end if false #dummy for RDoc
24
+ alias dblinks dblinks_as_hash
25
+
26
+ include Common::PathwaysAsHash
27
+ # Returns a Hash of the pathway ID and name in PATHWAY field.
28
+ def pathways_as_hash; super; end if false #dummy for RDoc
29
+ alias pathways pathways_as_hash
30
+
31
+ include Common::OrthologsAsHash
32
+ # Returns a Hash of the orthology ID and definition in ORTHOLOGY field.
33
+ def orthologs_as_hash; super; end if false #dummy for RDoc
34
+ alias orthologs orthologs_as_hash
35
+
36
+ include Common::GenesAsHash
37
+ # Returns a Hash of the organism ID and an Array of entry IDs in GENES field.
38
+ def genes_as_hash; super; end if false #dummy for RDoc
39
+ alias genes genes_as_hash
40
+
20
41
  def initialize(entry)
21
42
  super(entry, TAGSIZE)
22
43
  end
@@ -102,17 +123,17 @@ class ENZYME < KEGGDB
102
123
  end
103
124
 
104
125
  # PATHWAY
105
- def pathways
126
+ def pathways_as_strings
106
127
  lines_fetch('PATHWAY')
107
128
  end
108
129
 
109
130
  # ORTHOLOGY
110
- def orthologs
131
+ def orthologs_as_strings
111
132
  lines_fetch('ORTHOLOGY')
112
133
  end
113
134
 
114
135
  # GENES
115
- def genes
136
+ def genes_as_strings
116
137
  lines_fetch('GENES')
117
138
  end
118
139
 
@@ -137,7 +158,7 @@ class ENZYME < KEGGDB
137
158
  # REFERENCE
138
159
 
139
160
  # DBLINKS
140
- def dblinks
161
+ def dblinks_as_strings
141
162
  lines_fetch('DBLINKS')
142
163
  end
143
164
 
@@ -5,7 +5,7 @@
5
5
  # Toshiaki Katayama <k@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: genes.rb,v 0.26 2007/12/14 16:20:38 k Exp $
8
+ # $Id:$
9
9
  #
10
10
  #
11
11
  # == KEGG GENES parser
@@ -38,7 +38,7 @@
38
38
  #
39
39
  # # PATHWAY
40
40
  # p entry.pathway # => String
41
- # p entry.pathways # => Array
41
+ # p entry.pathways # => Hash
42
42
  #
43
43
  # # POSITION
44
44
  # p entry.position # => String
@@ -52,6 +52,9 @@
52
52
  # # DBLINKS
53
53
  # p entry.dblinks # => Hash of Array
54
54
  #
55
+ # # STRUCTURE
56
+ # p entry.structure # => Array
57
+ #
55
58
  # # CODON_USAGE
56
59
  # p entry.codon_usage # => Hash
57
60
  # p entry.cu_list # => Array
@@ -73,18 +76,53 @@ module Bio
73
76
  autoload :Locations, 'bio/location'
74
77
  autoload :Sequence, 'bio/sequence'
75
78
 
79
+ require 'bio/db/kegg/common'
80
+
76
81
  class KEGG
77
82
 
83
+ # == Description
84
+ #
85
+ # KEGG GENES entry parser.
86
+ #
87
+ # == References
88
+ #
89
+ # * http://www.genome.jp/kegg/genes.html
90
+ #
78
91
  class GENES < KEGGDB
79
92
 
80
93
  DELIMITER = RS = "\n///\n"
81
94
  TAGSIZE = 12
82
95
 
96
+ include Common::DblinksAsHash
97
+ # Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field.
98
+ def dblinks_as_hash; super; end if false #dummy for RDoc
99
+ alias dblinks dblinks_as_hash
100
+
101
+ include Common::PathwaysAsHash
102
+ # Returns a Hash of the pathway ID and name in PATHWAY field.
103
+ def pathways_as_hash; super; end if false #dummy for RDoc
104
+ alias pathways pathways_as_hash
105
+
106
+ include Common::OrthologsAsHash
107
+ # Returns a Hash of the orthology ID and definition in ORTHOLOGY field.
108
+ def orthologs_as_hash; super; end if false #dummy for RDoc
109
+ alias orthologs orthologs_as_hash
110
+
111
+ # Creates a new Bio::KEGG::GENES object.
112
+ # ---
113
+ # *Arguments*:
114
+ # * (required) _entry_: (String) single entry as a string
115
+ # *Returns*:: Bio::KEGG::GENES object
83
116
  def initialize(entry)
84
117
  super(entry, TAGSIZE)
85
118
  end
86
119
 
87
-
120
+ # Returns the "ENTRY" line content as a Hash.
121
+ # For example,
122
+ # {"organism"=>"E.coli", "division"=>"CDS", "id"=>"b0356"}
123
+ #
124
+ # ---
125
+ # *Returns*:: Hash
88
126
  def entry
89
127
  unless @data['ENTRY']
90
128
  hash = Hash.new('')
@@ -99,34 +137,59 @@ class GENES < KEGGDB
99
137
  @data['ENTRY']
100
138
  end
101
139
 
140
+ # ID of the entry, described in the ENTRY line.
141
+ # ---
142
+ # *Returns*:: String
102
143
  def entry_id
103
144
  entry['id']
104
145
  end
105
146
 
147
+ # Division of the entry, described in the ENTRY line.
148
+ # ---
149
+ # *Returns*:: String
106
150
  def division
107
151
  entry['division'] # CDS, tRNA etc.
108
152
  end
109
153
 
154
+ # Organism name of the entry, described in the ENTRY line.
155
+ # ---
156
+ # *Returns*:: String
110
157
  def organism
111
158
  entry['organism'] # H.sapiens etc.
112
159
  end
113
160
 
161
+ # Returns the NAME line.
162
+ # ---
163
+ # *Returns*:: String
114
164
  def name
115
165
  field_fetch('NAME')
116
166
  end
117
167
 
168
+ # Names of the entry as an Array, described in the NAME line.
169
+ #
170
+ # ---
171
+ # *Returns*:: Array containing String
118
172
  def genes
119
173
  name.split(', ')
120
174
  end
121
175
 
176
+ # Returns the first gene name described in the NAME line.
177
+ # ---
178
+ # *Returns*:: String
122
179
  def gene
123
180
  genes.first
124
181
  end
125
182
 
183
+ # Definition of the entry, described in the DEFINITION line.
184
+ # ---
185
+ # *Returns*:: String
126
186
  def definition
127
187
  field_fetch('DEFINITION')
128
188
  end
129
189
 
190
+ # Enzyme's EC numbers shown in the DEFINITION line.
191
+ # ---
192
+ # *Returns*:: Array containing String
130
193
  def eclinks
131
194
  ec_list = definition.slice(/\[EC:(.*?)\]/, 1)
132
195
  if ec_list
@@ -136,18 +199,30 @@ class GENES < KEGGDB
136
199
  end
137
200
  end
138
201
 
139
- def orthologs
202
+ # Orthologs described in the ORTHOLOGY lines.
203
+ # ---
204
+ # *Returns*:: Array containing String
205
+ def orthologs_as_strings
140
206
  lines_fetch('ORTHOLOGY')
141
207
  end
142
208
 
209
+ # Returns the PATHWAY lines as a String.
210
+ # ---
211
+ # *Returns*:: String
143
212
  def pathway
144
213
  field_fetch('PATHWAY')
145
214
  end
146
215
 
147
- def pathways
148
- pathway.scan(/\[PATH:(.*?)\]/).flatten
216
+ # Pathways described in the PATHWAY lines.
217
+ # ---
218
+ # *Returns*:: Array containing String
219
+ def pathways_as_strings
220
+ lines_fetch('PATHWAY')
149
221
  end
150
222
 
223
+ # The position in the genome described in the POSITION line.
224
+ # ---
225
+ # *Returns*:: String
151
226
  def position
152
227
  unless @data['POSITION']
153
228
  @data['POSITION'] = fetch('POSITION').gsub(/\s/, '')
@@ -155,6 +230,9 @@ class GENES < KEGGDB
155
230
  @data['POSITION']
156
231
  end
157
232
 
233
+ # Chromosome described in the POSITION line.
234
+ # ---
235
+ # *Returns*:: String or nil
158
236
  def chromosome
159
237
  if position[/:/]
160
238
  position.sub(/:.*/, '')
@@ -165,14 +243,25 @@ class GENES < KEGGDB
165
243
  end
166
244
  end
167
245
 
246
+ # The position in the genome described in the POSITION line
247
+ # as GenBank feature table location formatted string.
248
+ # ---
249
+ # *Returns*:: String
168
250
  def gbposition
169
251
  position.sub(/.*?:/, '')
170
252
  end
171
253
 
254
+ # The position in the genome described in the POSITION line
255
+ # as Bio::Locations object.
256
+ # ---
257
+ # *Returns*:: Bio::Locations object
172
258
  def locations
173
259
  Bio::Locations.new(gbposition)
174
260
  end
175
261
 
262
+ # Motif information described in the MOTIF lines.
263
+ # ---
264
+ # *Returns*:: Hash
176
265
  def motif
177
266
  unless @data['MOTIF']
178
267
  hash = {}
@@ -191,18 +280,27 @@ class GENES < KEGGDB
191
280
  @data['MOTIF'] # Hash of Array of IDs in MOTIF
192
281
  end
193
282
 
194
- def dblinks
195
- unless @data['DBLINKS']
196
- hash = {}
197
- get('DBLINKS').scan(/(\S+):\s*(.*)\n?/).each do |db, str|
198
- id_array = str.strip.split(/\s+/)
199
- hash[db] = id_array
200
- end
201
- @data['DBLINKS'] = hash
283
+ # Links to other databases described in the DBLINKS lines.
284
+ # ---
285
+ # *Returns*:: Array containing String objects
286
+ def dblinks_as_strings
287
+ lines_fetch('DBLINKS')
288
+ end
289
+
290
+ # Returns structure ID information described in the STRUCTURE lines.
291
+ # ---
292
+ # *Returns*:: Array containing String
293
+ def structure
294
+ unless @data['STRUCTURE']
295
+ @data['STRUCTURE'] = fetch('STRUCTURE').sub(/(PDB: )*/,'').split(/\s+/)
202
296
  end
203
- @data['DBLINKS'] # Hash of Array of IDs in DBLINKS
297
+ @data['STRUCTURE'] # ['PDB:1A9X', ...]
204
298
  end
299
+ alias structures structure
205
300
 
301
+ # Codon usage data described in the CODON_USAGE lines.
302
+ # ---
303
+ # *Returns*:: Hash
206
304
  def codon_usage(codon = nil)
207
305
  unless @data['CODON_USAGE']
208
306
  hash = Hash.new
@@ -220,6 +318,9 @@ class GENES < KEGGDB
220
318
  @data['CODON_USAGE']
221
319
  end
222
320
 
321
+ # Codon usage data described in the CODON_USAGE lines as an array.
322
+ # ---
323
+ # *Returns*:: Array
223
324
  def cu_list
224
325
  ary = []
225
326
  get('CODON_USAGE').sub(/.*/,'').each_line do |line| # cut 1st line
@@ -230,6 +331,9 @@ class GENES < KEGGDB
230
331
  return ary
231
332
  end
232
333
 
334
+ # Returns amino acid sequence described in the AASEQ lines.
335
+ # ---
336
+ # *Returns*:: Bio::Sequence::AA object
233
337
  def aaseq
234
338
  unless @data['AASEQ']
235
339
  @data['AASEQ'] = Bio::Sequence::AA.new(fetch('AASEQ').gsub(/\d+/, ''))
@@ -237,10 +341,16 @@ class GENES < KEGGDB
237
341
  @data['AASEQ']
238
342
  end
239
343
 
344
+ # Returns length of the amino acid sequence described in the AASEQ lines.
345
+ # ---
346
+ # *Returns*:: Integer
240
347
  def aalen
241
348
  fetch('AASEQ')[/\d+/].to_i
242
349
  end
243
350
 
351
+ # Returns nucleic acid sequence described in the NTSEQ lines.
352
+ # ---
353
+ # *Returns*:: Bio::Sequence::NA object
244
354
  def ntseq
245
355
  unless @data['NTSEQ']
246
356
  @data['NTSEQ'] = Bio::Sequence::NA.new(fetch('NTSEQ').gsub(/\d+/, ''))
@@ -249,6 +359,9 @@ class GENES < KEGGDB
249
359
  end
250
360
  alias naseq ntseq
251
361
 
362
+ # Returns nucleic acid sequence length.
363
+ # ---
364
+ # *Returns*:: Integer
252
365
  def ntlen
253
366
  fetch('NTSEQ')[/\d+/].to_i
254
367
  end