bio 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (303) hide show
  1. data/ChangeLog +2105 -3728
  2. data/KNOWN_ISSUES.rdoc +35 -3
  3. data/README.rdoc +8 -2
  4. data/RELEASE_NOTES.rdoc +166 -0
  5. data/bin/bioruby +4 -1
  6. data/bioruby.gemspec +146 -1
  7. data/bioruby.gemspec.erb +3 -1
  8. data/doc/ChangeLog-before-1.3.1 +3961 -0
  9. data/doc/Tutorial.rd +154 -22
  10. data/doc/Tutorial.rd.html +125 -68
  11. data/lib/bio.rb +21 -6
  12. data/lib/bio/appl/bl2seq/report.rb +11 -202
  13. data/lib/bio/appl/blast/format0.rb +0 -193
  14. data/lib/bio/appl/blast/report.rb +2 -147
  15. data/lib/bio/appl/blast/wublast.rb +0 -208
  16. data/lib/bio/appl/fasta.rb +4 -19
  17. data/lib/bio/appl/fasta/format10.rb +0 -14
  18. data/lib/bio/appl/genscan/report.rb +0 -176
  19. data/lib/bio/appl/hmmer.rb +1 -15
  20. data/lib/bio/appl/hmmer/report.rb +0 -100
  21. data/lib/bio/appl/meme/mast.rb +156 -0
  22. data/lib/bio/appl/meme/mast/report.rb +91 -0
  23. data/lib/bio/appl/meme/motif.rb +48 -0
  24. data/lib/bio/appl/psort.rb +0 -111
  25. data/lib/bio/appl/psort/report.rb +1 -45
  26. data/lib/bio/appl/pts1.rb +2 -4
  27. data/lib/bio/appl/sosui/report.rb +5 -54
  28. data/lib/bio/appl/targetp/report.rb +1 -104
  29. data/lib/bio/appl/tmhmm/report.rb +0 -36
  30. data/lib/bio/command.rb +94 -10
  31. data/lib/bio/data/aa.rb +1 -77
  32. data/lib/bio/data/codontable.rb +1 -95
  33. data/lib/bio/data/na.rb +1 -26
  34. data/lib/bio/db/aaindex.rb +1 -38
  35. data/lib/bio/db/fasta.rb +1 -134
  36. data/lib/bio/db/fasta/format_qual.rb +204 -0
  37. data/lib/bio/db/fasta/qual.rb +102 -0
  38. data/lib/bio/db/fastq.rb +645 -0
  39. data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
  40. data/lib/bio/db/fastq/format_fastq.rb +175 -0
  41. data/lib/bio/db/genbank/genbank.rb +1 -86
  42. data/lib/bio/db/gff.rb +0 -17
  43. data/lib/bio/db/go.rb +4 -72
  44. data/lib/bio/db/kegg/common.rb +112 -0
  45. data/lib/bio/db/kegg/compound.rb +29 -20
  46. data/lib/bio/db/kegg/drug.rb +74 -34
  47. data/lib/bio/db/kegg/enzyme.rb +26 -5
  48. data/lib/bio/db/kegg/genes.rb +128 -15
  49. data/lib/bio/db/kegg/genome.rb +3 -41
  50. data/lib/bio/db/kegg/glycan.rb +19 -24
  51. data/lib/bio/db/kegg/orthology.rb +16 -56
  52. data/lib/bio/db/kegg/reaction.rb +81 -28
  53. data/lib/bio/db/kegg/taxonomy.rb +1 -52
  54. data/lib/bio/db/litdb.rb +1 -16
  55. data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
  56. data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
  57. data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
  58. data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
  59. data/lib/bio/db/prosite.rb +2 -95
  60. data/lib/bio/db/rebase.rb +5 -6
  61. data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
  62. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
  63. data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
  64. data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
  65. data/lib/bio/io/das.rb +0 -44
  66. data/lib/bio/io/ddbjxml.rb +1 -181
  67. data/lib/bio/io/flatfile.rb +1 -7
  68. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  69. data/lib/bio/io/keggapi.rb +0 -442
  70. data/lib/bio/io/ncbirest.rb +130 -132
  71. data/lib/bio/io/ncbisoap.rb +2 -1
  72. data/lib/bio/io/pubmed.rb +0 -88
  73. data/lib/bio/location.rb +0 -73
  74. data/lib/bio/pathway.rb +0 -171
  75. data/lib/bio/sequence.rb +18 -1
  76. data/lib/bio/sequence/adapter.rb +3 -0
  77. data/lib/bio/sequence/format.rb +16 -0
  78. data/lib/bio/sequence/quality_score.rb +205 -0
  79. data/lib/bio/tree.rb +70 -5
  80. data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
  81. data/lib/bio/util/sirna.rb +1 -23
  82. data/lib/bio/version.rb +1 -1
  83. data/sample/demo_aaindex.rb +67 -0
  84. data/sample/demo_aminoacid.rb +101 -0
  85. data/sample/demo_bl2seq_report.rb +220 -0
  86. data/sample/demo_blast_report.rb +285 -0
  87. data/sample/demo_codontable.rb +119 -0
  88. data/sample/demo_das.rb +105 -0
  89. data/sample/demo_ddbjxml.rb +212 -0
  90. data/sample/demo_fasta_remote.rb +51 -0
  91. data/sample/demo_fastaformat.rb +105 -0
  92. data/sample/demo_genbank.rb +132 -0
  93. data/sample/demo_genscan_report.rb +202 -0
  94. data/sample/demo_gff1.rb +49 -0
  95. data/sample/demo_go.rb +98 -0
  96. data/sample/demo_hmmer_report.rb +149 -0
  97. data/sample/demo_kegg_compound.rb +57 -0
  98. data/sample/demo_kegg_drug.rb +65 -0
  99. data/sample/demo_kegg_genome.rb +74 -0
  100. data/sample/demo_kegg_glycan.rb +72 -0
  101. data/sample/demo_kegg_orthology.rb +62 -0
  102. data/sample/demo_kegg_reaction.rb +66 -0
  103. data/sample/demo_kegg_taxonomy.rb +92 -0
  104. data/sample/demo_keggapi.rb +502 -0
  105. data/sample/demo_litdb.rb +42 -0
  106. data/sample/demo_locations.rb +99 -0
  107. data/sample/demo_ncbi_rest.rb +130 -0
  108. data/sample/demo_nucleicacid.rb +49 -0
  109. data/sample/demo_pathway.rb +196 -0
  110. data/sample/demo_prosite.rb +120 -0
  111. data/sample/demo_psort.rb +138 -0
  112. data/sample/demo_psort_report.rb +70 -0
  113. data/sample/demo_pubmed.rb +118 -0
  114. data/sample/demo_sirna.rb +63 -0
  115. data/sample/demo_sosui_report.rb +89 -0
  116. data/sample/demo_targetp_report.rb +135 -0
  117. data/sample/demo_tmhmm_report.rb +68 -0
  118. data/sample/pmfetch.rb +13 -4
  119. data/sample/pmsearch.rb +15 -4
  120. data/sample/test_phyloxml_big.rb +205 -0
  121. data/test/bioruby_test_helper.rb +61 -0
  122. data/test/data/KEGG/1.1.1.1.enzyme +935 -0
  123. data/test/data/KEGG/C00025.compound +102 -0
  124. data/test/data/KEGG/D00063.drug +104 -0
  125. data/test/data/KEGG/G00024.glycan +47 -0
  126. data/test/data/KEGG/G01366.glycan +18 -0
  127. data/test/data/KEGG/K02338.orthology +902 -0
  128. data/test/data/KEGG/R00006.reaction +14 -0
  129. data/test/data/fastq/README.txt +109 -0
  130. data/test/data/fastq/error_diff_ids.fastq +20 -0
  131. data/test/data/fastq/error_double_qual.fastq +22 -0
  132. data/test/data/fastq/error_double_seq.fastq +22 -0
  133. data/test/data/fastq/error_long_qual.fastq +20 -0
  134. data/test/data/fastq/error_no_qual.fastq +20 -0
  135. data/test/data/fastq/error_qual_del.fastq +20 -0
  136. data/test/data/fastq/error_qual_escape.fastq +20 -0
  137. data/test/data/fastq/error_qual_null.fastq +0 -0
  138. data/test/data/fastq/error_qual_space.fastq +21 -0
  139. data/test/data/fastq/error_qual_tab.fastq +21 -0
  140. data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
  141. data/test/data/fastq/error_qual_vtab.fastq +20 -0
  142. data/test/data/fastq/error_short_qual.fastq +20 -0
  143. data/test/data/fastq/error_spaces.fastq +20 -0
  144. data/test/data/fastq/error_tabs.fastq +21 -0
  145. data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
  146. data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
  147. data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
  148. data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
  149. data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
  150. data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
  151. data/test/data/fastq/error_trunc_in_title.fastq +17 -0
  152. data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
  153. data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
  154. data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
  155. data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
  156. data/test/data/fastq/longreads_as_illumina.fastq +40 -0
  157. data/test/data/fastq/longreads_as_sanger.fastq +40 -0
  158. data/test/data/fastq/longreads_as_solexa.fastq +40 -0
  159. data/test/data/fastq/longreads_original_sanger.fastq +120 -0
  160. data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
  161. data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
  162. data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
  163. data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
  164. data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
  165. data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
  166. data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
  167. data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
  168. data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
  169. data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
  170. data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
  171. data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
  172. data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
  173. data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
  174. data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
  175. data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
  176. data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
  177. data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
  178. data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
  179. data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
  180. data/test/data/meme/db +0 -0
  181. data/test/data/meme/mast +0 -0
  182. data/test/data/meme/mast.out +13 -0
  183. data/test/data/meme/meme.out +3 -0
  184. data/test/data/phyloxml/apaf.xml +666 -0
  185. data/test/data/phyloxml/bcl_2.xml +2097 -0
  186. data/test/data/phyloxml/made_up.xml +144 -0
  187. data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
  188. data/test/data/phyloxml/phyloxml_examples.xml +415 -0
  189. data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
  190. data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
  191. data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
  192. data/test/functional/bio/appl/test_pts1.rb +7 -5
  193. data/test/functional/bio/io/test_ensembl.rb +4 -3
  194. data/test/functional/bio/io/test_pubmed.rb +9 -3
  195. data/test/functional/bio/io/test_soapwsdl.rb +5 -4
  196. data/test/functional/bio/io/test_togows.rb +5 -4
  197. data/test/functional/bio/sequence/test_output_embl.rb +6 -4
  198. data/test/functional/bio/test_command.rb +54 -5
  199. data/test/runner.rb +5 -3
  200. data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
  201. data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
  202. data/test/unit/bio/appl/blast/test_report.rb +5 -4
  203. data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
  204. data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
  205. data/test/unit/bio/appl/genscan/test_report.rb +8 -9
  206. data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
  207. data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
  208. data/test/unit/bio/appl/mafft/test_report.rb +6 -5
  209. data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
  210. data/test/unit/bio/appl/meme/test_mast.rb +103 -0
  211. data/test/unit/bio/appl/meme/test_motif.rb +38 -0
  212. data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
  213. data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
  214. data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
  215. data/test/unit/bio/appl/sim4/test_report.rb +5 -4
  216. data/test/unit/bio/appl/sosui/test_report.rb +6 -5
  217. data/test/unit/bio/appl/targetp/test_report.rb +5 -3
  218. data/test/unit/bio/appl/test_blast.rb +5 -4
  219. data/test/unit/bio/appl/test_fasta.rb +4 -2
  220. data/test/unit/bio/appl/test_pts1.rb +4 -2
  221. data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
  222. data/test/unit/bio/data/test_aa.rb +5 -3
  223. data/test/unit/bio/data/test_codontable.rb +5 -4
  224. data/test/unit/bio/data/test_na.rb +5 -3
  225. data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
  226. data/test/unit/bio/db/embl/test_common.rb +4 -2
  227. data/test/unit/bio/db/embl/test_embl.rb +6 -6
  228. data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
  229. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
  230. data/test/unit/bio/db/embl/test_sptr.rb +6 -8
  231. data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
  232. data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
  233. data/test/unit/bio/db/kegg/test_compound.rb +146 -0
  234. data/test/unit/bio/db/kegg/test_drug.rb +194 -0
  235. data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
  236. data/test/unit/bio/db/kegg/test_genes.rb +32 -4
  237. data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
  238. data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
  239. data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
  240. data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
  241. data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
  242. data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
  243. data/test/unit/bio/db/test_aaindex.rb +6 -6
  244. data/test/unit/bio/db/test_fasta.rb +5 -46
  245. data/test/unit/bio/db/test_fastq.rb +829 -0
  246. data/test/unit/bio/db/test_gff.rb +4 -2
  247. data/test/unit/bio/db/test_lasergene.rb +7 -5
  248. data/test/unit/bio/db/test_medline.rb +4 -2
  249. data/test/unit/bio/db/test_newick.rb +6 -6
  250. data/test/unit/bio/db/test_nexus.rb +4 -2
  251. data/test/unit/bio/db/test_phyloxml.rb +769 -0
  252. data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
  253. data/test/unit/bio/db/test_prosite.rb +6 -5
  254. data/test/unit/bio/db/test_qual.rb +63 -0
  255. data/test/unit/bio/db/test_rebase.rb +5 -3
  256. data/test/unit/bio/db/test_soft.rb +7 -6
  257. data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
  258. data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
  259. data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
  260. data/test/unit/bio/io/test_ddbjxml.rb +4 -3
  261. data/test/unit/bio/io/test_ensembl.rb +5 -3
  262. data/test/unit/bio/io/test_fastacmd.rb +4 -3
  263. data/test/unit/bio/io/test_flatfile.rb +6 -5
  264. data/test/unit/bio/io/test_soapwsdl.rb +4 -3
  265. data/test/unit/bio/io/test_togows.rb +4 -2
  266. data/test/unit/bio/sequence/test_aa.rb +5 -3
  267. data/test/unit/bio/sequence/test_common.rb +4 -2
  268. data/test/unit/bio/sequence/test_compat.rb +4 -2
  269. data/test/unit/bio/sequence/test_dblink.rb +5 -3
  270. data/test/unit/bio/sequence/test_na.rb +4 -2
  271. data/test/unit/bio/sequence/test_quality_score.rb +330 -0
  272. data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
  273. data/test/unit/bio/test_alignment.rb +5 -3
  274. data/test/unit/bio/test_command.rb +4 -3
  275. data/test/unit/bio/test_db.rb +5 -3
  276. data/test/unit/bio/test_feature.rb +4 -2
  277. data/test/unit/bio/test_location.rb +4 -2
  278. data/test/unit/bio/test_map.rb +5 -3
  279. data/test/unit/bio/test_pathway.rb +4 -2
  280. data/test/unit/bio/test_reference.rb +4 -2
  281. data/test/unit/bio/test_sequence.rb +5 -3
  282. data/test/unit/bio/test_shell.rb +5 -3
  283. data/test/unit/bio/test_tree.rb +6 -6
  284. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
  285. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
  286. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
  287. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
  288. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
  289. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
  290. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
  291. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
  292. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
  293. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
  294. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
  295. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
  296. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
  297. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
  298. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
  299. data/test/unit/bio/util/test_color_scheme.rb +5 -3
  300. data/test/unit/bio/util/test_contingency_table.rb +5 -3
  301. data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
  302. data/test/unit/bio/util/test_sirna.rb +6 -4
  303. metadata +147 -2
@@ -0,0 +1,202 @@
1
+ #
2
+ # = sample/demo_genscan_report.rb - demonstration of Bio::Genscan::Report
3
+ #
4
+ # Copyright:: Copyright (C) 2003
5
+ # Mitsuteru C. Nakao <n@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+ #
9
+ # == Description
10
+ #
11
+ # Demonstration of Bio::Genscan::Report, parser class for Genscan output.
12
+ #
13
+ # == Usage
14
+ #
15
+ # Usage 1: Without arguments, demonstrates using preset sample data.
16
+ #
17
+ # $ ruby demo_genscan_report.rb
18
+ #
19
+ # Usage 2: When a "-" is specified as the argument, read data from stdin.
20
+ #
21
+ # $ cat testdata | ruby demo_genscan_report.rb -
22
+ #
23
+ # Usage 3: Specify a file containing a Genscan output.
24
+ #
25
+ # $ ruby demo_genscan_report.rb file
26
+ #
27
+ # Example usage using test data:
28
+ #
29
+ # $ ruby -Ilib sample/demo_genscan_report.rb test/data/genscan/sample.report
30
+ #
31
+ # == Development information
32
+ #
33
+ # The code was moved from lib/bio/appl/genscan/report.rb and modified:
34
+ # * Changed the way to read preset sample data.
35
+ #
36
+
37
+ require 'bio'
38
+
39
+ #if __FILE__ == $0
40
+
41
+ if ARGV.empty? then # no arguments: use the sample report stored after __END__ (read through DATA)
42
+ report = DATA.read
43
+ elsif ARGV.size == 1 and ARGV[0] == '-' then # a lone "-" argument: read the report from stdin
44
+ ARGV.shift # remove the "-" from ARGV before reading through $<
45
+ report = $<.read
46
+ else # otherwise read the file(s) named on the command line
47
+ report = ARGF.read
48
+ end
49
+
50
+ puts "= class Bio::Genscan::Report " # heading for the report-level attributes printed below
51
+ report = Bio::Genscan::Report.new(report) # rebinds `report` from the raw text to the parsed report object
52
+
53
+
54
+ print " report.genscan_version #=> " # pattern used throughout: print a label, then p the inspected value
55
+ p report.genscan_version
56
+ print " report.date_run #=> "
57
+ p report.date_run
58
+ print " report.time #=> "
59
+ p report.time
60
+
61
+ print " report.query_name #=> "
62
+ p report.query_name
63
+ print " report.length #=> "
64
+ p report.length
65
+ print " report.gccontent #=> "
66
+ p report.gccontent
67
+ print " report.isochore #=> "
68
+ p report.isochore
69
+
70
+ print " report.matrix #=> "
71
+ p report.matrix
72
+
73
+ puts " report.predictions (Array of Bio::Genscan::Report::Gene) "
74
+ print " report.predictions.size #=> "
75
+ p report.predictions.size
76
+
77
+
78
+ report.predictions.each {|gene|
79
+ puts "\n== class Bio::Genscan::Report::Gene "
80
+ print " gene.number #=> "
81
+ p gene.number
82
+ print " gene.aaseq (Bio::FastaFormat) #=> "
83
+ p gene.aaseq
84
+ print " gene.naseq (Bio::FastaFormat) #=> "
85
+ p gene.naseq
86
+ print " ene.promoter (Bio::Genscan::Report::Exon) #=> "
87
+ p gene.promoter
88
+ print " gene.polyA (Bio::Genscan::Report::Exon) #=> "
89
+ p gene.polyA
90
+ puts " gene.exons (Array of Bio::Genscan::Report::Exon) "
91
+ print " gene.exons.size #=> "
92
+ p gene.exons.size
93
+
94
+
95
+ gene.exons.each {|exon|
96
+ puts "\n== class Bio::Genscan::Report::Exon "
97
+ print " exon.number #=> "
98
+ p exon.number
99
+ print " exon.exon_type #=> "
100
+ p exon.exon_type
101
+ print " exon.exon_type_long #=> "
102
+ p exon.exon_type_long
103
+ print " exon.strand #=> "
104
+ p exon.strand
105
+ print " exon.first #=> "
106
+ p exon.first
107
+ print " exon.last #=> "
108
+ p exon.last
109
+ print " exon.range (Range) #=> "
110
+ p exon.range
111
+ print " exon.frame #=> "
112
+ p exon.frame
113
+ print " exon.phase #=> "
114
+ p exon.phase
115
+ print " exon.acceptor_score #=> "
116
+ p exon.acceptor_score
117
+ print " exon.donor_score #=> "
118
+ p exon.donor_score
119
+ print " exon.initiation_score #=> "
120
+ p exon.initiation_score
121
+ print " exon.termination_score #=> "
122
+ p exon.termination_score
123
+ print " exon.score #=> "
124
+ p exon.score
125
+ print " exon.p_value #=> "
126
+ p exon.p_value
127
+ print " exon.t_score #=> "
128
+ p exon.t_score
129
+ puts
130
+ }
131
+ puts
132
+ }
133
+
134
+ #end
135
+
136
+ ### Sample Genscan report is attached below.
137
+ ### The lines after the "__END__" can be accessed by using "DATA".
138
+
139
+ __END__
140
+ GENSCAN 1.0 Date run: 30-May-103 Time: 14:06:28
141
+
142
+ Sequence HUMRASH : 12942 bp : 68.17% C+G : Isochore 4 (57 - 100 C+G%)
143
+
144
+ Parameter matrix: HumanIso.smat
145
+
146
+ Predicted genes/exons:
147
+
148
+ Gn.Ex Type S .Begin ...End .Len Fr Ph I/Ac Do/T CodRg P.... Tscr..
149
+ ----- ---- - ------ ------ ---- -- -- ---- ---- ----- ----- ------
150
+
151
+ 1.01 Init + 1664 1774 111 1 0 94 83 212 0.997 21.33
152
+ 1.02 Intr + 2042 2220 179 1 2 104 66 408 0.997 40.12
153
+ 1.03 Intr + 2374 2533 160 1 1 89 94 302 0.999 32.08
154
+ 1.04 Term + 3231 3350 120 2 0 115 48 202 0.980 18.31
155
+ 1.05 PlyA + 3722 3727 6 -5.80
156
+
157
+ 2.00 Prom + 6469 6508 40 -7.92
158
+ 2.01 Init + 8153 8263 111 1 0 94 83 212 0.998 21.33
159
+ 2.02 Intr + 8531 8709 179 1 2 104 66 408 0.997 40.12
160
+ 2.03 Intr + 8863 9022 160 1 1 89 94 302 0.999 32.08
161
+ 2.04 Term + 9720 9839 120 2 0 115 48 202 0.961 18.31
162
+
163
+ Predicted peptide sequence(s):
164
+
165
+ Predicted coding sequence(s):
166
+
167
+
168
+ >HUMRASH|GENSCAN_predicted_peptide_1|189_aa
169
+ MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAG
170
+ QEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDL
171
+ AARTVESRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQHKLRKLNPPDESGPG
172
+ CMSCKCVLS
173
+
174
+ >HUMRASH|GENSCAN_predicted_CDS_1|570_bp
175
+ atgacggaatataagctggtggtggtgggcgccggcggtgtgggcaagagtgcgctgacc
176
+ atccagctgatccagaaccattttgtggacgaatacgaccccactatagaggattcctac
177
+ cggaagcaggtggtcattgatggggagacgtgcctgttggacatcctggataccgccggc
178
+ caggaggagtacagcgccatgcgggaccagtacatgcgcaccggggagggcttcctgtgt
179
+ gtgtttgccatcaacaacaccaagtcttttgaggacatccaccagtacagggagcagatc
180
+ aaacgggtgaaggactcggatgacgtgcccatggtgctggtggggaacaagtgtgacctg
181
+ gctgcacgcactgtggaatctcggcaggctcaggacctcgcccgaagctacggcatcccc
182
+ tacatcgagacctcggccaagacccggcagggagtggaggatgccttctacacgttggtg
183
+ cgtgagatccggcagcacaagctgcggaagctgaaccctcctgatgagagtggccccggc
184
+ tgcatgagctgcaagtgtgtgctctcctga
185
+
186
+ >HUMRASH|GENSCAN_predicted_peptide_2|189_aa
187
+ MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAG
188
+ QEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDL
189
+ AARTVESRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQHKLRKLNPPDESGPG
190
+ CMSCKCVLS
191
+
192
+ >HUMRASH|GENSCAN_predicted_CDS_2|570_bp
193
+ atgacggaatataagctggtggtggtgggcgccggcggtgtgggcaagagtgcgctgacc
194
+ atccagctgatccagaaccattttgtggacgaatacgaccccactatagaggattcctac
195
+ cggaagcaggtggtcattgatggggagacgtgcctgttggacatcctggataccgccggc
196
+ caggaggagtacagcgccatgcgggaccagtacatgcgcaccggggagggcttcctgtgt
197
+ gtgtttgccatcaacaacaccaagtcttttgaggacatccaccagtacagggagcagatc
198
+ aaacgggtgaaggactcggatgacgtgcccatggtgctggtggggaacaagtgtgacctg
199
+ gctgcacgcactgtggaatctcggcaggctcaggacctcgcccgaagctacggcatcccc
200
+ tacatcgagacctcggccaagacccggcagggagtggaggatgccttctacacgttggtg
201
+ cgtgagatccggcagcacaagctgcggaagctgaaccctcctgatgagagtggccccggc
202
+ tgcatgagctgcaagtgtgtgctctcctga
@@ -0,0 +1,49 @@
1
+ #
2
+ # = sample/demo_gff1.rb - very simple demonstration of Bio::GFF
3
+ #
4
+ # Copyright:: Copyright (C) 2003, 2005
5
+ # Toshiaki Katayama <k@bioruby.org>
6
+ # 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
7
+ # 2008 Naohisa Goto <ng@bioruby.org>
8
+ # License:: The Ruby License
9
+ #
10
+ #
11
+ # == Description
12
+ #
13
+ # Very simple demonstration of Bio::GFF, parser classes for GFF formatted
14
+ # text.
15
+ #
16
+ # == Usage
17
+ #
18
+ # Simply run this script.
19
+ #
20
+ # $ ruby demo_gff1.rb
21
+ #
22
+ # == To do
23
+ #
24
+ # Bio::GFF and related classes have many functions, and we should write
25
+ # more example and/or demonstration codes.
26
+ #
27
+ # == Development information
28
+ #
29
+ # The code was moved from lib/bio/db/gff.rb.
30
+ #
31
+
32
+ require 'bio'
33
+
34
+ #if __FILE__ == $0
35
+ begin # prefer pretty-printed output when the pp library can be loaded
36
+ require 'pp'
37
+ alias p pp # from here on, calls to p go through pp
38
+ rescue LoadError # pp could not be loaded: nothing to do, p keeps its normal behavior
39
+ end
40
+
41
+ this_gff = "SEQ1\tEMBL\tatg\t103\t105\t.\t+\t0\n" # sample GFF text: one record per line, eight tab-separated fields
42
+ this_gff << "SEQ1\tEMBL\texon\t103\t172\t.\t+\t0\n"
43
+ this_gff << "SEQ1\tEMBL\tsplice5\t172\t173\t.\t+\t.\n"
44
+ this_gff << "SEQ1\tnetgene\tsplice5\t172\t173\t0.94\t+\t.\n"
45
+ this_gff << "SEQ1\tgenie\tsp5-20\t163\t182\t2.3\t+\t.\n"
46
+ this_gff << "SEQ1\tgenie\tsp5-10\t168\t177\t2.1\t+\t.\n"
47
+ this_gff << "SEQ1\tgrail\tATG\t17\t19\t2.1\t-\t0\n"
48
+ p Bio::GFF.new(this_gff) # parse the accumulated text and print the resulting object
49
+ #end
@@ -0,0 +1,98 @@
1
+ #
2
+ # = sample/demo_go.rb - demonstration of Bio::GO, classes for Gene Ontology
3
+ #
4
+ # Copyright:: Copyright (C) 2003
5
+ # Mitsuteru C. Nakao <n@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+ #
9
+ # == Description
10
+ #
11
+ # Demonstration of Bio::GO, classes for Gene Ontology.
12
+ #
13
+ # == Requirement
14
+ #
15
+ # Internet connection is needed.
16
+ #
17
+ # == Usage
18
+ #
19
+ # Simply run this script.
20
+ #
21
+ # $ ruby demo_go.rb
22
+ #
23
+ # == Note
24
+ #
25
+ # The code was originally written in 2003, and it can only parse a GO format
26
+ # that is now deprecated; no new data in that format is available after August 2009.
27
+ #
28
+ # == Development information
29
+ #
30
+ # The code was moved from lib/bio/db/go.rb.
31
+ #
32
+
33
+ require 'bio'
34
+
35
+ #if __FILE__ == $0
36
+
37
+ def wget(url) # helper: returns whatever Bio::Command.read_uri returns for the given URL
38
+ Bio::Command.read_uri(url)
39
+ end
40
+
41
+
42
+
43
+ go_c_url = 'http://www.geneontology.org/ontology/component.ontology' # input for the Bio::GO::Ontology demo
44
+ ga_url = 'http://www.geneontology.org/gene-associations/gene_association.sgd.gz' # gzip-compressed input for the gene association demo
45
+ e2g_url = 'http://www.geneontology.org/external2go/spkw2go' # input for the Bio::GO::External2go demo
46
+
47
+
48
+
49
+ puts "\n #==> Bio::GO::Ontology"
50
+ p go_c_url
51
+ component_ontology = wget(go_c_url) # downloads the ontology text (network access required)
52
+ comp = Bio::GO::Ontology.new(component_ontology)
53
+
54
+ [['0003673', '0005632'], # each pair holds the two GO ids handed to bfs_shortest_path below
55
+ ['0003673', '0005619'],
56
+ ['0003673', '0004649']].each {|pair|
57
+ puts
58
+ p pair
59
+ p [:pair, pair.map {|i| [comp.id2term[i], comp.goid2term(i)] }] # show both lookups (id2term and goid2term) for each id
60
+ puts "\n #==> comp.bfs_shortest_path(pair[0], pair[1])"
61
+ p comp.bfs_shortest_path(pair[0], pair[1])
62
+ }
63
+
64
+
65
+ puts "\n #==> Bio::GO::External2go" # section: parse the downloaded spkw2go mapping and show parts of it
66
+ p e2g_url
67
+ spkw2go = Bio::GO::External2go.parser(wget(e2g_url)) # download (network access required) and parse in one step
68
+
69
+ puts "\n #==> spkw2go.dbs"
70
+ p spkw2go.dbs
71
+
72
+ puts "\n #==> spkw2go[1]"
73
+ p spkw2go[1]
74
+
75
+
76
+
77
+ require 'zlib'
78
+ puts "\n #==> Bio::GO::GeenAssociation"
79
+ p ga_url
80
+ #
81
+ # The workaround (Zlib::MAX_WBITS + 32) is taken from:
82
+ # http://d.hatena.ne.jp/ksef-3go/20070924/1190563143
83
+ #
84
+ ga = Zlib::Inflate.new(Zlib::MAX_WBITS + 32).inflate(wget(ga_url))
85
+ #ga = Zlib::Inflate.inflate(wget(ga_url))
86
+ ga = Bio::GO::GeneAssociation.parser(ga)
87
+
88
+ puts "\n #==> ga.size"
89
+ p ga.size
90
+
91
+ puts "\n #==> ga[100]"
92
+ p ga[100]
93
+
94
+
95
+
96
+
97
+
98
+ #end
@@ -0,0 +1,149 @@
1
+ #
2
+ # = sample/demo_hmmer_report.rb - demonstration of Bio::HMMER::Report
3
+ #
4
+ # Copyright:: Copyright (C) 2002
5
+ # Hiroshi Suga <suga@biophys.kyoto-u.ac.jp>,
6
+ # Copyright:: Copyright (C) 2005
7
+ # Masashi Fujita <fujita@kuicr.kyoto-u.ac.jp>
8
+ # License:: The Ruby License
9
+ #
10
+ #
11
+ # == Description
12
+ #
13
+ # Demonstration of Bio::HMMER::Report (HMMER output parser).
14
+ #
15
+ # Note that it (and Bio::HMMER::Report) supports HMMER 2.x.
16
+ # HMMER 3.x is currently not supported.
17
+ #
18
+ # == Usage
19
+ #
20
+ # Specify a file containing a HMMER result.
21
+ #
22
+ # $ ruby demo_hmmer_report.rb file
23
+ #
24
+ # Example usage using test data:
25
+ #
26
+ # $ ruby -Ilib sample/demo_hmmer_report.rb test/data/HMMER/hmmsearch.out
27
+ # $ ruby -Ilib sample/demo_hmmer_report.rb test/data/HMMER/hmmpfam.out
28
+ #
29
+ # == Development information
30
+ #
31
+ # The code was moved from lib/bio/appl/hmmer/report.rb.
32
+ #
33
+
34
+ require 'bio'
35
+
36
+ #if __FILE__ == $0
37
+
38
+ =begin
39
+
40
+ #
41
+ # for multiple reports in a single output file (hmmpfam)
42
+ #
43
+ Bio::HMMER.reports(ARGF.read) do |report|
44
+ report.hits.each do |hit|
45
+ hit.hsps.each do |hsp|
46
+ end
47
+ end
48
+ end
49
+
50
+ =end
51
+
52
+ begin # prefer pretty-printed output when the pp library can be loaded
53
+ require 'pp'
54
+ alias p pp # from here on, calls to p go through pp
55
+ rescue LoadError # pp could not be loaded: nothing to do, p keeps its normal behavior
56
+ end
57
+
58
+ rep = Bio::HMMER::Report.new(ARGF.read) # parse the HMMER output read from the named file(s) or stdin
59
+ p rep
60
+
61
+ indent = 18 # width to which every label below is padded with rjust
62
+
63
+ puts "### hmmer result"
64
+ print "name : ".rjust(indent) # pattern used throughout: print a padded label, then p the value
65
+ p rep.program['name']
66
+ print "version : ".rjust(indent)
67
+ p rep.program['version']
68
+ print "copyright : ".rjust(indent)
69
+ p rep.program['copyright']
70
+ print "license : ".rjust(indent)
71
+ p rep.program['license']
72
+
73
+ print "HMM file : ".rjust(indent)
74
+ p rep.parameter['HMM file']
75
+ print "Sequence file : ".rjust(indent)
76
+ p rep.parameter['Sequence file']
77
+
78
+ print "Query sequence : ".rjust(indent)
79
+ p rep.query_info['Query sequence']
80
+ print "Accession : ".rjust(indent)
81
+ p rep.query_info['Accession']
82
+ print "Description : ".rjust(indent)
83
+ p rep.query_info['Description']
84
+
85
+ rep.each do |hit| # dump every hit of the report, then each hsp of that hit
86
+ puts "## each hit"
87
+ print "accession : ".rjust(indent)
88
+ p [ hit.accession, hit.target_id, hit.hit_id, hit.entry_id ] # several accessors shown side by side for comparison
89
+ print "description : ".rjust(indent)
90
+ p [ hit.description, hit.definition ]
91
+ print "target_def : ".rjust(indent)
92
+ p hit.target_def
93
+ print "score : ".rjust(indent)
94
+ p [ hit.score, hit.bit_score ]
95
+ print "evalue : ".rjust(indent)
96
+ p hit.evalue
97
+ print "num : ".rjust(indent)
98
+ p hit.num
99
+
100
+ hit.each do |hsp| # per-hsp attributes, printed with the same padded-label pattern
101
+ puts "## each hsp"
102
+ print "accession : ".rjust(indent)
103
+ p [ hsp.accession, hsp.target_id ]
104
+ print "domain : ".rjust(indent)
105
+ p hsp.domain
106
+ print "seq_f : ".rjust(indent)
107
+ p hsp.seq_f
108
+ print "seq_t : ".rjust(indent)
109
+ p hsp.seq_t
110
+ print "seq_ft : ".rjust(indent)
111
+ p hsp.seq_ft
112
+ print "hmm_f : ".rjust(indent)
113
+ p hsp.hmm_f
114
+ print "hmm_t : ".rjust(indent)
115
+ p hsp.hmm_t
116
+ print "hmm_ft : ".rjust(indent)
117
+ p hsp.hmm_ft
118
+ print "score : ".rjust(indent)
119
+ p [ hsp.score, hsp.bit_score ]
120
+ print "evalue : ".rjust(indent)
121
+ p hsp.evalue
122
+ print "midline : ".rjust(indent)
123
+ p hsp.midline
124
+ print "hmmseq : ".rjust(indent)
125
+ p hsp.hmmseq
126
+ print "flatseq : ".rjust(indent)
127
+ p hsp.flatseq
128
+ print "query_frame : ".rjust(indent)
129
+ p hsp.query_frame
130
+ print "target_frame : ".rjust(indent)
131
+ p hsp.target_frame
132
+
133
+ print "query_seq : ".rjust(indent)
134
+ p hsp.query_seq # hmmseq, flatseq
135
+ print "target_seq : ".rjust(indent)
136
+ p hsp.target_seq # flatseq, hmmseq
137
+ print "target_from : ".rjust(indent)
138
+ p hsp.target_from # seq_f, hmm_f
139
+ print "target_to : ".rjust(indent)
140
+ p hsp.target_to # seq_t, hmm_t
141
+ print "query_from : ".rjust(indent)
142
+ p hsp.query_from # hmm_f, seq_f
143
+ print "query_to : ".rjust(indent)
144
+ p hsp.query_to # hmm_t, seq_t
145
+ end
146
+ end
147
+
148
+ #end
149
+