bio 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (303)
  1. data/ChangeLog +2105 -3728
  2. data/KNOWN_ISSUES.rdoc +35 -3
  3. data/README.rdoc +8 -2
  4. data/RELEASE_NOTES.rdoc +166 -0
  5. data/bin/bioruby +4 -1
  6. data/bioruby.gemspec +146 -1
  7. data/bioruby.gemspec.erb +3 -1
  8. data/doc/ChangeLog-before-1.3.1 +3961 -0
  9. data/doc/Tutorial.rd +154 -22
  10. data/doc/Tutorial.rd.html +125 -68
  11. data/lib/bio.rb +21 -6
  12. data/lib/bio/appl/bl2seq/report.rb +11 -202
  13. data/lib/bio/appl/blast/format0.rb +0 -193
  14. data/lib/bio/appl/blast/report.rb +2 -147
  15. data/lib/bio/appl/blast/wublast.rb +0 -208
  16. data/lib/bio/appl/fasta.rb +4 -19
  17. data/lib/bio/appl/fasta/format10.rb +0 -14
  18. data/lib/bio/appl/genscan/report.rb +0 -176
  19. data/lib/bio/appl/hmmer.rb +1 -15
  20. data/lib/bio/appl/hmmer/report.rb +0 -100
  21. data/lib/bio/appl/meme/mast.rb +156 -0
  22. data/lib/bio/appl/meme/mast/report.rb +91 -0
  23. data/lib/bio/appl/meme/motif.rb +48 -0
  24. data/lib/bio/appl/psort.rb +0 -111
  25. data/lib/bio/appl/psort/report.rb +1 -45
  26. data/lib/bio/appl/pts1.rb +2 -4
  27. data/lib/bio/appl/sosui/report.rb +5 -54
  28. data/lib/bio/appl/targetp/report.rb +1 -104
  29. data/lib/bio/appl/tmhmm/report.rb +0 -36
  30. data/lib/bio/command.rb +94 -10
  31. data/lib/bio/data/aa.rb +1 -77
  32. data/lib/bio/data/codontable.rb +1 -95
  33. data/lib/bio/data/na.rb +1 -26
  34. data/lib/bio/db/aaindex.rb +1 -38
  35. data/lib/bio/db/fasta.rb +1 -134
  36. data/lib/bio/db/fasta/format_qual.rb +204 -0
  37. data/lib/bio/db/fasta/qual.rb +102 -0
  38. data/lib/bio/db/fastq.rb +645 -0
  39. data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
  40. data/lib/bio/db/fastq/format_fastq.rb +175 -0
  41. data/lib/bio/db/genbank/genbank.rb +1 -86
  42. data/lib/bio/db/gff.rb +0 -17
  43. data/lib/bio/db/go.rb +4 -72
  44. data/lib/bio/db/kegg/common.rb +112 -0
  45. data/lib/bio/db/kegg/compound.rb +29 -20
  46. data/lib/bio/db/kegg/drug.rb +74 -34
  47. data/lib/bio/db/kegg/enzyme.rb +26 -5
  48. data/lib/bio/db/kegg/genes.rb +128 -15
  49. data/lib/bio/db/kegg/genome.rb +3 -41
  50. data/lib/bio/db/kegg/glycan.rb +19 -24
  51. data/lib/bio/db/kegg/orthology.rb +16 -56
  52. data/lib/bio/db/kegg/reaction.rb +81 -28
  53. data/lib/bio/db/kegg/taxonomy.rb +1 -52
  54. data/lib/bio/db/litdb.rb +1 -16
  55. data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
  56. data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
  57. data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
  58. data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
  59. data/lib/bio/db/prosite.rb +2 -95
  60. data/lib/bio/db/rebase.rb +5 -6
  61. data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
  62. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
  63. data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
  64. data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
  65. data/lib/bio/io/das.rb +0 -44
  66. data/lib/bio/io/ddbjxml.rb +1 -181
  67. data/lib/bio/io/flatfile.rb +1 -7
  68. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  69. data/lib/bio/io/keggapi.rb +0 -442
  70. data/lib/bio/io/ncbirest.rb +130 -132
  71. data/lib/bio/io/ncbisoap.rb +2 -1
  72. data/lib/bio/io/pubmed.rb +0 -88
  73. data/lib/bio/location.rb +0 -73
  74. data/lib/bio/pathway.rb +0 -171
  75. data/lib/bio/sequence.rb +18 -1
  76. data/lib/bio/sequence/adapter.rb +3 -0
  77. data/lib/bio/sequence/format.rb +16 -0
  78. data/lib/bio/sequence/quality_score.rb +205 -0
  79. data/lib/bio/tree.rb +70 -5
  80. data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
  81. data/lib/bio/util/sirna.rb +1 -23
  82. data/lib/bio/version.rb +1 -1
  83. data/sample/demo_aaindex.rb +67 -0
  84. data/sample/demo_aminoacid.rb +101 -0
  85. data/sample/demo_bl2seq_report.rb +220 -0
  86. data/sample/demo_blast_report.rb +285 -0
  87. data/sample/demo_codontable.rb +119 -0
  88. data/sample/demo_das.rb +105 -0
  89. data/sample/demo_ddbjxml.rb +212 -0
  90. data/sample/demo_fasta_remote.rb +51 -0
  91. data/sample/demo_fastaformat.rb +105 -0
  92. data/sample/demo_genbank.rb +132 -0
  93. data/sample/demo_genscan_report.rb +202 -0
  94. data/sample/demo_gff1.rb +49 -0
  95. data/sample/demo_go.rb +98 -0
  96. data/sample/demo_hmmer_report.rb +149 -0
  97. data/sample/demo_kegg_compound.rb +57 -0
  98. data/sample/demo_kegg_drug.rb +65 -0
  99. data/sample/demo_kegg_genome.rb +74 -0
  100. data/sample/demo_kegg_glycan.rb +72 -0
  101. data/sample/demo_kegg_orthology.rb +62 -0
  102. data/sample/demo_kegg_reaction.rb +66 -0
  103. data/sample/demo_kegg_taxonomy.rb +92 -0
  104. data/sample/demo_keggapi.rb +502 -0
  105. data/sample/demo_litdb.rb +42 -0
  106. data/sample/demo_locations.rb +99 -0
  107. data/sample/demo_ncbi_rest.rb +130 -0
  108. data/sample/demo_nucleicacid.rb +49 -0
  109. data/sample/demo_pathway.rb +196 -0
  110. data/sample/demo_prosite.rb +120 -0
  111. data/sample/demo_psort.rb +138 -0
  112. data/sample/demo_psort_report.rb +70 -0
  113. data/sample/demo_pubmed.rb +118 -0
  114. data/sample/demo_sirna.rb +63 -0
  115. data/sample/demo_sosui_report.rb +89 -0
  116. data/sample/demo_targetp_report.rb +135 -0
  117. data/sample/demo_tmhmm_report.rb +68 -0
  118. data/sample/pmfetch.rb +13 -4
  119. data/sample/pmsearch.rb +15 -4
  120. data/sample/test_phyloxml_big.rb +205 -0
  121. data/test/bioruby_test_helper.rb +61 -0
  122. data/test/data/KEGG/1.1.1.1.enzyme +935 -0
  123. data/test/data/KEGG/C00025.compound +102 -0
  124. data/test/data/KEGG/D00063.drug +104 -0
  125. data/test/data/KEGG/G00024.glycan +47 -0
  126. data/test/data/KEGG/G01366.glycan +18 -0
  127. data/test/data/KEGG/K02338.orthology +902 -0
  128. data/test/data/KEGG/R00006.reaction +14 -0
  129. data/test/data/fastq/README.txt +109 -0
  130. data/test/data/fastq/error_diff_ids.fastq +20 -0
  131. data/test/data/fastq/error_double_qual.fastq +22 -0
  132. data/test/data/fastq/error_double_seq.fastq +22 -0
  133. data/test/data/fastq/error_long_qual.fastq +20 -0
  134. data/test/data/fastq/error_no_qual.fastq +20 -0
  135. data/test/data/fastq/error_qual_del.fastq +20 -0
  136. data/test/data/fastq/error_qual_escape.fastq +20 -0
  137. data/test/data/fastq/error_qual_null.fastq +0 -0
  138. data/test/data/fastq/error_qual_space.fastq +21 -0
  139. data/test/data/fastq/error_qual_tab.fastq +21 -0
  140. data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
  141. data/test/data/fastq/error_qual_vtab.fastq +20 -0
  142. data/test/data/fastq/error_short_qual.fastq +20 -0
  143. data/test/data/fastq/error_spaces.fastq +20 -0
  144. data/test/data/fastq/error_tabs.fastq +21 -0
  145. data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
  146. data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
  147. data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
  148. data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
  149. data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
  150. data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
  151. data/test/data/fastq/error_trunc_in_title.fastq +17 -0
  152. data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
  153. data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
  154. data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
  155. data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
  156. data/test/data/fastq/longreads_as_illumina.fastq +40 -0
  157. data/test/data/fastq/longreads_as_sanger.fastq +40 -0
  158. data/test/data/fastq/longreads_as_solexa.fastq +40 -0
  159. data/test/data/fastq/longreads_original_sanger.fastq +120 -0
  160. data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
  161. data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
  162. data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
  163. data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
  164. data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
  165. data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
  166. data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
  167. data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
  168. data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
  169. data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
  170. data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
  171. data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
  172. data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
  173. data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
  174. data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
  175. data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
  176. data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
  177. data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
  178. data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
  179. data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
  180. data/test/data/meme/db +0 -0
  181. data/test/data/meme/mast +0 -0
  182. data/test/data/meme/mast.out +13 -0
  183. data/test/data/meme/meme.out +3 -0
  184. data/test/data/phyloxml/apaf.xml +666 -0
  185. data/test/data/phyloxml/bcl_2.xml +2097 -0
  186. data/test/data/phyloxml/made_up.xml +144 -0
  187. data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
  188. data/test/data/phyloxml/phyloxml_examples.xml +415 -0
  189. data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
  190. data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
  191. data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
  192. data/test/functional/bio/appl/test_pts1.rb +7 -5
  193. data/test/functional/bio/io/test_ensembl.rb +4 -3
  194. data/test/functional/bio/io/test_pubmed.rb +9 -3
  195. data/test/functional/bio/io/test_soapwsdl.rb +5 -4
  196. data/test/functional/bio/io/test_togows.rb +5 -4
  197. data/test/functional/bio/sequence/test_output_embl.rb +6 -4
  198. data/test/functional/bio/test_command.rb +54 -5
  199. data/test/runner.rb +5 -3
  200. data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
  201. data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
  202. data/test/unit/bio/appl/blast/test_report.rb +5 -4
  203. data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
  204. data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
  205. data/test/unit/bio/appl/genscan/test_report.rb +8 -9
  206. data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
  207. data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
  208. data/test/unit/bio/appl/mafft/test_report.rb +6 -5
  209. data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
  210. data/test/unit/bio/appl/meme/test_mast.rb +103 -0
  211. data/test/unit/bio/appl/meme/test_motif.rb +38 -0
  212. data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
  213. data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
  214. data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
  215. data/test/unit/bio/appl/sim4/test_report.rb +5 -4
  216. data/test/unit/bio/appl/sosui/test_report.rb +6 -5
  217. data/test/unit/bio/appl/targetp/test_report.rb +5 -3
  218. data/test/unit/bio/appl/test_blast.rb +5 -4
  219. data/test/unit/bio/appl/test_fasta.rb +4 -2
  220. data/test/unit/bio/appl/test_pts1.rb +4 -2
  221. data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
  222. data/test/unit/bio/data/test_aa.rb +5 -3
  223. data/test/unit/bio/data/test_codontable.rb +5 -4
  224. data/test/unit/bio/data/test_na.rb +5 -3
  225. data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
  226. data/test/unit/bio/db/embl/test_common.rb +4 -2
  227. data/test/unit/bio/db/embl/test_embl.rb +6 -6
  228. data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
  229. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
  230. data/test/unit/bio/db/embl/test_sptr.rb +6 -8
  231. data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
  232. data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
  233. data/test/unit/bio/db/kegg/test_compound.rb +146 -0
  234. data/test/unit/bio/db/kegg/test_drug.rb +194 -0
  235. data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
  236. data/test/unit/bio/db/kegg/test_genes.rb +32 -4
  237. data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
  238. data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
  239. data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
  240. data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
  241. data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
  242. data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
  243. data/test/unit/bio/db/test_aaindex.rb +6 -6
  244. data/test/unit/bio/db/test_fasta.rb +5 -46
  245. data/test/unit/bio/db/test_fastq.rb +829 -0
  246. data/test/unit/bio/db/test_gff.rb +4 -2
  247. data/test/unit/bio/db/test_lasergene.rb +7 -5
  248. data/test/unit/bio/db/test_medline.rb +4 -2
  249. data/test/unit/bio/db/test_newick.rb +6 -6
  250. data/test/unit/bio/db/test_nexus.rb +4 -2
  251. data/test/unit/bio/db/test_phyloxml.rb +769 -0
  252. data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
  253. data/test/unit/bio/db/test_prosite.rb +6 -5
  254. data/test/unit/bio/db/test_qual.rb +63 -0
  255. data/test/unit/bio/db/test_rebase.rb +5 -3
  256. data/test/unit/bio/db/test_soft.rb +7 -6
  257. data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
  258. data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
  259. data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
  260. data/test/unit/bio/io/test_ddbjxml.rb +4 -3
  261. data/test/unit/bio/io/test_ensembl.rb +5 -3
  262. data/test/unit/bio/io/test_fastacmd.rb +4 -3
  263. data/test/unit/bio/io/test_flatfile.rb +6 -5
  264. data/test/unit/bio/io/test_soapwsdl.rb +4 -3
  265. data/test/unit/bio/io/test_togows.rb +4 -2
  266. data/test/unit/bio/sequence/test_aa.rb +5 -3
  267. data/test/unit/bio/sequence/test_common.rb +4 -2
  268. data/test/unit/bio/sequence/test_compat.rb +4 -2
  269. data/test/unit/bio/sequence/test_dblink.rb +5 -3
  270. data/test/unit/bio/sequence/test_na.rb +4 -2
  271. data/test/unit/bio/sequence/test_quality_score.rb +330 -0
  272. data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
  273. data/test/unit/bio/test_alignment.rb +5 -3
  274. data/test/unit/bio/test_command.rb +4 -3
  275. data/test/unit/bio/test_db.rb +5 -3
  276. data/test/unit/bio/test_feature.rb +4 -2
  277. data/test/unit/bio/test_location.rb +4 -2
  278. data/test/unit/bio/test_map.rb +5 -3
  279. data/test/unit/bio/test_pathway.rb +4 -2
  280. data/test/unit/bio/test_reference.rb +4 -2
  281. data/test/unit/bio/test_sequence.rb +5 -3
  282. data/test/unit/bio/test_shell.rb +5 -3
  283. data/test/unit/bio/test_tree.rb +6 -6
  284. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
  285. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
  286. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
  287. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
  288. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
  289. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
  290. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
  291. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
  292. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
  293. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
  294. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
  295. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
  296. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
  297. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
  298. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
  299. data/test/unit/bio/util/test_color_scheme.rb +5 -3
  300. data/test/unit/bio/util/test_contingency_table.rb +5 -3
  301. data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
  302. data/test/unit/bio/util/test_sirna.rb +6 -4
  303. metadata +147 -2
@@ -0,0 +1,89 @@
1
+ #
2
+ # = sample/demo_sosui_report.rb - demonstration of Bio::SOSUI::Report
3
+ #
4
+ # Copyright:: Copyright (C) 2003
5
+ # Mitsuteru C. Nakao <n@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+ #
9
+ # == Description
10
+ #
11
+ # Demonstration of Bio::SOSUI::Report, SOSUI output parser.
12
+ #
13
+ # SOSUI performs classification and secondary structure prediction
14
+ # of membrane proteins.
15
+ #
16
+ # == Usage
17
+ #
18
+ # Usage 1: Without arguments, runs demo using preset example data.
19
+ #
20
+ # $ ruby demo_sosui_report.rb
21
+ #
22
+ # Usage 2: Specify files containing SOSUI reports.
23
+ #
24
+ # $ ruby demo_sosui_report.rb files...
25
+ #
26
+ # Example usage using test data:
27
+ #
28
+ # $ ruby -Ilib sample/demo_sosui_report.rb test/data/SOSUI/sample.report
29
+ #
30
+ # == References
31
+ #
32
+ # * http://bp.nuap.nagoya-u.ac.jp/sosui/
33
+ #
34
+ # == Development information
35
+ #
36
+ # The code was moved from lib/bio/appl/sosui/report.rb, and modified as below:
37
+ # * Disables internal sample data when arguments are specified.
38
+ # * Method name is changed.
39
+ # * Fixes a bug in the tmhs demo.
40
+
41
+ require 'bio'
42
+
43
+ begin
44
+ require 'pp'
45
+ alias p pp
46
+ rescue LoadError
47
+ end
48
+
49
+
50
+ sample = <<HOGE
51
+ >HOGE1
52
+ MEMBRANE PROTEIN
53
+ NUMBER OF TM HELIX = 6
54
+ TM 1 12- 34 SECONDARY LLVPILLPEKCYDQLFVQWDLLH
55
+ TM 2 36- 58 PRIMARY PCLKILLSKGLGLGIVAGSLLVK
56
+ TM 3 102- 124 SECONDARY SWGEALFLMLQTITICFLVMHYR
57
+ TM 4 126- 148 PRIMARY QTVKGVAFLACYGLVLLVLLSPL
58
+ TM 5 152- 174 SECONDARY TVVTLLQASNVPAVVVGRLLQAA
59
+ TM 6 214- 236 SECONDARY AGTFVVSSLCNGLIAAQLLFYWN
60
+
61
+ >HOGE2
62
+ SOLUBLE PROTEIN
63
+
64
+ HOGE
65
+
66
+ def demo_sosui_report(ent)
67
+ puts '==='
68
+ puts ent
69
+ puts '==='
70
+ sosui = Bio::SOSUI::Report.new(ent)
71
+ p [:entry_id, sosui.entry_id]
72
+ p [:prediction, sosui.prediction]
73
+ p [:tmhs, sosui.tmhs]
74
+ end
75
+
76
+ if ARGV.empty? then
77
+
78
+ sample.split(/#{Bio::SOSUI::Report::DELIMITER}/).each {|ent|
79
+ demo_sosui_report(ent)
80
+ }
81
+
82
+ else
83
+
84
+ while ent = $<.gets(Bio::SOSUI::Report::DELIMITER)
85
+ demo_sosui_report(ent)
86
+ end
87
+
88
+ end
89
+
@@ -0,0 +1,135 @@
1
+ #
2
+ # = sample/demo_targetp_report.rb - demonstration of Bio::TargetP::Report
3
+ #
4
+ # Copyright:: Copyright (C) 2003
5
+ # Mitsuteru C. Nakao <n@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+ #
9
+ # == Description
10
+ #
11
+ # Demonstration of Bio::TargetP::Report, TargetP output parser.
12
+ #
13
+ # == Usage
14
+ #
15
+ # Usage 1: Without arguments, runs demo using preset example data.
16
+ #
17
+ # $ ruby demo_targetp_report.rb
18
+ #
19
+ # Usage 2: Specify files containing TargetP reports.
20
+ #
21
+ # $ ruby demo_targetp_report.rb files...
22
+ #
23
+ # == References
24
+ #
25
+ # * http://www.cbs.dtu.dk/services/TargetP/
26
+ #
27
+ # == Development information
28
+ #
29
+ # The code was moved from lib/bio/appl/targetp/report.rb, and modified
30
+ # as below:
31
+ # * Disables internal sample data when arguments are specified.
32
+ # * Method name is changed.
33
+ #
34
+
35
+ require 'bio'
36
+
37
+
38
+ begin
39
+ require 'pp'
40
+ alias p pp
41
+ rescue LoadError
42
+ end
43
+
44
+
45
+ plant = <<HOGE
46
+
47
+ ### ### ### T A R G E T P 1.0 prediction results ### ### ###
48
+
49
+ # Number of input sequences: 1
50
+ # Cleavage site predictions not included.
51
+ # Using PLANT networks.
52
+
53
+ # Name Length cTP mTP SP other Loc. RC
54
+ #----------------------------------------------------------------------------------
55
+ MGI_2141503 640 0.031 0.161 0.271 0.844 _ 3
56
+ #----------------------------------------------------------------------------------
57
+ # cutoff 0.00 0.00 0.00 0.00
58
+
59
+
60
+ HOGE
61
+
62
+ plant_c = <<HOGE
63
+
64
+ ### ### ### T A R G E T P 1.0 prediction results ### ### ###
65
+
66
+ # Number of input sequences: 1
67
+ # Cleavage site predictions included.
68
+ # Using PLANT networks.
69
+
70
+ # Name Length cTP mTP SP other Loc. RC TPlen
71
+ #----------------------------------------------------------------------------------
72
+ MGI_2141503 640 0.031 0.161 0.271 0.844 _ 3 -
73
+ #----------------------------------------------------------------------------------
74
+ # cutoff 0.00 0.00 0.00 0.00
75
+
76
+
77
+
78
+ HOGE
79
+
80
+ non_plant_c = <<HOGE
81
+
82
+ ### ### ### T A R G E T P 1.0 prediction results ### ### ###
83
+
84
+ # Number of input sequences: 1
85
+ # Cleavage site predictions included.
86
+ # Using NON-PLANT networks.
87
+
88
+ # Name Length mTP SP other Loc. RC TPlen
89
+ #--------------------------------------------------------------------------
90
+ MGI_96083 2187 0.292 0.053 0.746 _ 3 -
91
+ #--------------------------------------------------------------------------
92
+ # cutoff 0.00 0.00 0.00
93
+
94
+
95
+
96
+ HOGE
97
+
98
+
99
+ def demo_targetp_report(e)
100
+ puts e
101
+ ent = Bio::TargetP::Report.new(e)
102
+ pp ent
103
+
104
+ p [:entry_id, ent.entry_id]
105
+ p [:name, ent.name]
106
+ p [:version, ent.version]
107
+ p [:query_sequnces, ent.query_sequences]
108
+ p [:cleavage_site_prediction, ent.cleavage_site_prediction]
109
+ p [:networks, ent.networks]
110
+ p [:query_len, ent.query_len]
111
+ p [:prediction, ent.prediction]
112
+ p [:pred_Name, ent.pred['Name']]
113
+ p [:pred_SP, ent.pred['SP']]
114
+ p [:pred_mTP, ent.pred['mTP']]
115
+ p [:cutoff, ent.cutoff]
116
+ p [:loc, ent.loc]
117
+ p [:rc, ent.rc]
118
+
119
+ puts '=='
120
+ end
121
+
122
+ if ARGV.empty? then
123
+
124
+ [plant, plant_c, non_plant_c].each {|e|
125
+ demo_targetp_report(e)
126
+ }
127
+
128
+ else
129
+
130
+ while ent = $<.gets(Bio::TargetP::Report::DELIMITER)
131
+ demo_targetp_report(ent)
132
+ end
133
+
134
+ end
135
+
@@ -0,0 +1,68 @@
1
+ #
2
+ # = sample/demo_tmhmm_report.rb - demonstration of Bio::TMHMM::Report
3
+ #
4
+ # Copyright:: Copyright (C) 2003
5
+ # Mitsuteru C. Nakao <n@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+ #
9
+ # == Description
10
+ #
11
+ # Demonstration of Bio::TMHMM::Report, TMHMM output parser.
12
+ #
13
+ # == Usage
14
+ #
15
+ # Specify files containing TMHMM reports.
16
+ #
17
+ # $ ruby demo_tmhmm_report.rb files...
18
+ #
19
+ # Example usage using test data:
20
+ #
21
+ # $ ruby -Ilib sample/demo_tmhmm_report.rb test/data/TMHMM/sample.report
22
+ #
23
+ # == References
24
+ #
25
+ # * http://www.cbs.dtu.dk/services/TMHMM/
26
+ #
27
+ # == Development information
28
+ #
29
+ # The code was moved from lib/bio/appl/tmhmm/report.rb.
30
+ #
31
+
32
+ require 'bio'
33
+
34
+ #if __FILE__ == $0
35
+
36
+ begin
37
+ require 'pp'
38
+ alias p pp
39
+ rescue LoadError
40
+ end
41
+
42
+ Bio::TMHMM.reports(ARGF.read) do |ent|
43
+ puts '==>'
44
+ puts ent.to_s
45
+ pp ent
46
+
47
+ p [:entry_id, ent.entry_id]
48
+ p [:query_len, ent.query_len]
49
+ p [:predicted_tmhs, ent.predicted_tmhs]
50
+ p [:tmhs_size, ent.tmhs.size]
51
+ p [:exp_aas_in_tmhs, ent.exp_aas_in_tmhs]
52
+ p [:exp_first_60aa, ent.exp_first_60aa]
53
+ p [:total_prob_of_N_in, ent.total_prob_of_N_in]
54
+
55
+ ent.tmhs.each do |t|
56
+ p t
57
+ p [:entry_id, t.entry_id]
58
+ p [:version, t.version]
59
+ p [:status, t.status]
60
+ p [:range, t.range]
61
+ p [:pos, t.pos]
62
+ end
63
+
64
+ p [:helix, ent.helix]
65
+ p ent.tmhs.map {|t| t if t.status == 'TMhelix' }.compact
66
+ end
67
+
68
+ #end
@@ -18,12 +18,14 @@
18
18
  # along with this program; if not, write to the Free Software
19
19
  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
20
  #
21
- # $Id: pmfetch.rb,v 1.2 2002/07/23 04:52:03 k Exp $
21
+ # $Id:$
22
22
  #
23
23
 
24
24
  require 'bio'
25
25
 
26
- if ARGV[0] =~ /-f/
26
+ Bio::NCBI.default_email = 'staff@bioruby.org'
27
+
28
+ if ARGV[0] =~ /\A\-f/
27
29
  ARGV.shift
28
30
  form = ARGV.shift
29
31
  else
@@ -31,12 +33,19 @@ else
31
33
  end
32
34
 
33
35
  ARGV.each do |id|
34
- entry = Bio::PubMed.query(id)
36
+ entries = Bio::PubMed.efetch(id)
37
+ if entries and entries.size == 1 then
38
+ entry = entries[0]
39
+ else
40
+ # dummy entry if not found or possibly incorrect result
41
+ entry = 'PMID- '
42
+ end
35
43
  case form
36
44
  when 'medline'
37
45
  puts entry
38
46
  else
39
- puts Bio::MEDLINE.new(entry).reference.send(form)
47
+ puts Bio::MEDLINE.new(entry).reference.__send__(form.intern)
40
48
  end
49
+ print "\n"
41
50
  end
42
51
 
@@ -18,25 +18,36 @@
18
18
  # along with this program; if not, write to the Free Software
19
19
  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
20
  #
21
- # $Id: pmsearch.rb,v 1.2 2002/07/23 04:52:03 k Exp $
21
+ # $Id:$
22
22
  #
23
23
 
24
24
  require 'bio'
25
25
 
26
- if ARGV[0] =~ /-f/
26
+ Bio::NCBI.default_email = 'staff@bioruby.org'
27
+
28
+ if ARGV[0] =~ /\A\-f/
27
29
  ARGV.shift
28
30
  form = ARGV.shift
29
31
  else
30
32
  form = 'bibtex'
31
33
  end
32
34
 
33
- entries = Bio::PubMed.search(ARGV.join(' '))
35
+ keywords = ARGV.join(' ')
36
+ uids = Bio::PubMed.esearch(keywords)
37
+
38
+ if uids and !uids.empty? then
39
+ entries = Bio::PubMed.efetch(uids)
40
+ else
41
+ entries = []
42
+ end
43
+
34
44
  entries.each do |entry|
35
45
  case form
36
46
  when 'medline'
37
47
  puts entry
38
48
  else
39
- puts Bio::MEDLINE.new(entry).reference.send(form)
49
+ puts Bio::MEDLINE.new(entry).reference.__send__(form.intern)
40
50
  end
51
+ print "\n"
41
52
  end
42
53
 
@@ -0,0 +1,205 @@
1
+ #
2
+ # = sample/test_phyloxml_big.rb - Tests for Bio::PhyloXML. Testing very big files.
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Diana Jaunzeikare <latvianlinuxgirl@gmail.com>
6
+ # Naohisa Goto <ng@bioruby.org>
7
+ # License:: The Ruby License
8
+ #
9
+
10
+ # libraries needed for the tests
11
+ require 'libxml'
12
+ require 'pathname'
13
+ require 'test/unit'
14
+ require 'digest/sha1'
15
+
16
+ require 'bio/command'
17
+ require 'bio/db/phyloxml/phyloxml_parser'
18
+ require 'bio/db/phyloxml/phyloxml_writer'
19
+
20
+ PhyloXMLBigDataPath = ARGV.shift
21
+
22
+ if !PhyloXMLBigDataPath then
23
+ exit_code = 0
24
+ elsif !File.directory?(PhyloXMLBigDataPath) then
25
+ exit_code = 1
26
+ else
27
+ exit_code = false
28
+ end
29
+
30
+ if exit_code then
31
+ puts "Usage: #{$0} path_to_data (test options...)"
32
+ puts ""
33
+ puts "Requirements:"
34
+ puts " - Write permission to the path_to_data"
35
+ puts " - Internet connection for downloading test data"
36
+ puts " - unzip command to extract downloaded test data"
37
+ puts ""
38
+ puts "You may want to run Ruby with -rubygems and -I<path_to_bioruby_lib>."
39
+ puts ""
40
+ puts "Example of usage using /tmp:"
41
+ puts " $ mkdir /tmp/phyloxml"
42
+ puts " $ ruby -rubygems -I lib #{$0} /tmp/phyloxml -v"
43
+ puts ""
44
+ exit(exit_code)
45
+ end
46
+
47
+ module TestPhyloXMLBigData
48
+
49
+ module_function
50
+
51
+ def metazoa_xml
52
+ #puts "Metazoa 30MB"
53
+ filename = 'ncbi_taxonomy_metazoa.xml'
54
+ uri = "http://www.phylosoft.org/archaeopteryx/examples/data/ncbi_taxonomy_metazoa.xml.zip"
55
+ download_and_unzip_if_not_found(filename, uri, "1M", "33M")
56
+ end
57
+
58
+ def metazoa_test_xml
59
+ #puts "writing Metazoa 30MB"
60
+ File.join PhyloXMLBigDataPath, 'writer_test_ncbi_taxonomy_metazoa.xml'
61
+ end
62
+
63
+ def metazoa_roundtrip_xml
64
+ #puts "writing Metazoa 30MB roundtrip"
65
+ File.join PhyloXMLBigDataPath, 'roundtrip_test_ncbi_taxonomy_metazoa.xml'
66
+ end
67
+
68
+ def mollusca_xml
69
+ #puts "Mollusca 1.5MB"
70
+ filename = 'ncbi_taxonomy_mollusca.xml'
71
+ uri = "http://www.phylosoft.org/archaeopteryx/examples/data/ncbi_taxonomy_mollusca.xml.zip"
72
+ download_and_unzip_if_not_found(filename, uri, "67K", "1.5M")
73
+ end
74
+
75
+ def mollusca_test_xml
76
+ #puts "Writing Mollusca 1.5MB"
77
+ File.join PhyloXMLBigDataPath, 'writer_test_ncbi_taxonomy_mollusca.xml'
78
+ end
79
+
80
+ def mollusca_roundtrip_xml
81
+ #puts "Writing Mollusca 1.5MB roundtrip"
82
+ File.join PhyloXMLBigDataPath, 'roundtrip_test_ncbi_taxonomy_mollusca.xml'
83
+ end
84
+
85
+ def life_xml
86
+ #Right now this file is not compatible with xsd 1.10
87
+ filename = 'tol_life_on_earth_1.xml'
88
+ uri = "http://www.phylosoft.org/archaeopteryx/examples/data/tol_life_on_earth_1.xml.zip"
89
+
90
+ download_and_unzip_if_not_found(filename, uri, '10M', '45M')
91
+ end
92
+
93
+ def life_test_xml
94
+ File.join PhyloXMLBigDataPath, 'writer_test_tol_life_on_earth_1.xml'
95
+ end
96
+
97
+ def life_roundtrip_xml
98
+ File.join PhyloXMLBigDataPath, 'roundtrip_test_tol_life_on_earth_1.xml'
99
+ end
100
+
101
+ def unzip_file(file, target_dir)
102
+ flag = system('unzip', "#{file}.zip", "-d", target_dir)
103
+ unless flag then
104
+ raise "Failed to unzip #{file}.zip"
105
+ end
106
+ file
107
+ end
108
+
109
+ def download_and_unzip_if_not_found(basename, uri, zipsize, origsize)
110
+ file = File.join PhyloXMLBigDataPath, basename
111
+ return file if File.exists?(file)
112
+
113
+ if File.exists?("#{file}.zip")
114
+ unzip_file(file, PhyloXMLBigDataPath)
115
+ return file
116
+ end
117
+
118
+ puts "File #{basename} does not exist. Do you want to download it? (If yes, ~#{zipsize}B zip file will be downloaded and extracted (to #{origsize}B), if no, the test will be skipped.) y/n?"
119
+ res = gets
120
+ if res.to_s.chomp.downcase == "y"
121
+ File.open("#{file}.zip", "wb") do |f|
122
+ f.write(Bio::Command.read_uri(uri))
123
+ end
124
+ puts "File downloaded."
125
+ self.unzip_file(file, PhyloXMLBigDataPath)
126
+ return file
127
+ else
128
+ return nil
129
+ #return File.join PHYLOXML_TEST_DATA, "#{basename}.stub"
130
+ end
131
+ end
132
+
133
+ end #end module TestPhyloXMLBigData
134
+
135
+ module Bio
136
+
137
+ class TestPhyloXMLBig < Test::Unit::TestCase
138
+
139
+ def do_test_next_tree(readfilename)
140
+ raise "the test is skipped" unless readfilename
141
+ filesizeMB = File.size(readfilename) / 1048576.0
142
+ printf "Reading %s (%2.1f MB)\n", readfilename, filesizeMB
143
+
144
+ begin
145
+ phyloxml = Bio::PhyloXML::Parser.open(readfilename)
146
+ rescue NoMethodError
147
+ phyloxml = Bio::PhyloXML::Parser.new(readfilename)
148
+ end
149
+ tree = nil
150
+ assert_nothing_raised {
151
+ tree = phyloxml.next_tree
152
+ }
153
+ tree
154
+ end
155
+ private :do_test_next_tree
156
+
157
+ def do_test_write(tree, writefilename)
158
+ printf "Writing to %s\n", writefilename
159
+ writer = Bio::PhyloXML::Writer.new(writefilename)
160
+ assert_nothing_raised {
161
+ writer.write(tree)
162
+ }
163
+
164
+ # checks file size and sha1sum
165
+ str = File.open(writefilename, 'rb') { |f| f.read }
166
+ sha1 = Digest::SHA1.hexdigest(str)
167
+ puts "Wrote #{str.length} bytes."
168
+ puts "sha1: #{sha1}"
169
+ end
170
+ private :do_test_write
171
+
172
+ def test_mollusca
173
+ tree = do_test_next_tree(TestPhyloXMLBigData.mollusca_xml)
174
+ do_test_write(tree, TestPhyloXMLBigData.mollusca_test_xml)
175
+
176
+ tree2 = do_test_next_tree(TestPhyloXMLBigData.mollusca_test_xml)
177
+ do_test_write(tree2, TestPhyloXMLBigData.mollusca_roundtrip_xml)
178
+ end
179
+
180
+ def test_metazoa
181
+ tree = do_test_next_tree(TestPhyloXMLBigData.metazoa_xml)
182
+ do_test_write(tree, TestPhyloXMLBigData.metazoa_test_xml)
183
+
184
+ tree2 = do_test_next_tree(TestPhyloXMLBigData.metazoa_test_xml)
185
+ do_test_write(tree2, TestPhyloXMLBigData.metazoa_roundtrip_xml)
186
+ end
187
+
188
+ if false
189
+ # Disabled because of the error.
190
+ # LibXML::XML::Error: Fatal error: Input is not proper UTF-8,
191
+ # indicate encoding !
192
+ # Bytes: 0xE9 0x6B 0x65 0x73 at tol_life_on_earth_1.xml:132170.
193
+ #
194
+ def test_life
195
+ tree = do_test_next_tree(TestPhyloXMLBigData.life_xml)
196
+ do_test_write(tree, TestPhyloXMLBigData.life_test_xml)
197
+
198
+ tree2 = do_test_next_tree(TestPhyloXMLBigData.life_test_xml)
199
+ do_test_write(tree2, TestPhyloXMLBigData.life_roundtrip_xml)
200
+ end
201
+ end #if false
202
+
203
+ end
204
+
205
+ end