bio 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (303) hide show
  1. data/ChangeLog +2105 -3728
  2. data/KNOWN_ISSUES.rdoc +35 -3
  3. data/README.rdoc +8 -2
  4. data/RELEASE_NOTES.rdoc +166 -0
  5. data/bin/bioruby +4 -1
  6. data/bioruby.gemspec +146 -1
  7. data/bioruby.gemspec.erb +3 -1
  8. data/doc/ChangeLog-before-1.3.1 +3961 -0
  9. data/doc/Tutorial.rd +154 -22
  10. data/doc/Tutorial.rd.html +125 -68
  11. data/lib/bio.rb +21 -6
  12. data/lib/bio/appl/bl2seq/report.rb +11 -202
  13. data/lib/bio/appl/blast/format0.rb +0 -193
  14. data/lib/bio/appl/blast/report.rb +2 -147
  15. data/lib/bio/appl/blast/wublast.rb +0 -208
  16. data/lib/bio/appl/fasta.rb +4 -19
  17. data/lib/bio/appl/fasta/format10.rb +0 -14
  18. data/lib/bio/appl/genscan/report.rb +0 -176
  19. data/lib/bio/appl/hmmer.rb +1 -15
  20. data/lib/bio/appl/hmmer/report.rb +0 -100
  21. data/lib/bio/appl/meme/mast.rb +156 -0
  22. data/lib/bio/appl/meme/mast/report.rb +91 -0
  23. data/lib/bio/appl/meme/motif.rb +48 -0
  24. data/lib/bio/appl/psort.rb +0 -111
  25. data/lib/bio/appl/psort/report.rb +1 -45
  26. data/lib/bio/appl/pts1.rb +2 -4
  27. data/lib/bio/appl/sosui/report.rb +5 -54
  28. data/lib/bio/appl/targetp/report.rb +1 -104
  29. data/lib/bio/appl/tmhmm/report.rb +0 -36
  30. data/lib/bio/command.rb +94 -10
  31. data/lib/bio/data/aa.rb +1 -77
  32. data/lib/bio/data/codontable.rb +1 -95
  33. data/lib/bio/data/na.rb +1 -26
  34. data/lib/bio/db/aaindex.rb +1 -38
  35. data/lib/bio/db/fasta.rb +1 -134
  36. data/lib/bio/db/fasta/format_qual.rb +204 -0
  37. data/lib/bio/db/fasta/qual.rb +102 -0
  38. data/lib/bio/db/fastq.rb +645 -0
  39. data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
  40. data/lib/bio/db/fastq/format_fastq.rb +175 -0
  41. data/lib/bio/db/genbank/genbank.rb +1 -86
  42. data/lib/bio/db/gff.rb +0 -17
  43. data/lib/bio/db/go.rb +4 -72
  44. data/lib/bio/db/kegg/common.rb +112 -0
  45. data/lib/bio/db/kegg/compound.rb +29 -20
  46. data/lib/bio/db/kegg/drug.rb +74 -34
  47. data/lib/bio/db/kegg/enzyme.rb +26 -5
  48. data/lib/bio/db/kegg/genes.rb +128 -15
  49. data/lib/bio/db/kegg/genome.rb +3 -41
  50. data/lib/bio/db/kegg/glycan.rb +19 -24
  51. data/lib/bio/db/kegg/orthology.rb +16 -56
  52. data/lib/bio/db/kegg/reaction.rb +81 -28
  53. data/lib/bio/db/kegg/taxonomy.rb +1 -52
  54. data/lib/bio/db/litdb.rb +1 -16
  55. data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
  56. data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
  57. data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
  58. data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
  59. data/lib/bio/db/prosite.rb +2 -95
  60. data/lib/bio/db/rebase.rb +5 -6
  61. data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
  62. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
  63. data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
  64. data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
  65. data/lib/bio/io/das.rb +0 -44
  66. data/lib/bio/io/ddbjxml.rb +1 -181
  67. data/lib/bio/io/flatfile.rb +1 -7
  68. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  69. data/lib/bio/io/keggapi.rb +0 -442
  70. data/lib/bio/io/ncbirest.rb +130 -132
  71. data/lib/bio/io/ncbisoap.rb +2 -1
  72. data/lib/bio/io/pubmed.rb +0 -88
  73. data/lib/bio/location.rb +0 -73
  74. data/lib/bio/pathway.rb +0 -171
  75. data/lib/bio/sequence.rb +18 -1
  76. data/lib/bio/sequence/adapter.rb +3 -0
  77. data/lib/bio/sequence/format.rb +16 -0
  78. data/lib/bio/sequence/quality_score.rb +205 -0
  79. data/lib/bio/tree.rb +70 -5
  80. data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
  81. data/lib/bio/util/sirna.rb +1 -23
  82. data/lib/bio/version.rb +1 -1
  83. data/sample/demo_aaindex.rb +67 -0
  84. data/sample/demo_aminoacid.rb +101 -0
  85. data/sample/demo_bl2seq_report.rb +220 -0
  86. data/sample/demo_blast_report.rb +285 -0
  87. data/sample/demo_codontable.rb +119 -0
  88. data/sample/demo_das.rb +105 -0
  89. data/sample/demo_ddbjxml.rb +212 -0
  90. data/sample/demo_fasta_remote.rb +51 -0
  91. data/sample/demo_fastaformat.rb +105 -0
  92. data/sample/demo_genbank.rb +132 -0
  93. data/sample/demo_genscan_report.rb +202 -0
  94. data/sample/demo_gff1.rb +49 -0
  95. data/sample/demo_go.rb +98 -0
  96. data/sample/demo_hmmer_report.rb +149 -0
  97. data/sample/demo_kegg_compound.rb +57 -0
  98. data/sample/demo_kegg_drug.rb +65 -0
  99. data/sample/demo_kegg_genome.rb +74 -0
  100. data/sample/demo_kegg_glycan.rb +72 -0
  101. data/sample/demo_kegg_orthology.rb +62 -0
  102. data/sample/demo_kegg_reaction.rb +66 -0
  103. data/sample/demo_kegg_taxonomy.rb +92 -0
  104. data/sample/demo_keggapi.rb +502 -0
  105. data/sample/demo_litdb.rb +42 -0
  106. data/sample/demo_locations.rb +99 -0
  107. data/sample/demo_ncbi_rest.rb +130 -0
  108. data/sample/demo_nucleicacid.rb +49 -0
  109. data/sample/demo_pathway.rb +196 -0
  110. data/sample/demo_prosite.rb +120 -0
  111. data/sample/demo_psort.rb +138 -0
  112. data/sample/demo_psort_report.rb +70 -0
  113. data/sample/demo_pubmed.rb +118 -0
  114. data/sample/demo_sirna.rb +63 -0
  115. data/sample/demo_sosui_report.rb +89 -0
  116. data/sample/demo_targetp_report.rb +135 -0
  117. data/sample/demo_tmhmm_report.rb +68 -0
  118. data/sample/pmfetch.rb +13 -4
  119. data/sample/pmsearch.rb +15 -4
  120. data/sample/test_phyloxml_big.rb +205 -0
  121. data/test/bioruby_test_helper.rb +61 -0
  122. data/test/data/KEGG/1.1.1.1.enzyme +935 -0
  123. data/test/data/KEGG/C00025.compound +102 -0
  124. data/test/data/KEGG/D00063.drug +104 -0
  125. data/test/data/KEGG/G00024.glycan +47 -0
  126. data/test/data/KEGG/G01366.glycan +18 -0
  127. data/test/data/KEGG/K02338.orthology +902 -0
  128. data/test/data/KEGG/R00006.reaction +14 -0
  129. data/test/data/fastq/README.txt +109 -0
  130. data/test/data/fastq/error_diff_ids.fastq +20 -0
  131. data/test/data/fastq/error_double_qual.fastq +22 -0
  132. data/test/data/fastq/error_double_seq.fastq +22 -0
  133. data/test/data/fastq/error_long_qual.fastq +20 -0
  134. data/test/data/fastq/error_no_qual.fastq +20 -0
  135. data/test/data/fastq/error_qual_del.fastq +20 -0
  136. data/test/data/fastq/error_qual_escape.fastq +20 -0
  137. data/test/data/fastq/error_qual_null.fastq +0 -0
  138. data/test/data/fastq/error_qual_space.fastq +21 -0
  139. data/test/data/fastq/error_qual_tab.fastq +21 -0
  140. data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
  141. data/test/data/fastq/error_qual_vtab.fastq +20 -0
  142. data/test/data/fastq/error_short_qual.fastq +20 -0
  143. data/test/data/fastq/error_spaces.fastq +20 -0
  144. data/test/data/fastq/error_tabs.fastq +21 -0
  145. data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
  146. data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
  147. data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
  148. data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
  149. data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
  150. data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
  151. data/test/data/fastq/error_trunc_in_title.fastq +17 -0
  152. data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
  153. data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
  154. data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
  155. data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
  156. data/test/data/fastq/longreads_as_illumina.fastq +40 -0
  157. data/test/data/fastq/longreads_as_sanger.fastq +40 -0
  158. data/test/data/fastq/longreads_as_solexa.fastq +40 -0
  159. data/test/data/fastq/longreads_original_sanger.fastq +120 -0
  160. data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
  161. data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
  162. data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
  163. data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
  164. data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
  165. data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
  166. data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
  167. data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
  168. data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
  169. data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
  170. data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
  171. data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
  172. data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
  173. data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
  174. data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
  175. data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
  176. data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
  177. data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
  178. data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
  179. data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
  180. data/test/data/meme/db +0 -0
  181. data/test/data/meme/mast +0 -0
  182. data/test/data/meme/mast.out +13 -0
  183. data/test/data/meme/meme.out +3 -0
  184. data/test/data/phyloxml/apaf.xml +666 -0
  185. data/test/data/phyloxml/bcl_2.xml +2097 -0
  186. data/test/data/phyloxml/made_up.xml +144 -0
  187. data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
  188. data/test/data/phyloxml/phyloxml_examples.xml +415 -0
  189. data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
  190. data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
  191. data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
  192. data/test/functional/bio/appl/test_pts1.rb +7 -5
  193. data/test/functional/bio/io/test_ensembl.rb +4 -3
  194. data/test/functional/bio/io/test_pubmed.rb +9 -3
  195. data/test/functional/bio/io/test_soapwsdl.rb +5 -4
  196. data/test/functional/bio/io/test_togows.rb +5 -4
  197. data/test/functional/bio/sequence/test_output_embl.rb +6 -4
  198. data/test/functional/bio/test_command.rb +54 -5
  199. data/test/runner.rb +5 -3
  200. data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
  201. data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
  202. data/test/unit/bio/appl/blast/test_report.rb +5 -4
  203. data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
  204. data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
  205. data/test/unit/bio/appl/genscan/test_report.rb +8 -9
  206. data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
  207. data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
  208. data/test/unit/bio/appl/mafft/test_report.rb +6 -5
  209. data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
  210. data/test/unit/bio/appl/meme/test_mast.rb +103 -0
  211. data/test/unit/bio/appl/meme/test_motif.rb +38 -0
  212. data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
  213. data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
  214. data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
  215. data/test/unit/bio/appl/sim4/test_report.rb +5 -4
  216. data/test/unit/bio/appl/sosui/test_report.rb +6 -5
  217. data/test/unit/bio/appl/targetp/test_report.rb +5 -3
  218. data/test/unit/bio/appl/test_blast.rb +5 -4
  219. data/test/unit/bio/appl/test_fasta.rb +4 -2
  220. data/test/unit/bio/appl/test_pts1.rb +4 -2
  221. data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
  222. data/test/unit/bio/data/test_aa.rb +5 -3
  223. data/test/unit/bio/data/test_codontable.rb +5 -4
  224. data/test/unit/bio/data/test_na.rb +5 -3
  225. data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
  226. data/test/unit/bio/db/embl/test_common.rb +4 -2
  227. data/test/unit/bio/db/embl/test_embl.rb +6 -6
  228. data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
  229. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
  230. data/test/unit/bio/db/embl/test_sptr.rb +6 -8
  231. data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
  232. data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
  233. data/test/unit/bio/db/kegg/test_compound.rb +146 -0
  234. data/test/unit/bio/db/kegg/test_drug.rb +194 -0
  235. data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
  236. data/test/unit/bio/db/kegg/test_genes.rb +32 -4
  237. data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
  238. data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
  239. data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
  240. data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
  241. data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
  242. data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
  243. data/test/unit/bio/db/test_aaindex.rb +6 -6
  244. data/test/unit/bio/db/test_fasta.rb +5 -46
  245. data/test/unit/bio/db/test_fastq.rb +829 -0
  246. data/test/unit/bio/db/test_gff.rb +4 -2
  247. data/test/unit/bio/db/test_lasergene.rb +7 -5
  248. data/test/unit/bio/db/test_medline.rb +4 -2
  249. data/test/unit/bio/db/test_newick.rb +6 -6
  250. data/test/unit/bio/db/test_nexus.rb +4 -2
  251. data/test/unit/bio/db/test_phyloxml.rb +769 -0
  252. data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
  253. data/test/unit/bio/db/test_prosite.rb +6 -5
  254. data/test/unit/bio/db/test_qual.rb +63 -0
  255. data/test/unit/bio/db/test_rebase.rb +5 -3
  256. data/test/unit/bio/db/test_soft.rb +7 -6
  257. data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
  258. data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
  259. data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
  260. data/test/unit/bio/io/test_ddbjxml.rb +4 -3
  261. data/test/unit/bio/io/test_ensembl.rb +5 -3
  262. data/test/unit/bio/io/test_fastacmd.rb +4 -3
  263. data/test/unit/bio/io/test_flatfile.rb +6 -5
  264. data/test/unit/bio/io/test_soapwsdl.rb +4 -3
  265. data/test/unit/bio/io/test_togows.rb +4 -2
  266. data/test/unit/bio/sequence/test_aa.rb +5 -3
  267. data/test/unit/bio/sequence/test_common.rb +4 -2
  268. data/test/unit/bio/sequence/test_compat.rb +4 -2
  269. data/test/unit/bio/sequence/test_dblink.rb +5 -3
  270. data/test/unit/bio/sequence/test_na.rb +4 -2
  271. data/test/unit/bio/sequence/test_quality_score.rb +330 -0
  272. data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
  273. data/test/unit/bio/test_alignment.rb +5 -3
  274. data/test/unit/bio/test_command.rb +4 -3
  275. data/test/unit/bio/test_db.rb +5 -3
  276. data/test/unit/bio/test_feature.rb +4 -2
  277. data/test/unit/bio/test_location.rb +4 -2
  278. data/test/unit/bio/test_map.rb +5 -3
  279. data/test/unit/bio/test_pathway.rb +4 -2
  280. data/test/unit/bio/test_reference.rb +4 -2
  281. data/test/unit/bio/test_sequence.rb +5 -3
  282. data/test/unit/bio/test_shell.rb +5 -3
  283. data/test/unit/bio/test_tree.rb +6 -6
  284. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
  285. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
  286. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
  287. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
  288. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
  289. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
  290. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
  291. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
  292. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
  293. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
  294. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
  295. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
  296. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
  297. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
  298. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
  299. data/test/unit/bio/util/test_color_scheme.rb +5 -3
  300. data/test/unit/bio/util/test_contingency_table.rb +5 -3
  301. data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
  302. data/test/unit/bio/util/test_sirna.rb +6 -4
  303. metadata +147 -2
@@ -0,0 +1,228 @@
1
+ #
2
+ # = bio/db/phyloxml_writer.rb - PhyloXML writer
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Diana Jaunzeikare <latvianlinuxgirl@gmail.com>
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id:$
9
+ #
10
+ # == Description
11
+ #
12
+ # This file contains the writer for PhyloXML.
13
+ #
14
+ # == Requirements
15
+ #
16
+ # Libxml2 XML parser is required. Install libxml-ruby bindings from
17
+ # http://libxml.rubyforge.org or
18
+ #
19
+ # gem install -r libxml-ruby
20
+ #
21
+ # == References
22
+ #
23
+ # * http://www.phyloxml.org
24
+ #
25
+ # * https://www.nescent.org/wg_phyloinformatics/PhyloSoC:PhyloXML_support_in_BioRuby
26
+
27
+ require 'libxml'
28
+ require 'bio/db/phyloxml/phyloxml_elements'
29
+
30
+ module Bio
31
+
32
+ module PhyloXML
33
+
34
+ # == Description
35
+ #
36
+ # Bio::PhyloXML::Writer is for writing phyloXML (version 1.10) format files.
37
+ #
38
+ # == Requirements
39
+ #
40
+ # Libxml2 XML parser is required. Install libxml-ruby bindings from
41
+ # http://libxml.rubyforge.org or
42
+ #
43
+ # gem install -r libxml-ruby
44
+ #
45
+ # == Usage
46
+ #
47
+ # require 'bio'
48
+ #
49
+ # # Create new phyloxml parser
50
+ # phyloxml = Bio::PhyloXML::Parser.open('example.xml')
51
+ #
52
+ # # Read in some trees from file
53
+ # tree1 = phyloxml.next_tree
54
+ # tree2 = phyloxml.next_tree
55
+ #
56
+ # # Create new phyloxml writer
57
+ # writer = Bio::PhyloXML::Writer.new('tree.xml')
58
+ #
59
+ # # Write tree to the file tree.xml
60
+ # writer.write(tree1)
61
+ #
62
+ # # Add another tree to the file
63
+ # writer.write(tree2)
64
+ #
65
+ # == References
66
+ #
67
+ # http://www.phyloxml.org/documentation/version_100/phyloxml.xsd.html
68
+
69
+ class Writer
70
+
71
+ include LibXML
72
+
73
+ SCHEMA_LOCATION = 'http://www.phyloxml.org http://www.phyloxml.org/1.10/phyloxml.xsd'
74
+
75
+ attr_accessor :write_branch_length_as_subelement
76
+
77
+ #
78
+ # Create new Writer object. As parameters provide filename of xml file
79
+ # you wish to create. Optional parameter is whether to indent or not.
80
+ # Default is true. By default branch_length is written as subelement of
81
+ # clade element.
82
+ #
83
+ def initialize(filename, indent=true)
84
+ @write_branch_length_as_subelement = true #default value
85
+ @filename = filename
86
+ @indent = indent
87
+
88
+ @doc = XML::Document.new()
89
+ @doc.root = XML::Node.new('phyloxml')
90
+ @root = @doc.root
91
+ @root['xmlns:xsi'] = 'http://www.w3.org/2001/XMLSchema-instance'
92
+ @root['xsi:schemaLocation'] = SCHEMA_LOCATION
93
+ @root['xmlns'] = 'http://www.phyloxml.org'
94
+
95
+ #@todo save encoding to be UTF-8. (However it is the default one).
96
+ #it gives error NameError: uninitialized constant LibXML::XML::Encoding
97
+ #@doc.encoding = XML::Encoding::UTF_8
98
+
99
+ @doc.save(@filename, :indent => true)
100
+ end
101
+
102
+ #
103
+ # Write a tree to a file in phyloxml format.
104
+ #
105
+ # require 'bio'
106
+ # writer = Bio::PhyloXML::Writer.new('tree.xml')
107
+ # writer.write(tree)
108
+ #
109
+ def write(tree)
110
+ @root << phylogeny = XML::Node.new('phylogeny')
111
+
112
+ PhyloXML::Writer.generate_xml(phylogeny, tree, [
113
+ [:attr, 'rooted'],
114
+ [:simple, 'name', tree.name],
115
+ [:complex, 'id', tree.phylogeny_id],
116
+ [:simple, 'description', tree.description],
117
+ [:simple, 'date', tree.date],
118
+ [:objarr, 'confidence', 'confidences']])
119
+
120
+ root_clade = tree.root.to_xml(nil, @write_branch_length_as_subelement)
121
+
122
+ phylogeny << root_clade
123
+
124
+ tree.children(tree.root).each do |node|
125
+ root_clade << node_to_xml(tree, node, tree.root)
126
+ end
127
+
128
+ Bio::PhyloXML::Writer::generate_xml(phylogeny, tree, [
129
+ [:objarr, 'clade_relation', 'clade_relations'],
130
+ [:objarr, 'sequence_relation', 'sequence_relations'],
131
+ [:objarr, 'property', 'properties']] )
132
+
133
+ @doc.save(@filename, :indent => @indent)
134
+ end #writer#write
135
+
136
+
137
+ #
138
+ # PhyloXML Schema allows saving data in a different XML format after all
139
+ # phylogeny elements. This method is to write these additional data.
140
+ #
141
+ # parser = PhyloXML::Parser.open('phyloxml_examples.xml')
142
+ # writer = PhyloXML::Writer.new('new.xml')
143
+ #
144
+ # parser.each do |tree|
145
+ # writer.write(tree)
146
+ # end
147
+ #
148
+ # # When all the trees are read in by the parser, what's left is saved at
149
+ # # PhyloXML::Parser#other
150
+ # writer.write_other(parser.other)
151
+ #
152
+
153
+ def write_other(other_arr)
154
+ other_arr.each do |other_obj|
155
+ @root << other_obj.to_xml
156
+ end
157
+ @doc.save(@filename, :indent => @indent)
158
+ end
159
+
160
+ #class method
161
+
162
+ #
163
+ # Used by to_xml methods of PhyloXML element classes. Generally not to be
164
+ # invoked directly.
165
+ #
166
+ def self.generate_xml(root, elem, subelement_array)
167
+ #example usage: generate_xml(node, self, [[ :complex,'accession', ], [:simple, 'name', @name], [:simple, 'location', @location]])
168
+ subelement_array.each do |subelem|
169
+ if subelem[0] == :simple
170
+ root << XML::Node.new(subelem[1], subelem[2].to_s) if subelem[2] != nil and not subelem[2].to_s.empty?
171
+
172
+ elsif subelem[0] == :complex
173
+ root << subelem[2].send("to_xml") if subelem[2] != nil
174
+
175
+ elsif subelem[0] == :pattern
176
+ #seq, self, [[:pattern, 'symbol', @symbol, "\S{1,10}"]
177
+ if subelem[2] != nil
178
+ if subelem[2] =~ subelem[3]
179
+ root << XML::Node.new(subelem[1], subelem[2])
180
+ else
181
+ raise "#{subelem[2]} is not a valid value of #{subelem[1]}. It should follow pattern #{subelem[3]}"
182
+ end
183
+ end
184
+
185
+ elsif subelem[0] == :objarr
186
+ #[:objarr, 'annotation', 'annotations']])
187
+ obj_arr = elem.send(subelem[2])
188
+ obj_arr.each do |arr_elem|
189
+ root << arr_elem.to_xml
190
+ end
191
+
192
+ elsif subelem[0] == :simplearr
193
+ # [:simplearr, 'common_name', @common_names]
194
+ subelem[2].each do |elem_val|
195
+ root << XML::Node.new(subelem[1], elem_val)
196
+ end
197
+ elsif subelem[0] == :attr
198
+ #[:attr, 'rooted']
199
+ obj = elem.send(subelem[1])
200
+ if obj != nil
201
+ root[subelem[1]] = obj.to_s
202
+ end
203
+ else
204
+ raise "Not supported type of element by method generate_xml."
205
+ end
206
+ end
207
+ return root
208
+ end
209
+
210
+ private
211
+
212
+ def node_to_xml(tree, node, parent)
213
+ edge = tree.get_edge(parent, node)
214
+ branch_length = edge.distance
215
+
216
+ clade = node.to_xml(branch_length, @write_branch_length_as_subelement)
217
+
218
+ tree.children(node).each do |new_node|
219
+ clade << node_to_xml(tree, new_node, node)
220
+ end
221
+
222
+ return clade
223
+ end
224
+
225
+ end
226
+
227
+ end
228
+ end
@@ -2,9 +2,9 @@
2
2
  # = bio/db/prosite.rb - PROSITE database class
3
3
  #
4
4
  # Copyright:: Copyright (C) 2001 Toshiaki Katayama <k@bioruby.org>
5
- # Licence:: Ruby's
5
+ # License:: The Ruby License
6
6
  #
7
- # $Id: prosite.rb,v 0.16 2006/09/19 06:03:51 k Exp $
7
+ # $Id:$
8
8
  #
9
9
 
10
10
  require 'bio/db'
@@ -502,96 +502,3 @@ end # PROSITE
502
502
 
503
503
  end # Bio
504
504
 
505
-
506
- if __FILE__ == $0
507
-
508
- begin
509
- require 'pp'
510
- alias p pp
511
- rescue LoadError
512
- end
513
-
514
- ps = Bio::PROSITE.new(ARGF.read)
515
-
516
- list = %w(
517
- name
518
- division
519
- ac
520
- entry_id
521
- dt
522
- date
523
- de
524
- definition
525
- pa
526
- pattern
527
- ma
528
- profile
529
- ru
530
- rule
531
- nr
532
- statistics
533
- release
534
- swissprot_release_number
535
- swissprot_release_sequences
536
- total
537
- total_hits
538
- total_sequences
539
- positive
540
- positive_hits
541
- positive_sequences
542
- unknown
543
- unknown_hits
544
- unknown_sequences
545
- false_pos
546
- false_positive_hits
547
- false_positive_sequences
548
- false_neg
549
- false_negative_hits
550
- partial
551
- cc
552
- comment
553
- max_repeat
554
- site
555
- skip_flag
556
- dr
557
- sp_xref
558
- pdb_xref
559
- pdoc_xref
560
- )
561
-
562
- list.each do |method|
563
- puts ">>> #{method}"
564
- p ps.send(method)
565
- end
566
-
567
- puts ">>> taxon_range"
568
- p ps.taxon_range
569
- puts ">>> taxon_range(expand)"
570
- p ps.taxon_range(true)
571
-
572
- puts ">>> list_truepositive"
573
- p ps.list_truepositive
574
- puts ">>> list_truepositive(by_name)"
575
- p ps.list_truepositive(true)
576
-
577
- puts ">>> list_falsenegative"
578
- p ps.list_falsenegative
579
- puts ">>> list_falsenegative(by_name)"
580
- p ps.list_falsenegative(true)
581
-
582
- puts ">>> list_falsepositive"
583
- p ps.list_falsepositive
584
- puts ">>> list_falsepositive(by_name)"
585
- p ps.list_falsepositive(true)
586
-
587
- puts ">>> list_potentialhit"
588
- p ps.list_potentialhit
589
- puts ">>> list_potentialhit(by_name)"
590
- p ps.list_potentialhit(true)
591
-
592
- puts ">>> list_unknown"
593
- p ps.list_unknown
594
- puts ">>> list_unknown(by_name)"
595
- p ps.list_unknown(true)
596
-
597
- end
@@ -40,7 +40,7 @@ module Bio
40
40
  # To easily get started with the data you can simply type this command
41
41
  # at your shell prompt:
42
42
  #
43
- # % wget ftp://ftp.neb.com/pub/rebase/emboss*
43
+ # % wget "ftp://ftp.neb.com/pub/rebase/emboss_*"
44
44
  #
45
45
  #
46
46
  # = Usage
@@ -195,7 +195,7 @@ class REBASE
195
195
  # * _none_
196
196
  # *Returns*:: +Array+ sorted enzyme names
197
197
  def enzymes
198
- @data.keys.sort
198
+ @enzyme_names
199
199
  end
200
200
 
201
201
  # Check if supplied name is the name of an available enzyme
@@ -205,10 +205,7 @@ class REBASE
205
205
  # * +name+: Enzyme name
206
206
  # *Returns*:: +true/false+
207
207
  def enzyme_name?(name)
208
- enzymes.each do |e|
209
- return true if e.downcase == name.downcase
210
- end
211
- return false
208
+ @enzyme_names_downcased.include?(name.downcase)
212
209
  end
213
210
 
214
211
  # Save the current data
@@ -290,6 +287,8 @@ class REBASE
290
287
  d.references = []
291
288
  end
292
289
 
290
+ @enzyme_names = @data.keys.sort
291
+ @enzyme_names_downcased = @enzyme_names.map{|a| a.downcase}
293
292
  setup_enzyme_and_reference_association
294
293
  end
295
294
 
@@ -0,0 +1,120 @@
1
+ #
2
+ # = bio/db/sanger_chromatogram/abif.rb - Abif class
3
+ #
4
+ # Copyright:: Copyright (C) 2009 Anthony Underwood <anthony.underwood@hpa.org.uk>, <email2ants@gmail.com>
5
+ # License:: The Ruby License
6
+ #
7
+
8
+ require 'bio/db/sanger_chromatogram/chromatogram'
9
+
10
+ module Bio
11
+ # == Description
12
+ #
13
+ # This class inherits from the SangerChromatogram superclass. It captures the information contained
14
+ # within an ABIF format chromatogram file generated by DNA sequencing. See the SangerChromatogram class
15
+ # for usage.
16
+ class Abif < SangerChromatogram
17
+ DATA_TYPES = { 1 => 'byte', 2 => 'char', 3 => 'word', 4 => 'short', 5 => 'long',
18
+ 7 => 'float', 8 => 'double', 10 => 'date', 11 => 'time', 18 => 'pString',
19
+ 19 => 'cString', 12 => 'thumb', 13 => 'bool', 6 => 'rational', 9 => 'BCD',
20
+ 14 => 'point', 15 => 'rect', 16 => 'vPoint', 17 => 'vRect', 20 => 'tag',
21
+ 128 => 'deltaComp', 256 => 'LZWComp', 384 => 'deltaLZW', 1024 => 'user'} # User defined data types have tags numbers >= 1024
22
+
23
+ PACK_TYPES = { 'byte' => 'C', 'char' => 'c', 'word' => 'n', 'short' => 'n', 'long' => 'N',
24
+ 'date' => 'nCC', 'time' => 'CCCC', 'pString' => 'CA*', 'cString' => 'Z*',
25
+ 'float' => 'g', 'double' => 'G',
26
+ 'bool' => 'C', 'thumb' => 'NNCC', 'rational' => 'NN', 'point' => 'nn',
27
+ 'rect' => 'nnnn', 'vPoint' => 'NN', 'vRect' => 'NNNN', 'tag' => 'NN'} # Specifies how to pack each data type
28
+
29
+ #sequence attributes
30
+
31
+ # The sample title as entered when sequencing the sample (String)
32
+ attr_accessor :sample_title
33
+ # The chemistry used when sequencing, e.g. Dye terminators => 'term.' (String)
34
+ attr_accessor :chemistry
35
+
36
+ # see SangerChromatogram class for how to create an Abif object and its usage
37
+ def initialize(string)
38
+ header = string.slice(0,128)
39
+ # read in header info
40
+ @chromatogram_type, @version, @directory_tag_name, @directory_tag_number, @directory_element_type, @directory_element_size, @directory_number_of_elements, @directory_data_size, @directory_data_offset, @directory_data_handle= header.unpack("a4 n a4 N n n N N N N")
41
+ @version = @version/100.to_f
42
+ get_directory_entries(string)
43
+ # get sequence
44
+ @sequence = @directory_entries["PBAS"][1].data.map{|char| char.chr.downcase}.join("")
45
+ #get peak indices
46
+ @peak_indices = @directory_entries["PLOC"][1].data
47
+ #get qualities
48
+ @qualities = @directory_entries["PCON"][1].data
49
+ # get sample title
50
+ @sample_title = @directory_entries["SMPL"][1].data
51
+ @directory_entries["PDMF"].size > 2 ? @dye_mobility = @directory_entries["PDMF"][2].data : @dye_mobility = @directory_entries["PDMF"][1].data
52
+ #get trace data
53
+ @chemistry = @directory_entries["phCH"][1].data
54
+ base_order = @directory_entries["FWO_"][1].data.map{|char| char.chr.downcase}
55
+ (9..12).each do |data_index|
56
+ self.instance_variable_set("@#{base_order[data_index-9]}trace", @directory_entries["DATA"][data_index].data)
57
+ end
58
+
59
+ end
60
+
61
+ # Returns the data for the name.
62
+ # If not found, returns nil.
63
+ # ---
64
+ # *Arguments*:
65
+ # * (required) _name_: (String) name of the data
66
+ # * (optional) <em>tag_number</em>: (Integer) tag number (default 1)
67
+ # *Returns*:: any data type or nil
68
+ def data(name, tag_number = 1)
69
+ d = @directory_entries[name]
70
+ d ? d[tag_number].data : nil
71
+ end
72
+
73
+ private
74
+ def get_directory_entries(string)
75
+ @directory_entries = Hash.new
76
+ offset = @directory_data_offset
77
+ @directory_number_of_elements.times do
78
+ entry = DirectoryEntry.new
79
+ entry_fields = string.slice(offset, @directory_element_size)
80
+ entry.name, entry.tag_number, entry.element_type, entry.element_size, entry.number_of_elements, entry.data_size, entry.data_offset = entry_fields.unpack("a4 N n n N N N")
81
+ # populate the entry with the data it refers to
82
+ if entry.data_size > 4
83
+ get_entry_data(entry, string)
84
+ else
85
+ get_entry_data(entry, entry_fields)
86
+ end
87
+ if @directory_entries.has_key?(entry.name)
88
+ @directory_entries[entry.name][entry.tag_number] = entry
89
+ else
90
+ @directory_entries[entry.name] = Array.new
91
+ @directory_entries[entry.name][entry.tag_number] = entry
92
+ end
93
+ offset += @directory_element_size
94
+ end
95
+ end
96
+ def get_entry_data(entry, string)
97
+ if entry.data_size > 4
98
+ raw_data = string.slice(entry.data_offset, entry.data_size)
99
+ else
100
+ raw_data = string.slice(20,4)
101
+ end
102
+ if entry.element_type > 1023
103
+ # user defined data: not processed as yet by this bioruby module
104
+ entry.data = raw_data
105
+ else
106
+ pack_type = PACK_TYPES[DATA_TYPES[entry.element_type]]
107
+ pack_type.match(/\*/) ? unpack_string = pack_type : unpack_string = "#{pack_type}#{entry.number_of_elements}"
108
+ entry.data = raw_data.unpack(unpack_string)
109
+ if pack_type == "CA*" # pascal string where the first byte is a character count and should therefore be removed
110
+ entry.data.shift
111
+ end
112
+ end
113
+ end
114
+
115
+ class DirectoryEntry
116
+ attr_accessor :name, :tag_number, :element_type, :element_size, :number_of_elements, :data_size, :data_offset
117
+ attr_accessor :data
118
+ end
119
+ end
120
+ end