bio 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (303) hide show
  1. data/ChangeLog +2105 -3728
  2. data/KNOWN_ISSUES.rdoc +35 -3
  3. data/README.rdoc +8 -2
  4. data/RELEASE_NOTES.rdoc +166 -0
  5. data/bin/bioruby +4 -1
  6. data/bioruby.gemspec +146 -1
  7. data/bioruby.gemspec.erb +3 -1
  8. data/doc/ChangeLog-before-1.3.1 +3961 -0
  9. data/doc/Tutorial.rd +154 -22
  10. data/doc/Tutorial.rd.html +125 -68
  11. data/lib/bio.rb +21 -6
  12. data/lib/bio/appl/bl2seq/report.rb +11 -202
  13. data/lib/bio/appl/blast/format0.rb +0 -193
  14. data/lib/bio/appl/blast/report.rb +2 -147
  15. data/lib/bio/appl/blast/wublast.rb +0 -208
  16. data/lib/bio/appl/fasta.rb +4 -19
  17. data/lib/bio/appl/fasta/format10.rb +0 -14
  18. data/lib/bio/appl/genscan/report.rb +0 -176
  19. data/lib/bio/appl/hmmer.rb +1 -15
  20. data/lib/bio/appl/hmmer/report.rb +0 -100
  21. data/lib/bio/appl/meme/mast.rb +156 -0
  22. data/lib/bio/appl/meme/mast/report.rb +91 -0
  23. data/lib/bio/appl/meme/motif.rb +48 -0
  24. data/lib/bio/appl/psort.rb +0 -111
  25. data/lib/bio/appl/psort/report.rb +1 -45
  26. data/lib/bio/appl/pts1.rb +2 -4
  27. data/lib/bio/appl/sosui/report.rb +5 -54
  28. data/lib/bio/appl/targetp/report.rb +1 -104
  29. data/lib/bio/appl/tmhmm/report.rb +0 -36
  30. data/lib/bio/command.rb +94 -10
  31. data/lib/bio/data/aa.rb +1 -77
  32. data/lib/bio/data/codontable.rb +1 -95
  33. data/lib/bio/data/na.rb +1 -26
  34. data/lib/bio/db/aaindex.rb +1 -38
  35. data/lib/bio/db/fasta.rb +1 -134
  36. data/lib/bio/db/fasta/format_qual.rb +204 -0
  37. data/lib/bio/db/fasta/qual.rb +102 -0
  38. data/lib/bio/db/fastq.rb +645 -0
  39. data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
  40. data/lib/bio/db/fastq/format_fastq.rb +175 -0
  41. data/lib/bio/db/genbank/genbank.rb +1 -86
  42. data/lib/bio/db/gff.rb +0 -17
  43. data/lib/bio/db/go.rb +4 -72
  44. data/lib/bio/db/kegg/common.rb +112 -0
  45. data/lib/bio/db/kegg/compound.rb +29 -20
  46. data/lib/bio/db/kegg/drug.rb +74 -34
  47. data/lib/bio/db/kegg/enzyme.rb +26 -5
  48. data/lib/bio/db/kegg/genes.rb +128 -15
  49. data/lib/bio/db/kegg/genome.rb +3 -41
  50. data/lib/bio/db/kegg/glycan.rb +19 -24
  51. data/lib/bio/db/kegg/orthology.rb +16 -56
  52. data/lib/bio/db/kegg/reaction.rb +81 -28
  53. data/lib/bio/db/kegg/taxonomy.rb +1 -52
  54. data/lib/bio/db/litdb.rb +1 -16
  55. data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
  56. data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
  57. data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
  58. data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
  59. data/lib/bio/db/prosite.rb +2 -95
  60. data/lib/bio/db/rebase.rb +5 -6
  61. data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
  62. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
  63. data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
  64. data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
  65. data/lib/bio/io/das.rb +0 -44
  66. data/lib/bio/io/ddbjxml.rb +1 -181
  67. data/lib/bio/io/flatfile.rb +1 -7
  68. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  69. data/lib/bio/io/keggapi.rb +0 -442
  70. data/lib/bio/io/ncbirest.rb +130 -132
  71. data/lib/bio/io/ncbisoap.rb +2 -1
  72. data/lib/bio/io/pubmed.rb +0 -88
  73. data/lib/bio/location.rb +0 -73
  74. data/lib/bio/pathway.rb +0 -171
  75. data/lib/bio/sequence.rb +18 -1
  76. data/lib/bio/sequence/adapter.rb +3 -0
  77. data/lib/bio/sequence/format.rb +16 -0
  78. data/lib/bio/sequence/quality_score.rb +205 -0
  79. data/lib/bio/tree.rb +70 -5
  80. data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
  81. data/lib/bio/util/sirna.rb +1 -23
  82. data/lib/bio/version.rb +1 -1
  83. data/sample/demo_aaindex.rb +67 -0
  84. data/sample/demo_aminoacid.rb +101 -0
  85. data/sample/demo_bl2seq_report.rb +220 -0
  86. data/sample/demo_blast_report.rb +285 -0
  87. data/sample/demo_codontable.rb +119 -0
  88. data/sample/demo_das.rb +105 -0
  89. data/sample/demo_ddbjxml.rb +212 -0
  90. data/sample/demo_fasta_remote.rb +51 -0
  91. data/sample/demo_fastaformat.rb +105 -0
  92. data/sample/demo_genbank.rb +132 -0
  93. data/sample/demo_genscan_report.rb +202 -0
  94. data/sample/demo_gff1.rb +49 -0
  95. data/sample/demo_go.rb +98 -0
  96. data/sample/demo_hmmer_report.rb +149 -0
  97. data/sample/demo_kegg_compound.rb +57 -0
  98. data/sample/demo_kegg_drug.rb +65 -0
  99. data/sample/demo_kegg_genome.rb +74 -0
  100. data/sample/demo_kegg_glycan.rb +72 -0
  101. data/sample/demo_kegg_orthology.rb +62 -0
  102. data/sample/demo_kegg_reaction.rb +66 -0
  103. data/sample/demo_kegg_taxonomy.rb +92 -0
  104. data/sample/demo_keggapi.rb +502 -0
  105. data/sample/demo_litdb.rb +42 -0
  106. data/sample/demo_locations.rb +99 -0
  107. data/sample/demo_ncbi_rest.rb +130 -0
  108. data/sample/demo_nucleicacid.rb +49 -0
  109. data/sample/demo_pathway.rb +196 -0
  110. data/sample/demo_prosite.rb +120 -0
  111. data/sample/demo_psort.rb +138 -0
  112. data/sample/demo_psort_report.rb +70 -0
  113. data/sample/demo_pubmed.rb +118 -0
  114. data/sample/demo_sirna.rb +63 -0
  115. data/sample/demo_sosui_report.rb +89 -0
  116. data/sample/demo_targetp_report.rb +135 -0
  117. data/sample/demo_tmhmm_report.rb +68 -0
  118. data/sample/pmfetch.rb +13 -4
  119. data/sample/pmsearch.rb +15 -4
  120. data/sample/test_phyloxml_big.rb +205 -0
  121. data/test/bioruby_test_helper.rb +61 -0
  122. data/test/data/KEGG/1.1.1.1.enzyme +935 -0
  123. data/test/data/KEGG/C00025.compound +102 -0
  124. data/test/data/KEGG/D00063.drug +104 -0
  125. data/test/data/KEGG/G00024.glycan +47 -0
  126. data/test/data/KEGG/G01366.glycan +18 -0
  127. data/test/data/KEGG/K02338.orthology +902 -0
  128. data/test/data/KEGG/R00006.reaction +14 -0
  129. data/test/data/fastq/README.txt +109 -0
  130. data/test/data/fastq/error_diff_ids.fastq +20 -0
  131. data/test/data/fastq/error_double_qual.fastq +22 -0
  132. data/test/data/fastq/error_double_seq.fastq +22 -0
  133. data/test/data/fastq/error_long_qual.fastq +20 -0
  134. data/test/data/fastq/error_no_qual.fastq +20 -0
  135. data/test/data/fastq/error_qual_del.fastq +20 -0
  136. data/test/data/fastq/error_qual_escape.fastq +20 -0
  137. data/test/data/fastq/error_qual_null.fastq +0 -0
  138. data/test/data/fastq/error_qual_space.fastq +21 -0
  139. data/test/data/fastq/error_qual_tab.fastq +21 -0
  140. data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
  141. data/test/data/fastq/error_qual_vtab.fastq +20 -0
  142. data/test/data/fastq/error_short_qual.fastq +20 -0
  143. data/test/data/fastq/error_spaces.fastq +20 -0
  144. data/test/data/fastq/error_tabs.fastq +21 -0
  145. data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
  146. data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
  147. data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
  148. data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
  149. data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
  150. data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
  151. data/test/data/fastq/error_trunc_in_title.fastq +17 -0
  152. data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
  153. data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
  154. data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
  155. data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
  156. data/test/data/fastq/longreads_as_illumina.fastq +40 -0
  157. data/test/data/fastq/longreads_as_sanger.fastq +40 -0
  158. data/test/data/fastq/longreads_as_solexa.fastq +40 -0
  159. data/test/data/fastq/longreads_original_sanger.fastq +120 -0
  160. data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
  161. data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
  162. data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
  163. data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
  164. data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
  165. data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
  166. data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
  167. data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
  168. data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
  169. data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
  170. data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
  171. data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
  172. data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
  173. data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
  174. data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
  175. data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
  176. data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
  177. data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
  178. data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
  179. data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
  180. data/test/data/meme/db +0 -0
  181. data/test/data/meme/mast +0 -0
  182. data/test/data/meme/mast.out +13 -0
  183. data/test/data/meme/meme.out +3 -0
  184. data/test/data/phyloxml/apaf.xml +666 -0
  185. data/test/data/phyloxml/bcl_2.xml +2097 -0
  186. data/test/data/phyloxml/made_up.xml +144 -0
  187. data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
  188. data/test/data/phyloxml/phyloxml_examples.xml +415 -0
  189. data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
  190. data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
  191. data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
  192. data/test/functional/bio/appl/test_pts1.rb +7 -5
  193. data/test/functional/bio/io/test_ensembl.rb +4 -3
  194. data/test/functional/bio/io/test_pubmed.rb +9 -3
  195. data/test/functional/bio/io/test_soapwsdl.rb +5 -4
  196. data/test/functional/bio/io/test_togows.rb +5 -4
  197. data/test/functional/bio/sequence/test_output_embl.rb +6 -4
  198. data/test/functional/bio/test_command.rb +54 -5
  199. data/test/runner.rb +5 -3
  200. data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
  201. data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
  202. data/test/unit/bio/appl/blast/test_report.rb +5 -4
  203. data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
  204. data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
  205. data/test/unit/bio/appl/genscan/test_report.rb +8 -9
  206. data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
  207. data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
  208. data/test/unit/bio/appl/mafft/test_report.rb +6 -5
  209. data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
  210. data/test/unit/bio/appl/meme/test_mast.rb +103 -0
  211. data/test/unit/bio/appl/meme/test_motif.rb +38 -0
  212. data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
  213. data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
  214. data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
  215. data/test/unit/bio/appl/sim4/test_report.rb +5 -4
  216. data/test/unit/bio/appl/sosui/test_report.rb +6 -5
  217. data/test/unit/bio/appl/targetp/test_report.rb +5 -3
  218. data/test/unit/bio/appl/test_blast.rb +5 -4
  219. data/test/unit/bio/appl/test_fasta.rb +4 -2
  220. data/test/unit/bio/appl/test_pts1.rb +4 -2
  221. data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
  222. data/test/unit/bio/data/test_aa.rb +5 -3
  223. data/test/unit/bio/data/test_codontable.rb +5 -4
  224. data/test/unit/bio/data/test_na.rb +5 -3
  225. data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
  226. data/test/unit/bio/db/embl/test_common.rb +4 -2
  227. data/test/unit/bio/db/embl/test_embl.rb +6 -6
  228. data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
  229. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
  230. data/test/unit/bio/db/embl/test_sptr.rb +6 -8
  231. data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
  232. data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
  233. data/test/unit/bio/db/kegg/test_compound.rb +146 -0
  234. data/test/unit/bio/db/kegg/test_drug.rb +194 -0
  235. data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
  236. data/test/unit/bio/db/kegg/test_genes.rb +32 -4
  237. data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
  238. data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
  239. data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
  240. data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
  241. data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
  242. data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
  243. data/test/unit/bio/db/test_aaindex.rb +6 -6
  244. data/test/unit/bio/db/test_fasta.rb +5 -46
  245. data/test/unit/bio/db/test_fastq.rb +829 -0
  246. data/test/unit/bio/db/test_gff.rb +4 -2
  247. data/test/unit/bio/db/test_lasergene.rb +7 -5
  248. data/test/unit/bio/db/test_medline.rb +4 -2
  249. data/test/unit/bio/db/test_newick.rb +6 -6
  250. data/test/unit/bio/db/test_nexus.rb +4 -2
  251. data/test/unit/bio/db/test_phyloxml.rb +769 -0
  252. data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
  253. data/test/unit/bio/db/test_prosite.rb +6 -5
  254. data/test/unit/bio/db/test_qual.rb +63 -0
  255. data/test/unit/bio/db/test_rebase.rb +5 -3
  256. data/test/unit/bio/db/test_soft.rb +7 -6
  257. data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
  258. data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
  259. data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
  260. data/test/unit/bio/io/test_ddbjxml.rb +4 -3
  261. data/test/unit/bio/io/test_ensembl.rb +5 -3
  262. data/test/unit/bio/io/test_fastacmd.rb +4 -3
  263. data/test/unit/bio/io/test_flatfile.rb +6 -5
  264. data/test/unit/bio/io/test_soapwsdl.rb +4 -3
  265. data/test/unit/bio/io/test_togows.rb +4 -2
  266. data/test/unit/bio/sequence/test_aa.rb +5 -3
  267. data/test/unit/bio/sequence/test_common.rb +4 -2
  268. data/test/unit/bio/sequence/test_compat.rb +4 -2
  269. data/test/unit/bio/sequence/test_dblink.rb +5 -3
  270. data/test/unit/bio/sequence/test_na.rb +4 -2
  271. data/test/unit/bio/sequence/test_quality_score.rb +330 -0
  272. data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
  273. data/test/unit/bio/test_alignment.rb +5 -3
  274. data/test/unit/bio/test_command.rb +4 -3
  275. data/test/unit/bio/test_db.rb +5 -3
  276. data/test/unit/bio/test_feature.rb +4 -2
  277. data/test/unit/bio/test_location.rb +4 -2
  278. data/test/unit/bio/test_map.rb +5 -3
  279. data/test/unit/bio/test_pathway.rb +4 -2
  280. data/test/unit/bio/test_reference.rb +4 -2
  281. data/test/unit/bio/test_sequence.rb +5 -3
  282. data/test/unit/bio/test_shell.rb +5 -3
  283. data/test/unit/bio/test_tree.rb +6 -6
  284. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
  285. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
  286. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
  287. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
  288. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
  289. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
  290. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
  291. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
  292. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
  293. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
  294. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
  295. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
  296. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
  297. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
  298. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
  299. data/test/unit/bio/util/test_color_scheme.rb +5 -3
  300. data/test/unit/bio/util/test_contingency_table.rb +5 -3
  301. data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
  302. data/test/unit/bio/util/test_sirna.rb +6 -4
  303. metadata +147 -2
@@ -0,0 +1,1174 @@
1
+ #
2
+ # = bio/db/phyloxml_elements.rb - PhyloXML Element classes
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Diana Jaunzeikare <latvianlinuxgirl@gmail.com>
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id:$
9
+ #
10
+ # == Description
11
+ #
12
+ # This file contains the classes to represent PhyloXML elements.
13
+ #
14
+ # == References
15
+ #
16
+ # * http://www.phyloxml.org
17
+ #
18
+ # * https://www.nescent.org/wg_phyloinformatics/PhyloSoC:PhyloXML_support_in_BioRuby
19
+
20
+ require 'bio/tree'
21
+ require 'bio/sequence'
22
+ require 'bio/reference'
23
+
24
+ # Autoload definition
25
+ module Bio
26
+ module PhyloXML
27
+ autoload :Parser, 'bio/db/phyloxml/phyloxml_parser'
28
+ autoload :Writer, 'bio/db/phyloxml/phyloxml_writer'
29
+ end
30
+ end
31
+
32
+ require 'libxml'
33
+
34
+ module Bio
35
+
36
# General taxonomy record.
#
# A plain attribute holder: nothing is validated here. The two list-valued
# attributes (common_names and synonyms) start out as empty arrays; every
# other attribute is nil until assigned.
class Taxonomy
  # Any code/abbreviation/mnemonic, such as Bsu for Bacillus subtilis.
  # Intended pattern: [a-zA-Z0-9_]{2,10} (not checked by this class).
  attr_accessor :code

  # String.
  attr_accessor :scientific_name

  # An array of strings.
  attr_accessor :common_names

  # Value comes from the list: domain, kingdom, subkingdom, branch,
  # infrakingdom, superphylum, phylum, subphylum, infraphylum, microphylum,
  # superdivision, division, subdivision, infradivision, superclass, class,
  # subclass, infraclass, superlegion, legion, sublegion, infralegion,
  # supercohort, cohort, subcohort, infracohort, superorder, order, suborder,
  # superfamily, family, subfamily, supertribe, tribe, subtribe, infratribe,
  # genus, subgenus, superspecies, species, subspecies, variety, subvariety,
  # form, subform, cultivar, unknown, other
  attr_accessor :rank

  # Keeps the authority, such as 'J. G. Cooper, 1863', associated with the
  # 'scientific_name'.
  attr_accessor :authority

  # An array of strings. Holds synonyms for scientific names or common names.
  attr_accessor :synonyms

  # Creates an empty record with two separate, empty name lists.
  def initialize
    @synonyms = []
    @common_names = []
  end
end
69
+
70
+ module PhyloXML
71
+
72
+
73
# PhyloXML taxonomy element: Bio::Taxonomy plus the fields that only exist
# on the PhyloXML level.
class Taxonomy < Bio::Taxonomy
  # String. Unique identifier of a taxon.
  attr_accessor :taxonomy_id

  # Used to link other elements to a taxonomy (on the xml-level).
  attr_accessor :id_source

  # Uri object.
  attr_accessor :uri

  # Array of Other objects. Used to save additional information from other
  # than PhyloXML namespace.
  attr_accessor :other

  # Runs the superclass initializer, then starts with an empty +other+ list.
  def initialize
    super
    @other = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # ---
  # *Returns*:: the LibXML::XML::Node created for the 'taxonomy' element
  def to_xml
    element = LibXML::XML::Node.new('taxonomy')
    # NOTE(review): no accessor for @type is visible in this class, so this
    # attribute is only written when @type has been set by some other means.
    element["type"] = @type unless @type.nil?
    element["id_source"] = @id_source unless @id_source.nil?

    # Child element descriptions handed to the writer, in output order.
    children = [
      [:complex, 'id', @taxonomy_id],
      [:pattern, 'code', @code, Regexp.new("^[a-zA-Z0-9_]{2,10}$")],
      [:simple, 'scientific_name', @scientific_name],
      [:simple, 'authority', @authority],
      [:simplearr, 'common_name', @common_names],
      [:simplearr, 'synonym', @synonyms],
      [:simple, 'rank', @rank],
      [:complex, 'uri', @uri]
    ]
    PhyloXML::Writer.generate_xml(element, self, children)
    #@todo anything else

    element
  end
end
112
+
113
# Object to hold one phylogeny element (and its subelements).
# Extended version of Bio::Tree.
class Tree < Bio::Tree
  # String. Name of tree (name subelement of phylogeny element).
  attr_accessor :name

  # Id object.
  attr_accessor :phylogeny_id

  # String. Description of tree.
  attr_accessor :description

  # Boolean. Can be used to indicate that the phylogeny is not allowed to be
  # rooted differently (i.e. because it is associated with root dependent
  # data, such as gene duplications).
  attr_accessor :rerootable

  # Boolean. Required element.
  attr_accessor :rooted

  # Array of Property objects. Allows for typed and referenced properties
  # from external resources to be attached.
  attr_accessor :properties

  # CladeRelation objects (starts as an empty Array). Used to express a typed
  # relationship between two clades. For example it could be used to
  # describe multiple parents of a clade.
  attr_accessor :clade_relations

  # SequenceRelation objects (starts as an empty Array). Used to express a
  # typed relationship between two sequences. For example it could be used
  # to describe an orthology.
  attr_accessor :sequence_relations

  # Array of Confidence objects.
  attr_accessor :confidences

  # String.
  attr_accessor :branch_length_unit

  # String. Indicates the type of phylogeny (i.e. 'gene tree').
  attr_accessor :type

  # String. Date.
  attr_accessor :date

  # Array of Other objects. Used to save additional information from other
  # than PhyloXML namespace.
  attr_accessor :other

  # Runs the Bio::Tree initializer, then gives every list-valued attribute
  # its own empty Array.
  def initialize
    super
    @properties = []
    @confidences = []
    @clade_relations = []
    @sequence_relations = []
    @other = []
  end
end
165
+
166
+
167
# == Description
# Class to hold clade element of phyloXML.
class Node

  # Events at the root node of a clade (e.g. one gene duplication).
  attr_accessor :events

  # String. Used to link other elements to a clade (node) (on the xml-level).
  attr_accessor :id_source

  # String. Name of the node.
  attr_accessor :name

  # Float. Branch width for this node (including parent branch). Applies for
  # the whole clade unless overwritten in sub-clades.
  attr_reader :width

  # Stores the branch width, converting the given value with #to_f.
  def width=(str)
    @width = str.to_f
  end

  # Array of Taxonomy objects. Describes taxonomic information for a clade.
  attr_accessor :taxonomies

  # Array of Confidence objects. Indicates the support for a clade/parent
  # branch.
  attr_accessor :confidences

  # BranchColor object. Applies for the whole clade unless overwritten in
  # sub-clade.
  attr_accessor :color

  # Id object.
  attr_accessor :node_id

  # Array of Sequence objects. Represents a molecular sequence (Protein, DNA,
  # RNA) associated with a node.
  attr_accessor :sequences

  # BinaryCharacters object. The names and/or counts of binary characters
  # present, gained, and lost at the root of a clade.
  attr_accessor :binary_characters

  # Array of Distribution objects. The geographic distribution of the items
  # of a clade (species, sequences), intended for phylogeographic
  # applications.
  attr_accessor :distributions

  # Date object. A date associated with a clade/node.
  attr_accessor :date

  # Array of Reference objects. A literature reference for a clade.
  attr_accessor :references

  # An array of Property objects, for example depth for sea animals.
  attr_accessor :properties

  # Array of Other objects. Used to save additional information from other
  # than PhyloXML namespace.
  attr_accessor :other

  # Creates a node whose list-valued attributes are all empty arrays.
  def initialize
    @confidences = []
    @sequences = []
    @taxonomies = []
    @distributions = []
    @references = []
    @properties = []
    @other = []
  end

  # Converts to a Bio::Tree::Node object. If it contains several taxonomies,
  # Bio::Tree::Node#scientific_name and Bio::Tree::Node#taxonomy_id are taken
  # from the first taxonomy.
  #
  # If there are several confidence values, the first with bootstrap type
  # will be returned as Bio::Tree::Node#bootstrap
  #
  #   tree = phyloxmlparser.next_tree
  #
  #   node = tree.get_node_by_name("A").to_biotreenode
  #
  # ---
  # *Returns*:: Bio::Tree::Node
  def to_biotreenode
    node = Bio::Tree::Node.new
    node.name = @name

    #@todo what if there are more?
    first_taxonomy = @taxonomies.first
    unless first_taxonomy.nil?
      node.scientific_name = first_taxonomy.scientific_name
      # The reader on the taxonomy object is #taxonomy_id; the previous
      # spelling ("taxononmy_id") raised NoMethodError here.
      node.taxonomy_id = first_taxonomy.taxonomy_id
    end

    bootstrap = @confidences.find { |confidence| confidence.type == "bootstrap" }
    node.bootstrap = bootstrap.value unless bootstrap.nil?

    node
  end

  # Extracts the relevant information from node (specifically taxonomy and
  # sequence) to create Bio::Sequence object. Node can have several
  # sequences, so parameter to this method is to specify which sequence to
  # extract.
  #
  # The classification of the result is the list of scientific names of all
  # taxonomies of this node; the species is the scientific name of a
  # taxonomy whose rank is "species" (the last one, if there are several).
  #
  # ---
  # *Arguments*:
  # * (optional) _seq_i_: index into #sequences (default 0)
  # *Returns*:: Bio::Sequence
  def extract_biosequence(seq_i=0)
    seq = @sequences[seq_i].to_biosequence
    seq.classification = []
    @taxonomies.each do |t|
      seq.classification << t.scientific_name
      seq.species = t.scientific_name if t.rank == "species"
    end

    #seq.division => .. http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#3_2
    # It doesn't seem there is anything in PhyloXML corresponding to this.

    seq
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # ---
  # *Arguments*:
  # * _branch_length_: written with #to_s; nothing is written when nil
  # * _write_branch_length_as_subelement_: when true the branch length
  #   becomes a 'branch_length' child element, otherwise an attribute of the
  #   clade element
  # *Returns*:: the LibXML::XML::Node created for the 'clade' element
  def to_xml(branch_length, write_branch_length_as_subelement)
    clade = LibXML::XML::Node.new('clade')

    PhyloXML::Writer.generate_xml(clade, self, [[:simple, 'name', @name]])

    unless branch_length.nil?
      if write_branch_length_as_subelement
        clade << LibXML::XML::Node.new('branch_length', branch_length.to_s)
      else
        clade["branch_length"] = branch_length.to_s
      end
    end

    #generate all elements, except clade
    # NOTE(review): node_id is documented as an Id object but is passed as
    # :simple here; confirm against PhyloXML::Writer.generate_xml.
    PhyloXML::Writer.generate_xml(clade, self, [
        [:attr, "id_source"],
        [:objarr, 'confidence', 'confidences'],
        [:simple, 'width', @width],
        # The branch colour is stored through the #color accessor (@color).
        [:complex, 'branch_color', @color],
        [:simple, 'node_id', @node_id],
        [:objarr, 'taxonomy', 'taxonomies'],
        [:objarr, 'sequence', 'sequences'],
        [:complex, 'events', @events],
        [:complex, 'binary_characters', @binary_characters],
        [:objarr, 'distribution', 'distributions'],
        [:complex, 'date', @date],
        [:objarr, 'reference', 'references'],
        [:objarr, 'property', 'properties']])

    clade
  end

end #Node
320
+
321
# == Description
# Events at the root node of a clade (e.g. one gene duplication).
class Events
  # Values accepted by #type=.
  ALLOWED_TYPES = ['transfer', 'fusion', 'speciation_or_duplication',
                   'other', 'mixed', 'unassigned'].freeze

  # Value comes from list: transfer, fusion, speciation_or_duplication,
  # other, mixed, unassigned. Assigned through the validating #type= below.
  attr_reader :type

  # Integer
  attr_reader :duplications, :speciations, :losses

  # Confidence object
  attr_accessor :confidence

  # Stores the number of duplications, converting the value with #to_i.
  def duplications=(str)
    @duplications = str.to_i
  end

  # Stores the number of losses, converting the value with #to_i.
  def losses=(str)
    @losses = str.to_i
  end

  # Stores the number of speciations, converting the value with #to_i.
  def speciations=(str)
    @speciations = str.to_i
  end

  # Sets the event type.
  #
  # The value is checked before it is stored, so a rejected value leaves the
  # previously assigned type untouched.
  #
  # ---
  # *Arguments*:
  # * _str_: one of the values in ALLOWED_TYPES
  # *Raises*:: RuntimeError if _str_ is not one of the allowed values
  def type=(str)
    #@todo add unit test for this
    unless ALLOWED_TYPES.include?(str)
      raise "Warning #{str} is not one of the allowed values"
    end
    @type = str
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # ---
  # *Returns*:: the LibXML::XML::Node created for the 'events' element
  def to_xml
    #@todo add unit test
    events = LibXML::XML::Node.new('events')
    PhyloXML::Writer.generate_xml(events, self, [
      [:simple, 'type', @type],
      [:simple, 'duplications', @duplications],
      [:simple, 'speciations', @speciations],
      [:simple, 'losses', @losses],
      [:complex, 'confidence', @confidence]])
    events
  end

end
375
+
376
# A general purpose confidence element. For example this can be used to
# express the bootstrap support value of a clade (in which case the 'type'
# attribute is 'bootstrap').
class Confidence
  # String. The type of confidence measure, for example, bootstrap.
  attr_accessor :type

  # Float. The value of confidence measure.
  attr_accessor :value

  # Stores +type+ as given and +value+ converted with #to_f.
  def initialize(type, value)
    @value = value.to_f
    @type = type
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # ---
  # *Returns*:: the LibXML::XML::Node created for the 'confidence' element
  # *Raises*:: RuntimeError when no type has been set
  def to_xml
    raise "Type is a required attribute for confidence." if @type.nil?

    element = LibXML::XML::Node.new('confidence', @value.to_s)
    element["type"] = @type
    element
  end
end
401
+
402
# == Description
#
# The geographic distribution of the items of a clade (species, sequences),
# intended for phylogeographic applications.
class Distribution
  # String. Free text description of location.
  attr_accessor :desc

  # Array of Point objects. Holds coordinates of the location.
  attr_accessor :points

  # Array of Polygon objects.
  attr_accessor :polygons

  # Starts with no points and no polygons.
  def initialize
    @polygons = []
    @points = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # ---
  # *Returns*:: the LibXML::XML::Node created for the 'distribution' element
  def to_xml
    element = LibXML::XML::Node.new('distribution')
    # Child element descriptions handed to the writer, in output order.
    children = [
      [:simple, 'desc', @desc],
      [:objarr, 'point', 'points'],
      [:objarr, 'polygon', 'polygons']
    ]
    PhyloXML::Writer.generate_xml(element, self, children)
    element
  end

end #Distribution class
431
+
432
+
433
# == Description
#
# The coordinates of a point with an optional altitude. Required attribute
# 'geodetic_datum' is used to indicate the geodetic datum (also called
# 'map datum'), for example Google's KML uses 'WGS84'.
class Point
  # Float. Latitude, longitude and altitude; assigned through the
  # converting writers below.
  attr_reader :lat, :long, :alt

  # String. Altitude unit.
  attr_accessor :alt_unit

  # Geodetic datum / map datum.
  attr_accessor :geodetic_datum

  # Stores the latitude converted with #to_f; a nil argument is ignored.
  def lat=(str)
    @lat = str.to_f unless str.nil?
  end

  # Stores the longitude converted with #to_f; a nil argument is ignored.
  def long=(str)
    @long = str.to_f unless str.nil?
  end

  # Stores the altitude converted with #to_f; a nil argument is ignored.
  def alt=(str)
    @alt = str.to_f unless str.nil?
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # ---
  # *Returns*:: the LibXML::XML::Node created for the 'point' element
  # *Raises*:: RuntimeError when geodetic_datum has not been set
  def to_xml
    if @geodetic_datum.nil?
      raise "Geodedic datum is a required attribute of Point element."
    end

    point = LibXML::XML::Node.new('point')
    point["geodetic_datum"] = @geodetic_datum
    point["alt_unit"] = @alt_unit unless @alt_unit.nil?

    coordinates = [
      [:simple, 'lat', @lat],
      [:simple, 'long', @long],
      [:simple, 'alt', @alt]
    ]
    PhyloXML::Writer.generate_xml(point, self, coordinates)
    #@todo check if characters are correctly generated, like Zuric
    point
  end

end
482
+
483
+
484
# == Description
#
# A polygon defined by a list of Point objects.
class Polygon
  # Array of Point objects.
  attr_accessor :points

  # Starts with an empty list of points.
  def initialize
    @points = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # ---
  # *Returns*:: the LibXML::XML::Node created for the 'polygon' element, with
  #             the xml of every point appended in order; nil when the
  #             polygon has fewer than three points
  def to_xml
    return nil unless @points.length > 2

    polygon = LibXML::XML::Node.new('polygon')
    @points.each { |point| polygon << point.to_xml }
    polygon
  end

end
509
+
510
# == Description
# Element Sequence is used to represent a molecular sequence (Protein, DNA,
# RNA) associated with a node.
class Sequence
  # Type of sequence (rna, dna, protein)
  attr_accessor :type

  # Full name (e.g. muscle Actin)
  attr_accessor :name

  # String. Used to link with other elements.
  attr_accessor :id_source

  # String. One intended use for 'id_ref' is to link a sequence to a taxonomy
  # (via the taxonomy's 'id_source') in the case of multiple sequences and
  # taxonomies per node.
  attr_accessor :id_ref

  # Short (maximal ten characters) symbol of the sequence (e.g. 'ACTM')
  attr_accessor :symbol

  # Accession object. Holds source and identifier for the sequence.
  attr_accessor :accession

  # String. Location of a sequence on a genome/chromosome
  attr_accessor :location

  # String. The actual sequence is stored here.
  attr_reader :mol_seq

  # Boolean. Used to indicate that this molecular sequence is aligned with
  # all other sequences in the same phylogeny for which 'is aligned' is true
  # as well (which, in most cases, means that gaps were introduced, and that
  # all sequences for which 'is aligned' is true must have the same length)
  attr_reader :is_aligned

  # Uri object
  attr_accessor :uri

  # Array of Annotation objects. Annotations of molecular sequence.
  attr_accessor :annotations

  # DomainArchitecture object. Describes domain architecture of a protein.
  attr_accessor :domain_architecture

  # Array of Other objects. Used to save additional information from other
  # than PhyloXML namespace.
  attr_accessor :other

  def initialize
    @annotations = []
    @other = []
  end

  # Sets the alignment flag. Accepts the strings 'true' / 'false' as well as
  # the booleans true / false; any other value stores nil.
  def is_aligned=(str)
    @is_aligned = case str
                  when true, 'true' then true
                  when false, 'false' then false
                  else nil
                  end
  end

  # Returns the stored alignment flag (true, false or nil).
  def is_aligned?
    @is_aligned
  end

  # Stores the molecular sequence. Raises RuntimeError when the string does
  # not match the pattern below.
  def mol_seq=(str)
    # NOTE(review): ^ and $ anchor per line, so a multi-line string passes as
    # soon as one of its lines matches. Left unchanged because callers may
    # rely on it for text with surrounding newlines -- confirm before
    # tightening to \A...\z.
    if str =~ /^[a-zA-Z\.\-\?\*_]+$/
      @mol_seq = str
    else
      raise "mol_seq element of Sequence does not follow the pattern."
    end
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # Raises RuntimeError when type is set to something other than dna, rna
  # or protein.
  def to_xml
    if !@type.nil? && !["dna", "rna", "protein"].include?(@type)
      raise "Type attribute of Sequence has to be one of dna, rna or protein."
    end

    seq = LibXML::XML::Node.new('sequence')
    seq["type"] = @type unless @type.nil?

    PhyloXML::Writer.generate_xml(seq, self, [
      [:attr, 'id_source'],
      [:attr, 'id_ref'],
      [:pattern, 'symbol', @symbol, Regexp.new("^\\S{1,10}$")],
      [:complex, 'accession', @accession],
      [:simple, 'name', @name],
      [:simple, 'location', @location]])

    # mol_seq is written by hand (its pattern is already enforced by
    # mol_seq=) so that the is_aligned attribute can be attached to it.
    unless @mol_seq.nil?
      molseq = LibXML::XML::Node.new('mol_seq', @mol_seq)
      molseq["is_aligned"] = @is_aligned.to_s unless @is_aligned.nil?
      seq << molseq
    end

    PhyloXML::Writer.generate_xml(seq, self, [
      [:complex, 'uri', @uri],
      [:objarr, 'annotation', 'annotations'],
      [:complex, 'domain_architecture', @domain_architecture]])
    #@todo test domain_architecture
    #any
    return seq
  end

  # converts Bio::PhyloXML:Sequence to Bio::Sequence object.
  # ---
  # *Returns*:: Bio::Sequence
  def to_biosequence
    #type is not a required attribute in phyloxml (nor any other Sequence
    #element) it might not hold any value, so we will not check what type it is.
    seq = Bio::Sequence.auto(@mol_seq)

    # The accession is optional, so only copy it when one is present.
    unless @accession.nil?
      seq.id_namespace = @accession.source
      seq.entry_id = @accession.value
      # seq.primary_accession = @accession.value could be this
    end
    seq.definition = @name
    #seq.comments = @name //this one?
    unless @uri.nil?
      h = {'url' => @uri.uri,
           'title' => @uri.desc }
      seq.references << Bio::Reference.new(h)
    end
    seq.molecule_type = 'RNA' if @type == 'rna'
    seq.molecule_type = 'DNA' if @type == 'dna'

    #@todo deal with the properties. There might be properties which look
    #like bio sequence attributes or features
    return seq
  end
end
644
+
645
# == Description
# Element Accession is used to capture the local part in a sequence
# identifier.
class Accession
  # String. Source of the accession id. Example: "UniProtKB"
  attr_accessor :source

  # String. Value of the accession id. Example: "P17304"
  attr_accessor :value

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # Raises RuntimeError when source has not been set.
  def to_xml
    if @source.nil?
      raise "Source attribute is required for Accession object."
    end

    node = LibXML::XML::Node.new('accession', @value)
    node['source'] = @source
    node
  end
end
664
+
665
# A uniform resource identifier. In general, this is expected to be an URL
# (for example, to link to an image on a website, in which case the 'type'
# attribute might be 'image' and 'desc' might be 'image of a California
# sea hare')
class Uri
  # String. Description of the uri. For example, 'image of a California
  # sea hare'.
  attr_accessor :desc

  # String. For example, image.
  attr_accessor :type

  # String. URL of the resource.
  attr_accessor :uri

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Returns nil when no uri is set; otherwise a 'uri' node whose content is
  # the uri, after handing the desc and type attributes to the writer.
  def to_xml
    return nil if @uri.nil?

    node = LibXML::XML::Node.new('uri', @uri)
    attribute_spec = [[:attr, 'desc'], [:attr, 'type']]
    Writer.generate_xml(node, self, attribute_spec)
    node
  end
end
688
+
689
# == Description
#
# The annotation of a molecular sequence. It is recommended to annotate by
# using the optional 'ref' attribute (some examples of acceptable values
# for the ref attribute: 'GO:0008270', 'KEGG:Tetrachloroethene degradation',
# 'EC:1.1.1.1').
class Annotation
  # String. For example, 'GO:0008270', 'KEGG:Tetrachloroethene degradation',
  # 'EC:1.1.1.1'
  attr_accessor :ref

  # String
  attr_accessor :source

  # String. Evidence for an annotation as free text (e.g. 'experimental')
  attr_accessor :evidence

  # String. Type of the annotation.
  attr_accessor :type

  # String. Free text description.
  attr_accessor :desc

  # Confidence object. Type and value of support for an annotation.
  attr_accessor :confidence

  # Array of Property objects. Allows for further, typed and referenced
  # annotations from external resources
  attr_accessor :properties

  # Uri object.
  attr_accessor :uri

  def initialize
    #@todo add unit test for this, since didn't break anything when changed from property to properties
    @properties = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # NOTE(review): source, evidence and type are not referenced here, so they
  # are not part of the generated node -- confirm whether that is intended.
  def to_xml
    node = LibXML::XML::Node.new('annotation')
    node["ref"] = @ref unless @ref.nil?

    children = [[:simple, 'desc', @desc],
                [:complex, 'confidence', @confidence],
                [:objarr, 'property', 'properties'],
                [:complex, 'uri', @uri]]
    PhyloXML::Writer.generate_xml(node, self, children)
    node
  end
end
731
+
732
# An identifier made of a value and an optional provider.
class Id
  # The provider of Id, for example, NCBI.
  attr_accessor :provider

  # The value of Id.
  attr_accessor :value

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Builds an 'id' node whose content is the value; the provider attribute
  # is only set when a provider is present.
  def to_xml
    node = LibXML::XML::Node.new('id', @value)
    node["provider"] = @provider unless @provider.nil?
    node
  end
end
745
+
746
# == Description
# This indicates the color of a node when rendered (the color applies
# to the whole node and its children unless overwritten by the
# color(s) of sub clades).
class BranchColor
  # Integer
  attr_reader :red, :green, :blue

  # Stores the red component converted with to_i.
  def red=(str)
    @red = str.to_i
  end

  # Stores the green component converted with to_i.
  def green=(str)
    @green = str.to_i
  end

  # Stores the blue component converted with to_i.
  def blue=(str)
    @blue = str.to_i
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # Raises RuntimeError for the first of red, green, blue that is nil.
  def to_xml
    #@todo add unit test
    raise "Subelement red of BranchColor element should not be nil" if @red.nil?
    raise "Subelement green of BranchColor element should not be nil" if @green.nil?
    raise "Subelement blue of BranchColor element should not be nil" if @blue.nil?

    color_node = LibXML::XML::Node.new('branch_color')
    components = [[:simple, 'red', @red],
                  [:simple, 'green', @green],
                  [:simple, 'blue', @blue]]
    PhyloXML::Writer.generate_xml(color_node, self, components)
    color_node
  end
end
786
+
787
# == Description
# A date associated with a clade/node. Its value can be numerical by
# using the 'value' element and/or free text with the 'desc' element
# (e.g. 'Silurian'). If a numerical value is used, it is recommended to
# employ the 'unit' attribute to indicate the type of the numerical
# value (e.g. 'mya' for 'million years ago').
class Date
  # String. Units in which value is stored.
  attr_accessor :unit

  # Free text description of the date.
  attr_accessor :desc

  # Integer. Minimum and maximum of the value.
  attr_reader :minimum, :maximum

  # Integer. Value of the date.
  attr_reader :value

  # Stores the minimum converted with to_i.
  def minimum=(str)
    @minimum = str.to_i
  end

  # Stores the maximum converted with to_i.
  def maximum=(str)
    @maximum = str.to_i
  end

  # Stores the value converted with to_i (any fractional part is dropped).
  def value=(str)
    @value = str.to_i
  end

  # Returns value + unit, for example "7 mya"
  def to_s
    "#{value} #{unit}"
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  def to_xml
    date_node = LibXML::XML::Node.new('date')
    parts = [[:attr, 'unit'],
             [:simple, 'desc', @desc],
             [:simple, 'value', @value],
             [:simple, 'minimum', @minimum],
             [:simple, 'maximum', @maximum]]
    PhyloXML::Writer.generate_xml(date_node, self, parts)
    date_node
  end
end
836
+
837
# == Description
# This is used to describe the domain architecture of a protein. Attribute
# 'length' is the total length of the protein
class DomainArchitecture
  # Integer. Total length of the protein
  attr_reader :length

  # Array of ProteinDomain objects.
  attr_reader :domains

  def initialize
    @domains = []
  end

  # Stores the total length converted with to_i.
  def length=(str)
    @length = str.to_i
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  def to_xml
    node = LibXML::XML::Node.new('domain_architecture')
    layout = [[:attr, 'length'],
              [:objarr, 'domain', 'domains']]
    PhyloXML::Writer.generate_xml(node, self, layout)
    node
  end
end
864
+
865
+
866
# == Description
# To represent an individual domain in a domain architecture. The
# name/unique identifier is described via the 'id' attribute.
class ProteinDomain
  # Float, for example to store E-values 4.7E-14. nil when not set.
  attr_reader :confidence

  # String
  attr_accessor :id, :value

  # Integer. Beginning of the domain.
  attr_reader :from

  # Integer. End of the domain.
  attr_reader :to

  # Stores the beginning of the domain converted with to_i.
  def from=(str)
    @from = str.to_i
  end

  # Stores the end of the domain converted with to_i.
  def to=(str)
    @to = str.to_i
  end

  # Stores the confidence converted with to_f. A nil argument is ignored
  # (same convention as Point and SequenceRelation#distance=), so a missing
  # confidence is not turned into 0.0.
  def confidence=(str)
    @confidence = str.to_f unless str.nil?
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Raises RuntimeError when from or to is nil. The id and confidence
  # attributes are only written when they hold a value.
  def to_xml
    raise "from attribute of ProteinDomain class is required." if @from.nil?
    raise "to attribute of ProteinDomain class is required." if @to.nil?

    xml_node = LibXML::XML::Node.new('domain', @value)
    xml_node["from"] = @from.to_s
    xml_node["to"] = @to.to_s
    xml_node["id"] = @id unless @id.nil?
    # Writing nil.to_s would produce confidence="", so skip it when unset.
    xml_node["confidence"] = @confidence.to_s unless @confidence.nil?
    return xml_node
  end
end
913
+
914
+
915
# Property allows for typed and referenced properties from external resources
# to be attached to 'Phylogeny', 'Clade', and 'Annotation'. The value of a
# property is its mixed (free text) content. Attribute 'datatype' indicates
# the type of a property and is limited to xsd-datatypes (e.g. 'xsd:string',
# 'xsd:boolean', 'xsd:integer', 'xsd:decimal', 'xsd:float', 'xsd:double',
# 'xsd:date', 'xsd:anyURI'). Attribute 'applies_to' indicates the item to
# which a property applies (e.g. 'node' for the parent node of a clade,
# 'parent_branch' for the parent branch of a clade). Attribute 'id_ref' allows
# to attach a property specifically to one element (on the xml-level).
# Optional attribute 'unit' is used to indicate the unit of the property.
# An example: <property datatype="xsd:integer" ref="NOAA:depth" applies_to="clade" unit="METRIC:m"> 200 </property>
class Property
  # Datatypes accepted by #datatype=.
  ALLOWED_DATATYPES = [
    'xsd:string', 'xsd:boolean', 'xsd:decimal', 'xsd:float', 'xsd:double',
    'xsd:duration', 'xsd:dateTime', 'xsd:time', 'xsd:date', 'xsd:gYearMonth',
    'xsd:gYear', 'xsd:gMonthDay', 'xsd:gDay', 'xsd:gMonth', 'xsd:hexBinary',
    'xsd:base64Binary', 'xsd:anyURI', 'xsd:normalizedString', 'xsd:token',
    'xsd:integer', 'xsd:nonPositiveInteger', 'xsd:negativeInteger',
    'xsd:long', 'xsd:int', 'xsd:short', 'xsd:byte', 'xsd:nonNegativeInteger',
    'xsd:unsignedLong', 'xsd:unsignedInt', 'xsd:unsignedShort',
    'xsd:unsignedByte', 'xsd:positiveInteger'
  ].freeze

  # Values for which #applies_to= stays silent.
  ALLOWED_APPLIES_TO = [
    'phylogeny', 'clade', 'node', 'annotation', 'parent_branch', 'other'
  ].freeze

  # String
  attr_accessor :ref, :unit, :id_ref, :value

  # String
  attr_reader :datatype, :applies_to

  # Sets the datatype. Raises RuntimeError when the value is not one of
  # ALLOWED_DATATYPES.
  def datatype=(str)
    #@todo add unit test or maybe remove, if assume that xml is valid.
    unless ALLOWED_DATATYPES.include?(str)
      raise "Warning: #{str} is not in the list of allowed values."
    end
    @datatype = str
  end

  # Sets applies_to. A value outside ALLOWED_APPLIES_TO is still stored, but
  # a warning is emitted through Kernel#warn (standard error) so that it
  # does not mix with data a program prints on standard output.
  def applies_to=(str)
    unless ALLOWED_APPLIES_TO.include?(str)
      warn "Warning: #{str} is not in the list of allowed values."
    end
    @applies_to = str
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # Raises RuntimeError when ref, datatype or applies_to is nil.
  def to_xml
    #@todo write unit test for this
    raise "ref is an required element of property" if @ref.nil?
    raise "datatype is an required element of property" if @datatype.nil?
    raise "applies_to is an required element of property" if @applies_to.nil?

    property = LibXML::XML::Node.new('property')
    Writer.generate_xml(property, self, [
      [:attr, 'ref'],
      [:attr, 'unit'],
      [:attr, 'datatype'],
      [:attr, 'applies_to'],
      [:attr, 'id_ref']])

    property << @value unless @value.nil?
    return property
  end
end
974
+
975
# == Description
# A literature reference for a clade. It is recommended to use the 'doi'
# attribute instead of the free text 'desc' element whenever possible.
class Reference
  # String. Digital Object Identifier.
  attr_accessor :doi

  # String. Free text description.
  attr_accessor :desc

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  def to_xml
    node = LibXML::XML::Node.new('reference')
    contents = [[:attr, 'doi'],
                [:simple, 'desc', @desc]]
    Writer.generate_xml(node, self, contents)
    node
  end
end
995
+
996
# == Description
#
# This is used to express a typed relationship between two clades.
# For example it could be used to describe multiple parents of a clade.
class CladeRelation
  # Float
  attr_reader :distance

  # String. Id of the referenced parents of a clade.
  attr_accessor :id_ref_0, :id_ref_1

  # String
  attr_accessor :type

  # Confidence object
  attr_accessor :confidence

  # Stores the distance converted with to_f. A nil argument is ignored, as
  # in SequenceRelation#distance=, so an absent distance is not recorded
  # as 0.0.
  def distance=(str)
    @distance = str.to_f unless str.nil?
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # Raises RuntimeError when id_ref_0, id_ref_1 or type is nil.
  def to_xml
    if [@id_ref_0, @id_ref_1, @type].any? { |required| required.nil? }
      raise "Attributes id_ref_0, id_ref_1, type are required elements by CladeRelation element."
    end

    cr = LibXML::XML::Node.new('clade_relation')
    Writer.generate_xml(cr, self, [
      [:attr, 'id_ref_0'],
      [:attr, 'id_ref_1'],
      [:attr, 'distance'],
      [:attr, 'type'],
      # Pass the real confidence object; a misspelled instance variable
      # here used to hand nil to the writer every time.
      [:complex, 'confidence', @confidence]])

    return cr
  end
end
1032
+
1033
+
1034
# == Description
# The names and/or counts of binary characters present, gained, and
# lost at the root of a clade.
class BinaryCharacters
  # bc_type is written as the 'type' attribute; gained, lost, present and
  # absent are arrays (empty by default) written as 'bc' entries.
  attr_accessor :bc_type, :gained, :lost, :present, :absent

  # Integer counts, converted with to_i on assignment.
  attr_reader :gained_count, :lost_count, :present_count, :absent_count

  def gained_count=(str)
    @gained_count = str.to_i
  end

  def lost_count=(str)
    @lost_count = str.to_i
  end

  def present_count=(str)
    @present_count = str.to_i
  end

  def absent_count=(str)
    @absent_count = str.to_i
  end

  def initialize
    @gained = []
    @lost = []
    @present = []
    @absent = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # The 'type' attribute is only set when bc_type holds a value. A
  # 'gained' / 'lost' / 'present' / 'absent' child is added, in that order,
  # for every list that is neither nil nor empty.
  def to_xml
    bc = LibXML::XML::Node.new('binary_characters')
    bc['type'] = @bc_type unless @bc_type.nil?
    PhyloXML::Writer.generate_xml(bc, self, [
      [:attr, 'gained_count'],
      [:attr, 'lost_count'],
      [:attr, 'present_count'],
      [:attr, 'absent_count']])

    [['gained', @gained],
     ['lost', @lost],
     ['present', @present],
     ['absent', @absent]].each do |tag, characters|
      # The accessors allow a list to be replaced by nil; treat that as empty.
      next if characters.nil? || characters.empty?

      group = LibXML::XML::Node.new(tag)
      PhyloXML::Writer.generate_xml(group, self, [[:simplearr, 'bc', characters]])
      bc << group
    end

    return bc
  end
end
1103
+
1104
# == Description
# This is used to express a typed relationship between two sequences.
# For example it could be used to describe an orthology (in which case
# attribute 'type' is 'orthology').
class SequenceRelation
  # Values accepted by #type=.
  ALLOWED_TYPES = ["orthology", "one_to_one_orthology", "super_orthology",
                   "paralogy", "ultra_paralogy", "xenology", "unknown",
                   "other"].freeze

  # String
  attr_accessor :id_ref_0, :id_ref_1

  # String. One of ALLOWED_TYPES.
  attr_reader :type

  # Float
  attr_reader :distance

  #@todo it has Confidences objects.

  # Stores the distance converted with to_f. A nil argument is ignored.
  def distance=(str)
    @distance = str.to_f unless str.nil?
  end

  # Sets the relation type. Raises RuntimeError when the value is not one
  # of ALLOWED_TYPES; the previously stored type is then left untouched.
  def type=(str)
    #@todo do warning instead?
    #@todo do validation at actually writing xml
    unless ALLOWED_TYPES.include?(str)
      raise "SequenceRelation#type has to be one of #{ALLOWED_TYPES.join("; ")}"
    end
    @type = str
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # Raises RuntimeError when id_ref_0, id_ref_1 or type is nil.
  def to_xml
    if @id_ref_0.nil? || @id_ref_1.nil? || @type.nil?
      raise "Attributes id_ref_0, id_ref_1, type are required elements by SequenceRelation element."
    end

    sr = LibXML::XML::Node.new('sequence_relation')
    sr['id_ref_0'] = @id_ref_0
    sr['id_ref_1'] = @id_ref_1
    sr['distance'] = @distance.to_s unless @distance.nil?
    sr['type'] = @type
    return sr
  end
end
1147
+
1148
# Generic container for an element: it keeps the element name, a hash of
# attributes, an optional value and a list of child objects that respond
# to to_xml.
class Other
  attr_accessor :element_name, :attributes, :children, :value

  def initialize
    @children = []
    @attributes = Hash.new
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Copies every attribute onto a node named after element_name, appends
  # the value when it is not nil, then appends the xml of each child.
  def to_xml
    node = LibXML::XML::Node.new(@element_name)
    @attributes.each do |attr_name, attr_value|
      node[attr_name] = attr_value
    end
    node << value unless value.nil?
    children.each { |child| node << child.to_xml }
    node
  end
end
1170
+
1171
+
1172
+ end #module PhyloXML
1173
+
1174
+ end #end module Bio