bio 1.3.1 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (303) hide show
  1. data/ChangeLog +2105 -3728
  2. data/KNOWN_ISSUES.rdoc +35 -3
  3. data/README.rdoc +8 -2
  4. data/RELEASE_NOTES.rdoc +166 -0
  5. data/bin/bioruby +4 -1
  6. data/bioruby.gemspec +146 -1
  7. data/bioruby.gemspec.erb +3 -1
  8. data/doc/ChangeLog-before-1.3.1 +3961 -0
  9. data/doc/Tutorial.rd +154 -22
  10. data/doc/Tutorial.rd.html +125 -68
  11. data/lib/bio.rb +21 -6
  12. data/lib/bio/appl/bl2seq/report.rb +11 -202
  13. data/lib/bio/appl/blast/format0.rb +0 -193
  14. data/lib/bio/appl/blast/report.rb +2 -147
  15. data/lib/bio/appl/blast/wublast.rb +0 -208
  16. data/lib/bio/appl/fasta.rb +4 -19
  17. data/lib/bio/appl/fasta/format10.rb +0 -14
  18. data/lib/bio/appl/genscan/report.rb +0 -176
  19. data/lib/bio/appl/hmmer.rb +1 -15
  20. data/lib/bio/appl/hmmer/report.rb +0 -100
  21. data/lib/bio/appl/meme/mast.rb +156 -0
  22. data/lib/bio/appl/meme/mast/report.rb +91 -0
  23. data/lib/bio/appl/meme/motif.rb +48 -0
  24. data/lib/bio/appl/psort.rb +0 -111
  25. data/lib/bio/appl/psort/report.rb +1 -45
  26. data/lib/bio/appl/pts1.rb +2 -4
  27. data/lib/bio/appl/sosui/report.rb +5 -54
  28. data/lib/bio/appl/targetp/report.rb +1 -104
  29. data/lib/bio/appl/tmhmm/report.rb +0 -36
  30. data/lib/bio/command.rb +94 -10
  31. data/lib/bio/data/aa.rb +1 -77
  32. data/lib/bio/data/codontable.rb +1 -95
  33. data/lib/bio/data/na.rb +1 -26
  34. data/lib/bio/db/aaindex.rb +1 -38
  35. data/lib/bio/db/fasta.rb +1 -134
  36. data/lib/bio/db/fasta/format_qual.rb +204 -0
  37. data/lib/bio/db/fasta/qual.rb +102 -0
  38. data/lib/bio/db/fastq.rb +645 -0
  39. data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
  40. data/lib/bio/db/fastq/format_fastq.rb +175 -0
  41. data/lib/bio/db/genbank/genbank.rb +1 -86
  42. data/lib/bio/db/gff.rb +0 -17
  43. data/lib/bio/db/go.rb +4 -72
  44. data/lib/bio/db/kegg/common.rb +112 -0
  45. data/lib/bio/db/kegg/compound.rb +29 -20
  46. data/lib/bio/db/kegg/drug.rb +74 -34
  47. data/lib/bio/db/kegg/enzyme.rb +26 -5
  48. data/lib/bio/db/kegg/genes.rb +128 -15
  49. data/lib/bio/db/kegg/genome.rb +3 -41
  50. data/lib/bio/db/kegg/glycan.rb +19 -24
  51. data/lib/bio/db/kegg/orthology.rb +16 -56
  52. data/lib/bio/db/kegg/reaction.rb +81 -28
  53. data/lib/bio/db/kegg/taxonomy.rb +1 -52
  54. data/lib/bio/db/litdb.rb +1 -16
  55. data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
  56. data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
  57. data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
  58. data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
  59. data/lib/bio/db/prosite.rb +2 -95
  60. data/lib/bio/db/rebase.rb +5 -6
  61. data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
  62. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
  63. data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
  64. data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
  65. data/lib/bio/io/das.rb +0 -44
  66. data/lib/bio/io/ddbjxml.rb +1 -181
  67. data/lib/bio/io/flatfile.rb +1 -7
  68. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  69. data/lib/bio/io/keggapi.rb +0 -442
  70. data/lib/bio/io/ncbirest.rb +130 -132
  71. data/lib/bio/io/ncbisoap.rb +2 -1
  72. data/lib/bio/io/pubmed.rb +0 -88
  73. data/lib/bio/location.rb +0 -73
  74. data/lib/bio/pathway.rb +0 -171
  75. data/lib/bio/sequence.rb +18 -1
  76. data/lib/bio/sequence/adapter.rb +3 -0
  77. data/lib/bio/sequence/format.rb +16 -0
  78. data/lib/bio/sequence/quality_score.rb +205 -0
  79. data/lib/bio/tree.rb +70 -5
  80. data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
  81. data/lib/bio/util/sirna.rb +1 -23
  82. data/lib/bio/version.rb +1 -1
  83. data/sample/demo_aaindex.rb +67 -0
  84. data/sample/demo_aminoacid.rb +101 -0
  85. data/sample/demo_bl2seq_report.rb +220 -0
  86. data/sample/demo_blast_report.rb +285 -0
  87. data/sample/demo_codontable.rb +119 -0
  88. data/sample/demo_das.rb +105 -0
  89. data/sample/demo_ddbjxml.rb +212 -0
  90. data/sample/demo_fasta_remote.rb +51 -0
  91. data/sample/demo_fastaformat.rb +105 -0
  92. data/sample/demo_genbank.rb +132 -0
  93. data/sample/demo_genscan_report.rb +202 -0
  94. data/sample/demo_gff1.rb +49 -0
  95. data/sample/demo_go.rb +98 -0
  96. data/sample/demo_hmmer_report.rb +149 -0
  97. data/sample/demo_kegg_compound.rb +57 -0
  98. data/sample/demo_kegg_drug.rb +65 -0
  99. data/sample/demo_kegg_genome.rb +74 -0
  100. data/sample/demo_kegg_glycan.rb +72 -0
  101. data/sample/demo_kegg_orthology.rb +62 -0
  102. data/sample/demo_kegg_reaction.rb +66 -0
  103. data/sample/demo_kegg_taxonomy.rb +92 -0
  104. data/sample/demo_keggapi.rb +502 -0
  105. data/sample/demo_litdb.rb +42 -0
  106. data/sample/demo_locations.rb +99 -0
  107. data/sample/demo_ncbi_rest.rb +130 -0
  108. data/sample/demo_nucleicacid.rb +49 -0
  109. data/sample/demo_pathway.rb +196 -0
  110. data/sample/demo_prosite.rb +120 -0
  111. data/sample/demo_psort.rb +138 -0
  112. data/sample/demo_psort_report.rb +70 -0
  113. data/sample/demo_pubmed.rb +118 -0
  114. data/sample/demo_sirna.rb +63 -0
  115. data/sample/demo_sosui_report.rb +89 -0
  116. data/sample/demo_targetp_report.rb +135 -0
  117. data/sample/demo_tmhmm_report.rb +68 -0
  118. data/sample/pmfetch.rb +13 -4
  119. data/sample/pmsearch.rb +15 -4
  120. data/sample/test_phyloxml_big.rb +205 -0
  121. data/test/bioruby_test_helper.rb +61 -0
  122. data/test/data/KEGG/1.1.1.1.enzyme +935 -0
  123. data/test/data/KEGG/C00025.compound +102 -0
  124. data/test/data/KEGG/D00063.drug +104 -0
  125. data/test/data/KEGG/G00024.glycan +47 -0
  126. data/test/data/KEGG/G01366.glycan +18 -0
  127. data/test/data/KEGG/K02338.orthology +902 -0
  128. data/test/data/KEGG/R00006.reaction +14 -0
  129. data/test/data/fastq/README.txt +109 -0
  130. data/test/data/fastq/error_diff_ids.fastq +20 -0
  131. data/test/data/fastq/error_double_qual.fastq +22 -0
  132. data/test/data/fastq/error_double_seq.fastq +22 -0
  133. data/test/data/fastq/error_long_qual.fastq +20 -0
  134. data/test/data/fastq/error_no_qual.fastq +20 -0
  135. data/test/data/fastq/error_qual_del.fastq +20 -0
  136. data/test/data/fastq/error_qual_escape.fastq +20 -0
  137. data/test/data/fastq/error_qual_null.fastq +0 -0
  138. data/test/data/fastq/error_qual_space.fastq +21 -0
  139. data/test/data/fastq/error_qual_tab.fastq +21 -0
  140. data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
  141. data/test/data/fastq/error_qual_vtab.fastq +20 -0
  142. data/test/data/fastq/error_short_qual.fastq +20 -0
  143. data/test/data/fastq/error_spaces.fastq +20 -0
  144. data/test/data/fastq/error_tabs.fastq +21 -0
  145. data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
  146. data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
  147. data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
  148. data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
  149. data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
  150. data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
  151. data/test/data/fastq/error_trunc_in_title.fastq +17 -0
  152. data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
  153. data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
  154. data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
  155. data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
  156. data/test/data/fastq/longreads_as_illumina.fastq +40 -0
  157. data/test/data/fastq/longreads_as_sanger.fastq +40 -0
  158. data/test/data/fastq/longreads_as_solexa.fastq +40 -0
  159. data/test/data/fastq/longreads_original_sanger.fastq +120 -0
  160. data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
  161. data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
  162. data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
  163. data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
  164. data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
  165. data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
  166. data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
  167. data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
  168. data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
  169. data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
  170. data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
  171. data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
  172. data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
  173. data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
  174. data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
  175. data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
  176. data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
  177. data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
  178. data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
  179. data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
  180. data/test/data/meme/db +0 -0
  181. data/test/data/meme/mast +0 -0
  182. data/test/data/meme/mast.out +13 -0
  183. data/test/data/meme/meme.out +3 -0
  184. data/test/data/phyloxml/apaf.xml +666 -0
  185. data/test/data/phyloxml/bcl_2.xml +2097 -0
  186. data/test/data/phyloxml/made_up.xml +144 -0
  187. data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
  188. data/test/data/phyloxml/phyloxml_examples.xml +415 -0
  189. data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
  190. data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
  191. data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
  192. data/test/functional/bio/appl/test_pts1.rb +7 -5
  193. data/test/functional/bio/io/test_ensembl.rb +4 -3
  194. data/test/functional/bio/io/test_pubmed.rb +9 -3
  195. data/test/functional/bio/io/test_soapwsdl.rb +5 -4
  196. data/test/functional/bio/io/test_togows.rb +5 -4
  197. data/test/functional/bio/sequence/test_output_embl.rb +6 -4
  198. data/test/functional/bio/test_command.rb +54 -5
  199. data/test/runner.rb +5 -3
  200. data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
  201. data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
  202. data/test/unit/bio/appl/blast/test_report.rb +5 -4
  203. data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
  204. data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
  205. data/test/unit/bio/appl/genscan/test_report.rb +8 -9
  206. data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
  207. data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
  208. data/test/unit/bio/appl/mafft/test_report.rb +6 -5
  209. data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
  210. data/test/unit/bio/appl/meme/test_mast.rb +103 -0
  211. data/test/unit/bio/appl/meme/test_motif.rb +38 -0
  212. data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
  213. data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
  214. data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
  215. data/test/unit/bio/appl/sim4/test_report.rb +5 -4
  216. data/test/unit/bio/appl/sosui/test_report.rb +6 -5
  217. data/test/unit/bio/appl/targetp/test_report.rb +5 -3
  218. data/test/unit/bio/appl/test_blast.rb +5 -4
  219. data/test/unit/bio/appl/test_fasta.rb +4 -2
  220. data/test/unit/bio/appl/test_pts1.rb +4 -2
  221. data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
  222. data/test/unit/bio/data/test_aa.rb +5 -3
  223. data/test/unit/bio/data/test_codontable.rb +5 -4
  224. data/test/unit/bio/data/test_na.rb +5 -3
  225. data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
  226. data/test/unit/bio/db/embl/test_common.rb +4 -2
  227. data/test/unit/bio/db/embl/test_embl.rb +6 -6
  228. data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
  229. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
  230. data/test/unit/bio/db/embl/test_sptr.rb +6 -8
  231. data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
  232. data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
  233. data/test/unit/bio/db/kegg/test_compound.rb +146 -0
  234. data/test/unit/bio/db/kegg/test_drug.rb +194 -0
  235. data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
  236. data/test/unit/bio/db/kegg/test_genes.rb +32 -4
  237. data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
  238. data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
  239. data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
  240. data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
  241. data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
  242. data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
  243. data/test/unit/bio/db/test_aaindex.rb +6 -6
  244. data/test/unit/bio/db/test_fasta.rb +5 -46
  245. data/test/unit/bio/db/test_fastq.rb +829 -0
  246. data/test/unit/bio/db/test_gff.rb +4 -2
  247. data/test/unit/bio/db/test_lasergene.rb +7 -5
  248. data/test/unit/bio/db/test_medline.rb +4 -2
  249. data/test/unit/bio/db/test_newick.rb +6 -6
  250. data/test/unit/bio/db/test_nexus.rb +4 -2
  251. data/test/unit/bio/db/test_phyloxml.rb +769 -0
  252. data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
  253. data/test/unit/bio/db/test_prosite.rb +6 -5
  254. data/test/unit/bio/db/test_qual.rb +63 -0
  255. data/test/unit/bio/db/test_rebase.rb +5 -3
  256. data/test/unit/bio/db/test_soft.rb +7 -6
  257. data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
  258. data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
  259. data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
  260. data/test/unit/bio/io/test_ddbjxml.rb +4 -3
  261. data/test/unit/bio/io/test_ensembl.rb +5 -3
  262. data/test/unit/bio/io/test_fastacmd.rb +4 -3
  263. data/test/unit/bio/io/test_flatfile.rb +6 -5
  264. data/test/unit/bio/io/test_soapwsdl.rb +4 -3
  265. data/test/unit/bio/io/test_togows.rb +4 -2
  266. data/test/unit/bio/sequence/test_aa.rb +5 -3
  267. data/test/unit/bio/sequence/test_common.rb +4 -2
  268. data/test/unit/bio/sequence/test_compat.rb +4 -2
  269. data/test/unit/bio/sequence/test_dblink.rb +5 -3
  270. data/test/unit/bio/sequence/test_na.rb +4 -2
  271. data/test/unit/bio/sequence/test_quality_score.rb +330 -0
  272. data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
  273. data/test/unit/bio/test_alignment.rb +5 -3
  274. data/test/unit/bio/test_command.rb +4 -3
  275. data/test/unit/bio/test_db.rb +5 -3
  276. data/test/unit/bio/test_feature.rb +4 -2
  277. data/test/unit/bio/test_location.rb +4 -2
  278. data/test/unit/bio/test_map.rb +5 -3
  279. data/test/unit/bio/test_pathway.rb +4 -2
  280. data/test/unit/bio/test_reference.rb +4 -2
  281. data/test/unit/bio/test_sequence.rb +5 -3
  282. data/test/unit/bio/test_shell.rb +5 -3
  283. data/test/unit/bio/test_tree.rb +6 -6
  284. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
  285. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
  286. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
  287. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
  288. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
  289. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
  290. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
  291. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
  292. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
  293. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
  294. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
  295. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
  296. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
  297. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
  298. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
  299. data/test/unit/bio/util/test_color_scheme.rb +5 -3
  300. data/test/unit/bio/util/test_contingency_table.rb +5 -3
  301. data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
  302. data/test/unit/bio/util/test_sirna.rb +6 -4
  303. metadata +147 -2
@@ -0,0 +1,1174 @@
1
+ #
2
+ # = bio/db/phyloxml_elements.rb - PhyloXML Element classes
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Diana Jaunzeikare <latvianlinuxgirl@gmail.com>
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id:$
9
+ #
10
+ # == Description
11
+ #
12
+ # This file contains the classes to represent PhyloXML elements.
13
+ #
14
+ # == References
15
+ #
16
+ # * http://www.phyloxml.org
17
+ #
18
+ # * https://www.nescent.org/wg_phyloinformatics/PhyloSoC:PhyloXML_support_in_BioRuby
19
+
20
+ require 'bio/tree'
21
+ require 'bio/sequence'
22
+ require 'bio/reference'
23
+
24
+ # Autoload definition
25
+ module Bio
26
+ module PhyloXML
27
+ autoload :Parser, 'bio/db/phyloxml/phyloxml_parser'
28
+ autoload :Writer, 'bio/db/phyloxml/phyloxml_writer'
29
+ end
30
+ end
31
+
32
+ require 'libxml'
33
+
34
+ module Bio
35
+
36
# General-purpose holder for the taxonomic description of a taxon.
# Every field is a plain read/write attribute; only the two array-valued
# attributes are given a default (an empty array) by the constructor.
class Taxonomy
  # Code, abbreviation or mnemonic of the taxon, such as "Bsu" for
  # Bacillus subtilis. Expected to match [a-zA-Z0-9_]{2,10}.
  attr_accessor :code

  # String.
  attr_accessor :scientific_name

  # Array of strings.
  attr_accessor :common_names

  # Taxonomic rank. The value is expected to be one of: domain, kingdom,
  # subkingdom, branch, infrakingdom, superphylum, phylum, subphylum,
  # infraphylum, microphylum, superdivision, division, subdivision,
  # infradivision, superclass, class, subclass, infraclass, superlegion,
  # legion, sublegion, infralegion, supercohort, cohort, subcohort,
  # infracohort, superorder, order, suborder, superfamily, family,
  # subfamily, supertribe, tribe, subtribe, infratribe, genus, subgenus,
  # superspecies, species, subspecies, variety, subvariety, form, subform,
  # cultivar, unknown, other.
  attr_accessor :rank

  # Authority associated with the scientific name, for example
  # 'J. G. Cooper, 1863'.
  attr_accessor :authority

  # Array of strings. Synonyms for the scientific or the common names.
  attr_accessor :synonyms

  # Creates a taxonomy whose common_names and synonyms are empty arrays;
  # all other attributes start out as nil.
  def initialize
    @synonyms = []
    @common_names = []
  end
end
69
+
70
+ module PhyloXML
71
+
72
+
73
# PhyloXML flavour of Bio::Taxonomy: adds the identifier, the XML-level
# link attribute, a URI and a slot for foreign-namespace data, plus the
# serialisation used by PhyloXML::Writer.
class Taxonomy < Bio::Taxonomy
  # Unique identifier of a taxon (emitted as the 'id' child by #to_xml).
  attr_accessor :taxonomy_id

  # Used to link other elements to a taxonomy (on the xml-level).
  attr_accessor :id_source

  # Uri object.
  attr_accessor :uri

  # Array of Other objects. Used to save additional information from
  # namespaces other than PhyloXML.
  attr_accessor :other

  # Runs the Bio::Taxonomy initializer and then starts with an empty
  # +other+ array.
  def initialize
    super
    @other = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Builds a 'taxonomy' node, copies @type and @id_source onto it as
  # attributes when they are set, and hands the list of child
  # descriptions to PhyloXML::Writer.generate_xml.
  # ---
  # *Returns*:: the LibXML::XML::Node that was built
  def to_xml
    element = LibXML::XML::Node.new('taxonomy')
    element["type"] = @type unless @type == nil
    element["id_source"] = @id_source unless @id_source == nil

    code_pattern = Regexp.new("^[a-zA-Z0-9_]{2,10}$")
    children = [
      [:complex, 'id', @taxonomy_id],
      [:pattern, 'code', @code, code_pattern],
      [:simple, 'scientific_name', @scientific_name],
      [:simple, 'authority', @authority],
      [:simplearr, 'common_name', @common_names],
      [:simplearr, 'synonym', @synonyms],
      [:simple, 'rank', @rank],
      [:complex, 'uri', @uri]
    ]
    PhyloXML::Writer.generate_xml(element, self, children)
    #@todo anything else

    element
  end
end
112
+
113
# Holds one phylogeny element together with its subelements. This is
# Bio::Tree extended with the PhyloXML-specific attributes below.
class Tree < Bio::Tree
  # String. Name of the tree (the name subelement of the phylogeny element).
  attr_accessor :name

  # Id object.
  attr_accessor :phylogeny_id

  # String. Description of the tree.
  attr_accessor :description

  # Boolean. Can be used to indicate that the phylogeny is not allowed to
  # be rooted differently (i.e. because it is associated with root
  # dependent data, such as gene duplications).
  attr_accessor :rerootable

  # Boolean. Required element.
  attr_accessor :rooted

  # Array of Property objects. Allows typed and referenced properties from
  # external resources to be attached.
  attr_accessor :properties

  # Array of CladeRelation objects. Expresses a typed relationship between
  # two clades, for example multiple parents of a clade.
  attr_accessor :clade_relations

  # Array of SequenceRelation objects. Expresses a typed relationship
  # between two sequences, for example an orthology.
  attr_accessor :sequence_relations

  # Array of Confidence objects.
  attr_accessor :confidences

  # String.
  attr_accessor :branch_length_unit

  # String. Indicates the type of phylogeny (i.e. 'gene tree').
  attr_accessor :type

  # String. Date.
  attr_accessor :date

  # Array of Other objects. Used to save additional information from
  # namespaces other than PhyloXML.
  attr_accessor :other

  # Runs the Bio::Tree initializer (without arguments) and then gives every
  # collection attribute its own empty array.
  def initialize
    super
    @properties = []
    @confidences = []
    @clade_relations = []
    @sequence_relations = []
    @other = []
  end
end
165
+
166
+
167
# == Description
# Class to hold the clade element of phyloXML.
class Node

  # Events at the root node of a clade (e.g. one gene duplication).
  attr_accessor :events

  # String. Used to link other elements to a clade (node) (on the xml-level).
  attr_accessor :id_source

  # String. Name of the node.
  attr_accessor :name

  # Float. Branch width for this node (including parent branch). Applies
  # to the whole clade unless overwritten in sub-clades.
  attr_reader :width

  # Stores the width after converting the argument with #to_f.
  def width=(str)
    @width = str.to_f
  end

  # Array of Taxonomy objects. Describes taxonomic information for a clade.
  attr_accessor :taxonomies

  # Array of Confidence objects. Indicates the support for a clade/parent branch.
  attr_accessor :confidences

  # BranchColor object. Applies to the whole clade unless overwritten in a sub-clade.
  attr_accessor :color

  # Id object.
  attr_accessor :node_id

  # Array of Sequence objects. Represents a molecular sequence (Protein,
  # DNA, RNA) associated with a node.
  attr_accessor :sequences

  # BinaryCharacters object. The names and/or counts of binary characters
  # present, gained, and lost at the root of a clade.
  attr_accessor :binary_characters

  # Array of Distribution objects. The geographic distribution of the items
  # of a clade (species, sequences), intended for phylogeographic applications.
  attr_accessor :distributions

  # Date object. A date associated with a clade/node.
  attr_accessor :date

  # Array of Reference objects. A literature reference for a clade.
  attr_accessor :references

  # Array of Property objects, for example depth for sea animals.
  attr_accessor :properties

  # Array of Other objects. Used to save additional information from
  # namespaces other than PhyloXML.
  attr_accessor :other

  # Starts every collection attribute as its own empty array; the scalar
  # attributes stay nil until assigned.
  def initialize
    @confidences = []
    @sequences = []
    @taxonomies = []
    @distributions = []
    @references = []
    @properties = []
    @other = []
  end


  # Converts to a Bio::Tree::Node object. If the node holds several
  # taxonomies, the scientific name and the taxonomy id of the first one
  # are used.
  #
  # If there are several confidence values, the first one whose type is
  # "bootstrap" is returned as Bio::Tree::Node#bootstrap.
  #
  #   tree = phyloxmlparser.next_tree
  #
  #   node = tree.get_node_by_name("A").to_biotreenode
  #
  # ---
  # *Returns*:: Bio::Tree::Node
  def to_biotreenode
    node = Bio::Tree::Node.new
    node.name = @name

    #@todo what if there are more?
    first_taxonomy = @taxonomies[0]
    unless first_taxonomy.nil?
      node.scientific_name = first_taxonomy.scientific_name
      # Reads Taxonomy#taxonomy_id; the former spelling "taxononmy_id"
      # raised NoMethodError for every node that carried a taxonomy.
      node.taxonomy_id = first_taxonomy.taxonomy_id
    end

    bootstrap = @confidences.find { |confidence| confidence.type == "bootstrap" }
    node.bootstrap = bootstrap.value unless bootstrap.nil?

    return node
  end

  # Extracts the relevant information from the node (specifically taxonomy
  # and sequence) to create a Bio::Sequence object. A node can have several
  # sequences, so +seq_i+ selects which one to extract (default: the first).
  #
  # The scientific name of every taxonomy is appended to the sequence's
  # classification; a taxonomy of rank "species" also sets its species.
  #
  # ---
  # *Returns*:: Bio::Sequence
  def extract_biosequence(seq_i=0)
    seq = @sequences[seq_i].to_biosequence
    seq.classification = []
    @taxonomies.each do |t|
      seq.classification << t.scientific_name
      seq.species = t.scientific_name if t.rank == "species"
    end

    #seq.division => .. http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#3_2
    # It doesn't seem there is anything in PhyloXML corresponding to this.

    return seq
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # +branch_length+ is skipped when nil; otherwise it is written either as
  # a 'branch_length' child element (when
  # +write_branch_length_as_subelement+ is true) or as a 'branch_length'
  # attribute of the clade. Nested clades are not generated here.
  #
  # ---
  # *Returns*:: the LibXML::XML::Node for the clade
  def to_xml(branch_length, write_branch_length_as_subelement)
    clade = LibXML::XML::Node.new('clade')

    PhyloXML::Writer.generate_xml(clade, self, [[:simple, 'name', @name]])

    if branch_length != nil
      if write_branch_length_as_subelement
        clade << LibXML::XML::Node.new('branch_length', branch_length.to_s)
      else
        clade["branch_length"] = branch_length.to_s
      end
    end

    #generate all elements, except clade
    PhyloXML::Writer.generate_xml(clade, self, [
      [:attr, "id_source"],
      [:objarr, 'confidence', 'confidences'],
      [:simple, 'width', @width],
      # The colour lives in @color (see attr_accessor :color); the
      # previously used @branch_color is never assigned in this class.
      [:complex, 'branch_color', @color],
      [:simple, 'node_id', @node_id],
      [:objarr, 'taxonomy', 'taxonomies'],
      [:objarr, 'sequence', 'sequences'],
      [:complex, 'events', @events],
      [:complex, 'binary_characters', @binary_characters],
      [:objarr, 'distribution', 'distributions'],
      [:complex, 'date', @date],
      [:objarr, 'reference', 'references'],
      [:objarr, 'property', 'properties']])

    return clade
  end

end #Node
320
+
321
# == Description
# Events at the root node of a clade (e.g. one gene duplication).
class Events
  # Values accepted by #type=.
  ALLOWED_TYPES = %w[transfer fusion speciation_or_duplication other mixed unassigned].freeze

  # String. One of ALLOWED_TYPES.
  attr_reader :type

  # Integer
  attr_reader :duplications, :speciations, :losses

  # Confidence object
  attr_reader :confidence

  # Stores the given Confidence object.
  #
  # A second, two-parameter definition of this setter used to precede this
  # one; Ruby keeps only the last definition of a method, so it was never
  # reachable and has been dropped.
  def confidence=(conf)
    @confidence = conf
  end

  # Stores the number of duplications, converted with #to_i.
  def duplications=(str)
    @duplications = str.to_i
  end

  # Stores the number of losses, converted with #to_i.
  def losses=(str)
    @losses = str.to_i
  end

  # Stores the number of speciations, converted with #to_i.
  def speciations=(str)
    @speciations = str.to_i
  end

  # Sets the event type.
  #
  # The value is validated before it is stored, so a rejected value no
  # longer overwrites the previous type.
  # ---
  # *Raises*:: RuntimeError when +str+ is not one of ALLOWED_TYPES
  def type=(str)
    #@todo add unit test for this
    unless ALLOWED_TYPES.include?(str)
      raise "Warning #{str} is not one of the allowed values"
    end
    @type = str
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Builds an 'events' node and passes the type, the three counters and
  # the confidence to PhyloXML::Writer.generate_xml.
  # ---
  # *Returns*:: the LibXML::XML::Node that was built
  def to_xml
    #@todo add unit test
    events = LibXML::XML::Node.new('events')
    PhyloXML::Writer.generate_xml(events, self, [
      [:simple, 'type', @type],
      [:simple, 'duplications', @duplications],
      [:simple, 'speciations', @speciations],
      [:simple, 'losses', @losses],
      [:complex, 'confidence', @confidence]])
    return events
  end

end
375
+
376
# General-purpose confidence element. It can, for instance, carry the
# bootstrap support value of a clade, in which case the 'type' attribute
# is 'bootstrap'.
class Confidence
  # String. The type of confidence measure, for example, bootstrap.
  attr_accessor :type

  # Float. The value of confidence measure.
  attr_accessor :value

  # Keeps +type+ as given and stores +value+ converted with #to_f.
  def initialize(type, value)
    @value = value.to_f
    @type = type
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # The node is named 'confidence', its content is the value as a string
  # and its "type" attribute is the type.
  # ---
  # *Returns*:: LibXML::XML::Node
  # *Raises*:: RuntimeError when the type is nil
  def to_xml
    raise "Type is a required attribute for confidence." if @type == nil

    element = LibXML::XML::Node.new('confidence', @value.to_s)
    element["type"] = @type
    element
  end
end
401
+
402
# == Description
#
# Geographic distribution of the items of a clade (species, sequences),
# intended for phylogeographic applications.
class Distribution
  # String. Free text description of location.
  attr_accessor :desc

  # Array of Point objects. Holds coordinates of the location.
  attr_accessor :points

  # Array of Polygon objects.
  attr_accessor :polygons

  # Starts with no points and no polygons; +desc+ stays nil until assigned.
  def initialize
    @polygons = []
    @points = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # Builds a 'distribution' node and passes the description, the points
  # and the polygons to PhyloXML::Writer.generate_xml.
  # ---
  # *Returns*:: the LibXML::XML::Node that was built
  def to_xml
    element = LibXML::XML::Node.new('distribution')
    children = [
      [:simple, 'desc', @desc],
      [:objarr, 'point', 'points'],
      [:objarr, 'polygon', 'polygons']
    ]
    PhyloXML::Writer.generate_xml(element, self, children)
    element
  end

end #Distribution class
431
+
432
+
433
# == Description
#
# Coordinates of a point with an optional altitude. The required attribute
# 'geodetic_datum' indicates the geodetic datum (also called 'map datum');
# Google's KML, for example, uses 'WGS84'.
class Point
  # Float. Latitude.
  attr_reader :lat

  # Float. Longitude.
  attr_reader :long

  # Float. Altitude.
  attr_reader :alt

  # String. Altitude unit.
  attr_accessor :alt_unit

  # Geodetic datum / map datum.
  attr_accessor :geodetic_datum

  # Stores the latitude converted with #to_f; a nil argument is ignored.
  def lat=(str)
    return if str.nil?
    @lat = str.to_f
  end

  # Stores the longitude converted with #to_f; a nil argument is ignored.
  def long=(str)
    return if str.nil?
    @long = str.to_f
  end

  # Stores the altitude converted with #to_f; a nil argument is ignored.
  def alt=(str)
    return if str.nil?
    @alt = str.to_f
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # The 'point' node always carries the geodetic_datum attribute, carries
  # alt_unit only when it is set, and the three coordinates are passed to
  # PhyloXML::Writer.generate_xml.
  # ---
  # *Returns*:: LibXML::XML::Node
  # *Raises*:: RuntimeError when geodetic_datum is nil
  def to_xml
    if @geodetic_datum.nil?
      raise "Geodedic datum is a required attribute of Point element."
    end

    point = LibXML::XML::Node.new('point')
    point["geodetic_datum"] = @geodetic_datum
    point["alt_unit"] = @alt_unit unless @alt_unit == nil
    coordinates = [
      [:simple, 'lat', @lat],
      [:simple, 'long', @long],
      [:simple, 'alt', @alt]
    ]
    PhyloXML::Writer.generate_xml(point, self, coordinates)
    #@todo check if characters are correctly generated, like Zuric
    point
  end

end
482
+
483
+
484
# == Description
#
# A polygon defined by a list of Point objects.
class Polygon
  # Array of Point objects.
  attr_accessor :points

  # Starts with an empty list of points.
  def initialize
    @points = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  #
  # A 'polygon' node is built only when there are more than two points;
  # each point contributes the result of its own to_xml as a child.
  # ---
  # *Returns*:: LibXML::XML::Node, or nil when there are fewer than three points
  def to_xml
    return nil unless @points.length > 2

    polygon = LibXML::XML::Node.new('polygon')
    @points.each { |point| polygon << point.to_xml }
    polygon
  end

end
509
+
510
# == Description
# Element Sequence is used to represent a molecular sequence (Protein, DNA,
# RNA) associated with a node.
class Sequence
  # Type of sequence (rna, dna, protein)
  attr_accessor :type

  # Full name (e.g. muscle Actin )
  attr_accessor :name

  # String. Used to link with other elements.
  attr_accessor :id_source

  # String. One intended use for 'id_ref' is to link a sequence to a taxonomy
  # (via the taxonomy's 'id_source') in the case of multiple sequences and taxonomies per node.
  attr_accessor :id_ref

  # short (maximal ten characters) symbol of the sequence (e.g. 'ACTM')
  attr_accessor :symbol
  # Accession object. Holds source and identifier for the sequence.
  attr_accessor :accession
  # String. Location of a sequence on a genome/chromosome
  attr_accessor :location
  # String. The actual sequence is stored here.
  attr_reader :mol_seq

  # Boolean. Used to indicate that this molecular sequence is aligned with
  # all other sequences in the same phylogeny for which 'is aligned' is true
  # as well (which, in most cases, means that gaps were introduced, and that
  # all sequences for which 'is aligned' is true must have the same length)
  attr_reader :is_aligned

  # Uri object
  attr_accessor :uri
  # Array of Annotation objects. Annotations of molecular sequence.
  attr_accessor :annotations
  # DomainArchitecture object. Describes domain architecture of a protein.
  attr_accessor :domain_architecture

  # Array of Other objects. Used to save additional information from other than
  # PhyloXML namespace.
  attr_accessor :other

  # Starts with empty annotation and 'other' lists.
  def initialize
    @annotations = []
    @other = []
  end

  # Sets the alignment flag. Accepts the strings 'true' / 'false' as well as
  # the booleans true / false; any other value stores nil.
  def is_aligned=(str)
    @is_aligned = case str
                  when true, 'true' then true
                  when false, 'false' then false
                  end
  end

  # Returns the stored alignment flag (true, false or nil).
  def is_aligned?
    @is_aligned
  end

  # Stores the sequence string after checking it against the allowed
  # character pattern.
  # ---
  # *Raises*:: RuntimeError when the string does not match the pattern
  def mol_seq=(str)
    # The pattern is anchored with ^ and $, which match per line in Ruby, so
    # a multi-line string is accepted as soon as one of its lines matches.
    unless str =~ /^[a-zA-Z\.\-\?\*_]+$/
      raise "mol_seq element of Sequence does not follow the pattern."
    end
    @mol_seq = str
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: the 'sequence' LibXML::XML::Node
  # *Raises*:: RuntimeError when type is set to something other than
  #            dna, rna or protein
  def to_xml
    # Validate first so an invalid type fails before any node is built.
    if !@type.nil? && !["dna", "rna", "protein"].include?(@type)
      raise "Type attribute of Sequence has to be one of dna, rna or protein."
    end

    seq = LibXML::XML::Node.new('sequence')
    seq["type"] = @type unless @type.nil?

    PhyloXML::Writer.generate_xml(seq, self, [
      [:attr, 'id_source'],
      [:attr, 'id_ref'],
      [:pattern, 'symbol', @symbol, Regexp.new("^\\S{1,10}$")],
      [:complex, 'accession', @accession],
      [:simple, 'name', @name],
      [:simple, 'location', @location]])

    # mol_seq is written by hand (between the two generate_xml calls) because
    # it carries the optional is_aligned attribute.
    unless @mol_seq.nil?
      molseq = LibXML::XML::Node.new('mol_seq', @mol_seq)
      molseq["is_aligned"] = @is_aligned.to_s unless @is_aligned.nil?
      seq << molseq
    end

    PhyloXML::Writer.generate_xml(seq, self, [
      [:complex, 'uri', @uri],
      [:objarr, 'annotation', 'annotations'],
      [:complex, 'domain_architecture', @domain_architecture]])
    #@todo test domain_architecture
    #any
    return seq
  end

  # converts Bio::PhyloXML:Sequence to Bio::Sequence object.
  # ---
  # *Returns*:: Bio::Sequence
  def to_biosequence
    #type is not a required attribute in phyloxml (nor any other Sequence
    #element) it might not hold any value, so we will not check what type it is.
    seq = Bio::Sequence.auto(@mol_seq)

    # The accession is optional, so copy it only when one is present.
    unless @accession.nil?
      seq.id_namespace = @accession.source
      seq.entry_id = @accession.value
      # seq.primary_accession = @accession.value could be this
    end
    seq.definition = @name
    #seq.comments = @name //this one?
    unless @uri.nil?
      h = {'url' => @uri.uri,
           'title' => @uri.desc }
      ref = Bio::Reference.new(h)
      seq.references << ref
    end
    seq.molecule_type = 'RNA' if @type == 'rna'
    seq.molecule_type = 'DNA' if @type == 'dna'

    #@todo deal with the properties. There might be properties which look
    #like bio sequence attributes or features
    return seq
  end

end
644
+
645
# == Description
# Element Accession is used to capture the local part in a sequence
# identifier.
class Accession
  # String. Source of the accession id. Example: "UniProtKB"
  attr_accessor :source

  # String. Value of the accession id. Example: "P17304"
  attr_accessor :value

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: an 'accession' LibXML::XML::Node whose content is the value
  #             and whose 'source' attribute is the source
  # *Raises*:: RuntimeError when source is nil
  def to_xml
    if @source.nil?
      raise "Source attribute is required for Accession object."
    end

    node = LibXML::XML::Node.new('accession', @value)
    node['source'] = @source
    node
  end

end
664
+
665
# A uniform resource identifier. In general, this is expected to be a URL
# (for example, to link to an image on a website, in which case the 'type'
# attribute might be 'image' and 'desc' might be 'image of a California
# sea hare')
class Uri
  # String. Description of the uri. For example, 'image of a California sea hare'
  attr_accessor :desc
  # String. For example, image.
  attr_accessor :type
  # String. URL of the resource.
  attr_accessor :uri

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: a 'uri' LibXML::XML::Node, or nil when no uri is set
  def to_xml
    return nil if @uri.nil?

    node = LibXML::XML::Node.new('uri', @uri)
    attribute_spec = [[:attr, 'desc'], [:attr, 'type']]
    Writer.generate_xml(node, self, attribute_spec)
    node
  end
end
688
+
689
# == Description
#
# The annotation of a molecular sequence. It is recommended to annotate by
# using the optional 'ref' attribute (some examples of acceptable values
# for the ref attribute: 'GO:0008270', 'KEGG:Tetrachloroethene degradation',
# 'EC:1.1.1.1').
class Annotation
  # String. For example, 'GO:0008270', 'KEGG:Tetrachloroethene degradation',
  # 'EC:1.1.1.1'
  attr_accessor :ref
  # String
  attr_accessor :source
  # String. Evidence for an annotation as free text (e.g. 'experimental')
  attr_accessor :evidence
  # String. Type of the annotation.
  attr_accessor :type
  # String. Free text description.
  attr_accessor :desc
  # Confidence object. Type and value of support for an annotation.
  attr_accessor :confidence
  # Array of Property objects. Allows for further, typed and referenced
  # annotations from external resources
  attr_accessor :properties
  # Uri object.
  attr_accessor :uri

  # Starts with an empty list of properties.
  def initialize
    #@todo add unit test for this, since didn't break anything when changed from property to properties
    @properties = []
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: the 'annotation' LibXML::XML::Node
  def to_xml
    node = LibXML::XML::Node.new('annotation')
    node["ref"] = @ref unless @ref.nil?

    child_spec = [
      [:simple, 'desc', @desc],
      [:complex, 'confidence', @confidence],
      [:objarr, 'property', 'properties'],
      [:complex, 'uri', @uri]
    ]
    PhyloXML::Writer.generate_xml(node, self, child_spec)
    node
  end
end
731
+
732
# An identifier made of a value and an optional provider.
class Id
  # The provider of Id, for example, NCBI.
  attr_accessor :provider
  # The value of Id.
  attr_accessor :value

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: an 'id' LibXML::XML::Node whose content is the value; the
  #             'provider' attribute is set only when a provider is present
  def to_xml
    node = LibXML::XML::Node.new('id', @value)
    node["provider"] = @provider unless @provider.nil?
    node
  end
end
745
+
746
# == Description
# This indicates the color of a node when rendered (the color applies
# to the whole node and its children unless overwritten by the
# color(s) of sub clades).
class BranchColor
  # Integer
  attr_reader :red, :green, :blue

  # Stores the red component, converted with +to_i+.
  def red=(str)
    @red = str.to_i
  end

  # Stores the green component, converted with +to_i+.
  def green=(str)
    @green = str.to_i
  end

  # Stores the blue component, converted with +to_i+.
  def blue=(str)
    @blue = str.to_i
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: the 'branch_color' LibXML::XML::Node
  # *Raises*:: RuntimeError when red, green or blue has not been set
  #            (checked in that order)
  def to_xml
    #@todo add unit test
    raise "Subelement red of BranchColor element should not be nil" if @red.nil?
    raise "Subelement green of BranchColor element should not be nil" if @green.nil?
    raise "Subelement blue of BranchColor element should not be nil" if @blue.nil?

    color_node = LibXML::XML::Node.new('branch_color')
    components = [
      [:simple, 'red', @red],
      [:simple, 'green', @green],
      [:simple, 'blue', @blue]
    ]
    PhyloXML::Writer.generate_xml(color_node, self, components)
    color_node
  end

end
786
+
787
# == Description
# A date associated with a clade/node. Its value can be numerical by
# using the 'value' element and/or free text with the 'desc' element
# (e.g. 'Silurian'). If a numerical value is used, it is recommended to
# employ the 'unit' attribute to indicate the type of the numerical
# value (e.g. 'mya' for 'million years ago').
class Date
  # String. Units in which value is stored.
  attr_accessor :unit

  # Free text description of the date.
  attr_accessor :desc

  # Integer. Minimum and maximum of the value.
  attr_reader :minimum, :maximum

  # Integer. Value of the date.
  attr_reader :value

  # Stores the minimum, converted with +to_i+.
  def minimum=(str)
    @minimum = str.to_i
  end

  # Stores the maximum, converted with +to_i+.
  def maximum=(str)
    @maximum = str.to_i
  end

  # Stores the value, converted with +to_i+.
  def value=(str)
    @value = str.to_i
  end

  # Returns value + unit, for example "7 mya"
  def to_s
    "#{value} #{unit}"
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: the 'date' LibXML::XML::Node
  def to_xml
    date_node = LibXML::XML::Node.new('date')
    spec = [
      [:attr, 'unit'],
      [:simple, 'desc', @desc],
      [:simple, 'value', @value],
      [:simple, 'minimum', @minimum],
      [:simple, 'maximum', @maximum]
    ]
    PhyloXML::Writer.generate_xml(date_node, self, spec)
    date_node
  end

end
836
+
837
# == Description
# This is used to describe the domain architecture of a protein. Attribute
# 'length' is the total length of the protein
class DomainArchitecture
  # Integer. Total length of the protein
  attr_reader :length

  # Array of ProteinDomain objects.
  attr_reader :domains

  # Starts with an empty list of domains.
  def initialize
    @domains = []
  end

  # Stores the total length, converted with +to_i+. Only a reader is
  # generated above, so this is the sole definition of the writer.
  def length=(str)
    @length = str.to_i
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: the 'domain_architecture' LibXML::XML::Node
  def to_xml
    xml_node = LibXML::XML::Node.new('domain_architecture')
    PhyloXML::Writer.generate_xml(xml_node, self, [
      [:attr, 'length'],
      [:objarr, 'domain', 'domains']])
    return xml_node
  end
end
864
+
865
+
866
# == Description
# To represent an individual domain in a domain architecture. The
# name/unique identifier is described via the 'id' attribute.
class ProteinDomain
  # Float, for example to store E-values 4.7E-14
  attr_reader :confidence

  # String
  attr_accessor :id, :value

  # Integer. Beginning of the domain.
  attr_reader :from

  # Integer. End of the domain.
  attr_reader :to

  # Stores the start position, converted with +to_i+.
  def from=(str)
    @from = str.to_i
  end

  # Stores the end position, converted with +to_i+.
  def to=(str)
    @to = str.to_i
  end

  # Stores the confidence, converted with +to_f+.
  def confidence=(str)
    @confidence = str.to_f
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: a 'domain' LibXML::XML::Node whose content is the value
  # *Raises*:: RuntimeError when from or to has not been set
  def to_xml
    raise "from attribute of ProteinDomain class is required." if @from.nil?
    raise "to attribute of ProteinDomain class is required." if @to.nil?

    xml_node = LibXML::XML::Node.new('domain', @value)
    xml_node["from"] = @from.to_s
    xml_node["to"] = @to.to_s
    xml_node["id"] = @id unless @id.nil?
    # Write the attribute only when a confidence was set; nil.to_s would
    # otherwise emit an empty confidence="" attribute.
    xml_node["confidence"] = @confidence.to_s unless @confidence.nil?

    return xml_node
  end

end
913
+
914
+
915
# == Description
# Property allows for typed and referenced properties from external resources
# to be attached to 'Phylogeny', 'Clade', and 'Annotation'. The value of a
# property is its mixed (free text) content. Attribute 'datatype' indicates
# the type of a property and is limited to xsd-datatypes (e.g. 'xsd:string',
# 'xsd:boolean', 'xsd:integer', 'xsd:decimal', 'xsd:float', 'xsd:double',
# 'xsd:date', 'xsd:anyURI'). Attribute 'applies_to' indicates the item to
# which a property applies (e.g. 'node' for the parent node of a clade,
# 'parent_branch' for the parent branch of a clade). Attribute 'id_ref' allows
# a property to be attached specifically to one element (on the xml-level).
# Optional attribute 'unit' is used to indicate the unit of the property.
# An example: <property datatype="xsd:integer" ref="NOAA:depth" applies_to="clade" unit="METRIC:m"> 200 </property>
class Property
  # String
  attr_accessor :ref, :unit, :id_ref, :value

  # String
  attr_reader :datatype, :applies_to

  # Stores the datatype after checking it against the accepted xsd datatypes.
  # ---
  # *Raises*:: RuntimeError when the value is not in the list
  def datatype=(str)
    #@todo add unit test or maybe remove, if assume that xml is valid.
    known_types = %w[
      xsd:string xsd:boolean xsd:decimal xsd:float xsd:double
      xsd:duration xsd:dateTime xsd:time xsd:date xsd:gYearMonth
      xsd:gYear xsd:gMonthDay xsd:gDay xsd:gMonth xsd:hexBinary
      xsd:base64Binary xsd:anyURI xsd:normalizedString xsd:token
      xsd:integer xsd:nonPositiveInteger xsd:negativeInteger
      xsd:long xsd:int xsd:short xsd:byte xsd:nonNegativeInteger
      xsd:unsignedLong xsd:unsignedInt xsd:unsignedShort
      xsd:unsignedByte xsd:positiveInteger
    ]
    raise "Warning: #{str} is not in the list of allowed values." unless known_types.include?(str)

    @datatype = str
  end

  # Stores applies_to. A value outside the expected list is still stored;
  # it only triggers a warning printed with +puts+.
  def applies_to=(str)
    known_targets = %w[phylogeny clade node annotation parent_branch other]
    puts "Warning: #{str} is not in the list of allowed values." unless known_targets.include?(str)
    @applies_to = str
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: the 'property' LibXML::XML::Node
  # *Raises*:: RuntimeError when ref, datatype or applies_to is nil
  #            (checked in that order)
  def to_xml
    #@todo write unit test for this
    raise "ref is an required element of property" if @ref.nil?
    raise "datatype is an required element of property" if @datatype.nil?
    raise "applies_to is an required element of property" if @applies_to.nil?

    node = LibXML::XML::Node.new('property')
    attribute_spec = [
      [:attr, 'ref'],
      [:attr, 'unit'],
      [:attr, 'datatype'],
      [:attr, 'applies_to'],
      [:attr, 'id_ref']
    ]
    Writer.generate_xml(node, self, attribute_spec)

    node << @value unless @value.nil?
    node
  end
end
974
+
975
# == Description
# A literature reference for a clade. It is recommended to use the 'doi'
# attribute instead of the free text 'desc' element whenever possible.
class Reference
  # String. Digital Object Identifier.
  attr_accessor :doi

  # String. Free text description.
  attr_accessor :desc

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: the 'reference' LibXML::XML::Node
  def to_xml
    node = LibXML::XML::Node.new('reference')
    spec = [[:attr, 'doi'], [:simple, 'desc', @desc]]
    Writer.generate_xml(node, self, spec)
    node
  end

end
995
+
996
# == Description
#
# This is used to express a typed relationship between two clades.
# For example it could be used to describe multiple parents of a clade.
class CladeRelation
  # Float
  attr_reader :distance
  # String. Id of the referenced parents of a clade.
  attr_accessor :id_ref_0, :id_ref_1
  # String
  attr_accessor :type
  # Confidence object
  attr_accessor :confidence

  # Stores the distance, converted with +to_f+. Only a reader is generated
  # above, so this is the sole definition of the writer.
  def distance=(str)
    @distance = str.to_f
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: the 'clade_relation' LibXML::XML::Node
  # *Raises*:: RuntimeError when id_ref_0, id_ref_1 or type is nil
  def to_xml
    if @id_ref_0.nil? || @id_ref_1.nil? || @type.nil?
      raise "Attributes id_ref_0, id_ref_1, type are required elements by CladeRelation element."
    end

    cr = LibXML::XML::Node.new('clade_relation')
    # The confidence entry must read @confidence; a misspelled instance
    # variable evaluates to nil and silently drops the confidence.
    Writer.generate_xml(cr, self, [
      [:attr, 'id_ref_0'],
      [:attr, 'id_ref_1'],
      [:attr, 'distance'],
      [:attr, 'type'],
      [:complex, 'confidence', @confidence]])

    return cr
  end

end
1032
+
1033
+
1034
# == Description
# The names and/or counts of binary characters present, gained, and
# lost at the root of a clade.
class BinaryCharacters
  # bc_type is written as the 'type' attribute; the other four are lists
  # that start out empty.
  attr_accessor :bc_type, :gained, :lost, :present, :absent
  # Integer counts, written as attributes of the same name.
  attr_reader :gained_count, :lost_count, :present_count, :absent_count

  # Starts with four empty character lists.
  def initialize
    @gained = []
    @lost = []
    @present = []
    @absent = []
  end

  # Stores the gained count, converted with +to_i+.
  def gained_count=(str)
    @gained_count = str.to_i
  end

  # Stores the lost count, converted with +to_i+.
  def lost_count=(str)
    @lost_count = str.to_i
  end

  # Stores the present count, converted with +to_i+.
  def present_count=(str)
    @present_count = str.to_i
  end

  # Stores the absent count, converted with +to_i+.
  def absent_count=(str)
    @absent_count = str.to_i
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: the 'binary_characters' LibXML::XML::Node. A 'gained',
  #             'lost', 'present' or 'absent' child is added (in that
  #             order) only for a list that is not empty.
  def to_xml
    root = LibXML::XML::Node.new('binary_characters')
    root['type'] = @bc_type
    count_attributes = [
      [:attr, 'gained_count'],
      [:attr, 'lost_count'],
      [:attr, 'present_count'],
      [:attr, 'absent_count']
    ]
    PhyloXML::Writer.generate_xml(root, self, count_attributes)

    sections = [
      ['gained', @gained],
      ['lost', @lost],
      ['present', @present],
      ['absent', @absent]
    ]
    sections.each do |tag, characters|
      next if characters.empty?

      section_node = LibXML::XML::Node.new(tag)
      PhyloXML::Writer.generate_xml(section_node, self, [[:simplearr, 'bc', characters]])
      root << section_node
    end

    root
  end

end
1103
+
1104
# == Description
# This is used to express a typed relationship between two sequences.
# For example it could be used to describe an orthology (in which case
# attribute 'type' is 'orthology').
class SequenceRelation
  # String
  attr_accessor :id_ref_0, :id_ref_1
  # String. Restricted to the values accepted by #type=.
  attr_reader :type
  # Float
  attr_reader :distance

  #@todo it has Confidences objects.

  # Stores the distance, converted with +to_f+. A nil argument is ignored.
  def distance=(str)
    @distance = str.to_f unless str.nil?
  end

  # Stores the relation type after checking it against the allowed values.
  # Only a reader is generated above, so this is the sole definition of the
  # writer.
  # ---
  # *Raises*:: RuntimeError when the value is not one of the allowed types
  def type=(str)
    #@todo do warning instead?
    #@todo do validation at actually writing xml
    allowed_values = ["orthology", "one_to_one_orthology", "super_orthology", "paralogy",
                      "ultra_paralogy", "xenology", "unknown", "other"]
    unless allowed_values.include?(str)
      raise "SequenceRelation#type has to be one of #{allowed_values.join("; ")}"
    end
    @type = str
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: the 'sequence_relation' LibXML::XML::Node
  # *Raises*:: RuntimeError when id_ref_0, id_ref_1 or type is nil
  def to_xml
    if @id_ref_0.nil? || @id_ref_1.nil? || @type.nil?
      raise "Attributes id_ref_0, id_ref_1, type are required elements by SequenceRelation element."
    end

    sr = LibXML::XML::Node.new('sequence_relation')
    sr['id_ref_0'] = @id_ref_0
    sr['id_ref_1'] = @id_ref_1
    sr['distance'] = @distance.to_s unless @distance.nil?
    sr['type'] = @type
    return sr
  end

end
1147
+
1148
# Generic element described by an element name, a hash of attributes, an
# optional value and a list of child elements.
class Other
  attr_accessor :element_name, :attributes, :children, :value

  # Starts with no children and an empty attribute hash.
  def initialize
    @children = []
    @attributes = Hash.new
  end

  # Converts elements to xml representation. Called by PhyloXML::Writer class.
  # ---
  # *Returns*:: a LibXML::XML::Node named after element_name, carrying the
  #             attributes, then the value (when not nil), then the xml of
  #             each child
  def to_xml
    node = LibXML::XML::Node.new(@element_name)
    @attributes.each { |attr_name, attr_value| node[attr_name] = attr_value }
    node << value unless value.nil?
    children.each { |child| node << child.to_xml }
    node
  end

end
1170
+
1171
+
1172
+ end #module PhyloXML
1173
+
1174
+ end #end module Bio