bio 1.4.3.0001 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. checksums.yaml +7 -0
  2. data/.travis.yml +39 -33
  3. data/BSDL +22 -0
  4. data/COPYING +2 -2
  5. data/COPYING.ja +36 -36
  6. data/ChangeLog +2404 -1025
  7. data/KNOWN_ISSUES.rdoc +15 -55
  8. data/README.rdoc +17 -23
  9. data/RELEASE_NOTES.rdoc +246 -183
  10. data/Rakefile +3 -2
  11. data/bin/br_biofetch.rb +29 -5
  12. data/bioruby.gemspec +15 -32
  13. data/bioruby.gemspec.erb +10 -20
  14. data/doc/ChangeLog-1.4.3 +1478 -0
  15. data/doc/RELEASE_NOTES-1.4.3.rdoc +204 -0
  16. data/doc/Tutorial.rd +0 -6
  17. data/doc/Tutorial.rd.html +7 -12
  18. data/doc/Tutorial.rd.ja +960 -1064
  19. data/doc/Tutorial.rd.ja.html +977 -1067
  20. data/gemfiles/Gemfile.travis-jruby1.8 +2 -1
  21. data/gemfiles/Gemfile.travis-jruby1.9 +2 -4
  22. data/gemfiles/Gemfile.travis-rbx +13 -0
  23. data/gemfiles/Gemfile.travis-ruby1.8 +2 -1
  24. data/gemfiles/Gemfile.travis-ruby1.9 +2 -4
  25. data/gemfiles/Gemfile.travis-ruby2.2 +9 -0
  26. data/lib/bio.rb +10 -43
  27. data/lib/bio/alignment.rb +8 -14
  28. data/lib/bio/appl/blast.rb +1 -2
  29. data/lib/bio/appl/blast/format0.rb +18 -7
  30. data/lib/bio/appl/blast/remote.rb +0 -9
  31. data/lib/bio/appl/blast/report.rb +1 -1
  32. data/lib/bio/appl/clustalw/report.rb +3 -1
  33. data/lib/bio/appl/genscan/report.rb +1 -2
  34. data/lib/bio/appl/iprscan/report.rb +1 -2
  35. data/lib/bio/appl/meme/mast.rb +4 -4
  36. data/lib/bio/appl/meme/mast/report.rb +1 -1
  37. data/lib/bio/appl/paml/codeml.rb +2 -2
  38. data/lib/bio/appl/paml/codeml/report.rb +1 -0
  39. data/lib/bio/appl/paml/common.rb +1 -1
  40. data/lib/bio/appl/sosui/report.rb +1 -2
  41. data/lib/bio/command.rb +62 -2
  42. data/lib/bio/data/aa.rb +13 -31
  43. data/lib/bio/data/codontable.rb +1 -2
  44. data/lib/bio/db/biosql/biosql_to_biosequence.rb +1 -0
  45. data/lib/bio/db/biosql/sequence.rb +1 -1
  46. data/lib/bio/db/embl/common.rb +1 -1
  47. data/lib/bio/db/embl/embl.rb +5 -4
  48. data/lib/bio/db/embl/format_embl.rb +3 -3
  49. data/lib/bio/db/embl/sptr.rb +9 -1444
  50. data/lib/bio/db/embl/swissprot.rb +12 -29
  51. data/lib/bio/db/embl/trembl.rb +13 -30
  52. data/lib/bio/db/embl/uniprot.rb +12 -29
  53. data/lib/bio/db/embl/uniprotkb.rb +1455 -0
  54. data/lib/bio/db/fasta.rb +17 -0
  55. data/lib/bio/db/fasta/defline.rb +1 -3
  56. data/lib/bio/db/fastq.rb +1 -1
  57. data/lib/bio/db/genbank/ddbj.rb +9 -5
  58. data/lib/bio/db/genbank/refseq.rb +11 -3
  59. data/lib/bio/db/gff.rb +3 -4
  60. data/lib/bio/db/go.rb +5 -6
  61. data/lib/bio/db/kegg/module.rb +4 -5
  62. data/lib/bio/db/kegg/pathway.rb +4 -5
  63. data/lib/bio/db/kegg/reaction.rb +1 -1
  64. data/lib/bio/db/nexus.rb +3 -2
  65. data/lib/bio/db/pdb/pdb.rb +2 -2
  66. data/lib/bio/db/phyloxml/phyloxml_elements.rb +82 -59
  67. data/lib/bio/db/phyloxml/phyloxml_parser.rb +2 -2
  68. data/lib/bio/db/phyloxml/phyloxml_writer.rb +1 -2
  69. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +1 -2
  70. data/lib/bio/db/transfac.rb +1 -1
  71. data/lib/bio/io/das.rb +40 -41
  72. data/lib/bio/io/fastacmd.rb +0 -16
  73. data/lib/bio/io/fetch.rb +111 -55
  74. data/lib/bio/io/flatfile/buffer.rb +4 -5
  75. data/lib/bio/io/hinv.rb +2 -3
  76. data/lib/bio/io/ncbirest.rb +43 -6
  77. data/lib/bio/io/pubmed.rb +76 -81
  78. data/lib/bio/io/togows.rb +33 -10
  79. data/lib/bio/map.rb +1 -1
  80. data/lib/bio/pathway.rb +1 -1
  81. data/lib/bio/sequence/compat.rb +1 -1
  82. data/lib/bio/sequence/na.rb +63 -12
  83. data/lib/bio/shell.rb +0 -2
  84. data/lib/bio/shell/core.rb +5 -6
  85. data/lib/bio/shell/interface.rb +3 -4
  86. data/lib/bio/shell/irb.rb +1 -2
  87. data/lib/bio/shell/plugin/entry.rb +2 -3
  88. data/lib/bio/shell/plugin/seq.rb +7 -6
  89. data/lib/bio/shell/setup.rb +1 -2
  90. data/lib/bio/tree.rb +2 -2
  91. data/lib/bio/util/contingency_table.rb +0 -2
  92. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +2 -2
  93. data/lib/bio/util/sirna.rb +76 -16
  94. data/lib/bio/version.rb +8 -9
  95. data/sample/benchmark_clustalw_report.rb +47 -0
  96. data/sample/biofetch.rb +248 -151
  97. data/setup.rb +6 -7
  98. data/test/data/clustalw/example1-seqnos.aln +58 -0
  99. data/test/network/bio/appl/blast/test_remote.rb +1 -15
  100. data/test/network/bio/appl/test_blast.rb +0 -12
  101. data/test/network/bio/io/test_pubmed.rb +49 -0
  102. data/test/network/bio/io/test_togows.rb +0 -1
  103. data/test/network/bio/test_command.rb +65 -2
  104. data/test/unit/bio/appl/bl2seq/test_report.rb +0 -1
  105. data/test/unit/bio/appl/blast/test_report.rb +110 -48
  106. data/test/unit/bio/appl/clustalw/test_report.rb +67 -51
  107. data/test/unit/bio/appl/sim4/test_report.rb +46 -17
  108. data/test/unit/bio/appl/test_blast.rb +2 -2
  109. data/test/unit/bio/db/embl/test_embl.rb +0 -1
  110. data/test/unit/bio/db/embl/test_embl_rel89.rb +0 -1
  111. data/test/unit/bio/db/embl/{test_sptr.rb → test_uniprotkb.rb} +111 -115
  112. data/test/unit/bio/db/embl/{test_uniprot_new_part.rb → test_uniprotkb_new_part.rb} +11 -11
  113. data/test/unit/bio/db/genbank/test_genbank.rb +10 -4
  114. data/test/unit/bio/db/pdb/test_pdb.rb +14 -8
  115. data/test/unit/bio/db/test_fasta.rb +41 -1
  116. data/test/unit/bio/db/test_fastq.rb +14 -4
  117. data/test/unit/bio/db/test_gff.rb +2 -2
  118. data/test/unit/bio/db/test_phyloxml.rb +30 -30
  119. data/test/unit/bio/db/test_phyloxml_writer.rb +2 -2
  120. data/test/unit/bio/io/flatfile/test_autodetection.rb +1 -2
  121. data/test/unit/bio/io/flatfile/test_buffer.rb +7 -1
  122. data/test/unit/bio/io/flatfile/test_splitter.rb +1 -1
  123. data/test/unit/bio/io/test_togows.rb +3 -2
  124. data/test/unit/bio/sequence/test_dblink.rb +1 -1
  125. data/test/unit/bio/sequence/test_na.rb +3 -1
  126. data/test/unit/bio/test_alignment.rb +1 -2
  127. data/test/unit/bio/test_command.rb +5 -4
  128. data/test/unit/bio/test_db.rb +4 -2
  129. data/test/unit/bio/test_pathway.rb +25 -10
  130. data/test/unit/bio/util/test_sirna.rb +22 -22
  131. metadata +656 -1430
  132. data/doc/KEGG_API.rd +0 -1843
  133. data/doc/KEGG_API.rd.ja +0 -1834
  134. data/extconf.rb +0 -2
  135. data/lib/bio/appl/blast/ddbj.rb +0 -131
  136. data/lib/bio/db/kegg/taxonomy.rb +0 -280
  137. data/lib/bio/io/dbget.rb +0 -194
  138. data/lib/bio/io/ddbjrest.rb +0 -344
  139. data/lib/bio/io/ddbjxml.rb +0 -458
  140. data/lib/bio/io/ebisoap.rb +0 -158
  141. data/lib/bio/io/ensembl.rb +0 -229
  142. data/lib/bio/io/higet.rb +0 -73
  143. data/lib/bio/io/keggapi.rb +0 -363
  144. data/lib/bio/io/ncbisoap.rb +0 -156
  145. data/lib/bio/io/soapwsdl.rb +0 -119
  146. data/lib/bio/shell/plugin/keggapi.rb +0 -181
  147. data/lib/bio/shell/plugin/soap.rb +0 -87
  148. data/sample/dbget +0 -37
  149. data/sample/demo_ddbjxml.rb +0 -212
  150. data/sample/demo_kegg_taxonomy.rb +0 -92
  151. data/sample/demo_keggapi.rb +0 -502
  152. data/sample/psortplot_html.rb +0 -214
  153. data/test/network/bio/io/test_ddbjrest.rb +0 -47
  154. data/test/network/bio/io/test_ensembl.rb +0 -230
  155. data/test/network/bio/io/test_soapwsdl.rb +0 -53
  156. data/test/unit/bio/io/test_ddbjxml.rb +0 -81
  157. data/test/unit/bio/io/test_ensembl.rb +0 -111
  158. data/test/unit/bio/io/test_soapwsdl.rb +0 -33
@@ -68,6 +68,7 @@ module Bio
68
68
  # A larger range of methods for dealing with Fasta definition lines can be found in FastaDefline, accessed through the FastaFormat#identifiers method.
69
69
  #
70
70
  # f.entry_id #=> "gi|398365175"
71
+ # f.first_name #=> "gi|398365175|ref|NP_009718.3|"
71
72
  # f.definition #=> "gi|398365175|ref|NP_009718.3| Cdc28p [Saccharomyces cerevisiae S288c]"
72
73
  # f.identifiers #=> Bio::FastaDefline instance
73
74
  # f.accession #=> "NP_009718"
@@ -90,6 +91,7 @@ module Bio
90
91
  # f.entry #=> ">abc 123 456\nASDF"
91
92
  #
92
93
  # f.entry_id #=> "abc"
94
+ # f.first_name #=> "abc"
93
95
  # f.definition #=> "abc 123 456"
94
96
  # f.comment #=> nil
95
97
  # f.accession #=> nil
@@ -282,6 +284,21 @@ module Bio
282
284
  def locus
283
285
  identifiers.locus
284
286
  end
287
+
288
+ # Returns the first name (word) of the definition line - everything
289
+ # before the first whitespace.
290
+ #
291
+ # >abc def #=> 'abc'
292
+ # >gi|398365175|ref|NP_009718.3| Cdc28p [Saccharomyces cerevisiae S288c] #=> 'gi|398365175|ref|NP_009718.3|'
293
+ # >abc #=> 'abc'
294
+ def first_name
295
+ index = definition.index(/\s/)
296
+ if index.nil?
297
+ return @definition
298
+ else
299
+ return @definition[0...index]
300
+ end
301
+ end
285
302
 
286
303
  end #class FastaFormat
287
304
 
@@ -6,7 +6,6 @@
6
6
  # Toshiaki Katayama <k@bioruby.org>
7
7
  # License:: The Ruby License
8
8
  #
9
- # $Id: defline.rb,v 1.1.2.1 2008/06/20 13:22:32 ngoto Exp $
10
9
  #
11
10
  # == Description
12
11
  #
@@ -292,7 +291,6 @@ module Bio
292
291
  while token = ary.shift
293
292
  if labels = self.class::NSIDs[token] then
294
293
  di = [ token ]
295
- idtype = token
296
294
  labels.each do |x|
297
295
  token = ary.shift
298
296
  break unless token
@@ -391,7 +389,7 @@ module Bio
391
389
  # Shows words used in the defline. Returns an Array.
392
390
  def words(case_sensitive = nil, kill_regexp = self.class::KillRegexpArray,
393
391
  kwhash = self.class::KillWordsHash)
394
- a = descriptions.join(' ').split(/[\.\,\;\:\(\)\[\]\{\}\<\>\"\'\`\~\/\|\?\!\&\@\#\s\x00-\x1f\x7f]+/)
392
+ a = descriptions.join(' ').split(/[\.\,\;\:\(\)\[\]\{\}\<\>\"\'\`\~\/\|\?\!\&\@\# \x00-\x1f\x7f]+/)
395
393
  a.collect! do |x|
396
394
  x.sub!(/\A[\$\*\-\+]+/, '')
397
395
  x.sub!(/[\$\*\-\=]+\z/, '')
@@ -495,7 +495,7 @@ class Fastq
495
495
  # ---
496
496
  # *Returns*:: (String or nil) format name
497
497
  def format
498
- @format ? @format.name : nil
498
+ ((defined? @format) && @format) ? @format.name : nil
499
499
  end
500
500
 
501
501
 
@@ -5,16 +5,20 @@
5
5
  # License:: The Ruby License
6
6
  #
7
7
 
8
- require 'bio/db/genbank/genbank'
8
+ warn "Bio::DDBJ is deprecated. Use Bio::GenBank."
9
9
 
10
10
  module Bio
11
11
 
12
- class DDBJ < GenBank
12
+ require 'bio/db/genbank/genbank' unless const_defined?(:GenBank)
13
13
 
14
- autoload :XML, 'bio/io/ddbjxml'
15
- autoload :REST, 'bio/io/ddbjrest'
14
+ # Bio::DDBJ is deprecated. Use Bio::GenBank.
15
+ class DDBJ < GenBank
16
16
 
17
- # Nothing to do (DDBJ database format is completely same as GenBank)
17
+ # Bio::DDBJ is deprecated. Use Bio::GenBank.
18
+ def initialize(str)
19
+ warn "Bio::DDBJ is deprecated. Use Bio::GenBank."
20
+ super(str)
21
+ end
18
22
 
19
23
  end # DDBJ
20
24
 
@@ -4,15 +4,23 @@
4
4
  # Copyright:: Copyright (C) 2000-2004 Toshiaki Katayama <k@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: refseq.rb,v 1.8 2007/04/05 23:35:40 trevor Exp $
8
7
  #
9
8
 
10
- require 'bio/db/genbank/genbank'
9
+ warn "Bio::RefSeq is deprecated. Use Bio::GenBank."
11
10
 
12
11
  module Bio
13
12
 
13
+ require 'bio/db/genbank/genbank' unless const_defined?(:GenBank)
14
+
15
+ # Bio::RefSeq is deprecated. Use Bio::GenBank.
14
16
  class RefSeq < GenBank
15
- # Nothing to do (RefSeq database format is completely same as GenBank)
17
+
18
+ # Bio::RefSeq is deprecated. Use Bio::GenBank.
19
+ def initialize(str)
20
+ warn "Bio::RefSeq is deprecated. Use Bio::GenBank."
21
+ super(str)
22
+ end
23
+
16
24
  end
17
25
 
18
26
  end # Bio
@@ -8,7 +8,6 @@
8
8
  # 2008 Naohisa Goto <ng@bioruby.org>
9
9
  # License:: The Ruby License
10
10
  #
11
- # $Id:$
12
11
  #
13
12
  require 'uri'
14
13
  require 'strscan'
@@ -236,10 +235,10 @@ module Bio
236
235
  CHAR2BACKSLASH.merge({ '"' => '"', "\\" => "\\" }).freeze
237
236
 
238
237
  # prohibited characters in GFF2 columns
239
- PROHIBITED_GFF2_COLUMNS = /[\t\r\n\x00-\x1f\x7f\xfe\xff]/
238
+ PROHIBITED_GFF2_COLUMNS = /[\t\r\n\x00-\x08\x0b\x0c\x0e-\x1f\x7f\xfe\xff]/
240
239
 
241
240
  # prohibited characters in GFF2 attribute tags
242
- PROHIBITED_GFF2_TAGS = /[\s\"\;\t\r\n\x00-\x1f\x7f\xfe\xff]/
241
+ PROHIBITED_GFF2_TAGS = /[\s\"\;\x00-\x08\x0e-\x1f\x7f\xfe\xff]/
243
242
 
244
243
  private
245
244
  # (private) escapes GFF2 free text string
@@ -1066,7 +1065,7 @@ module Bio
1066
1065
 
1067
1066
  # parses given string and returns SequenceRegion class
1068
1067
  def self.parse(str)
1069
- dummy, seqid, start, endpos =
1068
+ _, seqid, start, endpos =
1070
1069
  str.chomp.split(/\s+/, 4).collect { |x| unescape(x) }
1071
1070
  self.new(seqid, start, endpos)
1072
1071
  end
@@ -5,7 +5,6 @@
5
5
  # Mitsuteru C. Nakao <n@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id:$
9
8
  #
10
9
  # == Gene Ontology
11
10
  #
@@ -104,12 +103,12 @@ class GO
104
103
  depth = $1.length.to_i
105
104
  rel = $2
106
105
  term = $3
107
- goid1 = goid = $4
106
+ goid1 = $4
108
107
  en = $5
109
108
  goids = parse_goids(line) # GO:ID[ ; GO:ID...]
110
- synonyms = parse_synonyms(line) # synonym:Term[ ; synonym:Term...]
109
+ parse_synonyms(line) # synonym:Term[ ; synonym:Term...]
111
110
  stack[depth] = goids.first
112
- @id2term[goid] = term
111
+ @id2term[goid1] = term
113
112
 
114
113
  next if depth == 0
115
114
 
@@ -128,8 +127,8 @@ class GO
128
127
  rel1 = $1
129
128
  term1 = $2
130
129
  goid1 = $3
131
- goids1 = parse_goids(line)
132
- synonyms1 = parse_synonyms(line)
130
+ parse_goids(line)
131
+ parse_synonyms(line)
133
132
 
134
133
  @id2term[goid1] = term1
135
134
  goids.each {|goid|
@@ -5,7 +5,6 @@
5
5
  # Copyright:: Copyright (C) 2010 Toshiaki Katayama <k@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id:$
9
8
  #
10
9
 
11
10
  require 'bio/db'
@@ -81,7 +80,7 @@ class MODULE < KEGGDB
81
80
  # ---
82
81
  # *Returns*:: Hash of pathway ID and its definition
83
82
  def pathways_as_hash
84
- unless @pathways_as_hash
83
+ unless (defined? @pathways_as_hash) && @pathways_as_hash
85
84
  @pathways_as_hash = strings_as_hash(pathways_as_strings)
86
85
  end
87
86
  @pathways_as_hash
@@ -100,7 +99,7 @@ class MODULE < KEGGDB
100
99
  # ---
101
100
  # *Returns*:: Hash of orthology ID and its definition
102
101
  def orthologs_as_hash
103
- unless @orthologs_as_hash
102
+ unless (defined? @orthologs_as_hash) && @orthologs_as_hash
104
103
  @orthologs_as_hash = strings_as_hash(orthologs_as_strings)
105
104
  end
106
105
  @orthologs_as_hash
@@ -126,7 +125,7 @@ class MODULE < KEGGDB
126
125
  # ---
127
126
  # *Returns*:: Hash of reaction ID and its definition
128
127
  def reactions_as_hash
129
- unless @reactions_as_hash
128
+ unless (defined? @reactions_as_hash) && @reactions_as_hash
130
129
  @reactions_as_hash = strings_as_hash(reactions_as_strings)
131
130
  end
132
131
  @reactions_as_hash
@@ -145,7 +144,7 @@ class MODULE < KEGGDB
145
144
  # ---
146
145
  # *Returns*:: Hash of compound ID and its definition
147
146
  def compounds_as_hash
148
- unless @compounds_as_hash
147
+ unless (defined? @compounds_as_hash) && @compounds_as_hash
149
148
  @compounds_as_hash = strings_as_hash(compounds_as_strings)
150
149
  end
151
150
  @compounds_as_hash
@@ -5,7 +5,6 @@
5
5
  # Copyright:: Copyright (C) 2010 Toshiaki Katayama <k@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id:$
9
8
  #
10
9
 
11
10
  require 'bio/db'
@@ -127,7 +126,7 @@ class PATHWAY < KEGGDB
127
126
  # ---
128
127
  # *Returns*:: Hash of disease ID and its definition
129
128
  def diseases_as_hash
130
- unless @diseases_as_hash
129
+ unless (defined? @diseases_as_hash) && @diseases_as_hash
131
130
  @diseases_as_hash = strings_as_hash(diseases_as_strings)
132
131
  end
133
132
  @diseases_as_hash
@@ -166,7 +165,7 @@ class PATHWAY < KEGGDB
166
165
  # ---
167
166
  # *Returns*:: Hash of gene ID and its definition
168
167
  def genes_as_hash
169
- unless @genes_as_hash
168
+ unless (defined? @genes_as_hash) && @genes_as_hash
170
169
  @genes_as_hash = strings_as_hash(genes_as_strings)
171
170
  end
172
171
  @genes_as_hash
@@ -192,7 +191,7 @@ class PATHWAY < KEGGDB
192
191
  # ---
193
192
  # *Returns*:: Hash of reaction ID and its definition
194
193
  def reactions_as_hash
195
- unless @reactions_as_hash
194
+ unless (defined? @reactions_as_hash) && @reactions_as_hash
196
195
  @reactions_as_hash = strings_as_hash(reactions_as_strings)
197
196
  end
198
197
  @reactions_as_hash
@@ -210,7 +209,7 @@ class PATHWAY < KEGGDB
210
209
  # ---
211
210
  # *Returns*:: Hash of compound ID and its definition
212
211
  def compounds_as_hash
213
- unless @compounds_as_hash
212
+ unless (defined? @compounds_as_hash) && @compounds_as_hash
214
213
  @compounds_as_hash = strings_as_hash(compounds_as_strings)
215
214
  end
216
215
  @compounds_as_hash
@@ -86,7 +86,7 @@ class REACTION < KEGGDB
86
86
  unless defined? @rpairs_as_hash
87
87
  rps = {}
88
88
  rpairs_as_strings.each do |line|
89
- namespace, entry_id, name, rptype = line.split(/\s+/)
89
+ _, entry_id, name, rptype = line.split(/\s+/)
90
90
  rps[entry_id] = [ name, rptype ]
91
91
  end
92
92
  @rpairs_as_hash = rps
@@ -299,7 +299,7 @@ module Bio
299
299
  ary = str.split(/[\s+=]/)
300
300
  ary.collect! { |x| x.strip!; x.empty? ? nil : x }
301
301
  ary.compact!
302
- in_comment = false
302
+ #in_comment = false
303
303
  comment_level = 0
304
304
 
305
305
  # Main loop
@@ -807,6 +807,7 @@ module Bio
807
807
  # *Returns*:: String
808
808
  def to_nexus
809
809
  str = "generic block \"" + get_name + "\" [do not know how to write in nexus format]"
810
+ str
810
811
  end
811
812
 
812
813
  # Adds a token to this.
@@ -1716,7 +1717,7 @@ module Bio
1716
1717
  return "empty"
1717
1718
  end
1718
1719
  str = String.new
1719
- row_array = to_nexus_row_array( spacer = " ", false )
1720
+ row_array = to_nexus_row_array( " ", false )
1720
1721
  row_array.each do | row |
1721
1722
  str << row << END_OF_LINE
1722
1723
  end
@@ -608,8 +608,8 @@ module Bio
608
608
 
609
609
  # SEQRES record class
610
610
  SEQRES =
611
- def_rec(#[ 9, 10, Pdb_Integer, :serNum ],
612
- [ 9, 10, Pdb_Continuation, nil ],
611
+ def_rec(#[ 8, 10, Pdb_Integer, :serNum ],
612
+ [ 8, 10, Pdb_Continuation, nil ], # PDB v3.2 (2008)
613
613
  [ 12, 12, Pdb_Character, :chainID ],
614
614
  [ 14, 17, Pdb_Integer, :numRes ],
615
615
  [ 20, 22, Pdb_Residue_name, :resName ],
@@ -5,7 +5,6 @@
5
5
  # Diana Jaunzeikare <latvianlinuxgirl@gmail.com>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id:$
9
8
  #
10
9
  # == Description
11
10
  #
@@ -91,17 +90,17 @@ module PhyloXML
91
90
  # Converts elements to xml representation. Called by PhyloXML::Writer class.
92
91
  def to_xml
93
92
  taxonomy = LibXML::XML::Node.new('taxonomy')
94
- taxonomy["type"] = @type if @type != nil
95
- taxonomy["id_source"] = @id_source if @id_source != nil
96
-
97
- PhyloXML::Writer.generate_xml(taxonomy, self, [[:complex, 'id', @taxonomy_id],
98
- [:pattern, 'code', @code, Regexp.new("^[a-zA-Z0-9_]{2,10}$")],
99
- [:simple, 'scientific_name', @scientific_name],
100
- [:simple, 'authority', @authority],
101
- [:simplearr, 'common_name', @common_names],
102
- [:simplearr, 'synonym', @synonyms],
103
- [:simple, 'rank', @rank],
104
- [:complex, 'uri',@uri]])
93
+ taxonomy["type"] = @type if (defined? @type) && @type
94
+ taxonomy["id_source"] = @id_source if (defined? @id_source) && @id_source
95
+
96
+ PhyloXML::Writer.generate_xml(taxonomy, self, [[:complex, 'id', (defined? @taxonomy_id) ? @taxonomy_id : nil],
97
+ [:pattern, 'code', (defined? @code) ? @code : nil, Regexp.new("^[a-zA-Z0-9_]{2,10}$")],
98
+ [:simple, 'scientific_name', (defined? @scientific_name) ? @scientific_name : nil],
99
+ [:simple, 'authority', (defined? @authority) ? @authority : nil],
100
+ [:simplearr, 'common_name', (defined? @common_names) ? @common_names : nil],
101
+ [:simplearr, 'synonym', (defined? @synonyms) ? @synonyms : nil],
102
+ [:simple, 'rank', (defined? @rank) ? @rank : nil],
103
+ [:complex, 'uri',(defined? @uri) ? @uri : nil]])
105
104
  #@todo anything else
106
105
 
107
106
 
@@ -161,7 +160,7 @@ module PhyloXML
161
160
  @other = []
162
161
  end
163
162
 
164
- end
163
+ end
165
164
 
166
165
 
167
166
  # == Description
@@ -287,7 +286,7 @@ module PhyloXML
287
286
  def to_xml(branch_length, write_branch_length_as_subelement)
288
287
  clade = LibXML::XML::Node.new('clade')
289
288
 
290
- PhyloXML::Writer.generate_xml(clade, self, [[:simple, 'name', @name]])
289
+ PhyloXML::Writer.generate_xml(clade, self, [[:simple, 'name', (defined? @name) ? @name : nil]])
291
290
 
292
291
  if branch_length != nil
293
292
  if write_branch_length_as_subelement
@@ -301,15 +300,15 @@ module PhyloXML
301
300
  PhyloXML::Writer.generate_xml(clade, self, [
302
301
  [:attr, "id_source"],
303
302
  [:objarr, 'confidence', 'confidences'],
304
- [:simple, 'width', @width],
305
- [:complex, 'branch_color', @branch_color],
306
- [:simple, 'node_id', @node_id],
303
+ [:simple, 'width', (defined? @width) ? @width : nil],
304
+ [:complex, 'branch_color', (defined? @branch_color) ? @branch_color : nil],
305
+ [:simple, 'node_id', (defined? @node_id) ? @node_id : nil],
307
306
  [:objarr, 'taxonomy', 'taxonomies'],
308
307
  [:objarr, 'sequence', 'sequences'],
309
- [:complex, 'events', @events],
310
- [:complex, 'binary_characters', @binary_characters],
308
+ [:complex, 'events', (defined? @events) ? @events : nil],
309
+ [:complex, 'binary_characters', (defined? @binary_characters) ? @binary_characters : nil],
311
310
  [:objarr, 'distribution', 'distributions'],
312
- [:complex, 'date', @date],
311
+ [:complex, 'date', (defined? @date) ? @date : nil],
313
312
  [:objarr, 'reference', 'references'],
314
313
  [:objarr, 'propery', 'properties']])
315
314
 
@@ -322,7 +321,7 @@ module PhyloXML
322
321
  # Events at the root node of a clade (e.g. one gene duplication).
323
322
  class Events
324
323
  #value comes from list: transfer, fusion, speciation_or_duplication, other, mixed, unassigned
325
- attr_accessor :type
324
+ attr_reader :type
326
325
 
327
326
  # Integer
328
327
  attr_reader :duplications, :speciations, :losses
@@ -330,26 +329,33 @@ module PhyloXML
330
329
  # Confidence object
331
330
  attr_reader :confidence
332
331
 
333
- def confidence=(type, value)
334
- @confidence = Confidence.new(type, value)
335
- end
332
+ #---
333
+ #def confidence=(type, value)
334
+ # @confidence = Confidence.new(type, value)
335
+ #end
336
+ #+++
336
337
 
338
+ # Confidence object
337
339
  def confidence=(conf)
338
340
  @confidence = conf
339
341
  end
340
342
 
343
+ # Integer
341
344
  def duplications=(str)
342
345
  @duplications = str.to_i
343
346
  end
344
347
 
348
+ # Integer
345
349
  def losses=(str)
346
350
  @losses = str.to_i
347
351
  end
348
352
 
353
+ # Integer
349
354
  def speciations=(str)
350
355
  @speciations=str.to_i
351
356
  end
352
357
 
358
+ #value comes from list: transfer, fusion, speciation_or_duplication, other, mixed, unassigned
353
359
  def type=(str)
354
360
  @type = str
355
361
  #@todo add unit test for this
@@ -363,11 +369,11 @@ module PhyloXML
363
369
  #@todo add unit test
364
370
  events = LibXML::XML::Node.new('events')
365
371
  PhyloXML::Writer.generate_xml(events, self, [
366
- [:simple, 'type', @type],
367
- [:simple, 'duplications', @duplications],
368
- [:simple, 'speciations', @speciations],
369
- [:simple, 'losses', @losses],
370
- [:complex, 'confidence', @confidence]])
372
+ [:simple, 'type', (defined? @type) ? @type : nil],
373
+ [:simple, 'duplications', (defined? @duplications) ? @duplications : nil],
374
+ [:simple, 'speciations', (defined? @speciations) ? @speciations : nil],
375
+ [:simple, 'losses', (defined? @losses) ? @losses : nil],
376
+ [:complex, 'confidence', (defined? @confidence) ? @confidence : nil]])
371
377
  return events
372
378
  end
373
379
 
@@ -437,13 +443,13 @@ module PhyloXML
437
443
  # 'map datum'), for example Google's KML uses 'WGS84'.
438
444
  class Point
439
445
  # Float. Latitude
440
- attr_accessor :lat
446
+ attr_reader :lat
441
447
 
442
448
  # Float. Longitute
443
- attr_accessor :long
449
+ attr_reader :long
444
450
 
445
451
  # Float. Altitude
446
- attr_accessor :alt
452
+ attr_reader :alt
447
453
 
448
454
  # String. Altitude unit.
449
455
  attr_accessor :alt_unit
@@ -451,14 +457,17 @@ module PhyloXML
451
457
  # Geodedic datum / map datum
452
458
  attr_accessor :geodetic_datum
453
459
 
460
+ # Float. Latitude
454
461
  def lat=(str)
455
462
  @lat = str.to_f unless str.nil?
456
463
  end
457
464
 
465
+ # Float. Longitute
458
466
  def long=(str)
459
467
  @long = str.to_f unless str.nil?
460
468
  end
461
469
 
470
+ # Float. Altitude
462
471
  def alt=(str)
463
472
  @alt = str.to_f unless str.nil?
464
473
  end
@@ -581,7 +590,7 @@ module PhyloXML
581
590
  def to_xml
582
591
 
583
592
  seq = LibXML::XML::Node.new('sequence')
584
- if @type != nil
593
+ if (defined? @type) && @type
585
594
  if ["dna", "rna", "protein"].include?(@type)
586
595
  seq["type"] = @type
587
596
  else
@@ -592,22 +601,22 @@ module PhyloXML
592
601
  PhyloXML::Writer.generate_xml(seq, self, [
593
602
  [:attr, 'id_source'],
594
603
  [:attr, 'id_ref'],
595
- [:pattern, 'symbol', @symbol, Regexp.new("^\\S{1,10}$")],
596
- [:complex, 'accession', @accession],
597
- [:simple, 'name', @name],
598
- [:simple, 'location', @location]])
604
+ [:pattern, 'symbol', (defined? @symbol) ? @symbol : nil, Regexp.new("^\\S{1,10}$")],
605
+ [:complex, 'accession', (defined? @accession) ? @accession : nil],
606
+ [:simple, 'name', (defined? @name) ? @name : nil],
607
+ [:simple, 'location', (defined? @location) ? @location : nil]])
599
608
 
600
- if @mol_seq != nil
609
+ if (defined? @mol_seq) && @mol_seq
601
610
  molseq = LibXML::XML::Node.new('mol_seq', @mol_seq)
602
- molseq["is_aligned"] = @is_aligned.to_s if @is_aligned != nil
611
+ molseq["is_aligned"] = @is_aligned.to_s if (defined? @is_aligned) && @is_aligned != nil
603
612
  seq << molseq
604
613
  end
605
614
 
606
615
  PhyloXML::Writer.generate_xml(seq, self, [
607
616
  #[:pattern, 'mol_seq', @mol_seq, Regexp.new("^[a-zA-Z\.\-\?\*_]+$")],
608
- [:complex, 'uri', @uri],
617
+ [:complex, 'uri', (defined? @uri) ? @uri : nil],
609
618
  [:objarr, 'annotation', 'annotations'],
610
- [:complex, 'domain_architecture', @domain_architecture]])
619
+ [:complex, 'domain_architecture', (defined? @domain_architecture) ? @domain_architecture : nil]])
611
620
  #@todo test domain_architecture
612
621
  #any
613
622
  return seq
@@ -626,7 +635,7 @@ module PhyloXML
626
635
  # seq.primary_accession = @accession.value could be this
627
636
  seq.definition = @name
628
637
  #seq.comments = @name //this one?
629
- if @uri != nil
638
+ if (defined? @uri) && @uri
630
639
  h = {'url' => @uri.uri,
631
640
  'title' => @uri.desc }
632
641
  ref = Bio::Reference.new(h)
@@ -720,11 +729,11 @@ module PhyloXML
720
729
  # Converts elements to xml representation. Called by PhyloXML::Writer class.
721
730
  def to_xml
722
731
  annot = LibXML::XML::Node.new('annotation')
723
- annot["ref"] = @ref if @ref != nil
724
- PhyloXML::Writer.generate_xml(annot, self, [[:simple, 'desc', @desc],
725
- [:complex, 'confidence', @confidence],
732
+ annot["ref"] = @ref if (defined? @ref) && @ref
733
+ PhyloXML::Writer.generate_xml(annot, self, [[:simple, 'desc', (defined? @desc) ? @desc : nil],
734
+ [:complex, 'confidence', (defined? @confidence) ? @confidence : nil],
726
735
  [:objarr, 'property', 'properties'],
727
- [:complex, 'uri', @uri]])
736
+ [:complex, 'uri', (defined? @uri) ? @uri : nil]])
728
737
  return annot
729
738
  end
730
739
  end
@@ -825,10 +834,10 @@ module PhyloXML
825
834
  date = LibXML::XML::Node.new('date')
826
835
  PhyloXML::Writer.generate_xml(date, self, [
827
836
  [:attr, 'unit'],
828
- [:simple, 'desc', @desc],
829
- [:simple, 'value', @value],
830
- [:simple, 'minimum', @minimum],
831
- [:simple, 'maximum', @maximum]])
837
+ [:simple, 'desc', (defined? @desc) ? @desc : nil],
838
+ [:simple, 'value', (defined? @value) ? @value : nil],
839
+ [:simple, 'minimum', (defined? @minimum) ? @minimum : nil],
840
+ [:simple, 'maximum', (defined? @maximum) ? @maximum : nil]])
832
841
  return date
833
842
  end
834
843
 
@@ -839,11 +848,12 @@ module PhyloXML
839
848
  # 'length' is the total length of the protein
840
849
  class DomainArchitecture
841
850
  # Integer. Total length of the protein
842
- attr_accessor :length
851
+ attr_reader :length
843
852
 
844
853
  # Array of ProteinDomain objects.
845
854
  attr_reader :domains
846
855
 
856
+ # Integer. Total length of the protein
847
857
  def length=(str)
848
858
  @length = str.to_i
849
859
  end
@@ -868,7 +878,7 @@ module PhyloXML
868
878
  # name/unique identifier is described via the 'id' attribute.
869
879
  class ProteinDomain
870
880
  #Float, for example to store E-values 4.7E-14
871
- attr_accessor :confidence
881
+ attr_reader :confidence
872
882
 
873
883
  # String
874
884
  attr_accessor :id, :value
@@ -879,14 +889,17 @@ module PhyloXML
879
889
  # Integer. End of the domain.
880
890
  attr_reader :to
881
891
 
892
+ # Integer. Beginning of the domain.
882
893
  def from=(str)
883
894
  @from = str.to_i
884
895
  end
885
896
 
897
+ # Integer. End of the domain.
886
898
  def to=(str)
887
899
  @to = str.to_i
888
900
  end
889
901
 
902
+ #Float, for example to store E-values 4.7E-14
890
903
  def confidence=(str)
891
904
  @confidence = str.to_f
892
905
  end
@@ -901,7 +914,7 @@ module PhyloXML
901
914
  xml_node = LibXML::XML::Node.new('domain', @value)
902
915
  xml_node["from"] = @from.to_s
903
916
  xml_node["to"] = @to.to_s
904
- xml_node["id"] = @id if @id != nil
917
+ xml_node["id"] = @id if (defined? @id) && @id
905
918
  xml_node["confidence"] = @confidence.to_s
906
919
 
907
920
  return xml_node
@@ -987,7 +1000,7 @@ module PhyloXML
987
1000
  ref = LibXML::XML::Node.new('reference')
988
1001
  Writer.generate_xml(ref, self, [
989
1002
  [:attr, 'doi'],
990
- [:simple, 'desc', @desc]])
1003
+ [:simple, 'desc', (defined? @desc) ? @desc : nil]])
991
1004
  return ref
992
1005
  end
993
1006
 
@@ -999,7 +1012,7 @@ module PhyloXML
999
1012
  # For example it could be used to describe multiple parents of a clade.
1000
1013
  class CladeRelation
1001
1014
  # Float
1002
- attr_accessor :distance
1015
+ attr_reader :distance
1003
1016
  # String. Id of the referenced parents of a clade.
1004
1017
  attr_accessor :id_ref_0, :id_ref_1
1005
1018
  # String
@@ -1007,6 +1020,7 @@ module PhyloXML
1007
1020
  # Confidence object
1008
1021
  attr_accessor :confidence
1009
1022
 
1023
+ # Float
1010
1024
  def distance=(str)
1011
1025
  @distance = str.to_f
1012
1026
  end
@@ -1022,7 +1036,7 @@ module PhyloXML
1022
1036
  [:attr, 'id_ref_1'],
1023
1037
  [:attr, 'distance'],
1024
1038
  [:attr, 'type'],
1025
- [:complex, 'confidence', @confidnece]])
1039
+ [:complex, 'confidence', (defined? @confidnece) ? @confidnece : nil]])
1026
1040
 
1027
1041
  return cr
1028
1042
  end
@@ -1107,7 +1121,13 @@ module PhyloXML
1107
1121
  # attribute 'type' is 'orthology').
1108
1122
  class SequenceRelation
1109
1123
  # String
1110
- attr_accessor :id_ref_0, :id_ref_1, :type
1124
+ attr_accessor :id_ref_0, :id_ref_1
1125
+
1126
+ # String. Allowed values: "orthology", "one_to_one_orthology",
1127
+ # "super_orthology", "paralogy", "ultra_paralogy", "xenology",
1128
+ # "unknown", "other"
1129
+ attr_reader :type
1130
+
1111
1131
  # Float
1112
1132
  attr_reader :distance
1113
1133
 
@@ -1117,6 +1137,9 @@ module PhyloXML
1117
1137
  @distance = str.to_f if str != nil
1118
1138
  end
1119
1139
 
1140
+ # String. Allowed values: "orthology", "one_to_one_orthology",
1141
+ # "super_orthology", "paralogy", "ultra_paralogy", "xenology",
1142
+ # "unknown", "other"
1120
1143
  def type=(str)
1121
1144
  #@todo do warning instead?
1122
1145
  #@todo do validation at actually writing xml
@@ -1137,7 +1160,7 @@ module PhyloXML
1137
1160
  sr = LibXML::XML::Node.new('sequence_relation')
1138
1161
  sr['id_ref_0'] = @id_ref_0
1139
1162
  sr['id_ref_1'] = @id_ref_1
1140
- sr['distance'] = @distance.to_s if @distance != nil
1163
+ sr['distance'] = @distance.to_s if (defined? @distance) && @distance
1141
1164
  sr['type'] = @type
1142
1165
  return sr
1143
1166
  end
@@ -1145,7 +1168,7 @@ module PhyloXML
1145
1168
 
1146
1169
  end
1147
1170
 
1148
- class Other
1171
+ class Other
1149
1172
  attr_accessor :element_name, :attributes, :children, :value
1150
1173
 
1151
1174
  def initialize