bio 1.4.2 → 1.4.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (139) hide show
  1. data/.travis.yml +66 -0
  2. data/ChangeLog +989 -4524
  3. data/KNOWN_ISSUES.rdoc +67 -2
  4. data/README.rdoc +89 -23
  5. data/README_DEV.rdoc +93 -2
  6. data/RELEASE_NOTES.rdoc +167 -95
  7. data/Rakefile +199 -7
  8. data/bioruby.gemspec +27 -12
  9. data/bioruby.gemspec.erb +6 -3
  10. data/doc/ChangeLog-before-1.4.2 +5013 -0
  11. data/doc/RELEASE_NOTES-1.4.2.rdoc +132 -0
  12. data/doc/Tutorial.rd +21 -3
  13. data/doc/Tutorial.rd.html +20 -12
  14. data/etc/bioinformatics/seqdatabase.ini +13 -196
  15. data/gemfiles/Gemfile.travis-jruby1.8 +7 -0
  16. data/gemfiles/Gemfile.travis-jruby1.9 +10 -0
  17. data/gemfiles/Gemfile.travis-ruby1.8 +7 -0
  18. data/gemfiles/Gemfile.travis-ruby1.9 +10 -0
  19. data/gemfiles/modify-Gemfile.rb +28 -0
  20. data/gemfiles/prepare-gemspec.rb +25 -0
  21. data/lib/bio/alignment.rb +1 -1
  22. data/lib/bio/appl/bl2seq/report.rb +3 -3
  23. data/lib/bio/appl/blast/ddbj.rb +0 -3
  24. data/lib/bio/appl/blast/format0.rb +4 -22
  25. data/lib/bio/appl/blast/genomenet.rb +33 -16
  26. data/lib/bio/appl/blast/ncbioptions.rb +8 -3
  27. data/lib/bio/appl/blast/remote.rb +6 -5
  28. data/lib/bio/appl/blast/report.rb +10 -6
  29. data/lib/bio/appl/blast/rpsblast.rb +3 -2
  30. data/lib/bio/appl/blast/wublast.rb +3 -3
  31. data/lib/bio/command.rb +118 -36
  32. data/lib/bio/data/na.rb +1 -1
  33. data/lib/bio/db/embl/embl.rb +74 -0
  34. data/lib/bio/db/embl/format_embl.rb +0 -4
  35. data/lib/bio/db/fasta.rb +57 -45
  36. data/lib/bio/db/fasta/defline.rb +1 -1
  37. data/lib/bio/db/fasta/format_fasta.rb +0 -4
  38. data/lib/bio/db/fasta/format_qual.rb +0 -5
  39. data/lib/bio/db/fastq/format_fastq.rb +0 -1
  40. data/lib/bio/db/genbank/format_genbank.rb +0 -4
  41. data/lib/bio/db/gff.rb +41 -12
  42. data/lib/bio/db/kegg/genes.rb +3 -3
  43. data/lib/bio/db/kegg/kgml.rb +465 -64
  44. data/lib/bio/db/newick.rb +0 -244
  45. data/lib/bio/db/pdb.rb +1 -4
  46. data/lib/bio/db/pdb/atom.rb +3 -2
  47. data/lib/bio/db/pdb/chain.rb +2 -3
  48. data/lib/bio/db/pdb/chemicalcomponent.rb +3 -2
  49. data/lib/bio/db/pdb/model.rb +2 -2
  50. data/lib/bio/db/pdb/pdb.rb +2 -1
  51. data/lib/bio/db/pdb/residue.rb +2 -2
  52. data/lib/bio/db/pdb/utils.rb +7 -4
  53. data/lib/bio/db/phyloxml/phyloxml_parser.rb +52 -5
  54. data/lib/bio/feature.rb +2 -3
  55. data/lib/bio/io/flatfile/autodetection.rb +1 -1
  56. data/lib/bio/io/flatfile/buffer.rb +84 -0
  57. data/lib/bio/sequence.rb +6 -4
  58. data/lib/bio/sequence/aa.rb +3 -5
  59. data/lib/bio/sequence/adapter.rb +6 -6
  60. data/lib/bio/sequence/common.rb +3 -3
  61. data/lib/bio/sequence/compat.rb +2 -7
  62. data/lib/bio/sequence/dblink.rb +6 -5
  63. data/lib/bio/sequence/format.rb +0 -6
  64. data/lib/bio/sequence/format_raw.rb +0 -4
  65. data/lib/bio/sequence/generic.rb +3 -4
  66. data/lib/bio/sequence/na.rb +4 -6
  67. data/lib/bio/sequence/quality_score.rb +2 -0
  68. data/lib/bio/sequence/sequence_masker.rb +3 -0
  69. data/lib/bio/shell/core.rb +1 -0
  70. data/lib/bio/tree.rb +1 -2
  71. data/lib/bio/tree/output.rb +264 -0
  72. data/lib/bio/util/restriction_enzyme.rb +1 -3
  73. data/lib/bio/util/restriction_enzyme/analysis.rb +8 -5
  74. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +4 -3
  75. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +3 -2
  76. data/lib/bio/util/restriction_enzyme/dense_int_array.rb +3 -0
  77. data/lib/bio/util/restriction_enzyme/double_stranded.rb +3 -4
  78. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +3 -4
  79. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +3 -4
  80. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +3 -4
  81. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +3 -4
  82. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +3 -4
  83. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +3 -4
  84. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +3 -4
  85. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +3 -4
  86. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +3 -4
  87. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +3 -4
  88. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +3 -4
  89. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +3 -4
  90. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +3 -4
  91. data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -3
  92. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +3 -4
  93. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +3 -4
  94. data/lib/bio/util/restriction_enzyme/sorted_num_array.rb +3 -0
  95. data/lib/bio/util/restriction_enzyme/string_formatting.rb +3 -4
  96. data/lib/bio/version.rb +11 -2
  97. data/sample/seqdatabase.ini +210 -0
  98. data/test/bioruby_test_helper.rb +37 -12
  99. data/test/data/KEGG/test.kgml +37 -0
  100. data/test/data/command/echoarg2.bat +0 -0
  101. data/test/data/command/echoarg2.sh +4 -0
  102. data/test/functional/bio/test_command.rb +58 -28
  103. data/test/{functional → network}/bio/appl/blast/test_remote.rb +0 -0
  104. data/test/{functional → network}/bio/appl/test_blast.rb +0 -0
  105. data/test/{functional → network}/bio/appl/test_pts1.rb +0 -0
  106. data/test/{functional → network}/bio/io/test_ddbjrest.rb +0 -0
  107. data/test/{functional → network}/bio/io/test_ensembl.rb +0 -0
  108. data/test/{functional → network}/bio/io/test_pubmed.rb +0 -0
  109. data/test/{functional → network}/bio/io/test_soapwsdl.rb +0 -0
  110. data/test/{functional → network}/bio/io/test_togows.rb +0 -0
  111. data/test/network/bio/test_command.rb +35 -0
  112. data/test/runner.rb +16 -6
  113. data/test/unit/bio/appl/blast/test_report.rb +119 -0
  114. data/test/unit/bio/appl/blast/test_rpsblast.rb +1 -0
  115. data/test/unit/bio/data/test_na.rb +1 -1
  116. data/test/unit/bio/db/embl/test_embl.rb +2 -7
  117. data/test/unit/bio/db/embl/test_embl_rel89.rb +2 -7
  118. data/test/unit/bio/db/fasta/test_defline.rb +1 -1
  119. data/test/unit/bio/db/genbank/test_genpept.rb +1 -1
  120. data/test/unit/bio/db/kegg/test_drug.rb +1 -1
  121. data/test/unit/bio/db/kegg/test_genome.rb +1 -1
  122. data/test/unit/bio/db/kegg/test_glycan.rb +1 -1
  123. data/test/unit/bio/db/kegg/test_kgml.rb +1022 -0
  124. data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +2 -1
  125. data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +4 -2
  126. data/test/unit/bio/db/test_newick.rb +2 -0
  127. data/test/unit/bio/db/test_phyloxml.rb +54 -2
  128. data/test/unit/bio/db/test_phyloxml_writer.rb +15 -9
  129. data/test/unit/bio/db/test_soft.rb +1 -1
  130. data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -0
  131. data/test/unit/bio/io/flatfile/test_buffer.rb +141 -0
  132. data/test/unit/bio/sequence/test_common.rb +36 -4
  133. data/test/unit/bio/sequence/test_na.rb +1 -1
  134. data/test/unit/bio/test_command.rb +9 -4
  135. data/test/unit/bio/test_sequence.rb +2 -2
  136. data/test/unit/bio/test_tree.rb +11 -11
  137. data/test/unit/bio/util/test_restriction_enzyme.rb +1 -1
  138. metadata +1428 -655
  139. data/rdoc.zsh +0 -8
@@ -162,7 +162,7 @@ class NucleicAcid
162
162
  'y' => '[tcy]',
163
163
  'r' => '[agr]',
164
164
  'w' => '[atw]',
165
- 's' => '[gcw]',
165
+ 's' => '[gcs]',
166
166
  'k' => '[tgk]',
167
167
  'm' => '[acm]',
168
168
  'b' => '[tgcyskb]',
@@ -196,53 +196,127 @@ class EMBL < EMBLDB
196
196
 
197
197
 
198
198
 
199
+ #--
199
200
  ##
200
201
  # DE Line; description (>=1)
201
202
  #
203
+ #++
202
204
 
203
205
 
206
+ #--
204
207
  ##
205
208
  # KW Line; keyword (>=1)
206
209
  # KW [Keyword;]+
207
210
  #
208
211
  # Bio::EMBLDB#kw -> Array
209
212
  # #keywords -> Array
213
+ #++
210
214
 
211
215
 
216
+ #--
212
217
  ##
213
218
  # OS Line; organism species (>=1)
214
219
  # OS Genus species (name)
215
220
  # "OS Trifolium repens (white clover)"
216
221
  #
217
222
  # Bio::EMBLDB#os -> Array
223
+ #++
218
224
 
225
+ # returns contents in the OS line.
226
+ # * Bio::EMBL#os -> Array of <OS Hash>
227
+ # where <OS Hash> is:
228
+ # [{'name'=>'Human', 'os'=>'Homo sapiens'},
229
+ # {'name'=>'Rat', 'os'=>'Rattus norvegicus'}]
230
+ # * Bio::EMBL#os[0]['name'] => "Human"
231
+ # * Bio::EMBL#os[0] => {'name'=>"Human", 'os'=>'Homo sapiens'}
232
+ #--
233
+ # * Bio::EMBL#os(0) => "Homo sapiens (Human)"
234
+ #++
235
+ #
236
+ # OS Line; organism species (>=1)
237
+ # OS Trifolium repens (white clover)
238
+ #
239
+ # Typically, OS line shows "Genus species (name)" style:
240
+ # OS Genus species (name)
241
+ #
242
+ # Other examples:
243
+ # OS uncultured bacterium
244
+ # OS xxxxxx metagenome
245
+ # OS Cloning vector xxxxxxxx
246
+ # Complicated examples:
247
+ # OS Poeciliopsis gracilis (Poeciliopsis gracilis (Heckel, 1848))
248
+ # OS Etmopterus sp. B Last & Stevens, 1994 (bristled lanternshark)
249
+ # OS Galaxias sp. D (Allibone et al., 1996) (Pool Burn galaxias)
250
+ # OS Sicydiinae sp. 'Keith et al., 2010'
251
+ # OS Acanthopagrus sp. 'Jean & Lee, 2008'
252
+ # OS Gaussia princeps (T. Scott, 1894)
253
+ # OS Rana sp. 8 Hillis & Wilcox, 2005
254
+ # OS Contracaecum rudolphii C D'Amelio et al., 2007
255
+ # OS Partula sp. 'Mt. Marau, Tahiti'
256
+ # OS Leptocephalus sp. 'type II larva' (Smith, 1989)
257
+ # OS Tayloria grandis (D.G.Long) Goffinet & A.J.Shaw, 2002
258
+ # OS Non-A, non-B hepatitis virus
259
+ # OS Canidae (dog, coyote, wolf, fox)
260
+ # OS Salmonella enterica subsp. enterica serovar 4,[5],12:i:-
261
+ # OS Yersinia enterocolitica (type O:5,27)
262
+ # OS Influenza A virus (A/green-winged teal/OH/72/99(H6N1,4))
263
+ # OS Influenza A virus (A/Beijing/352/1989,(highgrowth reassortant NIB26)(H3N2))
264
+ # OS Recombinant Hepatitis C virus H77(5'UTR-NS2)/JFH1_V787A,Q1247L
265
+ #
266
+ def os(num = nil)
267
+ unless @data['OS']
268
+ os = Array.new
269
+ tmp = fetch('OS')
270
+ if /([A-Z][a-z]* *[\w\d \:\'\+\-]+[\w\d]) *\(([\w\d ]+)\)\s*\z/ =~ tmp
271
+ org = $1
272
+ os.push({'name' => $2, 'os' => $1})
273
+ else
274
+ os.push({'name' => nil, 'os' => tmp})
275
+ end
276
+ @data['OS'] = os
277
+ end
278
+ if num
279
+ # EX. "Trifolium repens (white clover)"
280
+ "#{@data['OS'][num]['os']} {#data['OS'][num]['name']"
281
+ end
282
+ @data['OS']
283
+ end
219
284
 
285
+
286
+ #--
220
287
  ##
221
288
  # OC Line; organism classification (>=1)
222
289
  #
223
290
  # Bio::EMBLDB#oc -> Array
291
+ #++
224
292
 
225
293
 
294
+ #--
226
295
  ##
227
296
  # OG Line; organella (0 or 1/entry)
228
297
  # ["Mitochondrion", "Chloroplast","Kinetoplast", "Cyanelle", "Plastid"]
229
298
  # or a plasmid name (e.g. "Plasmid pBR322").
230
299
  #
231
300
  # Bio::EMBLDB#og -> String
301
+ #++
232
302
 
233
303
 
304
+ #--
234
305
  ##
235
306
  # R Lines
236
307
  # RN RC RP RX RA RT RL
237
308
  #
238
309
  # Bio::EMBLDB#ref
310
+ #++
239
311
 
240
312
 
313
+ #--
241
314
  ##
242
315
  # DR Line; database cross-reference (>=0)
243
316
  # "DR database_identifier; primary_identifier; secondary_identifier."
244
317
  #
245
318
  # Bio::EMBLDB#dr
319
+ #++
246
320
 
247
321
 
248
322
  # returns feature table header (String) in the feature header (FH) line.
@@ -6,10 +6,6 @@
6
6
  # Naohisa Goto <ng@bioruby.org>
7
7
  # License:: The Ruby License
8
8
  #
9
- # $Id: format_embl.rb,v 1.1.2.7 2008/06/19 12:45:15 ngoto Exp $
10
- #
11
-
12
- require 'bio/sequence/format'
13
9
 
14
10
  module Bio::Sequence::Format::NucFormatter
15
11
 
@@ -35,7 +35,7 @@ module Bio
35
35
 
36
36
  # Treats a FASTA formatted entry, such as:
37
37
  #
38
- # >id and/or some comments <== comment line
38
+ # >id and/or some comments <== definition line
39
39
  # ATGCATGCATGCATGCATGCATGCATGCATGCATGC <== sequence lines
40
40
  # ATGCATGCATGCATGCATGCATGCATGCATGCATGC
41
41
  # ATGCATGCATGC
@@ -45,52 +45,64 @@ module Bio
45
45
  #
46
46
  # === Examples
47
47
  #
48
- # f_str = <<END_OF_STRING
49
- # >sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]
50
- # MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEG
51
- # VPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYME
52
- # GIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNL
53
- # KLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGC
54
- # IFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFP
55
- # QWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES
56
- # >sce:YBR274W CHK1; probable serine/threonine-protein kinase [EC:2.7.1.-] [SP:KB9S_YEAST]
57
- # MSLSQVSPLPHIKDVVLGDTVGQGAFACVKNAHLQMDPSIILAVKFIHVP
58
- # TCKKMGLSDKDITKEVVLQSKCSKHPNVLRLIDCNVSKEYMWIILEMADG
59
- # GDLFDKIEPDVGVDSDVAQFYFQQLVSAINYLHVECGVAHRDIKPENILL
60
- # DKNGNLKLADFGLASQFRRKDGTLRVSMDQRGSPPYMAPEVLYSEEGYYA
61
- # DRTDIWSIGILLFVLLTGQTPWELPSLENEDFVFFIENDGNLNWGPWSKI
62
- # EFTHLNLLRKILQPDPNKRVTLKALKLHPWVLRRASFSGDDGLCNDPELL
63
- # AKKLFSHLKVSLSNENYLKFTQDTNSNNRYISTQPIGNELAELEHDSMHF
64
- # QTVSNTQRAFTSYDSNTNYNSGTGMTQEAKWTQFISYDIAALQFHSDEND
65
- # CNELVKRHLQFNPNKLTKFYTLQPMDVLLPILEKALNLSQIRVKPDLFAN
66
- # FERLCELLGYDNVFPLIINIKTKSNGGYQLCGSISIIKIEEELKSVGFER
67
- # KTGDPLEWRRLFKKISTICRDIILIPN
48
+ # fasta_string = <<END_OF_STRING
49
+ # >gi|398365175|ref|NP_009718.3| Cdc28p [Saccharomyces cerevisiae S288c]
50
+ # MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEGVPSTAIREISLLKELKDDNI
51
+ # VRLYDIVHSDAHKLYLVFEFLDLDLKRYMEGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQ
52
+ # NLLINKDGNLKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGCIFAEMCNRKP
53
+ # IFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFPQWRRKDLSQVVPSLDPRGIDLLDKLLAYDP
54
+ # INRISARRAAIHPYFQES
68
55
  # END_OF_STRING
69
56
  #
70
- # f = Bio::FastaFormat.new(f_str)
71
- # puts "### FastaFormat"
72
- # puts "# entry"
73
- # puts f.entry
74
- # puts "# entry_id"
75
- # p f.entry_id
76
- # puts "# definition"
77
- # p f.definition
78
- # puts "# data"
79
- # p f.data
80
- # puts "# seq"
81
- # p f.seq
82
- # puts "# seq.type"
83
- # p f.seq.type
84
- # puts "# length"
85
- # p f.length
86
- # puts "# aaseq"
87
- # p f.aaseq
88
- # puts "# aaseq.type"
89
- # p f.aaseq.type
90
- # puts "# aaseq.composition"
91
- # p f.aaseq.composition
92
- # puts "# aalen"
93
- # p f.aalen
57
+ # f = Bio::FastaFormat.new(fasta_string)
58
+ #
59
+ # f.entry #=> ">gi|398365175|ref|NP_009718.3| Cdc28p [Saccharomyces cerevisiae S288c]\n"+
60
+ # # MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEGVPSTAIREISLLKELKDDNI\n"+
61
+ # # VRLYDIVHSDAHKLYLVFEFLDLDLKRYMEGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQ\n"+
62
+ # # NLLINKDGNLKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGCIFAEMCNRKP\n"+
63
+ # # IFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFPQWRRKDLSQVVPSLDPRGIDLLDKLLAYDP\n"+
64
+ # # INRISARRAAIHPYFQES"
65
+ #
66
+ # ==== Methods related to the name of the sequence
67
+ #
68
+ # A larger range of methods for dealing with Fasta definition lines can be found in FastaDefline, accessed through the FastaFormat#identifiers method.
69
+ #
70
+ # f.entry_id #=> "gi|398365175"
71
+ # f.definition #=> "gi|398365175|ref|NP_009718.3| Cdc28p [Saccharomyces cerevisiae S288c]"
72
+ # f.identifiers #=> Bio::FastaDefline instance
73
+ # f.accession #=> "NP_009718"
74
+ # f.accessions #=> ["NP_009718"]
75
+ # f.acc_version #=> "NP_009718.3"
76
+ # f.comment #=> nil
77
+ #
78
+ # ==== Methods related to the actual sequence
79
+ #
80
+ # f.seq #=> "MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEGVPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYMEGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNLKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGCIFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFPQWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES"
81
+ # f.data #=> "\nMSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEGVPSTAIREISLLKELKDDNI\nVRLYDIVHSDAHKLYLVFEFLDLDLKRYMEGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQ\nNLLINKDGNLKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGCIFAEMCNRKP\nIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFPQWRRKDLSQVVPSLDPRGIDLLDKLLAYDP\nINRISARRAAIHPYFQES\n"
82
+ # f.length #=> 298
83
+ # f.aaseq #=> "MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEGVPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYMEGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNLKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGCIFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFPQWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES"
84
+ # f.aaseq.composition #=> {"M"=>5, "S"=>15, "G"=>21, "E"=>16, "L"=>36, "A"=>17, "N"=>8, "Y"=>13, "K"=>22, "R"=>20, "V"=>18, "T"=>7, "D"=>23, "P"=>17, "Q"=>10, "I"=>23, "H"=>7, "F"=>12, "C"=>4, "W"=>4}
85
+ # f.aalen #=> 298
86
+ #
87
+ #
88
+ # === A less structured fasta entry
89
+ #
90
+ # f.entry #=> ">abc 123 456\nASDF"
91
+ #
92
+ # f.entry_id #=> "abc"
93
+ # f.definition #=> "abc 123 456"
94
+ # f.comment #=> nil
95
+ # f.accession #=> nil
96
+ # f.accessions #=> []
97
+ # f.acc_version #=> nil
98
+ #
99
+ # f.seq #=> "ASDF"
100
+ # f.data #=> "\nASDF\n"
101
+ # f.length #=> 4
102
+ # f.aaseq #=> "ASDF"
103
+ # f.aaseq.composition #=> {"A"=>1, "S"=>1, "D"=>1, "F"=>1}
104
+ # f.aalen #=> 4
105
+ #
94
106
  #
95
107
  # === References
96
108
  #
@@ -120,7 +120,7 @@ module Bio
120
120
  # ["gi", "2147182"], ["pir", nil, "I51898"], ["gi", "544724"],
121
121
  # ["gb", "AAB29504.1", nil], ["Cavia"]]
122
122
  #
123
- # === Refereneces
123
+ # === References
124
124
  #
125
125
  # * Fasta format description (NCBI)
126
126
  # http://www.ncbi.nlm.nih.gov/BLAST/fasta.shtml
@@ -7,10 +7,6 @@
7
7
  # Jan Aerts <jan.aerts@bbsrc.ac.uk>
8
8
  # License:: The Ruby License
9
9
  #
10
- # $Id: format_fasta.rb,v 1.1.2.1 2008/03/04 11:26:59 ngoto Exp $
11
- #
12
-
13
- require 'bio/sequence/format'
14
10
 
15
11
  module Bio::Sequence::Format::Formatter
16
12
 
@@ -5,11 +5,6 @@
5
5
  # Naohisa Goto <ng@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id:$
9
- #
10
-
11
- require 'bio/sequence/format'
12
- require 'bio/sequence/quality_score'
13
8
 
14
9
  module Bio::Sequence::Format::Formatter
15
10
 
@@ -7,7 +7,6 @@
7
7
  #
8
8
 
9
9
  require 'bio/db/fastq'
10
- require 'bio/sequence/format'
11
10
 
12
11
  module Bio::Sequence::Format::Formatter
13
12
 
@@ -4,10 +4,6 @@
4
4
  # Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: format_genbank.rb,v 1.1.2.5 2008/06/17 15:59:24 ngoto Exp $
8
- #
9
-
10
- require 'bio/sequence/format'
11
7
 
12
8
  module Bio::Sequence::Format::NucFormatter
13
9
 
@@ -454,7 +454,8 @@ module Bio
454
454
 
455
455
  # Return the record as a GFF2 compatible string
456
456
  def to_s
457
- cmnt = if @comment and !@comment.to_s.strip.empty? then
457
+ cmnt = if defined?(@comment) and @comment and
458
+ !@comment.to_s.strip.empty? then
458
459
  @comment.gsub(/[\r\n]+/, ' ')
459
460
  else
460
461
  false
@@ -996,21 +997,46 @@ module Bio
996
997
  str.empty? ? '.' : str
997
998
  end
998
999
 
1000
+ if URI.const_defined?(:Parser) then
1001
+ # (private) URI::Parser object for escape/unescape GFF3 columns
1002
+ URI_PARSER = URI::Parser.new
1003
+
1004
+ # (private) the same as URI::Parser#escape(str, unsafe)
1005
+ def _escape(str, unsafe)
1006
+ URI_PARSER.escape(str, unsafe)
1007
+ end
1008
+
1009
+ # (private) the same as URI::Parser#unescape(str)
1010
+ def _unescape(str)
1011
+ URI_PARSER.unescape(str)
1012
+ end
1013
+ else
1014
+ # (private) the same as URI.escape(str, unsafe)
1015
+ def _escape(str, unsafe)
1016
+ URI.escape(str, unsafe)
1017
+ end
1018
+
1019
+ # (private) the same as URI.unescape(str)
1020
+ def _unescape(str)
1021
+ URI.unescape(str)
1022
+ end
1023
+ end
1024
+
999
1025
  # Return the string corresponding to these characters unescaped
1000
1026
  def unescape(string)
1001
- URI.unescape(string)
1027
+ _unescape(string)
1002
1028
  end
1003
1029
 
1004
1030
  # Escape a column according to the specification at
1005
1031
  # http://song.sourceforge.net/gff3.shtml.
1006
1032
  def escape(string)
1007
- URI.escape(string, UNSAFE)
1033
+ _escape(string, UNSAFE)
1008
1034
  end
1009
1035
 
1010
1036
  # Escape seqid column according to the specification at
1011
1037
  # http://song.sourceforge.net/gff3.shtml.
1012
1038
  def escape_seqid(string)
1013
- URI.escape(string, UNSAFE_SEQID)
1039
+ _escape(string, UNSAFE_SEQID)
1014
1040
  end
1015
1041
 
1016
1042
  # Escape attribute according to the specification at
@@ -1019,7 +1045,7 @@ module Bio
1019
1045
  # are escaped: ",=;".
1020
1046
  # Returns the string corresponding to these characters escaped.
1021
1047
  def escape_attribute(string)
1022
- URI.escape(string, UNSAFE_ATTRIBUTE)
1048
+ _escape(string, UNSAFE_ATTRIBUTE)
1023
1049
  end
1024
1050
  end #module Escape
1025
1051
 
@@ -1028,6 +1054,7 @@ module Bio
1028
1054
  # Stores meta-data "##sequence-region seqid start end".
1029
1055
  class SequenceRegion
1030
1056
  include Escape
1057
+ extend Escape
1031
1058
 
1032
1059
  # creates a new SequenceRegion class
1033
1060
  def initialize(seqid, start, endpos)
@@ -1039,7 +1066,7 @@ module Bio
1039
1066
  # parses given string and returns SequenceRegion class
1040
1067
  def self.parse(str)
1041
1068
  dummy, seqid, start, endpos =
1042
- str.chomp.split(/\s+/, 4).collect { |x| URI.unescape(x) }
1069
+ str.chomp.split(/\s+/, 4).collect { |x| unescape(x) }
1043
1070
  self.new(seqid, start, endpos)
1044
1071
  end
1045
1072
 
@@ -1139,7 +1166,8 @@ module Bio
1139
1166
 
1140
1167
  # Return the record as a GFF3 compatible string
1141
1168
  def to_s
1142
- cmnt = if @comment and !@comment.to_s.strip.empty? then
1169
+ cmnt = if defined?(@comment) and @comment and
1170
+ !@comment.to_s.strip.empty? then
1143
1171
  @comment.gsub(/[\r\n]+/, ' ')
1144
1172
  else
1145
1173
  false
@@ -1163,6 +1191,7 @@ module Bio
1163
1191
  # data of "Target" attribute.
1164
1192
  class Target
1165
1193
  include GFF3::Escape
1194
+ extend GFF3::Escape
1166
1195
 
1167
1196
  # Creates a new Target object.
1168
1197
  def initialize(target_id, start, endpos, strand = nil)
@@ -1190,7 +1219,7 @@ module Bio
1190
1219
  #
1191
1220
  def self.parse(str)
1192
1221
  target_id, start, endpos, strand =
1193
- str.split(/ +/, 4).collect { |x| URI.unescape(x) }
1222
+ str.split(/ +/, 4).collect { |x| unescape(x) }
1194
1223
  self.new(target_id, start, endpos, strand)
1195
1224
  end
1196
1225
 
@@ -1332,15 +1361,15 @@ module Bio
1332
1361
 
1333
1362
  # rest of data_ref
1334
1363
  len = 0
1335
- data_ref.each do |ref|
1336
- len += ref.length if ref.code == :M
1364
+ data_ref.each do |r|
1365
+ len += r.length if r.code == :M
1337
1366
  end
1338
1367
  data.push Code.new(:D, len) if len > 0
1339
1368
 
1340
1369
  # rest of data_tgt
1341
1370
  len = 0
1342
- data_tgt.each do |tgt|
1343
- len += tgt.length if tgt.code == :M
1371
+ data_tgt.each do |t|
1372
+ len += t.length if t.code == :M
1344
1373
  end
1345
1374
  data.push Code.new(:I, len) if len > 0
1346
1375