bio 1.4.2 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. data/.travis.yml +66 -0
  2. data/ChangeLog +989 -4524
  3. data/KNOWN_ISSUES.rdoc +67 -2
  4. data/README.rdoc +89 -23
  5. data/README_DEV.rdoc +93 -2
  6. data/RELEASE_NOTES.rdoc +167 -95
  7. data/Rakefile +199 -7
  8. data/bioruby.gemspec +27 -12
  9. data/bioruby.gemspec.erb +6 -3
  10. data/doc/ChangeLog-before-1.4.2 +5013 -0
  11. data/doc/RELEASE_NOTES-1.4.2.rdoc +132 -0
  12. data/doc/Tutorial.rd +21 -3
  13. data/doc/Tutorial.rd.html +20 -12
  14. data/etc/bioinformatics/seqdatabase.ini +13 -196
  15. data/gemfiles/Gemfile.travis-jruby1.8 +7 -0
  16. data/gemfiles/Gemfile.travis-jruby1.9 +10 -0
  17. data/gemfiles/Gemfile.travis-ruby1.8 +7 -0
  18. data/gemfiles/Gemfile.travis-ruby1.9 +10 -0
  19. data/gemfiles/modify-Gemfile.rb +28 -0
  20. data/gemfiles/prepare-gemspec.rb +25 -0
  21. data/lib/bio/alignment.rb +1 -1
  22. data/lib/bio/appl/bl2seq/report.rb +3 -3
  23. data/lib/bio/appl/blast/ddbj.rb +0 -3
  24. data/lib/bio/appl/blast/format0.rb +4 -22
  25. data/lib/bio/appl/blast/genomenet.rb +33 -16
  26. data/lib/bio/appl/blast/ncbioptions.rb +8 -3
  27. data/lib/bio/appl/blast/remote.rb +6 -5
  28. data/lib/bio/appl/blast/report.rb +10 -6
  29. data/lib/bio/appl/blast/rpsblast.rb +3 -2
  30. data/lib/bio/appl/blast/wublast.rb +3 -3
  31. data/lib/bio/command.rb +118 -36
  32. data/lib/bio/data/na.rb +1 -1
  33. data/lib/bio/db/embl/embl.rb +74 -0
  34. data/lib/bio/db/embl/format_embl.rb +0 -4
  35. data/lib/bio/db/fasta.rb +57 -45
  36. data/lib/bio/db/fasta/defline.rb +1 -1
  37. data/lib/bio/db/fasta/format_fasta.rb +0 -4
  38. data/lib/bio/db/fasta/format_qual.rb +0 -5
  39. data/lib/bio/db/fastq/format_fastq.rb +0 -1
  40. data/lib/bio/db/genbank/format_genbank.rb +0 -4
  41. data/lib/bio/db/gff.rb +41 -12
  42. data/lib/bio/db/kegg/genes.rb +3 -3
  43. data/lib/bio/db/kegg/kgml.rb +465 -64
  44. data/lib/bio/db/newick.rb +0 -244
  45. data/lib/bio/db/pdb.rb +1 -4
  46. data/lib/bio/db/pdb/atom.rb +3 -2
  47. data/lib/bio/db/pdb/chain.rb +2 -3
  48. data/lib/bio/db/pdb/chemicalcomponent.rb +3 -2
  49. data/lib/bio/db/pdb/model.rb +2 -2
  50. data/lib/bio/db/pdb/pdb.rb +2 -1
  51. data/lib/bio/db/pdb/residue.rb +2 -2
  52. data/lib/bio/db/pdb/utils.rb +7 -4
  53. data/lib/bio/db/phyloxml/phyloxml_parser.rb +52 -5
  54. data/lib/bio/feature.rb +2 -3
  55. data/lib/bio/io/flatfile/autodetection.rb +1 -1
  56. data/lib/bio/io/flatfile/buffer.rb +84 -0
  57. data/lib/bio/sequence.rb +6 -4
  58. data/lib/bio/sequence/aa.rb +3 -5
  59. data/lib/bio/sequence/adapter.rb +6 -6
  60. data/lib/bio/sequence/common.rb +3 -3
  61. data/lib/bio/sequence/compat.rb +2 -7
  62. data/lib/bio/sequence/dblink.rb +6 -5
  63. data/lib/bio/sequence/format.rb +0 -6
  64. data/lib/bio/sequence/format_raw.rb +0 -4
  65. data/lib/bio/sequence/generic.rb +3 -4
  66. data/lib/bio/sequence/na.rb +4 -6
  67. data/lib/bio/sequence/quality_score.rb +2 -0
  68. data/lib/bio/sequence/sequence_masker.rb +3 -0
  69. data/lib/bio/shell/core.rb +1 -0
  70. data/lib/bio/tree.rb +1 -2
  71. data/lib/bio/tree/output.rb +264 -0
  72. data/lib/bio/util/restriction_enzyme.rb +1 -3
  73. data/lib/bio/util/restriction_enzyme/analysis.rb +8 -5
  74. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +4 -3
  75. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +3 -2
  76. data/lib/bio/util/restriction_enzyme/dense_int_array.rb +3 -0
  77. data/lib/bio/util/restriction_enzyme/double_stranded.rb +3 -4
  78. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +3 -4
  79. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +3 -4
  80. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +3 -4
  81. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +3 -4
  82. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +3 -4
  83. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +3 -4
  84. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +3 -4
  85. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +3 -4
  86. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +3 -4
  87. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +3 -4
  88. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +3 -4
  89. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +3 -4
  90. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +3 -4
  91. data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -3
  92. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +3 -4
  93. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +3 -4
  94. data/lib/bio/util/restriction_enzyme/sorted_num_array.rb +3 -0
  95. data/lib/bio/util/restriction_enzyme/string_formatting.rb +3 -4
  96. data/lib/bio/version.rb +11 -2
  97. data/sample/seqdatabase.ini +210 -0
  98. data/test/bioruby_test_helper.rb +37 -12
  99. data/test/data/KEGG/test.kgml +37 -0
  100. data/test/data/command/echoarg2.bat +0 -0
  101. data/test/data/command/echoarg2.sh +4 -0
  102. data/test/functional/bio/test_command.rb +58 -28
  103. data/test/{functional → network}/bio/appl/blast/test_remote.rb +0 -0
  104. data/test/{functional → network}/bio/appl/test_blast.rb +0 -0
  105. data/test/{functional → network}/bio/appl/test_pts1.rb +0 -0
  106. data/test/{functional → network}/bio/io/test_ddbjrest.rb +0 -0
  107. data/test/{functional → network}/bio/io/test_ensembl.rb +0 -0
  108. data/test/{functional → network}/bio/io/test_pubmed.rb +0 -0
  109. data/test/{functional → network}/bio/io/test_soapwsdl.rb +0 -0
  110. data/test/{functional → network}/bio/io/test_togows.rb +0 -0
  111. data/test/network/bio/test_command.rb +35 -0
  112. data/test/runner.rb +16 -6
  113. data/test/unit/bio/appl/blast/test_report.rb +119 -0
  114. data/test/unit/bio/appl/blast/test_rpsblast.rb +1 -0
  115. data/test/unit/bio/data/test_na.rb +1 -1
  116. data/test/unit/bio/db/embl/test_embl.rb +2 -7
  117. data/test/unit/bio/db/embl/test_embl_rel89.rb +2 -7
  118. data/test/unit/bio/db/fasta/test_defline.rb +1 -1
  119. data/test/unit/bio/db/genbank/test_genpept.rb +1 -1
  120. data/test/unit/bio/db/kegg/test_drug.rb +1 -1
  121. data/test/unit/bio/db/kegg/test_genome.rb +1 -1
  122. data/test/unit/bio/db/kegg/test_glycan.rb +1 -1
  123. data/test/unit/bio/db/kegg/test_kgml.rb +1022 -0
  124. data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +2 -1
  125. data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +4 -2
  126. data/test/unit/bio/db/test_newick.rb +2 -0
  127. data/test/unit/bio/db/test_phyloxml.rb +54 -2
  128. data/test/unit/bio/db/test_phyloxml_writer.rb +15 -9
  129. data/test/unit/bio/db/test_soft.rb +1 -1
  130. data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -0
  131. data/test/unit/bio/io/flatfile/test_buffer.rb +141 -0
  132. data/test/unit/bio/sequence/test_common.rb +36 -4
  133. data/test/unit/bio/sequence/test_na.rb +1 -1
  134. data/test/unit/bio/test_command.rb +9 -4
  135. data/test/unit/bio/test_sequence.rb +2 -2
  136. data/test/unit/bio/test_tree.rb +11 -11
  137. data/test/unit/bio/util/test_restriction_enzyme.rb +1 -1
  138. metadata +1428 -655
  139. data/rdoc.zsh +0 -8
@@ -162,7 +162,7 @@ class NucleicAcid
162
162
  'y' => '[tcy]',
163
163
  'r' => '[agr]',
164
164
  'w' => '[atw]',
165
- 's' => '[gcw]',
165
+ 's' => '[gcs]',
166
166
  'k' => '[tgk]',
167
167
  'm' => '[acm]',
168
168
  'b' => '[tgcyskb]',
@@ -196,53 +196,127 @@ class EMBL < EMBLDB
196
196
 
197
197
 
198
198
 
199
+ #--
199
200
  ##
200
201
  # DE Line; description (>=1)
201
202
  #
203
+ #++
202
204
 
203
205
 
206
+ #--
204
207
  ##
205
208
  # KW Line; keyword (>=1)
206
209
  # KW [Keyword;]+
207
210
  #
208
211
  # Bio::EMBLDB#kw -> Array
209
212
  # #keywords -> Array
213
+ #++
210
214
 
211
215
 
216
+ #--
212
217
  ##
213
218
  # OS Line; organism species (>=1)
214
219
  # OS Genus species (name)
215
220
  # "OS Trifolium repens (white clover)"
216
221
  #
217
222
  # Bio::EMBLDB#os -> Array
223
+ #++
218
224
 
225
+ # returns contents in the OS line.
226
+ # * Bio::EMBL#os -> Array of <OS Hash>
227
+ # where <OS Hash> is:
228
+ # [{'name'=>'Human', 'os'=>'Homo sapiens'},
229
+ # {'name'=>'Rat', 'os'=>'Rattus norvegicus'}]
230
+ # * Bio::EMBL#os[0]['name'] => "Human"
231
+ # * Bio::EMBL#os[0] => {'name'=>"Human", 'os'=>'Homo sapiens'}
232
+ #--
233
+ # * Bio::EMBL#os(0) => "Homo sapiens (Human)"
234
+ #++
235
+ #
236
+ # OS Line; organism species (>=1)
237
+ # OS Trifolium repens (white clover)
238
+ #
239
+ # Typically, OS line shows "Genus species (name)" style:
240
+ # OS Genus species (name)
241
+ #
242
+ # Other examples:
243
+ # OS uncultured bacterium
244
+ # OS xxxxxx metagenome
245
+ # OS Cloning vector xxxxxxxx
246
+ # Complicated examples:
247
+ # OS Poeciliopsis gracilis (Poeciliopsis gracilis (Heckel, 1848))
248
+ # OS Etmopterus sp. B Last & Stevens, 1994 (bristled lanternshark)
249
+ # OS Galaxias sp. D (Allibone et al., 1996) (Pool Burn galaxias)
250
+ # OS Sicydiinae sp. 'Keith et al., 2010'
251
+ # OS Acanthopagrus sp. 'Jean & Lee, 2008'
252
+ # OS Gaussia princeps (T. Scott, 1894)
253
+ # OS Rana sp. 8 Hillis & Wilcox, 2005
254
+ # OS Contracaecum rudolphii C D'Amelio et al., 2007
255
+ # OS Partula sp. 'Mt. Marau, Tahiti'
256
+ # OS Leptocephalus sp. 'type II larva' (Smith, 1989)
257
+ # OS Tayloria grandis (D.G.Long) Goffinet & A.J.Shaw, 2002
258
+ # OS Non-A, non-B hepatitis virus
259
+ # OS Canidae (dog, coyote, wolf, fox)
260
+ # OS Salmonella enterica subsp. enterica serovar 4,[5],12:i:-
261
+ # OS Yersinia enterocolitica (type O:5,27)
262
+ # OS Influenza A virus (A/green-winged teal/OH/72/99(H6N1,4))
263
+ # OS Influenza A virus (A/Beijing/352/1989,(highgrowth reassortant NIB26)(H3N2))
264
+ # OS Recombinant Hepatitis C virus H77(5'UTR-NS2)/JFH1_V787A,Q1247L
265
+ #
266
+ def os(num = nil)
267
+ unless @data['OS']
268
+ os = Array.new
269
+ tmp = fetch('OS')
270
+ if /([A-Z][a-z]* *[\w\d \:\'\+\-]+[\w\d]) *\(([\w\d ]+)\)\s*\z/ =~ tmp
271
+ org = $1
272
+ os.push({'name' => $2, 'os' => $1})
273
+ else
274
+ os.push({'name' => nil, 'os' => tmp})
275
+ end
276
+ @data['OS'] = os
277
+ end
278
+ if num
279
+ # EX. "Trifolium repens (white clover)"
280
+ "#{@data['OS'][num]['os']} {#data['OS'][num]['name']"
281
+ end
282
+ @data['OS']
283
+ end
219
284
 
285
+
286
+ #--
220
287
  ##
221
288
  # OC Line; organism classification (>=1)
222
289
  #
223
290
  # Bio::EMBLDB#oc -> Array
291
+ #++
224
292
 
225
293
 
294
+ #--
226
295
  ##
227
296
  # OG Line; organella (0 or 1/entry)
228
297
  # ["Mitochondrion", "Chloroplast","Kinetoplast", "Cyanelle", "Plastid"]
229
298
  # or a plasmid name (e.g. "Plasmid pBR322").
230
299
  #
231
300
  # Bio::EMBLDB#og -> String
301
+ #++
232
302
 
233
303
 
304
+ #--
234
305
  ##
235
306
  # R Lines
236
307
  # RN RC RP RX RA RT RL
237
308
  #
238
309
  # Bio::EMBLDB#ref
310
+ #++
239
311
 
240
312
 
313
+ #--
241
314
  ##
242
315
  # DR Line; database cross-reference (>=0)
243
316
  # "DR database_identifier; primary_identifier; secondary_identifier."
244
317
  #
245
318
  # Bio::EMBLDB#dr
319
+ #++
246
320
 
247
321
 
248
322
  # returns feature table header (String) in the feature header (FH) line.
@@ -6,10 +6,6 @@
6
6
  # Naohisa Goto <ng@bioruby.org>
7
7
  # License:: The Ruby License
8
8
  #
9
- # $Id: format_embl.rb,v 1.1.2.7 2008/06/19 12:45:15 ngoto Exp $
10
- #
11
-
12
- require 'bio/sequence/format'
13
9
 
14
10
  module Bio::Sequence::Format::NucFormatter
15
11
 
@@ -35,7 +35,7 @@ module Bio
35
35
 
36
36
  # Treats a FASTA formatted entry, such as:
37
37
  #
38
- # >id and/or some comments <== comment line
38
+ # >id and/or some comments <== definition line
39
39
  # ATGCATGCATGCATGCATGCATGCATGCATGCATGC <== sequence lines
40
40
  # ATGCATGCATGCATGCATGCATGCATGCATGCATGC
41
41
  # ATGCATGCATGC
@@ -45,52 +45,64 @@ module Bio
45
45
  #
46
46
  # === Examples
47
47
  #
48
- # f_str = <<END_OF_STRING
49
- # >sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]
50
- # MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEG
51
- # VPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYME
52
- # GIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNL
53
- # KLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGC
54
- # IFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFP
55
- # QWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES
56
- # >sce:YBR274W CHK1; probable serine/threonine-protein kinase [EC:2.7.1.-] [SP:KB9S_YEAST]
57
- # MSLSQVSPLPHIKDVVLGDTVGQGAFACVKNAHLQMDPSIILAVKFIHVP
58
- # TCKKMGLSDKDITKEVVLQSKCSKHPNVLRLIDCNVSKEYMWIILEMADG
59
- # GDLFDKIEPDVGVDSDVAQFYFQQLVSAINYLHVECGVAHRDIKPENILL
60
- # DKNGNLKLADFGLASQFRRKDGTLRVSMDQRGSPPYMAPEVLYSEEGYYA
61
- # DRTDIWSIGILLFVLLTGQTPWELPSLENEDFVFFIENDGNLNWGPWSKI
62
- # EFTHLNLLRKILQPDPNKRVTLKALKLHPWVLRRASFSGDDGLCNDPELL
63
- # AKKLFSHLKVSLSNENYLKFTQDTNSNNRYISTQPIGNELAELEHDSMHF
64
- # QTVSNTQRAFTSYDSNTNYNSGTGMTQEAKWTQFISYDIAALQFHSDEND
65
- # CNELVKRHLQFNPNKLTKFYTLQPMDVLLPILEKALNLSQIRVKPDLFAN
66
- # FERLCELLGYDNVFPLIINIKTKSNGGYQLCGSISIIKIEEELKSVGFER
67
- # KTGDPLEWRRLFKKISTICRDIILIPN
48
+ # fasta_string = <<END_OF_STRING
49
+ # >gi|398365175|ref|NP_009718.3| Cdc28p [Saccharomyces cerevisiae S288c]
50
+ # MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEGVPSTAIREISLLKELKDDNI
51
+ # VRLYDIVHSDAHKLYLVFEFLDLDLKRYMEGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQ
52
+ # NLLINKDGNLKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGCIFAEMCNRKP
53
+ # IFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFPQWRRKDLSQVVPSLDPRGIDLLDKLLAYDP
54
+ # INRISARRAAIHPYFQES
68
55
  # END_OF_STRING
69
56
  #
70
- # f = Bio::FastaFormat.new(f_str)
71
- # puts "### FastaFormat"
72
- # puts "# entry"
73
- # puts f.entry
74
- # puts "# entry_id"
75
- # p f.entry_id
76
- # puts "# definition"
77
- # p f.definition
78
- # puts "# data"
79
- # p f.data
80
- # puts "# seq"
81
- # p f.seq
82
- # puts "# seq.type"
83
- # p f.seq.type
84
- # puts "# length"
85
- # p f.length
86
- # puts "# aaseq"
87
- # p f.aaseq
88
- # puts "# aaseq.type"
89
- # p f.aaseq.type
90
- # puts "# aaseq.composition"
91
- # p f.aaseq.composition
92
- # puts "# aalen"
93
- # p f.aalen
57
+ # f = Bio::FastaFormat.new(fasta_string)
58
+ #
59
+ # f.entry #=> ">gi|398365175|ref|NP_009718.3| Cdc28p [Saccharomyces cerevisiae S288c]\n"+
60
+ # # MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEGVPSTAIREISLLKELKDDNI\n"+
61
+ # # VRLYDIVHSDAHKLYLVFEFLDLDLKRYMEGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQ\n"+
62
+ # # NLLINKDGNLKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGCIFAEMCNRKP\n"+
63
+ # # IFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFPQWRRKDLSQVVPSLDPRGIDLLDKLLAYDP\n"+
64
+ # # INRISARRAAIHPYFQES"
65
+ #
66
+ # ==== Methods related to the name of the sequence
67
+ #
68
+ # A larger range of methods for dealing with Fasta definition lines can be found in FastaDefline, accessed through the FastaFormat#identifiers method.
69
+ #
70
+ # f.entry_id #=> "gi|398365175"
71
+ # f.definition #=> "gi|398365175|ref|NP_009718.3| Cdc28p [Saccharomyces cerevisiae S288c]"
72
+ # f.identifiers #=> Bio::FastaDefline instance
73
+ # f.accession #=> "NP_009718"
74
+ # f.accessions #=> ["NP_009718"]
75
+ # f.acc_version #=> "NP_009718.3"
76
+ # f.comment #=> nil
77
+ #
78
+ # ==== Methods related to the actual sequence
79
+ #
80
+ # f.seq #=> "MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEGVPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYMEGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNLKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGCIFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFPQWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES"
81
+ # f.data #=> "\nMSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEGVPSTAIREISLLKELKDDNI\nVRLYDIVHSDAHKLYLVFEFLDLDLKRYMEGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQ\nNLLINKDGNLKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGCIFAEMCNRKP\nIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFPQWRRKDLSQVVPSLDPRGIDLLDKLLAYDP\nINRISARRAAIHPYFQES\n"
82
+ # f.length #=> 298
83
+ # f.aaseq #=> "MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEGVPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYMEGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNLKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGCIFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFPQWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES"
84
+ # f.aaseq.composition #=> {"M"=>5, "S"=>15, "G"=>21, "E"=>16, "L"=>36, "A"=>17, "N"=>8, "Y"=>13, "K"=>22, "R"=>20, "V"=>18, "T"=>7, "D"=>23, "P"=>17, "Q"=>10, "I"=>23, "H"=>7, "F"=>12, "C"=>4, "W"=>4}
85
+ # f.aalen #=> 298
86
+ #
87
+ #
88
+ # === A less structured fasta entry
89
+ #
90
+ # f.entry #=> ">abc 123 456\nASDF"
91
+ #
92
+ # f.entry_id #=> "abc"
93
+ # f.definition #=> "abc 123 456"
94
+ # f.comment #=> nil
95
+ # f.accession #=> nil
96
+ # f.accessions #=> []
97
+ # f.acc_version #=> nil
98
+ #
99
+ # f.seq #=> "ASDF"
100
+ # f.data #=> "\nASDF\n"
101
+ # f.length #=> 4
102
+ # f.aaseq #=> "ASDF"
103
+ # f.aaseq.composition #=> {"A"=>1, "S"=>1, "D"=>1, "F"=>1}
104
+ # f.aalen #=> 4
105
+ #
94
106
  #
95
107
  # === References
96
108
  #
@@ -120,7 +120,7 @@ module Bio
120
120
  # ["gi", "2147182"], ["pir", nil, "I51898"], ["gi", "544724"],
121
121
  # ["gb", "AAB29504.1", nil], ["Cavia"]]
122
122
  #
123
- # === Refereneces
123
+ # === References
124
124
  #
125
125
  # * Fasta format description (NCBI)
126
126
  # http://www.ncbi.nlm.nih.gov/BLAST/fasta.shtml
@@ -7,10 +7,6 @@
7
7
  # Jan Aerts <jan.aerts@bbsrc.ac.uk>
8
8
  # License:: The Ruby License
9
9
  #
10
- # $Id: format_fasta.rb,v 1.1.2.1 2008/03/04 11:26:59 ngoto Exp $
11
- #
12
-
13
- require 'bio/sequence/format'
14
10
 
15
11
  module Bio::Sequence::Format::Formatter
16
12
 
@@ -5,11 +5,6 @@
5
5
  # Naohisa Goto <ng@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id:$
9
- #
10
-
11
- require 'bio/sequence/format'
12
- require 'bio/sequence/quality_score'
13
8
 
14
9
  module Bio::Sequence::Format::Formatter
15
10
 
@@ -7,7 +7,6 @@
7
7
  #
8
8
 
9
9
  require 'bio/db/fastq'
10
- require 'bio/sequence/format'
11
10
 
12
11
  module Bio::Sequence::Format::Formatter
13
12
 
@@ -4,10 +4,6 @@
4
4
  # Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: format_genbank.rb,v 1.1.2.5 2008/06/17 15:59:24 ngoto Exp $
8
- #
9
-
10
- require 'bio/sequence/format'
11
7
 
12
8
  module Bio::Sequence::Format::NucFormatter
13
9
 
@@ -454,7 +454,8 @@ module Bio
454
454
 
455
455
  # Return the record as a GFF2 compatible string
456
456
  def to_s
457
- cmnt = if @comment and !@comment.to_s.strip.empty? then
457
+ cmnt = if defined?(@comment) and @comment and
458
+ !@comment.to_s.strip.empty? then
458
459
  @comment.gsub(/[\r\n]+/, ' ')
459
460
  else
460
461
  false
@@ -996,21 +997,46 @@ module Bio
996
997
  str.empty? ? '.' : str
997
998
  end
998
999
 
1000
+ if URI.const_defined?(:Parser) then
1001
+ # (private) URI::Parser object for escape/unescape GFF3 columns
1002
+ URI_PARSER = URI::Parser.new
1003
+
1004
+ # (private) the same as URI::Parser#escape(str, unsafe)
1005
+ def _escape(str, unsafe)
1006
+ URI_PARSER.escape(str, unsafe)
1007
+ end
1008
+
1009
+ # (private) the same as URI::Parser#unescape(str)
1010
+ def _unescape(str)
1011
+ URI_PARSER.unescape(str)
1012
+ end
1013
+ else
1014
+ # (private) the same as URI.escape(str, unsafe)
1015
+ def _escape(str, unsafe)
1016
+ URI.escape(str, unsafe)
1017
+ end
1018
+
1019
+ # (private) the same as URI.unescape(str)
1020
+ def _unescape(str)
1021
+ URI.unescape(str)
1022
+ end
1023
+ end
1024
+
999
1025
  # Return the string corresponding to these characters unescaped
1000
1026
  def unescape(string)
1001
- URI.unescape(string)
1027
+ _unescape(string)
1002
1028
  end
1003
1029
 
1004
1030
  # Escape a column according to the specification at
1005
1031
  # http://song.sourceforge.net/gff3.shtml.
1006
1032
  def escape(string)
1007
- URI.escape(string, UNSAFE)
1033
+ _escape(string, UNSAFE)
1008
1034
  end
1009
1035
 
1010
1036
  # Escape seqid column according to the specification at
1011
1037
  # http://song.sourceforge.net/gff3.shtml.
1012
1038
  def escape_seqid(string)
1013
- URI.escape(string, UNSAFE_SEQID)
1039
+ _escape(string, UNSAFE_SEQID)
1014
1040
  end
1015
1041
 
1016
1042
  # Escape attribute according to the specification at
@@ -1019,7 +1045,7 @@ module Bio
1019
1045
  # are escaped: ",=;".
1020
1046
  # Returns the string corresponding to these characters escaped.
1021
1047
  def escape_attribute(string)
1022
- URI.escape(string, UNSAFE_ATTRIBUTE)
1048
+ _escape(string, UNSAFE_ATTRIBUTE)
1023
1049
  end
1024
1050
  end #module Escape
1025
1051
 
@@ -1028,6 +1054,7 @@ module Bio
1028
1054
  # Stores meta-data "##sequence-region seqid start end".
1029
1055
  class SequenceRegion
1030
1056
  include Escape
1057
+ extend Escape
1031
1058
 
1032
1059
  # creates a new SequenceRegion class
1033
1060
  def initialize(seqid, start, endpos)
@@ -1039,7 +1066,7 @@ module Bio
1039
1066
  # parses given string and returns SequenceRegion class
1040
1067
  def self.parse(str)
1041
1068
  dummy, seqid, start, endpos =
1042
- str.chomp.split(/\s+/, 4).collect { |x| URI.unescape(x) }
1069
+ str.chomp.split(/\s+/, 4).collect { |x| unescape(x) }
1043
1070
  self.new(seqid, start, endpos)
1044
1071
  end
1045
1072
 
@@ -1139,7 +1166,8 @@ module Bio
1139
1166
 
1140
1167
  # Return the record as a GFF3 compatible string
1141
1168
  def to_s
1142
- cmnt = if @comment and !@comment.to_s.strip.empty? then
1169
+ cmnt = if defined?(@comment) and @comment and
1170
+ !@comment.to_s.strip.empty? then
1143
1171
  @comment.gsub(/[\r\n]+/, ' ')
1144
1172
  else
1145
1173
  false
@@ -1163,6 +1191,7 @@ module Bio
1163
1191
  # data of "Target" attribute.
1164
1192
  class Target
1165
1193
  include GFF3::Escape
1194
+ extend GFF3::Escape
1166
1195
 
1167
1196
  # Creates a new Target object.
1168
1197
  def initialize(target_id, start, endpos, strand = nil)
@@ -1190,7 +1219,7 @@ module Bio
1190
1219
  #
1191
1220
  def self.parse(str)
1192
1221
  target_id, start, endpos, strand =
1193
- str.split(/ +/, 4).collect { |x| URI.unescape(x) }
1222
+ str.split(/ +/, 4).collect { |x| unescape(x) }
1194
1223
  self.new(target_id, start, endpos, strand)
1195
1224
  end
1196
1225
 
@@ -1332,15 +1361,15 @@ module Bio
1332
1361
 
1333
1362
  # rest of data_ref
1334
1363
  len = 0
1335
- data_ref.each do |ref|
1336
- len += ref.length if ref.code == :M
1364
+ data_ref.each do |r|
1365
+ len += r.length if r.code == :M
1337
1366
  end
1338
1367
  data.push Code.new(:D, len) if len > 0
1339
1368
 
1340
1369
  # rest of data_tgt
1341
1370
  len = 0
1342
- data_tgt.each do |tgt|
1343
- len += tgt.length if tgt.code == :M
1371
+ data_tgt.each do |t|
1372
+ len += t.length if t.code == :M
1344
1373
  end
1345
1374
  data.push Code.new(:I, len) if len > 0
1346
1375