RubyGems - bio - Versions diffs - 1.2.1 → 1.3.0 - Mend

bio 1.2.1 → 1.3.0

Files changed (259) hide show

data/ChangeLog +3421 -0
data/KNOWN_ISSUES.rdoc +88 -0
data/README.rdoc +252 -0
data/README_DEV.rdoc +285 -0
data/Rakefile +143 -0
data/bin/bioruby +0 -0
data/bin/br_biofetch.rb +0 -0
data/bin/br_bioflat.rb +12 -1
data/bin/br_biogetseq.rb +0 -0
data/bin/br_pmfetch.rb +4 -3
data/bioruby.gemspec +477 -0
data/bioruby.gemspec.erb +117 -0
data/doc/Changes-0.7.rd +7 -0
data/doc/Changes-1.3.rdoc +239 -0
data/doc/Tutorial.rd +296 -184
data/doc/Tutorial.rd.html +1031 -0
data/doc/Tutorial.rd.ja +111 -45
data/doc/Tutorial.rd.ja.html +2225 -0
data/doc/bioruby.css +281 -0
data/extconf.rb +2 -0
data/lib/bio.rb +29 -4
data/lib/bio/appl/blast.rb +306 -121
data/lib/bio/appl/blast/ddbj.rb +142 -0
data/lib/bio/appl/blast/format0.rb +35 -25
data/lib/bio/appl/blast/format8.rb +2 -2
data/lib/bio/appl/blast/genomenet.rb +263 -0
data/lib/bio/appl/blast/ncbioptions.rb +220 -0
data/lib/bio/appl/blast/remote.rb +106 -0
data/lib/bio/appl/blast/report.rb +260 -9
data/lib/bio/appl/blast/rexml.rb +12 -5
data/lib/bio/appl/blast/rpsblast.rb +277 -0
data/lib/bio/appl/blast/wublast.rb +133 -12
data/lib/bio/appl/blast/xmlparser.rb +35 -18
data/lib/bio/appl/blat/report.rb +46 -5
data/lib/bio/appl/emboss.rb +62 -13
data/lib/bio/appl/fasta.rb +9 -11
data/lib/bio/appl/genscan/report.rb +3 -3
data/lib/bio/appl/hmmer.rb +1 -1
data/lib/bio/appl/hmmer/report.rb +10 -10
data/lib/bio/appl/paml/baseml.rb +95 -0
data/lib/bio/appl/paml/baseml/report.rb +32 -0
data/lib/bio/appl/paml/codeml.rb +242 -0
data/lib/bio/appl/paml/codeml/rates.rb +67 -0
data/lib/bio/appl/paml/codeml/report.rb +67 -0
data/lib/bio/appl/paml/common.rb +348 -0
data/lib/bio/appl/paml/common_report.rb +38 -0
data/lib/bio/appl/paml/yn00.rb +103 -0
data/lib/bio/appl/paml/yn00/report.rb +32 -0
data/lib/bio/appl/psort.rb +2 -2
data/lib/bio/appl/pts1.rb +5 -5
data/lib/bio/appl/tmhmm/report.rb +10 -1
data/lib/bio/command.rb +297 -41
data/lib/bio/compat/features.rb +157 -0
data/lib/bio/compat/references.rb +128 -0
data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
data/lib/bio/db/biosql/sequence.rb +508 -0
data/lib/bio/db/embl/common.rb +28 -12
data/lib/bio/db/embl/embl.rb +107 -9
data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
data/lib/bio/db/embl/format_embl.rb +190 -0
data/lib/bio/db/embl/sptr.rb +15 -16
data/lib/bio/db/fantom.rb +6 -8
data/lib/bio/db/fasta.rb +10 -507
data/lib/bio/db/fasta/defline.rb +532 -0
data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
data/lib/bio/db/fasta/format_fasta.rb +97 -0
data/lib/bio/db/genbank/common.rb +25 -8
data/lib/bio/db/genbank/format_genbank.rb +187 -0
data/lib/bio/db/genbank/genbank.rb +36 -1
data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
data/lib/bio/db/gff.rb +1791 -119
data/lib/bio/db/kegg/glycan.rb +2 -6
data/lib/bio/db/lasergene.rb +3 -3
data/lib/bio/db/medline.rb +4 -1
data/lib/bio/db/newick.rb +10 -10
data/lib/bio/db/pdb/chain.rb +6 -2
data/lib/bio/db/pdb/pdb.rb +12 -3
data/lib/bio/db/rebase.rb +7 -8
data/lib/bio/db/soft.rb +3 -3
data/lib/bio/feature.rb +1 -88
data/lib/bio/io/biosql/biodatabase.rb +64 -0
data/lib/bio/io/biosql/bioentry.rb +29 -0
data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
data/lib/bio/io/biosql/bioentry_path.rb +12 -0
data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
data/lib/bio/io/biosql/biosequence.rb +11 -0
data/lib/bio/io/biosql/comment.rb +7 -0
data/lib/bio/io/biosql/config/database.yml +20 -0
data/lib/bio/io/biosql/dbxref.rb +13 -0
data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
data/lib/bio/io/biosql/location.rb +32 -0
data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
data/lib/bio/io/biosql/ontology.rb +10 -0
data/lib/bio/io/biosql/reference.rb +9 -0
data/lib/bio/io/biosql/seqfeature.rb +32 -0
data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
data/lib/bio/io/biosql/taxon.rb +12 -0
data/lib/bio/io/biosql/taxon_name.rb +9 -0
data/lib/bio/io/biosql/term.rb +27 -0
data/lib/bio/io/biosql/term_dbxref.rb +11 -0
data/lib/bio/io/biosql/term_path.rb +12 -0
data/lib/bio/io/biosql/term_relationship.rb +13 -0
data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
data/lib/bio/io/biosql/term_synonym.rb +10 -0
data/lib/bio/io/das.rb +7 -7
data/lib/bio/io/ddbjxml.rb +57 -0
data/lib/bio/io/ensembl.rb +2 -2
data/lib/bio/io/fetch.rb +28 -14
data/lib/bio/io/flatfile.rb +17 -853
data/lib/bio/io/flatfile/autodetection.rb +545 -0
data/lib/bio/io/flatfile/buffer.rb +237 -0
data/lib/bio/io/flatfile/index.rb +17 -7
data/lib/bio/io/flatfile/indexer.rb +30 -12
data/lib/bio/io/flatfile/splitter.rb +297 -0
data/lib/bio/io/hinv.rb +442 -0
data/lib/bio/io/keggapi.rb +2 -2
data/lib/bio/io/ncbirest.rb +733 -0
data/lib/bio/io/pubmed.rb +34 -80
data/lib/bio/io/registry.rb +2 -2
data/lib/bio/io/sql.rb +178 -357
data/lib/bio/io/togows.rb +458 -0
data/lib/bio/location.rb +106 -11
data/lib/bio/pathway.rb +120 -14
data/lib/bio/reference.rb +115 -101
data/lib/bio/sequence.rb +164 -183
data/lib/bio/sequence/adapter.rb +108 -0
data/lib/bio/sequence/common.rb +22 -45
data/lib/bio/sequence/compat.rb +2 -2
data/lib/bio/sequence/dblink.rb +54 -0
data/lib/bio/sequence/format.rb +254 -77
data/lib/bio/sequence/format_raw.rb +23 -0
data/lib/bio/shell.rb +3 -1
data/lib/bio/shell/core.rb +2 -2
data/lib/bio/shell/plugin/entry.rb +33 -4
data/lib/bio/shell/plugin/ncbirest.rb +64 -0
data/lib/bio/shell/plugin/togows.rb +40 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
data/lib/bio/tree.rb +4 -2
data/lib/bio/util/color_scheme.rb +2 -2
data/lib/bio/util/contingency_table.rb +2 -2
data/lib/bio/util/restriction_enzyme.rb +2 -2
data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
data/lib/bio/version.rb +25 -0
data/rdoc.zsh +8 -0
data/sample/any2fasta.rb +0 -0
data/sample/biofetch.rb +0 -0
data/sample/dbget +0 -0
data/sample/demo_sequence.rb +158 -0
data/sample/enzymes.rb +0 -0
data/sample/fasta2tab.rb +0 -0
data/sample/fastagrep.rb +72 -0
data/sample/fastasort.rb +54 -0
data/sample/fsplit.rb +0 -0
data/sample/gb2fasta.rb +2 -3
data/sample/gb2tab.rb +0 -0
data/sample/gbtab2mysql.rb +0 -0
data/sample/genes2nuc.rb +0 -0
data/sample/genes2pep.rb +0 -0
data/sample/genes2tab.rb +0 -0
data/sample/genome2rb.rb +0 -0
data/sample/genome2tab.rb +0 -0
data/sample/goslim.rb +0 -0
data/sample/gt2fasta.rb +0 -0
data/sample/na2aa.rb +34 -0
data/sample/pmfetch.rb +0 -0
data/sample/pmsearch.rb +0 -0
data/sample/ssearch2tab.rb +0 -0
data/sample/tfastx2tab.rb +0 -0
data/sample/vs-genes.rb +0 -0
data/setup.rb +1596 -0
data/test/data/blast/blastp-multi.m7 +188 -0
data/test/data/command/echoarg2.bat +1 -0
data/test/data/paml/codeml/control_file.txt +30 -0
data/test/data/paml/codeml/output.txt +78 -0
data/test/data/paml/codeml/rates +217 -0
data/test/data/rpsblast/misc.rpsblast +193 -0
data/test/data/soft/GDS100_partial.soft +0 -0
data/test/data/soft/GSE3457_family_partial.soft +0 -0
data/test/functional/bio/appl/test_pts1.rb +115 -0
data/test/functional/bio/io/test_ensembl.rb +123 -80
data/test/functional/bio/io/test_togows.rb +267 -0
data/test/functional/bio/sequence/test_output_embl.rb +51 -0
data/test/functional/bio/test_command.rb +301 -0
data/test/runner.rb +17 -1
data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
data/test/unit/bio/appl/blast/test_report.rb +753 -35
data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
data/test/unit/bio/appl/test_blast.rb +135 -4
data/test/unit/bio/appl/test_fasta.rb +2 -2
data/test/unit/bio/appl/test_pts1.rb +1 -64
data/test/unit/bio/db/embl/test_common.rb +15 -15
data/test/unit/bio/db/embl/test_embl.rb +4 -4
data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
data/test/unit/bio/db/embl/test_sptr.rb +38 -1
data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
data/test/unit/bio/db/test_gff.rb +1151 -25
data/test/unit/bio/db/test_medline.rb +127 -0
data/test/unit/bio/db/test_nexus.rb +5 -1
data/test/unit/bio/db/test_prosite.rb +4 -4
data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
data/test/unit/bio/io/test_ddbjxml.rb +8 -3
data/test/unit/bio/io/test_fastacmd.rb +5 -5
data/test/unit/bio/io/test_flatfile.rb +357 -106
data/test/unit/bio/io/test_soapwsdl.rb +2 -2
data/test/unit/bio/io/test_togows.rb +161 -0
data/test/unit/bio/sequence/test_common.rb +210 -11
data/test/unit/bio/sequence/test_compat.rb +3 -3
data/test/unit/bio/sequence/test_dblink.rb +58 -0
data/test/unit/bio/sequence/test_na.rb +2 -2
data/test/unit/bio/test_command.rb +111 -50
data/test/unit/bio/test_feature.rb +29 -1
data/test/unit/bio/test_location.rb +566 -6
data/test/unit/bio/test_pathway.rb +91 -65
data/test/unit/bio/test_reference.rb +67 -13
data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
metadata +202 -167
data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388

@@ -0,0 +1,117 @@
+Gem::Specification.new do |s|
+  s.name = 'bio'
+  s.version = "<% ###### Below is executed in ERB environment ######
+    # Version can be overridden by the BIORUBY_GEM_VERSION environment variable
+    env_ver = ENV['BIORUBY_GEM_VERSION']
+    env_ver = nil if env_ver.to_s.strip.empty?  # unset or blank means "not specified"
+    # By default, determined from lib/bio/version.rb
+    load "./lib/bio/version.rb" unless defined?(BIO_VERSION_RB_LOADED)
+    case Bio::BIORUBY_EXTRA_VERSION
+    when nil
+      suffix = nil
+    when /\A\.(\d+)\z/
+      suffix = $1  # appended below as an extra version component
+    when /\-alpha(\d+)/
+      decrement = true  # pre-release: borrow 1 from the base version below
+      suffix = 9000 + $1.to_i
+    when /\-pre(\d+)/
+      decrement = true  # pre-release: borrow 1 from the base version below
+      suffix = 9500 + $1.to_i
+    when /\-rc(\d+)/
+      decrement = true  # pre-release: borrow 1 from the base version below
+      suffix = 9900 + $1.to_i
+    else
+      suffix = "0000"  # extra version string matched none of the patterns above
+    end
+    ver = Bio::BIORUBY_VERSION.reverse.collect do |i|  # last component first; assumes an array of integers (TODO confirm)
+            if decrement then
+              i -= 1
+              i < 0 ? (i += 10) : decrement = false  # below zero: add 10 and keep borrowing; otherwise stop
+            end
+            i
+          end.reverse
+    ver.push suffix if suffix
+    %><%=
+    (env_ver || ver.join('.'))
+       ###### Above is executed in ERB environment ######
+    %>"
+  s.author = "BioRuby project"
+  s.email = "staff@bioruby.org"
+  s.homepage = "http://bioruby.org/"
+  s.rubyforge_project = "bioruby"
+  s.summary = "Bioinformatics library"
+  s.description = "BioRuby is a library for bioinformatics (biology + information science)."
+  s.platform = Gem::Platform::RUBY
+  s.files = [
+    <% ###### Below is executed in ERB environment ######
+    # Gets file list from the "git ls-files" command.
+    files = (`git ls-files` rescue nil).to_s.split(/\r?\n/)
+    files.delete_if { |x| x.empty? }
+    # When "git ls-files" fails or returns nothing, builds the list from files on disk.
+    if !($?.success?) or files.size <= 0 then
+      files =
+        [ "README.rdoc", "README_DEV.rdoc",
+          "ChangeLog", "KNOWN_ISSUES.rdoc",
+          "Rakefile", "bioruby.gemspec.erb",
+          "bioruby.gemspec", "setup.rb",
+          "extconf.rb", "rdoc.zsh"
+        ] + Dir.glob("{bin,doc,etc,lib,sample,test}/**/*").delete_if do |item|
+              case item
+              when /(\A|\/)CVS(\z|\/)/, /(\A|\/)rdoc(\z|\/)/, /\~\z/  # CVS dirs, rdoc dirs, names ending in "~"
+                true
+              else
+                false
+              end
+            end
+    end
+    %><%=
+    files.sort.collect { |x| x.dump }.join(",\n    ")
+       ###### Above is executed in ERB environment ######
+    %>
+  ]
+  s.has_rdoc = true
+  s.extra_rdoc_files = [
+    <%= ###### Below is executed in ERB environment ######
+        # Files whose suffix is .rdoc are selected.
+        rdoc_files = files.find_all { |item| /\.rdoc\z/ =~ item }
+        # Fail safe settings
+        if rdoc_files.empty? then
+          rdoc_files = [ 'README.rdoc', 'README_DEV.rdoc',
+                         'doc/Changes-1.3.rdoc' ]
+        end
+        rdoc_files.push "ChangeLog" unless rdoc_files.include?("ChangeLog")
+        rdoc_files.sort.collect { |x| x.dump }.join(",\n    ")
+        ###### Above is executed in ERB environment ######
+    %>
+  ]
+  s.rdoc_options << '--main' << 'README.rdoc'
+  s.rdoc_options << '--title' << 'BioRuby API documentation'
+  s.rdoc_options << '--exclude' << '\.yaml\z'
+  s.rdoc_options << '--line-numbers' << '--inline-source'
+  s.require_path = 'lib'
+  s.autorequire = 'bio'
+  s.bindir = "bin"
+  s.executables = [
+    <%= ###### Below is executed in ERB environment ######
+        # Files in bin/ directory are selected.
+        exec_files = files.find_all { |item| /\Abin\// =~ item }
+        # Non-executable files are removed from the list.
+        exec_files.delete_if { |item| !File.executable?(item) }
+        # strip "bin/"
+        exec_files.collect! { |item| item.sub(/\Abin\//, '') }
+        # Fail safe settings
+        if exec_files.empty? then
+            exec_files = [ "bioruby", "br_biofetch.rb", "br_biogetseq.rb", "br_bioflat.rb", "br_pmfetch.rb" ]
+        end
+        exec_files.sort.collect { |x| x.dump }.join(",\n    ")
+        ###### Above is executed in ERB environment ######
+    %>
+  ]
+  s.default_executable = "bioruby"
+end

data/doc/Changes-0.7.rd CHANGED

@@ -338,6 +338,13 @@ In 1.1.0:
   instead of a string or nil: score, percent_identity, percent_positive,
   percent_gaps.
+--- BioRuby Shell
+In 1.1.0:
+* Shell commands seq, ent, obj are renamed to getseq, getent, getobj,
+  respectively.
 === Deleted files
 : lib/bio/db/genbank.rb

data/doc/Changes-1.3.rdoc ADDED

@@ -0,0 +1,239 @@
+= Incompatible and important changes since the BioRuby 1.2.1 release
+A lot of changes have been made to BioRuby since version 1.2.1
+was released.
+== New features
+=== Support for sequence output with improvements of Bio::Sequence
+The outputting of EMBL and GenBank formatted text is now supported in the
+Bio::Sequence class. See the document of Bio::Sequence#output for details.
+You can also create Bio::Sequence objects from many kinds of data such as
+Bio::GenBank, Bio::EMBL, and Bio::FastaFormat by using the to_biosequence
+method.
+=== BioSQL support
+BioSQL support is completely rewritten by using ActiveRecord.
+=== Bio::Blast
+Bio::Blast#reports can parse NCBI default (-m 0) format and tabular (-m 8)
+format, in addition to XML (-m 7) format.
+Bio::Blast::Report now supports XML format with multiple query sequences
+generated by blastall 2.2.14 or later.
+Bio::Blast.remote supports DDBJ, in addition to GenomeNet.
+In addition, a list of available blast databases on remote sites
+can be obtained by using Bio::Blast::Remote::DDBJ.databases and
+Bio::Blast::Remote::GenomeNet.databases methods. Note that the above
+remote blast methods may be changed in the future to support NCBI.
+Bio::Blast::RPSBlast::Report is newly added, a parser for NCBI RPS Blast
+(Reversed Position Specific Blast) default (-m 0 option) results.
+=== Bio::GFF::GFF2 and Bio::GFF::GFF3
+The outputting of GFF2/GFF3-formatted text is now supported. However, many
+incompatible changes have been made (See below for details).
+=== Bio::Hinv
+H-Invitational Database web service (REST) client class is newly added.
+=== Bio::NCBI::REST
+NCBI E-Utilities client class is newly added.
+=== Bio::PAML::Codeml and Bio::PAML::Codeml::Report
+Bio::PAML::Codeml, wrapper for PAML codeml program, and
+Bio::PAML::Codeml::Report, parser for codeml result are newly added,
+though some of them are still under construction and too specific to
+particular use cases.
+=== Bio::Locations
+New method Bio::Locations#to_s is added to support output of features.
+=== Bio::TogoWS::REST
+TogoWS REST client class is newly added. Information about TogoWS REST service
+can be found on http://togows.dbcls.jp/site/en/rest.html.
+== Deprecated classes
+=== Bio::Features
+Bio::Features is obsoleted and changed to an array of Bio::Feature objects
+with some backward compatibility methods.  The backward compatibility methods
+will be removed in the near future.
+=== Bio::References
+Bio::References is obsoleted and changed to an array of Bio::Reference objects
+with some backward compatibility methods.  The backward compatibility methods
+will be removed in the near future.
+== Incompatible changes
+=== Bio::BIORUBY_VERSION
+Definition of the constant Bio::BIORUBY_VERSION is moved from lib/bio.rb to
+lib/bio/version.rb. Normally, the autoload mechanism of Ruby correctly loads
+the version.rb, but special scripts directly using bio.rb may need to
+be changed.
+Bio::BIORUBY_VERSION is changed to be frozen.
+New constants Bio::BIORUBY_EXTRA_VERSION and Bio::BIORUBY_VERSION_ID are
+added. See their RDoc for details.
+=== Bio::Sequence
+Bio::Sequence#date is removed.  Alternatively, date_created or date_modified
+can be used.
+Bio::Sequence#taxonomy is changed to be an alias of classification, and
+the data type is changed to an array of string.
+=== Bio::Locations and Bio::Location
+A carat in a location (e.g. "123^124") is now parsed, instead of being
+replaced by "..".  To distinguish from normal "..", a new attribute
+Bio::Location#carat is used.
+"order(...)" or "group(...)" are also parsed, instead of being regarded
+as "join(...)".  To distinguish from "join(...)", a new attribute
+Bio::Locations#operator is used.  When "order(...)" or "group(...)",
+the attribute is set to :order or :group, respectively.  Note that
+"group(...)" is already deprecated in EMBL/GenBank/DDBJ.
+=== Bio::Blast
+Return value of Bio::Blast#exec_* is changed to String instead of Report
+object. Parsing the string is now processed in Bio::Blast#query method.
+Bio::Blast#exec_genomenet_tab and Bio::Blast#server="genomenet_tab" are
+deprecated.
+Bio::Blast#options=() can now change the following attributes: program, db,
+format, matrix, and filter.
+Bio::Blast.reports now supports default (-m 0) and tabular (-m 8) formats.
+Old implementation (only supports XML) is renamed to Bio::Blast.reports_xml,
+to keep compatibility for older BLAST XML documents which might not be parsed
+by the new Bio::Blast.reports nor Bio::FlatFile, although we are not sure
+whether such documents really exist or not.
+=== Bio::Blast::Default::Report and Bio::Blast::WU::Report
+Iteration#lambda, #kappa, #entropy, #gapped_lambda, #gapped_kappa,
+and #gapped_entropy, and the same methods in the Report class are
+changed to return float or nil instead of string or nil.
+=== Bio::Blat
+When reading BLAT psl (or pslx) data by using Bio::FlatFile, it checks
+each query name and returns a new entry object when the query name is
+changed from previous queries. That is, data is stored in two or more
+Bio::Blat::Report objects, instead of previous version's behavior
+(always reads all data at once and stores to a Bio::Blat::Report object).
+=== Bio::GFF, Bio::GFF::GFF2 and Bio::GFF::GFF3
+Bio::GFF::Record#comments is renamed to #comment, and #comments= is
+renamed to #comment=, because they only allow a single String (or nil)
+and the plural form "comments" may be confusing.  The "comments" and
+"comments=" methods can still be used, but warning messages will be
+shown when using in GFF2::Record and GFF3::Record objects.
+See below about GFF2 and/or GFF3 specific changes.
+=== Bio::GFF::GFF2 and Bio::GFF::GFF3
+Bio::GFF::GFF2::Record.new and Bio::GFF::GFF3::Record.new can also
+get 9 arguments corresponding to GFF columns, which helps to create
+Record object directly without formatted text.
+Bio::GFF::GFF2::Record#start, #end, and #frame return Integer or nil,
+and #score returns Float or nil, instead of String or nil.
+The same changes are also made to Bio::GFF::GFF3::Record.
+Bio::GFF::GFF2::Record#attributes and Bio::GFF::GFF3::Record#attributes
+are changed to return a nested Array, containing [ tag, value ] pairs,
+because of supporting multiple tags in the same tag names.  If you want
+to get a Hash, use Record#attributes_to_hash method, though some
+tag-value pairs in the same tag names may be lost.  Note that
+Bio::GFF::Record#attribute still returns a Hash for compatibility.
+New methods for getting, setting and manipulating attributes are added
+to Bio::GFF::GFF2::Record and Bio::GFF::GFF3::Record classes:
+attribute, get_attribute, get_attributes, set_attribute, replace_attributes,
+add_attribute, delete_attribute, delete_attributes, sort_attributes_by_tag!.
+It is recommended to use these methods instead of directly manipulating
+the array returned by Record#attributes.
+Bio::GFF::GFF2#to_s, Bio::GFF::GFF3#to_s, Bio::GFF::GFF2::Record#to_s,
+and Bio::GFF::GFF3::Record#to_s are added to support output of
+GFF2/GFF3 data.
+=== Bio::GFF::GFF2
+GFF2 attribute values are now automatically unescaped.  In addition,
+if a value of an attribute consists of two or more tokens delimited
+by spaces, an object of the new class Bio::GFF::GFF2::Record::Value is
+returned instead of String.  The new class Bio::GFF::GFF2::Record::Value
+aims to store a parsed value of an attribute.  If you really want to get
+unparsed string, Bio::GFF::GFF2::Record::Value#to_s can be used.
+The metadata (lines beginning with "##") are parsed to
+Bio::GFF::GFF2::MetaData objects and are stored to Bio::GFF::GFF2#metadata
+as an array, except the "##gff-version" line.  The "##gff-version" version
+string is stored to the Bio::GFF::GFF2#gff_version as a string.
+=== Bio::GFF::GFF3
+Aliases of columns which are renamed in the GFF3 specification are added
+to the Bio::GFF::GFF3::Record class: seqid (column 1; alias of "seqname"),
+feature_type (column 3; alias of "feature"; in the GFF3 spec, it is
+called "type", but because "type" is already used by Ruby, we use
+"feature_type"), phase (column 8; formerly "frame"). Original names can
+still be used because they are only aliases.
+Sequences bundled within GFF3 after "##FASTA" are now supported
+(Bio::GFF::GFF3#sequences).
+GFF3 attribute keys and values are automatically unescaped. Each attribute
+value is stored as a string, except for special attributes listed below:
+* Bio::GFF::GFF3::Record::Target to store a "Target" attribute.
+* Bio::GFF::GFF3::Record::Gap to store a "Gap" attribute.
+The metadata (lines beginning with "##") are parsed to
+Bio::GFF::GFF3::MetaData objects and stored to Bio::GFF::GFF3#metadata
+as an array, except "##gff-version", "##sequence-region", "###",
+and "##FASTA" lines.
+* "##gff-version" version string is stored to Bio::GFF::GFF3#gff_version.
+* "##sequence-region" lines are parsed to Bio::GFF::GFF3::SequenceRegion
+  objects and stored to Bio::GFF::GFF3#sequence_regions as an array.
+* "###" lines are parsed to Bio::GFF::GFF3::RecordBoundary objects.
+* "##FASTA" is regarded as the beginning of bundled sequences.
+=== Bio::Pathway
+Bio::Pathway#cliquishness is changed to calculate cliquishness (clustering
+coefficient) for not only undirected graphs but also directed graphs.
+In Bio::Pathway#to_matrix, dump_matrix, dump_list, and depth_first_search
+methods, to avoid dependency on the order of objects in Hash#each (and
+each_keys etc.), Bio::Pathway#index is used to specify preferences of
+nodes in a graph.
+=== Bio::SQL and BioSQL related classes
+BioSQL support is completely rewritten by using ActiveRecord. See documents
+in lib/bio/io/sql.rb, lib/bio/io/biosql, and lib/bio/db/biosql for details
+of changes and usage of the classes/modules.

data/doc/Tutorial.rd CHANGED

@@ -1,49 +1,79 @@
+# This document is generated with a version of rd2html (part of Hiki)
+#
+# A possible test run could be from rdtool (on Debian package rdtool)
+#
+#   ruby -I lib ./bin/rd2 ~/cvs/opensource/bioruby/doc/Tutorial.rd
+#
+# or with style sheet:
+#
+#   ruby -I lib ./bin/rd2 -r rd/rd2html-lib.rb --with-css=bioruby.css ~/cvs/opensource/bioruby/doc/Tutorial.rd > ~/bioruby.html
+#
+# in Debian:
+#
+#   rd2 -r rd/rd2html-lib  --with-css="/home/wrk/izip/cvs/opensource/bioruby/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css" Tutorial.rd > index.html
+#
+# A common problem is tabs in the text file! TABs are not allowed.
+#
+# To add tests run Toshiaki's bioruby shell and paste in the query plus
+# results.
+#
+# To run the embedded Ruby doctests you can get the doctest.rb from Pjotr.
 =begin
+#doctest Testing bioruby
-See the document in the CVS repository ./doc/((<Tutorial.rd|URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/*checkout*/bioruby/doc/Tutorial.rd?rev=HEAD&cvsroot=bioruby&content-type=text/plain>)) - for a potentially more up-to-date edition. This one was updated:
+= BioRuby Tutorial
-  $Id: Tutorial.rd,v 1.13 2007/07/09 12:28:07 pjotr Exp $
+Editor: PjotrPrins <p .at. bioruby.org>
-Translated into English: Naohisa Goto <ng@bioruby.org>
+* Copyright (C) 2001-2003 KATAYAMA Toshiaki <k .at. bioruby.org>
+* Copyright (C) 2005-2008 Pjotr Prins, Naohisa Goto and others
-Editor:                  PjotrPrins <p@bioruby.org>
+The latest version resides in the CVS repository ./doc/((<Tutorial.rd|URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/*checkout*/bioruby/doc/Tutorial.rd?rev=HEAD&cvsroot=bioruby&content-type=text/plain>)). This one was updated:
-Copyright (C) 2001-2003 KATAYAMA Toshiaki <k@bioruby.org>, 2005-2007 Pjotr Prins, Naohisa Goto and others
+  $Id: Tutorial.rd,v 1.22 2008/05/19 12:22:05 pjotr Exp $
-IMPORTANT NOTICE: This page is maintained in the BioRuby CVS
-repository. Please edit the file there otherwise changes may get
-lost. See ((<BioRuby Developer Information>)) for CVS and mailing list
-access.
-= BioRuby Tutorial
+in preparation for the ((<BioHackathon 2008|URL:http://hackathon.dbcls.jp/>))
 == Introduction
-This is a tutorial for using Bioruby. For BioRuby you need to install
-Ruby and the BioRuby package on your computer. For each following the
-instruction on the respective websites. (EDITOR's NOTE: include URL's)
-(EDITOR's NOTE: describe rdoc use for individual classes)
+This is a tutorial for using Bioruby. A basic knowledge of Ruby is required.
+If you want to know more about the programming language Ruby we recommend the
+excellent book ((<Programming Ruby|URL:http://www.pragprog.com/titles/ruby>))
+by Dave Thomas and Andy Hunt - some of it is online
+((<here|URL:http://www.rubycentral.com/pickaxe/>)).
-For further information on the Ruby language see the section 'Further
-reading' at the end.
+For BioRuby you need to install Ruby and the BioRuby package on your computer.
 You can check whether Ruby is installed on your computer and what
 version it has with the
-	% ruby -v
+  % ruby -v
 command. Showing something like:
   ruby 1.8.5 (2006-08-25) [powerpc-linux]
+If you see no such thing you'll have to install Ruby using your installation
+manager. For more information see the
+((<Ruby|URL:http://www.ruby-lang.org/en/>)) website.
+Once Ruby works, download and install Bioruby using the links on the
+((<Bioruby|URL:http://bioruby.org/>)) website.
+A lot of BioRuby's documentation exists in the source code and unit tests. To
+really dive in you will need the latest source code tree. The embedded rdoc
+documentation can be viewed online at
+((<bioruby's rdoc|URL:http://bioruby.org/rdoc/>)). But first let's start!
 == Trying Bioruby
 Bioruby comes with its own shell. After unpacking the sources run the
 following command
-  $BIORUBY/bin/bioruby
+  ./bin/bioruby  or
+  ruby -I lib bin/bioruby
 and you should see a prompt
@@ -52,10 +82,14 @@ and you should see a prompt
 Now test the following:
   bioruby> seq = Bio::Sequence::NA.new("atgcatgcaaaa")
-  bioruby> puts seq
-  atgcatgcaaaa
-  bioruby> puts seq.complement
-  ttttgcatgcat
+  ==> "atgcatgcaaaa"
+  bioruby> seq.complement
+  ==> "ttttgcatgcat"
+See the Bioruby shell section below for more tweaking. If you have trouble running
+examples also check the section below on trouble shooting. You can also post a
+question to the mailing list. BioRuby developers usually try to help.
 == Working with nucleic / amino acid sequences (Bio::Sequence class)
@@ -68,33 +102,48 @@ calculated, and so on. When translating into amino acid sequences the
 frame can be specified and optionally the codon table selected (as
 defined in codontable.rb).
+  bioruby> seq = Bio::Sequence::NA.new("atgcatgcaaaa")
+  ==> "atgcatgcaaaa"
+  # complemental sequence (Bio::Sequence::NA object)
+  bioruby> seq.complement
+  ==> "ttttgcatgcat"
+  bioruby> seq.subseq(3,8) # gets subsequence of positions 3 to 8
+  ==> "gcatgc"
+  bioruby> seq.gc_percent
+  ==> 33
+  bioruby> seq.composition
+  ==> {"a"=>6, "c"=>2, "g"=>2, "t"=>2}
+  bioruby> seq.translate
+  ==> "MHAK"
+  bioruby> seq.translate(2)        # translate from frame 2
+  ==> "CMQ"
+  bioruby> seq.translate(1,11)     # codon table 11
+  ==> "MHAK"
+  bioruby> seq.translate.codes
+  ==> ["Met", "His", "Ala", "Lys"]
+  bioruby> seq.translate.names
+  ==> ["methionine", "histidine", "alanine", "lysine"]
+  bioruby>  seq.translate.composition
+  ==> {"K"=>1, "A"=>1, "M"=>1, "H"=>1}
+  bioruby> seq.translate.molecular_weight
+  ==> 485.605
+  bioruby> seq.complement.translate
+  ==> "FCMH"
+Get a random sequence with the same NA count:
+  bioruby> counts = {'a'=>seq.count('a'),'c'=>seq.count('c'),'g'=>seq.count('g'),'t'=>seq.count('t')}
+  ==> {"a"=>6, "c"=>2, "g"=>2, "t"=>2}
+  bioruby!> randomseq = Bio::Sequence::NA.randomize(counts)
+  ==!> "aaacatgaagtc"
+  bioruby!> print counts
+  a6c2g2t2
+  bioruby!> p counts
+  {"a"=>6, "c"=>2, "g"=>2, "t"=>2}
-    #!/usr/bin/env ruby
-    require 'bio'
-    seq = Bio::Sequence::NA.new("atgcatgcaaaa")
-    puts seq                            # original sequence
-    puts seq.complement                 # complemental sequence (Bio::Sequence::NA object)
-    puts seq.subseq(3,8)                # gets subsequence of positions 3 to 8
-    p seq.gc_percent                    # GC percent (BioRuby 0.6.X: Float, BioRuby 0.7 or later: Integer)
-    p seq.composition                   # nucleic acid compositions (Hash)
-    puts seq.translate                  # translation (Bio::Sequence::AA object)
-    puts seq.translate(2)               # translation from frame 2 (default is frame 1)
-    puts seq.translate(1,11)            # using codon table No.11 (see http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi)
-    p seq.translate.codes               # shows three-letter codes (Array)
-    p seq.translate.names               # shows amino acid names (Array)
-    p seq.translate.composition         # amino acid compositions (Hash)
-    p seq.translate.molecular_weight    # calculating molecular weight (Float)
-    puts seq.complement.translate       # translation of complemental strand
-		counts = {'a'=>seq.count('a'),'c'=>seq.count('c'),'g'=>seq.count('g'),'t'=>seq.count('t')}
-    p randomseq = Bio::Sequence::NA.randomize(counts)  # reshuffle sequence with same freq.
 The p, print and puts methods are standard Ruby ways of outputting to
 the screen. If you want to know more about standard Ruby commands you
@@ -105,9 +154,9 @@ Windows). For example
   % ri p
   % ri File.open
-Nucleic acid sequence is an object of +Bio::Sequence::NA+ class, and
-amino acid sequence is an object of +Bio::Sequence::AA+ class.  Shared
-methods are in the parent +Bio::Sequence+ class.
+Nucleic acid sequence is an object of Bio::Sequence::NA class, and
+amino acid sequence is an object of Bio::Sequence::AA class.  Shared
+methods are in the parent Bio::Sequence class.
 As Bio::Sequence class inherits Ruby's String class, you can use
 String class methods. For example, to get a subsequence, you can
@@ -116,15 +165,12 @@ not only use subseq(from, to) but also String#[].
 Please take note that Ruby's strings are base 0 - i.e. the first letter
 has index 0, for example:
-  s = 'abc'
-  puts s[0].chr
-  >a
-  puts s[0..1]
-  >ab
+  bioruby> s = 'abc'
+  ==> "abc"
+  bioruby> s[0].chr
+  ==> "a"
+  bioruby> s[0..1]
+  ==> "ab"
 So when using String methods, you should subtract 1 from positions
 conventionally used in biology.  (subseq method will throw an exception if you
@@ -136,55 +182,71 @@ way of writing concise and clear code using 'closures'. Each sliding
 window creates a subsequence which is supplied to the enclosed block
 through a variable named +s+.
-* Shows average percentage of GC content for 100 bases (stepping
-the default one base at a time)
+Show average percentage of GC content for 20 bases (stepping the default one base at a time)
-    seq.window_search(100) do |s|
-      puts s.gc_percent
-    end
+  bioruby> seq = Bio::Sequence::NA.new("atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa")
+  ==> "atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa"
+  bioruby> a=[]; seq.window_search(20) { |s| a.push s.gc_percent }
+  bioruby> a
+  ==> [30, 35, 40, 40, 35, 35, 35, 30, 25, 30, 30, 30, 35, 35, 35, 35, 35, 40, 45, 45, 45, 45, 40, 35, 40, 40, 40, 40, 40, 35, 35, 35, 30, 30, 30]
 Since the class of each subsequence is the same as original sequence
 (Bio::Sequence::NA or Bio::Sequence::AA or Bio::Sequence), you can
 use all methods on the subsequence. For example,
-* Shows translation results for 15 bases shifting a codon at a time
+Shows translation results for 15 bases shifting a codon at a time
+  bioruby> a = []
+  bioruby> seq.window_search(15, 3) do |s|
+  bioruby>   a.push s.translate
+  bioruby> end
+  bioruby> a
+  ==> ["MHAIK", "HAIKL", "AIKLI", "IKLIP", "KLIPI", "LIPIR", "IPIRS", "PIRSS", "IRSSR", "RSSRS", "SSRSS", "SRSSK", "RSSKK", "SSKKK"]
-    seq.window_search(15, 3) do |s|
-      puts s.translate
-    end
 Finally, the window_search method returns the last leftover
 subsequence. This allows for example
-* Divide a genome sequence into sections of 10000bp and
-  output FASTA formatted sequences. The 1000bp at the start and end of
-	each subsequence overlapped. At the 3' end of the sequence the
-  leftover subsequence shorter than 10000bp is also added
+Divide a genome sequence into sections of 10000bp and
+output FASTA formatted sequences (line width 60 chars). The 1000bp at the
+start and end of each subsequence overlapped. At the 3' end of the sequence
+the leftover is also added:
     i = 1
+    textwidth=60
     remainder = seq.window_search(10000, 9000) do |s|
-      puts s.to_fasta("segment #{i}", 60)
+      puts s.to_fasta("segment #{i}", textwidth)
       i += 1
     end
-    puts remainder.to_fasta("segment #{i}", 60)
+    if remainder
+      puts remainder.to_fasta("segment #{i}", textwidth)
+    end
 If you don't want the overlapping window, set window size and stepping
 size to equal values.
 Other examples
-* Count the codon usage
+Count the codon usage
-    codon_usage = Hash.new(0)
-    seq.window_search(3, 3) do |s|
-      codon_usage[s] += 1
-    end
+  bioruby> codon_usage = Hash.new(0)
+  bioruby> seq.window_search(3, 3) do |s|
+  bioruby>   codon_usage[s] += 1
+  bioruby> end
+  bioruby> codon_usage
+  ==> {"cat"=>1, "aaa"=>3, "cca"=>1, "att"=>2, "aga"=>1, "atc"=>1, "cta"=>1, "gca"=>1, "cga"=>1, "tca"=>3, "aag"=>1, "tcc"=>1, "atg"=>1}
-* Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid)
-    seq.window_search(10, 10) do |s|
-      puts s.molecular_weight
-    end
+Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid)
+  bioruby> a = []
+  bioruby> seq.window_search(10, 10) do |s|
+  bioruby>   a.push s.molecular_weight
+  bioruby> end
+  bioruby> a
+  ==> [3096.2062, 3086.1962, 3056.1762, 3023.1262, 3073.2262]
 In most cases, sequences are read from files or retrieved from databases.
 For example:
@@ -210,6 +272,10 @@ For example, translates my_naseq.txt:
     % ruby na2aa.rb my_naseq.txt
+or use a pipe!
+    % cat my_naseq.txt|ruby na2aa.rb
 Outputs
     VAIFPKAMTGAKNQSSDICLMPHVGLIRRGQRRIRHLVQMSDAA*
@@ -218,8 +284,9 @@ You can also write this, a bit fanciful, as a one-liner script.
     % ruby -r bio -e 'p Bio::Sequence::NA.new($<.read).translate' my_naseq.txt
-In the next section we will retrieve data from databases instead of
-using raw sequence files.
+In the next section we will retrieve data from databases instead of using raw
+sequence files. One generic example of the above can be found in
+./sample/na2aa.rb.
 == Parsing GenBank data (Bio::GenBank class)
@@ -243,7 +310,8 @@ the data:
       print ">#{gb.accession} "         # Accession
       puts gb.definition                # Definition
-      puts gb.naseq                     # Nucleic acid sequence (Bio::Sequence::NA object)
+      puts gb.naseq                     # Nucleic acid sequence
+                                        # (Bio::Sequence::NA object)
     end
 But that has the disadvantage the code is tied to GenBank input. A more
@@ -251,9 +319,9 @@ generic method is to use Bio::FlatFile which allows you to use different
 input formats:
     #!/usr/bin/env ruby
     require 'bio'
     ff = Bio::FlatFile.new(Bio::GenBank, ARGF)
     ff.each_entry do |gb|
       definition = "#{gb.accession} #{gb.definition}"
@@ -288,9 +356,6 @@ Again another option is to use the Bio::DB.open class:
       puts gb.naseq.to_fasta(definition, 60)
     end
-(TRANSLATOR'S NOTE: Bio::DB.open have not been used so well.)
-(EDITOR's NOTE: Test code)
 Next, we are going to parse the GenBank 'features', which is normally
 very complicated:
@@ -333,12 +398,12 @@ very complicated:
       end
     end
-* Note: In this example Feature#assoc method makes a Hash from a
-  feature object. It is useful because you can get data from the hash
-  by using qualifiers as keys.
-  (But there is a risk some information is lost when two or more
-  qualifiers are the same. Therefore an Array is returned by
-  Feature#feature)
+Note: In this example Feature#assoc method makes a Hash from a
+feature object. It is useful because you can get data from the hash
+by using qualifiers as keys.
+(But there is a risk some information is lost when two or more
+qualifiers are the same. Therefore an Array is returned by
+Feature#feature)
 Bio::Sequence#splicing splices subsequence from nucleic acid sequence
 according to location information used in GenBank, EMBL and DDBJ.
@@ -352,11 +417,11 @@ feature style location text but also Bio::Locations object. For more
 information about location format and Bio::Locations class, see
 bio/location.rb.
-* Splice according to location string used in a GenBank entry
+Splice according to location string used in a GenBank entry
     naseq.splicing('join(2035..2050,complement(1775..1818),13..345)')
-* Generate Bio::Locations object and pass the splicing method
+Generate Bio::Locations object and pass the splicing method
     locs = Bio::Locations.new('join((8298.8300)..10206,1..855)')
     naseq.splicing(locs)
@@ -364,17 +429,16 @@ bio/location.rb.
 You can also use the splicing method for amino acid sequences
 (Bio::Sequence::AA objects).
-* Splicing peptide from a protein (e.g. signal peptide)
+Splicing peptide from a protein (e.g. signal peptide)
     aaseq.splicing('21..119')
-(EDITOR's NOTE: why use STRINGs here?)
 === More databases
 Databases in BioRuby are essentially accessed like that of GenBank
-with classes like Bio::GenBank, Bio::KEGG::GENES,
-(EDITOR's NOTE: include complete list)
+with classes like Bio::GenBank, Bio::KEGG::GENES. A full list can be found in
+the ./lib/bio/db directory of the BioRuby source tree.
 In many cases the Bio::DatabaseClass acts as a factory pattern
 and recognises the database type automatically - returning a
@@ -401,7 +465,14 @@ database class?
     end
 An example that can take any input, filter using a regular expression to output
-to a FASTA file can be found in sample/any2fasta.rb.
+to a FASTA file can be found in sample/any2fasta.rb. With this technique it is
+possible to write a Unix type grep/sort pipe for sequence information. One
+example using scripts in the BIORUBY sample folder:
+  fastagrep.rb '/At|Dm/' database.seq | fastasort.rb
+greps the database for Arabidopsis and Drosophila entries and sorts the output
+to FASTA.
 Other methods to extract specific data from database objects can be
 different between databases, though some methods are common (see the
@@ -427,35 +498,30 @@ multiple Bio::Reference objects as an Array. And some classes have a
 Bio::Alignment class in bio/alignment.rb is a container class like Ruby's Hash,
 Array and BioPerl's Bio::SimpleAlign.  A very simple example is:
-  require 'bio'
-  seqs = [ 'atgca', 'aagca', 'acgca', 'acgcg' ]
-  seqs = seqs.collect{ |x| Bio::Sequence::NA.new(x) }
+  bioruby> seqs = [ 'atgca', 'aagca', 'acgca', 'acgcg' ]
+  bioruby> seqs = seqs.collect{ |x| Bio::Sequence::NA.new(x) }
   # creates alignment object
-  a = Bio::Alignment.new(seqs)
-  # shows consensus sequence
-  p a.consensus             # ==> "a?gc?"
+  bioruby> a = Bio::Alignment.new(seqs)
+  bioruby> a.consensus
+  ==> "a?gc?"
   # shows IUPAC consensus
-  p a.consensus_iupac       # ==> "ahgcr"
+  a.consensus_iupac
+  ==> "ahgcr"
   # iterates over each seq
   a.each { |x| p x }
-    # ==>
-    #    "atgca"
-    #    "aagca"
-    #    "acgca"
-    #    "acgcg"
+  # ==>
+  #    "atgca"
+  #    "aagca"
+  #    "acgca"
+  #    "acgcg"
   # iterates over each site
   a.each_site { |x| p x }
-    # ==>
-    #    ["a", "a", "a", "a"]
-    #    ["t", "a", "c", "c"]
-    #    ["g", "g", "g", "g"]
-    #    ["c", "c", "c", "c"]
-    #    ["a", "a", "a", "g"]
+  # ==>
+  #    ["a", "a", "a", "a"]
+  #    ["t", "a", "c", "c"]
+  #    ["g", "g", "g", "g"]
+  #    ["c", "c", "c", "c"]
+  #    ["a", "a", "a", "g"]
   # doing alignment by using CLUSTAL W.
   # clustalw command must be installed.
@@ -469,21 +535,22 @@ library of commonly used REs (from REBASE) which can be used to cut single
 stranded RNA or double stranded DNA into fragments. To list all enzymes:
   rebase = Bio::RestrictionEnzyme.rebase
-	rebase.each do |enzyme_name, info|
-		p enzyme_name
+  rebase.each do |enzyme_name, info|
+    p enzyme_name
   end
 and cut a sequence with an enzyme follow up with:
-   res = seq.cut_with_enzyme('EcoRII', {:max_permutations => 0}, {:view_ranges => true})
+   res = seq.cut_with_enzyme('EcoRII', {:max_permutations => 0},
+     {:view_ranges => true})
    if res.kind_of? Symbol #error
       err = Err.find_by_code(res.to_s)
       unless err
         err = Err.new(:code => res.to_s)
       end
    end
-	 res.each do |frag|
-	    em = EnzymeMatch.new
+   res.each do |frag|
+      em = EnzymeMatch.new
       em.p_left = frag.p_left
       em.p_right = frag.p_right
@@ -493,7 +560,7 @@ and cut a sequence with an enzyme follow up with:
       em.err = nil
       em.enzyme = ar_enz
       em.sequence = ar_seq
-			p em
+      p em
     end
@@ -510,21 +577,21 @@ local machine.
 Install the fasta program on your machine (the command name looks like
 fasta34. FASTA can be downloaded from ftp://ftp.virginia.edu/pub/fasta/).
 First, you must prepare your FASTA-formatted database sequence file
-target.pep and FASTA-formatted query.pep.  (TRANSLATOR'S NOTE: I think
-we should provide sample data to readers.)
+target.pep and FASTA-formatted query.pep.
     #!/usr/bin/env ruby
     require 'bio'
-    # Creates FASTA factory object ("ssearch" instead of "fasta34" can also work)
+    # Creates FASTA factory object ("ssearch" instead of
+    # "fasta34" can also work)
     factory = Bio::Fasta.local('fasta34', ARGV.pop)
     (EDITOR's NOTE: not consistent pop command)
-    # Reads FASTA-formatted files (TRANSLATOR'S NOTE: something wrong in Japanese text)
     ff = Bio::FlatFile.new(Bio::FastaFormat, ARGF)
-    # Iterates over each entry. the variable "entry" is a Bio::FastaFormat object.
+    # Iterates over each entry. the variable "entry" is a
+    # Bio::FastaFormat object:
     ff.each do |entry|
       # shows definition line (begins with '>') to the standard error output
       $stderr.puts "Searching ... " + entry.definition
@@ -536,7 +603,8 @@ we should provide sample data to readers.)
       report.each do |hit|
         # If E-value is smaller than 0.0001
         if hit.evalue < 0.0001
-          # shows identifier of query and hit, E-value, start and end positions of homologous region (TRANSLATOR'S NOTE: should I change Japanese document?)
+          # shows identifier of query and hit, E-value, start and
+          # end positions of homologous region
           print "#{hit.query_id} : evalue #{hit.evalue}\t#{hit.target_id} at "
           p hit.lap_at
         end
@@ -550,7 +618,6 @@ We named above script as f_search.rb. You can execute as follows:
 In above script, the variable "factory" is a factory object for executing
 FASTA many times easily. Instead of using Fasta#query method,
 Bio::Sequence#fasta method can be used.
-(TRANSLATOR'S NOTE: Bio::Sequence#fasta are not so frequently used.)
     seq = ">test seq\nYQVLEEIGRGSFGSVRKVIHIPTKKLLVRKDIKYGHMNSKE"
     seq.fasta(factory)
@@ -566,7 +633,6 @@ Bio::Fasta#query returns Bio::Fasta::Report object.
 We can get almost all information described in FASTA report text
 with the Report object. For example, getting information for hits:
     report.each do |hit|
       puts hit.evalue           # E-value
       puts hit.sw               # Smith-Waterman score (*)
@@ -575,15 +641,19 @@ with the Report object. For example, getting information for hits:
       puts hit.query_id         # identifier of query sequence
       puts hit.query_def        # definition(comment line) of query sequence
       puts hit.query_len        # length of query sequence
-      puts hit.query_seq        # query sequence (TRANSLATOR'S NOTE: sequence of homologous region of query sequence)
+      puts hit.query_seq        # sequence of homologous region
       puts hit.target_id        # identifier of hit sequence
       puts hit.target_def       # definition(comment line) of hit sequence
       puts hit.target_len       # length of hit sequence
-      puts hit.target_seq       # hit sequence (TRANSLATOR'S NOTE: sequence of homologous region of hit sequence)
-      puts hit.query_start      # start position of homologous region in query sequence
-      puts hit.query_end        # end position of homologous region in query sequence
-      puts hit.target_start     # start posiotion of homologous region in hit(target) sequence
-      puts hit.target_end       # end position of homologous region in hit(target) sequence
+      puts hit.target_seq       # sequence of homologous region of hit sequence
+      puts hit.query_start      # start position of homologous
+                                # region in query sequence
+      puts hit.query_end        # end position of homologous region
+                                # in query sequence
+      puts hit.target_start     # start position of homologous region
+                                # in hit(target) sequence
+      puts hit.target_end       # end position of homologous region
+                                # in hit(target) sequence
       puts hit.lap_at           # array of above four numbers
     end
@@ -676,25 +746,25 @@ There are some additional BLAST methods, for example, bit_score and
 midline.
     report.each do |hit|
-      puts hit.bit_score        # bit score (*)
-      puts hit.query_seq        # query sequence (TRANSLATOR'S NOTE: sequence of homologous region of query sequence)
-      puts hit.midline          # middle line string of alignment of homologous region (*)
-      puts hit.target_seq       # hit sequence (TRANSLATOR'S NOTE: sequence of homologous region of query sequence)
-      puts hit.evalue           # E-value
-      puts hit.identity         # % identity
-      puts hit.overlap          # length of overlapping region
-      puts hit.query_id         # identifier of query sequence
-      puts hit.query_def        # definition(comment line) of query sequence
-      puts hit.query_len        # length of query sequence
-      puts hit.target_id        # identifier of hit sequence
-      puts hit.target_def       # definition(comment line) of hit sequence
-      puts hit.target_len       # length of hit sequence
-      puts hit.query_start      # start position of homologous region in query sequence
-      puts hit.query_end        # end position of homologous region in query sequence
-      puts hit.target_start     # start position of homologous region in hit(target) sequence
-      puts hit.target_end       # end position of homologous region in hit(target) sequence
-      puts hit.lap_at           # array of above four numbers
+      puts hit.bit_score
+      puts hit.query_seq
+      puts hit.midline
+      puts hit.target_seq
+      puts hit.evalue
+      puts hit.identity
+      puts hit.overlap
+      puts hit.query_id
+      puts hit.query_def
+      puts hit.query_len
+      puts hit.target_id
+      puts hit.target_def
+      puts hit.target_len
+      puts hit.query_start
+      puts hit.query_end
+      puts hit.target_start
+      puts hit.target_end
+      puts hit.lap_at
     end
 For simplicity and API compatibility, some information such as score
@@ -1131,39 +1201,66 @@ to be written...
 == The BioRuby example programs
-Some sample programs are stored in samples/ directry.
-Some programs are obsolete. Since samples are not enough,
-practical and interesting samples are welcome.
+Some sample programs are stored in the ./sample/ directory. Run for example:
-to be written...
+  ./sample/na2aa.rb test/data/fasta/example1.txt
+== Unit testing and doctests
-(EDITOR's NOTE: I would like some examples automatically
-included - with output)
+BioRuby comes with an extensive testing framework with over 1300 tests and 2700
+assertions. To run the unit tests:
+  cd test
+  ruby runner.rb
+We have also started with doctest for Ruby. We are porting the examples
+in this tutorial to doctest - more info upcoming.
 == Further reading
-See the BioRuby in anger Wiki and the class documentation for more
-information on BioRuby.
+See the BioRuby in anger Wiki.  A lot of BioRuby's documentation exists in the
+source code and unit tests. To really dive in you will need the latest source
+code tree. The embedded rdoc documentation can be viewed online at
+((<URL:http://bioruby.org/rdoc/>)).
+== BioRuby Shell
+The BioRuby shell implementation can be found in ./lib/bio/shell. It is very interesting
+as it uses IRB (the Ruby interpreter) which is a powerful environment described in
+((<Programming Ruby's irb chapter|URL:http://ruby-doc.org/docs/ProgrammingRuby/html/irb.html>)). IRB commands can directly be typed in the shell, e.g.
+  bioruby!> IRB.conf[:PROMPT_MODE]
+  ==!> :PROMPT_C
-The best book to get for understanding and getting productive with the
-Ruby language is 'Programming Ruby' by Dave Thomas and Andy
-Hunt. Strongly recommended!
+Optionally, you may also want to install Ruby readline support -
+with Debian libreadline-ruby. To edit a previous line you may have to press
+line down (arrow down) first.
+= Helpful tools
+Apart from rdoc you may also want to use rtags - which allows jumping around
+source code by clicking on class and method names.
+  cd bioruby/lib
+  rtags -R --vi
+For a tutorial see ((<URL:http://rtags.rubyforge.org/>))
 = APPENDIX
 == KEGG API
-Please refer to KEGG_API.rd.ja (TRANSLATOR'S NOTE: English version: ((<URL:http://www.genome.jp/kegg/soap/doc/keggapi_manual.html>)) ) and
+Please refer to KEGG_API.rd.ja (English version: ((<URL:http://www.genome.jp/kegg/soap/doc/keggapi_manual.html>)) ) and
   * ((<URL:http://www.genome.jp/kegg/soap/>))
 == Comparing BioProjects
-For a quick functional comparison of BioRuby, BioPerl, BioPython and Bioconductor (R) see ((<http://sciruby.codeforpeople.com/sr.cgi/BioProjects>))
+For a quick functional comparison of BioRuby, BioPerl, BioPython and Bioconductor (R) see ((<URL:http://sciruby.codeforpeople.com/sr.cgi/BioProjects>))
 == Using BioRuby with R
-Using Ruby with R Pjotr wrote a section on SciRuby. See ((<ULR:http://sciruby.codeforpeople.com/sr.cgi/RubyWithRlang>))
+Using Ruby with R Pjotr wrote a section on SciRuby. See ((<URL:http://sciruby.codeforpeople.com/sr.cgi/RubyWithRlang>))
 == Using BioPerl or BioPython from Ruby
@@ -1180,5 +1277,20 @@ painful, as the gem standard for packages evolved late and some still
 force you to copy things by hand. Therefore read the README's
 carefully that come with each package.
-=end
+== Trouble shooting
+* Error: in `require': no such file to load -- bio (LoadError)
+Ruby fails to find the BioRuby libraries - add it to the RUBYLIB path, or pass
+it to the interpreter. For example:
+  ruby -I~/cvs/bioruby/lib yourprogram.rb
+== Modifying this page
+IMPORTANT NOTICE: This page is maintained in the BioRuby CVS
+repository. Please edit the file there otherwise changes may get
+lost. See ((<BioRuby Developer Information>)) for CVS and mailing list
+access.
+=end