RubyGems - reubypathdb - Versions diffs - 0.2.0 → 0.3.0 - Mend

reubypathdb 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

data/Gemfile +13 -0
data/Gemfile.lock +20 -0
data/README.rdoc +3 -1
data/Rakefile +28 -28
data/VERSION +1 -1
data/lib/eupathdb_gff.rb +2 -1
data/lib/eupathdb_species_data.rb +413 -0
data/lib/reubypathdb.rb +1 -0
data/reubypathdb.gemspec +21 -11
data/test/test_eupathdb_species_data.rb +113 -0
metadata +62 -13

data/Gemfile ADDED Viewed

@@ -0,0 +1,13 @@
+source "http://rubygems.org"
+# Add dependencies required to use your gem here.
+# Example:
+#   gem "activesupport", ">= 2.3.5"
+# Add dependencies to develop your gem here.
+# Include everything needed to run rake, tests, features, etc.
+group :development do
+  gem "shoulda", ">= 0"
+  gem "bundler", "~> 1.0.0"
+  gem "jeweler", "~> 1.6.4"
+  gem "rcov", ">= 0"
+end

data/Gemfile.lock ADDED Viewed

@@ -0,0 +1,20 @@
+GEM
+  remote: http://rubygems.org/
+  specs:
+    git (1.2.5)
+    jeweler (1.6.4)
+      bundler (~> 1.0)
+      git (>= 1.2.5)
+      rake
+    rake (0.9.2)
+    rcov (0.9.10)
+    shoulda (2.11.3)
+PLATFORMS
+  ruby
+DEPENDENCIES
+  bundler (~> 1.0.0)
+  jeweler (~> 1.6.4)
+  rcov
+  shoulda

data/README.rdoc CHANGED Viewed

@@ -1,6 +1,8 @@
 = eupathdb
-Description goes here.
+ALPHA software! Most likely the interface to the methods will change, and often.
+Reubypathdb is a collection of Ruby methods associated with EuPathDB(.org) databases. Reubypathdb focuses on using files downloaded from the downloads sections of different databases, e.g. the GFF file and the gene information file for each species.
 == Note on Patches/Pull Requests

data/Rakefile CHANGED Viewed

@@ -1,22 +1,29 @@
+# encoding: utf-8
 require 'rubygems'
+require 'bundler'
+begin
+  Bundler.setup(:default, :development)
+rescue Bundler::BundlerError => e
+  $stderr.puts e.message
+  $stderr.puts "Run `bundle install` to install missing gems"
+  exit e.status_code
+end
 require 'rake'
-begin
-  require 'jeweler'
-  Jeweler::Tasks.new do |gem|
-    gem.name = "reubypathdb"
-    gem.summary = %Q{Classes to help parsing EuPathDB data files}
-    gem.description = %Q{Classes to help parsing EuPathDB data files}
-    gem.email = "donttrustben near gmail.com"
-    gem.homepage = "http://github.com/wwood/reubypathdb"
-    gem.authors = ["Ben J Woodcroft"]
-    gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
-    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
-  end
-  Jeweler::GemcutterTasks.new
-rescue LoadError
-  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
+require 'jeweler'
+Jeweler::Tasks.new do |gem|
+  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
+  gem.name = "reubypathdb"
+  gem.homepage = "http://github.com/wwood/reubypathdb"
+  gem.license = "MIT"
+  gem.summary = %Q{Classes to help parsing EuPathDB data files}
+  gem.description = %Q{Classes to help parsing EuPathDB data files}
+  gem.email = "donttrustben near gmail.com"
+  gem.authors = ["Ben J Woodcroft"]
+  # dependencies defined in Gemfile
 end
+Jeweler::RubygemsDotOrgTasks.new
 require 'rake/testtask'
 Rake::TestTask.new(:test) do |test|
@@ -25,21 +32,14 @@ Rake::TestTask.new(:test) do |test|
   test.verbose = true
 end
-begin
-  require 'rcov/rcovtask'
-  Rcov::RcovTask.new do |test|
-    test.libs << 'test'
-    test.pattern = 'test/**/test_*.rb'
-    test.verbose = true
-  end
-rescue LoadError
-  task :rcov do
-    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
-  end
+require 'rcov/rcovtask'
+Rcov::RcovTask.new do |test|
+  test.libs << 'test'
+  test.pattern = 'test/**/test_*.rb'
+  test.verbose = true
+  test.rcov_opts << '--exclude "gems/*"'
 end
-task :test => :check_dependencies
 task :default => :test
 require 'rake/rdoctask'

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.2.0
1	+ 0.3.0

data/lib/eupathdb_gff.rb CHANGED Viewed

@@ -18,7 +18,8 @@ class EupathDBGFF < JgiGenesGff
       'rRNA',
       'tRNA',
       'snRNA',
-      'transcript'
+      'transcript',
+      'ncRNA',
     ]
   end

data/lib/eupathdb_species_data.rb ADDED Viewed

@@ -0,0 +1,413 @@
+# A class dedicated to recording 'administrative' data about the databases,
+# answering questions such as "which species are recorded in ToxoDB?" for instance.
+#
+# It is also meant for dealing with locally cached versions of the files, where
+# all the data is stored in a base directory with a specified structure.
+#
+# TODO: functions for the info and the local caching should probably be separated
+# into separate classes, and the directory structure of the local versions shouldn't
+# be forced on the user.
+class EuPathDBSpeciesData
+  @@data = {
+    ## PlasmoDB
+    'Plasmodium falciparum' => {
+      :name => 'Plasmodium falciparum',
+      :source => 'PlasmoDB',
+      :fasta_file_species_name => 'Plasmodium_falciparum_3D7',
+      :sequencing_centre_abbreviation => 'psu',
+      :behind_usage_policy => true,
+    },
+    'Plasmodium yoelii' => {
+      :directory => 'yoelii',
+      :name => 'Plasmodium yoelii',
+      :sequencing_centre_abbreviation => 'TIGR',
+      :fasta_file_species_name => 'Plasmodium_yoelii_yoelii_str._17XNL',
+      :proteins_fasta_filename => lambda {|version| "PyoeliiAnnotatedProteins_PlasmoDB-#{version}.fasta"},
+      #:transcripts_fasta_filename => lambda {|version| "PyoeliiAllTranscripts_PlasmoDB-#{version}.fasta"},
+      :source => 'PlasmoDB'
+    },
+    'Plasmodium vivax' => {
+      :name => 'Plasmodium vivax',
+      :sequencing_centre_abbreviation => 'gb',
+      :fasta_file_species_name => 'Plasmodium_vivax_SaI-1',
+      :proteins_fasta_filename => lambda {|version| "PvivaxAnnotatedProteins_PlasmoDB-#{version}.fasta"},
+      :source => 'PlasmoDB'
+    },
+    'Plasmodium berghei' => {
+      :name => 'Plasmodium berghei',
+      :sequencing_centre_abbreviation => 'psu',
+      :fasta_file_species_name => 'Plasmodium_berghei_str._ANKA',
+      :proteins_fasta_filename => lambda {|version| "PbergheiAnnotatedProteins_PlasmoDB-#{version}.fasta"},
+      #:transcripts_fasta_filename => lambda {|version| "PbergheiAllTranscripts_PlasmoDB-#{version}.fasta"},
+      :source => 'PlasmoDB'
+    },
+    'Plasmodium chabaudi' => {
+      :name => 'Plasmodium chabaudi',
+      :sequencing_centre_abbreviation => 'psu',
+      :fasta_file_species_name => 'Plasmodium_chabaudi_chabaudi',
+      :proteins_fasta_filename => lambda {|version| "PchabaudiAnnotatedProteins_PlasmoDB-#{version}.fasta"},
+      :source => 'PlasmoDB',
+      :behind_usage_policy => true,
+    },
+    'Plasmodium knowlesi' => {
+      :name => 'Plasmodium knowlesi',
+      :sequencing_centre_abbreviation => 'psu',
+      :fasta_file_species_name => 'Plasmodium_knowlesi_strain_H',
+      :source => 'PlasmoDB',
+      :behind_usage_policy => true,
+    },
+    ## ToxoDB
+    'Neospora caninum' => {
+      :name => 'Neospora caninum',
+      :sequencing_centre_abbreviation => 'psu',
+      :fasta_file_species_name => 'Neospora_caninum',
+      :database_download_folder => 'NeosporaCaninum',
+      :representative_strain_name => 'NeosporaCaninum',
+      :proteins_fasta_filename => lambda {|version| "NeosporaCaninumAnnotatedProteins_ToxoDB-#{version}.fasta"},
+      :transcripts_fasta_filename => lambda {|version| "NeosporaCaninumAnnotatedTranscripts_ToxoDB-#{version}.fasta"},
+      :source => 'ToxoDB',
+      :behind_usage_policy => true,
+    },
+    'Eimeria tenella' => {
+      :name => 'Eimeria tenella',
+      :sequencing_centre_abbreviation => 'GeneDB',
+      :fasta_file_species_name => 'EtenellaHoughton',
+      :source => 'ToxoDB',
+      :database_download_folder => 'EtenellaHoughton',
+      :behind_usage_policy => true,
+    },
+    'Toxoplasma gondii' => {
+      :name => 'Toxoplasma gondii',
+      :sequencing_centre_abbreviation => 'gb',
+      :fasta_file_species_name => 'Toxoplasma_gondii_ME49',
+      :database_download_folder => 'TgondiiME49',
+      :gene_information_filename => lambda {|version| "TgondiiME49Gene_ToxoDB-#{version}.txt"},
+      :proteins_fasta_filename => lambda {|version| "TgondiiME49AnnotatedProteins_ToxoDB-#{version}.fasta"},
+      :transcripts_fasta_filename => lambda {|version| "TgondiiME49AnnotatedTranscripts_ToxoDB-#{version}.fasta"},
+      :gff_filename => lambda {|version| "TgondiiME49_ToxoDB-#{version}.gff"},
+      :genomic_fasta_filename => lambda {|version| "TgondiiME49Genomic_ToxoDB-#{version}.fasta"},
+      :source => 'ToxoDB'
+    },
+    ## CryptoDB
+    'Cryptosporidium parvum' => {
+      :name => 'Cryptosporidium parvum',
+      :sequencing_centre_abbreviation => 'gb',
+      :fasta_file_species_name => 'Cryptosporidium_parvum',
+      :proteins_fasta_filename => lambda {|version| "CparvumAnnotatedProteins_CryptoDB-#{version}.fasta"},
+      :transcripts_fasta_filename => lambda {|version| "CparvumAnnotatedTranscripts_CryptoDB-#{version}.fasta"},
+      #:gff_filename => lambda {|version| "c_parvum_iowa_ii.gff"}, #changed as of version 4.3
+      :source => 'CryptoDB'
+    },
+    'Cryptosporidium hominis' => {
+      :name => 'Cryptosporidium hominis',
+      :sequencing_centre_abbreviation => 'gb',
+      :fasta_file_species_name => 'Cryptosporidium_hominis',
+      :proteins_fasta_filename => lambda {|version| "ChominisAnnotatedProteins_CryptoDB-#{version}.fasta"},
+      :transcripts_fasta_filename => lambda {|version| "ChominisAnnotatedTranscripts_CryptoDB-#{version}.fasta"},
+      #:gff_filename => lambda {|version| "c_hominis_tu502.gff"}, #changed as of version 4.3
+      :source => 'CryptoDB'
+    },
+    'Cryptosporidium muris' => {
+      :name => 'Cryptosporidium muris',
+      :sequencing_centre_abbreviation => 'gb',
+      :fasta_file_species_name => 'Cryptosporidium_muris',
+      :proteins_fasta_filename => lambda {|version| "CmurisAnnotatedProteins_CryptoDB-#{version}.fasta"},
+      :transcripts_fasta_filename => lambda {|version| "CmurisAnnotatedTranscripts_CryptoDB-#{version}.fasta"},
+      #:gff_filename => lambda {|version| "c_muris.gff"}, #changed as of version 4.3
+      :source => 'CryptoDB'
+    },
+    ## PiroplasmaDB
+    'Theileria annulata' => {
+      :name => 'Theileria annulata',
+      :database_download_folder => 'TannulataAnkara',
+      :sequencing_centre_abbreviation => 'Genbank',
+      :fasta_file_species_name => 'Theileria_annulata_strain_Ankara',
+      :source => 'PiroplasmaDB',
+    },
+    'Theileria parva' => {
+      :name => 'Theileria parva',
+      :database_download_folder => 'TparvaMuguga',
+      :sequencing_centre_abbreviation => 'Genbank',
+      :fasta_file_species_name => 'Theileria_parva_strain_Muguga',
+      :source => 'PiroplasmaDB',
+    },
+    'Babesia bovis' => {
+      :name => 'Babesia bovis',
+      :database_download_folder => 'BbovisT2Bo',
+      :representative_strain_name => 'BbovisT2Bo',
+      :sequencing_centre_abbreviation => 'Genbank',
+      :fasta_file_species_name => 'Babesia_bovis_T2Bo',
+      :source => 'PiroplasmaDB',
+    },
+    ## FungiDB
+    'Candida albicans' => {
+      :name => 'Candida albicans',
+      :database_download_folder => 'Candida_albicans_SC5314',
+      :sequencing_centre_abbreviation => 'CGD',
+      :fasta_file_species_name => 'Candida_albicans_SC5314',
+      :source => 'FungiDB',
+    },
+    ## TriTrypDB
+    'Trypanosoma brucei' => {
+      :name => 'Trypanosoma brucei',
+      :sequencing_centre_abbreviation => 'GeneDB',
+      :source => 'TriTrypDB',
+      :representative_strain_name => 'TbruceiTreu927',
+      :fasta_file_species_name => 'Trypanosoma_brucei_TREU927',
+    },
+  }
+  # Duplicate so both the species name and genus-species name work
+  @@data.keys.each do |key|
+    # name is full name of the species by default
+    @@data[key][:name] ||= key
+    # the species name without genus can also be used
+    splits = key.split(' ')
+    raise unless splits.length == 2
+    raise if @@data[splits[1]]
+    @@data[splits[1]] = @@data[key]
+  end
+  SOURCE_VERSIONS = {
+    'PlasmoDB' => '7.2',#
+    'ToxoDB' => '6.4',#'7.0',#
+    'CryptoDB' => '4.4',#'4.5',#
+    'PiroplasmaDB' => '1.0',#'1.1',#
+    'FungiDB' => '1.0',
+    'TriTrypDB' => '3.2',
+  }
+  DATABASES = SOURCE_VERSIONS.keys
+  # Create a new object about one particular species. The species can be specified
+  # by a nickname, which is either the full binomial name of the species e.g.
+  # "Plasmodium falciparum", or by simply the second part (the species name without
+  # the genus name) e.g. 'falciparum'.
+  #
+  # base_data_directory is the directory where locally cached versions of the downloaded
+  # files are stored.
+  def initialize(nickname, base_data_directory=nil)
+    @species_data = @@data[nickname] # try the full name
+    @species_data ||= @@data[nickname.capitalize.gsub('_',' ')] #try replacing underscores
+    if @species_data.nil? # try using just the second word
+      splits = nickname.split(' ')
+      if splits.length == 2
+        @species_data = @@data[splits[1]]
+      end
+    end
+    @base_data_directory = base_data_directory
+    raise Exception, "Couldn't find species data for #{nickname}" unless @species_data
+  end
+  def method_missing(symbol)
+    answer = @species_data[symbol]
+    return answer unless answer.nil?
+    super
+  end
+  # The path to the EuPathDB gene information table (stored as a gzip)
+  def gene_information_gzfile_path
+    "#{local_download_directory}/#{gene_information_gzfile_filename}"
+  end
+  # The filename of the EuPathDB gene information table (stored as a gzip)
+  def gene_information_gzfile_filename
+    "#{gene_information_filename}.gz"
+  end
+  def gene_information_path
+    "#{local_download_directory}/#{gene_information_filename}"
+  end
+  def representative_strain_name
+    return @species_data[:representative_strain_name] unless @species_data[:representative_strain_name].nil?
+    return one_word_name
+  end
+  def gene_information_filename
+    f = @species_data[:gene_information_filename]
+    if f
+      "#{f.call(version)}"
+    else      # TgondiiME49Gene_ToxoDB-5.2.txt.gz
+      # PfalciparumGene_PlasmoDB-6.1.txt.gz
+      "#{representative_strain_name}Gene_#{database}-#{version}.txt"
+    end
+  end
+  def version
+    SOURCE_VERSIONS[@species_data[:source]]
+  end
+  def protein_fasta_filename
+    if @species_data[:proteins_fasta_filename]
+      return "#{@species_data[:proteins_fasta_filename].call(version)}"
+    else
+      return "#{representative_strain_name}AnnotatedProteins_#{database}-#{version}.fasta"
+    end
+  end
+  def protein_fasta_path
+    return File.join(local_download_directory,protein_fasta_filename)
+  end
+  def protein_blast_database_path
+    "/blastdb/#{protein_fasta_filename}"
+  end
+  def transcript_fasta_filename
+    if @species_data[:transcripts_fasta_filename]
+      return "#{@species_data[:transcripts_fasta_filename].call(version)}"
+    else
+      return "#{representative_strain_name}AnnotatedTranscripts_#{database}-#{version}.fasta"
+    end
+  end
+  def transcript_fasta_path
+    File.join(local_download_directory,transcript_fasta_filename)
+  end
+  def genomic_fasta_filename
+    genomic = @species_data[:genomic_fasta_filename]
+    if genomic
+      return "#{genomic.call(version)}"
+    else
+      return "#{representative_strain_name}Genomic_#{database}-#{version}.fasta"
+    end
+  end
+  def gff_filename
+    if @species_data[:gff_filename]
+      return @species_data[:gff_filename].call(version)
+    else
+      return "#{representative_strain_name}_#{database}-#{version}.gff"
+    end
+  end
+  def gff_path
+    File.join(local_download_directory,gff_filename)
+  end
+  def database
+    @species_data[:source]
+  end
+  def eu_path_db_download_directory
+    directories = {}
+    SOURCE_VERSIONS.each do |db, version|
+      # 'PlasmoDB' => "http://plasmodb.org/common/downloads/release-#{SOURCE_VERSIONS['PlasmoDB']}",
+      directories[db] = "http://#{db.downcase}.org/common/downloads/release-#{version}"
+    end
+    raise Exception, "Base URL for database '#{database}' not known" if directories[database].nil?
+    return "#{directories[database]}/#{one_word_name}"
+  end
+  def eu_path_db_fasta_download_directory
+    path = "#{eu_path_db_download_directory}/fasta"
+    path = "#{path}/data" if @species_data[:behind_usage_policy]
+    path
+  end
+  def eu_path_db_gff_download_directory
+    path = "#{eu_path_db_download_directory}/gff"
+    path = "#{path}/data" if @species_data[:behind_usage_policy]
+    path
+  end
+  def eu_path_db_txt_download_directory
+    path = "#{eu_path_db_download_directory}/txt"
+    path = "#{path}/data" if @species_data[:behind_usage_policy]
+    path
+  end
+  # Plasmodium chabaudi => Pchabaudi
+  def one_word_name
+    return @species_data[:database_download_folder] unless @species_data[:database_download_folder].nil?
+    splits = @species_data[:name].split(' ')
+    raise unless splits.length == 2
+    return "#{splits[0][0..0]}#{splits[1]}"
+  end
+  def local_download_directory
+    s = @species_data
+    "#{@base_data_directory}/#{s[:name]}/genome/#{s[:source]}/#{SOURCE_VERSIONS[s[:source]]}"
+  end
+  # An array of directory names. mkdir is called on each of them in order,
+  # otherwise mkdir throws errors because there aren't sufficient parent folders
+  # to build on.
+  def directories_for_mkdir
+    if @base_data_directory.nil?
+      raise Exception, "Unable to generate directories when @base_data_directory is not set"
+    end
+    s = @species_data
+    components = [
+      @base_data_directory,
+    s[:name],
+      'genome',
+    s[:source],
+    SOURCE_VERSIONS[s[:source]]
+    ]
+     (0..components.length-1).collect do |i|
+      components[0..i].join('/')
+    end
+  end
+  # Return an array of EuPathDBSpeciesData objects, one per matching entry in the species table for the given EuPathDB database
+  def self.species_data_from_database(database_name, base_download_directory=nil)
+    species = @@data.select {|name, info|
+    info[:source].downcase == database_name.downcase
+    }
+    species.collect do |name_info|
+      EuPathDBSpeciesData.new(name_info[0], base_download_directory)
+    end
+  end
+  # Download all the data files from all the EuPathDB databases, or just one single database.
+  # Requires wget to be available on the command line
+  def self.download(base_download_directory, database_name=nil)
+    # by default, download everything
+    if database_name.nil?
+      EuPathDBSpeciesData::DATABASES.each do |d|
+        download base_download_directory, d
+      end
+    else
+      # Download the new files from the relevant database
+      EuPathDBSpeciesData.species_data_from_database(database_name, base_download_directory).each do |spd|
+        spd.directories_for_mkdir.each do |directory|
+          unless File.exists?(directory)
+            Dir.mkdir(directory)
+          end
+        end
+        Dir.chdir(spd.local_download_directory) do
+          p spd.eu_path_db_fasta_download_directory
+          # protein
+          unless File.exists?(spd.protein_fasta_filename)
+            `wget #{spd.eu_path_db_fasta_download_directory}/#{spd.protein_fasta_filename}`
+          end
+          # gff
+          unless File.exists?(spd.gff_filename)
+            `wget #{spd.eu_path_db_gff_download_directory}/#{spd.gff_filename}`
+          end
+          # transcripts
+          unless File.exists?(spd.transcript_fasta_filename)
+            `wget #{spd.eu_path_db_fasta_download_directory}/#{spd.transcript_fasta_filename}`
+          end
+          # gene information table
+          unless File.exists?(spd.gene_information_filename)
+            `wget '#{spd.eu_path_db_txt_download_directory}/#{spd.gene_information_filename}'`
+          end
+          # genomic
+          unless File.exists?(spd.genomic_fasta_filename)
+            `wget '#{spd.eu_path_db_fasta_download_directory}/#{spd.genomic_fasta_filename}'`
+          end
+        end
+      end
+    end
+  end
+end

data/lib/reubypathdb.rb CHANGED Viewed

@@ -1,2 +1,3 @@
 require 'eupathdb_gene_information_table'
 require 'eupathdb_gff'
+require 'eupathdb_species_data'

data/reubypathdb.gemspec CHANGED Viewed

@@ -5,11 +5,11 @@
 Gem::Specification.new do |s|
   s.name = %q{reubypathdb}
-  s.version = "0.2.0"
+  s.version = "0.3.0"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["Ben J Woodcroft"]
-  s.date = %q{2011-04-19}
+  s.date = %q{2011-08-26}
   s.description = %q{Classes to help parsing EuPathDB data files}
   s.email = %q{donttrustben near gmail.com}
   s.extra_rdoc_files = [
@@ -18,38 +18,48 @@ Gem::Specification.new do |s|
   ]
   s.files = [
     ".document",
+    "Gemfile",
+    "Gemfile.lock",
     "LICENSE",
     "README.rdoc",
     "Rakefile",
     "VERSION",
     "lib/eupathdb_gene_information_table.rb",
     "lib/eupathdb_gff.rb",
+    "lib/eupathdb_species_data.rb",
     "lib/jgi_genes.rb",
     "lib/reubypathdb.rb",
     "reubypathdb.gemspec",
     "test/data/eupathGeneInformation.txt",
     "test/helper.rb",
-    "test/test_eupathdb_gene_information_table.rb"
+    "test/test_eupathdb_gene_information_table.rb",
+    "test/test_eupathdb_species_data.rb"
   ]
   s.homepage = %q{http://github.com/wwood/reubypathdb}
+  s.licenses = ["MIT"]
   s.require_paths = ["lib"]
-  s.rubygems_version = %q{1.6.2}
+  s.rubygems_version = %q{1.6.1}
   s.summary = %q{Classes to help parsing EuPathDB data files}
-  s.test_files = [
-    "test/helper.rb",
-    "test/test_eupathdb_gene_information_table.rb"
-  ]
   if s.respond_to? :specification_version then
     s.specification_version = 3
     if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
-      s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
+      s.add_development_dependency(%q<shoulda>, [">= 0"])
+      s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
+      s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
+      s.add_development_dependency(%q<rcov>, [">= 0"])
     else
-      s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
+      s.add_dependency(%q<shoulda>, [">= 0"])
+      s.add_dependency(%q<bundler>, ["~> 1.0.0"])
+      s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
+      s.add_dependency(%q<rcov>, [">= 0"])
     end
   else
-    s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
+    s.add_dependency(%q<shoulda>, [">= 0"])
+    s.add_dependency(%q<bundler>, ["~> 1.0.0"])
+    s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
+    s.add_dependency(%q<rcov>, [">= 0"])
   end
 end

data/test/test_eupathdb_species_data.rb ADDED Viewed

@@ -0,0 +1,113 @@
+require 'helper'
+require 'eupathdb_species_data'
+class EuPathDBSpeciesDataTest < Test::Unit::TestCase
+  def base_dir
+    '/home/ben/phd/data'
+  end
+  def test_method_missing
+    spd = EuPathDBSpeciesData.new('Plasmodium yoelii')
+    assert_equal 'yoelii', spd.directory
+  end
+  def test_nickname
+    spd = EuPathDBSpeciesData.new('Plasmodium yoelii').fasta_file_species_name
+    assert_equal spd, EuPathDBSpeciesData.new('yoelii').fasta_file_species_name
+    assert_equal spd, EuPathDBSpeciesData.new('P. yoelii').fasta_file_species_name #check for not exactly the last name but close enough
+  end
+  def test_protein_data_path
+    spd = EuPathDBSpeciesData.new('Plasmodium yoelii', base_dir)
+    assert_equal "/home/ben/phd/data/Plasmodium yoelii/genome/PlasmoDB/#{EuPathDBSpeciesData::SOURCE_VERSIONS['PlasmoDB']}/PyoeliiAnnotatedProteins_PlasmoDB-#{EuPathDBSpeciesData::SOURCE_VERSIONS['PlasmoDB']}.fasta",
+    spd.protein_fasta_path
+  end
+  def test_one_word_name
+    spd = EuPathDBSpeciesData.new('Plasmodium chabaudi')
+    assert_equal 'Pchabaudi', spd.one_word_name
+  end
+  def test_download_directory
+    spd = EuPathDBSpeciesData.new('Plasmodium chabaudi')
+    assert_equal "http://plasmodb.org/common/downloads/release-#{EuPathDBSpeciesData::SOURCE_VERSIONS['PlasmoDB']}/Pchabaudi", spd.eu_path_db_download_directory
+  end
+  def test_transcript_path_default
+    spd = EuPathDBSpeciesData.new('Plasmodium chabaudi', base_dir)
+    assert_equal "/home/ben/phd/data/Plasmodium chabaudi/genome/PlasmoDB/#{EuPathDBSpeciesData::SOURCE_VERSIONS['PlasmoDB']}/PchabaudiAnnotatedTranscripts_PlasmoDB-#{EuPathDBSpeciesData::SOURCE_VERSIONS['PlasmoDB']}.fasta",
+    spd.transcript_fasta_path
+  end
+  def test_transcript_fasta_filename
+    spd = EuPathDBSpeciesData.new('falciparum')
+    assert_equal "Pfalciparum_PlasmoDB-#{EuPathDBSpeciesData::SOURCE_VERSIONS['PlasmoDB']}.gff",
+    spd.gff_filename
+  end
+  def test_gzfile_path_toxo
+    spd = EuPathDBSpeciesData.new('gondii', base_dir)
+    assert_equal "/home/ben/phd/data/Toxoplasma gondii/genome/ToxoDB/#{EuPathDBSpeciesData::SOURCE_VERSIONS['ToxoDB']}/TgondiiME49Gene_ToxoDB-#{EuPathDBSpeciesData::SOURCE_VERSIONS['ToxoDB']}.txt.gz",
+    spd.gene_information_gzfile_path
+  end
+  def test_gzfile_path_default
+    spd = EuPathDBSpeciesData.new('falciparum', base_dir)
+    assert_equal "/home/ben/phd/data/Plasmodium falciparum/genome/PlasmoDB/#{EuPathDBSpeciesData::SOURCE_VERSIONS['PlasmoDB']}/PfalciparumGene_PlasmoDB-#{EuPathDBSpeciesData::SOURCE_VERSIONS['PlasmoDB']}.txt.gz",
+    spd.gene_information_gzfile_path
+  end
+  def test_gzfile_filename_default
+    spd = EuPathDBSpeciesData.new('falciparum')
+    assert_equal "PfalciparumGene_PlasmoDB-#{EuPathDBSpeciesData::SOURCE_VERSIONS['PlasmoDB']}.txt.gz",
+    spd.gene_information_gzfile_filename
+  end
+  def test_directories_for_mkdir
+    spd = EuPathDBSpeciesData.new('gondii', base_dir)
+    assert_equal [
+      '/home/ben/phd/data',
+      '/home/ben/phd/data/Toxoplasma gondii',
+      '/home/ben/phd/data/Toxoplasma gondii/genome',
+      '/home/ben/phd/data/Toxoplasma gondii/genome/ToxoDB',
+      "/home/ben/phd/data/Toxoplasma gondii/genome/ToxoDB/#{EuPathDBSpeciesData::SOURCE_VERSIONS['ToxoDB']}"
+    ],
+    spd.directories_for_mkdir
+  end
+  def test_one_word_name
+    assert_equal 'NeosporaCaninum', EuPathDBSpeciesData.new('Neospora caninum').one_word_name
+    spd = EuPathDBSpeciesData.new('Plasmodium falciparum')
+    assert_equal 'Pfalciparum', spd.one_word_name
+  end
+  def test_genomic_filename
+    spd = EuPathDBSpeciesData.new('falciparum')
+    assert_equal "PfalciparumGenomic_PlasmoDB-#{EuPathDBSpeciesData::SOURCE_VERSIONS['PlasmoDB']}.fasta",
+    spd.genomic_fasta_filename
+  end
+  def test_transcripts_name_without_block
+    spd = EuPathDBSpeciesData.new('Babesia bovis')
+    assert_equal "BbovisT2BoAnnotatedTranscripts_PiroplasmaDB-#{EuPathDBSpeciesData::SOURCE_VERSIONS['PiroplasmaDB']}.fasta",
+    spd.transcript_fasta_filename
+  end
+  def test_behind_usage_policy
+    spd = EuPathDBSpeciesData.new('Plasmodium chabaudi')
+    assert_equal "http://plasmodb.org/common/downloads/release-#{EuPathDBSpeciesData::SOURCE_VERSIONS['PlasmoDB']}/Pchabaudi/fasta/data",
+    spd.eu_path_db_fasta_download_directory
+ end
+ def test_behind_usage_policy
+    spd = EuPathDBSpeciesData.new('Plasmodium vivax')
+    assert_equal "http://plasmodb.org/common/downloads/release-#{EuPathDBSpeciesData::SOURCE_VERSIONS['PlasmoDB']}/Pvivax/fasta",
+    spd.eu_path_db_fasta_download_directory
+ end
+ def test_representative_strain_name
+   spd = EuPathDBSpeciesData.new('Trypanosoma brucei')
+   assert_equal "http://tritrypdb.org/common/downloads/release-#{EuPathDBSpeciesData::SOURCE_VERSIONS['TriTrypDB']}/Tbrucei/fasta/TbruceiTreu927Genomic_TriTrypDB-#{EuPathDBSpeciesData::SOURCE_VERSIONS['TriTrypDB']}.fasta",
+    "#{spd.eu_path_db_fasta_download_directory}/#{spd.genomic_fasta_filename}"
+ end
+end

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: reubypathdb
 version: !ruby/object:Gem::Version
-  hash: 23
+  hash: 19
   prerelease:
   segments:
   - 0
-  - 2
+  - 3
   - 0
-  version: 0.2.0
+  version: 0.3.0
 platform: ruby
 authors:
 - Ben J Woodcroft
@@ -15,12 +15,11 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-04-19 00:00:00 +10:00
+date: 2011-08-26 00:00:00 +10:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
-  name: thoughtbot-shoulda
-  prerelease: false
+  type: :development
   requirement: &id001 !ruby/object:Gem::Requirement
     none: false
     requirements:
@@ -30,8 +29,55 @@ dependencies:
         segments:
         - 0
         version: "0"
-  type: :development
   version_requirements: *id001
+  name: shoulda
+  prerelease: false
+- !ruby/object:Gem::Dependency
+  type: :development
+  requirement: &id002 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        hash: 23
+        segments:
+        - 1
+        - 0
+        - 0
+        version: 1.0.0
+  version_requirements: *id002
+  name: bundler
+  prerelease: false
+- !ruby/object:Gem::Dependency
+  type: :development
+  requirement: &id003 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        hash: 7
+        segments:
+        - 1
+        - 6
+        - 4
+        version: 1.6.4
+  version_requirements: *id003
+  name: jeweler
+  prerelease: false
+- !ruby/object:Gem::Dependency
+  type: :development
+  requirement: &id004 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        hash: 3
+        segments:
+        - 0
+        version: "0"
+  version_requirements: *id004
+  name: rcov
+  prerelease: false
 description: Classes to help parsing EuPathDB data files
 email: donttrustben near gmail.com
 executables: []
@@ -43,22 +89,26 @@ extra_rdoc_files:
 - README.rdoc
 files:
 - .document
+- Gemfile
+- Gemfile.lock
 - LICENSE
 - README.rdoc
 - Rakefile
 - VERSION
 - lib/eupathdb_gene_information_table.rb
 - lib/eupathdb_gff.rb
+- lib/eupathdb_species_data.rb
 - lib/jgi_genes.rb
 - lib/reubypathdb.rb
 - reubypathdb.gemspec
 - test/data/eupathGeneInformation.txt
 - test/helper.rb
 - test/test_eupathdb_gene_information_table.rb
+- test/test_eupathdb_species_data.rb
 has_rdoc: true
 homepage: http://github.com/wwood/reubypathdb
-licenses: []
+licenses:
+- MIT
 post_install_message:
 rdoc_options: []
@@ -85,10 +135,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 requirements: []
 rubyforge_project:
-rubygems_version: 1.6.2
+rubygems_version: 1.6.1
 signing_key:
 specification_version: 3
 summary: Classes to help parsing EuPathDB data files
-test_files:
-- test/helper.rb
-- test/test_eupathdb_gene_information_table.rb
+test_files: []