reubypathdb 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source "http://rubygems.org"
2
+ # Runtime dependencies of the gem would be declared here; none are listed.
3
+ # Example:
4
+ # gem "activesupport", ">= 2.3.5"
5
+
6
+ # Development-only dependencies go in the group below:
7
+ # everything needed to run rake, tests, features, etc.
8
+ group :development do
9
+ gem "shoulda", ">= 0"
10
+ gem "bundler", "~> 1.0.0"
11
+ gem "jeweler", "~> 1.6.4"
12
+ gem "rcov", ">= 0"
13
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,20 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ git (1.2.5)
5
+ jeweler (1.6.4)
6
+ bundler (~> 1.0)
7
+ git (>= 1.2.5)
8
+ rake
9
+ rake (0.9.2)
10
+ rcov (0.9.10)
11
+ shoulda (2.11.3)
12
+
13
+ PLATFORMS
14
+ ruby
15
+
16
+ DEPENDENCIES
17
+ bundler (~> 1.0.0)
18
+ jeweler (~> 1.6.4)
19
+ rcov
20
+ shoulda
data/README.rdoc CHANGED
@@ -1,6 +1,8 @@
1
1
  = eupathdb
2
2
 
3
- Description goes here.
3
+ ALPHA software! Most likely the interface to the methods will change, and often.
4
+
5
+ Reubypathdb is a collection of Ruby methods associated with EuPathDB(.org) databases. Reubypathdb focuses on using files downloaded from the downloads sections of different databases, e.g. the GFF file and the gene information file for each species.
4
6
 
5
7
  == Note on Patches/Pull Requests
6
8
 
data/Rakefile CHANGED
@@ -1,22 +1,29 @@
1
+ # encoding: utf-8
2
+
1
3
  require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
2
12
  require 'rake'
3
13
 
4
- begin
5
- require 'jeweler'
6
- Jeweler::Tasks.new do |gem|
7
- gem.name = "reubypathdb"
8
- gem.summary = %Q{Classes to help parsing EuPathDB data files}
9
- gem.description = %Q{Classes to help parsing EuPathDB data files}
10
- gem.email = "donttrustben near gmail.com"
11
- gem.homepage = "http://github.com/wwood/reubypathdb"
12
- gem.authors = ["Ben J Woodcroft"]
13
- gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
14
- # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
15
- end
16
- Jeweler::GemcutterTasks.new
17
- rescue LoadError
18
- puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "reubypathdb"
18
+ gem.homepage = "http://github.com/wwood/reubypathdb"
19
+ gem.license = "MIT"
20
+ gem.summary = %Q{Classes to help parsing EuPathDB data files}
21
+ gem.description = %Q{Classes to help parsing EuPathDB data files}
22
+ gem.email = "donttrustben near gmail.com"
23
+ gem.authors = ["Ben J Woodcroft"]
24
+ # dependencies defined in Gemfile
19
25
  end
26
+ Jeweler::RubygemsDotOrgTasks.new
20
27
 
21
28
  require 'rake/testtask'
22
29
  Rake::TestTask.new(:test) do |test|
@@ -25,21 +32,14 @@ Rake::TestTask.new(:test) do |test|
25
32
  test.verbose = true
26
33
  end
27
34
 
28
- begin
29
- require 'rcov/rcovtask'
30
- Rcov::RcovTask.new do |test|
31
- test.libs << 'test'
32
- test.pattern = 'test/**/test_*.rb'
33
- test.verbose = true
34
- end
35
- rescue LoadError
36
- task :rcov do
37
- abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
38
- end
35
+ require 'rcov/rcovtask'
36
+ Rcov::RcovTask.new do |test|
37
+ test.libs << 'test'
38
+ test.pattern = 'test/**/test_*.rb'
39
+ test.verbose = true
40
+ test.rcov_opts << '--exclude "gems/*"'
39
41
  end
40
42
 
41
- task :test => :check_dependencies
42
-
43
43
  task :default => :test
44
44
 
45
45
  require 'rake/rdoctask'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.0
1
+ 0.3.0
data/lib/eupathdb_gff.rb CHANGED
@@ -18,7 +18,8 @@ class EupathDBGFF < JgiGenesGff
18
18
  'rRNA',
19
19
  'tRNA',
20
20
  'snRNA',
21
- 'transcript'
21
+ 'transcript',
22
+ 'ncRNA',
22
23
  ]
23
24
  end
24
25
 
@@ -0,0 +1,413 @@
1
+
2
# Records 'administrative' data about the EuPathDB family of databases,
# answering questions such as "which species are recorded in ToxoDB?".
#
# It also computes the file names, download URLs and local paths used for a
# locally cached copy of each database's download files, where all the data
# lives under one base directory with a fixed structure:
#
#   <base_data_directory>/<species name>/genome/<database>/<version>/
#
# Any key of a species record (e.g. :fasta_file_species_name) can be read as
# a method on an instance; see #method_missing.
#
# TODO: functions for the info and the local caching should probably be separated
# into separate classes, and the directory structure of the local versions shouldn't
# be forced on the user.
class EuPathDBSpeciesData
  # Species records keyed by full binomial name. Optional keys override the
  # default naming conventions:
  # * :database_download_folder - species folder name on the download server
  # * :representative_strain_name - prefix used in generated file names
  # * :*_filename - lambdas taking the database version, returning a file name
  # * :behind_usage_policy - when true, files live in an extra 'data' sub-folder
  @@data = {
    ## PlasmoDB
    'Plasmodium falciparum' => {
      :name => 'Plasmodium falciparum',
      :source => 'PlasmoDB',
      :fasta_file_species_name => 'Plasmodium_falciparum_3D7',
      :sequencing_centre_abbreviation => 'psu',
      :behind_usage_policy => true,
    },
    'Plasmodium yoelii' => {
      :directory => 'yoelii',
      :name => 'Plasmodium yoelii',
      :sequencing_centre_abbreviation => 'TIGR',
      :fasta_file_species_name => 'Plasmodium_yoelii_yoelii_str._17XNL',
      :proteins_fasta_filename => lambda {|version| "PyoeliiAnnotatedProteins_PlasmoDB-#{version}.fasta"},
      #:transcripts_fasta_filename => lambda {|version| "PyoeliiAllTranscripts_PlasmoDB-#{version}.fasta"},
      :source => 'PlasmoDB'
    },
    'Plasmodium vivax' => {
      :name => 'Plasmodium vivax',
      :sequencing_centre_abbreviation => 'gb',
      :fasta_file_species_name => 'Plasmodium_vivax_SaI-1',
      :proteins_fasta_filename => lambda {|version| "PvivaxAnnotatedProteins_PlasmoDB-#{version}.fasta"},
      :source => 'PlasmoDB'
    },
    'Plasmodium berghei' => {
      :name => 'Plasmodium berghei',
      :sequencing_centre_abbreviation => 'psu',
      :fasta_file_species_name => 'Plasmodium_berghei_str._ANKA',
      :proteins_fasta_filename => lambda {|version| "PbergheiAnnotatedProteins_PlasmoDB-#{version}.fasta"},
      #:transcripts_fasta_filename => lambda {|version| "PbergheiAllTranscripts_PlasmoDB-#{version}.fasta"},
      :source => 'PlasmoDB'
    },
    'Plasmodium chabaudi' => {
      :name => 'Plasmodium chabaudi',
      :sequencing_centre_abbreviation => 'psu',
      :fasta_file_species_name => 'Plasmodium_chabaudi_chabaudi',
      :proteins_fasta_filename => lambda {|version| "PchabaudiAnnotatedProteins_PlasmoDB-#{version}.fasta"},
      :source => 'PlasmoDB',
      :behind_usage_policy => true,
    },
    'Plasmodium knowlesi' => {
      :name => 'Plasmodium knowlesi',
      :sequencing_centre_abbreviation => 'psu',
      :fasta_file_species_name => 'Plasmodium_knowlesi_strain_H',
      :source => 'PlasmoDB',
      :behind_usage_policy => true,
    },
    ## ToxoDB
    'Neospora caninum' => {
      :name => 'Neospora caninum',
      :sequencing_centre_abbreviation => 'psu',
      :fasta_file_species_name => 'Neospora_caninum',
      :database_download_folder => 'NeosporaCaninum',
      :representative_strain_name => 'NeosporaCaninum',
      :proteins_fasta_filename => lambda {|version| "NeosporaCaninumAnnotatedProteins_ToxoDB-#{version}.fasta"},
      :transcripts_fasta_filename => lambda {|version| "NeosporaCaninumAnnotatedTranscripts_ToxoDB-#{version}.fasta"},
      :source => 'ToxoDB',
      :behind_usage_policy => true,
    },
    'Eimeria tenella' => {
      :name => 'Eimeria tenella',
      :sequencing_centre_abbreviation => 'GeneDB',
      :fasta_file_species_name => 'EtenellaHoughton',
      :source => 'ToxoDB',
      :database_download_folder => 'EtenellaHoughton',
      :behind_usage_policy => true,
    },
    'Toxoplasma gondii' => {
      :name => 'Toxoplasma gondii',
      :sequencing_centre_abbreviation => 'gb',
      :fasta_file_species_name => 'Toxoplasma_gondii_ME49',
      :database_download_folder => 'TgondiiME49',
      :gene_information_filename => lambda {|version| "TgondiiME49Gene_ToxoDB-#{version}.txt"},
      :proteins_fasta_filename => lambda {|version| "TgondiiME49AnnotatedProteins_ToxoDB-#{version}.fasta"},
      :transcripts_fasta_filename => lambda {|version| "TgondiiME49AnnotatedTranscripts_ToxoDB-#{version}.fasta"},
      :gff_filename => lambda {|version| "TgondiiME49_ToxoDB-#{version}.gff"},
      :genomic_fasta_filename => lambda {|version| "TgondiiME49Genomic_ToxoDB-#{version}.fasta"},
      :source => 'ToxoDB'
    },
    ## CryptoDB
    'Cryptosporidium parvum' => {
      :name => 'Cryptosporidium parvum',
      :sequencing_centre_abbreviation => 'gb',
      :fasta_file_species_name => 'Cryptosporidium_parvum',
      :proteins_fasta_filename => lambda {|version| "CparvumAnnotatedProteins_CryptoDB-#{version}.fasta"},
      :transcripts_fasta_filename => lambda {|version| "CparvumAnnotatedTranscripts_CryptoDB-#{version}.fasta"},
      #:gff_filename => lambda {|version| "c_parvum_iowa_ii.gff"}, #changed as of version 4.3
      :source => 'CryptoDB'
    },
    'Cryptosporidium hominis' => {
      :name => 'Cryptosporidium hominis',
      :sequencing_centre_abbreviation => 'gb',
      :fasta_file_species_name => 'Cryptosporidium_hominis',
      :proteins_fasta_filename => lambda {|version| "ChominisAnnotatedProteins_CryptoDB-#{version}.fasta"},
      :transcripts_fasta_filename => lambda {|version| "ChominisAnnotatedTranscripts_CryptoDB-#{version}.fasta"},
      #:gff_filename => lambda {|version| "c_hominis_tu502.gff"}, #changed as of version 4.3
      :source => 'CryptoDB'
    },
    'Cryptosporidium muris' => {
      :name => 'Cryptosporidium muris',
      :sequencing_centre_abbreviation => 'gb',
      :fasta_file_species_name => 'Cryptosporidium_muris',
      :proteins_fasta_filename => lambda {|version| "CmurisAnnotatedProteins_CryptoDB-#{version}.fasta"},
      :transcripts_fasta_filename => lambda {|version| "CmurisAnnotatedTranscripts_CryptoDB-#{version}.fasta"},
      #:gff_filename => lambda {|version| "c_muris.gff"}, #changed as of version 4.3
      :source => 'CryptoDB'
    },
    ## PiroplasmaDB
    'Theileria annulata' => {
      :name => 'Theileria annulata',
      :database_download_folder => 'TannulataAnkara',
      :sequencing_centre_abbreviation => 'Genbank',
      :fasta_file_species_name => 'Theileria_annulata_strain_Ankara',
      :source => 'PiroplasmaDB',
    },
    'Theileria parva' => {
      :name => 'Theileria parva',
      :database_download_folder => 'TparvaMuguga',
      :sequencing_centre_abbreviation => 'Genbank',
      :fasta_file_species_name => 'Theileria_parva_strain_Muguga',
      :source => 'PiroplasmaDB',
    },
    'Babesia bovis' => {
      :name => 'Babesia bovis',
      :database_download_folder => 'BbovisT2Bo',
      :representative_strain_name => 'BbovisT2Bo',
      :sequencing_centre_abbreviation => 'Genbank',
      :fasta_file_species_name => 'Babesia_bovis_T2Bo',
      :source => 'PiroplasmaDB',
    },
    ## FungiDB
    'Candida albicans' => {
      :name => 'Candida albicans',
      :database_download_folder => 'Candida_albicans_SC5314',
      :sequencing_centre_abbreviation => 'CGD',
      :fasta_file_species_name => 'Candida_albicans_SC5314',
      :source => 'FungiDB',
    },
    ## TriTrypDB
    'Trypanosoma brucei' => {
      :name => 'Trypanosoma brucei',
      :sequencing_centre_abbreviation => 'GeneDB',
      :source => 'TriTrypDB',
      :representative_strain_name => 'TbruceiTreu927',
      :fasta_file_species_name => 'Trypanosoma_brucei_TREU927',
    },
  }

  # Register every record a second time under its species-only name, so both
  # 'Plasmodium falciparum' and 'falciparum' find the same (shared) hash.
  # Iterating over a snapshot of the keys makes it safe to add entries here.
  @@data.keys.each do |key|
    # name is the full name of the species by default
    @@data[key][:name] ||= key

    genus_and_species = key.split(' ')
    unless genus_and_species.length == 2
      raise "Species key '#{key}' is not a two-word binomial name"
    end
    species_only = genus_and_species[1]
    if @@data[species_only]
      raise "Species-only name '#{species_only}' is ambiguous between records"
    end
    @@data[species_only] = @@data[key]
  end

  # The release of each database that file names and URLs are generated for.
  # The trailing comments are other release numbers left as notes by the author.
  SOURCE_VERSIONS = {
    'PlasmoDB' => '7.2',#
    'ToxoDB' => '6.4',#'7.0',#
    'CryptoDB' => '4.4',#'4.5',#
    'PiroplasmaDB' => '1.0',#'1.1',#
    'FungiDB' => '1.0',
    'TriTrypDB' => '3.2',
  }
  DATABASES = SOURCE_VERSIONS.keys

  # Create a new object about one particular species. The species can be specified
  # by a nickname, which is either the full binomial name of the species e.g.
  # "Plasmodium falciparum", or simply the second part (the species name without
  # the genus name) e.g. 'falciparum'. Underscores in place of spaces and an
  # abbreviated genus ("P. yoelii") are also accepted.
  #
  # base_data_directory is the directory where locally cached versions of the
  # downloaded files are stored.
  #
  # Raises Exception when no record matches the nickname. (Exception itself is
  # raised, rather than a StandardError, so that existing callers that rescue
  # Exception keep working.)
  def initialize(nickname, base_data_directory=nil)
    @species_data = @@data[nickname] # try the name exactly as given
    @species_data ||= @@data[nickname.capitalize.gsub('_',' ')] # try replacing underscores
    if @species_data.nil? # try using just the second word
      splits = nickname.split(' ')
      @species_data = @@data[splits[1]] if splits.length == 2
    end

    @base_data_directory = base_data_directory

    raise Exception, "Couldn't find species data for #{nickname}" unless @species_data
  end

  # Expose each key of the species record as a reader method, e.g.
  # spd.fasta_file_species_name. Keys that are absent (or nil), and any call
  # made with arguments, fall through to the usual NoMethodError.
  def method_missing(symbol, *args, &block)
    answer = @species_data[symbol] if args.empty? && @species_data
    return answer unless answer.nil?
    super
  end

  # Keep respond_to? consistent with method_missing.
  def respond_to_missing?(symbol, include_private = false)
    (!@species_data.nil? && !@species_data[symbol].nil?) || super
  end

  # The local path to the EuPathDB gene information table (stored as a gzip)
  def gene_information_gzfile_path
    "#{local_download_directory}/#{gene_information_gzfile_filename}"
  end

  # The file name of the EuPathDB gene information table (stored as a gzip)
  def gene_information_gzfile_filename
    "#{gene_information_filename}.gz"
  end

  # The local path to the uncompressed gene information table
  def gene_information_path
    "#{local_download_directory}/#{gene_information_filename}"
  end

  # The prefix used when generating file names: the record's
  # :representative_strain_name if it has one, otherwise #one_word_name
  def representative_strain_name
    return @species_data[:representative_strain_name] unless @species_data[:representative_strain_name].nil?
    one_word_name
  end

  # File name of the gene information table, e.g.
  # TgondiiME49Gene_ToxoDB-5.2.txt or PfalciparumGene_PlasmoDB-6.1.txt
  def gene_information_filename
    f = @species_data[:gene_information_filename]
    if f
      "#{f.call(version)}"
    else
      "#{representative_strain_name}Gene_#{database}-#{version}.txt"
    end
  end

  # The release number of the database this species belongs to
  def version
    SOURCE_VERSIONS[@species_data[:source]]
  end

  # File name of the annotated proteins fasta file
  def protein_fasta_filename
    if @species_data[:proteins_fasta_filename]
      "#{@species_data[:proteins_fasta_filename].call(version)}"
    else
      "#{representative_strain_name}AnnotatedProteins_#{database}-#{version}.fasta"
    end
  end

  # Local path of the annotated proteins fasta file
  def protein_fasta_path
    File.join(local_download_directory,protein_fasta_filename)
  end

  # Path of the protein blast database; always under /blastdb
  def protein_blast_database_path
    "/blastdb/#{protein_fasta_filename}"
  end

  # File name of the annotated transcripts fasta file
  def transcript_fasta_filename
    if @species_data[:transcripts_fasta_filename]
      "#{@species_data[:transcripts_fasta_filename].call(version)}"
    else
      "#{representative_strain_name}AnnotatedTranscripts_#{database}-#{version}.fasta"
    end
  end

  # Local path of the annotated transcripts fasta file
  def transcript_fasta_path
    File.join(local_download_directory,transcript_fasta_filename)
  end

  # File name of the genomic sequence fasta file
  def genomic_fasta_filename
    genomic = @species_data[:genomic_fasta_filename]
    if genomic
      "#{genomic.call(version)}"
    else
      "#{representative_strain_name}Genomic_#{database}-#{version}.fasta"
    end
  end

  # File name of the GFF annotation file
  def gff_filename
    if @species_data[:gff_filename]
      @species_data[:gff_filename].call(version)
    else
      "#{representative_strain_name}_#{database}-#{version}.gff"
    end
  end

  # Local path of the GFF annotation file
  def gff_path
    File.join(local_download_directory,gff_filename)
  end

  # Name of the database this species belongs to, e.g. 'PlasmoDB'
  def database
    @species_data[:source]
  end

  # URL of this species' folder for the current release, e.g.
  # http://plasmodb.org/common/downloads/release-7.2/Pchabaudi
  #
  # Raises Exception if the species' database has no entry in SOURCE_VERSIONS.
  def eu_path_db_download_directory
    unless SOURCE_VERSIONS.key?(database)
      raise Exception, "Base URL for database '#{database}' not known"
    end
    "http://#{database.downcase}.org/common/downloads/release-#{SOURCE_VERSIONS[database]}/#{one_word_name}"
  end

  # URL of the folder holding this species' fasta files
  def eu_path_db_fasta_download_directory
    download_subdirectory('fasta')
  end

  # URL of the folder holding this species' GFF files
  def eu_path_db_gff_download_directory
    download_subdirectory('gff')
  end

  # URL of the folder holding this species' text (gene information) files
  def eu_path_db_txt_download_directory
    download_subdirectory('txt')
  end

  # The species' folder name on the download server: the record's
  # :database_download_folder if set, otherwise the genus initial followed by
  # the species name, e.g. Plasmodium chabaudi => Pchabaudi
  def one_word_name
    return @species_data[:database_download_folder] unless @species_data[:database_download_folder].nil?
    splits = @species_data[:name].split(' ')
    unless splits.length == 2
      raise "Species name '#{@species_data[:name]}' is not a two-word binomial name"
    end
    "#{splits[0][0..0]}#{splits[1]}"
  end

  # The directory under the base data directory where this species' files for
  # the current release are cached
  def local_download_directory
    s = @species_data
    "#{@base_data_directory}/#{s[:name]}/genome/#{s[:source]}/#{SOURCE_VERSIONS[s[:source]]}"
  end

  # An array of directory names, each one level deeper than the last, ending
  # at the local download directory. mkdir is called on each of them in order,
  # otherwise mkdir throws errors because the parent folders do not exist yet.
  #
  # Raises Exception when no base data directory was given to the constructor.
  def directories_for_mkdir
    if @base_data_directory.nil?
      raise Exception, "Unable to generate directories when @base_data_directory is not set"
    end

    s = @species_data
    components = [
      @base_data_directory,
      s[:name],
      'genome',
      s[:source],
      SOURCE_VERSIONS[s[:source]]
    ]

    (0..components.length-1).collect do |i|
      components[0..i].join('/')
    end
  end

  # Return one EuPathDBSpeciesData object for each species included in the
  # named EuPathDB database (matched case-insensitively).
  def self.species_data_from_database(database_name, base_download_directory=nil)
    wanted = database_name.downcase
    # @@data also holds every record under its species-only alias. Only keep
    # the keys that are the record's full name, so each species appears once.
    full_names = @@data.keys.select do |key|
      info = @@data[key]
      info[:name] == key && info[:source].downcase == wanted
    end
    full_names.collect do |full_name|
      EuPathDBSpeciesData.new(full_name, base_download_directory)
    end
  end

  # Download all the data files from all the EuPathDB databases, or just one single database.
  # Files that already exist locally are not downloaded again.
  # Requires wget to be available on the command line
  def self.download(base_download_directory, database_name=nil)
    # by default, download everything
    if database_name.nil?
      EuPathDBSpeciesData::DATABASES.each do |d|
        download base_download_directory, d
      end
    else
      # Download the new files from the relevant database
      EuPathDBSpeciesData.species_data_from_database(database_name, base_download_directory).each do |spd|
        spd.directories_for_mkdir.each do |directory|
          Dir.mkdir(directory) unless File.exist?(directory)
        end

        Dir.chdir(spd.local_download_directory) do
          p spd.eu_path_db_fasta_download_directory

          fasta_url = spd.eu_path_db_fasta_download_directory
          wanted_files = [
            [fasta_url, spd.protein_fasta_filename], # protein
            [spd.eu_path_db_gff_download_directory, spd.gff_filename], # gff
            [fasta_url, spd.transcript_fasta_filename], # transcripts
            [spd.eu_path_db_txt_download_directory, spd.gene_information_filename], # gene information table
            [fasta_url, spd.genomic_fasta_filename], # genomic
          ]
          wanted_files.each do |remote_directory, filename|
            next if File.exist?(filename)
            url = "#{remote_directory}/#{filename}"
            # Passing the arguments separately bypasses the shell, so no
            # quoting of the URL is needed.
            $stderr.puts "Failed to download #{url}" unless system('wget', url)
          end
        end
      end
    end
  end

  private

  # URL of one kind of download folder ('fasta', 'gff' or 'txt') for this
  # species. Species behind a usage policy keep their files one level deeper,
  # in a 'data' sub-folder.
  def download_subdirectory(kind)
    path = "#{eu_path_db_download_directory}/#{kind}"
    path = "#{path}/data" if @species_data[:behind_usage_policy]
    path
  end
end
data/lib/reubypathdb.rb CHANGED
@@ -1,2 +1,3 @@
1
1
  require 'eupathdb_gene_information_table'
2
2
  require 'eupathdb_gff'
3
+ require 'eupathdb_species_data'
data/reubypathdb.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{reubypathdb}
8
- s.version = "0.2.0"
8
+ s.version = "0.3.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Ben J Woodcroft"]
12
- s.date = %q{2011-04-19}
12
+ s.date = %q{2011-08-26}
13
13
  s.description = %q{Classes to help parsing EuPathDB data files}
14
14
  s.email = %q{donttrustben near gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -18,38 +18,48 @@ Gem::Specification.new do |s|
18
18
  ]
19
19
  s.files = [
20
20
  ".document",
21
+ "Gemfile",
22
+ "Gemfile.lock",
21
23
  "LICENSE",
22
24
  "README.rdoc",
23
25
  "Rakefile",
24
26
  "VERSION",
25
27
  "lib/eupathdb_gene_information_table.rb",
26
28
  "lib/eupathdb_gff.rb",
29
+ "lib/eupathdb_species_data.rb",
27
30
  "lib/jgi_genes.rb",
28
31
  "lib/reubypathdb.rb",
29
32
  "reubypathdb.gemspec",
30
33
  "test/data/eupathGeneInformation.txt",
31
34
  "test/helper.rb",
32
- "test/test_eupathdb_gene_information_table.rb"
35
+ "test/test_eupathdb_gene_information_table.rb",
36
+ "test/test_eupathdb_species_data.rb"
33
37
  ]
34
38
  s.homepage = %q{http://github.com/wwood/reubypathdb}
39
+ s.licenses = ["MIT"]
35
40
  s.require_paths = ["lib"]
36
- s.rubygems_version = %q{1.6.2}
41
+ s.rubygems_version = %q{1.6.1}
37
42
  s.summary = %q{Classes to help parsing EuPathDB data files}
38
- s.test_files = [
39
- "test/helper.rb",
40
- "test/test_eupathdb_gene_information_table.rb"
41
- ]
42
43
 
43
44
  if s.respond_to? :specification_version then
44
45
  s.specification_version = 3
45
46
 
46
47
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
47
- s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
48
+ s.add_development_dependency(%q<shoulda>, [">= 0"])
49
+ s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
50
+ s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
51
+ s.add_development_dependency(%q<rcov>, [">= 0"])
48
52
  else
49
- s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
53
+ s.add_dependency(%q<shoulda>, [">= 0"])
54
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
55
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
56
+ s.add_dependency(%q<rcov>, [">= 0"])
50
57
  end
51
58
  else
52
- s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
59
+ s.add_dependency(%q<shoulda>, [">= 0"])
60
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
61
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
62
+ s.add_dependency(%q<rcov>, [">= 0"])
53
63
  end
54
64
  end
55
65
 
@@ -0,0 +1,113 @@
1
+ require 'helper'
2
+ require 'eupathdb_species_data'
3
+
4
# Unit tests for EuPathDBSpeciesData: nickname lookup, generated file names,
# download URLs and local cache paths. No test touches the network or disk.
class EuPathDBSpeciesDataTest < Test::Unit::TestCase
  # Base directory used when building expected local paths. It is never
  # created or read, only interpolated into strings.
  def base_dir
    '/home/ben/phd/data'
  end

  # The release number currently configured for a database, so the expected
  # values below do not need editing when SOURCE_VERSIONS is bumped.
  def release(database)
    EuPathDBSpeciesData::SOURCE_VERSIONS[database]
  end

  def test_method_missing
    spd = EuPathDBSpeciesData.new('Plasmodium yoelii')
    assert_equal 'yoelii', spd.directory
  end

  def test_nickname
    spd = EuPathDBSpeciesData.new('Plasmodium yoelii').fasta_file_species_name
    assert_equal spd, EuPathDBSpeciesData.new('yoelii').fasta_file_species_name
    assert_equal spd, EuPathDBSpeciesData.new('P. yoelii').fasta_file_species_name #check for not exactly the last name but close enough
  end

  def test_protein_data_path
    spd = EuPathDBSpeciesData.new('Plasmodium yoelii', base_dir)
    assert_equal "/home/ben/phd/data/Plasmodium yoelii/genome/PlasmoDB/#{release('PlasmoDB')}/PyoeliiAnnotatedProteins_PlasmoDB-#{release('PlasmoDB')}.fasta",
      spd.protein_fasta_path
  end

  def test_one_word_name
    spd = EuPathDBSpeciesData.new('Plasmodium chabaudi')
    assert_equal 'Pchabaudi', spd.one_word_name
  end

  def test_download_directory
    spd = EuPathDBSpeciesData.new('Plasmodium chabaudi')
    assert_equal "http://plasmodb.org/common/downloads/release-#{release('PlasmoDB')}/Pchabaudi", spd.eu_path_db_download_directory
  end

  def test_transcript_path_default
    spd = EuPathDBSpeciesData.new('Plasmodium chabaudi', base_dir)
    assert_equal "/home/ben/phd/data/Plasmodium chabaudi/genome/PlasmoDB/#{release('PlasmoDB')}/PchabaudiAnnotatedTranscripts_PlasmoDB-#{release('PlasmoDB')}.fasta",
      spd.transcript_fasta_path
  end

  # Previously named test_transcript_fasta_filename, although it has always
  # asserted on the GFF file name.
  def test_gff_filename_default
    spd = EuPathDBSpeciesData.new('falciparum')
    assert_equal "Pfalciparum_PlasmoDB-#{release('PlasmoDB')}.gff",
      spd.gff_filename
  end

  def test_gzfile_path_toxo
    spd = EuPathDBSpeciesData.new('gondii', base_dir)
    assert_equal "/home/ben/phd/data/Toxoplasma gondii/genome/ToxoDB/#{release('ToxoDB')}/TgondiiME49Gene_ToxoDB-#{release('ToxoDB')}.txt.gz",
      spd.gene_information_gzfile_path
  end

  def test_gzfile_path_default
    spd = EuPathDBSpeciesData.new('falciparum', base_dir)
    assert_equal "/home/ben/phd/data/Plasmodium falciparum/genome/PlasmoDB/#{release('PlasmoDB')}/PfalciparumGene_PlasmoDB-#{release('PlasmoDB')}.txt.gz",
      spd.gene_information_gzfile_path
  end

  def test_gzfile_filename_default
    spd = EuPathDBSpeciesData.new('falciparum')
    assert_equal "PfalciparumGene_PlasmoDB-#{release('PlasmoDB')}.txt.gz",
      spd.gene_information_gzfile_filename
  end

  def test_directories_for_mkdir
    spd = EuPathDBSpeciesData.new('gondii', base_dir)
    assert_equal [
      '/home/ben/phd/data',
      '/home/ben/phd/data/Toxoplasma gondii',
      '/home/ben/phd/data/Toxoplasma gondii/genome',
      '/home/ben/phd/data/Toxoplasma gondii/genome/ToxoDB',
      "/home/ben/phd/data/Toxoplasma gondii/genome/ToxoDB/#{release('ToxoDB')}"
    ],
      spd.directories_for_mkdir
  end

  # Renamed: this was a second definition of test_one_word_name, which
  # silently replaced the first one so that only one of the two ever ran.
  def test_one_word_name_with_download_folder
    assert_equal 'NeosporaCaninum', EuPathDBSpeciesData.new('Neospora caninum').one_word_name
    spd = EuPathDBSpeciesData.new('Plasmodium falciparum')
    assert_equal 'Pfalciparum', spd.one_word_name
  end

  def test_genomic_filename
    spd = EuPathDBSpeciesData.new('falciparum')
    assert_equal "PfalciparumGenomic_PlasmoDB-#{release('PlasmoDB')}.fasta",
      spd.genomic_fasta_filename
  end

  def test_transcripts_name_without_block
    spd = EuPathDBSpeciesData.new('Babesia bovis')
    assert_equal "BbovisT2BoAnnotatedTranscripts_PiroplasmaDB-#{release('PiroplasmaDB')}.fasta",
      spd.transcript_fasta_filename
  end

  def test_behind_usage_policy
    spd = EuPathDBSpeciesData.new('Plasmodium chabaudi')
    assert_equal "http://plasmodb.org/common/downloads/release-#{release('PlasmoDB')}/Pchabaudi/fasta/data",
      spd.eu_path_db_fasta_download_directory
  end

  # Renamed: this was a second definition of test_behind_usage_policy, which
  # silently replaced the test above.
  def test_not_behind_usage_policy
    spd = EuPathDBSpeciesData.new('Plasmodium vivax')
    assert_equal "http://plasmodb.org/common/downloads/release-#{release('PlasmoDB')}/Pvivax/fasta",
      spd.eu_path_db_fasta_download_directory
  end

  def test_representative_strain_name
    spd = EuPathDBSpeciesData.new('Trypanosoma brucei')
    assert_equal "http://tritrypdb.org/common/downloads/release-#{release('TriTrypDB')}/Tbrucei/fasta/TbruceiTreu927Genomic_TriTrypDB-#{release('TriTrypDB')}.fasta",
      "#{spd.eu_path_db_fasta_download_directory}/#{spd.genomic_fasta_filename}"
  end
end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: reubypathdb
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 19
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 2
8
+ - 3
9
9
  - 0
10
- version: 0.2.0
10
+ version: 0.3.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Ben J Woodcroft
@@ -15,12 +15,11 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-04-19 00:00:00 +10:00
18
+ date: 2011-08-26 00:00:00 +10:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
- name: thoughtbot-shoulda
23
- prerelease: false
22
+ type: :development
24
23
  requirement: &id001 !ruby/object:Gem::Requirement
25
24
  none: false
26
25
  requirements:
@@ -30,8 +29,55 @@ dependencies:
30
29
  segments:
31
30
  - 0
32
31
  version: "0"
33
- type: :development
34
32
  version_requirements: *id001
33
+ name: shoulda
34
+ prerelease: false
35
+ - !ruby/object:Gem::Dependency
36
+ type: :development
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ~>
41
+ - !ruby/object:Gem::Version
42
+ hash: 23
43
+ segments:
44
+ - 1
45
+ - 0
46
+ - 0
47
+ version: 1.0.0
48
+ version_requirements: *id002
49
+ name: bundler
50
+ prerelease: false
51
+ - !ruby/object:Gem::Dependency
52
+ type: :development
53
+ requirement: &id003 !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ~>
57
+ - !ruby/object:Gem::Version
58
+ hash: 7
59
+ segments:
60
+ - 1
61
+ - 6
62
+ - 4
63
+ version: 1.6.4
64
+ version_requirements: *id003
65
+ name: jeweler
66
+ prerelease: false
67
+ - !ruby/object:Gem::Dependency
68
+ type: :development
69
+ requirement: &id004 !ruby/object:Gem::Requirement
70
+ none: false
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ hash: 3
75
+ segments:
76
+ - 0
77
+ version: "0"
78
+ version_requirements: *id004
79
+ name: rcov
80
+ prerelease: false
35
81
  description: Classes to help parsing EuPathDB data files
36
82
  email: donttrustben near gmail.com
37
83
  executables: []
@@ -43,22 +89,26 @@ extra_rdoc_files:
43
89
  - README.rdoc
44
90
  files:
45
91
  - .document
92
+ - Gemfile
93
+ - Gemfile.lock
46
94
  - LICENSE
47
95
  - README.rdoc
48
96
  - Rakefile
49
97
  - VERSION
50
98
  - lib/eupathdb_gene_information_table.rb
51
99
  - lib/eupathdb_gff.rb
100
+ - lib/eupathdb_species_data.rb
52
101
  - lib/jgi_genes.rb
53
102
  - lib/reubypathdb.rb
54
103
  - reubypathdb.gemspec
55
104
  - test/data/eupathGeneInformation.txt
56
105
  - test/helper.rb
57
106
  - test/test_eupathdb_gene_information_table.rb
107
+ - test/test_eupathdb_species_data.rb
58
108
  has_rdoc: true
59
109
  homepage: http://github.com/wwood/reubypathdb
60
- licenses: []
61
-
110
+ licenses:
111
+ - MIT
62
112
  post_install_message:
63
113
  rdoc_options: []
64
114
 
@@ -85,10 +135,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
85
135
  requirements: []
86
136
 
87
137
  rubyforge_project:
88
- rubygems_version: 1.6.2
138
+ rubygems_version: 1.6.1
89
139
  signing_key:
90
140
  specification_version: 3
91
141
  summary: Classes to help parsing EuPathDB data files
92
- test_files:
93
- - test/helper.rb
94
- - test/test_eupathdb_gene_information_table.rb
142
+ test_files: []
143
+