bio-sra 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.travis.yml +12 -0
- data/Gemfile +20 -0
- data/LICENSE.txt +20 -0
- data/README.md +66 -0
- data/Rakefile +634 -0
- data/VERSION +1 -0
- data/bin/sra_download +170 -0
- data/config/database.yml +7 -0
- data/lib/bio-sra.rb +19 -0
- data/lib/bio/sra/connect.rb +39 -0
- data/lib/bio/sra/sra.rb +152 -0
- data/lib/bio/sra/tables.rb +467 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-sra.rb +78 -0
- data/test/test_sra_download.rb +86 -0
- metadata +216 -0
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/bin/sra_download
ADDED
@@ -0,0 +1,170 @@
|
|
1
|
+
#!/usr/bin/env ruby
#
# sra_download - download run files from the Sequence Read Archive (SRA).
#
# Accessions are read from the command line or from a file (-f). When every
# accession is a run accession the download URLs are built directly;
# otherwise the SRA metadata database is queried to expand each accession
# into the runs it contains. Runs whose target file already exists in the
# current directory are skipped.

require 'optparse'
require 'bio-logger'
require 'csv'
require 'bio-commandeer'

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
require 'bio-sra'

SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = SCRIPT_NAME.gsub('.rb','')

# Parse command line options into the options hash
options = {
  :logger => 'stderr',
  :format => :sra,
  :accessions_file => nil,
  :download_all_from_study => false,
  :treat_input_as_runs => false,
  :dry_run => false,
}
o = OptionParser.new do |opts|
  opts.banner = "
Usage: #{SCRIPT_NAME} <SRA_ACCESSION>

Download data from SRA \n"

  opts.on('-f', "--file FILENAME", "Provide a file of accession numbers, separated by whitespace or commas [default: not used, use the first argument <SRA_ACCESSION>]") do |f|
    options[:accessions_file] = f
  end
  opts.on("--format FORMAT", "format for download [default: 'sra']") do |f|
    format_string_to_sym = {
      'sralite' => :sralite, # no longer supported by NCBI?
      'sra' => :sra,
    }
    options[:format] = format_string_to_sym[f]

    if options[:format].nil?
      raise "Unexpected file format specified '#{f}'. I require one of #{format_string_to_sym.keys.join(', ')}"
    end
  end
  opts.on("--dry-run", "Don't download any instead print the URLs to download to the command line [default: not this, do download]") do
    options[:dry_run] = true
  end

  # logger options
  opts.separator "\nVerbosity:\n\n"
  opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") do
    Bio::Log::CLI.trace('error')
  end
  opts.on("--logger filename", String, "Log to file [default #{options[:logger]}]") do |name|
    options[:logger] = name
  end
  opts.on("--trace options", String, "Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG") do |s|
    Bio::Log::CLI.trace(s)
  end
end
o.parse!

# Without a file of accessions at least one accession argument is required
if options[:accessions_file].nil? && ARGV.empty?
  $stderr.puts o
  exit 1
end

# Setup logging
Bio::Log::CLI.logger(options[:logger]) #bio-logger defaults to STDERR not STDOUT, I disagree
log = Bio::Log::LoggerPlus.new(LOG_NAME)
Bio::Log::CLI.configure(LOG_NAME)

almost_accessions =
  if options[:accessions_file]
    log.debug "Reading SRA accessions from file #{options[:accessions_file]}"
    # File.read closes the file handle; File.open(...).read left it open
    File.read(options[:accessions_file]).split(/[\s,]+/)
  else
    ARGV.collect { |r| r.split(/[\s,]+/) }.flatten
  end
# Remove empty strings and extra digits at the end e.g. SRA029325.1 => SRA029325
accessions = almost_accessions.reject { |a| a == '' }.collect { |a| a.gsub(/\.\d+$/, '') }
log.info "Read in #{accessions.length} accessions"

# Do we need to connect to the database? Only yes if there are accessions that are not runs
non_run = accessions.find do |acc|
  Bio::SRA::Accession.classify_accession_type(acc) != Bio::SRA::RUN
end
options[:treat_input_as_runs] = non_run.nil?
if non_run && log.debug?
  log.debug "Found accession number #{non_run} that does not appear to be a run accession, so need to connect to database"
end

# Connect to the database if required
unless options[:treat_input_as_runs]
  log.info "Connecting to database.."
  Bio::SRA::Connection.connect
end

log.info "Collecting a list of runs to download.."
# Each entry is a [run_accession, accession_it_was_requested_through] pair
runs = []
accessions.each do |acc|
  if options[:treat_input_as_runs]
    runs.push [acc, acc]
    next
  end

  # Look the accession up in the column of the sra table matching its type
  column = Bio::SRA::Accession.accession_to_column_name(acc)
  sra_runs = Bio::SRA::Tables::SRA.where(column => acc)

  if sra_runs.empty?
    log.warn "Unable to find accession number #{acc} in the metadata database, skipping"
    next
  end
  log.debug "Found #{sra_runs.length} runs to download for accession number #{acc}"

  # Convert Run ActiveRecords into simple accessions
  sra_runs.each do |r|
    runs.push [r.run_accession, acc]
  end
end

# Remove duplicate runs
runs.uniq! { |run, _acc| run }
log.info "Found #{runs.length} unique run files to download, downloading them now.."

# The file written by wget is named after the last component of the URL, so
# use the same extension the URL builder uses (e.g. '.lite.sra' for sralite)
# when checking whether a run has been downloaded already.
extension = Bio::SRA::Accession.format_symbol_to_extension(options[:format])

num_skipped = 0
num_downloaded = 0
num_failed = 0
runs.each_with_index do |(run, acc), index|
  download_path = "#{run}#{extension}"
  log.debug "Downloading to #{download_path}"

  if File.exist?(download_path)
    log.debug "Skipping download of run #{download_path} since a file of that accession already exists"
    num_skipped += 1
    next
  end

  if run == acc
    log.info "Downloading run #{run} (#{index+1}/#{runs.length})"
  else
    log.info "Downloading run #{run} from #{acc} (#{index+1}/#{runs.length})"
  end

  url = Bio::SRA::Accession.run_download_url(run, :format => options[:format])

  if options[:dry_run]
    puts url
    num_downloaded += 1
  elsif system('wget', url)
    # The multi-argument form of system does not go through a shell, so
    # characters in the accession cannot be interpreted as shell syntax.
    num_downloaded += 1
  else
    log.error "Failed to download #{url}"
    num_failed += 1
  end
end

log.warn "Failed to download #{num_failed} run files" if num_failed > 0
if options[:dry_run]
  log.info "Finished printing #{num_downloaded} URLs, ignoring #{num_skipped} already downloaded"
else
  log.info "Finished downloading #{num_downloaded}, ignoring #{num_skipped} already downloaded"
end
|
data/config/database.yml
ADDED
data/lib/bio-sra.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# Please require your code below, respecting the naming conventions in the
|
2
|
+
# bioruby directory tree.
|
3
|
+
#
|
4
|
+
# For example, say you have a plugin named bio-plugin, the only uncommented
|
5
|
+
# line in this file would be
|
6
|
+
#
|
7
|
+
# require 'bio/bio-plugin/plugin'
|
8
|
+
#
|
9
|
+
# In this file only require other files. Avoid other source code.
|
10
|
+
require 'active_record'
|
11
|
+
|
12
|
+
require 'bio-logger'
|
13
|
+
Bio::Log::LoggerPlus.new('bio-sra')
|
14
|
+
|
15
|
+
require 'bio/sra/connect'
|
16
|
+
require 'bio/sra/sra'
|
17
|
+
require 'bio/sra/tables'
|
18
|
+
|
19
|
+
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Bio
  module SRA
    # Shortcut for Bio::SRA::Connection.connect using the default database
    # path.
    def self.connect
      Connection.connect
    end

    # Abstract ActiveRecord base class from which the table models inherit,
    # so that they share the connection established by Connection.connect.
    class Connection < ActiveRecord::Base
      self.abstract_class = true

      # Establish the connection to an SRA metadata SQLite database.
      #
      # * sra_metadb_path: path to the gunzipped SRAmetadb.sqlite file. The
      #   default points at db/SRAmetadb.sqlite relative to this gem, which is
      #   probably not where the database file actually is.
      #
      # The database file can be fetched like so:
      #
      #   $ wget http://watson.nci.nih.gov/~zhujack/SRAmetadb.sqlite.gz
      #   $ gunzip SRAmetadb.sqlite.gz
      def self.connect(sra_metadb_path=File.join(File.dirname(__FILE__),'..','..','..','db','SRAmetadb.sqlite'))
        Bio::Log::LoggerPlus['bio-sra'].info "Attempting to connect to database #{sra_metadb_path}"

        # Same settings as a database.yml entry of the form:
        #   adapter: sqlite3
        #   database: db/SRAmetadb.sqlite
        #   pool: 5
        #   timeout: 5000
        establish_connection(
          :adapter  => 'sqlite3',
          :database => sra_metadb_path,
          :pool     => 5,
          :timeout  => 5000
        )
      end
    end
  end
end
|
data/lib/bio/sra/sra.rb
ADDED
@@ -0,0 +1,152 @@
|
|
1
|
+
module Bio
  module SRA
    # Names of the kinds of SRA accession, as returned by
    # Accession.classify_accession_type.
    SUBMISSION = 'submission'
    STUDY = 'study'
    SAMPLE = 'sample'
    EXPERIMENT = 'experiment'
    RUN = 'run'

    # Helpers for classifying SRA accession strings and for building the
    # URLs from which runs can be downloaded.
    class Accession
      # Maps the three-letter accession prefix to the accession type.
      #
      # valid_in_type <- c(SRA = "submission", ERA = "submission",
      # DRA = "submission", SRP = "study", ERP = "study", DRP = "study",
      # SRS = "sample", ERS = "sample", DRS = "sample", SRX = "experiment",
      # ERX = "experiment", DRX = "experiment", SRR = "run",
      # ERR = "run", DRR = "run")
      ACCESSION_TO_TYPE = {
        'SRA' => Bio::SRA::SUBMISSION,
        'ERA' => Bio::SRA::SUBMISSION,
        'DRA' => Bio::SRA::SUBMISSION,
        'SRP' => Bio::SRA::STUDY,
        'ERP' => Bio::SRA::STUDY,
        'DRP' => Bio::SRA::STUDY,
        'SRS' => Bio::SRA::SAMPLE,
        'ERS' => Bio::SRA::SAMPLE,
        'DRS' => Bio::SRA::SAMPLE,
        'SRX' => Bio::SRA::EXPERIMENT,
        'ERX' => Bio::SRA::EXPERIMENT,
        'DRX' => Bio::SRA::EXPERIMENT,
        'SRR' => Bio::SRA::RUN,
        'ERR' => Bio::SRA::RUN,
        'DRR' => Bio::SRA::RUN,
      }.freeze

      # Maps an accession type to the column of the sra table holding
      # accessions of that type.
      TYPE_TO_COLUMN = {
        Bio::SRA::SUBMISSION => :submission_accession,
        Bio::SRA::STUDY => :study_accession,
        Bio::SRA::SAMPLE => :sample_accession,
        Bio::SRA::EXPERIMENT => :experiment_accession,
        Bio::SRA::RUN => :run_accession,
      }.freeze

      # The 'bio-sra' logger, looked up on each use rather than once when the
      # class is loaded, so that loading this file does not depend on the
      # logger having been created already. May be nil.
      def self.log
        Bio::Log::LoggerPlus['bio-sra']
      end
      private_class_method :log

      # Classify an accession by its first three characters.
      # e.g. classify_accession_type('SRR617581') => 'run'
      #
      # Returns one of the Bio::SRA type constants. Raises a RuntimeError when
      # the prefix is not recognised.
      def self.classify_accession_type(accession)
        type = ACCESSION_TO_TYPE[accession[0..2]]
        raise "Unrecognised accession string '#{accession}'" if type.nil?

        logger = log
        logger.debug "Classified #{accession} as SRA type '#{type}'" if logger && logger.debug?
        type
      end

      # Given an accession, return the name of the column in the SRA table
      # that contains it, as a symbol.
      # e.g. accession_to_column_name('SRR617581') => :run_accession
      def self.accession_to_column_name(accession)
        TYPE_TO_COLUMN[classify_accession_type(accession)]
      end

      # Return the file extension (including the leading dot) used for a
      # download format, e.g. :sralite => '.lite.sra', :sra => '.sra'.
      # Raises a RuntimeError for an unknown format symbol.
      def self.format_symbol_to_extension(format_symbol)
        non_standard_extensions = {
          :sralite => '.lite.sra',
          :fastq_gz => '.fastq.gz',
        }
        # Called first so that unknown formats are rejected
        style = format_symbol_to_standard_text format_symbol

        # Default extension is the same as the format
        non_standard_extensions[format_symbol] || ".#{style}"
      end

      # Return the text used for a download format in download URLs,
      # e.g. :sralite => 'litesra'. Raises a RuntimeError for an unknown
      # format symbol.
      def self.format_symbol_to_standard_text(format_symbol)
        formats = {
          :sralite => 'litesra',
          :sra => 'sra',
          :fastq_gz => 'fastq',
          :sff => 'sff'
        }
        style = formats[format_symbol]
        if style.nil?
          raise "Unexpected download format detected #{format_symbol}, I need one of '#{formats.keys.join(', ')}'"
        end
        style
      end

      # Return the URL where a run can be downloaded. Only works if the
      # accession is a run accession e.g. SRR000002 or DRR000002. To get run
      # accessions from another accession type e.g. SRP000002, try
      # Bio::SRA::Tables::SRA.
      #
      # Options:
      # :source: either :ncbi (default), or :ebi
      # :format: either :sralite (default if :source is :ncbi), :fastq_gz (default if :source is :ebi), :sra
      # :layout: either :single (default), :paired1, or :paired2. :paired1 for the first half, :paired2 for the second half. Only required when :source => :ebi, otherwise not used
      #
      # The options hash passed in is not modified. Raises a RuntimeError if
      # the accession is not a run accession or an option is not recognised.
      def self.run_download_url(run_accession, options={})
        # Work on a copy so the defaults filled in below do not leak back
        # into the caller's hash
        opts = (options || {}).dup
        opts[:source] ||= :ncbi
        if opts[:source] == :ebi
          opts[:format] ||= :fastq_gz
          opts[:layout] ||= :single
        else
          opts[:format] ||= :sralite #default to sralite
        end

        type = classify_accession_type(run_accession)
        unless type == Bio::SRA::RUN
          raise "Unexpected type of accession for '#{run_accession}': Expected #{Bio::SRA::RUN} but was #{type}"
        end

        style = format_symbol_to_standard_text opts[:format]
        style_extension = format_symbol_to_extension opts[:format]

        case opts[:source]
        when :ncbi
          # e.g.
          # ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/litesra/DRR/DRR000/DRR000002/DRR000002.lite.sra
          [
            "ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun",
            style,
            run_accession[0..2],
            run_accession[0..5],
            run_accession,
            "#{run_accession}#{style_extension}"
          ].join('/')
        when :ebi
          unless style == 'fastq'
            raise "Unexpected format for download detected #{opts[:format]} in combination with :source => :ebi. Require :fastq_gz"
          end
          ok_layouts = [:single, :paired1, :paired2]
          unless ok_layouts.include?(opts[:layout])
            raise "Unexpected layout for download detected #{opts[:layout]} in combination with :source => :ebi. Require on of #{ok_layouts.join(', ')}."
          end
          # e.g. for paired ended
          # ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR069/SRR069027/SRR069027_1.fastq.gz
          # e.g. for single end
          # ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR404/SRR404303/SRR404303.fastq.gz
          layout_suffix = { :single => '', :paired1 => '_1', :paired2 => '_2' }[opts[:layout]]
          [
            'ftp://ftp.sra.ebi.ac.uk/vol1',
            style,
            run_accession[0..5],
            run_accession,
            "#{run_accession}#{layout_suffix}#{style_extension}"
          ].join('/')
        else
          raise "Unexpected download source detected #{opts[:source]}, I need one of 'ncbi, ebi'"
        end
      end
    end
  end
end
|
@@ -0,0 +1,467 @@
|
|
1
|
+
module Bio
|
2
|
+
module SRA
|
3
|
+
module Tables
|
4
|
+
# > pp Bio::SRA::Tables::SRA.column_names
|
5
|
+
# ["sra_ID",
|
6
|
+
# "SRR_bamFile",
|
7
|
+
# "SRX_bamFile",
|
8
|
+
# "SRX_fastqFTP",
|
9
|
+
# "run_ID",
|
10
|
+
# "run_alias",
|
11
|
+
# "run_accession",
|
12
|
+
# "run_date",
|
13
|
+
# "updated_date",
|
14
|
+
# "spots",
|
15
|
+
# "bases",
|
16
|
+
# "run_center",
|
17
|
+
# "experiment_name",
|
18
|
+
# "run_url_link",
|
19
|
+
# "run_entrez_link",
|
20
|
+
# "run_attribute",
|
21
|
+
# "experiment_ID",
|
22
|
+
# "experiment_alias",
|
23
|
+
# "experiment_accession",
|
24
|
+
# "experiment_title",
|
25
|
+
# "study_name",
|
26
|
+
# "sample_name",
|
27
|
+
# "design_description",
|
28
|
+
# "library_name",
|
29
|
+
# "library_strategy",
|
30
|
+
# "library_source",
|
31
|
+
# "library_selection",
|
32
|
+
# "library_layout",
|
33
|
+
# "library_construction_protocol",
|
34
|
+
# "adapter_spec",
|
35
|
+
# "read_spec",
|
36
|
+
# "platform",
|
37
|
+
# "instrument_model",
|
38
|
+
# "instrument_name",
|
39
|
+
# "platform_parameters",
|
40
|
+
# "sequence_space",
|
41
|
+
# "base_caller",
|
42
|
+
# "quality_scorer",
|
43
|
+
# "number_of_levels",
|
44
|
+
# "multiplier",
|
45
|
+
# "qtype",
|
46
|
+
# "experiment_url_link",
|
47
|
+
# "experiment_entrez_link",
|
48
|
+
# "experiment_attribute",
|
49
|
+
# "sample_ID",
|
50
|
+
# "sample_alias",
|
51
|
+
# "sample_accession",
|
52
|
+
# "taxon_id",
|
53
|
+
# "common_name",
|
54
|
+
# "anonymized_name",
|
55
|
+
# "individual_name",
|
56
|
+
# "description",
|
57
|
+
# "sample_url_link",
|
58
|
+
# "sample_entrez_link",
|
59
|
+
# "sample_attribute",
|
60
|
+
# "study_ID",
|
61
|
+
# "study_alias",
|
62
|
+
# "study_accession",
|
63
|
+
# "study_title",
|
64
|
+
# "study_type",
|
65
|
+
# "study_abstract",
|
66
|
+
# "center_project_name",
|
67
|
+
# "study_description",
|
68
|
+
# "study_url_link",
|
69
|
+
# "study_entrez_link",
|
70
|
+
# "study_attribute",
|
71
|
+
# "related_studies",
|
72
|
+
# "primary_study",
|
73
|
+
# "submission_ID",
|
74
|
+
# "submission_accession",
|
75
|
+
# "submission_comment",
|
76
|
+
# "submission_center",
|
77
|
+
# "submission_lab",
|
78
|
+
# "submission_date"]
|
79
|
+
# ActiveRecord model for the 'sra' table. Each row carries the run,
# experiment, sample, study and submission accessions of one entry,
# together with foreign keys to the corresponding per-type tables.
class SRA < Connection
  self.table_name = 'sra'
  self.primary_key = 'sra_ID'

  # Foreign keys
  belongs_to :submission, :foreign_key => 'submission_ID', :class_name => 'Submission', :primary_key => 'submission_ID'
  belongs_to :experiment, :foreign_key => 'experiment_ID', :class_name => 'Experiment', :primary_key => 'experiment_ID'
  belongs_to :study, :foreign_key => 'study_ID', :class_name => 'Study', :primary_key => 'study_ID'
  belongs_to :sample, :foreign_key => 'sample_ID', :class_name => 'Sample', :primary_key => 'sample_ID'
  belongs_to :run, :foreign_key => 'run_ID', :class_name => 'Run', :primary_key => 'run_ID'

  # Scope for finding by an arbitrary SRA accession number e.g.
  # SRA.accession('SRA049809').all #=> Array of SRA objects that are part of the SRA049809 submission
  # SRA.accession('SRR404303').first #=> SRA object for the SRR404303 run (there is only 1 since this is a run accession)
  #
  # The column searched is chosen from the accession's prefix. Built with
  # `where` rather than a {:conditions => ...} hash so that the scope
  # returns a relation.
  scope :accession, lambda { |accession|
    where(Bio::SRA::Accession.accession_to_column_name(accession) => accession)
  }

  # URLs of all the runs in the study this entry belongs to. The options
  # are passed through to download_url for each run.
  def study_download_urls(options = {})
    SRA.where(:study_accession => study_accession).collect do |run|
      run.download_url(options)
    end
  end

  # Return the URL where the run of this SRA entry can be downloaded.
  # The options are those of Bio::SRA::Accession.run_download_url, which
  # builds the URL from this entry's run accession.
  def download_url(options = {})
    Bio::SRA::Accession.run_download_url(run_accession, options)
  end
end
|
115
|
+
|
116
|
+
# pp Bio::SRA::Tables::Submission.column_names
|
117
|
+
# ["submission_ID",
|
118
|
+
# "submission_alias",
|
119
|
+
# "submission_accession",
|
120
|
+
# "submission_comment",
|
121
|
+
# "files",
|
122
|
+
# "broker_name",
|
123
|
+
# "center_name",
|
124
|
+
# "lab_name",
|
125
|
+
# "submission_date",
|
126
|
+
# "sra_link",
|
127
|
+
# "submission_url_link",
|
128
|
+
# "xref_link",
|
129
|
+
# "submission_entrez_link",
|
130
|
+
# "ddbj_link",
|
131
|
+
# "ena_link",
|
132
|
+
# "submission_attribute",
|
133
|
+
# "sradb_updated"]
|
134
|
+
# ActiveRecord model for the 'submission' table. Rows of the 'sra' table
# refer to a submission through their 'submission_ID' column.
class Submission < Connection
  self.table_name  = 'submission'
  self.primary_key = 'submission_ID'

  has_many :sras,
           :foreign_key => 'submission_ID',
           :class_name  => 'SRA'
end
|
139
|
+
|
140
|
+
# pp Bio::SRA::Tables::Experiment.column_names
|
141
|
+
# ["experiment_ID",
|
142
|
+
# "bamFile",
|
143
|
+
# "fastqFTP",
|
144
|
+
# "experiment_alias",
|
145
|
+
# "experiment_accession",
|
146
|
+
# "broker_name",
|
147
|
+
# "center_name",
|
148
|
+
# "title",
|
149
|
+
# "study_name",
|
150
|
+
# "study_accession",
|
151
|
+
# "design_description",
|
152
|
+
# "sample_name",
|
153
|
+
# "sample_accession",
|
154
|
+
# "sample_member",
|
155
|
+
# "library_name",
|
156
|
+
# "library_strategy",
|
157
|
+
# "library_source",
|
158
|
+
# "library_selection",
|
159
|
+
# "library_layout",
|
160
|
+
# "targeted_loci",
|
161
|
+
# "library_construction_protocol",
|
162
|
+
# "spot_length",
|
163
|
+
# "adapter_spec",
|
164
|
+
# "read_spec",
|
165
|
+
# "platform",
|
166
|
+
# "instrument_model",
|
167
|
+
# "platform_parameters",
|
168
|
+
# "sequence_space",
|
169
|
+
# "base_caller",
|
170
|
+
# "quality_scorer",
|
171
|
+
# "number_of_levels",
|
172
|
+
# "multiplier",
|
173
|
+
# "qtype",
|
174
|
+
# "sra_link",
|
175
|
+
# "experiment_url_link",
|
176
|
+
# "xref_link",
|
177
|
+
# "experiment_entrez_link",
|
178
|
+
# "ddbj_link",
|
179
|
+
# "ena_link",
|
180
|
+
# "experiment_attribute",
|
181
|
+
# "submission_accession",
|
182
|
+
# "sradb_updated"]
|
183
|
+
# ActiveRecord model for the 'experiment' table. Rows of the 'sra' table
# refer to an experiment through their 'experiment_ID' column.
class Experiment < Connection
  self.table_name  = 'experiment'
  self.primary_key = 'experiment_ID'

  has_many :sras,
           :foreign_key => 'experiment_ID',
           :class_name  => 'SRA'
end
|
188
|
+
|
189
|
+
# pp Bio::SRA::Tables::Run.column_names
|
190
|
+
# ["run_ID",
|
191
|
+
# "bamFile",
|
192
|
+
# "run_alias",
|
193
|
+
# "run_accession",
|
194
|
+
# "broker_name",
|
195
|
+
# "instrument_name",
|
196
|
+
# "run_date",
|
197
|
+
# "run_file",
|
198
|
+
# "run_center",
|
199
|
+
# "total_data_blocks",
|
200
|
+
# "experiment_accession",
|
201
|
+
# "experiment_name",
|
202
|
+
# "sra_link",
|
203
|
+
# "run_url_link",
|
204
|
+
# "xref_link",
|
205
|
+
# "run_entrez_link",
|
206
|
+
# "ddbj_link",
|
207
|
+
# "ena_link",
|
208
|
+
# "run_attribute",
|
209
|
+
# "submission_accession",
|
210
|
+
# "sradb_updated"]
|
211
|
+
# ActiveRecord model for the 'run' table. Rows of the 'sra' table refer to
# a run through their 'run_ID' column.
class Run < Connection
  self.table_name  = 'run'
  self.primary_key = 'run_ID'

  has_many :sras,
           :foreign_key => 'run_ID',
           :class_name  => 'SRA'
end
|
216
|
+
|
217
|
+
# pp Bio::SRA::Tables::Sample.column_names
|
218
|
+
# ["sample_ID",
|
219
|
+
# "sample_alias",
|
220
|
+
# "sample_accession",
|
221
|
+
# "broker_name",
|
222
|
+
# "center_name",
|
223
|
+
# "taxon_id",
|
224
|
+
# "scientific_name",
|
225
|
+
# "common_name",
|
226
|
+
# "anonymized_name",
|
227
|
+
# "individual_name",
|
228
|
+
# "description",
|
229
|
+
# "sra_link",
|
230
|
+
# "sample_url_link",
|
231
|
+
# "xref_link",
|
232
|
+
# "sample_entrez_link",
|
233
|
+
# "ddbj_link",
|
234
|
+
# "ena_link",
|
235
|
+
# "sample_attribute",
|
236
|
+
# "submission_accession",
|
237
|
+
# "sradb_updated"]
|
238
|
+
# ActiveRecord model for the 'sample' table. Rows of the 'sra' table refer
# to a sample through their 'sample_ID' column.
class Sample < Connection
  self.table_name  = 'sample'
  self.primary_key = 'sample_ID'

  has_many :sras,
           :foreign_key => 'sample_ID',
           :class_name  => 'SRA'
end
|
243
|
+
|
244
|
+
# pp Bio::SRA::Tables::Study.column_names
|
245
|
+
# ["study_ID",
|
246
|
+
# "study_alias",
|
247
|
+
# "study_accession",
|
248
|
+
# "study_title",
|
249
|
+
# "study_type",
|
250
|
+
# "study_abstract",
|
251
|
+
# "broker_name",
|
252
|
+
# "center_name",
|
253
|
+
# "center_project_name",
|
254
|
+
# "study_description",
|
255
|
+
# "related_studies",
|
256
|
+
# "primary_study",
|
257
|
+
# "sra_link",
|
258
|
+
# "study_url_link",
|
259
|
+
# "xref_link",
|
260
|
+
# "study_entrez_link",
|
261
|
+
# "ddbj_link",
|
262
|
+
# "ena_link",
|
263
|
+
# "study_attribute",
|
264
|
+
# "submission_accession",
|
265
|
+
# "sradb_updated"]
|
266
|
+
# ActiveRecord model for the 'study' table. Rows of the 'sra' table refer
# to a study through their 'study_ID' column.
class Study < Connection
  self.table_name  = 'study'
  self.primary_key = 'study_ID'

  has_many :sras,
           :foreign_key => 'study_ID',
           :class_name  => 'SRA'
end
|
271
|
+
|
272
|
+
# > pp Bio::SRA::Tables::SRAFt.column_names
|
273
|
+
# ["SRR_bamFile",
|
274
|
+
# "SRX_bamFile",
|
275
|
+
# "SRX_fastqFTP",
|
276
|
+
# "run_ID",
|
277
|
+
# "run_alias",
|
278
|
+
# "run_accession",
|
279
|
+
# "run_date",
|
280
|
+
# "updated_date",
|
281
|
+
# "spots",
|
282
|
+
# "bases",
|
283
|
+
# "run_center",
|
284
|
+
# "experiment_name",
|
285
|
+
# "run_url_link",
|
286
|
+
# "run_entrez_link",
|
287
|
+
# "run_attribute",
|
288
|
+
# "experiment_ID",
|
289
|
+
# "experiment_alias",
|
290
|
+
# "experiment_accession",
|
291
|
+
# "experiment_title",
|
292
|
+
# "study_name",
|
293
|
+
# "sample_name",
|
294
|
+
# "design_description",
|
295
|
+
# "library_name",
|
296
|
+
# "library_strategy",
|
297
|
+
# "library_source",
|
298
|
+
# "library_selection",
|
299
|
+
# "library_layout",
|
300
|
+
# "library_construction_protocol",
|
301
|
+
# "adapter_spec",
|
302
|
+
# "read_spec",
|
303
|
+
# "platform",
|
304
|
+
# "instrument_model",
|
305
|
+
# "instrument_name",
|
306
|
+
# "platform_parameters",
|
307
|
+
# "sequence_space",
|
308
|
+
# "base_caller",
|
309
|
+
# "quality_scorer",
|
310
|
+
# "number_of_levels",
|
311
|
+
# "multiplier",
|
312
|
+
# "qtype",
|
313
|
+
# "experiment_url_link",
|
314
|
+
# "experiment_entrez_link",
|
315
|
+
# "experiment_attribute",
|
316
|
+
# "sample_ID",
|
317
|
+
# "sample_alias",
|
318
|
+
# "sample_accession",
|
319
|
+
# "taxon_id",
|
320
|
+
# "common_name",
|
321
|
+
# "anonymized_name",
|
322
|
+
# "individual_name",
|
323
|
+
# "description",
|
324
|
+
# "sample_url_link",
|
325
|
+
# "sample_entrez_link",
|
326
|
+
# "sample_attribute",
|
327
|
+
# "study_ID",
|
328
|
+
# "study_alias",
|
329
|
+
# "study_accession",
|
330
|
+
# "study_title",
|
331
|
+
# "study_type",
|
332
|
+
# "study_abstract",
|
333
|
+
# "center_project_name",
|
334
|
+
# "study_description",
|
335
|
+
# "study_url_link",
|
336
|
+
# "study_entrez_link",
|
337
|
+
# "study_attribute",
|
338
|
+
# "related_studies",
|
339
|
+
# "primary_study",
|
340
|
+
# "submission_ID",
|
341
|
+
# "submission_accession",
|
342
|
+
# "submission_comment",
|
343
|
+
# "submission_center",
|
344
|
+
# "submission_lab",
|
345
|
+
# "submission_date",
|
346
|
+
# "sradb_updated"]
|
347
|
+
# ActiveRecord model mapped onto the 'sra_ft' table.
class SRAFt < Connection
  self.table_name = 'sra_ft'
end
|
350
|
+
|
351
|
+
# pp Bio::SRA::Tables::SRAFtContent.column_names
|
352
|
+
# ["docid",
|
353
|
+
# "c0SRR_bamFile",
|
354
|
+
# "c1SRX_bamFile",
|
355
|
+
# "c2SRX_fastqFTP",
|
356
|
+
# "c3run_ID",
|
357
|
+
# "c4run_alias",
|
358
|
+
# "c5run_accession",
|
359
|
+
# "c6run_date",
|
360
|
+
# "c7updated_date",
|
361
|
+
# "c8spots",
|
362
|
+
# "c9bases",
|
363
|
+
# "c10run_center",
|
364
|
+
# "c11experiment_name",
|
365
|
+
# "c12run_url_link",
|
366
|
+
# "c13run_entrez_link",
|
367
|
+
# "c14run_attribute",
|
368
|
+
# "c15experiment_ID",
|
369
|
+
# "c16experiment_alias",
|
370
|
+
# "c17experiment_accession",
|
371
|
+
# "c18experiment_title",
|
372
|
+
# "c19study_name",
|
373
|
+
# "c20sample_name",
|
374
|
+
# "c21design_description",
|
375
|
+
# "c22library_name",
|
376
|
+
# "c23library_strategy",
|
377
|
+
# "c24library_source",
|
378
|
+
# "c25library_selection",
|
379
|
+
# "c26library_layout",
|
380
|
+
# "c27library_construction_protocol",
|
381
|
+
# "c28adapter_spec",
|
382
|
+
# "c29read_spec",
|
383
|
+
# "c30platform",
|
384
|
+
# "c31instrument_model",
|
385
|
+
# "c32instrument_name",
|
386
|
+
# "c33platform_parameters",
|
387
|
+
# "c34sequence_space",
|
388
|
+
# "c35base_caller",
|
389
|
+
# "c36quality_scorer",
|
390
|
+
# "c37number_of_levels",
|
391
|
+
# "c38multiplier",
|
392
|
+
# "c39qtype",
|
393
|
+
# "c40experiment_url_link",
|
394
|
+
# "c41experiment_entrez_link",
|
395
|
+
# "c42experiment_attribute",
|
396
|
+
# "c43sample_ID",
|
397
|
+
# "c44sample_alias",
|
398
|
+
# "c45sample_accession",
|
399
|
+
# "c46taxon_id",
|
400
|
+
# "c47common_name",
|
401
|
+
# "c48anonymized_name",
|
402
|
+
# "c49individual_name",
|
403
|
+
# "c50description",
|
404
|
+
# "c51sample_url_link",
|
405
|
+
# "c52sample_entrez_link",
|
406
|
+
# "c53sample_attribute",
|
407
|
+
# "c54study_ID",
|
408
|
+
# "c55study_alias",
|
409
|
+
# "c56study_accession",
|
410
|
+
# "c57study_title",
|
411
|
+
# "c58study_type",
|
412
|
+
# "c59study_abstract",
|
413
|
+
# "c60center_project_name",
|
414
|
+
# "c61study_description",
|
415
|
+
# "c62study_url_link",
|
416
|
+
# "c63study_entrez_link",
|
417
|
+
# "c64study_attribute",
|
418
|
+
# "c65related_studies",
|
419
|
+
# "c66primary_study",
|
420
|
+
# "c67submission_ID",
|
421
|
+
# "c68submission_accession",
|
422
|
+
# "c69submission_comment",
|
423
|
+
# "c70submission_center",
|
424
|
+
# "c71submission_lab",
|
425
|
+
# "c72submission_date",
|
426
|
+
# "c73sradb_updated"]
|
427
|
+
# ActiveRecord model mapped onto the 'sra_ft_content' table.
class SRAFtContent < Connection
  self.table_name = 'sra_ft_content'
end
|
430
|
+
|
431
|
+
# pp Bio::SRA::Tables::SRAFtSegDir.column_names
|
432
|
+
# ["level", "idx", "start_block", "leaves_end_block", "end_block", "root"]
|
433
|
+
# ActiveRecord model mapped onto the 'sra_ft_segdir' table.
class SRAFtSegDir < Connection
  self.table_name = 'sra_ft_segdir'
end
|
436
|
+
|
437
|
+
# pp Bio::SRA::Tables::SRAFtSegments.column_names
|
438
|
+
# ["blockid", "block"]
|
439
|
+
# ActiveRecord model mapped onto the 'sra_ft_segments' table.
class SRAFtSegments < Connection
  self.table_name = 'sra_ft_segments'
end
|
442
|
+
|
443
|
+
# pp Bio::SRA::Tables::MetaInfo.column_names
|
444
|
+
# ["name", "value"]
|
445
|
+
# ActiveRecord model mapped onto the 'metaInfo' table of name/value pairs.
class MetaInfo < Connection
  self.table_name = 'metaInfo'
end
|
448
|
+
|
449
|
+
# This table holds information about each of the columns
|
450
|
+
# in this SRAmetadb database
|
451
|
+
#
|
452
|
+
# pp Bio::SRA::Tables::ColDesc.column_names
|
453
|
+
# ["col_desc_ID",
|
454
|
+
# "table_name",
|
455
|
+
# "field_name",
|
456
|
+
# "type",
|
457
|
+
# "description",
|
458
|
+
# "value_list",
|
459
|
+
# "sradb_updated"]
|
460
|
+
# ActiveRecord model mapped onto the 'col_desc' table, which describes the
# columns of the tables in the database.
class ColDesc < Connection
  self.table_name  = 'col_desc'
  self.primary_key = 'col_desc_ID'

  # The table has a column called "type"; clearing the inheritance column
  # stops ActiveRecord from using it for single table inheritance.
  self.inheritance_column = nil
end
|
465
|
+
end
|
466
|
+
end
|
467
|
+
end
|