bio-sra 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,170 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'bio-logger'
5
+ require 'csv'
6
+ require 'bio-commandeer'
7
+
8
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
9
+ require 'bio-sra'
10
+
11
SCRIPT_NAME = File.basename(__FILE__)
LOG_NAME = SCRIPT_NAME.gsub('.rb','')

# Parse command line options into the options hash
options = {
  :logger => 'stderr',
  :format => :sra,
  :accessions_file => nil,
  :download_all_from_study => false,
  :treat_input_as_runs => false,
  :dry_run => false,
}
o = OptionParser.new do |opts|
  opts.banner = "
    Usage: #{SCRIPT_NAME} <SRA_ACCESSION>

    Download data from SRA \n"

  opts.on('-f', "--file FILENAME", "Provide a file of accession numbers, separated by whitespace or commas [default: not used, use the first argument <SRA_ACCESSION>]") do |f|
    options[:accessions_file] = f
  end
  opts.on("--format FORMAT", "format for download [default: 'sra']") do |f|
    format_string_to_sym = {
      'sralite' => :sralite, # no longer supported by NCBI?
      'sra' => :sra,
    }
    options[:format] = format_string_to_sym[f]

    if options[:format].nil?
      raise "Unexpected file format specified '#{f}'. I require one of #{format_string_to_sym.keys.join(', ') }"
    end
  end
  opts.on("--dry-run", "Don't download any instead print the URLs to download to the command line [default: not this, do download]") do
    options[:dry_run] = true
  end

  # logger options
  opts.separator "\nVerbosity:\n\n"
  opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") do
    Bio::Log::CLI.trace('error')
  end
  opts.on("--logger filename",String,"Log to file [default #{options[:logger] }]") do | name |
    options[:logger] = name
  end
  opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG") do | s |
    Bio::Log::CLI.trace(s)
  end
end
o.parse!

# Without an accessions file or at least one positional accession there is nothing to do
if options[:accessions_file].nil? && ARGV.empty?
  $stderr.puts o
  exit 1
end

# Setup logging
Bio::Log::CLI.logger(options[:logger]) #bio-logger defaults to STDERR not STDOUT, I disagree
log = Bio::Log::LoggerPlus.new(LOG_NAME)
Bio::Log::CLI.configure(LOG_NAME)

# Gather the raw accession strings, either from the file or from the command line.
# Both sources are split on whitespace and/or commas.
almost_accessions =
  if options[:accessions_file]
    log.debug "Reading SRA accessions from file #{options[:accessions_file] }"
    # File.read closes the handle, unlike File.open(...).read
    File.read(options[:accessions_file]).split(/[\s,]+/)
  else
    ARGV.collect{|r| r.split(/[\s,]+/)}.flatten
  end
# Remove empty strings and extra digits at the end e.g. SRA029325.1 => SRA029325
accessions = almost_accessions.reject{|a| a==''}.collect{|a| a.gsub(/\.\d+$/,'')}
log.info "Read in #{accessions.length} accessions"

# Do we need to connect to the database? Only yes if there are accessions that are not runs.
# classify_accession_type raises on an unrecognised accession, which aborts the script here.
first_non_run = accessions.find do |acc|
  Bio::SRA::Accession.classify_accession_type(acc) != Bio::SRA::RUN
end
options[:treat_input_as_runs] = first_non_run.nil?
if first_non_run && log.debug?
  log.debug "Found accession number #{first_non_run} that does not appear to be a run accession, so need to connect to database"
end

# Connect to the database if required (and only announce it when it actually happens)
unless options[:treat_input_as_runs]
  log.info "Connecting to database.."
  Bio::SRA::Connection.connect
end

# Build a list of [run_accession, accession_given_by_user] pairs
log.info "Collecting a list of runs to download.."
runs = []
accessions.each do |acc|
  if options[:treat_input_as_runs]
    runs.push [acc, acc]
    next
  end

  # Look the accession up in whichever column of the sra table matches its type
  column = Bio::SRA::Accession.accession_to_column_name(acc)
  sra_runs = Bio::SRA::Tables::SRA.where(column => acc)

  if sra_runs.empty?
    log.warn "Unable to find accession number #{acc} in the metadata database, skipping"
    next
  end
  log.debug "Found #{sra_runs.length} runs to download for accession number #{acc}"

  # Convert Run ActiveRecords into simple accessions
  sra_runs.each do |r|
    runs.push [r.run_accession, acc]
  end
end

# Remove duplicate runs
runs.uniq! { |run, _acc| run }
log.info "Found #{runs.length} unique run files to download, downloading them now.."

# The local file name must use the same extension as the remote file
# (e.g. '.lite.sra' for :sralite), otherwise existing downloads are never detected.
extension = Bio::SRA::Accession.format_symbol_to_extension(options[:format])

num_skipped = 0
num_downloaded = 0
num_failed = 0
runs.each_with_index do |(run, acc), index|
  download_path = "#{run}#{extension}"
  log.debug "Downloading to #{download_path}"

  if File.exist?(download_path)
    log.debug "Skipping download of run #{download_path} since a file of that accession already exists"
    num_skipped += 1
    next
  end

  if run == acc
    log.info "Downloading run #{run} (#{index+1}/#{runs.length})"
  else
    log.info "Downloading run #{run} from #{acc} (#{index+1}/#{runs.length})"
  end

  url = Bio::SRA::Accession.run_download_url(run, :format => options[:format])

  if options[:dry_run]
    puts url
    num_downloaded += 1
  elsif system('wget', url)
    # The argument-list form of system bypasses the shell, so nothing in the
    # URL (which is derived from user-supplied accessions) can be expanded.
    num_downloaded += 1
  else
    log.error "wget failed while downloading #{url}"
    num_failed += 1
  end
end

if options[:dry_run]
  log.info "Finished printing #{num_downloaded} URLs, ignoring #{num_skipped} already downloaded"
else
  log.info "Finished downloading #{num_downloaded}, ignoring #{num_skipped} already downloaded"
  log.error "#{num_failed} downloads failed" if num_failed > 0
end
@@ -0,0 +1,7 @@
1
+ # SQLite version 3.x
2
+ # gem install sqlite3
3
+ default:
4
+ adapter: sqlite3
5
+ database: db/SRAmetadb.sqlite
6
+ pool: 5
7
+ timeout: 5000
@@ -0,0 +1,19 @@
1
+ # Please require your code below, respecting the naming conventions in the
2
+ # bioruby directory tree.
3
+ #
4
+ # For example, say you have a plugin named bio-plugin, the only uncommented
5
+ # line in this file would be
6
+ #
7
+ # require 'bio/bio-plugin/plugin'
8
+ #
9
+ # In this file only require other files. Avoid other source code.
10
+ require 'active_record'
11
+
12
+ require 'bio-logger'
13
+ Bio::Log::LoggerPlus.new('bio-sra')
14
+
15
+ require 'bio/sra/connect'
16
+ require 'bio/sra/sra'
17
+ require 'bio/sra/tables'
18
+
19
+
@@ -0,0 +1,39 @@
1
+ module Bio
2
+ module SRA
3
# Convenience wrapper around Connection.connect.
#
# Any arguments are forwarded unchanged, so the optional path to the
# SRAmetadb.sqlite file accepted by Connection.connect can be given here too.
# Called without arguments it behaves exactly as before.
def self.connect(*args)
  Connection.connect(*args)
end
6
+
7
class Connection < ActiveRecord::Base
  # Abstract base: the table classes inherit from this one so they share its connection
  self.abstract_class = true

  # Connect to a metadata database.
  #
  # * sra_metadb_path: a path to the gunzipped SRAmetadb.sqlite file which is
  #   the database. By default this is in the db/ directory of this gem, but
  #   that probably isn't where the db file is.
  #
  # You can download the file like so:
  #
  #   $ wget http://watson.nci.nih.gov/~zhujack/SRAmetadb.sqlite.gz
  #   $ gunzip SRAmetadb.sqlite.gz
  #
  # The connection settings mirror the gem's database config:
  # sqlite3 adapter, pool of 5, timeout of 5000.
  def self.connect(sra_metadb_path=File.join(File.dirname(__FILE__),'..','..','..','db','SRAmetadb.sqlite'))
    Bio::Log::LoggerPlus['bio-sra'].info "Attempting to connect to database #{sra_metadb_path}"

    establish_connection(
      :adapter  => 'sqlite3',
      :database => sra_metadb_path,
      :pool     => 5,
      :timeout  => 5000
    )
  end
end
38
+ end
39
+ end
@@ -0,0 +1,152 @@
1
module Bio
  module SRA
    # Names of the five kinds of SRA record. Each name is also the prefix of
    # the matching "<type>_accession" column of the sra table.
    SUBMISSION = 'submission'.freeze
    STUDY = 'study'.freeze
    SAMPLE = 'sample'.freeze
    EXPERIMENT = 'experiment'.freeze
    RUN = 'run'.freeze

    # Helpers for classifying SRA accession strings and for building the
    # URLs that run files can be downloaded from.
    class Accession
      # Maps the three-letter accession prefix to the record type.
      #
      # valid_in_type <- c(SRA = "submission", ERA = "submission",
      # DRA = "submission", SRP = "study", ERP = "study", DRP = "study",
      # SRS = "sample", ERS = "sample", DRS = "sample", SRX = "experiment",
      # ERX = "experiment", DRX = "experiment", SRR = "run",
      # ERR = "run", DRR = "run")
      ACCESSION_TO_TYPE = {
        'SRA' => Bio::SRA::SUBMISSION,
        'ERA' => Bio::SRA::SUBMISSION,
        'DRA' => Bio::SRA::SUBMISSION,
        'SRP' => Bio::SRA::STUDY,
        'ERP' => Bio::SRA::STUDY,
        'DRP' => Bio::SRA::STUDY,
        'SRS' => Bio::SRA::SAMPLE,
        'ERS' => Bio::SRA::SAMPLE,
        'DRS' => Bio::SRA::SAMPLE,
        'SRX' => Bio::SRA::EXPERIMENT,
        'ERX' => Bio::SRA::EXPERIMENT,
        'DRX' => Bio::SRA::EXPERIMENT,
        'SRR' => Bio::SRA::RUN,
        'ERR' => Bio::SRA::RUN,
        'DRR' => Bio::SRA::RUN,
      }.freeze

      # Maps a record type to the column of the sra table holding its accession.
      TYPE_TO_COLUMN = {
        Bio::SRA::SUBMISSION => :submission_accession,
        Bio::SRA::STUDY => :study_accession,
        Bio::SRA::SAMPLE => :sample_accession,
        Bio::SRA::EXPERIMENT => :experiment_accession,
        Bio::SRA::RUN => :run_accession,
      }.freeze

      # File name suffix appended to the run accession for each EBI layout.
      EBI_LAYOUT_SUFFIXES = {
        :single => '',
        :paired1 => '_1',
        :paired2 => '_2',
      }.freeze

      # The 'bio-sra' logger, looked up on each use rather than captured in a
      # class variable at load time, so this file does not depend on the logger
      # having been created before it is required.
      def self.log
        Bio::Log::LoggerPlus['bio-sra']
      end
      private_class_method :log

      # Classify an accession by its first three characters.
      #
      # Returns one of the Bio::SRA type constants (e.g. Bio::SRA::RUN).
      # Raises a RuntimeError if the prefix is not recognised (nil and strings
      # shorter than three characters are reported the same way).
      def self.classify_accession_type(accession)
        type = ACCESSION_TO_TYPE[accession.to_s[0..2]]
        if type.nil?
          raise "Unrecognised accession string '#{accession}'"
        end
        logger = log
        logger.debug "Classified #{accession} as SRA type '#{type}'" if logger && logger.debug?
        type
      end

      # Given an accession, return (as a symbol) the name of the column in the SRA table that contains it.
      # e.g. accession_to_column_name('SRR617581') => :run_accession
      def self.accession_to_column_name(accession)
        TYPE_TO_COLUMN[classify_accession_type(accession)]
      end

      # Return the file extension (including the leading dot) used for a
      # download format, e.g. :sralite => '.lite.sra', :sra => '.sra'.
      # Raises a RuntimeError for an unknown format symbol.
      def self.format_symbol_to_extension(format_symbol)
        non_standard_extensions = {
          :sralite => '.lite.sra',
          :fastq_gz => '.fastq.gz',
        }
        # Called first so that an unknown format raises here
        style = format_symbol_to_standard_text format_symbol

        # Default extension is the same as the format
        non_standard_extensions[format_symbol] || ".#{style}"
      end

      # Return the directory-style name used in download URLs for a format,
      # e.g. :sralite => 'litesra', :fastq_gz => 'fastq'.
      # Raises a RuntimeError for an unknown format symbol.
      def self.format_symbol_to_standard_text(format_symbol)
        formats = {
          :sralite => 'litesra',
          :sra => 'sra',
          :fastq_gz => 'fastq',
          :sff => 'sff'
        }
        style = formats[format_symbol]
        if style.nil?
          raise "Unexpected download format detected #{format_symbol}, I need one of '#{formats.keys.join(', ')}'"
        end
        style
      end

      # Return the URL where a run can be downloaded. Only works if the accession is a run accession e.g. SRR000002 or DRR000002. To get run accessions from other accession types e.g. SRP000002, try Bio::SRA::Tables::SRA
      #
      # Options:
      # :source: either :ncbi (default), or :ebi
      # :format: either :sralite (default if :source is :ncbi), :fastq_gz (default if :source is :ebi), :sra
      # :layout: either :single (default), :paired1, or :paired2. :paired1 for the first half, :paired2 for the second half. Only required when :source => :ebi, otherwise not used
      #
      # The options hash passed in is never modified.
      #
      # Raises a RuntimeError if the accession is not a run accession, or if
      # the source, format or layout is not supported.
      def self.run_download_url(run_accession, options={})
        # Work on a copy so defaults filled in below do not leak back to the caller
        opts = (options || {}).dup
        opts[:source] ||= :ncbi
        if opts[:source] == :ebi
          opts[:format] ||= :fastq_gz
          opts[:layout] ||= :single
        else
          opts[:format] ||= :sralite #default to sralite
        end

        type = classify_accession_type(run_accession)
        unless type == Bio::SRA::RUN
          raise "Unexpected type of accession for '#{run_accession}': Expected #{Bio::SRA::RUN} but was #{type}"
        end

        style = format_symbol_to_standard_text opts[:format]
        style_extension = format_symbol_to_extension opts[:format]

        case opts[:source]
        when :ncbi
          # e.g.
          # ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/litesra/DRR/DRR000/DRR000002/DRR000002.lite.sra
          [
            "ftp://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun",
            style,
            run_accession[0..2],
            run_accession[0..5],
            run_accession,
            "#{run_accession}#{style_extension}"
          ].join('/')
        when :ebi
          unless style == 'fastq'
            raise "Unexpected format for download detected #{opts[:format]} in combination with :source => :ebi. Require :fastq_gz"
          end
          layout_suffix = EBI_LAYOUT_SUFFIXES[opts[:layout]]
          if layout_suffix.nil?
            raise "Unexpected layout for download detected #{opts[:layout]} in combination with :source => :ebi. Require one of #{EBI_LAYOUT_SUFFIXES.keys.join(', ')}."
          end
          # e.g. for paired ended
          # ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR069/SRR069027/SRR069027_1.fastq.gz
          # e.g. for single end
          # ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR404/SRR404303/SRR404303.fastq.gz
          [
            'ftp://ftp.sra.ebi.ac.uk/vol1',
            style,
            run_accession[0..5],
            run_accession,
            "#{run_accession}#{layout_suffix}#{style_extension}"
          ].join('/')
        else
          raise "Unexpected source for download detected #{opts[:source]}. Require one of ncbi, ebi"
        end
      end
    end
  end
end
@@ -0,0 +1,467 @@
1
+ module Bio
2
+ module SRA
3
+ module Tables
4
+ # > pp Bio::SRA::Tables::SRA.column_names
5
+ # ["sra_ID",
6
+ # "SRR_bamFile",
7
+ # "SRX_bamFile",
8
+ # "SRX_fastqFTP",
9
+ # "run_ID",
10
+ # "run_alias",
11
+ # "run_accession",
12
+ # "run_date",
13
+ # "updated_date",
14
+ # "spots",
15
+ # "bases",
16
+ # "run_center",
17
+ # "experiment_name",
18
+ # "run_url_link",
19
+ # "run_entrez_link",
20
+ # "run_attribute",
21
+ # "experiment_ID",
22
+ # "experiment_alias",
23
+ # "experiment_accession",
24
+ # "experiment_title",
25
+ # "study_name",
26
+ # "sample_name",
27
+ # "design_description",
28
+ # "library_name",
29
+ # "library_strategy",
30
+ # "library_source",
31
+ # "library_selection",
32
+ # "library_layout",
33
+ # "library_construction_protocol",
34
+ # "adapter_spec",
35
+ # "read_spec",
36
+ # "platform",
37
+ # "instrument_model",
38
+ # "instrument_name",
39
+ # "platform_parameters",
40
+ # "sequence_space",
41
+ # "base_caller",
42
+ # "quality_scorer",
43
+ # "number_of_levels",
44
+ # "multiplier",
45
+ # "qtype",
46
+ # "experiment_url_link",
47
+ # "experiment_entrez_link",
48
+ # "experiment_attribute",
49
+ # "sample_ID",
50
+ # "sample_alias",
51
+ # "sample_accession",
52
+ # "taxon_id",
53
+ # "common_name",
54
+ # "anonymized_name",
55
+ # "individual_name",
56
+ # "description",
57
+ # "sample_url_link",
58
+ # "sample_entrez_link",
59
+ # "sample_attribute",
60
+ # "study_ID",
61
+ # "study_alias",
62
+ # "study_accession",
63
+ # "study_title",
64
+ # "study_type",
65
+ # "study_abstract",
66
+ # "center_project_name",
67
+ # "study_description",
68
+ # "study_url_link",
69
+ # "study_entrez_link",
70
+ # "study_attribute",
71
+ # "related_studies",
72
+ # "primary_study",
73
+ # "submission_ID",
74
+ # "submission_accession",
75
+ # "submission_comment",
76
+ # "submission_center",
77
+ # "submission_lab",
78
+ # "submission_date"]
79
# ActiveRecord model over the 'sra' table (primary key sra_ID). Each row
# carries run, experiment, sample, study and submission columns, and links
# to the per-type tables through the corresponding *_ID columns.
class SRA < Connection
  self.table_name = 'sra'
  self.primary_key = 'sra_ID'

  # Foreign keys
  belongs_to :submission, :foreign_key => 'submission_ID', :class_name => 'Submission', :primary_key => 'submission_ID'
  belongs_to :experiment, :foreign_key => 'experiment_ID', :class_name => 'Experiment', :primary_key => 'experiment_ID'
  belongs_to :study, :foreign_key => 'study_ID', :class_name => 'Study', :primary_key => 'study_ID'
  belongs_to :sample, :foreign_key => 'sample_ID', :class_name => 'Sample', :primary_key => 'sample_ID'
  belongs_to :run, :foreign_key => 'run_ID', :class_name => 'Run', :primary_key => 'run_ID'

  # Scope for finding by an arbitrary SRA accession number e.g.
  # SRA.accession('SRA049809').all #=> SRA objects that are part of the SRA049809 submission
  # SRA.accession('SRR404303').first #=> SRA object for the SRR404303 run
  #
  # The column to match on is chosen from the accession's type. Returns a
  # relation built with +where+ rather than a legacy {:conditions => ...} hash.
  # Raises (via Bio::SRA::Accession) if the accession prefix is not recognised.
  scope :accession, lambda { |accession|
    column = Bio::SRA::Accession.accession_to_column_name(accession)
    where(column => accession)
  }

  # URLs of all the runs in this entry's study.
  #
  # options are passed through to #download_url for every run.
  def study_download_urls(options = {})
    SRA.where(:study_accession => study_accession).map do |run|
      run.download_url(options)
    end
  end

  # Return the URL where this SRA entry's run can be downloaded.
  #
  # options are handed to Bio::SRA::Accession.run_download_url together
  # with this row's run_accession.
  def download_url(options = {})
    Bio::SRA::Accession.run_download_url(run_accession, options)
  end
end
115
+
116
+ # pp Bio::SRA::Tables::Submission.column_names
117
+ # ["submission_ID",
118
+ # "submission_alias",
119
+ # "submission_accession",
120
+ # "submission_comment",
121
+ # "files",
122
+ # "broker_name",
123
+ # "center_name",
124
+ # "lab_name",
125
+ # "submission_date",
126
+ # "sra_link",
127
+ # "submission_url_link",
128
+ # "xref_link",
129
+ # "submission_entrez_link",
130
+ # "ddbj_link",
131
+ # "ena_link",
132
+ # "submission_attribute",
133
+ # "sradb_updated"]
134
# ActiveRecord model over the 'submission' table, keyed by submission_ID.
# A submission has many SRA rows, joined on submission_ID.
class Submission < Connection
  self.table_name = 'submission'
  self.primary_key = 'submission_ID'

  has_many(
    :sras,
    :foreign_key => 'submission_ID',
    :class_name  => 'SRA'
  )
end
139
+
140
+ # pp Bio::SRA::Tables::Experiment.column_names
141
+ # ["experiment_ID",
142
+ # "bamFile",
143
+ # "fastqFTP",
144
+ # "experiment_alias",
145
+ # "experiment_accession",
146
+ # "broker_name",
147
+ # "center_name",
148
+ # "title",
149
+ # "study_name",
150
+ # "study_accession",
151
+ # "design_description",
152
+ # "sample_name",
153
+ # "sample_accession",
154
+ # "sample_member",
155
+ # "library_name",
156
+ # "library_strategy",
157
+ # "library_source",
158
+ # "library_selection",
159
+ # "library_layout",
160
+ # "targeted_loci",
161
+ # "library_construction_protocol",
162
+ # "spot_length",
163
+ # "adapter_spec",
164
+ # "read_spec",
165
+ # "platform",
166
+ # "instrument_model",
167
+ # "platform_parameters",
168
+ # "sequence_space",
169
+ # "base_caller",
170
+ # "quality_scorer",
171
+ # "number_of_levels",
172
+ # "multiplier",
173
+ # "qtype",
174
+ # "sra_link",
175
+ # "experiment_url_link",
176
+ # "xref_link",
177
+ # "experiment_entrez_link",
178
+ # "ddbj_link",
179
+ # "ena_link",
180
+ # "experiment_attribute",
181
+ # "submission_accession",
182
+ # "sradb_updated"]
183
# ActiveRecord model over the 'experiment' table, keyed by experiment_ID.
# An experiment has many SRA rows, joined on experiment_ID.
class Experiment < Connection
  self.table_name = 'experiment'
  self.primary_key = 'experiment_ID'

  has_many(
    :sras,
    :foreign_key => 'experiment_ID',
    :class_name  => 'SRA'
  )
end
188
+
189
+ # pp Bio::SRA::Tables::Run.column_names
190
+ # ["run_ID",
191
+ # "bamFile",
192
+ # "run_alias",
193
+ # "run_accession",
194
+ # "broker_name",
195
+ # "instrument_name",
196
+ # "run_date",
197
+ # "run_file",
198
+ # "run_center",
199
+ # "total_data_blocks",
200
+ # "experiment_accession",
201
+ # "experiment_name",
202
+ # "sra_link",
203
+ # "run_url_link",
204
+ # "xref_link",
205
+ # "run_entrez_link",
206
+ # "ddbj_link",
207
+ # "ena_link",
208
+ # "run_attribute",
209
+ # "submission_accession",
210
+ # "sradb_updated"]
211
# ActiveRecord model over the 'run' table, keyed by run_ID.
# A run has many SRA rows, joined on run_ID.
class Run < Connection
  self.table_name = 'run'
  self.primary_key = 'run_ID'

  has_many(
    :sras,
    :foreign_key => 'run_ID',
    :class_name  => 'SRA'
  )
end
216
+
217
+ # pp Bio::SRA::Tables::Sample.column_names
218
+ # ["sample_ID",
219
+ # "sample_alias",
220
+ # "sample_accession",
221
+ # "broker_name",
222
+ # "center_name",
223
+ # "taxon_id",
224
+ # "scientific_name",
225
+ # "common_name",
226
+ # "anonymized_name",
227
+ # "individual_name",
228
+ # "description",
229
+ # "sra_link",
230
+ # "sample_url_link",
231
+ # "xref_link",
232
+ # "sample_entrez_link",
233
+ # "ddbj_link",
234
+ # "ena_link",
235
+ # "sample_attribute",
236
+ # "submission_accession",
237
+ # "sradb_updated"]
238
# ActiveRecord model over the 'sample' table, keyed by sample_ID.
# A sample has many SRA rows, joined on sample_ID.
class Sample < Connection
  self.table_name = 'sample'
  self.primary_key = 'sample_ID'

  has_many(
    :sras,
    :foreign_key => 'sample_ID',
    :class_name  => 'SRA'
  )
end
243
+
244
+ # pp Bio::SRA::Tables::Study.column_names
245
+ # ["study_ID",
246
+ # "study_alias",
247
+ # "study_accession",
248
+ # "study_title",
249
+ # "study_type",
250
+ # "study_abstract",
251
+ # "broker_name",
252
+ # "center_name",
253
+ # "center_project_name",
254
+ # "study_description",
255
+ # "related_studies",
256
+ # "primary_study",
257
+ # "sra_link",
258
+ # "study_url_link",
259
+ # "xref_link",
260
+ # "study_entrez_link",
261
+ # "ddbj_link",
262
+ # "ena_link",
263
+ # "study_attribute",
264
+ # "submission_accession",
265
+ # "sradb_updated"]
266
# ActiveRecord model over the 'study' table, keyed by study_ID.
# A study has many SRA rows, joined on study_ID.
class Study < Connection
  self.table_name = 'study'
  self.primary_key = 'study_ID'

  has_many(
    :sras,
    :foreign_key => 'study_ID',
    :class_name  => 'SRA'
  )
end
271
+
272
+ # > pp Bio::SRA::Tables::SRAFt.column_names
273
+ # ["SRR_bamFile",
274
+ # "SRX_bamFile",
275
+ # "SRX_fastqFTP",
276
+ # "run_ID",
277
+ # "run_alias",
278
+ # "run_accession",
279
+ # "run_date",
280
+ # "updated_date",
281
+ # "spots",
282
+ # "bases",
283
+ # "run_center",
284
+ # "experiment_name",
285
+ # "run_url_link",
286
+ # "run_entrez_link",
287
+ # "run_attribute",
288
+ # "experiment_ID",
289
+ # "experiment_alias",
290
+ # "experiment_accession",
291
+ # "experiment_title",
292
+ # "study_name",
293
+ # "sample_name",
294
+ # "design_description",
295
+ # "library_name",
296
+ # "library_strategy",
297
+ # "library_source",
298
+ # "library_selection",
299
+ # "library_layout",
300
+ # "library_construction_protocol",
301
+ # "adapter_spec",
302
+ # "read_spec",
303
+ # "platform",
304
+ # "instrument_model",
305
+ # "instrument_name",
306
+ # "platform_parameters",
307
+ # "sequence_space",
308
+ # "base_caller",
309
+ # "quality_scorer",
310
+ # "number_of_levels",
311
+ # "multiplier",
312
+ # "qtype",
313
+ # "experiment_url_link",
314
+ # "experiment_entrez_link",
315
+ # "experiment_attribute",
316
+ # "sample_ID",
317
+ # "sample_alias",
318
+ # "sample_accession",
319
+ # "taxon_id",
320
+ # "common_name",
321
+ # "anonymized_name",
322
+ # "individual_name",
323
+ # "description",
324
+ # "sample_url_link",
325
+ # "sample_entrez_link",
326
+ # "sample_attribute",
327
+ # "study_ID",
328
+ # "study_alias",
329
+ # "study_accession",
330
+ # "study_title",
331
+ # "study_type",
332
+ # "study_abstract",
333
+ # "center_project_name",
334
+ # "study_description",
335
+ # "study_url_link",
336
+ # "study_entrez_link",
337
+ # "study_attribute",
338
+ # "related_studies",
339
+ # "primary_study",
340
+ # "submission_ID",
341
+ # "submission_accession",
342
+ # "submission_comment",
343
+ # "submission_center",
344
+ # "submission_lab",
345
+ # "submission_date",
346
+ # "sradb_updated"]
347
# ActiveRecord model over the 'sra_ft' table.
# NOTE(review): presumably a full-text-search copy of the sra table
# (the name and columns suggest so) - confirm against the database schema.
class SRAFt < Connection
  self.table_name = 'sra_ft'
end
350
+
351
+ # pp Bio::SRA::Tables::SRAFtContent.column_names
352
+ # ["docid",
353
+ # "c0SRR_bamFile",
354
+ # "c1SRX_bamFile",
355
+ # "c2SRX_fastqFTP",
356
+ # "c3run_ID",
357
+ # "c4run_alias",
358
+ # "c5run_accession",
359
+ # "c6run_date",
360
+ # "c7updated_date",
361
+ # "c8spots",
362
+ # "c9bases",
363
+ # "c10run_center",
364
+ # "c11experiment_name",
365
+ # "c12run_url_link",
366
+ # "c13run_entrez_link",
367
+ # "c14run_attribute",
368
+ # "c15experiment_ID",
369
+ # "c16experiment_alias",
370
+ # "c17experiment_accession",
371
+ # "c18experiment_title",
372
+ # "c19study_name",
373
+ # "c20sample_name",
374
+ # "c21design_description",
375
+ # "c22library_name",
376
+ # "c23library_strategy",
377
+ # "c24library_source",
378
+ # "c25library_selection",
379
+ # "c26library_layout",
380
+ # "c27library_construction_protocol",
381
+ # "c28adapter_spec",
382
+ # "c29read_spec",
383
+ # "c30platform",
384
+ # "c31instrument_model",
385
+ # "c32instrument_name",
386
+ # "c33platform_parameters",
387
+ # "c34sequence_space",
388
+ # "c35base_caller",
389
+ # "c36quality_scorer",
390
+ # "c37number_of_levels",
391
+ # "c38multiplier",
392
+ # "c39qtype",
393
+ # "c40experiment_url_link",
394
+ # "c41experiment_entrez_link",
395
+ # "c42experiment_attribute",
396
+ # "c43sample_ID",
397
+ # "c44sample_alias",
398
+ # "c45sample_accession",
399
+ # "c46taxon_id",
400
+ # "c47common_name",
401
+ # "c48anonymized_name",
402
+ # "c49individual_name",
403
+ # "c50description",
404
+ # "c51sample_url_link",
405
+ # "c52sample_entrez_link",
406
+ # "c53sample_attribute",
407
+ # "c54study_ID",
408
+ # "c55study_alias",
409
+ # "c56study_accession",
410
+ # "c57study_title",
411
+ # "c58study_type",
412
+ # "c59study_abstract",
413
+ # "c60center_project_name",
414
+ # "c61study_description",
415
+ # "c62study_url_link",
416
+ # "c63study_entrez_link",
417
+ # "c64study_attribute",
418
+ # "c65related_studies",
419
+ # "c66primary_study",
420
+ # "c67submission_ID",
421
+ # "c68submission_accession",
422
+ # "c69submission_comment",
423
+ # "c70submission_center",
424
+ # "c71submission_lab",
425
+ # "c72submission_date",
426
+ # "c73sradb_updated"]
427
# ActiveRecord model over the 'sra_ft_content' table.
# NOTE(review): the docid / cN<column> naming looks like the content shadow
# table SQLite keeps for a full-text index - confirm before relying on it.
class SRAFtContent < Connection
  self.table_name = 'sra_ft_content'
end
430
+
431
+ # pp Bio::SRA::Tables::SRAFtSegDir.column_names
432
+ # ["level", "idx", "start_block", "leaves_end_block", "end_block", "root"]
433
# ActiveRecord model over the 'sra_ft_segdir' table.
# NOTE(review): presumably internal bookkeeping of the full-text index
# rather than SRA metadata - confirm against the database schema.
class SRAFtSegDir < Connection
  self.table_name = 'sra_ft_segdir'
end
436
+
437
+ # pp Bio::SRA::Tables::SRAFtSegments.column_names
438
+ # ["blockid", "block"]
439
# ActiveRecord model over the 'sra_ft_segments' table.
# NOTE(review): presumably internal bookkeeping of the full-text index
# rather than SRA metadata - confirm against the database schema.
class SRAFtSegments < Connection
  self.table_name = 'sra_ft_segments'
end
442
+
443
+ # pp Bio::SRA::Tables::MetaInfo.column_names
444
+ # ["name", "value"]
445
# ActiveRecord model over the 'metaInfo' table, which has
# just two columns: name and value.
class MetaInfo < Connection
  self.table_name = 'metaInfo'
end
448
+
449
+ # This table holds information about each of the columns
450
+ # in this SRAmetadb database
451
+ #
452
+ # pp Bio::SRA::Tables::ColDesc.column_names
453
+ # ["col_desc_ID",
454
+ # "table_name",
455
+ # "field_name",
456
+ # "type",
457
+ # "description",
458
+ # "value_list",
459
+ # "sradb_updated"]
460
# ActiveRecord model over the 'col_desc' table, keyed by col_desc_ID.
class ColDesc < Connection
  self.table_name = 'col_desc'
  self.primary_key = 'col_desc_ID'

  # The table has a plain data column called "type"; clearing the inheritance
  # column stops ActiveRecord from treating it as a single-table-inheritance
  # discriminator.
  self.inheritance_column = nil
end
465
+ end
466
+ end
467
+ end