solvebio 1.6.1 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.bumpversion.cfg +6 -0
- data/.gitignore +5 -4
- data/.travis.yml +1 -1
- data/Gemfile +3 -0
- data/README.md +34 -34
- data/Rakefile +1 -18
- data/bin/solvebio.rb +14 -16
- data/installer +64 -0
- data/lib/solvebio.rb +50 -11
- data/lib/solvebio/acccount.rb +4 -0
- data/lib/solvebio/annotation.rb +11 -0
- data/lib/solvebio/api_operations.rb +147 -0
- data/lib/solvebio/api_resource.rb +32 -0
- data/lib/solvebio/cli.rb +75 -0
- data/lib/solvebio/cli/auth.rb +106 -0
- data/lib/solvebio/cli/credentials.rb +54 -0
- data/lib/{cli → solvebio/cli}/irb.rb +0 -23
- data/lib/solvebio/cli/irbrc.rb +48 -0
- data/lib/solvebio/cli/tutorial.rb +12 -0
- data/lib/solvebio/client.rb +149 -0
- data/lib/solvebio/dataset.rb +60 -0
- data/lib/solvebio/dataset_field.rb +12 -0
- data/lib/solvebio/depository.rb +38 -0
- data/lib/solvebio/depository_version.rb +40 -0
- data/lib/solvebio/errors.rb +64 -0
- data/lib/solvebio/filter.rb +315 -0
- data/lib/solvebio/list_object.rb +73 -0
- data/lib/solvebio/locale.rb +43 -0
- data/lib/solvebio/query.rb +341 -0
- data/lib/solvebio/sample.rb +54 -0
- data/lib/solvebio/singleton_api_resource.rb +25 -0
- data/lib/solvebio/solve_object.rb +164 -0
- data/lib/solvebio/tabulate.rb +589 -0
- data/lib/solvebio/user.rb +4 -0
- data/lib/solvebio/util.rb +59 -0
- data/lib/solvebio/version.rb +3 -0
- data/solvebio.gemspec +10 -18
- data/test/helper.rb +6 -2
- data/test/solvebio/data/.gitignore +1 -0
- data/test/solvebio/data/.netrc +6 -0
- data/test/{data → solvebio/data}/netrc-save +0 -0
- data/test/solvebio/data/sample.vcf.gz +0 -0
- data/test/solvebio/data/test_creds +3 -0
- data/test/solvebio/test_annotation.rb +45 -0
- data/test/solvebio/test_client.rb +29 -0
- data/test/solvebio/test_conversion.rb +14 -0
- data/test/solvebio/test_credentials.rb +67 -0
- data/test/solvebio/test_dataset.rb +52 -0
- data/test/solvebio/test_depository.rb +24 -0
- data/test/solvebio/test_depositoryversion.rb +22 -0
- data/test/solvebio/test_error.rb +31 -0
- data/test/solvebio/test_filter.rb +86 -0
- data/test/solvebio/test_query.rb +282 -0
- data/test/solvebio/test_query_batch.rb +38 -0
- data/test/solvebio/test_query_init.rb +30 -0
- data/test/solvebio/test_query_tabulate.rb +73 -0
- data/test/solvebio/test_ratelimit.rb +31 -0
- data/test/solvebio/test_resource.rb +29 -0
- data/test/solvebio/test_sample_access.rb +60 -0
- data/test/solvebio/test_sample_download.rb +20 -0
- data/test/solvebio/test_tabulate.rb +129 -0
- data/test/solvebio/test_util.rb +39 -0
- metadata +100 -85
- data/Makefile +0 -17
- data/demo/README.md +0 -14
- data/demo/cheatsheet.rb +0 -31
- data/demo/dataset/facets.rb +0 -13
- data/demo/dataset/field.rb +0 -13
- data/demo/depository/README.md +0 -24
- data/demo/depository/all.rb +0 -13
- data/demo/depository/retrieve.rb +0 -13
- data/demo/depository/versions-all.rb +0 -13
- data/demo/query/query-filter.rb +0 -30
- data/demo/query/query.rb +0 -13
- data/demo/query/range-filter.rb +0 -18
- data/demo/test-api.rb +0 -98
- data/lib/cli/auth.rb +0 -122
- data/lib/cli/help.rb +0 -13
- data/lib/cli/irbrc.rb +0 -54
- data/lib/cli/options.rb +0 -75
- data/lib/client.rb +0 -154
- data/lib/credentials.rb +0 -67
- data/lib/errors.rb +0 -81
- data/lib/filter.rb +0 -312
- data/lib/locale.rb +0 -47
- data/lib/main.rb +0 -46
- data/lib/query.rb +0 -414
- data/lib/resource/annotation.rb +0 -23
- data/lib/resource/apiresource.rb +0 -241
- data/lib/resource/dataset.rb +0 -91
- data/lib/resource/datasetfield.rb +0 -37
- data/lib/resource/depository.rb +0 -50
- data/lib/resource/depositoryversion.rb +0 -69
- data/lib/resource/main.rb +0 -123
- data/lib/resource/sample.rb +0 -75
- data/lib/resource/solveobject.rb +0 -122
- data/lib/resource/user.rb +0 -5
- data/lib/tabulate.rb +0 -706
- data/lib/util.rb +0 -29
- data/test/Makefile +0 -9
- data/test/data/sample.vcf.gz +0 -0
- data/test/test-annotation.rb +0 -46
- data/test/test-auth.rb +0 -58
- data/test/test-client.rb +0 -27
- data/test/test-conversion.rb +0 -13
- data/test/test-dataset.rb +0 -42
- data/test/test-depository.rb +0 -35
- data/test/test-error.rb +0 -36
- data/test/test-filter.rb +0 -70
- data/test/test-netrc.rb +0 -52
- data/test/test-query-batch.rb +0 -40
- data/test/test-query-init.rb +0 -29
- data/test/test-query-paging.rb +0 -102
- data/test/test-query.rb +0 -71
- data/test/test-resource.rb +0 -40
- data/test/test-sample-access.rb +0 -59
- data/test/test-sample-download.rb +0 -20
- data/test/test-tabulate.rb +0 -131
- data/test/test-util.rb +0 -42
data/lib/filter.rb
DELETED
|
@@ -1,312 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
require_relative 'main'
|
|
3
|
-
|
|
4
|
-
# SolveBio::Filter objects.
|
|
5
|
-
#
|
|
6
|
-
# Makes it easier to create filters cumulatively using ``&`` (and),
|
|
7
|
-
# ``|`` (or) and ``~`` (not) operations.
|
|
8
|
-
#
|
|
9
|
-
# == Example
|
|
10
|
-
#
|
|
11
|
-
# require 'solvebio'
|
|
12
|
-
|
|
13
|
-
# f = SolveBio::Filter.new #=> <Filter []>
|
|
14
|
-
|
|
15
|
-
# f &= SolveBio::Filter.new :price => 'Free' #=> <Filter [[:price, "Free"]]>
|
|
16
|
-
|
|
17
|
-
# f |= SolveBio::Filter.new :style => 'Mexican' #=> <Filter [{:or=>[[:price, "Free"], [:style, "Mexican"]]}]>
|
|
18
|
-
#
|
|
19
|
-
# The final result is a filter that can be used in a query which matches
|
|
20
|
-
# "price = 'Free' or style = 'Mexican'".
|
|
21
|
-
#
|
|
22
|
-
# By default, all key/value pairs are AND'ed together. However, you can change that
|
|
23
|
-
# to OR by passing in +:or+ as the last argument.
|
|
24
|
-
#
|
|
25
|
-
# * `<field>='value'` is a term filter: it matches when the field equals the exact term
|
|
26
|
-
# * `<field>__in=[<item1>, ...]` matches any of the terms <item1> and so on
|
|
27
|
-
# * `<field>__range=[<start>, <end>]` matches anything from <start> to <end>
|
|
28
|
-
# * `<field>__between=[<start>, <end>]` matches anything between <start> and <end>, not including either <start> or <end>
|
|
29
|
-
#
|
|
30
|
-
# String terms are not analyzed and are always assumed to be exact matches.
|
|
31
|
-
#
|
|
32
|
-
# Numeric columns can be selected by range using:
|
|
33
|
-
#
|
|
34
|
-
# * `<field>__gt`: greater than
|
|
35
|
-
# * `<field>__gte`: greater than or equal to
|
|
36
|
-
# * `<field>__lt`: less than
|
|
37
|
-
# * `<field>__lte`: less than or equal to
|
|
38
|
-
#
|
|
39
|
-
# Field action examples:
|
|
40
|
-
#
|
|
41
|
-
# dataset.query(:gene__in => ['BRCA', 'GATA3'],
|
|
42
|
-
# :chr => '3',
|
|
43
|
-
# :start__gt => 10000,
|
|
44
|
-
# :end__lte => 20000)
|
|
45
|
-
|
|
46
|
-
module SolveBio
  # A composable query filter.
  #
  # A Filter wraps an Array (+filters+) whose elements are either
  # two-element <tt>[field, value]</tt> tuples or single-key Hashes of the
  # form <tt>{connective => operand}</tt>, where the connective is +:and+,
  # +:or+ or +:not+. Filters are combined with <tt>&</tt>, <tt>|</tt> and
  # <tt>~</tt>; every operator returns a new Filter and leaves its operands
  # untouched.
  class Filter

    # Field operations accepted after the last double underscore of a
    # field name (for example +:start__gt+).
    FIELD_OPERATIONS = %w(gt gte lt lte in range between).freeze

    attr_accessor :filters

    # Creates a new Filter.
    #
    # filters:: a Hash of field => value pairs (processed in sorted key
    #           order), an Array of <tt>[field, value]</tt> tuples, or
    #           another Filter (whose contents are deep-copied).
    # conn::    connective used to join the entries when more than one is
    #           given; ignored when +filters+ is a Filter.
    #
    # Raises TypeError for any other argument type and whatever
    # Filter.normalize raises for malformed tuples.
    def initialize(filters={}, conn=:and)
      case filters
      when Hash
        tuples = filters.keys.sort.map { |key| [key, filters[key]] }
        @filters = SolveBio::Filter.normalize(tuples)
      when Array
        @filters = SolveBio::Filter.normalize(filters)
      when SolveBio::Filter
        @filters = SolveBio::Filter.deep_copy(filters.filters)
        return self
      else
        raise TypeError, "Invalid filter type #{filters.class}"
      end

      # Several entries are always wrapped in a single connective Hash.
      @filters = [{conn => @filters}] if filters.size > 1
      self
    end

    def inspect
      "<SolveBio::Filter #{@filters.inspect}>"
    end

    # True when the filter holds no conditions.
    def empty?
      @filters.empty?
    end

    # Deep copy: the returned Filter shares no structure with the receiver.
    def clone
      SolveBio::Filter.deep_copy(self)
    end

    # Returns a new Filter holding the conditions of the receiver and of
    # +other+ joined by +conn+ (+:and+ or +:or+).
    #
    # Neither operand is modified. When one side is empty a copy of the
    # other side is returned. When one side is already headed by +conn+
    # the other side's conditions are appended to it instead of nesting.
    def combine(other, conn=:and)
      return other.clone if self.empty?
      return self.clone if other.empty?

      if headed_by?(self.filters, conn)
        merged = self.clone
        merged.filters[0][conn] += SolveBio::Filter.deep_copy(other.filters)
      elsif headed_by?(other.filters, conn)
        merged = other.clone
        merged.filters[0][conn] += SolveBio::Filter.deep_copy(self.filters)
      else
        # Build a fresh object. Re-running #initialize on the receiver
        # (as this method used to) overwrote the receiver and pushed
        # connective Hashes through Filter.normalize, which rejects them.
        merged = SolveBio::Filter.new
        merged.filters = [{conn => SolveBio::Filter.deep_copy(self.filters) +
                                   SolveBio::Filter.deep_copy(other.filters)}]
      end

      merged
    end

    def |(other)
      combine(other, :or)
    end

    def &(other)
      combine(other, :and)
    end

    # Logical negation. Negating an empty filter yields an empty filter and
    # negating an already negated filter removes the +:not+ wrapper.
    def ~
      negated = self.clone
      return negated if negated.empty?

      # Only the first entry is considered; a non-empty filter is expected
      # to hold a single tuple or a single connective Hash.
      head = negated.filters.first
      negated.filters =
        if head.kind_of?(Hash) && head.member?(:not)
          [head[:not]]
        else
          [{:not => head}]
        end
      negated
    end

    # Checks and normalizes an Array of <tt>[field, value]</tt> tuples and
    # returns the normalized Array.
    #
    # * +gt+/+gte+/+lt+/+lte+ values are converted with Kernel#Float.
    # * +range+/+between+ values may be a Range (converted to
    #   <tt>[min, max]</tt>) or a two-element Array with start <= end.
    # * +in+ values must be an Array.
    #
    # Raises TypeError for malformed tuples, unknown operations and values
    # of the wrong type, and IndexError when a range start exceeds its end.
    def self.normalize(ary)
      ary.map do |tuple|
        unless tuple.kind_of?(Array)
          raise TypeError, "Invalid filter element #{tuple.class}; want Array"
        end
        unless tuple.size == 2
          raise TypeError, "filter element size must be 2; is #{tuple.size}"
        end

        key, value = tuple
        operation = /.+__(.+)$/.match(key.to_s)
        next tuple unless operation

        op = operation[1]
        unless FIELD_OPERATIONS.member?(op)
          raise TypeError, "Invalid field operation #{op} in #{key}"
        end

        case op
        when 'gt', 'gte', 'lt', 'lte'
          begin
            value = Float(value)
          rescue StandardError
            raise TypeError,
                  "Invalid field value #{value} for #{key}; " +
                  "should be a number"
          end
          [key, value]
        when 'range', 'between'
          value = [value.min, value.max] if value.kind_of?(Range)
          unless value.kind_of?(Array)
            raise TypeError,
                  "Invalid field value #{value} for #{key}; " +
                  "should be an array"
          end
          unless value.size == 2
            raise TypeError,
                  "Invalid field value #{value} for #{key}; " +
                  "array should have exactly two values"
          end
          if value.first > value.last
            raise IndexError,
                  "Invalid field value #{value} for #{key}; " +
                  "start value greater than end value"
          end
          # FIXME: Should we check that value contains only numbers?
          [key, value]
        else # 'in'
          unless value.kind_of?(Array)
            raise TypeError,
                  "Invalid field value #{value} for #{key}; " +
                  "should be an array"
          end
          tuple
        end
      end
    end

    # Deep-copies +obj+ through a Marshal round trip.
    def self.deep_copy(obj)
      Marshal.load(Marshal.dump(obj))
    end

    # Takes an Array of filter items and returns an Array that can be
    # passed off (when converted to JSON) to a SolveBio client filter
    # parameter. As such, the output format is highly dependent on the
    # SolveBio API format.
    #
    # Each item may be a SolveBio::Filter (its conditions are spliced into
    # the result), a single-key connective Hash (its operand is processed
    # recursively) or a <tt>[field, value]</tt> tuple (kept as is).
    #
    # Raises TypeError for any other item class.
    def self.process_filters(filters)
      filters.each_with_object([]) do |item, processed|
        case item
        when SolveBio::Filter
          processed.concat(process_filters(item.filters)) if item.filters
        when Hash
          connective = item.keys[0]
          processed << {connective => process_operand(item[connective])}
        when Array
          processed << item
        else
          raise TypeError, "Invalid filter class #{item.class}"
        end
      end
    end

    # Processes the operand of a connective Hash: a nested connective Hash,
    # a single tuple (as produced by negating a one-tuple filter) or a list
    # of filter items.
    def self.process_operand(operand)
      if operand.kind_of?(Hash)
        process_filters([operand]).first
      elsif leaf_tuple?(operand)
        operand
      else
        process_filters(operand)
      end
    end

    # True when +obj+ looks like a single <tt>[field, value]</tt> tuple
    # rather than a list of filter items.
    def self.leaf_tuple?(obj)
      obj.kind_of?(Array) && obj.size == 2 &&
        (obj.first.kind_of?(String) || obj.first.kind_of?(Symbol))
    end

    private_class_method :process_operand, :leaf_tuple?

    private

    # True when the first entry of +filters+ is a connective Hash keyed by
    # +conn+. Tuples are never treated as connectives, even when their
    # field happens to be named like one.
    def headed_by?(filters, conn)
      head = filters.first
      head.kind_of?(Hash) && head.key?(conn)
    end
  end
end
|
|
238
|
-
|
|
239
|
-
# Helper class that generates Range Filters from UCSC-style ranges.
|
|
240
|
-
# Filter over the "<build>_start", "<build>_end" and "<build>_chromosome"
# fields of a genome build.
class SolveBio::RangeFilter < SolveBio::Filter
  SUPPORTED_BUILDS = ['hg18', 'hg19', 'hg38']

  # Builds a RangeFilter from a UCSC-style range such as
  # "hg19:chr1:100-200" or "hg19:chr1:1,000-2,000". A single position
  # ("hg19:chr1:100") is treated as a range of one.
  #
  # string::  "<build>:<chromosome>:<start>[-<end>]"; commas in the
  #           positions are ignored.
  # overlap:: passed through to #initialize.
  #
  # Raises ArgumentError when the string does not have three
  # colon-separated parts or a position is not a base-10 integer.
  def self.from_string(string, overlap=false)
    usage = 'Please use UCSC-style format: "hg19:chr2:1000-2000"'

    parts = string.to_s.split(':')
    raise ArgumentError, usage unless parts.size == 3
    build, chromosome, pos = parts

    pos = pos.delete(',')
    start, last = pos.include?('-') ? pos.split('-', 2) : [pos, pos]

    begin
      # Convert to integers so the start <= end check done by
      # SolveBio::Filter.normalize compares numbers, not strings.
      start = Integer(start, 10)
      last = Integer(last, 10)
    rescue ArgumentError, TypeError
      raise ArgumentError, usage
    end

    new(build, chromosome, start, last, overlap)
  end

  # Shortcut to do range queries on supported datasets.
  #
  # build::      one of SUPPORTED_BUILDS (compared case-insensitively; the
  #              value is used verbatim as the field-name prefix).
  # chromosome:: chromosome name; the first "chr" is stripped.
  # start, last:: bounds of the range, with start <= last.
  # overlap::    when true, a record matches if its start OR its end lies
  #              in the range; otherwise both must lie in the range.
  #
  # Raises ArgumentError for an unsupported build.
  def initialize(build, chromosome, start, last, overlap=false)
    unless SUPPORTED_BUILDS.member?(build.downcase)
      msg = "Build #{build} not supported for range filters. " +
            "Supported builds are: #{SUPPORTED_BUILDS.join(', ')}"
      raise ArgumentError, msg
    end

    # Filter.new validates both range tuples and wraps them in a single
    # {connective => [...]} Hash.
    positions = SolveBio::Filter.
      new([["#{build}_start__range", [start, last]],
           ["#{build}_end__range", [start, last]]],
          overlap ? :or : :and).filters
    chromosome_tuple = SolveBio::Filter.
      new({"#{build}_chromosome" => chromosome.sub('chr', '')}).filters

    # The connective structure is assembled here rather than with
    # Filter#& so that the OR group can be nested inside the AND.
    @filters =
      if overlap
        [{:and => positions + chromosome_tuple}]
      else
        [{:and => positions.first[:and] + chromosome_tuple}]
      end
  end

  def inspect
    "<RangeFilter #{@filters}>"
  end
end
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
# Demo/test code
|
|
291
|
-
if $PROGRAM_NAME == __FILE__
  # Prints a filter and whether it is empty using the given format string.
  report = lambda do |template, filter|
    puts template % [filter.inspect, filter.empty?]
  end

  # Three alternatives for the same field, OR'ed together left to right.
  omim_filters = [144650, 144600, 145300].
    map { |omim_id| SolveBio::Filter.new(:omim_id => omim_id) }.
    inject { |combined, filter| combined | filter }
  puts omim_filters.inspect

  puts SolveBio::Filter.process_filters([[:omim_id, nil]]).inspect

  # An empty filter and its negation.
  blank = SolveBio::Filter.new
  report.call("%s, empty?: %s", blank)
  report.call("%s, empty?: %s", ~blank)

  # A two-field filter and its negation.
  free_mexican = SolveBio::Filter.new({:style => 'Mexican', :price => 'Free'})
  report.call("%s, empty? %s", free_mexican)
  report.call("%s, empty? %s", ~free_mexican)

  # FIXME: using a hash means we can't repeat chr1. Is this intended?
  either_chr = SolveBio::Filter.new({:chr1 => '3', :chr2 => '4'}, :or)
  report.call("%s, empty %s", either_chr)

  either_chr =
    SolveBio::Filter.new({:chr1 => '3'}) | SolveBio::Filter.new({:chr2 => '4'})
  report.call("%s, empty %s", either_chr)

  puts((either_chr & free_mexican).inspect)
end
|
data/lib/locale.rb
DELETED
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
require_relative 'main'
|
|
2
|
-
module SolveBio
  # Number formatting helpers. When the optional r18n-core gem can be
  # loaded, formatting is delegated to it; otherwise a built-in fallback
  # groups digits with Locale.thousands_sep.
  module Locale

    # Used only if r18n-core is not around
    @thousands_sep = ','
    @locale = ENV['LANG'] || ENV['LC_NUMERIC'] || 'en_US.UTF-8'

    class << self
      # Separator inserted between digit groups by the fallback formatter.
      attr_accessor :thousands_sep
    end

    # Instance-level accessors, kept for classes that include this module.
    def thousands_sep
      @thousands_sep
    end

    def thousands_sep=(value)
      @thousands_sep = value
    end

    # Warnings are silenced while r18n-core loads; the previous setting is
    # restored whether or not the gem is available.
    saved_verbose = $VERBOSE
    have_r18n =
      begin
        $VERBOSE = false
        require 'r18n-core'
        R18n.set(@locale)
        true
      rescue LoadError
        false
      ensure
        $VERBOSE = saved_verbose
      end

    if have_r18n
      # Formats +num+ through R18n for the configured locale.
      def pretty_int(num)
        R18n::l(num)
      end
    else
      # Formats the integer +num+ with a separator between each group of
      # three digits, keeping a leading minus sign: -1234567 becomes
      # "-1,234,567" with the default separator.
      def pretty_int(num)
        text = num.to_s
        sign = text.start_with?('-') ? '-' : ''
        grouped = text.reverse.scan(/\d{1,3}/).
          join(SolveBio::Locale.thousands_sep).reverse
        sign + grouped
      end
    end

    module_function :pretty_int

  end
end
|
|
37
|
-
|
|
38
|
-
# Adds #pretty_int to every integer.
#
# Integer is reopened instead of Fixnum: Fixnum was merged into Integer in
# Ruby 2.4 and the constant was removed in Ruby 3.2, where `class Fixnum`
# would define a new, unrelated class. On older Rubies Integer is the
# parent of both Fixnum and Bignum, so the method is available there too.
class Integer
  include SolveBio::Locale

  # Returns the receiver formatted by SolveBio::Locale.pretty_int.
  def pretty_int
    SolveBio::Locale.pretty_int(self)
  end
end
|
|
44
|
-
|
|
45
|
-
# Smoke test, run only when this file is executed directly.
puts 10000.pretty_int if $PROGRAM_NAME == __FILE__
|
data/lib/main.rb
DELETED
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# SolveBio Ruby Client
|
|
3
|
-
# ~~~~~~~~~~~~~~~~~~~
|
|
4
|
-
#
|
|
5
|
-
# This is the Ruby client & library for the SolveBio API.
|
|
6
|
-
#
|
|
7
|
-
# Have questions or comments? email us at: contact@solvebio.com
|
|
8
|
-
|
|
9
|
-
require 'logger'
|
|
10
|
-
require 'fileutils'
|
|
11
|
-
|
|
12
|
-
# Top-level namespace: library version, API endpoint, API key and logger.
module SolveBio

  VERSION = '1.6.1'
  @api_key = ENV['SOLVEBIO_API_KEY']

  # Log to $SOLVEBIO_LOGFILE when set, otherwise to ~/.solvebio/solvebio.log.
  # Logging is best effort: if the file cannot be set up (for instance the
  # home directory is missing or read-only) a warning is printed and log
  # messages are discarded, so that requiring the library still succeeds.
  @logger =
    begin
      logfile = ENV['SOLVEBIO_LOGFILE']
      unless logfile
        dir = File.expand_path('~/.solvebio')
        FileUtils.mkdir_p(dir)
        logfile = File.join(dir, 'solvebio.log')
      end
      Logger.new(logfile)
    rescue SystemCallError, IOError, ArgumentError => e
      warn "SolveBio: logging disabled (#{e.message})"
      Logger.new(nil)
    end

  API_HOST = ENV['SOLVEBIO_API_HOST'] || 'https://api.solvebio.com'

  # Config info in reports and requests. Encapsulate more?
  RUBY_VERSION = RbConfig::CONFIG['RUBY_PROGRAM_VERSION']
  RUBY_IMPLEMENTATION = RbConfig::CONFIG['RUBY_SO_NAME']
  #PLATFORM = ???
  #PROCESSOR = ???
  ARCHITECTURE = RbConfig::CONFIG['arch']

  # The Logger shared by the library.
  def logger
    @logger
  end

  # The API key, initialised from $SOLVEBIO_API_KEY.
  def api_key
    @api_key
  end

  def api_key=(value)
    @api_key = value
  end

  module_function :logger, :api_key, :api_key=

end
|
data/lib/query.rb
DELETED
|
@@ -1,414 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
require 'pp'
|
|
3
|
-
require_relative 'client'
|
|
4
|
-
require_relative 'filter'
|
|
5
|
-
require_relative 'locale'
|
|
6
|
-
require_relative 'tabulate'
|
|
7
|
-
|
|
8
|
-
# A Query API request wrapper that generates a request from Filter
|
|
9
|
-
# objects, and can iterate through streaming result sets.
|
|
10
|
-
module SolveBio
  # Wraps requests to a dataset's data endpoint: builds the request from
  # the accumulated filters and iterates over the results, fetching further
  # pages as the iteration leaves the currently loaded window.
  class PagingQuery

    include Enumerable

    MAXIMUM_LIMIT ||= 100

    attr_accessor :filters
    attr_reader :dataset_id

    # dataset_id:: dataset identifier; must be convertible by Kernel#Integer.
    # params::     optional settings:
    #              :limit        - results per request (default MAXIMUM_LIMIT)
    #              :result_class - stored as given (default Hash)
    #              :debug        - sent with every request (default false)
    #              :fields       - sent with every request when set
    #
    # Raises TypeError when +dataset_id+ or :limit is not integer-like and
    # RangeError when :limit is negative.
    def initialize(dataset_id, params={})
      @dataset_id = dataset_id

      begin
        Integer(dataset_id)
      rescue StandardError
        raise TypeError, "'dataset_id' parameter must an Integer"
      end

      @data_url = "/v1/datasets/#{dataset_id}/data"

      @total = @results = @response = nil
      reset_range_window

      # results per request
      @limit = MAXIMUM_LIMIT
      if params.member?(:limit)
        begin
          @limit = Integer(params[:limit])
        rescue StandardError
          raise TypeError, "'limit' parameter must an Integer >= 0"
        end
      end

      @result_class = params[:result_class] || Hash
      @debug = params[:debug] || false
      @fields = params[:fields]
      @filters = []

      # parameter error checking
      if @limit < 0
        raise RangeError, "'limit' parameter must be >= 0"
      end
      self
    end

    # Total number of matching records reported by the server. Executes
    # the query first if no response has been received yet.
    def total
      warmup('Query total')
      @total = @response["total"]
    end

    # Returns a new query of the same class with the same settings, holding
    # the receiver's filters followed by +filters+. The receiver is not
    # modified and no request is made; the copy fetches its own results on
    # first use.
    def clone(filters=[])
      copy = self.class.new(@dataset_id,
                            {
                              :limit => @limit,
                              :result_class => @result_class,
                              :debug => @debug,
                              :fields => @fields
                            })

      copy.filters += @filters unless @filters.empty?
      copy.filters += filters unless filters.empty?

      copy
    end

    # Returns a copy of this query with additional filters; the receiver
    # is left untouched.
    #
    # +params+ is handed to SolveBio::Filter.new together with +conn+, so
    # it may be a Hash, an Array of tuples or a SolveBio::Filter built with
    # the ``&`` (and), ``|`` (or) and ``~`` (not) operators. Filters added
    # by separate calls are joined with AND when the request is built.
    def filter(params={}, conn=:and)
      clone(SolveBio::Filter.new(params, conn).filters)
    end

    # Shortcut to do range queries on supported datasets.
    #
    # NOTE(review): as written this calls +new+ on the query instance,
    # which instances do not respond to, so the method raises
    # NoMethodError. It presumably should build a range filter, but the
    # arguments here (no genome build, an extra strand) do not match
    # SolveBio::RangeFilter.new; left unchanged until the intended
    # signature is confirmed.
    def range(chromosome, start, last, strand=nil, overlap=true)
      # TODO: ensure dataset supports range queries?
      return self.
        clone([self.new(chromosome, start, last, strand, overlap)])
    end

    def size
      warmup('PagingQuery size')
      @total
    end
    alias_method :length, :size

    def empty?
      warmup('empty?')
      @total == 0
    end

    # Renders the first result as a table followed by the number of
    # remaining results.
    def to_s
      if total == 0 or @limit == 0
        return 'query returned 0 results'
      end

      # NOTE(review): ['Fields', 'Data'] and ['right', 'left'] used to be
      # passed as extra format arguments, which printed the first array
      # where the count belongs. They look like header and alignment
      # arguments intended for tabulate - confirm against its signature.
      sorted_items = SolveBio::Tabulate.
        tabulate(self[0].to_a.sort_by { |pair| pair[0] })
      "\n%s\n\n... %s more results." %
        [sorted_items, (@total - 1).pretty_int]
    end

    # Like #to_s, but renders the first result with pretty_inspect.
    def to_pp
      if total == 0 or @limit == 0
        return 'query returned 0 results'
      end
      "\n#{self[0].pretty_inspect}\n" +
        "\n... #{(@total-1).pretty_int} more results."
    end

    # The first result.
    def to_h
      self[0]
    end

    def inspect
      '<%s: @dataset_id=%s, @total=%s, @limit=%s, @debug=%s>' %
        [self.class, @dataset_id, @total ? @total : '?',
         @limit, @debug]
    end

    # Executes the query unless a response is already cached. +what+ names
    # the caller and is only used in the debug log.
    def warmup(what)
      unless @response
        SolveBio.logger.debug("warmup #{what}")
        execute
      end
    end

    # FIXME: consider creating instance variables from a response object
    # and exposing them with attr_reader, instead of a method_missing hook
    # that looks the method name up in @response.

    # Retrieve an item or range from the set of results.
    #
    # key:: a non-negative Integer or a non-negative, forward Range.
    #
    # Raises TypeError for other key types and IndexError for negative or
    # backwards keys.
    #
    # NOTE(review): an Integer key always returns the first cached result
    # and a Range key returns the first (end - begin) cached results,
    # whatever the offset; kept as is.
    def [](key)
      # warmup result set...
      warmup("[#{key}]")

      unless key.kind_of?(Range) || key.kind_of?(Integer)
        raise TypeError, "Expecting index value to be a Range or Fixnum; is #{key.class}"
      end
      if @limit < 0
        raise IndexError, 'Indexing not supporting when limit < 0.'
      end
      if key.kind_of?(Range)
        if key.begin < 0 or key.end < 0
          raise IndexError, 'Negative indexing is not supported'
        end
        if key.begin > key.end
          raise IndexError, 'Backwards indexing is not supported'
        end
      elsif key < 0
        raise IndexError, 'Negative indexing is not supported'
      end

      # FIXME: is it right that we can assume that the results are in
      # @results. Do I need another index check?
      result =
        if key.kind_of?(Range)
          @results[(0...key.end - key.begin)]
        else
          @request_range = self.to_range(key)
          @results[0]
        end
      # reset request range
      @request_range = (0..Float::INFINITY)
      result
    end

    # Required by Enumerable. Yields every result in the request range,
    # serving items from the loaded window and executing a new request
    # whenever the cursor leaves it. Returns self.
    def each(*pass)
      return self unless block_given?
      i = 0

      @delta = @request_range.end - @request_range.begin
      while i < total && i < @delta
        i_offset = i + @request_range.begin
        if @window_range.include?(i_offset)
          result_start = i_offset - @window_range.begin
          SolveBio.logger.debug('  PagingQuery window range: [%s...%s]' %
                                [result_start, result_start + 1])
        else
          SolveBio.logger.debug('executing query. offset/limit: %6d/%d' %
                                [i_offset, @limit])
          execute({:offset => i_offset, :limit => @limit})
          result_start = i % @limit
        end
        yield @results[result_start]
        @delta = @request_range.end - @request_range.begin
        i += 1
      end
      self
    end

    # range operations

    # Returns +range_or_idx+ if it is a Range, otherwise (idx..idx + 1).
    def to_range(range_or_idx)
      return range_or_idx if range_or_idx.kind_of?(Range)
      (range_or_idx..range_or_idx + 1)
    end

    def reset_request_range
      @request_range = (0..Float::INFINITY)
    end

    def reset_range_window
      @window = []
      @window_range = (0..Float::INFINITY)
      reset_request_range
    end

    # Builds the request body: limit, debug flag, the processed filters
    # (wrapped in a single :and when there are several) and, when set,
    # the field list.
    def build_query
      q = {
        :limit => @limit,
        :debug => @debug
      }

      if @filters
        filters = SolveBio::Filter.process_filters(@filters)
        q[:filters] = filters.size > 1 ? [{:and => filters}] : filters
      end

      q[:fields] = @fields if @fields

      q
    end

    # Executes a query and returns the request parameters and response.
    # Caches the response, the total and the results, and records which
    # offsets the loaded results cover.
    def execute(params={})
      _params = build_query()
      _params.merge!(params)
      SolveBio.logger.debug("querying dataset: #{_params}")

      @response = SolveBio::Client.client.post(@data_url, _params)
      @total = @response['total']
      SolveBio.logger.
        debug("query response took: #{@response['took']} ms, total: #{@total}")

      # update window
      offset = _params[:offset] || 0
      @results = @response['results']
      @window = @results
      @window_range = (offset ... offset + @results.size)

      return _params, @response
    end
  end
end
|
|
301
|
-
|
|
302
|
-
# A PagingQuery restricted to a single page: iteration and indexing never
# go past the first +limit+ results.
class SolveBio::Query < SolveBio::PagingQuery
  def initialize(dataset_id, params={})
    super
    return self
  end

  # Total number of matching records reported by the server.
  def total
    warmup('Query total')
    @total
  end

  # Number of results available through this query: the smaller of the
  # server-side total and the number of results actually loaded.
  def size
    warmup('Query size')
    [@total, @results.size].min
  end
  alias_method :length, :size

  # Required by Enumerable. Yields at most +limit+ results, serving items
  # from the loaded window and executing a request when the cursor leaves
  # it. Returns self.
  def each(*pass)
    return self unless block_given?
    i = 0
    while i < size && i < @limit
      i_offset = i + @request_range.begin
      if @window_range.include?(i_offset)
        result_start = i_offset - @window_range.begin
        SolveBio::logger.debug('  Query window range: [%s...%s]' %
                               [result_start, result_start + 1])
      else
        SolveBio::logger.debug('executing query. offset/limit: %6d/%d' %
                               [i_offset, @limit])
        execute({:offset => i_offset, :limit => @limit})
        result_start = i % @limit
      end
      yield @results[result_start]
      i += 1
    end
    self
  end

  # Returns the loaded result(s) at +key+ (an Integer or a Range).
  #
  # Raises IndexError when an Integer key lies at or beyond the end of the
  # loaded window, plus whatever PagingQuery#[] raises for invalid keys.
  def [](key)
    # Load the results first so the bounds check below sees the real
    # window rather than the unbounded placeholder set at construction.
    warmup("[#{key}]")

    if key.kind_of?(Integer) && key >= @window_range.end
      raise IndexError, "Invalid index #{key} >= #{@window_range.end}"
    end

    # super is called for its parameter checks only; its return value is
    # not the element at +key+, so the lookup is done here.
    super(key)
    @results[key]
  end
end
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
# BatchQuery accepts a list of Query objects and executes them
|
|
356
|
-
# in a single request to /v1/batch_query.
|
|
357
|
-
# Bundles several query objects so they can be sent to /v1/batch_query
# in one request.
class SolveBio::BatchQuery
  # queries:: a single query object or an Array of them.
  def initialize(queries)
    @queries = queries.kind_of?(Array) ? queries : [queries]
  end

  # Returns {:queries => [...]} with one entry per query: the query's own
  # request body plus its dataset id under :dataset.
  def build_query
    payloads = @queries.map do |member|
      member.build_query.merge!(:dataset => member.dataset_id)
    end

    {:queries => payloads}
  end

  # Posts the batch, merged with +params+, and returns the response.
  def execute(params={})
    payload = build_query.merge!(params)
    SolveBio::Client.client.post('/v1/batch_query', payload)
  end
end
|
|
386
|
-
|
|
387
|
-
# Demo/test code
|
|
388
|
-
if $PROGRAM_NAME == __FILE__
  if !SolveBio::api_key
    puts 'Set SolveBio::api_key to run demo'
  else
    # Query a public dataset and print the first page of results.
    test_dataset_name = 'ClinVar/2.0.0-1/Variants'
    require_relative 'solvebio'
    require_relative 'errors'
    dataset = SolveBio::Dataset.retrieve(test_dataset_name)

    # # A filter
    # limit = 5
    # results = dataset.query({:paging=>false, :limit => limit}).
    #   filter({:alternate_alleles => nil})
    # puts results.size

    limit = 2
    # results = dataset.query({:limit => limit, :paging =>false})
    # puts results.size
    # results.each_with_index { |val, i|
    #   puts "#{i}: #{val}"
    # }
    # puts "#{limit-1}: #{results[limit-1]}"
    paged_results = dataset.query({:limit => limit, :paging => true})
    # puts results.size
    puts paged_results.to_s
  end
end
|