solvebio 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. data/.gitignore +7 -0
  2. data/.travis.yml +13 -0
  3. data/Gemfile +4 -0
  4. data/Gemspec +3 -0
  5. data/LICENSE +21 -0
  6. data/Makefile +17 -0
  7. data/README.md +64 -0
  8. data/Rakefile +59 -0
  9. data/bin/solvebio.rb +36 -0
  10. data/demo/README.md +14 -0
  11. data/demo/dataset/facets.rb +13 -0
  12. data/demo/dataset/field.rb +13 -0
  13. data/demo/depository/README.md +24 -0
  14. data/demo/depository/all.rb +13 -0
  15. data/demo/depository/retrieve.rb +13 -0
  16. data/demo/depository/versions-all.rb +13 -0
  17. data/demo/query/query-filter.rb +30 -0
  18. data/demo/query/query.rb +13 -0
  19. data/demo/query/range-filter.rb +18 -0
  20. data/demo/test-api.rb +98 -0
  21. data/lib/apiresource.rb +130 -0
  22. data/lib/cli/auth.rb +122 -0
  23. data/lib/cli/help.rb +13 -0
  24. data/lib/cli/irb.rb +58 -0
  25. data/lib/cli/irbrc.rb +53 -0
  26. data/lib/cli/options.rb +75 -0
  27. data/lib/client.rb +152 -0
  28. data/lib/credentials.rb +67 -0
  29. data/lib/errors.rb +81 -0
  30. data/lib/filter.rb +312 -0
  31. data/lib/help.rb +46 -0
  32. data/lib/locale.rb +47 -0
  33. data/lib/main.rb +37 -0
  34. data/lib/query.rb +415 -0
  35. data/lib/resource.rb +414 -0
  36. data/lib/solvebio.rb +14 -0
  37. data/lib/solveobject.rb +101 -0
  38. data/lib/tabulate.rb +706 -0
  39. data/solvebio.gemspec +75 -0
  40. data/test/data/netrc-save +6 -0
  41. data/test/helper.rb +3 -0
  42. data/test/test-auth.rb +54 -0
  43. data/test/test-client.rb +27 -0
  44. data/test/test-error.rb +36 -0
  45. data/test/test-filter.rb +70 -0
  46. data/test/test-netrc.rb +42 -0
  47. data/test/test-query-batch.rb +60 -0
  48. data/test/test-query-init.rb +29 -0
  49. data/test/test-query-paging.rb +123 -0
  50. data/test/test-query.rb +88 -0
  51. data/test/test-resource.rb +47 -0
  52. data/test/test-solveobject.rb +27 -0
  53. data/test/test-tabulate.rb +127 -0
  54. metadata +158 -0
data/lib/help.rb ADDED
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+ require 'uri'
4
+ require_relative 'main'
5
+
6
module SolveBio
  # Mixin that lets a resource open its SolveBio web page in a browser.
  #
  # Including the module also mixes in InstanceMethods, which adds #help.
  # #help reads self['full_name'], so the including object must respond
  # to [] with a 'full_name' key.
  #
  # Opening a browser needs the optional "launchy" gem; without it the
  # URL is printed instead.
  module HelpableAPIResource

    # True when the optional "launchy" gem is available.
    #
    # Kernel#require returns false when the library has already been
    # loaded, so its return value must not be used as the flag: a
    # successful (non-raising) require is what signals availability.
    @@have_launchy =
      begin
        require 'launchy'
        true
      rescue LoadError
        false
      end

    # @return [Boolean] whether "launchy" could be loaded.
    def self.have_launchy
      @@have_launchy
    end

    # Instance-level reader for the same flag (kept for includers).
    #
    # @return [Boolean] whether "launchy" could be loaded.
    def have_launchy
      @@have_launchy
    end

    # Hook run by Ruby when the module is included: also mixes
    # InstanceMethods into the including class.
    def self.included(base)
      base.send :include, InstanceMethods
    end

    module InstanceMethods
      # Opens the help page of this resource, located by its 'full_name'.
      def help
        open_help(self['full_name'])
      end
    end

    # Opens https://www.solvebio.com/<path> with Launchy, or prints the
    # URL when Launchy is not available.
    #
    # @param path [String] path joined onto the SolveBio site root.
    def open_help(path)
      url = URI::join('https://www.solvebio.com/', path)
      if @@have_launchy
        Launchy.open(url)
      else
        puts('The SolveBio Ruby client needs the "launchy" gem to ' +
             "open help url: #{url.to_s}")
      end
    end
  end
end
36
+
37
# Demo code: run this file directly to open the SolveBio docs page.
if __FILE__ == $0
  include SolveBio::HelpableAPIResource
  # Reading a class variable at the top level raises RuntimeError on
  # Ruby >= 3.0, so ask the module for the flag explicitly.
  if SolveBio::HelpableAPIResource.class_variable_get(:@@have_launchy)
    open_help('docs')
    # NOTE(review): presumably gives the browser time to start before
    # the script exits - confirm.
    sleep 1
  else
    puts "Don't have launchy"
  end
end
data/lib/locale.rb ADDED
@@ -0,0 +1,47 @@
1
+ require_relative 'main'
2
module SolveBio
  # Integer pretty-printing with thousands separators.
  #
  # When the optional "r18n-core" gem can be loaded, formatting is
  # delegated to R18n; otherwise a simple built-in grouping is used.
  # SolveBio::Locale.pretty_int(num) is the module-level entry point.
  module Locale

    # Used only if r18n-core is not around
    @thousands_sep = ','
    @locale = ENV['LANG'] || ENV['LC_NUMERIC'] || 'en_US.UTF-8'

    class << self
      # Separator used by the built-in (non-R18n) formatter.
      attr_accessor :thousands_sep
    end

    # Per-object separator override for objects that include this module.
    def thousands_sep
      @thousands_sep
    end
    def thousands_sep=(value)
      @thousands_sep = value
    end

    # Warnings are silenced while r18n-core loads. The previous $VERBOSE
    # is restored in `ensure` so that a missing gem (LoadError) does not
    # leave warnings switched off for the rest of the program.
    old_verbose = $VERBOSE
    begin
      $VERBOSE = false
      require 'r18n-core'
      R18n.set(@locale)
      have_r18n = true
    rescue LoadError
      have_r18n = false
    ensure
      $VERBOSE = old_verbose
    end

    if have_r18n
      # Formats +num+ using the R18n locale set above.
      def pretty_int(num)
        R18n::l(num)
      end
    else
      # Formats +num+ by grouping its digits in threes.
      #
      # @param num [Integer] the number to format.
      # @return [String] e.g. "-1,234,567" for -1234567.
      def pretty_int(num)
        # self is the Locale module for SolveBio::Locale.pretty_int, or an
        # including object otherwise; fall back to the module-level
        # separator when the receiver has none of its own.
        separator = (@thousands_sep || Locale.thousands_sep).to_s
        text = num.to_s
        # scan(/\d{1,3}/) only keeps digits, so the sign is carried
        # separately instead of being silently dropped.
        sign = text.start_with?('-') ? '-' : ''
        # The separator is reversed too, so a multi-character separator
        # survives the final reverse unchanged.
        sign + text.reverse.scan(/\d{1,3}/).join(separator.reverse).reverse
      end
    end

    module_function :pretty_int

  end
end
37
+
38
# Adds Integer#pretty_int, which formats the receiver through
# SolveBio::Locale.pretty_int.
#
# Integer is patched rather than Fixnum: Fixnum was merged into Integer
# in Ruby 2.4 and the constant was removed in Ruby 3.2, where
# `class Fixnum` would silently create a new, unrelated class and leave
# integers without #pretty_int. On older Rubies Fixnum inherits from
# Integer, so the method is still available there.
class Integer
  include SolveBio::Locale
  def pretty_int
    SolveBio::Locale.pretty_int(self)
  end
end

# Demo: prints 10000 with thousands separators.
if __FILE__ == $0
  puts 10000.pretty_int
end
data/lib/main.rb ADDED
@@ -0,0 +1,37 @@
1
+ # -*- coding: utf-8 -*-
2
+ # SolveBio Ruby Client
3
+ # ~~~~~~~~~~~~~~~~~~~
4
+ #
5
+ # This is the Ruby client & library for the SolveBio API.
6
+ #
7
+ # Have questions or comments? email us at: contact@solvebio.com
8
+
9
+ require 'logger'
10
+
11
module SolveBio

  VERSION = '1.5.0'

  # API key; initially taken from the SOLVEBIO_API_KEY environment
  # variable and changeable through SolveBio.api_key=.
  @api_key = ENV['SOLVEBIO_API_KEY']

  # Library-wide logger. It normally writes to /tmp/solvebio.log; when
  # that file cannot be opened (no /tmp, read-only or unwritable
  # directory) loading the library must not fail, so warnings and errors
  # go to standard error instead.
  @logger =
    begin
      Logger.new('/tmp/solvebio.log')
    rescue SystemCallError
      Logger.new($stderr).tap { |fallback| fallback.level = Logger::WARN }
    end

  # Base URL of the API; overridable with SOLVEBIO_API_HOST.
  API_HOST = ENV['SOLVEBIO_API_HOST'] || 'https://api.solvebio.com'

  # Config info in reports and requests. Encapsulate more?
  # Note: RUBY_VERSION here is SolveBio::RUBY_VERSION, distinct from the
  # top-level ::RUBY_VERSION constant.
  RUBY_VERSION = RbConfig::CONFIG['RUBY_PROGRAM_VERSION']
  RUBY_IMPLEMENTATION = RbConfig::CONFIG['RUBY_SO_NAME']
  #PLATFORM = ???
  #PROCESSOR = ???
  ARCHITECTURE = RbConfig::CONFIG['arch']

  # @return [Logger] the library-wide logger.
  def logger
    @logger
  end

  # @return [String, nil] the API key currently in use.
  def api_key
    @api_key
  end

  # Sets the API key.
  def api_key=(value)
    @api_key = value
  end

  # Expose the accessors as SolveBio.logger, SolveBio.api_key and
  # SolveBio.api_key=.
  module_function :logger, :api_key, :api_key=

end
data/lib/query.rb ADDED
@@ -0,0 +1,415 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'pp'
3
+ require_relative 'client'
4
+ require_relative 'filter'
5
+ require_relative 'locale'
6
+ require_relative 'tabulate'
7
+
8
module SolveBio
  # A Query API request wrapper that generates a request from Filter
  # objects, and can iterate through streaming result sets.
  #
  # Rows are fetched lazily by POSTing to /v1/datasets/<id>/data, at most
  # +limit+ rows per request. The most recent page is cached in @results
  # and @window_range holds the absolute offsets that page covers.
  class PagingQuery

    include Enumerable

    MAXIMUM_LIMIT ||= 100

    attr_accessor :filters
    attr_reader :dataset_id

    # @param dataset_id [Integer, String] must be convertible by Integer().
    # @param params [Hash] optional settings:
    #   :limit        rows per request (default MAXIMUM_LIMIT, must be >= 0)
    #   :result_class stored as given (default Hash); not used in this file
    #   :debug        sent with every request (default false)
    #   :fields       sent with every request when set
    # @raise [TypeError] when dataset_id or :limit is not Integer-like.
    # @raise [RangeError] when :limit is negative.
    def initialize(dataset_id, params={})
      begin
        Integer(dataset_id)
      rescue StandardError
        raise TypeError, "'dataset_id' parameter must an Integer"
      end
      @dataset_id = dataset_id
      @data_url = "/v1/datasets/#{dataset_id}/data"

      @total = @results = @response = nil
      reset_range_window

      # results per request
      @limit =
        if params.member?(:limit)
          begin
            Integer(params[:limit])
          rescue StandardError
            raise TypeError, "'limit' parameter must an Integer >= 0"
          end
        else
          MAXIMUM_LIMIT
        end
      raise RangeError, "'limit' parameter must be >= 0" if @limit < 0

      @result_class = params[:result_class] || Hash
      @debug = params[:debug] || false
      @fields = params[:fields]
      @filters = []
    end

    # Total number of rows matching the query, as reported by the server.
    # Runs the query if it has not been run yet.
    def total
      warmup('Query total')
      @total = @response["total"]
    end

    # Returns a NEW query of the same class and settings whose filters
    # are this query's filters followed by +filters+. The receiver is
    # left untouched and no HTTP request is made.
    def clone(filters=[])
      copy = self.class.new(@dataset_id,
                            {
                              :limit => @limit,
                              :result_class => @result_class,
                              :debug => @debug,
                              :fields => @fields
                            })
      copy.filters = @filters + filters
      copy
    end

    # Returns a new query with the given filter added to the existing
    # set; everything is combined with AND when the request is built.
    #
    # +params+ is either a ready-made SolveBio::Filter (its filters are
    # deep-copied) or arguments handed to SolveBio::Filter.new together
    # with +conn+.
    #
    # If you want something other than AND, build a Filter using its
    # ``&`` (and), ``|`` (or) and ``~`` (not) operators, then call filter
    # once with the resulting Filter instance.
    def filter(params={}, conn=:and)
      if params.kind_of?(SolveBio::Filter)
        # Marshal round-trip = deep copy of data we created ourselves.
        clone(Marshal.load(Marshal.dump(params.filters)))
      else
        clone(SolveBio::Filter.new(params, conn).filters)
      end
    end

    # Shortcut to do range queries on supported datasets.
    # NOTE(review): assumes filter.rb defines SolveBio::RangeFilter taking
    # (chromosome, start, last, strand, overlap) - confirm.
    def range(chromosome, start, last, strand=nil, overlap=true)
      # TODO: ensure dataset supports range queries?
      clone([SolveBio::RangeFilter.new(chromosome, start, last,
                                       strand, overlap)])
    end

    # Same value as #total (runs the query if needed).
    def size
      warmup('PagingQuery size')
      return @total
    end
    alias_method :length, :size

    def empty?
      warmup('empty?')
      return @total == 0
    end

    # Convert the query to a String: a table of the first row followed
    # by a count of the remaining rows.
    def to_s
      return 'query returned 0 results' if total == 0 || @limit == 0

      table = SolveBio::Tabulate.tabulate(self[0].to_a,
                                          ['Fields', 'Data'],
                                          ['right', 'left'])
      "\n%s\n\n... %s more results." %
        [table, SolveBio::Locale.pretty_int(@total - 1)]
    end

    # Like #to_s but renders the first row with pretty_inspect.
    def to_pp
      return 'query returned 0 results' if total == 0 || @limit == 0

      "\n#{self[0].pretty_inspect}\n" +
        "\n... #{SolveBio::Locale.pretty_int(@total - 1)} more results."
    end

    # Returns the first row of the result set.
    def to_h
      self[0]
    end

    def inspect
      return '<%s: @dataset_id=%s, @total=%s, @limit=%s, @debug=%s>' %
        [self.class, @dataset_id, @total ? @total : '?',
         @limit, @debug]
    end

    # Runs the query once if no response has been received yet.
    # +what+ only labels the debug log entry.
    def warmup(what)
      unless @response
        SolveBio::logger.debug("warmup #{what}")
        execute
      end
    end

    # Retrieve an item or range from the set of results, fetching
    # further pages when needed.
    #
    # An Integer returns the row at that absolute offset, or nil when the
    # offset is past the end. A Range returns an Array holding
    # (range.end - range.begin) rows starting at range.begin, i.e. the
    # end is treated as exclusive for both `..` and `...` ranges.
    #
    # @raise [TypeError] when key is neither a Range nor an Integer.
    # @raise [IndexError] for negative or backwards indexes.
    def [](key)
      warmup("[#{key}]")
      check_index(key)

      @request_range = to_range(key)
      rows = []
      begin
        each { |row| rows << row }
      ensure
        reset_request_range
      end
      key.kind_of?(Range) ? rows : rows.first
    end

    # Required by Enumerable. Yields every row inside the current request
    # range (the whole result set by default), requesting a new page
    # whenever the wanted offset is outside the cached window.
    # Returns self; does nothing when no block is given.
    def each(*pass)
      return self unless block_given?
      i = 0
      while i < @request_range.end - @request_range.begin
        offset = @request_range.begin + i
        break if offset >= total
        row = row_at(offset)
        break if row.nil? # server returned no rows (e.g. limit == 0)
        yield row
        i += 1
      end
      return self
    end

    # range operations
    # An Integer n becomes (n..n + 1); a Range is returned unchanged.
    def to_range(range_or_idx)
      return range_or_idx.kind_of?(Range) ? range_or_idx :
        (range_or_idx..range_or_idx + 1)
    end

    def reset_request_range
      @request_range = (0..Float::INFINITY)
    end

    def reset_range_window
      @window = []
      @window_range = (0..Float::INFINITY)
      reset_request_range
    end

    # Builds the request parameters (:limit, :debug, plus :filters and
    # :fields when present). Several filters are wrapped in one :and.
    def build_query
      q = {
        :limit => @limit,
        :debug => @debug
      }

      if @filters
        filters = SolveBio::Filter.process_filters(@filters)
        if filters.size > 1
          q[:filters] = [{:and => filters}]
        else
          q[:filters] = filters
        end
      end

      q[:fields] = @fields if @fields

      return q
    end

    # Executes a query and returns the request parameters and response.
    # +params+ (e.g. :offset, :limit) override the built query. Updates
    # @total, @results and the cached window.
    def execute(params={})
      _params = build_query()
      _params.merge!(params)
      SolveBio::logger.debug("querying dataset: #{_params}")

      @response = SolveBio::Client.client.request('post', @data_url, _params)
      @total = @response['total']
      SolveBio::logger.
        debug("query response took: #{@response['took']} ms, " +
              "total: #{@total}")

      # update window
      offset = _params[:offset] || 0
      @results = @response['results']
      @window = @results
      @window_range = (offset ... offset + @results.size)

      return _params, @response
    end

    private

    # Validates an index passed to #[]; raises TypeError / IndexError.
    def check_index(key)
      unless key.kind_of?(Range) || key.kind_of?(Integer)
        raise TypeError, "Expecting index value to be a Range or Fixnum; is #{key.class}"
      end
      if @limit < 0
        raise IndexError, 'Indexing not supporting when limit < 0.'
      end
      if key.kind_of?(Range)
        if key.begin < 0 || key.end < 0
          raise IndexError, 'Negative indexing is not supported'
        end
        if key.begin > key.end
          raise IndexError, 'Backwards indexing is not supported'
        end
      elsif key < 0
        raise IndexError, 'Negative indexing is not supported'
      end
    end

    # Returns the row at absolute +offset+, requesting the page that
    # starts there when it is not in the cached window. Returns nil when
    # the server has no row for that offset.
    def row_at(offset)
      if @window_range.include?(offset)
        cached = offset - @window_range.begin
        SolveBio::logger.debug('  %s window range: [%s...%s]' %
                               [self.class, cached, cached + 1])
      else
        SolveBio::logger.debug('executing query. offset/limit: %6d/%d' %
                               [offset, @limit])
        execute({:offset => offset, :limit => @limit})
      end
      # After a refetch the window starts at +offset+, so this is index 0.
      @results[offset - @window_range.begin]
    end
  end

  # A non-paging query: only the rows of the cached page (at most
  # +limit+ rows) are exposed through #size, #each and #[].
  class Query < PagingQuery

    # Total number of matching rows reported by the server (may exceed
    # #size).
    def total
      warmup('Query total')
      @total
    end

    # Number of rows available: the smaller of total and the page size.
    def size
      warmup('Query size')
      [@total, @results.size].min
    end
    alias_method :length, :size

    # Required by Enumerable. Yields at most min(size, limit) rows.
    # Returns self; does nothing when no block is given.
    def each(*pass)
      return self unless block_given?
      i = 0
      while i < size && i < @limit
        row = row_at(i + @request_range.begin)
        break if row.nil?
        yield row
        i += 1
      end
      return self
    end

    # Indexes into the cached page using Array#[] semantics.
    #
    # @raise [TypeError] when key is neither a Range nor an Integer.
    # @raise [IndexError] for negative/backwards indexes, or an Integer
    #   at or past the end of the cached page.
    def [](key)
      # Warm up first: before the first request the window is unbounded
      # and the bounds check below could never fire.
      warmup("[#{key}]")
      check_index(key)
      if key.kind_of?(Integer) && key >= @window_range.end
        raise IndexError, "Invalid index #{key} >= #{@window_range.end}"
      end
      @results[key]
    end
  end
end
353
+
354
+
355
# Bundles several Query objects so they can be sent to the server in a
# single POST to /v1/batch_query.
class SolveBio::BatchQuery
  # @param queries a Query object, or an Array of them. Anything that is
  #   not an Array is wrapped in a one-element Array.
  def initialize(queries)
    @queries = queries.kind_of?(Array) ? queries : [queries]
  end

  # Builds the request payload: every query's own build_query hash,
  # tagged with its :dataset id, collected under :queries.
  def build_query
    payload = @queries.map do |query|
      query.build_query.merge!(:dataset => query.dataset_id)
    end
    {:queries => payload}
  end

  # Sends the batch request and returns the response.
  # Entries in +params+ are merged over the built payload.
  def execute(params={})
    request_body = build_query.merge!(params)
    SolveBio::Client.client.request('post', '/v1/batch_query', request_body)
  end
end
387
+
388
# Demo/test code: runs only when this file is executed directly and an
# API key is configured.
if __FILE__ == $0
  if SolveBio::api_key
    require_relative 'solvebio'
    require_relative 'errors'
    dataset = SolveBio::Dataset.retrieve('ClinVar/2.0.0-1/Variants')

    # Alternative experiments, kept for reference:
    #
    #   # A filter
    #   limit = 5
    #   results = dataset.query({:paging=>false, :limit => limit}).
    #     filter({:alternate_alleles => nil})
    #   puts results.size
    #
    #   # Non-paging query
    #   results = dataset.query({:limit => limit, :paging =>false})
    #   puts results.size
    #   results.each_with_index { |val, i|
    #     puts "#{i}: #{val}"
    #   }
    #   puts "#{limit-1}: #{results[limit-1]}"

    # Paging query, two rows per request; print its summary.
    limit = 2
    results = dataset.query({:limit => limit, :paging=>true})
    # puts results.size
    puts results.to_s
  else
    puts 'Set SolveBio::api_key to run demo'
  end
end