solvebio 1.6.1 → 1.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (119) hide show
  1. data/.bumpversion.cfg +6 -0
  2. data/.gitignore +5 -4
  3. data/.travis.yml +1 -1
  4. data/Gemfile +3 -0
  5. data/README.md +34 -34
  6. data/Rakefile +1 -18
  7. data/bin/solvebio.rb +14 -16
  8. data/installer +64 -0
  9. data/lib/solvebio.rb +50 -11
  10. data/lib/solvebio/acccount.rb +4 -0
  11. data/lib/solvebio/annotation.rb +11 -0
  12. data/lib/solvebio/api_operations.rb +147 -0
  13. data/lib/solvebio/api_resource.rb +32 -0
  14. data/lib/solvebio/cli.rb +75 -0
  15. data/lib/solvebio/cli/auth.rb +106 -0
  16. data/lib/solvebio/cli/credentials.rb +54 -0
  17. data/lib/{cli → solvebio/cli}/irb.rb +0 -23
  18. data/lib/solvebio/cli/irbrc.rb +48 -0
  19. data/lib/solvebio/cli/tutorial.rb +12 -0
  20. data/lib/solvebio/client.rb +149 -0
  21. data/lib/solvebio/dataset.rb +60 -0
  22. data/lib/solvebio/dataset_field.rb +12 -0
  23. data/lib/solvebio/depository.rb +38 -0
  24. data/lib/solvebio/depository_version.rb +40 -0
  25. data/lib/solvebio/errors.rb +64 -0
  26. data/lib/solvebio/filter.rb +315 -0
  27. data/lib/solvebio/list_object.rb +73 -0
  28. data/lib/solvebio/locale.rb +43 -0
  29. data/lib/solvebio/query.rb +341 -0
  30. data/lib/solvebio/sample.rb +54 -0
  31. data/lib/solvebio/singleton_api_resource.rb +25 -0
  32. data/lib/solvebio/solve_object.rb +164 -0
  33. data/lib/solvebio/tabulate.rb +589 -0
  34. data/lib/solvebio/user.rb +4 -0
  35. data/lib/solvebio/util.rb +59 -0
  36. data/lib/solvebio/version.rb +3 -0
  37. data/solvebio.gemspec +10 -18
  38. data/test/helper.rb +6 -2
  39. data/test/solvebio/data/.gitignore +1 -0
  40. data/test/solvebio/data/.netrc +6 -0
  41. data/test/{data → solvebio/data}/netrc-save +0 -0
  42. data/test/solvebio/data/sample.vcf.gz +0 -0
  43. data/test/solvebio/data/test_creds +3 -0
  44. data/test/solvebio/test_annotation.rb +45 -0
  45. data/test/solvebio/test_client.rb +29 -0
  46. data/test/solvebio/test_conversion.rb +14 -0
  47. data/test/solvebio/test_credentials.rb +67 -0
  48. data/test/solvebio/test_dataset.rb +52 -0
  49. data/test/solvebio/test_depository.rb +24 -0
  50. data/test/solvebio/test_depositoryversion.rb +22 -0
  51. data/test/solvebio/test_error.rb +31 -0
  52. data/test/solvebio/test_filter.rb +86 -0
  53. data/test/solvebio/test_query.rb +282 -0
  54. data/test/solvebio/test_query_batch.rb +38 -0
  55. data/test/solvebio/test_query_init.rb +30 -0
  56. data/test/solvebio/test_query_tabulate.rb +73 -0
  57. data/test/solvebio/test_ratelimit.rb +31 -0
  58. data/test/solvebio/test_resource.rb +29 -0
  59. data/test/solvebio/test_sample_access.rb +60 -0
  60. data/test/solvebio/test_sample_download.rb +20 -0
  61. data/test/solvebio/test_tabulate.rb +129 -0
  62. data/test/solvebio/test_util.rb +39 -0
  63. metadata +100 -85
  64. data/Makefile +0 -17
  65. data/demo/README.md +0 -14
  66. data/demo/cheatsheet.rb +0 -31
  67. data/demo/dataset/facets.rb +0 -13
  68. data/demo/dataset/field.rb +0 -13
  69. data/demo/depository/README.md +0 -24
  70. data/demo/depository/all.rb +0 -13
  71. data/demo/depository/retrieve.rb +0 -13
  72. data/demo/depository/versions-all.rb +0 -13
  73. data/demo/query/query-filter.rb +0 -30
  74. data/demo/query/query.rb +0 -13
  75. data/demo/query/range-filter.rb +0 -18
  76. data/demo/test-api.rb +0 -98
  77. data/lib/cli/auth.rb +0 -122
  78. data/lib/cli/help.rb +0 -13
  79. data/lib/cli/irbrc.rb +0 -54
  80. data/lib/cli/options.rb +0 -75
  81. data/lib/client.rb +0 -154
  82. data/lib/credentials.rb +0 -67
  83. data/lib/errors.rb +0 -81
  84. data/lib/filter.rb +0 -312
  85. data/lib/locale.rb +0 -47
  86. data/lib/main.rb +0 -46
  87. data/lib/query.rb +0 -414
  88. data/lib/resource/annotation.rb +0 -23
  89. data/lib/resource/apiresource.rb +0 -241
  90. data/lib/resource/dataset.rb +0 -91
  91. data/lib/resource/datasetfield.rb +0 -37
  92. data/lib/resource/depository.rb +0 -50
  93. data/lib/resource/depositoryversion.rb +0 -69
  94. data/lib/resource/main.rb +0 -123
  95. data/lib/resource/sample.rb +0 -75
  96. data/lib/resource/solveobject.rb +0 -122
  97. data/lib/resource/user.rb +0 -5
  98. data/lib/tabulate.rb +0 -706
  99. data/lib/util.rb +0 -29
  100. data/test/Makefile +0 -9
  101. data/test/data/sample.vcf.gz +0 -0
  102. data/test/test-annotation.rb +0 -46
  103. data/test/test-auth.rb +0 -58
  104. data/test/test-client.rb +0 -27
  105. data/test/test-conversion.rb +0 -13
  106. data/test/test-dataset.rb +0 -42
  107. data/test/test-depository.rb +0 -35
  108. data/test/test-error.rb +0 -36
  109. data/test/test-filter.rb +0 -70
  110. data/test/test-netrc.rb +0 -52
  111. data/test/test-query-batch.rb +0 -40
  112. data/test/test-query-init.rb +0 -29
  113. data/test/test-query-paging.rb +0 -102
  114. data/test/test-query.rb +0 -71
  115. data/test/test-resource.rb +0 -40
  116. data/test/test-sample-access.rb +0 -59
  117. data/test/test-sample-download.rb +0 -20
  118. data/test/test-tabulate.rb +0 -131
  119. data/test/test-util.rb +0 -42
@@ -0,0 +1,73 @@
1
module SolveBio
  # A list-style API resource: one page of results plus the means to move
  # between pages.
  #
  # The methods below read three attributes that are not defined in this
  # class (presumably supplied by SolveObject -- confirm there):
  # - `data`:  the items of the current page,
  # - `links`: an object whose `next` / `prev` are passed to Client.request
  #            as the URLs of the neighbouring pages,
  # - `url`:   the base URL used for retrieve / all / create.
  class ListObject < SolveObject

    # Attribute-style access. Only String and Symbol keys are forwarded to
    # the superclass; anything else (typically an Integer index) raises
    # ArgumentError with a hint to index `data` instead.
    def [](k)
      case k
      when String, Symbol
        super
      else
        raise ArgumentError, "ListObject types only support String keys. Try: #data[#{k.inspect}])"
      end
    end

    # GETs "#{url}/#{id}" and returns the response converted by
    # Util.to_solve_object.
    def retrieve(id)
      response = Client.request('get', "#{url}/#{id}")
      Util.to_solve_object(response)
    end

    # GETs the list URL with the given query params and returns the
    # converted response.
    def all(params={})
      resp = Client.request('get', url, {:params => params})
      Util.to_solve_object(resp)
    end

    # POSTs the given params to the list URL and returns the converted
    # response.
    def create(params={})
      resp = Client.request('post', url, {:params => params})
      Util.to_solve_object(resp)
    end

    # Fetches the page referenced by `links.next`.
    #
    # Returns the converted page, or nil when there is no next link.
    # (Previously the fetched page was discarded and nil was always
    # returned.)
    def next_page(params={})
      return nil unless self.links.next

      resp = Client.request('get', self.links.next, {:params => params})
      Util.to_solve_object(resp)
    end

    # Fetches the page referenced by `links.prev`.
    #
    # Returns the converted page, or nil when there is no previous link.
    def prev_page(params={})
      return nil unless self.links.prev

      resp = Client.request('get', self.links.prev, {:params => params})
      Util.to_solve_object(resp)
    end

    # Returns the converted item at position +i+ of the current page.
    def at(i)
      self.to_a[i]
    end

    # Returns the current page's `data` converted by Util.to_solve_object.
    def to_a
      Util.to_solve_object(self.data)
    end

    # Yields every item of this page and then of each following page,
    # requesting pages through next_page until it returns nil.
    #
    # Returns self; without a block nothing is fetched or yielded.
    def each(*pass)
      return self unless block_given?

      page = self
      while page
        # Convert the page once instead of once per item.
        items = page.to_a
        page.data.size.times { |i| yield(items[i]) }
        # Advance from the page just consumed, not from self; otherwise the
        # second page would be requested over and over.
        page = page.next_page
      end
      self
    end

    # Returns the first entry of `data` as stored (no conversion is applied
    # here, unlike #at).
    def first
      self.data[0]
    end
  end
end
@@ -0,0 +1,43 @@
1
module SolveBio
  # Integer pretty-printing with thousands separators.
  #
  # When the optional r18n-core gem can be loaded, formatting is delegated
  # to R18n.l; otherwise a small built-in formatter is used.
  module Locale
    # Used only if r18n-core is not around
    @thousands_sep = ','
    @locale = ENV['LANG'] || ENV['LC_NUMERIC'] || 'en_US.UTF-8'

    class << self
      # Separator used by the built-in formatter. Exposed on the module
      # itself because that is where the module-level pretty_int reads it.
      attr_accessor :thousands_sep
    end

    # Instance-level accessors, kept for classes that include this module.
    # They read/write the including object's own @thousands_sep.
    def thousands_sep
      @thousands_sep
    end

    def thousands_sep=(value)
      @thousands_sep = value
    end

    # Try to load r18n-core with warnings silenced. The `ensure` restores
    # $VERBOSE on every path, including the LoadError one.
    old_verbose = $VERBOSE
    begin
      $VERBOSE = false
      require 'r18n-core'
      R18n.set(@locale)
      have_r18n = true
    rescue LoadError
      have_r18n = false
    ensure
      $VERBOSE = old_verbose
    end

    if have_r18n
      # Formats +num+ using R18n's localisation.
      def pretty_int(num)
        R18n::l(num)
      end
    else
      # Formats +num+ by grouping its digits in threes from the right,
      # joined with the module's thousands separator. A leading minus sign
      # is preserved.
      def pretty_int(num)
        text = num.to_s
        sign = text.start_with?('-') ? '-' : ''
        # Reverse so that scan groups from the least-significant digit.
        grouped = text.delete('-').reverse.scan(/\d{1,3}/).join(Locale.thousands_sep).reverse
        sign + grouped
      end
    end

    module_function :pretty_int
  end
end

# Adds #pretty_int to every integer. Integer is patched rather than Fixnum:
# Fixnum was folded into Integer in Ruby 2.4 and the constant no longer
# exists in Ruby 3.2+.
class Integer
  include SolveBio::Locale

  def pretty_int
    SolveBio::Locale.pretty_int(self)
  end
end
@@ -0,0 +1,341 @@
1
# -*- coding: utf-8 -*-
module SolveBio
  # A Query API request wrapper that generates a request from Filter
  # objects, and can iterate through streaming result sets.
  #
  # Results are fetched page by page with Client.post; the most recent
  # response is kept in @response and its :results array acts as the
  # current buffer.
  class Query
    include Enumerable

    # 2**62 - 1, used as the "no limit" default for @limit.
    INT_MAX ||= 4_611_686_018_427_387_903

    # The maximum number of results fetched in one go. Note however
    # that iterating over a query can cause more fetches.
    DEFAULT_PAGE_SIZE ||= 100

    attr_reader :dataset_id
    attr_accessor :filters
    attr_accessor :limit
    attr_accessor :page_size
    attr_accessor :slice
    attr_reader :response
    attr_reader :page_offset

    # Creates a new Query object.
    #
    # Parameters:
    #   - `dataset_id`: Unique ID of dataset to query (Integer or String).
    #   - `params` (optional) Hash with any of:
    #     - `:data_url`: endpoint to post to; defaults to
    #       "/v1/datasets/<dataset_id>/data".
    #     - `:genome_build`: The genome build to use for the query.
    #     - `:fields`: List of specific fields to retrieve.
    #     - `:filters`: A SolveBio::Filter or a list of filters.
    #     - `:limit`: Maximum number of query results to return.
    #     - `:page_size`: Max number of results to fetch per query page.
    #
    # Raises TypeError when `dataset_id` is neither an Integer nor
    # string-like, when `:limit` is not an Integer >= 0, or when
    # `:page_size` is not an Integer > 0.
    def initialize(dataset_id, params={})
      # Integer rather than Fixnum: the Fixnum constant is gone in Ruby 3.2+.
      unless dataset_id.is_a?(Integer) || dataset_id.respond_to?(:to_str)
        raise TypeError, "'dataset_id' parameter must an Integer or String"
      end

      @dataset_id = dataset_id
      @data_url = params[:data_url] || "/v1/datasets/#{dataset_id}/data"
      @genome_build = params[:genome_build]
      @fields = params[:fields]
      @filters = params[:filters].kind_of?(SolveBio::Filter) ? params[:filters].filters : (params[:filters] || [])

      @response = nil
      # Limit defines the total number of results that will be returned
      # from a query involving 1 or more pagination requests.
      @limit = params[:limit] || INT_MAX
      # Page size and page offset are the low level API limit and offset params.
      # page_offset may be changed periodically during sequential pagination requests.
      @page_size = params[:page_size] || DEFAULT_PAGE_SIZE
      # Page offset can only be set by execute().
      # It always contains the current absolute offset contained in the buffer.
      @page_offset = nil
      # @slice is set to tell the Query object that it is being sliced and
      # "def each" should not reset the page_offset to 0 before iterating.
      @slice = nil

      # Any conversion failure or out-of-range value is reported as TypeError.
      begin
        @limit = Integer(@limit)
        raise RangeError if @limit < 0
      rescue
        raise TypeError, "'limit' parameter must an Integer >= 0"
      end

      begin
        @page_size = Integer(@page_size)
        raise RangeError if @page_size <= 0
      rescue
        raise TypeError, "'page_size' parameter must an Integer > 0"
      end
    end

    # Returns a new Query for the same dataset, URL, genome build, fields,
    # limit and page size, whose filters are this query's filters followed
    # by the given extra `filters`. The slice and any fetched response are
    # not carried over.
    def clone(filters=[])
      q = Query.new(@dataset_id, {
        :data_url => @data_url,
        :genome_build => @genome_build,
        :fields => @fields,
        :limit => @limit,
        :page_size => @page_size
      })

      q.filters += @filters unless @filters.empty?
      q.filters += filters unless filters.empty?
      q
    end

    # Returns a copy of this Query with the given filter args appended to
    # the existing set.
    #
    # The args are simply passed to a new SolveBio::Filter object and
    # combined with any other filters with AND.
    #
    # By default, everything is combined using AND. If you provide
    # multiple filters in a single filter call, those are ANDed
    # together. If you provide multiple filters in multiple filter
    # calls, those are ANDed together.
    #
    # If you want something different, use the Filter class which supports
    # ``&`` (and), ``|`` (or) and ``~`` (not) operators. Then call
    # filter once with the resulting Filter instance.
    def filter(params={}, conn=:and)
      clone(Filter.new(params, conn).filters)
    end

    # Shortcut to do range queries on supported datasets.
    def range(chromosome, start, stop, exact=false)
      clone(GenomicFilter.new(chromosome, start, stop, exact).filters)
    end

    # Shortcut to do a single position filter on genomic datasets.
    def position(chromosome, position, exact=false)
      clone(GenomicFilter.new(chromosome, position, position, exact).filters)
    end

    # Returns the total number of results in the result-set.
    # The count is dependent on the filters, but independent of any limit.
    # It is like SQL:
    #   SELECT COUNT(*) FROM <depository> [WHERE condition].
    # See also size(), which is dependent on limit.
    # Performs a request if none has been made yet.
    def count
      execute unless @response
      @response[:total]
    end
    alias_method(:total, :count)

    # Returns the total number of results that will be retrieved
    # given @limit set by the user.
    # Requires at least one API request to retrieve the total count.
    #
    # In SQL it is like:
    #   SELECT COUNT(*) FROM (
    #     SELECT * FROM <table> [WHERE condition] [LIMIT number]
    #   )
    def size
      [@limit, count].min
    end
    alias_method(:length, :size)

    # True when the query would yield no results (see #size).
    def empty?
      size == 0
    end

    # Renders the first buffered result as a two-column table followed by a
    # note on how many more results there are.
    def to_s
      if @limit == 0 || count == 0
        return 'Query returned 0 results'
      end

      result = Tabulate.tabulate(buffer[0], ['Fields', 'Data'], ['right', 'left'], true)
      "\n#{result}\n\n... #{(size - 1).pretty_int} more results."
    end

    # Returns the first result of the query (see #[]).
    def to_h
      self[0]
    end

    # Retrieve an item or range from the set of results.
    #
    # - Integer key: returns the single result at that absolute position
    #   (relative to the current slice, if any), fetching it if it is not
    #   already buffered.
    # - Range key: the range's end is treated as exclusive. Returns the
    #   buffered items when the buffer covers the range, otherwise a new
    #   sliced Query that fetches them when iterated.
    #
    # Raises TypeError for any other key type and IndexError for negative
    # positions. Returns nil when the request starts at or beyond the end of
    # the current slice.
    def [](key)
      unless key.is_a?(Range) || key.is_a?(Integer)
        raise TypeError, "Expecting index value to be a Range or Fixnum; is #{key.class}"
      end

      key.is_a?(Range) ? slice_for(key) : item_at(key)
    end

    # Yields up to size() results, fetching further pages as the buffer is
    # exhausted. Iteration starts at the slice's beginning when this Query
    # was produced by slicing, otherwise at offset 0.
    #
    # Stops early if a page comes back empty, so a slice or limit reaching
    # past the available results does not yield nils.
    #
    # Returns self; without a block nothing is requested.
    def each(*args)
      return self unless block_given?

      # When calling each, we always reset the offset and buffer, unless
      # this Query was created by the slice function (def []).
      execute(@slice ? @slice.begin : 0)

      # Position of the next item within the current buffer.
      buffer_idx = 0
      size.times do
        if buffer_idx == buffer.length
          # No more buffer! Get the page that follows it.
          execute(@page_offset + buffer_idx)
          buffer_idx = 0
        end
        # The server has nothing further to give.
        break if buffer.empty?

        yield buffer[buffer_idx]
        buffer_idx += 1
      end

      self
    end

    # Returns the argument unchanged if it is a Range, otherwise the range
    # idx..idx+1.
    def to_range(range_or_idx)
      range_or_idx.kind_of?(Range) ? range_or_idx : (range_or_idx..range_or_idx + 1)
    end

    # The :results of the most recent response, or nil before any request.
    def buffer
      return nil unless @response
      @response[:results]
    end

    # Builds the request body (without offset/limit): the processed
    # filters (wrapped in a single :and clause when there is more than
    # one), plus :fields and :genome_build when set.
    def build_query
      q = {}

      if @filters
        filters = Filter.process_filters(@filters)
        q[:filters] = filters.size > 1 ? [{:and => filters}] : filters
      end

      q[:fields] = @fields if @fields
      q[:genome_build] = @genome_build if @genome_build

      q
    end

    # Executes the current query starting at `offset` and stores the
    # response. Returns the request params and the response.
    def execute(offset=0)
      params = build_query

      # Always set the page offset before querying.
      @page_offset = offset

      params.merge!(
        :offset => @page_offset,
        # The user's limit trumps the page limit if it's smaller
        :limit => [@page_size, @limit].min
      )

      SolveBio::logger.debug("Executing query with offset: #{params[:offset]} limit: #{params[:limit]}")
      # TODO: handle request errors and reset page_offset
      @response = Client.post(@data_url, params)
      SolveBio::logger.debug("Query response took #{@response[:took]}ms, buffer size: #{buffer.length}, total: #{@response[:total]}")
      return params, @response
    end

    private

    # Range form of #[]: buffered items if available, else a sliced clone.
    def slice_for(range)
      start, stop = range.begin, range.end

      # Reverse ranges aren't supported
      return [] if start > stop

      if start < 0 || stop < 0
        raise IndexError, 'Negative indexing is not supported'
      end

      # If a slice is already set, the new slice should be relative to it
      if @slice
        start += @slice.begin
        stop = [@slice.begin + stop, @slice.end].min
        # Return nil if the user requests something outside the current slice
        return nil if start >= @slice.end
      end

      # Does the current buffer contain the desired range? If so avoid a
      # query and return the buffered items, using buffer-relative offsets.
      if buffer && start >= @page_offset && stop < (@page_offset + buffer.length)
        return buffer[(start - @page_offset)..(stop - @page_offset - 1)]
      end

      # Requests are needed to get the data between start and stop. Respect
      # the user's @limit if it is smaller than the given Range, and work on
      # a clone so this object's page state is left untouched.
      q = clone
      q.limit = [stop - start, @limit].min
      # Setting slice signals to "each" which page_offset to start at.
      q.slice = Range.new(start, stop)
      q
    end

    # Integer form of #[]: the buffered item if available, else a
    # one-result fetch through a clone.
    def item_at(key)
      if key < 0
        raise IndexError, 'Negative indexing is not supported'
      end

      # If a slice already exists, the key is relative to that slice
      if @slice
        key = key + @slice.begin
        # Return nil if the user requests something outside the slice
        return nil if key >= @slice.end
      end

      # If the value at key is already in the buffer, return it.
      if buffer && key >= @page_offset && key < (@page_offset + buffer.length)
        return buffer[key - @page_offset]
      end

      # Otherwise, use key as the page_offset of a fresh single-result query.
      q = clone
      q.limit = [1, @limit].min
      q.execute(key)
      q.buffer[0]
    end
  end

  # BatchQuery accepts a list of Query objects and executes them
  # in a single request to /v1/batch_query.
  class BatchQuery
    # Expects a list of Query objects; a single Query is wrapped in a list.
    def initialize(queries)
      queries = [queries] unless queries.kind_of?(Array)
      @queries = queries
    end

    # Builds the batch request body: each query's own body plus its
    # :dataset id and effective :limit, collected under :queries.
    def build_query
      batched = @queries.map do |query|
        query.build_query.merge!(
          :dataset => query.dataset_id,
          :limit => [query.page_size, query.limit].min
        )
      end

      {:queries => batched}
    end

    # Posts the batch (with any extra top-level `params` merged in) and
    # returns the response.
    def execute(params={})
      _params = build_query
      _params.merge!(params)
      Client.post('/v1/batch_query', _params)
    end
  end
end