solvebio 1.6.1 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. data/.bumpversion.cfg +6 -0
  2. data/.gitignore +5 -4
  3. data/.travis.yml +1 -1
  4. data/Gemfile +3 -0
  5. data/README.md +34 -34
  6. data/Rakefile +1 -18
  7. data/bin/solvebio.rb +14 -16
  8. data/installer +64 -0
  9. data/lib/solvebio.rb +50 -11
  10. data/lib/solvebio/acccount.rb +4 -0
  11. data/lib/solvebio/annotation.rb +11 -0
  12. data/lib/solvebio/api_operations.rb +147 -0
  13. data/lib/solvebio/api_resource.rb +32 -0
  14. data/lib/solvebio/cli.rb +75 -0
  15. data/lib/solvebio/cli/auth.rb +106 -0
  16. data/lib/solvebio/cli/credentials.rb +54 -0
  17. data/lib/{cli → solvebio/cli}/irb.rb +0 -23
  18. data/lib/solvebio/cli/irbrc.rb +48 -0
  19. data/lib/solvebio/cli/tutorial.rb +12 -0
  20. data/lib/solvebio/client.rb +149 -0
  21. data/lib/solvebio/dataset.rb +60 -0
  22. data/lib/solvebio/dataset_field.rb +12 -0
  23. data/lib/solvebio/depository.rb +38 -0
  24. data/lib/solvebio/depository_version.rb +40 -0
  25. data/lib/solvebio/errors.rb +64 -0
  26. data/lib/solvebio/filter.rb +315 -0
  27. data/lib/solvebio/list_object.rb +73 -0
  28. data/lib/solvebio/locale.rb +43 -0
  29. data/lib/solvebio/query.rb +341 -0
  30. data/lib/solvebio/sample.rb +54 -0
  31. data/lib/solvebio/singleton_api_resource.rb +25 -0
  32. data/lib/solvebio/solve_object.rb +164 -0
  33. data/lib/solvebio/tabulate.rb +589 -0
  34. data/lib/solvebio/user.rb +4 -0
  35. data/lib/solvebio/util.rb +59 -0
  36. data/lib/solvebio/version.rb +3 -0
  37. data/solvebio.gemspec +10 -18
  38. data/test/helper.rb +6 -2
  39. data/test/solvebio/data/.gitignore +1 -0
  40. data/test/solvebio/data/.netrc +6 -0
  41. data/test/{data → solvebio/data}/netrc-save +0 -0
  42. data/test/solvebio/data/sample.vcf.gz +0 -0
  43. data/test/solvebio/data/test_creds +3 -0
  44. data/test/solvebio/test_annotation.rb +45 -0
  45. data/test/solvebio/test_client.rb +29 -0
  46. data/test/solvebio/test_conversion.rb +14 -0
  47. data/test/solvebio/test_credentials.rb +67 -0
  48. data/test/solvebio/test_dataset.rb +52 -0
  49. data/test/solvebio/test_depository.rb +24 -0
  50. data/test/solvebio/test_depositoryversion.rb +22 -0
  51. data/test/solvebio/test_error.rb +31 -0
  52. data/test/solvebio/test_filter.rb +86 -0
  53. data/test/solvebio/test_query.rb +282 -0
  54. data/test/solvebio/test_query_batch.rb +38 -0
  55. data/test/solvebio/test_query_init.rb +30 -0
  56. data/test/solvebio/test_query_tabulate.rb +73 -0
  57. data/test/solvebio/test_ratelimit.rb +31 -0
  58. data/test/solvebio/test_resource.rb +29 -0
  59. data/test/solvebio/test_sample_access.rb +60 -0
  60. data/test/solvebio/test_sample_download.rb +20 -0
  61. data/test/solvebio/test_tabulate.rb +129 -0
  62. data/test/solvebio/test_util.rb +39 -0
  63. metadata +100 -85
  64. data/Makefile +0 -17
  65. data/demo/README.md +0 -14
  66. data/demo/cheatsheet.rb +0 -31
  67. data/demo/dataset/facets.rb +0 -13
  68. data/demo/dataset/field.rb +0 -13
  69. data/demo/depository/README.md +0 -24
  70. data/demo/depository/all.rb +0 -13
  71. data/demo/depository/retrieve.rb +0 -13
  72. data/demo/depository/versions-all.rb +0 -13
  73. data/demo/query/query-filter.rb +0 -30
  74. data/demo/query/query.rb +0 -13
  75. data/demo/query/range-filter.rb +0 -18
  76. data/demo/test-api.rb +0 -98
  77. data/lib/cli/auth.rb +0 -122
  78. data/lib/cli/help.rb +0 -13
  79. data/lib/cli/irbrc.rb +0 -54
  80. data/lib/cli/options.rb +0 -75
  81. data/lib/client.rb +0 -154
  82. data/lib/credentials.rb +0 -67
  83. data/lib/errors.rb +0 -81
  84. data/lib/filter.rb +0 -312
  85. data/lib/locale.rb +0 -47
  86. data/lib/main.rb +0 -46
  87. data/lib/query.rb +0 -414
  88. data/lib/resource/annotation.rb +0 -23
  89. data/lib/resource/apiresource.rb +0 -241
  90. data/lib/resource/dataset.rb +0 -91
  91. data/lib/resource/datasetfield.rb +0 -37
  92. data/lib/resource/depository.rb +0 -50
  93. data/lib/resource/depositoryversion.rb +0 -69
  94. data/lib/resource/main.rb +0 -123
  95. data/lib/resource/sample.rb +0 -75
  96. data/lib/resource/solveobject.rb +0 -122
  97. data/lib/resource/user.rb +0 -5
  98. data/lib/tabulate.rb +0 -706
  99. data/lib/util.rb +0 -29
  100. data/test/Makefile +0 -9
  101. data/test/data/sample.vcf.gz +0 -0
  102. data/test/test-annotation.rb +0 -46
  103. data/test/test-auth.rb +0 -58
  104. data/test/test-client.rb +0 -27
  105. data/test/test-conversion.rb +0 -13
  106. data/test/test-dataset.rb +0 -42
  107. data/test/test-depository.rb +0 -35
  108. data/test/test-error.rb +0 -36
  109. data/test/test-filter.rb +0 -70
  110. data/test/test-netrc.rb +0 -52
  111. data/test/test-query-batch.rb +0 -40
  112. data/test/test-query-init.rb +0 -29
  113. data/test/test-query-paging.rb +0 -102
  114. data/test/test-query.rb +0 -71
  115. data/test/test-resource.rb +0 -40
  116. data/test/test-sample-access.rb +0 -59
  117. data/test/test-sample-download.rb +0 -20
  118. data/test/test-tabulate.rb +0 -131
  119. data/test/test-util.rb +0 -42
@@ -0,0 +1,73 @@
1
module SolveBio
  # One page of list-style API results.
  #
  # The code below relies on three things supplied by the parent class or the
  # API payload (not visible in this file): `url`, `data` (the items of the
  # current page) and `links` (an object answering `next` / `prev` with the
  # URL of the neighbouring page, or a falsy value when there is none).
  class ListObject < SolveObject

    # Attribute-style lookup. Only String/Symbol keys are forwarded to the
    # parent implementation; anything else (e.g. an Integer index) raises
    # ArgumentError so that callers are pointed at #data instead.
    def [](k)
      case k
      when String, Symbol
        super
      else
        raise ArgumentError.new("ListObject types only support String keys. Try: #data[#{k.inspect}])")
      end
    end

    # GETs "<url>/<id>" and converts the response with Util.to_solve_object.
    def retrieve(id)
      response = Client.request('get', "#{url}/#{id}")
      Util.to_solve_object(response)
    end

    # GETs the list URL with the given query params and converts the response.
    def all(params={})
      resp = Client.request('get', url, {:params => params})
      Util.to_solve_object(resp)
    end

    # POSTs the given params to the list URL and converts the response.
    def create(params={})
      resp = Client.request('post', url, {:params => params})
      Util.to_solve_object(resp)
    end

    # Fetches the page that follows this one.
    #
    # Returns the converted response, or nil when `links.next` is not set.
    # (Previously the fetched page was discarded and nil was always returned.)
    def next_page(params={})
      return nil unless self.links.next

      resp = Client.request('get', self.links.next, {:params => params})
      Util.to_solve_object(resp)
    end

    # Fetches the page that precedes this one.
    #
    # Returns the converted response, or nil when `links.prev` is not set.
    # (Previously the fetched page was discarded and nil was always returned.)
    def prev_page(params={})
      return nil unless self.links.prev

      resp = Client.request('get', self.links.prev, {:params => params})
      Util.to_solve_object(resp)
    end

    # Returns the i-th converted item of the current page.
    def at(i)
      self.to_a[i]
    end

    # Returns the current page's `data` passed through Util.to_solve_object.
    def to_a
      return Util.to_solve_object(self.data)
    end

    # Yields every item of this page and then of each following page.
    #
    # Without a block this is a no-op that returns self. Pagination advances
    # from the page currently being walked (not from self), so iteration
    # proceeds page by page until a page has no `links.next`.
    # NOTE(review): assumes every page returned by #next_page is itself a
    # ListObject (it must answer `data`, `to_a` and `next_page`) - confirm
    # against Util.to_solve_object.
    def each(*pass)
      return self unless block_given?

      page = self
      while page
        # Convert the page once instead of once per element.
        items = page.to_a
        page.data.size.times { |i| yield(items[i]) }
        page = page.next_page
      end
      return self
    end

    # Returns the first raw entry of `data` (not passed through
    # Util.to_solve_object, unlike #at).
    def first
      self.data[0]
    end
  end
end
@@ -0,0 +1,43 @@
1
module SolveBio
  # Integer pretty-printing with a thousands separator.
  #
  # When the optional `r18n-core` gem can be loaded, formatting is delegated
  # to R18n; otherwise a small built-in fallback is used.
  module Locale
    # Used only if r18n-core is not around
    @thousands_sep = ','
    @locale = ENV['LANG'] || ENV['LC_NUMERIC'] || 'en_US.UTF-8'

    class << self
      # Module-level separator used by the fallback formatter.
      attr_accessor :thousands_sep
    end

    # Instance-level reader kept for classes that include this module.
    # It delegates to the module-level setting: the including object's own
    # instance variables never held the separator.
    def thousands_sep
      SolveBio::Locale.thousands_sep
    end

    # Instance-level writer; updates the shared module-level separator.
    def thousands_sep=(value)
      SolveBio::Locale.thousands_sep = value
    end

    # Try to load r18n-core quietly. $VERBOSE is restored in `ensure` so a
    # missing gem (LoadError) no longer leaves warnings switched off for the
    # rest of the process.
    old_verbose = $VERBOSE
    begin
      $VERBOSE = false
      require 'r18n-core'
      R18n.set(@locale)
      have_r18n = true
    rescue LoadError
      have_r18n = false
    ensure
      $VERBOSE = old_verbose
    end

    if have_r18n
      # Formats `num` using R18n's localisation.
      def pretty_int(num)
        R18n::l(num)
      end
    else
      # Fallback formatter: groups the digits of `num` in threes, joined by
      # the module-level thousands separator. A leading minus sign is kept
      # (it used to be dropped because only digit runs were collected).
      def pretty_int(num)
        text = num.to_s
        sign = text.start_with?('-') ? '-' : ''
        sign + text.reverse.scan(/\d{1,3}/).join(SolveBio::Locale.thousands_sep).reverse
      end
    end

    module_function :pretty_int
  end
end

# Adds #pretty_int to integers. Integer is patched rather than Fixnum:
# Fixnum is a subclass of Integer on old Rubies (so it still gains the
# method) and no longer exists as the integer class on current ones.
class Integer
  include SolveBio::Locale

  # Returns this integer formatted by SolveBio::Locale.pretty_int.
  def pretty_int
    SolveBio::Locale.pretty_int(self)
  end
end
@@ -0,0 +1,341 @@
1
# -*- coding: utf-8 -*-
module SolveBio
  class Query
    # A Query API request wrapper that generates a request from Filter
    # objects, and can iterate through streaming result sets.
    include Enumerable

    # 2**62 - 1 fits Rubywise into a 64-bit Fixnum
    INT_MAX ||= 4_611_686_018_427_387_903

    # The maximum number of results fetched in one go. Note however
    # that iterating over a query can cause more fetches.
    DEFAULT_PAGE_SIZE ||= 100

    attr_reader :dataset_id
    attr_accessor :filters
    attr_accessor :limit
    attr_accessor :page_size
    attr_accessor :slice
    attr_reader :response
    attr_reader :page_offset

    # Creates a new Query object.
    #
    # Parameters:
    #   - `dataset_id`: Unique ID of dataset to query (Integer or String-like).
    #   - `params` (optional Hash), recognised keys:
    #     - `:data_url`: overrides the default "/v1/datasets/<id>/data" URL.
    #     - `:genome_build`: The genome build to use for the query.
    #     - `:fields`: List of specific fields to retrieve.
    #     - `:filters`: A SolveBio::Filter (its `filters` are used) or a list
    #       of filters.
    #     - `:limit`: Maximum number of query results to return (>= 0).
    #     - `:page_size`: Max number of results to fetch per query page (> 0).
    #
    # Raises TypeError when `dataset_id`, `:limit` or `:page_size` is invalid.
    def initialize(dataset_id, params={})
      # Integer (rather than Fixnum) accepts every integer on every Ruby
      # version; the Fixnum constant is gone from current Rubies.
      unless dataset_id.is_a?(Integer) || dataset_id.respond_to?(:to_str)
        raise TypeError, "'dataset_id' parameter must an Integer or String"
      end

      @dataset_id = dataset_id
      @data_url = params[:data_url] || "/v1/datasets/#{dataset_id}/data"
      @genome_build = params[:genome_build]
      @fields = params[:fields]
      @filters = params[:filters].kind_of?(SolveBio::Filter) ? params[:filters].filters : (params[:filters] || [])

      @response = nil
      # Limit defines the total number of results that will be returned
      # from a query involving 1 or more pagination requests.
      @limit = params[:limit] || INT_MAX
      # Page limit and page offset are the low level API limit and offset params.
      # page_offset may be changed periodically during sequential pagination requests.
      @page_size = params[:page_size] || DEFAULT_PAGE_SIZE
      # Page offset can only be set by execute()
      # It always contains the current absolute offset contained in the buffer.
      @page_offset = nil
      # @slice is set to tell the Query object that is being sliced and "def each" should not
      # reset the page_offset to 0 before iterating.
      @slice = nil

      # Any conversion failure or out-of-range value is reported as TypeError.
      begin
        @limit = Integer(@limit)
        raise RangeError if @limit < 0
      rescue
        raise TypeError, "'limit' parameter must an Integer >= 0"
      end

      begin
        @page_size = Integer(@page_size)
        raise RangeError if @page_size <= 0
      rescue
        raise TypeError, "'page_size' parameter must an Integer > 0"
      end
    end

    # Returns a new Query with the same dataset, URL, genome build, fields,
    # limit and page size, whose filters are this query's filters followed by
    # `filters`. The response buffer and slice are not carried over.
    def clone(filters=[])
      q = Query.new(@dataset_id, {
        :data_url => @data_url,
        :genome_build => @genome_build,
        :fields => @fields,
        :limit => @limit,
        :page_size => @page_size
      })

      q.filters += @filters unless @filters.empty?
      q.filters += filters unless filters.empty?
      q
    end

    # Returns a copy of this Query with the query args combined with
    # the existing set with AND.
    #
    # The arguments are simply passed to a new SolveBio::Filter object and
    # combined with any other filters with AND.
    #
    # By default, everything is combined using AND. If you provide
    # multiple filters in a single filter call, those are ANDed
    # together. If you provide multiple filters in multiple filter
    # calls, those are ANDed together.
    #
    # If you want something different, use the F class which supports
    # ``&`` (and), ``|`` (or) and ``~`` (not) operators. Then call
    # filter once with the resulting Filter instance.
    def filter(params={}, conn=:and)
      return clone(Filter.new(params, conn).filters)
    end

    # Shortcut to do range queries on supported datasets.
    def range(chromosome, start, stop, exact=false)
      return clone(GenomicFilter.new(chromosome, start, stop, exact).filters)
    end

    # Shortcut to do a single position filter on genomic datasets.
    def position(chromosome, position, exact=false)
      return clone(GenomicFilter.new(chromosome, position, position, exact).filters)
    end

    # Returns the total number of results in the result-set.
    # The count is dependent on the filters, but independent of any limit.
    # It is like SQL:
    #   SELECT COUNT(*) FROM <depository> [WHERE condition].
    # See also size() a function that is dependent on limit.
    # Requires at least one request.
    def count
      execute unless @response
      @response[:total]
    end
    alias_method(:total, :count)

    # Returns the total number of results that will be retrieved
    # given @limit set by the user.
    # Requires at least one API request to retrieve the total count.
    #
    # In SQL it is like:
    #   SELECT COUNT(*) FROM (
    #     SELECT * FROM <table> [WHERE condition] [LIMIT number]
    #   )
    def size
      [@limit, count].min
    end
    alias_method(:length, :size)

    # True when iterating this query would yield nothing.
    def empty?
      return size == 0
    end

    # Convert this Query to a String: the first buffered result rendered as
    # a table, followed by how many more results there are.
    def to_s
      if @limit == 0 || count == 0
        return 'Query returned 0 results'
      end

      result = Tabulate.tabulate(buffer[0], ['Fields', 'Data'], ['right', 'left'], true)
      return "\n#{result}\n\n... #{(size - 1).pretty_int} more results."
    end

    # Convert this Query to a Hash: the first result (same as self[0]).
    def to_h
      self[0]
    end

    # Retrieve an item or range from the set of results.
    #
    # - Integer key: returns the single result at that position, served from
    #   the buffer when possible, otherwise fetched with a one-result request.
    # - Range key: the range end is used as an exclusive bound (both `a..b`
    #   and `a...b` cover positions a up to b - 1). Returns an Array when the
    #   buffer already holds the whole range, otherwise a new sliced Query
    #   that fetches lazily when iterated.
    #
    # When this Query is itself a slice, positions are relative to the slice
    # and nil is returned for positions at or beyond its end.
    # Raises TypeError for other key types and IndexError for negative
    # positions.
    def [](key)
      unless key.is_a?(Range) || key.is_a?(Integer)
        raise TypeError, "Expecting index value to be a Range or Integer; is #{key.class}"
      end

      if key.kind_of?(Range)
        start, stop = key.begin, key.end

        # Reverse ranges aren't supported
        return [] if (start > stop)

        if start < 0 || stop < 0
          raise IndexError, 'Negative indexing is not supported'
        end

        # If a slice is already set, the new slice should be relative to it
        if @slice
          start += @slice.begin
          stop = [@slice.begin + stop, @slice.end].min
          # Return nil if the user requests something outside the current slice
          return nil if start >= @slice.end
        end

        # Does the current buffer contain the desired range?
        if buffer && start >= @page_offset && stop < (@page_offset + buffer.length)
          # Cursor's buffer has the items already
          # Avoid a query and just return the buffered items.
          # Calculate the offsets relative to the buffer.
          start = start - @page_offset
          stop = stop - @page_offset - 1
          return buffer[start..stop]
        end

        # We need to make a few requests to get the data between start and stop.
        # We should respect the user's @limit (used by each()) if it is smaller than the given Range.
        # To prevent the state of page_size and page_offset from being stored, we'll clone this object first.
        q = clone()
        q.limit = [stop - start, @limit].min
        # Setting range will signal to "each" which page_offset to start at.
        q.slice = Range.new(start, stop)
        return q
      end

      if key < 0
        raise IndexError, 'Negative indexing is not supported'
      end

      # If a slice already exists, the key is relative to that slice
      if @slice
        key = key + @slice.begin
        # Return nil if the user requests something outside the slice
        return nil if key >= @slice.end
      end

      # If the value at key is already in the buffer, return it.
      if buffer && key >= @page_offset && key < (@page_offset + buffer.length)
        return buffer[key - @page_offset]
      end

      # Otherwise, use key as the new page_offset and fetch a new page of results
      q = clone()
      q.limit = [1, @limit].min
      q.execute(key)
      return q.buffer[0]
    end

    # Yields each result, fetching further pages as the buffer runs out.
    #
    # At most `size` results are yielded. Iteration also stops as soon as the
    # server returns an empty page, which happens when a slice extends past
    # the end of the result set; previously nil was yielded for every
    # missing position and a request was issued for each of them.
    # Returns self (also when no block is given).
    def each(*args)
      return self unless block_given?

      # When calling each, we always reset the offset and buffer, unless called from
      # the slice function (def []).
      execute(@slice ? @slice.begin : 0)

      # Keep track when iterating through the buffer
      buffer_idx = 0
      # This will yield a max of @limit or count() results, whichever comes first.
      size.times do
        if buffer_idx == buffer.length
          # No more buffer! Get more results
          execute(@page_offset + buffer_idx)
          # Reset the buffer index.
          buffer_idx = 0
          # Nothing left on the server: stop instead of yielding nil.
          break if buffer.empty?
        end

        yield buffer[buffer_idx]
        buffer_idx += 1
      end

      self
    end

    # Returns `range_or_idx` unchanged when it is a Range, otherwise the
    # range idx..idx + 1.
    def to_range(range_or_idx)
      return range_or_idx.kind_of?(Range) ? range_or_idx :
        (range_or_idx..range_or_idx + 1)
    end

    # The results of the most recent request, or nil before any request.
    def buffer
      return nil unless @response
      @response[:results]
    end

    # Builds the request body (without offset/limit) from the filters,
    # fields and genome build. Two or more processed filters are wrapped in
    # a single {:and => [...]} clause.
    def build_query
      q = {}

      if @filters
        filters = Filter.process_filters(@filters)
        if filters.size > 1
          q[:filters] = [{:and => filters}]
        else
          q[:filters] = filters
        end
      end

      q[:fields] = @fields if @fields
      q[:genome_build] = @genome_build if @genome_build

      return q
    end

    # Executes the current query starting at `offset`, replacing the
    # response buffer. Returns [params, response].
    def execute(offset=0)
      params = build_query()

      # Always set the page offset before querying.
      @page_offset = offset

      params.merge!(
        :offset => @page_offset,
        # The user's limit trumps the page limit if it's smaller
        :limit => [@page_size, @limit].min
      )

      SolveBio::logger.debug("Executing query with offset: #{params[:offset]} limit: #{params[:limit]}")
      # TODO: handle request errors and reset page_offset
      @response = Client.post(@data_url, params)
      SolveBio::logger.debug("Query response took #{@response[:took]}ms, buffer size: #{buffer.length}, total: #{@response[:total]}")
      return params, @response
    end

  end

  # BatchQuery accepts a list of Query objects and executes them
  # in a single request to /v1/batch_query.
  class BatchQuery
    # Expects a list of Query objects; a single Query is wrapped in a list.
    def initialize(queries)
      unless queries.kind_of?(Array)
        queries = [queries]
      end

      @queries = queries
    end

    # Builds {:queries => [...]}, one entry per Query: its own build_query
    # result plus its dataset id and the smaller of its page size and limit.
    def build_query
      query = {:queries => []}

      @queries.each do |i|
        q = i.build_query
        q.merge!(
          :dataset => i.dataset_id,
          :limit => [i.page_size, i.limit].min
        )
        query[:queries] << q
      end

      return query
    end

    # POSTs the combined query, merged with any extra `params`, to
    # /v1/batch_query and returns the response.
    def execute(params={})
      _params = build_query()
      _params.merge!(params)
      response = Client.post('/v1/batch_query', _params)
      return response
    end
  end
end