solvebio 1.6.1 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.bumpversion.cfg +6 -0
- data/.gitignore +5 -4
- data/.travis.yml +1 -1
- data/Gemfile +3 -0
- data/README.md +34 -34
- data/Rakefile +1 -18
- data/bin/solvebio.rb +14 -16
- data/installer +64 -0
- data/lib/solvebio.rb +50 -11
- data/lib/solvebio/acccount.rb +4 -0
- data/lib/solvebio/annotation.rb +11 -0
- data/lib/solvebio/api_operations.rb +147 -0
- data/lib/solvebio/api_resource.rb +32 -0
- data/lib/solvebio/cli.rb +75 -0
- data/lib/solvebio/cli/auth.rb +106 -0
- data/lib/solvebio/cli/credentials.rb +54 -0
- data/lib/{cli → solvebio/cli}/irb.rb +0 -23
- data/lib/solvebio/cli/irbrc.rb +48 -0
- data/lib/solvebio/cli/tutorial.rb +12 -0
- data/lib/solvebio/client.rb +149 -0
- data/lib/solvebio/dataset.rb +60 -0
- data/lib/solvebio/dataset_field.rb +12 -0
- data/lib/solvebio/depository.rb +38 -0
- data/lib/solvebio/depository_version.rb +40 -0
- data/lib/solvebio/errors.rb +64 -0
- data/lib/solvebio/filter.rb +315 -0
- data/lib/solvebio/list_object.rb +73 -0
- data/lib/solvebio/locale.rb +43 -0
- data/lib/solvebio/query.rb +341 -0
- data/lib/solvebio/sample.rb +54 -0
- data/lib/solvebio/singleton_api_resource.rb +25 -0
- data/lib/solvebio/solve_object.rb +164 -0
- data/lib/solvebio/tabulate.rb +589 -0
- data/lib/solvebio/user.rb +4 -0
- data/lib/solvebio/util.rb +59 -0
- data/lib/solvebio/version.rb +3 -0
- data/solvebio.gemspec +10 -18
- data/test/helper.rb +6 -2
- data/test/solvebio/data/.gitignore +1 -0
- data/test/solvebio/data/.netrc +6 -0
- data/test/{data → solvebio/data}/netrc-save +0 -0
- data/test/solvebio/data/sample.vcf.gz +0 -0
- data/test/solvebio/data/test_creds +3 -0
- data/test/solvebio/test_annotation.rb +45 -0
- data/test/solvebio/test_client.rb +29 -0
- data/test/solvebio/test_conversion.rb +14 -0
- data/test/solvebio/test_credentials.rb +67 -0
- data/test/solvebio/test_dataset.rb +52 -0
- data/test/solvebio/test_depository.rb +24 -0
- data/test/solvebio/test_depositoryversion.rb +22 -0
- data/test/solvebio/test_error.rb +31 -0
- data/test/solvebio/test_filter.rb +86 -0
- data/test/solvebio/test_query.rb +282 -0
- data/test/solvebio/test_query_batch.rb +38 -0
- data/test/solvebio/test_query_init.rb +30 -0
- data/test/solvebio/test_query_tabulate.rb +73 -0
- data/test/solvebio/test_ratelimit.rb +31 -0
- data/test/solvebio/test_resource.rb +29 -0
- data/test/solvebio/test_sample_access.rb +60 -0
- data/test/solvebio/test_sample_download.rb +20 -0
- data/test/solvebio/test_tabulate.rb +129 -0
- data/test/solvebio/test_util.rb +39 -0
- metadata +100 -85
- data/Makefile +0 -17
- data/demo/README.md +0 -14
- data/demo/cheatsheet.rb +0 -31
- data/demo/dataset/facets.rb +0 -13
- data/demo/dataset/field.rb +0 -13
- data/demo/depository/README.md +0 -24
- data/demo/depository/all.rb +0 -13
- data/demo/depository/retrieve.rb +0 -13
- data/demo/depository/versions-all.rb +0 -13
- data/demo/query/query-filter.rb +0 -30
- data/demo/query/query.rb +0 -13
- data/demo/query/range-filter.rb +0 -18
- data/demo/test-api.rb +0 -98
- data/lib/cli/auth.rb +0 -122
- data/lib/cli/help.rb +0 -13
- data/lib/cli/irbrc.rb +0 -54
- data/lib/cli/options.rb +0 -75
- data/lib/client.rb +0 -154
- data/lib/credentials.rb +0 -67
- data/lib/errors.rb +0 -81
- data/lib/filter.rb +0 -312
- data/lib/locale.rb +0 -47
- data/lib/main.rb +0 -46
- data/lib/query.rb +0 -414
- data/lib/resource/annotation.rb +0 -23
- data/lib/resource/apiresource.rb +0 -241
- data/lib/resource/dataset.rb +0 -91
- data/lib/resource/datasetfield.rb +0 -37
- data/lib/resource/depository.rb +0 -50
- data/lib/resource/depositoryversion.rb +0 -69
- data/lib/resource/main.rb +0 -123
- data/lib/resource/sample.rb +0 -75
- data/lib/resource/solveobject.rb +0 -122
- data/lib/resource/user.rb +0 -5
- data/lib/tabulate.rb +0 -706
- data/lib/util.rb +0 -29
- data/test/Makefile +0 -9
- data/test/data/sample.vcf.gz +0 -0
- data/test/test-annotation.rb +0 -46
- data/test/test-auth.rb +0 -58
- data/test/test-client.rb +0 -27
- data/test/test-conversion.rb +0 -13
- data/test/test-dataset.rb +0 -42
- data/test/test-depository.rb +0 -35
- data/test/test-error.rb +0 -36
- data/test/test-filter.rb +0 -70
- data/test/test-netrc.rb +0 -52
- data/test/test-query-batch.rb +0 -40
- data/test/test-query-init.rb +0 -29
- data/test/test-query-paging.rb +0 -102
- data/test/test-query.rb +0 -71
- data/test/test-resource.rb +0 -40
- data/test/test-sample-access.rb +0 -59
- data/test/test-sample-download.rb +0 -20
- data/test/test-tabulate.rb +0 -131
- data/test/test-util.rb +0 -42
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
module SolveBio
  # A dataset resource of the SolveBio API.
  #
  # Mixes in the create/update/list/delete/help API operations and adds
  # helpers for reaching the parent depository and depository version,
  # listing the dataset's fields and building queries against its data.
  class Dataset < APIResource
    include SolveBio::APIOperations::Create
    include SolveBio::APIOperations::Update
    include SolveBio::APIOperations::List
    include SolveBio::APIOperations::Delete
    include SolveBio::APIOperations::Help

    # Retrieves the Depository referenced by this dataset.
    #
    # The identifier is read with hash-style access: calling
    # +self.depository+ here would re-enter this very method and recurse
    # until the stack overflows.
    def depository
      Depository.retrieve(self['depository'])
    end

    # Retrieves the DepositoryVersion referenced by this dataset.
    #
    # Uses hash-style access for the same reason as #depository.
    def depository_version
      DepositoryVersion.retrieve(self['depository_version'])
    end

    # Looks up the fields of this dataset.
    #
    # name   - optional field name; when given, that single DatasetField is
    #          retrieved by its full name ("<dataset full_name>/<name>").
    # params - request parameters forwarded with the field list request.
    #
    # Returns a DatasetField when +name+ is given, otherwise the converted
    # response of the field list request. If that result has no +tabulate+
    # method, one is attached which renders name, data type and description.
    #
    # Raises SolveError when the dataset has no fields_url yet.
    def fields(name=nil, params={})
      unless self.fields_url
        raise SolveError.new(
          nil, 'Please use Dataset.retrieve({ID}) before looking up fields')
      end

      # construct the field's full_name if a field name is provided
      return DatasetField.retrieve("#{self.full_name}/#{name}") if name

      # Forward params like the other list helpers do; they used to be
      # accepted here but silently dropped.
      response = Client.request('get', self.fields_url, {:params => params})
      field_list = Util.to_solve_object(response)
      unless field_list.respond_to?(:tabulate)
        field_list.define_singleton_method(:tabulate) do |results_hash|
          rows = results_hash.to_a.map do |field|
            [field['name'], field['data_type'], field['description']]
          end
          Tabulate.tabulate(rows,
                            ['Field', 'Data Type', 'Description'],
                            ['left', 'left', 'left'], true)
        end
      end
      field_list
    end

    # Builds a Query against this dataset.
    #
    # params - query options; the caller's hash is left untouched and a
    #          copy carrying :data_url is handed to Query.
    #
    # Raises SolveError when the dataset has neither a data_url nor an id.
    def query(params={})
      unless self.respond_to?(:data_url)
        unless self.respond_to?(:id)
          raise SolveError.new(
            nil,
            'No Dataset ID was provided. ' +
            'Please instantiate the Dataset ' +
            'object with an ID or full_name.')
        end
        # automatically construct the data_url from the ID
        self.data_url = url + '/data'
      end

      Query.new(self.id, params.merge(:data_url => self.data_url))
    end
  end
end
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
module SolveBio
  # A single field (column) of a dataset.
  class DatasetField < APIResource
    include SolveBio::APIOperations::Create
    include SolveBio::APIOperations::List
    include SolveBio::APIOperations::Update

    # Requests the facets of this field from its facets_url.
    #
    # params - request parameters sent along with the GET request.
    #
    # Returns the response converted by Util.to_solve_object.
    def facets(params={})
      request_options = {:params => params}
      Util.to_solve_object(Client.get(self[:facets_url], request_options))
    end
  end
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
module SolveBio
  # A depository (or data repository) is like a source code
  # repository, but for datasets. Depositories have one or more
  # versions, which in turn contain one or more datasets. Typically,
  # depositories contain a series of datasets that are compatible with
  # each other (i.e. they come from the same data source or project).
  class Depository < APIResource
    include SolveBio::APIOperations::Create
    include SolveBio::APIOperations::List
    include SolveBio::APIOperations::Search
    include SolveBio::APIOperations::Update
    include SolveBio::APIOperations::Help

    # Fields that get shown by tabulate
    TAB_FIELDS = %w(description full_name latest_version name title url)

    # Looks up the versions of this depository.
    #
    # name   - optional version name; when given, that single
    #          DepositoryVersion is retrieved by its full name
    #          ("<depository full_name>/<name>").
    # params - request parameters sent with the version list request.
    #
    # Returns a DepositoryVersion when +name+ is given, otherwise the
    # converted response of the version list request. If that result has no
    # +tabulate+ method, one is attached which renders full name, title and
    # description.
    def versions(name=nil, params={})
      # construct the depo version full name
      return DepositoryVersion.retrieve("#{self['full_name']}/#{name}") if name

      response = Client.request('get', versions_url, {:params => params})
      version_list = Util.to_solve_object(response)
      unless version_list.respond_to?(:tabulate)
        version_list.define_singleton_method(:tabulate) do |rows_source|
          rows = rows_source.to_a.map do |version|
            [version['full_name'], version['title'], version['description']]
          end
          # The sibling resources call the tabulator as plain `Tabulate`;
          # `Utils::Tabulate` does not match that constant.
          Tabulate.tabulate(rows,
                            ['Depository Version', 'Title', 'Description'],
                            ['left', 'left', 'left'], true)
        end
      end
      version_list
    end
  end
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
module SolveBio
  # One version of a depository; it groups the datasets of that version.
  class DepositoryVersion < APIResource
    include SolveBio::APIOperations::Create
    include SolveBio::APIOperations::List
    include SolveBio::APIOperations::Update
    include SolveBio::APIOperations::Help

    # Fields that get shown by tabulate
    TAB_FIELDS = %w(datasets_url depository description full_name
                    latest url)

    # Builds "<version full_name>/<name>", the identifier handed to
    # Dataset.retrieve. +name+ defaults to this version's own name.
    #
    # NOTE(review): this method shadows attribute-style access to the
    # 'datasets_url' attribute listed in TAB_FIELDS; read that attribute
    # with self['datasets_url'].
    def datasets_url(name=nil)
      name ||= self['name']
      "#{self['full_name']}/#{name}"
    end

    # Looks up the datasets of this depository version.
    #
    # name   - optional dataset name; when given, that single Dataset is
    #          retrieved.
    # params - request parameters sent with the dataset list request.
    #
    # Returns a Dataset when +name+ is given, otherwise the converted
    # response of the dataset list request. If that result has no
    # +tabulate+ method, one is attached which renders full name, title and
    # description.
    def datasets(name=nil, params={})
      return Dataset.retrieve(datasets_url(name)) if name

      # The list request needs the 'datasets_url' attribute. The
      # #datasets_url helper above returns a "full_name/name" identifier,
      # not a request URL. NOTE(review): confirm against the API response.
      response = Client.request('get', self['datasets_url'],
                                {:params => params})
      dataset_list = Util.to_solve_object(response)
      unless dataset_list.respond_to?(:tabulate)
        dataset_list.define_singleton_method(:tabulate) do |rows_source|
          rows = rows_source.to_a.map do |dataset|
            [dataset['full_name'], dataset['title'], dataset['description']]
          end
          Tabulate.tabulate(rows,
                            ['Field', 'Title', 'Description'],
                            ['left', 'left', 'left'], true)
        end
      end

      dataset_list
    end

    # Orders depository versions by their full name.
    def <=>(other)
      self[:full_name] <=> other[:full_name]
    end
  end
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
module SolveBio
  # Error raised for failed SolveBio API interactions.
  #
  # It can be built from an HTTP-like response (anything answering +code+
  # and +body+), from an explicit message, or from nothing at all, in which
  # case DEFAULT_MESSAGE is used.
  class SolveError < RuntimeError
    DEFAULT_MESSAGE =
      'Unexpected error communicating with SolveBio. ' +
      'If this problem persists, let us know at ' +
      'contact@solvebio.com.'

    # Status codes whose JSON body is inspected for error details.
    CLIENT_ERROR_CODES = [400, 401, 403, 404]

    # Keys of the JSON body that describe the error as a whole; every
    # other key is reported as a field error.
    GENERAL_ERROR_KEYS = ['detail', 'non_field_errors']

    attr_reader :json_body
    attr_reader :status_code
    attr_reader :message
    attr_reader :field_errors

    # response - optional object answering +code+ and +body+.
    # message  - optional message; DEFAULT_MESSAGE when nil or false.
    #
    # For status 400, 401, 403 and 404 with a parseable JSON body the
    # message is replaced by 'Bad request.' or by the body's 'detail' /
    # 'non_field_errors', followed by a list of the offending fields.
    def initialize(response=nil, message=nil)
      @json_body = nil
      @status_code = nil
      # `||` rather than `or`: `@message = message or DEFAULT_MESSAGE`
      # parses as `(@message = message) or DEFAULT_MESSAGE`, so the default
      # was never assigned.
      @message = message || DEFAULT_MESSAGE
      @field_errors = []

      parse_response(response) if response
      super(@message)
    end

    def to_s
      @message
    end

    private

    # Records the status code, parses the body as JSON and, for the
    # client error codes, derives the message from it.
    def parse_response(response)
      @status_code = response.code.to_i
      begin
        @json_body = JSON.parse(response.body)
      rescue
        # An unparseable (or missing) body is logged, not raised: this
        # object is itself an error being constructed.
        SolveBio.logger.debug(
          "API Response (%d): No content." % @status_code)
        return
      end

      SolveBio.logger.debug(
        "API Response (#{@status_code}): #{@json_body}")
      describe_client_error if CLIENT_ERROR_CODES.member?(@status_code)
    end

    # Builds the message and field error list from the parsed JSON body.
    def describe_client_error
      @message = 'Bad request.'
      # Only a JSON object carries named error details; arrays, strings
      # and null bodies keep the generic message.
      return unless @json_body.kind_of?(Hash)

      @message = @json_body['detail'] if @json_body.member?('detail')

      if @json_body.member?('non_field_errors')
        @message = Array(@json_body['non_field_errors']).join(', ')
      end

      @json_body.each do |key, value|
        next if GENERAL_ERROR_KEYS.member?(key)
        value = value.join(', ') if value.kind_of?(Array)
        @field_errors << ('%s (%s)' % [key, value])
      end

      unless @field_errors.empty?
        @message = "#{@message} The following fields were missing " +
                   ('or invalid: %s' % @field_errors.join(', '))
      end
    end
  end
end
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
module SolveBio
  # SolveBio::Filter objects.
  #
  # Makes it easier to create filters cumulatively using ``&`` (and),
  # ``|`` (or) and ``~`` (not) operations.
  #
  # == Example
  #
  #     require 'solvebio'
  #     f = SolveBio::Filter.new                       #=> <SolveBio::Filter []>
  #     f &= SolveBio::Filter.new :price => 'Free'     #=> <SolveBio::Filter [[:price, "Free"]]>
  #     f |= SolveBio::Filter.new :style => 'Mexican'  #=> <SolveBio::Filter [{:or=>[[:price, "Free"], [:style, "Mexican"]]}]>
  #
  # The final result is a filter that can be used in a query which matches
  # "price = 'Free' or style = 'Mexican'".
  #
  # By default, the key/value pairs of one Hash are AND'ed together.
  # However, you can change that to OR by passing in +:or+ as the last
  # argument.
  #
  # * `<field>=<value>` is a term filter (exact term)
  # * `<field>__in=[<item1>, ...]` matches any of the terms <item1> and so on
  # * `<field>__range=[<start>, <end>]` matches anything from <start> to <end>
  #
  # String terms are not analyzed and are always assumed to be exact matches.
  #
  # Numeric columns can be selected by range using:
  #
  # * `<field>__gt`: greater than
  # * `<field>__gte`: greater than or equal to
  # * `<field>__lt`: less than
  # * `<field>__lte`: less than or equal to
  #
  # Field action examples:
  #
  #     dataset.query(:gene__in  => ['BRCA', 'GATA3'],
  #                   :chr       => '3',
  #                   :start__gt => 10000,
  #                   :end__lte  => 20000)
  class Filter
    # Field operations accepted after the "__" suffix of a key.
    VALID_OPERATIONS = %w(gt gte lt lte in range contains prefix regexp)

    attr_accessor :filters

    # Creates a new Filter.
    #
    # filters - a Hash of field conditions (normalized and ordered by key),
    #           an Array of already-built filter items (stored as given), or
    #           another Filter (its items are deep-copied).
    # conn    - connector (+:and+ / +:or+) wrapped around the items when
    #           more than one is given.
    #
    # Raises TypeError for any other argument type.
    def initialize(filters={}, conn=:and)
      case filters
      when Hash
        pairs = filters.keys.sort.map { |key| [key, filters[key]] }
        @filters = SolveBio::Filter.normalize(pairs)
      when Array
        @filters = filters
      when SolveBio::Filter
        # A copied Filter is already in its final shape; no wrapping.
        @filters = SolveBio::Filter.deep_copy(filters.filters)
        return
      else
        raise TypeError, "Invalid filter type #{filters.class}"
      end

      @filters = [{conn => @filters}] if filters.size > 1
    end

    def inspect
      "<SolveBio::Filter #{@filters.inspect}>"
    end

    def empty?
      @filters.empty?
    end

    # Deep copy
    def clone
      SolveBio::Filter.deep_copy(self)
    end

    # OR and AND will create a new Filter, with the filters from both Filter
    # objects combined with the connector `conn`. Neither operand is
    # modified.
    def combine(other, conn=:and)
      return other.clone if self.empty?
      return self.clone if other.empty?

      if connector?(self.filters[0], conn)
        # Extend our existing connector block with the other's items.
        combined = self.clone
        combined.filters[0][conn] += SolveBio::Filter.deep_copy(other.filters)
      elsif connector?(other.filters[0], conn)
        combined = other.clone
        combined.filters[0][conn] += SolveBio::Filter.deep_copy(self.filters)
      else
        # Build a brand new Filter. Calling `initialize` here would
        # re-initialize (and thereby mutate) the receiver.
        items = SolveBio::Filter.deep_copy(self.filters + other.filters)
        combined = SolveBio::Filter.new(items, conn)
      end

      combined
    end

    def |(other)
      combine(other, :or)
    end

    def &(other)
      combine(other, :and)
    end

    # Negation. The "not" of an empty filter is an empty filter, and
    # negating an already negated filter removes the negation.
    def ~
      negated = self.clone
      return negated if negated.empty?

      # length of the filter list should never be more than 1
      first = negated.filters.first
      if first.kind_of?(Hash) && first.member?(:not)
        # Already a single 'not' block: swap out the 'not'.
        negated.filters = [first[:not]]
      else
        # 'not' blocks can contain only a Hash or a single tuple filter,
        # so we wrap the first element of the filter list.
        negated.filters = [{:not => first}]
      end

      negated
    end

    # Checks and normalizes filter array tuples.
    #
    # ary - Array of [key, value] tuples.
    #
    # Returns a new Array of tuples; values of gt/gte/lt/lte operations are
    # converted to Float and Range values of range operations to
    # [min, max].
    #
    # Raises TypeError for malformed tuples, unknown operations and
    # non-numeric or mis-shaped values, and IndexError when a range starts
    # after it ends.
    def self.normalize(ary)
      ary.map do |tuple|
        unless tuple.kind_of?(Array)
          raise(TypeError,
                "Invalid filter element #{tuple.class}; want Array")
        end
        unless tuple.size == 2
          raise(TypeError,
                "Filter element size must be 2; is #{tuple.size}")
        end

        key, value = tuple
        # Keys without a "__<operation>" suffix are plain term filters.
        next tuple unless key.to_s =~ /.+__(.+)$/

        op = $1
        unless VALID_OPERATIONS.member?(op)
          raise(TypeError,
                "Invalid field operation #{op} in #{key}")
        end

        case op
        when 'gt', 'gte', 'lt', 'lte'
          [key, numeric_value(key, value)]
        when 'range'
          [key, range_value(key, value)]
        when 'in'
          unless value.kind_of?(Array)
            raise(TypeError,
                  "Invalid field value #{value} for #{key}; " +
                  "Should be an array")
          end
          tuple
        else
          tuple
        end
      end
    end

    def self.deep_copy(obj)
      Marshal.load(Marshal.dump(obj))
    end

    # Takes a SolveBio::Filter or an Array of filter items and returns
    # an Array that can be passed off (when converted to JSON) to a
    # SolveBio client filter parameter. As such, the output format is
    # highly dependent on the SolveBio API format.
    #
    # The filter items can be either a SolveBio::Filter, or Hash of
    # the right form, or an Array of the right form. Only the first key of
    # a Hash item is used. Items of a nested Filter are spliced into the
    # result rather than nested one level deeper.
    #
    # Raises TypeError for items of any other class.
    def self.process_filters(filters)
      if filters.kind_of?(SolveBio::Filter)
        return filters.filters ? process_filters(filters.filters) : []
      end

      rv = []
      filters.each do |item|
        case item
        when SolveBio::Filter
          rv.concat(process_filters(item.filters)) if item.filters
        when Hash
          key = item.keys[0]
          rv << {key => process_connector_value(item[key])}
        when Array
          rv << item
        else
          raise TypeError, "Invalid filter class #{item.class}"
        end
      end
      rv
    end

    # Converts the value of a gt/gte/lt/lte operation to Float.
    def self.numeric_value(key, value)
      Float(value)
    rescue
      raise(TypeError,
            "Invalid field value #{value} for #{key}; " +
            "Should be a number")
    end
    private_class_method :numeric_value

    # Validates the value of a range operation and returns it as a
    # two-element Array.
    def self.range_value(key, value)
      value = [value.min, value.max] if value.kind_of?(Range)

      unless value.kind_of?(Array)
        raise(TypeError,
              "Invalid field value #{value} for #{key}; " +
              "Should be an array")
      end
      unless value.size == 2
        raise(TypeError,
              "Invalid field value #{value} for #{key}; " +
              "Array should have exactly two values")
      end

      # Validate the bounds before comparing them, so that non-numeric
      # bounds are reported as a TypeError instead of failing inside `>`.
      begin
        first = Float(value.first)
        last = Float(value.last)
      rescue
        raise(TypeError,
              "Invalid field values for #{key}; " +
              "Both should be numbers")
      end

      if first > last
        raise(IndexError,
              "Invalid field value #{value} for #{key}; " +
              "Start value must not be greater than end value")
      end

      value
    end
    private_class_method :range_value

    # Processes the value stored under a connector key (:and, :or, :not).
    def self.process_connector_value(val)
      if val.kind_of?(Hash)
        # A single nested connector block, e.g. {:not => {:or => [...]}}.
        # Wrap it so it is processed as one item and not iterated as
        # [key, value] pairs.
        process_filters([val]).first
      elsif filter_tuple?(val)
        # A single [field, value] tuple, e.g. {:not => [:price, "Free"]}.
        val
      else
        process_filters(val)
      end
    end
    private_class_method :process_connector_value

    # True for a [field, value] tuple, as opposed to a list of filter items.
    def self.filter_tuple?(obj)
      obj.kind_of?(Array) && obj.size == 2 &&
        (obj.first.kind_of?(String) || obj.first.kind_of?(Symbol))
    end
    private_class_method :filter_tuple?

    private

    # True when +item+ is a connector block ({conn => [...]}) for +conn+.
    def connector?(item, conn)
      item.kind_of?(Hash) && item.member?(conn)
    end
  end

  # Helper class that generates filters on genomic coordinates.
  #
  # Range filtering only works on "genomic" datasets
  # (where dataset['is_genomic'] is true).
  class GenomicFilter < Filter
    # Standardized fields for genomic coordinates in SolveBio
    FIELD_START = 'genomic_coordinates.start'
    FIELD_STOP  = 'genomic_coordinates.stop'
    FIELD_CHR   = 'genomic_coordinates.chromosome'

    # Handles UCSC-style range queries (chr1:100-200). Thousands
    # separators in the positions ("chr2:1,000-2,000") are ignored, and a
    # single position ("chr1:100") is used as both start and stop.
    #
    # Raises ArgumentError when the string is not of that form.
    def self.from_string(string, exact=false)
      chromosome, pos = string.to_s.split(':', 2)
      if chromosome.nil? || chromosome.empty? || pos.nil? || pos.empty?
        raise ArgumentError,
              'Please use UCSC-style format: "chr2:1000-2000"'
      end

      pos = pos.delete(',')
      start, stop = pos.include?('-') ? pos.split('-', 2) : [pos, pos]

      new(chromosome, start, stop, exact)
    end

    # This class supports single position and range filters.
    #
    # By default, the filter will match any record that overlaps with
    # the position or range specified. Exact matches must be explicitly
    # specified using the `exact` parameter.
    #
    # chromosome - chromosome name or number, with or without a "chr"
    #              prefix, or nil.
    # start      - start position (Integer or numeric String) or nil.
    # stop       - stop position; defaults to +start+.
    # exact      - match start/stop exactly instead of by overlap.
    #
    # Raises ArgumentError when start or stop is not an integer.
    def initialize(chromosome, start, stop=nil, exact=false)
      begin
        start = coerce_position(start) unless start.nil?
        stop = stop ? coerce_position(stop) : start
      rescue ArgumentError, TypeError
        raise ArgumentError,
              'Start and stop positions must be integers or nil'
      end

      if exact || start.nil?
        f = SolveBio::Filter.new({FIELD_START => start, FIELD_STOP => stop})
      else
        # A record overlaps when it spans the whole region ...
        f = SolveBio::Filter.new({"#{FIELD_START}__lte" => start,
                                  "#{FIELD_STOP}__gte" => stop})
        if start != stop
          # ... or when it starts or stops inside the region.
          f |= SolveBio::Filter.new({"#{FIELD_START}__range" =>
                                      [start, stop + 1]})
          f |= SolveBio::Filter.new({"#{FIELD_STOP}__range" =>
                                      [start, stop + 1]})
        end
      end

      chr = chromosome.nil? ? nil : chromosome.to_s.sub('chr', '')
      f &= SolveBio::Filter.new({FIELD_CHR => chr})

      @filters = f.filters
    end

    def inspect
      "<GenomicFilter #{@filters}>"
    end

    private

    # Converts a position to Integer. Strings are read as base 10, so a
    # leading zero is not taken as an octal prefix.
    def coerce_position(value)
      value.kind_of?(String) ? Integer(value, 10) : Integer(value)
    end
  end
end
|