solvebio 1.6.1 → 1.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (119) hide show
  1. data/.bumpversion.cfg +6 -0
  2. data/.gitignore +5 -4
  3. data/.travis.yml +1 -1
  4. data/Gemfile +3 -0
  5. data/README.md +34 -34
  6. data/Rakefile +1 -18
  7. data/bin/solvebio.rb +14 -16
  8. data/installer +64 -0
  9. data/lib/solvebio.rb +50 -11
  10. data/lib/solvebio/acccount.rb +4 -0
  11. data/lib/solvebio/annotation.rb +11 -0
  12. data/lib/solvebio/api_operations.rb +147 -0
  13. data/lib/solvebio/api_resource.rb +32 -0
  14. data/lib/solvebio/cli.rb +75 -0
  15. data/lib/solvebio/cli/auth.rb +106 -0
  16. data/lib/solvebio/cli/credentials.rb +54 -0
  17. data/lib/{cli → solvebio/cli}/irb.rb +0 -23
  18. data/lib/solvebio/cli/irbrc.rb +48 -0
  19. data/lib/solvebio/cli/tutorial.rb +12 -0
  20. data/lib/solvebio/client.rb +149 -0
  21. data/lib/solvebio/dataset.rb +60 -0
  22. data/lib/solvebio/dataset_field.rb +12 -0
  23. data/lib/solvebio/depository.rb +38 -0
  24. data/lib/solvebio/depository_version.rb +40 -0
  25. data/lib/solvebio/errors.rb +64 -0
  26. data/lib/solvebio/filter.rb +315 -0
  27. data/lib/solvebio/list_object.rb +73 -0
  28. data/lib/solvebio/locale.rb +43 -0
  29. data/lib/solvebio/query.rb +341 -0
  30. data/lib/solvebio/sample.rb +54 -0
  31. data/lib/solvebio/singleton_api_resource.rb +25 -0
  32. data/lib/solvebio/solve_object.rb +164 -0
  33. data/lib/solvebio/tabulate.rb +589 -0
  34. data/lib/solvebio/user.rb +4 -0
  35. data/lib/solvebio/util.rb +59 -0
  36. data/lib/solvebio/version.rb +3 -0
  37. data/solvebio.gemspec +10 -18
  38. data/test/helper.rb +6 -2
  39. data/test/solvebio/data/.gitignore +1 -0
  40. data/test/solvebio/data/.netrc +6 -0
  41. data/test/{data → solvebio/data}/netrc-save +0 -0
  42. data/test/solvebio/data/sample.vcf.gz +0 -0
  43. data/test/solvebio/data/test_creds +3 -0
  44. data/test/solvebio/test_annotation.rb +45 -0
  45. data/test/solvebio/test_client.rb +29 -0
  46. data/test/solvebio/test_conversion.rb +14 -0
  47. data/test/solvebio/test_credentials.rb +67 -0
  48. data/test/solvebio/test_dataset.rb +52 -0
  49. data/test/solvebio/test_depository.rb +24 -0
  50. data/test/solvebio/test_depositoryversion.rb +22 -0
  51. data/test/solvebio/test_error.rb +31 -0
  52. data/test/solvebio/test_filter.rb +86 -0
  53. data/test/solvebio/test_query.rb +282 -0
  54. data/test/solvebio/test_query_batch.rb +38 -0
  55. data/test/solvebio/test_query_init.rb +30 -0
  56. data/test/solvebio/test_query_tabulate.rb +73 -0
  57. data/test/solvebio/test_ratelimit.rb +31 -0
  58. data/test/solvebio/test_resource.rb +29 -0
  59. data/test/solvebio/test_sample_access.rb +60 -0
  60. data/test/solvebio/test_sample_download.rb +20 -0
  61. data/test/solvebio/test_tabulate.rb +129 -0
  62. data/test/solvebio/test_util.rb +39 -0
  63. metadata +100 -85
  64. data/Makefile +0 -17
  65. data/demo/README.md +0 -14
  66. data/demo/cheatsheet.rb +0 -31
  67. data/demo/dataset/facets.rb +0 -13
  68. data/demo/dataset/field.rb +0 -13
  69. data/demo/depository/README.md +0 -24
  70. data/demo/depository/all.rb +0 -13
  71. data/demo/depository/retrieve.rb +0 -13
  72. data/demo/depository/versions-all.rb +0 -13
  73. data/demo/query/query-filter.rb +0 -30
  74. data/demo/query/query.rb +0 -13
  75. data/demo/query/range-filter.rb +0 -18
  76. data/demo/test-api.rb +0 -98
  77. data/lib/cli/auth.rb +0 -122
  78. data/lib/cli/help.rb +0 -13
  79. data/lib/cli/irbrc.rb +0 -54
  80. data/lib/cli/options.rb +0 -75
  81. data/lib/client.rb +0 -154
  82. data/lib/credentials.rb +0 -67
  83. data/lib/errors.rb +0 -81
  84. data/lib/filter.rb +0 -312
  85. data/lib/locale.rb +0 -47
  86. data/lib/main.rb +0 -46
  87. data/lib/query.rb +0 -414
  88. data/lib/resource/annotation.rb +0 -23
  89. data/lib/resource/apiresource.rb +0 -241
  90. data/lib/resource/dataset.rb +0 -91
  91. data/lib/resource/datasetfield.rb +0 -37
  92. data/lib/resource/depository.rb +0 -50
  93. data/lib/resource/depositoryversion.rb +0 -69
  94. data/lib/resource/main.rb +0 -123
  95. data/lib/resource/sample.rb +0 -75
  96. data/lib/resource/solveobject.rb +0 -122
  97. data/lib/resource/user.rb +0 -5
  98. data/lib/tabulate.rb +0 -706
  99. data/lib/util.rb +0 -29
  100. data/test/Makefile +0 -9
  101. data/test/data/sample.vcf.gz +0 -0
  102. data/test/test-annotation.rb +0 -46
  103. data/test/test-auth.rb +0 -58
  104. data/test/test-client.rb +0 -27
  105. data/test/test-conversion.rb +0 -13
  106. data/test/test-dataset.rb +0 -42
  107. data/test/test-depository.rb +0 -35
  108. data/test/test-error.rb +0 -36
  109. data/test/test-filter.rb +0 -70
  110. data/test/test-netrc.rb +0 -52
  111. data/test/test-query-batch.rb +0 -40
  112. data/test/test-query-init.rb +0 -29
  113. data/test/test-query-paging.rb +0 -102
  114. data/test/test-query.rb +0 -71
  115. data/test/test-resource.rb +0 -40
  116. data/test/test-sample-access.rb +0 -59
  117. data/test/test-sample-download.rb +0 -20
  118. data/test/test-tabulate.rb +0 -131
  119. data/test/test-util.rb +0 -42
@@ -1,123 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- require_relative 'solveobject'
4
- require_relative 'annotation'
5
- require_relative 'apiresource'
6
- require_relative 'dataset'
7
- require_relative 'datasetfield'
8
- require_relative 'depository'
9
- require_relative 'depositoryversion'
10
- require_relative 'sample'
11
- require_relative 'user'
12
-
13
- class SolveBio::ListObject < SolveBio::SolveObject
14
-
15
- include Enumerable
16
-
17
- def all(params={})
18
- return request('get', self['url'], {:params => params})
19
- end
20
-
21
- def create(params={})
22
- return request('post', self['url'], {:params => params})
23
- end
24
-
25
- def next_page(params={})
26
- if self['links']['next']
27
- return request('get', self['links']['next'], {:params => params})
28
- end
29
- return nil
30
- end
31
-
32
- def prev_page(params={})
33
- if self['links']['prev']
34
- request('get', self['links']['prev'], {:params => params})
35
- end
36
- return nil
37
- end
38
-
39
- def at(i)
40
- self.to_a[i]
41
- end
42
-
43
- def to_a
44
- return to_solve_object(self['data'])
45
- end
46
-
47
- def each(*pass)
48
- return self unless block_given?
49
- i = 0
50
- ary = self.dup
51
- done = false
52
- until done
53
- if i >= ary['data'].size
54
- ary = next_page
55
- break unless ary
56
- i = 0
57
- end
58
- yield(ary.at(i))
59
- i += 1
60
- end
61
- return self
62
- end
63
-
64
- def first
65
- self['data'][0]
66
- end
67
-
68
- # def max
69
- # self['data'][self['total']]
70
- # end
71
-
72
- end
73
-
74
-
75
- SolveBio::SolveObject::CONVERSION = {
76
- 'Annotation' => SolveBio::Annotation,
77
- 'Depository' => SolveBio::Depository,
78
- 'DepositoryVersion' => SolveBio::DepositoryVersion,
79
- 'Dataset' => SolveBio::Dataset,
80
- 'DatasetField' => SolveBio::DatasetField,
81
- 'Sample' => SolveBio::Sample,
82
- 'User' => SolveBio::User,
83
- 'list' => SolveBio::ListObject
84
- }
85
-
86
- if __FILE__ == $0
87
- puts '-' * 50
88
- resp = {
89
- 'class_name' => 'Dataset',
90
- 'data_url' => 'https://api.solvebio.com/v1/datasets/25/data',
91
- 'depository' => 'ClinVar',
92
- 'depository_id' => 223,
93
- 'depository_version' => 'ClinVar/2.0.0-1',
94
- 'depository_version_id' => 15,
95
- 'description' => '',
96
- 'fields_url' => 'https://api.solvebio.com/v1/datasets/25/fields',
97
- 'full_name' => 'ClinVar/2.0.0-1/Variants',
98
- 'id' => 25,
99
- 'name' => 'Variants',
100
- 'title' => 'Variants',
101
- 'url' => 'https://api.solvebio.com/v1/datasets/25'
102
- }
103
- so = to_solve_object(resp)
104
- so = resp.to_solvebio
105
- puts so.inspect
106
- puts so.to_s
107
-
108
- if ARGV[0]
109
- require_relative './cli/auth.rb'
110
- include SolveBio::Auth
111
- login
112
- puts '-' * 30, ' HELP ', '-' * 30
113
- puts SolveBio::Depository.retrieve('ClinVar').help
114
- puts '-' * 30, ' Retrieve ClinVar ','-' * 30
115
- puts SolveBio::Depository.retrieve('ClinVar').to_s
116
- puts '-' * 30, ' Versions ClinVar ','-' * 30
117
- puts SolveBio::Depository.retrieve('Clinvar').versions.to_s
118
- puts '-' * 30, ' Dataset ','-' * 30
119
- puts SolveBio::Dataset.retrieve('Clinvar/2.0.0-1/Variants').to_s
120
- puts '-' * 30, ' All Depository ','-' * 30
121
- puts SolveBio::Depository.all.to_s
122
- end
123
- end
@@ -1,75 +0,0 @@
1
- # Solvebio API Resource for Samples
2
- require_relative 'apiresource'
3
- require_relative 'solveobject'
4
- require_relative '../errors'
5
-
6
- # Samples are VCF files uploaded to the SolveBio API. We currently
7
- # support uncompressed, extension `.vcf`, and gzip-compressed, extension
8
- # `.vcf.gz`, VCF files. Any other extension will be rejected.
9
- class SolveBio::Sample < SolveBio::APIResource
10
-
11
- include SolveBio::DeletableAPIResource
12
- include SolveBio::DownloadableAPIResource
13
- include SolveBio::ListableAPIResource
14
- include SolveBio::HelpableAPIResource
15
-
16
- def annotate
17
- SolveBio::Annotation.create :sample_id => self.id
18
- end
19
-
20
- # FIXME: Rubyize APIResource.retrieve
21
- def self.retrieve(id, params={})
22
- SolveBio::APIResource.retrieve(self, id)
23
- end
24
-
25
- def self.create(genome_build, params={})
26
- if params.member?(:vcf_url)
27
- if params.member?(:vcf_file)
28
- raise TypeError,
29
- 'Specified both vcf_url and vcf_file; use only one'
30
- end
31
- self.create_from_url(genome_build, params[:vcf_url])
32
- elsif params.member?(:vcf_file)
33
- return create_from_file(genome_build, params[:vcf_file])
34
- else
35
- raise TypeError,
36
- 'Must specify exactly one of vcf_url or vcf_file parameter'
37
- end
38
- end
39
-
40
- # Creates from the specified file. The data of the file should be in
41
- # VCF format.
42
- def self.create_from_file(genome_build, vcf_file)
43
-
44
- fh = File.open(vcf_file, 'rb')
45
- params = {:genome_build => genome_build,
46
- :vcf_file => fh}
47
- response = SolveBio::Client.client.post(class_url(self), params,
48
- :no_json => true)
49
- to_solve_object(response)
50
- end
51
-
52
- # Creates from the specified URL. The data at the URL should be in
53
- # VCF format.
54
- def self.create_from_url(genome_build, vcf_url)
55
-
56
- params = {:genome_build => genome_build,
57
- :vcf_url => vcf_url}
58
- begin
59
- response = SolveBio::Client.client.post class_url(self), params
60
- rescue SolveBio::Error => response
61
- end
62
- to_solve_object(response)
63
- end
64
- end
65
-
66
- if __FILE__ == $0
67
- unless SolveBio::API_HOST == 'https://api.solvebio.com'
68
- SolveBio::SolveObject::CONVERSION = {
69
- 'Sample' => SolveBio::Sample,
70
- } unless defined? SolveBio::SolveObject::CONVERSION
71
- url = 'http://downloads.solvebio.com/vcf/small_sample.vcf.gz'
72
- response = SolveBio::Sample.create_from_url 'hg19', url
73
- puts response
74
- end
75
- end
@@ -1,122 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- require 'json'
4
- require 'set'
5
- require_relative '../client'
6
-
7
- # Base class for all SolveBio API resource objects
8
- class SolveBio::SolveObject < Hash
9
-
10
- ALLOW_FULL_NAME_ID = false # Treat full_name parameter as an ID?
11
-
12
- attr_reader :unsaved_values
13
-
14
- def allow_full_name_id
15
- self.class.const_get(:ALLOW_FULL_NAME_ID)
16
- end
17
-
18
- def initialize(id=nil, params={})
19
-
20
- super()
21
- # store manually updated values for partial updates
22
- @unsaved_values = Set.new
23
-
24
- if id
25
- self['id'] = id
26
- elsif allow_full_name_id and params['full_name']
27
- self['full_name'] = params['full_name']
28
- # no ID was provided so temporarily set the id as full_name
29
- # this will get updated when the resource is refreshed
30
- self['id'] = params['full_name']
31
- end
32
- end
33
-
34
- # Element Reference — Retrieves the value object corresponding to the key object.
35
- # Note: *key* is turned into a string before access, because the underlying key type
36
- # is a string.
37
- def [](key)
38
- super(key.to_s)
39
- end
40
-
41
- def self.construct_from(cls, values)
42
- instance = cls.new(values['id'])
43
- instance.refresh_from(values)
44
- instance
45
- end
46
-
47
- def refresh_from(values)
48
- self.clear()
49
- @unsaved_values = Set.new
50
- values.each { |k, v| self[k] = to_solve_object(v) }
51
- end
52
-
53
- def request(method, url, params={})
54
- response = SolveBio::Client.client
55
- .request method, url, {:params => params}
56
- to_solve_object(response)
57
- end
58
-
59
- def inspect
60
- ident_parts = [self.class]
61
-
62
- if self['id'].kind_of?(Integer)
63
- ident_parts << "id=#{self['id']}"
64
- end
65
-
66
- if allow_full_name_id and self['full_name']
67
- ident_parts << "full_name=#{self['full_name']}"
68
- end
69
-
70
- '<%s:%x> JSON: %s' % [ident_parts.join(' '),
71
- self.object_id, self.to_json]
72
-
73
- end
74
-
75
- def to_s
76
- # No equivalent of Python's json sort_keys?
77
- JSON.pretty_generate(self, :indent => ' ')
78
- # self.to_json json.dumps(self, sort_keys=true, indent=2)
79
- end
80
-
81
- # @property
82
- def id
83
- self['id']
84
- end
85
- end
86
-
87
- class Hash
88
- def to_solvebio(klass=nil)
89
- resp = self.dup()
90
- if ! klass
91
- klass_name ||= resp['class_name']
92
- if klass_name.kind_of?(String)
93
- klass = SolveBio::SolveObject::CONVERSION[klass_name] ||
94
- SolveBio::SolveObject
95
- else
96
- klass = SolveBio::SolveObject
97
- end
98
- end
99
- SolveBio::SolveObject::construct_from(klass, resp)
100
- end
101
- end
102
-
103
- class Array
104
- def to_solvebio
105
- return self.map{|i| to_solve_object(i)}
106
- end
107
- end
108
-
109
- def to_solve_object(resp)
110
- if resp.kind_of?(Array) or
111
- (not resp.kind_of? SolveBio::SolveObject and resp.kind_of?(Hash))
112
- resp.to_solvebio
113
- else
114
- return resp
115
- end
116
- end
117
-
118
- if __FILE__ == $0
119
- puts SolveBio::SolveObject.new.inspect
120
- puts SolveBio::SolveObject.new(64).inspect
121
-
122
- end
@@ -1,5 +0,0 @@
1
- require_relative 'apiresource'
2
-
3
- class SolveBio::User < SolveBio::APIResource
4
- include SolveBio::SingletonAPIResource
5
- end
@@ -1,706 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- #
3
- # This file contains code from python-tabulate, modified for SolveBio
4
- #
5
- # Copyright © 2011-2013 Sergey Astanin
6
- #
7
- # Permission is hereby granted, free of charge, to any person obtaining
8
- # a copy of this software and associated documentation files (the
9
- # "Software"), to deal in the Software without restriction, including
10
- # without limitation the rights to use, copy, modify, merge, publish,
11
- # distribute, sublicense, and/or sell copies of the Software, and to
12
- # permit persons to whom the Software is furnished to do so, subject to
13
- # the following conditions:
14
- #
15
- # The above copyright notice and this permission notice shall be
16
- # included in all copies or substantial portions of the Software.
17
- #
18
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
22
- # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
- # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
- # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
-
26
- # from __future__ require 'print_function'
27
- # from __future__ require 'unicode_literals'
28
-
29
- # from collections require 'namedtuple'
30
- # from platform require 'python_version_tuple'
31
-
32
- # if python_version_tuple()[0] < "3"
33
- # from itertools require 'izip_longest'
34
- # _none_type = type(nil)
35
- # _int_type = int
36
- # _float_type = float
37
- # _text_type = unicode
38
- # _binary_type = str
39
- # else
40
- # from itertools require 'zip_longest as izip_longest'
41
- # from functools require 'reduce'
42
- # _none_type = type(nil)
43
- # _int_type = int
44
- # _float_type = float
45
- # _text_type = str
46
- # _binary_type = bytes
47
- # end
48
-
49
-
50
- require_relative 'main'
51
-
52
- module SolveBio::Tabulate
53
-
54
- VERSION = '0.6'
55
-
56
- TYPES = {NilClass => 0, Fixnum => 1, Float => 2, String => 4}
57
-
58
- INVISIBILE_CODES = %r{\\x1b\[\d*m} # ANSI color codes
59
-
60
- Line = Struct.new(:start, :hline, :sep, :last)
61
-
62
- DataRow = Struct.new(:start, :sep, :last)
63
-
64
- TableFormat = Struct.new(:lineabove, :linebelowheader,
65
- :linebetweenrows, :linebelow,
66
- :headerrow, :datarow,
67
- :padding, :usecolons,
68
- :with_header_hide,
69
- :without_header_hide)
70
-
71
- FORMAT_DEFAULTS = {
72
- :padding => 0,
73
- :usecolons => false,
74
- :with_header_hide => [],
75
- :without_header_hide => []
76
- }
77
-
78
-
79
- INVTYPES = {
80
- 4 => String,
81
- 2 => Float,
82
- 1 => Fixnum,
83
- 0 => NilClass
84
- }
85
-
86
- SIMPLE_DATAROW = DataRow.new('', ' ', '')
87
- PIPE_DATAROW = DataRow.new('|', '|', '|')
88
-
89
- SIMPLE_LINE = Line.new('', '-', ' ', '')
90
- GRID_LINE = Line.new('+', '-', '+', '+')
91
-
92
- TABLE_FORMATS = {
93
- :simple =>
94
- TableFormat.new(lineabove = nil,
95
- linebelowheader = SIMPLE_LINE,
96
- linebetweenrows = nil,
97
- linebelow = SIMPLE_LINE,
98
- headerrow = SIMPLE_DATAROW,
99
- datarow = SIMPLE_DATAROW,
100
- padding = 0,
101
- usecolons = false,
102
- with_header_hide = ['linebelow'],
103
- without_header_hide = []),
104
- :grid =>
105
- TableFormat.new(lineabove = SIMPLE_LINE,
106
- linebelowheader = Line.new('+', '=', '+', '+'),
107
- linebetweenrows = SIMPLE_LINE,
108
- linebelow = SIMPLE_LINE,
109
- headerrow = PIPE_DATAROW,
110
- datarow = PIPE_DATAROW,
111
- padding = 1,
112
- usecolons = false,
113
- with_header_hide = [],
114
- without_header_hide = ['linebelowheader']),
115
-
116
- :pipe =>
117
- TableFormat.new(lineabove = nil,
118
- linebelowheader = Line.new('|', '-', '|', '|'),
119
- linebetweenrows = nil,
120
- linebelow = nil,
121
- headerrow = PIPE_DATAROW,
122
- datarow = PIPE_DATAROW,
123
- padding = 1,
124
- usecolons = true,
125
- with_header_hide = [],
126
- without_header_hide = []),
127
-
128
- :orgmode =>
129
- TableFormat.new(lineabove=nil,
130
- linebelowheader = Line.new('|', '-', '+', '|'),
131
- linebetweenrows = nil,
132
- linebelow = nil,
133
- headerrow = PIPE_DATAROW,
134
- datarow = PIPE_DATAROW,
135
- padding = 1,
136
- usecolons = false,
137
- with_header_hide = [],
138
- without_header_hide = ['linebelowheader'])
139
- }
140
-
141
- module_function
142
- def simple_separated_format(separator)
143
- # FIXME? python code hard-codes separator = "\n" below.
144
- return TableFormat
145
- .new(
146
- :lineabove => nil,
147
- :linebelowheader => nil,
148
- :linebetweenrows => nil,
149
- :linebelow => nil,
150
- :headerrow => nil,
151
- :datarow => DataRow.new('', separator, ''),
152
- :padding => 0,
153
- :usecolons => false,
154
- :with_header_hide => [],
155
- :without_header_hide => [],
156
- )
157
- end
158
-
159
- # The least generic type, one of NilClass, Fixnum, Float, or String.
160
- # _type(nil) => NilClass
161
- # _type("foo") => String
162
- # _type("1") => Fixnum
163
- # _type("\x1b[31m42\x1b[0m") => Fixnum
164
- def _type(obj, has_invisible=true)
165
-
166
- obj = obj.strip_invisible if obj.kind_of?(String) and has_invisible
167
-
168
- if obj.nil?
169
- return NilClass
170
- elsif obj.kind_of?(Fixnum) or obj.int?
171
- return Fixnum
172
- elsif obj.kind_of?(Float) or obj.number?
173
- return Float
174
- else
175
- return String
176
- end
177
- end
178
-
179
- # [string] -> [padded_string]
180
- #
181
- # align_column(
182
- # ["12.345", "-1234.5", "1.23", "1234.5",
183
- # "1e+234", "1.0e234"], "decimal") =>
184
- # [' 12.345 ', '-1234.5 ', ' 1.23 ',
185
- # ' 1234.5 ', ' 1e+234 ', ' 1.0e234']
186
- def align_column(strings, alignment, minwidth=0, has_invisible=true)
187
- if alignment == "right"
188
- strings = strings.map{|s| s.to_s.strip}
189
- padfn = :padleft
190
- elsif alignment == 'center'
191
- strings = strings.map{|s| s.to_s.strip}
192
- padfn = :padboth
193
- elsif alignment == 'decimal'
194
- decimals = strings.map{|s| s.to_s.afterpoint}
195
- maxdecimals = decimals.max
196
- zipped = strings.zip(decimals)
197
- strings = zipped.map{|s, decs|
198
- s.to_s + " " * ((maxdecimals - decs))
199
- }
200
- padfn = :padleft
201
- else
202
- strings = strings.map{|s| s.to_s.strip}
203
- padfn = :padright
204
- end
205
-
206
- if has_invisible
207
- width_fn = :visible_width
208
- else
209
- width_fn = :size
210
- end
211
-
212
- maxwidth = [strings.map{|s| s.send(width_fn)}.max, minwidth].max
213
- strings.map{|s| s.send(padfn, maxwidth, has_invisible) }
214
- end
215
-
216
-
217
- def more_generic(type1, type2)
218
- moregeneric = [TYPES[type1] || 4, TYPES[type2] || 4].max
219
- return INVTYPES[moregeneric]
220
- end
221
-
222
-
223
- # The least generic type all column values are convertible to.
224
- #
225
- # column_type(["1", "2"]) => Fixnum
226
- # column_type(["1", "2.3"]) => Float
227
- # column_type(["1", "2.3", "four"]) => String
228
- # column_type(["four", '\u043f\u044f\u0442\u044c']) => String
229
- # column_type([nil, "brux"]) => String
230
- # column_type([1, 2, nil]) => Fixnum
231
- def column_type(strings, has_invisible=true)
232
- types = strings.map{|s| _type(s, has_invisible)}
233
- # require 'trepanning'; debugger
234
- return types.reduce(Fixnum){
235
- |t, result|
236
- more_generic(result, t)
237
- }
238
- end
239
-
240
-
241
- # Format a value according to its type.
242
- #
243
- # Unicode is supported:
244
- #
245
- # >>> hrow = ["\u0431\u0443\u043a\u0432\u0430",
246
- # "\u0446\u0438\u0444\u0440\u0430"]
247
- # tbl = [["\u0430\u0437", 2], ["\u0431\u0443\u043a\u0438", 4]]
248
- # expected = "\\u0431\\u0443\\u043a\\u0432\\u0430 \n
249
- # \\u0446\\u0438\\u0444\\u0440\\u0430\\n-------\n
250
- # -------\\n\\u0430\\u0437 \n
251
- # 2\\n\\u0431\\u0443\\u043a\\u0438 4'
252
- # tabulate(tbl, hrow) => good_result
253
- # true
254
- def format(val, valtype, floatfmt, missingval="")
255
- if val.nil?
256
- return missingval
257
- end
258
-
259
- if [Fixnum, String, Fixnum].member?(valtype)
260
- return "%s" % val
261
- elsif valtype.kind_of?(Float)
262
- return "%#{floatfmt}" % Float(val)
263
- else
264
- return "%s" % val
265
- end
266
- end
267
-
268
-
269
- def align_header(header, alignment, width)
270
- if alignment == "left"
271
- return header.padright(width)
272
- elsif alignment == "center"
273
- return header.padboth(width)
274
- else
275
- return header.padleft(width)
276
- end
277
- end
278
-
279
-
280
- # Transform a supported data type to an Array of Arrays, and an
281
- # Array of headers.
282
- #
283
- # Supported tabular data types:
284
- #
285
- # * Array-of-Arrays or another Enumerable of Enumerables
286
- #
287
- # * Hash of Enumerables
288
- #
289
- # The first row can be used as headers if headers="firstrow",
290
- # column indices can be used as headers if headers="keys".
291
- #
292
- def normalize_tabular_data(tabular_data, headers)
293
- if tabular_data.respond_to?(:keys) and tabular_data.respond_to?(:values)
294
- # likely a Hash
295
- keys = tabular_data.keys
296
- ## FIXME: what's different in the Python code?
297
- # columns have to be transposed
298
- # rows = list(izip_longest(*tabular_data.values()))
299
- # rows = vals[0].zip(*vals[1..-1])
300
- rows = tabular_data.values
301
- if headers == "keys"
302
- # headers should be strings
303
- headers = keys.map{|k| k.to_s}
304
- end
305
- elsif tabular_data.kind_of?(Enumerable)
306
- # Likely an Enumerable of Enumerables
307
- rows = tabular_data.to_a
308
- if headers == "keys" and not rows.empty? # keys are column indices
309
- headers = (0..rows[0]).map {|i| i.to_s}
310
- end
311
- else
312
- raise(ValueError, "tabular data doesn't appear to be a Hash" +
313
- " or Array")
314
- end
315
-
316
- # take headers from the first row if necessary
317
- if headers == "firstrow" and not rows.empty?
318
- headers = rows[0].map{|row| [_text_type(row)]}
319
- rows.shift
320
- end
321
-
322
- # pad with empty headers for initial columns if necessary
323
- if not headers.empty? and not rows.empty?
324
- nhs = headers.size
325
- ncols = rows[0].size
326
- if nhs < ncols
327
- headers = [''] * (ncols - nhs) + headers
328
- end
329
- end
330
-
331
- return rows, headers
332
- end
333
-
334
- TTY_COLS = ENV['COLUMNS'].to_i || 80 rescue 80
335
- # Return a string which represents a row of data cells.
336
- def build_row(cells, padding, first, sep, last)
337
-
338
- pad = ' ' * padding
339
- padded_cells = cells.map{|cell| pad + cell + pad }
340
-
341
- # SolveBio: we're only displaying Key-Value tuples (dimension of 2).
342
- # enforce that we don't wrap lines by setting a max
343
- # limit on row width which is equal to TTY_COLS (see printing)
344
- rendered_cells = (first + padded_cells.join(sep) + last).rstrip
345
- if rendered_cells.size > TTY_COLS
346
- if not cells[-1].end_with?(' ') and not cells[-1].end_with?('-')
347
- terminating_str = ' ... '
348
- else
349
- terminating_str = ''
350
- end
351
- prefix = rendered_cells[1..TTY_COLS - terminating_str.size - 1]
352
- rendered_cells = "%s%s%s" % [prefix, terminating_str, last]
353
- end
354
-
355
- return rendered_cells
356
- end
357
-
358
-
359
- # Return a string which represents a horizontal line.
360
- def build_line(colwidths, padding, first, fill, sep, last)
361
- cells = colwidths.map{|w| fill * (w + 2 * padding)}
362
- return build_row(cells, 0, first, sep, last)
363
- end
364
-
365
-
366
- # Return a segment of a horizontal line with optional colons which
367
- # indicate column's alignment (as in `pipe` output format).
368
- def _line_segment_with_colons(linefmt, align, colwidth)
369
- fill = linefmt.hline
370
- w = colwidth
371
- if ['right', 'decimal'].member?(align)
372
- return (fill[0] * (w - 1)) + ":"
373
- elsif align == "center"
374
- return ":" + (fill[0] * (w - 2)) + ":"
375
- elsif align == "left"
376
- return ":" + (fill[0] * (w - 1))
377
- else
378
- return fill[0] * w
379
- end
380
- end
381
-
382
-
383
- # Produce a plain-text representation of the table.
384
- def format_table(fmt, headers, rows, colwidths, colaligns)
385
- lines = []
386
- hidden = headers ? fmt.with_header_hide : fmt.without_header_hide
387
- pad = fmt.padding || 0
388
- datarow = fmt.datarow ? fmt.datarow : SIMPLE_DATAROW
389
- headerrow = fmt.headerrow ? fmt.headerrow : fmt.datarow
390
-
391
- if fmt.lineabove and hidden and hidden.member?("lineabove")
392
- lines << build_line(colwidths, pad, *fmt.lineabove)
393
- end
394
-
395
- unless headers.empty?
396
- lines << build_row(headers, pad, headerrow.start, headerrow.sep,
397
- headerrow.last)
398
- end
399
-
400
- if fmt.linebelowheader and not hidden.member?("linebelowheader")
401
- first, _, sep, last = fmt.linebelowheader
402
- if fmt.usecolons
403
- segs = [
404
- colwidths.zip(colaligns).map do |w, a|
405
- _line_segment_with_colons(fmt.linebelowheader, a, w + 2 * pad)
406
- end ]
407
- lines << build_row(segs, 0, first, sep, last)
408
- else
409
- lines << build_line(colwidths, pad, fmt.linebelowheader.start,
410
- fmt.linebelowheader.hline,
411
- fmt.linebelowheader.sep,
412
- fmt.linebelowheader.last)
413
- end
414
- end
415
-
416
- if rows and fmt.linebetweenrows and hidden.member?('linebetweenrows')
417
- # initial rows with a line below
418
- rows[1..-1].each do |row|
419
- lines << build_row(row, pad, fmt.datarow.start,
420
- fmt.datarow.sep, fmt.datarow.last)
421
- lines << build_line(colwidths, pad, fmt.linebetweenrows.start,
422
- fmt.linebelowheader.hline,
423
- fmt.linebetweenrows.sep,
424
- fmt.linebetweenrows.last)
425
- end
426
- # the last row without a line below
427
- lines << build_row(rows[-1], pad, datarow.start,
428
- datarow.sep, datarow.last)
429
- else
430
- rows.each do |row|
431
- lines << build_row(row, pad, datarow.start, datarow.sep,
432
- datarow.last)
433
-
434
- if fmt.linebelow and hidden.member?('linebelow')
435
- lines << build_line(colwidths, pad, fmt.linebelow.start,
436
- fmt.linebelowheader.hline,
437
- fmt.linebelow.sep,
438
- fmt.linebelow.last)
439
- end
440
- end
441
- end
442
- return lines.join("\n")
443
- end
444
-
445
- # Construct a simple TableFormat with columns separated by a separator.
446
- #
447
- # tsv = simple_separated_format("\t")
448
- # tabulate([["foo", 1], ["spam", 23]], [], tsv) =>
449
- # "foo 1\nspam 23"
450
- def tabulate(tabular_data, headers=[], tablefmt=TABLE_FORMATS[:orgmode],
451
- floatfmt="g", aligns=[], missingval='')
452
- list_of_lists, headers = normalize_tabular_data(tabular_data, headers)
453
-
454
- # optimization: look for ANSI control codes once,
455
- # enable smart width functions only if a control code is found
456
- plain_rows = [headers.map{|h| h.to_s}.join("\t")]
457
- row_text = list_of_lists.map{|row|
458
- row.map{|r| r.to_s}.join("\t")
459
- }
460
- plain_rows += row_text
461
- plain_text = plain_rows.join("\n")
462
-
463
- has_invisible = INVISIBILE_CODES.match(plain_text)
464
- if has_invisible
465
- width_fn = :visible_width
466
- else
467
- width_fn = :size
468
- end
469
-
470
- # format rows and columns, convert numeric values to strings
471
- cols = list_of_lists[0].zip(*list_of_lists[1..-1]) if
472
- list_of_lists.size > 1
473
-
474
- coltypes = cols.map{|c| column_type(c)}
475
-
476
- cols = cols.zip(coltypes).map do |c, ct|
477
- c.map{|v| format(v, ct, floatfmt, missingval)}
478
- end
479
-
480
- # align columns
481
- if aligns.empty?
482
- # dynamic alignment by col type
483
- aligns = coltypes.map do |ct|
484
- [Fixnum, Float].member?(ct) ? 'decimal' : 'left'
485
- end
486
- end
487
-
488
- minwidths =
489
- if headers.empty? then
490
- [0] * cols.size
491
- else
492
- headers.map{|h| h.send(width_fn) + 2}
493
- end
494
-
495
- cols = cols.zip(aligns, minwidths).map do |c, a, minw|
496
- align_column(c, a, minw, has_invisible)
497
- end
498
-
499
- if headers.empty?
500
- minwidths = cols.map{|c| c[0].send(width_fn)}
501
- else
502
- # align headers and add headers
503
- minwidths =
504
- minwidths.zip(cols).map{|minw, c| [minw, c[0].send(width_fn)].max}
505
- headers =
506
- headers.zip(aligns, minwidths).map{|h, a, minw| align_header(h, a, minw)}
507
- end
508
- rows = cols[0].zip(cols[1])
509
-
510
- tablefmt = TABLE_FORMATS[:orgmode] unless
511
- tablefmt.kind_of?(TableFormat)
512
-
513
- # make sure values don't have newlines or tabs in them
514
- rows = rows.each do |r|
515
- r[1] = r[1].gsub("\n", '').gsub("\t", '')
516
- end
517
- return format_table(tablefmt, headers, rows, minwidths, aligns)
518
- end
519
- end
520
-
521
- class Object
522
-
523
- # "123.45".number? => true
524
- # "123".number? => true
525
- # "spam".number? => false
526
- def number?
527
- begin
528
- Float(self)
529
- return true
530
- rescue
531
- return false
532
- end
533
- end
534
-
535
- # "123".int? => true
536
- # "123.45".int? => false
537
- def int?
538
- begin
539
- Integer(self)
540
- return true
541
- rescue
542
- return false
543
- end
544
- end
545
- end
546
-
547
- class String
548
-
549
- # Symbols after a decimal point, -1 if the string lacks the decimal point.
550
- #
551
- # "123.45".afterpoint => 2
552
- # "1001".afterpoint => -1
553
- # "eggs".afterpoint => -1
554
- # "123e45".afterpoint => 2
555
- def afterpoint
556
- if self.number?
557
- if self.int?
558
- return -1
559
- else
560
- pos = self.rindex('.') || -1
561
- pos = self.downcase().rindex('e') if pos < 0
562
- if pos >= 0
563
- return self.size - pos - 1
564
- else
565
- return -1 # no point
566
- end
567
- end
568
- else
569
- return -1 # not a number
570
- end
571
- end
572
-
573
- def adjusted_size(has_invisible)
574
- return has_invisible ? self.strip_invisible.size : self.size
575
- end
576
-
577
- # Visible width of a printed string. ANSI color codes are removed.
578
- #
579
- # ['\x1b[31mhello\x1b[0m', "world"].map{|s| s.visible_width} =>
580
- # [5, 5]
581
- def visible_width
582
- # if self.kind_of?(_text_type) or self.kind_of?(_binary_type)
583
- return self.strip_invisible.size
584
- # else
585
- # return _text_type(s).size
586
- # end
587
- end
588
-
589
-
590
- # Flush right.
591
- #
592
- # '\u044f\u0439\u0446\u0430'.padleft(6) =>
593
- # ' \u044f\u0439\u0446\u0430'
594
- # 'abc'.padleft(2) => 'abc'
595
- def padleft(width, has_invisible=true)
596
- s_width = self.adjusted_size(has_invisible)
597
- s_width < width ? (' ' * (width - s_width)) + self : self
598
- end
599
-
600
- # Flush left.
601
- #
602
- # padright(6, '\u044f\u0439\u0446\u0430') => '\u044f\u0439\u0446\u0430 '
603
- # padright(2, 'abc') => 'abc'
604
- def padright(width, has_invisible=true)
605
- s_width = self.adjusted_size(has_invisible)
606
- s_width < width ? self + (' ' * (width - s_width)) : self
607
- end
608
-
609
-
610
- # Center string with uneven space on the right
611
- #
612
- # '\u044f\u0439\u0446\u0430'.padboth(6) => ' \u044f\u0439\u0446\u0430 '
613
- # 'abc'.padboth(2) => 'abc'
614
- # 'abc'.padboth(6) => ' abc '
615
- def padboth(width, has_invisible=true)
616
- s_width = self.adjusted_size(has_invisible)
617
- return self if s_width >= width
618
- pad_size = width - s_width
619
- pad_left = ' ' * (pad_size/2)
620
- pad_right = ' ' * ((pad_size + 1)/ 2)
621
- pad_left + self + pad_right
622
- end
623
-
624
-
625
- # Remove invisible ANSI color codes.
626
- def strip_invisible
627
- return self.gsub(SolveBio::Tabulate::INVISIBILE_CODES, '')
628
- end
629
-
630
- end
631
-
632
# Manual smoke test: only runs when this file is executed directly rather
# than required. Prints one two-column table built from a sample record.
if __FILE__ == $PROGRAM_NAME
  include SolveBio::Tabulate

  # Disabled ad-hoc checks, kept for reference. Un-comment individually to
  # exercise a helper by hand.
  #
  # puts '" 123.45".num? %s' % "123.45".number?()         # true
  # puts "'123'.num?: %s" % '123'.number?                 # true
  # puts "'spam'.num? spam: %s" % "spam".number?          # false
  # puts "'123'.int? %s" % "123".int?                     # true
  # puts "'123.45'int?: %s" % '124.45'.int?               # false
  #
  # puts "_type(nil) %s = %s" % [_type(nil), NilClass]
  # puts "_type('foo') %s = %s" % [_type('foo'), String]
  # puts "_type('1') %s = %s" % [_type('1'), Fixnum]
  # puts "_type(''\x1b[31m42\x1b[0m') %s = %s" % [_type('\x1b[31m42\x1b[0m'), Fixnum]
  #
  # puts "'123.45'.afterpoint: 2 == %d" % '123.45'.afterpoint
  # puts "'1001'afterpoint : -1 == %d" % '1001'.afterpoint
  # puts "'eggs'.afterpoint : -1 == %d" % 'eggs'.afterpoint
  # puts "'123e45'.afterpoint: 2 == %d" % "123e45".afterpoint
  #
  # puts("'\u044f\u0439\u0446\u0430'.padleft(6) = '%s' == '%s'" %
  #      ["\u044f\u0439\u0446\u0430".padleft(6),
  #       "  \u044f\u0439\u0446\u0430"])
  # puts("'abc'.padleft(2) = '%s' == '%s'" %
  #      ['abc'.padleft(2), 'abc'])
  # puts("padright(2, 'abc') = '%s' == '%s'" %
  #      ['abc'.padright(2), 'abc'])
  # puts("'abc'.padboth(2) = '%s' == '%s'" %
  #      ['abc'.padboth(2), 'abc'])
  # puts("'abc'.padboth(6) = '%s' == '%s'" %
  #      ['abc'.padboth(6), ' abc  '])
  #
  # puts align_column(
  #   ["12.345", "-1234.5", "1.23", "1234.5",
  #    "1e+234", "1.0e234"], "decimal")
  #
  # puts '=' * 30
  # puts [' 12.345 ', '-1234.5 ', ' 1.23 ',
  #       ' 1234.5 ', ' 1e+234 ', ' 1.0e234']
  #
  # puts('column_type(["1", "2"]) is Fixnum == %s ' %
  #      column_type(["1", "2"]))
  # puts('column_type(["1", "2.3"]) is Float == %s ' %
  #      column_type(["1", "2.3"]))
  # puts('column_type(["1", "2.3", "four"]) is String => %s ' %
  #      column_type(["1", "2.3", "four"]))
  # puts('column_type(["four", "\u043f\u044f\u0442\u044c"]) is text => %s ' %
  #      column_type(["four", "\u043f\u044f\u0442\u044c"]))
  # puts('column_type([nil, "brux"]) is String => %s ' %
  #      column_type([nil, "brux"]))
  # puts('column_type([1, 2, nil]) is Fixnum => %s ' %
  #      column_type([1, 2, nil]))
  # tsv = simple_separated_format("\t")
  # puts tabulate([["foo", 1], ["spam", 23]], [], tsv)
  # hrow = ["\u0431\u0443\u043a\u0432\u0430", "\u0446\u0438\u0444\u0440\u0430"]
  # tbl = [["\u0430\u0437", 2], ["\u0431\u0443\u043a\u0438", 4]]
  # puts SolveBio::Tabulate.tabulate(tbl, hrow)

  # Sample record; insertion order determines the row order of the output.
  sample_record = {
    "rcvaccession_version"  => 2,
    "hg18_chromosome"       => "3",
    "hg19_start"            => 148562304,
    "rcvaccession"          => "RCV000060731",
    "hg38_start"            => 148844517,
    "reference_allele"      => "C",
    "gene_symbols"          => ["CPB1"],
    "rsid"                  => "rs150241322",
    "hg19_chromosome"       => "3",
    "hgvs"                  => ["NC_000003.12:g.148844517C>T"],
    "clinical_significance" => "other",
    "alternate_alleles"     => ["T"],
    "clinical_origin"       => ["somatic"],
    "type"                  => "SNV"
  }

  column_titles = %w[Fields Data]
  column_aligns = %w[right left]
  puts SolveBio::Tabulate.tabulate(sample_record.to_a, column_titles, column_aligns)
end