solvebio 1.6.1 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. data/.bumpversion.cfg +6 -0
  2. data/.gitignore +5 -4
  3. data/.travis.yml +1 -1
  4. data/Gemfile +3 -0
  5. data/README.md +34 -34
  6. data/Rakefile +1 -18
  7. data/bin/solvebio.rb +14 -16
  8. data/installer +64 -0
  9. data/lib/solvebio.rb +50 -11
  10. data/lib/solvebio/acccount.rb +4 -0
  11. data/lib/solvebio/annotation.rb +11 -0
  12. data/lib/solvebio/api_operations.rb +147 -0
  13. data/lib/solvebio/api_resource.rb +32 -0
  14. data/lib/solvebio/cli.rb +75 -0
  15. data/lib/solvebio/cli/auth.rb +106 -0
  16. data/lib/solvebio/cli/credentials.rb +54 -0
  17. data/lib/{cli → solvebio/cli}/irb.rb +0 -23
  18. data/lib/solvebio/cli/irbrc.rb +48 -0
  19. data/lib/solvebio/cli/tutorial.rb +12 -0
  20. data/lib/solvebio/client.rb +149 -0
  21. data/lib/solvebio/dataset.rb +60 -0
  22. data/lib/solvebio/dataset_field.rb +12 -0
  23. data/lib/solvebio/depository.rb +38 -0
  24. data/lib/solvebio/depository_version.rb +40 -0
  25. data/lib/solvebio/errors.rb +64 -0
  26. data/lib/solvebio/filter.rb +315 -0
  27. data/lib/solvebio/list_object.rb +73 -0
  28. data/lib/solvebio/locale.rb +43 -0
  29. data/lib/solvebio/query.rb +341 -0
  30. data/lib/solvebio/sample.rb +54 -0
  31. data/lib/solvebio/singleton_api_resource.rb +25 -0
  32. data/lib/solvebio/solve_object.rb +164 -0
  33. data/lib/solvebio/tabulate.rb +589 -0
  34. data/lib/solvebio/user.rb +4 -0
  35. data/lib/solvebio/util.rb +59 -0
  36. data/lib/solvebio/version.rb +3 -0
  37. data/solvebio.gemspec +10 -18
  38. data/test/helper.rb +6 -2
  39. data/test/solvebio/data/.gitignore +1 -0
  40. data/test/solvebio/data/.netrc +6 -0
  41. data/test/{data → solvebio/data}/netrc-save +0 -0
  42. data/test/solvebio/data/sample.vcf.gz +0 -0
  43. data/test/solvebio/data/test_creds +3 -0
  44. data/test/solvebio/test_annotation.rb +45 -0
  45. data/test/solvebio/test_client.rb +29 -0
  46. data/test/solvebio/test_conversion.rb +14 -0
  47. data/test/solvebio/test_credentials.rb +67 -0
  48. data/test/solvebio/test_dataset.rb +52 -0
  49. data/test/solvebio/test_depository.rb +24 -0
  50. data/test/solvebio/test_depositoryversion.rb +22 -0
  51. data/test/solvebio/test_error.rb +31 -0
  52. data/test/solvebio/test_filter.rb +86 -0
  53. data/test/solvebio/test_query.rb +282 -0
  54. data/test/solvebio/test_query_batch.rb +38 -0
  55. data/test/solvebio/test_query_init.rb +30 -0
  56. data/test/solvebio/test_query_tabulate.rb +73 -0
  57. data/test/solvebio/test_ratelimit.rb +31 -0
  58. data/test/solvebio/test_resource.rb +29 -0
  59. data/test/solvebio/test_sample_access.rb +60 -0
  60. data/test/solvebio/test_sample_download.rb +20 -0
  61. data/test/solvebio/test_tabulate.rb +129 -0
  62. data/test/solvebio/test_util.rb +39 -0
  63. metadata +100 -85
  64. data/Makefile +0 -17
  65. data/demo/README.md +0 -14
  66. data/demo/cheatsheet.rb +0 -31
  67. data/demo/dataset/facets.rb +0 -13
  68. data/demo/dataset/field.rb +0 -13
  69. data/demo/depository/README.md +0 -24
  70. data/demo/depository/all.rb +0 -13
  71. data/demo/depository/retrieve.rb +0 -13
  72. data/demo/depository/versions-all.rb +0 -13
  73. data/demo/query/query-filter.rb +0 -30
  74. data/demo/query/query.rb +0 -13
  75. data/demo/query/range-filter.rb +0 -18
  76. data/demo/test-api.rb +0 -98
  77. data/lib/cli/auth.rb +0 -122
  78. data/lib/cli/help.rb +0 -13
  79. data/lib/cli/irbrc.rb +0 -54
  80. data/lib/cli/options.rb +0 -75
  81. data/lib/client.rb +0 -154
  82. data/lib/credentials.rb +0 -67
  83. data/lib/errors.rb +0 -81
  84. data/lib/filter.rb +0 -312
  85. data/lib/locale.rb +0 -47
  86. data/lib/main.rb +0 -46
  87. data/lib/query.rb +0 -414
  88. data/lib/resource/annotation.rb +0 -23
  89. data/lib/resource/apiresource.rb +0 -241
  90. data/lib/resource/dataset.rb +0 -91
  91. data/lib/resource/datasetfield.rb +0 -37
  92. data/lib/resource/depository.rb +0 -50
  93. data/lib/resource/depositoryversion.rb +0 -69
  94. data/lib/resource/main.rb +0 -123
  95. data/lib/resource/sample.rb +0 -75
  96. data/lib/resource/solveobject.rb +0 -122
  97. data/lib/resource/user.rb +0 -5
  98. data/lib/tabulate.rb +0 -706
  99. data/lib/util.rb +0 -29
  100. data/test/Makefile +0 -9
  101. data/test/data/sample.vcf.gz +0 -0
  102. data/test/test-annotation.rb +0 -46
  103. data/test/test-auth.rb +0 -58
  104. data/test/test-client.rb +0 -27
  105. data/test/test-conversion.rb +0 -13
  106. data/test/test-dataset.rb +0 -42
  107. data/test/test-depository.rb +0 -35
  108. data/test/test-error.rb +0 -36
  109. data/test/test-filter.rb +0 -70
  110. data/test/test-netrc.rb +0 -52
  111. data/test/test-query-batch.rb +0 -40
  112. data/test/test-query-init.rb +0 -29
  113. data/test/test-query-paging.rb +0 -102
  114. data/test/test-query.rb +0 -71
  115. data/test/test-resource.rb +0 -40
  116. data/test/test-sample-access.rb +0 -59
  117. data/test/test-sample-download.rb +0 -20
  118. data/test/test-tabulate.rb +0 -131
  119. data/test/test-util.rb +0 -42
@@ -1,123 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- require_relative 'solveobject'
4
- require_relative 'annotation'
5
- require_relative 'apiresource'
6
- require_relative 'dataset'
7
- require_relative 'datasetfield'
8
- require_relative 'depository'
9
- require_relative 'depositoryversion'
10
- require_relative 'sample'
11
- require_relative 'user'
12
-
13
- class SolveBio::ListObject < SolveBio::SolveObject
14
-
15
- include Enumerable
16
-
17
- def all(params={})
18
- return request('get', self['url'], {:params => params})
19
- end
20
-
21
- def create(params={})
22
- return request('post', self['url'], {:params => params})
23
- end
24
-
25
- def next_page(params={})
26
- if self['links']['next']
27
- return request('get', self['links']['next'], {:params => params})
28
- end
29
- return nil
30
- end
31
-
32
- def prev_page(params={})
33
- if self['links']['prev']
34
- request('get', self['links']['prev'], {:params => params})
35
- end
36
- return nil
37
- end
38
-
39
- def at(i)
40
- self.to_a[i]
41
- end
42
-
43
- def to_a
44
- return to_solve_object(self['data'])
45
- end
46
-
47
- def each(*pass)
48
- return self unless block_given?
49
- i = 0
50
- ary = self.dup
51
- done = false
52
- until done
53
- if i >= ary['data'].size
54
- ary = next_page
55
- break unless ary
56
- i = 0
57
- end
58
- yield(ary.at(i))
59
- i += 1
60
- end
61
- return self
62
- end
63
-
64
- def first
65
- self['data'][0]
66
- end
67
-
68
- # def max
69
- # self['data'][self['total']]
70
- # end
71
-
72
- end
73
-
74
-
75
- SolveBio::SolveObject::CONVERSION = {
76
- 'Annotation' => SolveBio::Annotation,
77
- 'Depository' => SolveBio::Depository,
78
- 'DepositoryVersion' => SolveBio::DepositoryVersion,
79
- 'Dataset' => SolveBio::Dataset,
80
- 'DatasetField' => SolveBio::DatasetField,
81
- 'Sample' => SolveBio::Sample,
82
- 'User' => SolveBio::User,
83
- 'list' => SolveBio::ListObject
84
- }
85
-
86
- if __FILE__ == $0
87
- puts '-' * 50
88
- resp = {
89
- 'class_name' => 'Dataset',
90
- 'data_url' => 'https://api.solvebio.com/v1/datasets/25/data',
91
- 'depository' => 'ClinVar',
92
- 'depository_id' => 223,
93
- 'depository_version' => 'ClinVar/2.0.0-1',
94
- 'depository_version_id' => 15,
95
- 'description' => '',
96
- 'fields_url' => 'https://api.solvebio.com/v1/datasets/25/fields',
97
- 'full_name' => 'ClinVar/2.0.0-1/Variants',
98
- 'id' => 25,
99
- 'name' => 'Variants',
100
- 'title' => 'Variants',
101
- 'url' => 'https://api.solvebio.com/v1/datasets/25'
102
- }
103
- so = to_solve_object(resp)
104
- so = resp.to_solvebio
105
- puts so.inspect
106
- puts so.to_s
107
-
108
- if ARGV[0]
109
- require_relative './cli/auth.rb'
110
- include SolveBio::Auth
111
- login
112
- puts '-' * 30, ' HELP ', '-' * 30
113
- puts SolveBio::Depository.retrieve('ClinVar').help
114
- puts '-' * 30, ' Retrieve ClinVar ','-' * 30
115
- puts SolveBio::Depository.retrieve('ClinVar').to_s
116
- puts '-' * 30, ' Versions ClinVar ','-' * 30
117
- puts SolveBio::Depository.retrieve('Clinvar').versions.to_s
118
- puts '-' * 30, ' Dataset ','-' * 30
119
- puts SolveBio::Dataset.retrieve('Clinvar/2.0.0-1/Variants').to_s
120
- puts '-' * 30, ' All Depository ','-' * 30
121
- puts SolveBio::Depository.all.to_s
122
- end
123
- end
@@ -1,75 +0,0 @@
1
- # Solvebio API Resource for Samples
2
- require_relative 'apiresource'
3
- require_relative 'solveobject'
4
- require_relative '../errors'
5
-
6
- # Samples are VCF files uploaded to the SolveBio API. We currently
7
- # support uncompressed, extension `.vcf`, and gzip-compressed, extension
8
- # `.vcf.gz`, VCF files. Any other extension will be rejected.
9
- class SolveBio::Sample < SolveBio::APIResource
10
-
11
- include SolveBio::DeletableAPIResource
12
- include SolveBio::DownloadableAPIResource
13
- include SolveBio::ListableAPIResource
14
- include SolveBio::HelpableAPIResource
15
-
16
- def annotate
17
- SolveBio::Annotation.create :sample_id => self.id
18
- end
19
-
20
- # FIXME: Rubyize APIResource.retrieve
21
- def self.retrieve(id, params={})
22
- SolveBio::APIResource.retrieve(self, id)
23
- end
24
-
25
- def self.create(genome_build, params={})
26
- if params.member?(:vcf_url)
27
- if params.member?(:vcf_file)
28
- raise TypeError,
29
- 'Specified both vcf_url and vcf_file; use only one'
30
- end
31
- self.create_from_url(genome_build, params[:vcf_url])
32
- elsif params.member?(:vcf_file)
33
- return create_from_file(genome_build, params[:vcf_file])
34
- else
35
- raise TypeError,
36
- 'Must specify exactly one of vcf_url or vcf_file parameter'
37
- end
38
- end
39
-
40
- # Creates from the specified file. The data of the file should be in
41
- # VCF format.
42
- def self.create_from_file(genome_build, vcf_file)
43
-
44
- fh = File.open(vcf_file, 'rb')
45
- params = {:genome_build => genome_build,
46
- :vcf_file => fh}
47
- response = SolveBio::Client.client.post(class_url(self), params,
48
- :no_json => true)
49
- to_solve_object(response)
50
- end
51
-
52
- # Creates from the specified URL. The data at the URL should be in
53
- # VCF format.
54
- def self.create_from_url(genome_build, vcf_url)
55
-
56
- params = {:genome_build => genome_build,
57
- :vcf_url => vcf_url}
58
- begin
59
- response = SolveBio::Client.client.post class_url(self), params
60
- rescue SolveBio::Error => response
61
- end
62
- to_solve_object(response)
63
- end
64
- end
65
-
66
- if __FILE__ == $0
67
- unless SolveBio::API_HOST == 'https://api.solvebio.com'
68
- SolveBio::SolveObject::CONVERSION = {
69
- 'Sample' => SolveBio::Sample,
70
- } unless defined? SolveBio::SolveObject::CONVERSION
71
- url = 'http://downloads.solvebio.com/vcf/small_sample.vcf.gz'
72
- response = SolveBio::Sample.create_from_url 'hg19', url
73
- puts response
74
- end
75
- end
@@ -1,122 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- require 'json'
4
- require 'set'
5
- require_relative '../client'
6
-
7
- # Base class for all SolveBio API resource objects
8
- class SolveBio::SolveObject < Hash
9
-
10
- ALLOW_FULL_NAME_ID = false # Treat full_name parameter as an ID?
11
-
12
- attr_reader :unsaved_values
13
-
14
- def allow_full_name_id
15
- self.class.const_get(:ALLOW_FULL_NAME_ID)
16
- end
17
-
18
- def initialize(id=nil, params={})
19
-
20
- super()
21
- # store manually updated values for partial updates
22
- @unsaved_values = Set.new
23
-
24
- if id
25
- self['id'] = id
26
- elsif allow_full_name_id and params['full_name']
27
- self['full_name'] = params['full_name']
28
- # no ID was provided so temporarily set the id as full_name
29
- # this will get updated when the resource is refreshed
30
- self['id'] = params['full_name']
31
- end
32
- end
33
-
34
- # Element Reference — Retrieves the value object corresponding to the key object.
35
- # Note: *key* is turned into a string before access, because the underlying key type
36
- # is a string.
37
- def [](key)
38
- super(key.to_s)
39
- end
40
-
41
- def self.construct_from(cls, values)
42
- instance = cls.new(values['id'])
43
- instance.refresh_from(values)
44
- instance
45
- end
46
-
47
- def refresh_from(values)
48
- self.clear()
49
- @unsaved_values = Set.new
50
- values.each { |k, v| self[k] = to_solve_object(v) }
51
- end
52
-
53
- def request(method, url, params={})
54
- response = SolveBio::Client.client
55
- .request method, url, {:params => params}
56
- to_solve_object(response)
57
- end
58
-
59
- def inspect
60
- ident_parts = [self.class]
61
-
62
- if self['id'].kind_of?(Integer)
63
- ident_parts << "id=#{self['id']}"
64
- end
65
-
66
- if allow_full_name_id and self['full_name']
67
- ident_parts << "full_name=#{self['full_name']}"
68
- end
69
-
70
- '<%s:%x> JSON: %s' % [ident_parts.join(' '),
71
- self.object_id, self.to_json]
72
-
73
- end
74
-
75
- def to_s
76
- # No equivalent of Python's json sort_keys?
77
- JSON.pretty_generate(self, :indent => ' ')
78
- # self.to_json json.dumps(self, sort_keys=true, indent=2)
79
- end
80
-
81
- # @property
82
- def id
83
- self['id']
84
- end
85
- end
86
-
87
- class Hash
88
- def to_solvebio(klass=nil)
89
- resp = self.dup()
90
- if ! klass
91
- klass_name ||= resp['class_name']
92
- if klass_name.kind_of?(String)
93
- klass = SolveBio::SolveObject::CONVERSION[klass_name] ||
94
- SolveBio::SolveObject
95
- else
96
- klass = SolveBio::SolveObject
97
- end
98
- end
99
- SolveBio::SolveObject::construct_from(klass, resp)
100
- end
101
- end
102
-
103
- class Array
104
- def to_solvebio
105
- return self.map{|i| to_solve_object(i)}
106
- end
107
- end
108
-
109
- def to_solve_object(resp)
110
- if resp.kind_of?(Array) or
111
- (not resp.kind_of? SolveBio::SolveObject and resp.kind_of?(Hash))
112
- resp.to_solvebio
113
- else
114
- return resp
115
- end
116
- end
117
-
118
- if __FILE__ == $0
119
- puts SolveBio::SolveObject.new.inspect
120
- puts SolveBio::SolveObject.new(64).inspect
121
-
122
- end
@@ -1,5 +0,0 @@
1
- require_relative 'apiresource'
2
-
3
- class SolveBio::User < SolveBio::APIResource
4
- include SolveBio::SingletonAPIResource
5
- end
@@ -1,706 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- #
3
- # This file contains code from python-tabulate, modified for SolveBio
4
- #
5
- # Copyright © 2011-2013 Sergey Astanin
6
- #
7
- # Permission is hereby granted, free of charge, to any person obtaining
8
- # a copy of this software and associated documentation files (the
9
- # "Software"), to deal in the Software without restriction, including
10
- # without limitation the rights to use, copy, modify, merge, publish,
11
- # distribute, sublicense, and/or sell copies of the Software, and to
12
- # permit persons to whom the Software is furnished to do so, subject to
13
- # the following conditions:
14
- #
15
- # The above copyright notice and this permission notice shall be
16
- # included in all copies or substantial portions of the Software.
17
- #
18
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
22
- # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
- # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
- # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
-
26
- # from __future__ require 'print_function'
27
- # from __future__ require 'unicode_literals'
28
-
29
- # from collections require 'namedtuple'
30
- # from platform require 'python_version_tuple'
31
-
32
- # if python_version_tuple()[0] < "3"
33
- # from itertools require 'izip_longest'
34
- # _none_type = type(nil)
35
- # _int_type = int
36
- # _float_type = float
37
- # _text_type = unicode
38
- # _binary_type = str
39
- # else
40
- # from itertools require 'zip_longest as izip_longest'
41
- # from functools require 'reduce'
42
- # _none_type = type(nil)
43
- # _int_type = int
44
- # _float_type = float
45
- # _text_type = str
46
- # _binary_type = bytes
47
- # end
48
-
49
-
50
- require_relative 'main'
51
-
52
- module SolveBio::Tabulate
53
-
54
- VERSION = '0.6'
55
-
56
- TYPES = {NilClass => 0, Fixnum => 1, Float => 2, String => 4}
57
-
58
- INVISIBILE_CODES = %r{\\x1b\[\d*m} # ANSI color codes
59
-
60
- Line = Struct.new(:start, :hline, :sep, :last)
61
-
62
- DataRow = Struct.new(:start, :sep, :last)
63
-
64
- TableFormat = Struct.new(:lineabove, :linebelowheader,
65
- :linebetweenrows, :linebelow,
66
- :headerrow, :datarow,
67
- :padding, :usecolons,
68
- :with_header_hide,
69
- :without_header_hide)
70
-
71
- FORMAT_DEFAULTS = {
72
- :padding => 0,
73
- :usecolons => false,
74
- :with_header_hide => [],
75
- :without_header_hide => []
76
- }
77
-
78
-
79
- INVTYPES = {
80
- 4 => String,
81
- 2 => Float,
82
- 1 => Fixnum,
83
- 0 => NilClass
84
- }
85
-
86
- SIMPLE_DATAROW = DataRow.new('', ' ', '')
87
- PIPE_DATAROW = DataRow.new('|', '|', '|')
88
-
89
- SIMPLE_LINE = Line.new('', '-', ' ', '')
90
- GRID_LINE = Line.new('+', '-', '+', '+')
91
-
92
- TABLE_FORMATS = {
93
- :simple =>
94
- TableFormat.new(lineabove = nil,
95
- linebelowheader = SIMPLE_LINE,
96
- linebetweenrows = nil,
97
- linebelow = SIMPLE_LINE,
98
- headerrow = SIMPLE_DATAROW,
99
- datarow = SIMPLE_DATAROW,
100
- padding = 0,
101
- usecolons = false,
102
- with_header_hide = ['linebelow'],
103
- without_header_hide = []),
104
- :grid =>
105
- TableFormat.new(lineabove = SIMPLE_LINE,
106
- linebelowheader = Line.new('+', '=', '+', '+'),
107
- linebetweenrows = SIMPLE_LINE,
108
- linebelow = SIMPLE_LINE,
109
- headerrow = PIPE_DATAROW,
110
- datarow = PIPE_DATAROW,
111
- padding = 1,
112
- usecolons = false,
113
- with_header_hide = [],
114
- without_header_hide = ['linebelowheader']),
115
-
116
- :pipe =>
117
- TableFormat.new(lineabove = nil,
118
- linebelowheader = Line.new('|', '-', '|', '|'),
119
- linebetweenrows = nil,
120
- linebelow = nil,
121
- headerrow = PIPE_DATAROW,
122
- datarow = PIPE_DATAROW,
123
- padding = 1,
124
- usecolons = true,
125
- with_header_hide = [],
126
- without_header_hide = []),
127
-
128
- :orgmode =>
129
- TableFormat.new(lineabove=nil,
130
- linebelowheader = Line.new('|', '-', '+', '|'),
131
- linebetweenrows = nil,
132
- linebelow = nil,
133
- headerrow = PIPE_DATAROW,
134
- datarow = PIPE_DATAROW,
135
- padding = 1,
136
- usecolons = false,
137
- with_header_hide = [],
138
- without_header_hide = ['linebelowheader'])
139
- }
140
-
141
- module_function
142
- def simple_separated_format(separator)
143
- # FIXME? python code hard-codes separator = "\n" below.
144
- return TableFormat
145
- .new(
146
- :lineabove => nil,
147
- :linebelowheader => nil,
148
- :linebetweenrows => nil,
149
- :linebelow => nil,
150
- :headerrow => nil,
151
- :datarow => DataRow.new('', separator, ''),
152
- :padding => 0,
153
- :usecolons => false,
154
- :with_header_hide => [],
155
- :without_header_hide => [],
156
- )
157
- end
158
-
159
- # The least generic type, one of NilClass, Fixnum, Float, or String.
160
- # _type(nil) => NilClass
161
- # _type("foo") => String
162
- # _type("1") => Fixnum
163
- # _type("\x1b[31m42\x1b[0m") => Fixnum
164
- def _type(obj, has_invisible=true)
165
-
166
- obj = obj.strip_invisible if obj.kind_of?(String) and has_invisible
167
-
168
- if obj.nil?
169
- return NilClass
170
- elsif obj.kind_of?(Fixnum) or obj.int?
171
- return Fixnum
172
- elsif obj.kind_of?(Float) or obj.number?
173
- return Float
174
- else
175
- return String
176
- end
177
- end
178
-
179
- # [string] -> [padded_string]
180
- #
181
- # align_column(
182
- # ["12.345", "-1234.5", "1.23", "1234.5",
183
- # "1e+234", "1.0e234"], "decimal") =>
184
- # [' 12.345 ', '-1234.5 ', ' 1.23 ',
185
- # ' 1234.5 ', ' 1e+234 ', ' 1.0e234']
186
- def align_column(strings, alignment, minwidth=0, has_invisible=true)
187
- if alignment == "right"
188
- strings = strings.map{|s| s.to_s.strip}
189
- padfn = :padleft
190
- elsif alignment == 'center'
191
- strings = strings.map{|s| s.to_s.strip}
192
- padfn = :padboth
193
- elsif alignment == 'decimal'
194
- decimals = strings.map{|s| s.to_s.afterpoint}
195
- maxdecimals = decimals.max
196
- zipped = strings.zip(decimals)
197
- strings = zipped.map{|s, decs|
198
- s.to_s + " " * ((maxdecimals - decs))
199
- }
200
- padfn = :padleft
201
- else
202
- strings = strings.map{|s| s.to_s.strip}
203
- padfn = :padright
204
- end
205
-
206
- if has_invisible
207
- width_fn = :visible_width
208
- else
209
- width_fn = :size
210
- end
211
-
212
- maxwidth = [strings.map{|s| s.send(width_fn)}.max, minwidth].max
213
- strings.map{|s| s.send(padfn, maxwidth, has_invisible) }
214
- end
215
-
216
-
217
- def more_generic(type1, type2)
218
- moregeneric = [TYPES[type1] || 4, TYPES[type2] || 4].max
219
- return INVTYPES[moregeneric]
220
- end
221
-
222
-
223
- # The least generic type all column values are convertible to.
224
- #
225
- # column_type(["1", "2"]) => Fixnum
226
- # column_type(["1", "2.3"]) => Float
227
- # column_type(["1", "2.3", "four"]) => String
228
- # column_type(["four", '\u043f\u044f\u0442\u044c']) => String
229
- # column_type([nil, "brux"]) => String
230
- # column_type([1, 2, nil]) => Fixnum
231
- def column_type(strings, has_invisible=true)
232
- types = strings.map{|s| _type(s, has_invisible)}
233
- # require 'trepanning'; debugger
234
- return types.reduce(Fixnum){
235
- |t, result|
236
- more_generic(result, t)
237
- }
238
- end
239
-
240
-
241
- # Format a value according to its type.
242
- #
243
- # Unicode is supported:
244
- #
245
- # >>> hrow = ["\u0431\u0443\u043a\u0432\u0430",
246
- # "\u0446\u0438\u0444\u0440\u0430"]
247
- # tbl = [["\u0430\u0437", 2], ["\u0431\u0443\u043a\u0438", 4]]
248
- # expected = "\\u0431\\u0443\\u043a\\u0432\\u0430 \n
249
- # \\u0446\\u0438\\u0444\\u0440\\u0430\\n-------\n
250
- # -------\\n\\u0430\\u0437 \n
251
- # 2\\n\\u0431\\u0443\\u043a\\u0438 4'
252
- # tabulate(tbl, hrow) => good_result
253
- # true
254
- def format(val, valtype, floatfmt, missingval="")
255
- if val.nil?
256
- return missingval
257
- end
258
-
259
- if [Fixnum, String, Fixnum].member?(valtype)
260
- return "%s" % val
261
- elsif valtype.kind_of?(Float)
262
- return "%#{floatfmt}" % Float(val)
263
- else
264
- return "%s" % val
265
- end
266
- end
267
-
268
-
269
- def align_header(header, alignment, width)
270
- if alignment == "left"
271
- return header.padright(width)
272
- elsif alignment == "center"
273
- return header.padboth(width)
274
- else
275
- return header.padleft(width)
276
- end
277
- end
278
-
279
-
280
- # Transform a supported data type to an Array of Arrays, and an
281
- # Array of headers.
282
- #
283
- # Supported tabular data types:
284
- #
285
- # * Array-of-Arrays or another Enumerable of Enumerables
286
- #
287
- # * Hash of Enumerables
288
- #
289
- # The first row can be used as headers if headers="firstrow",
290
- # column indices can be used as headers if headers="keys".
291
- #
292
- def normalize_tabular_data(tabular_data, headers)
293
- if tabular_data.respond_to?(:keys) and tabular_data.respond_to?(:values)
294
- # likely a Hash
295
- keys = tabular_data.keys
296
- ## FIXME: what's different in the Python code?
297
- # columns have to be transposed
298
- # rows = list(izip_longest(*tabular_data.values()))
299
- # rows = vals[0].zip(*vals[1..-1])
300
- rows = tabular_data.values
301
- if headers == "keys"
302
- # headers should be strings
303
- headers = keys.map{|k| k.to_s}
304
- end
305
- elsif tabular_data.kind_of?(Enumerable)
306
- # Likely an Enumerable of Enumerables
307
- rows = tabular_data.to_a
308
- if headers == "keys" and not rows.empty? # keys are column indices
309
- headers = (0..rows[0]).map {|i| i.to_s}
310
- end
311
- else
312
- raise(ValueError, "tabular data doesn't appear to be a Hash" +
313
- " or Array")
314
- end
315
-
316
- # take headers from the first row if necessary
317
- if headers == "firstrow" and not rows.empty?
318
- headers = rows[0].map{|row| [_text_type(row)]}
319
- rows.shift
320
- end
321
-
322
- # pad with empty headers for initial columns if necessary
323
- if not headers.empty? and not rows.empty?
324
- nhs = headers.size
325
- ncols = rows[0].size
326
- if nhs < ncols
327
- headers = [''] * (ncols - nhs) + headers
328
- end
329
- end
330
-
331
- return rows, headers
332
- end
333
-
334
- TTY_COLS = ENV['COLUMNS'].to_i || 80 rescue 80
335
- # Return a string which represents a row of data cells.
336
- def build_row(cells, padding, first, sep, last)
337
-
338
- pad = ' ' * padding
339
- padded_cells = cells.map{|cell| pad + cell + pad }
340
-
341
- # SolveBio: we're only displaying Key-Value tuples (dimension of 2).
342
- # enforce that we don't wrap lines by setting a max
343
- # limit on row width which is equal to TTY_COLS (see printing)
344
- rendered_cells = (first + padded_cells.join(sep) + last).rstrip
345
- if rendered_cells.size > TTY_COLS
346
- if not cells[-1].end_with?(' ') and not cells[-1].end_with?('-')
347
- terminating_str = ' ... '
348
- else
349
- terminating_str = ''
350
- end
351
- prefix = rendered_cells[1..TTY_COLS - terminating_str.size - 1]
352
- rendered_cells = "%s%s%s" % [prefix, terminating_str, last]
353
- end
354
-
355
- return rendered_cells
356
- end
357
-
358
-
359
- # Return a string which represents a horizontal line.
360
- def build_line(colwidths, padding, first, fill, sep, last)
361
- cells = colwidths.map{|w| fill * (w + 2 * padding)}
362
- return build_row(cells, 0, first, sep, last)
363
- end
364
-
365
-
366
- # Return a segment of a horizontal line with optional colons which
367
- # indicate column's alignment (as in `pipe` output format).
368
- def _line_segment_with_colons(linefmt, align, colwidth)
369
- fill = linefmt.hline
370
- w = colwidth
371
- if ['right', 'decimal'].member?(align)
372
- return (fill[0] * (w - 1)) + ":"
373
- elsif align == "center"
374
- return ":" + (fill[0] * (w - 2)) + ":"
375
- elsif align == "left"
376
- return ":" + (fill[0] * (w - 1))
377
- else
378
- return fill[0] * w
379
- end
380
- end
381
-
382
-
383
- # Produce a plain-text representation of the table.
384
- def format_table(fmt, headers, rows, colwidths, colaligns)
385
- lines = []
386
- hidden = headers ? fmt.with_header_hide : fmt.without_header_hide
387
- pad = fmt.padding || 0
388
- datarow = fmt.datarow ? fmt.datarow : SIMPLE_DATAROW
389
- headerrow = fmt.headerrow ? fmt.headerrow : fmt.datarow
390
-
391
- if fmt.lineabove and hidden and hidden.member?("lineabove")
392
- lines << build_line(colwidths, pad, *fmt.lineabove)
393
- end
394
-
395
- unless headers.empty?
396
- lines << build_row(headers, pad, headerrow.start, headerrow.sep,
397
- headerrow.last)
398
- end
399
-
400
- if fmt.linebelowheader and not hidden.member?("linebelowheader")
401
- first, _, sep, last = fmt.linebelowheader
402
- if fmt.usecolons
403
- segs = [
404
- colwidths.zip(colaligns).map do |w, a|
405
- _line_segment_with_colons(fmt.linebelowheader, a, w + 2 * pad)
406
- end ]
407
- lines << build_row(segs, 0, first, sep, last)
408
- else
409
- lines << build_line(colwidths, pad, fmt.linebelowheader.start,
410
- fmt.linebelowheader.hline,
411
- fmt.linebelowheader.sep,
412
- fmt.linebelowheader.last)
413
- end
414
- end
415
-
416
- if rows and fmt.linebetweenrows and hidden.member?('linebetweenrows')
417
- # initial rows with a line below
418
- rows[1..-1].each do |row|
419
- lines << build_row(row, pad, fmt.datarow.start,
420
- fmt.datarow.sep, fmt.datarow.last)
421
- lines << build_line(colwidths, pad, fmt.linebetweenrows.start,
422
- fmt.linebelowheader.hline,
423
- fmt.linebetweenrows.sep,
424
- fmt.linebetweenrows.last)
425
- end
426
- # the last row without a line below
427
- lines << build_row(rows[-1], pad, datarow.start,
428
- datarow.sep, datarow.last)
429
- else
430
- rows.each do |row|
431
- lines << build_row(row, pad, datarow.start, datarow.sep,
432
- datarow.last)
433
-
434
- if fmt.linebelow and hidden.member?('linebelow')
435
- lines << build_line(colwidths, pad, fmt.linebelow.start,
436
- fmt.linebelowheader.hline,
437
- fmt.linebelow.sep,
438
- fmt.linebelow.last)
439
- end
440
- end
441
- end
442
- return lines.join("\n")
443
- end
444
-
445
- # Construct a simple TableFormat with columns separated by a separator.
446
- #
447
- # tsv = simple_separated_format("\t")
448
- # tabulate([["foo", 1], ["spam", 23]], [], tsv) =>
449
- # "foo 1\nspam 23"
450
- def tabulate(tabular_data, headers=[], tablefmt=TABLE_FORMATS[:orgmode],
451
- floatfmt="g", aligns=[], missingval='')
452
- list_of_lists, headers = normalize_tabular_data(tabular_data, headers)
453
-
454
- # optimization: look for ANSI control codes once,
455
- # enable smart width functions only if a control code is found
456
- plain_rows = [headers.map{|h| h.to_s}.join("\t")]
457
- row_text = list_of_lists.map{|row|
458
- row.map{|r| r.to_s}.join("\t")
459
- }
460
- plain_rows += row_text
461
- plain_text = plain_rows.join("\n")
462
-
463
- has_invisible = INVISIBILE_CODES.match(plain_text)
464
- if has_invisible
465
- width_fn = :visible_width
466
- else
467
- width_fn = :size
468
- end
469
-
470
- # format rows and columns, convert numeric values to strings
471
- cols = list_of_lists[0].zip(*list_of_lists[1..-1]) if
472
- list_of_lists.size > 1
473
-
474
- coltypes = cols.map{|c| column_type(c)}
475
-
476
- cols = cols.zip(coltypes).map do |c, ct|
477
- c.map{|v| format(v, ct, floatfmt, missingval)}
478
- end
479
-
480
- # align columns
481
- if aligns.empty?
482
- # dynamic alignment by col type
483
- aligns = coltypes.map do |ct|
484
- [Fixnum, Float].member?(ct) ? 'decimal' : 'left'
485
- end
486
- end
487
-
488
- minwidths =
489
- if headers.empty? then
490
- [0] * cols.size
491
- else
492
- headers.map{|h| h.send(width_fn) + 2}
493
- end
494
-
495
- cols = cols.zip(aligns, minwidths).map do |c, a, minw|
496
- align_column(c, a, minw, has_invisible)
497
- end
498
-
499
- if headers.empty?
500
- minwidths = cols.map{|c| c[0].send(width_fn)}
501
- else
502
- # align headers and add headers
503
- minwidths =
504
- minwidths.zip(cols).map{|minw, c| [minw, c[0].send(width_fn)].max}
505
- headers =
506
- headers.zip(aligns, minwidths).map{|h, a, minw| align_header(h, a, minw)}
507
- end
508
- rows = cols[0].zip(cols[1])
509
-
510
- tablefmt = TABLE_FORMATS[:orgmode] unless
511
- tablefmt.kind_of?(TableFormat)
512
-
513
- # make sure values don't have newlines or tabs in them
514
- rows = rows.each do |r|
515
- r[1] = r[1].gsub("\n", '').gsub("\t", '')
516
- end
517
- return format_table(tablefmt, headers, rows, minwidths, aligns)
518
- end
519
- end
520
-
521
- class Object
522
-
523
- # "123.45".number? => true
524
- # "123".number? => true
525
- # "spam".number? => false
526
- def number?
527
- begin
528
- Float(self)
529
- return true
530
- rescue
531
- return false
532
- end
533
- end
534
-
535
- # "123".int? => true
536
- # "123.45".int? => false
537
- def int?
538
- begin
539
- Integer(self)
540
- return true
541
- rescue
542
- return false
543
- end
544
- end
545
- end
546
-
547
# String helpers used when laying out table cells.
class String
  # Number of characters after the decimal point (or, when there is no
  # point, after the exponent marker). Returns -1 when the string is not a
  # number, is an integer, or contains neither marker.
  #
  #   "123.45".afterpoint  # => 2
  #   "1001".afterpoint    # => -1
  #   "eggs".afterpoint    # => -1
  #   "123e45".afterpoint  # => 2
  def afterpoint
    return -1 unless number? # not a number
    return -1 if int?        # integers have no fractional part

    # Prefer the decimal point; fall back to the exponent marker. Either
    # lookup may return nil (e.g. for hex-float notation), which must map to
    # -1 instead of being compared against 0.
    marker = rindex('.') || downcase.rindex('e')
    marker ? size - marker - 1 : -1
  end

  # Length of the string, ignoring invisible codes when +has_invisible+ is
  # truthy.
  def adjusted_size(has_invisible)
    has_invisible ? strip_invisible.size : size
  end

  # Visible width of a printed string. ANSI color codes are removed.
  #
  #   ["\x1b[31mhello\x1b[0m", "world"].map { |s| s.visible_width }
  #   # => [5, 5]
  def visible_width
    strip_invisible.size
  end

  # Flush right: pad with spaces on the left up to +width+.
  # Returns the receiver itself when it is already wide enough.
  #
  #   "\u044f\u0439\u0446\u0430".padleft(6)  # => "  \u044f\u0439\u0446\u0430"
  #   "abc".padleft(2)                       # => "abc"
  def padleft(width, has_invisible=true)
    missing = width - adjusted_size(has_invisible)
    missing > 0 ? (' ' * missing) + self : self
  end

  # Flush left: pad with spaces on the right up to +width+.
  # Returns the receiver itself when it is already wide enough.
  #
  #   "\u044f\u0439\u0446\u0430".padright(6)  # => "\u044f\u0439\u0446\u0430  "
  #   "abc".padright(2)                       # => "abc"
  def padright(width, has_invisible=true)
    missing = width - adjusted_size(has_invisible)
    missing > 0 ? self + (' ' * missing) : self
  end

  # Center the string within +width+; when the padding is odd the extra
  # space goes on the right.
  #
  #   "\u044f\u0439\u0446\u0430".padboth(6)  # => " \u044f\u0439\u0446\u0430 "
  #   "abc".padboth(2)                       # => "abc"
  #   "abc".padboth(6)                       # => " abc  "
  def padboth(width, has_invisible=true)
    missing = width - adjusted_size(has_invisible)
    return self if missing <= 0

    left = ' ' * (missing / 2)
    right = ' ' * ((missing + 1) / 2)
    left + self + right
  end

  # Remove invisible ANSI color codes, as matched by
  # SolveBio::Tabulate::INVISIBILE_CODES.
  def strip_invisible
    gsub(SolveBio::Tabulate::INVISIBILE_CODES, '')
  end
end
631
-
632
# Manual smoke check: runs only when this file is executed directly.
if __FILE__ == $PROGRAM_NAME
  include SolveBio::Tabulate

  # Disabled ad-hoc checks, kept for reference. Each prints an actual value
  # next to the value it is expected to equal.
  #
  # -- Object#number? / Object#int? --
  # puts '"123.45".num? %s' % "123.45".number?      # true
  # puts "'123'.num?: %s" % '123'.number?           # true
  # puts "'spam'.num? spam: %s" % "spam".number?    # false
  # puts "'123'.int? %s" % "123".int?               # true
  # puts "'123.45'int?: %s" % '124.45'.int?         # false
  #
  # -- _type --
  # puts "_type(nil) %s = %s" % [_type(nil), NilClass]
  # puts "_type('foo') %s = %s" % [_type('foo'), String]
  # puts "_type('1') %s = %s" % [_type('1'), Fixnum]
  # puts "_type('\x1b[31m42\x1b[0m') %s = %s" % [_type("\x1b[31m42\x1b[0m"), Fixnum]
  #
  # -- String#afterpoint --
  # puts "'123.45'.afterpoint: 2 == %d" % '123.45'.afterpoint
  # puts "'1001'afterpoint : -1 == %d" % '1001'.afterpoint
  # puts "'eggs'.afterpoint : -1 == %d" % 'eggs'.afterpoint
  # puts "'123e45'.afterpoint: 2 == %d" % "123e45".afterpoint
  #
  # -- String padding --
  # puts("'\u044f\u0439\u0446\u0430'.padleft(6) = '%s' == '%s'" %
  #      ["\u044f\u0439\u0446\u0430".padleft(6), "  \u044f\u0439\u0446\u0430"])
  # puts("'abc'.padleft(2) = '%s' == '%s'" % ['abc'.padleft(2), 'abc'])
  # puts("padright(2, 'abc') = '%s' == '%s'" % ['abc'.padright(2), 'abc'])
  # puts("'abc'.padboth(2) = '%s' == '%s'" % ['abc'.padboth(2), 'abc'])
  # puts("'abc'.padboth(6) = '%s' == '%s'" % ['abc'.padboth(6), ' abc  '])
  #
  # -- align_column --
  # puts align_column(
  #   ["12.345", "-1234.5", "1.23", "1234.5", "1e+234", "1.0e234"], "decimal")
  # puts '=' * 30
  # puts ['   12.345  ', '-1234.5    ', '    1.23   ',
  #       ' 1234.5    ', '    1e+234 ', '    1.0e234']
  #
  # -- column_type --
  # puts('column_type(["1", "2"]) is Fixnum == %s ' % column_type(["1", "2"]))
  # puts('column_type(["1", "2.3"]) is Float == %s ' % column_type(["1", "2.3"]))
  # puts('column_type(["1", "2.3", "four"]) is String => %s ' %
  #      column_type(["1", "2.3", "four"]))
  # puts('column_type(["four", "\u043f\u044f\u0442\u044c"]) is text => %s ' %
  #      column_type(["four", "\u043f\u044f\u0442\u044c"]))
  # puts('column_type([nil, "brux"]) is String => %s ' % column_type([nil, "brux"]))
  # puts('column_type([1, 2, nil]) is Fixnum => %s ' % column_type([1, 2, nil]))
  #
  # -- tabulate --
  # tsv = simple_separated_format("\t")
  # puts tabulate([["foo", 1], ["spam", 23]], [], tsv)
  # hrow = ["\u0431\u0443\u043a\u0432\u0430", "\u0446\u0438\u0444\u0440\u0430"]
  # tbl = [["\u0430\u0437", 2], ["\u0431\u0443\u043a\u0438", 4]]
  # puts SolveBio::Tabulate.tabulate(tbl, hrow)

  # Sample record mixing integers, strings and arrays; printed as a
  # two-column field/value table.
  sample_record = {
    "rcvaccession_version" => 2,
    "hg18_chromosome" => "3",
    "hg19_start" => 148562304,
    "rcvaccession" => "RCV000060731",
    "hg38_start" => 148844517,
    "reference_allele" => "C",
    "gene_symbols" => ["CPB1"],
    "rsid" => "rs150241322",
    "hg19_chromosome" => "3",
    "hgvs" => ["NC_000003.12:g.148844517C>T"],
    "clinical_significance" => "other",
    "alternate_alleles" => ["T"],
    "clinical_origin" => ["somatic"],
    "type" => "SNV"
  }

  column_titles = ['Fields', 'Data']
  alignments = ['right', 'left']
  puts SolveBio::Tabulate.tabulate(sample_record.to_a, column_titles, alignments)
end