object_table 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 14a01cfa80b77105e5a243dbaacc894fec8fa9ad
4
+ data.tar.gz: 6bf1d3259550812b745eee202f0937bf7c278732
5
+ SHA512:
6
+ metadata.gz: 1106df0b67029df89d4e8f52e171946f38f4a19c6c16fc0c316823731c8d073fa565b35ce141c2dc399c8d2f2d9002ab11e25178b38cb686db656fc5e4e5fb09
7
+ data.tar.gz: 8a6423b764125e33c092c7d80dbc5fab723995f82873528f8d72e441d54bbe2340b2f9a2022e521fe2d762db0dcbe0fbe90cdfce78643e3479932bb46ad242fe
data/.gitignore ADDED
@@ -0,0 +1,34 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ Gemfile.lock
30
+ .ruby-version
31
+ .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in object_table.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 lincheney
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
data/README.md ADDED
@@ -0,0 +1,322 @@
1
+ ruby-object-table
2
+ =================
3
+
4
+ Simple data table/frame implementation in ruby
5
+ Probably slow and extremely inefficient, but it works and that's all that matters.
6
+ Uses NArrays (https://github.com/masa16/narray) for storing data.
7
+
8
+ ## Creating a table
9
+
10
+ Just pass a hash of columns into the constructor.
11
+ You can use vector types (Array, NArray, Range) or scalars (basically anything else).
12
+
13
+ ### Initialising with vector types
14
+
15
+ ```ruby
16
+ >>> ObjectTable.new(array: [1, 2, 3], narray: NArray[4, 5, 6], range: 7..9)
17
+ => ObjectTable(3, 3)
18
+ array narray range
19
+ 0: 1 4 7
20
+ 1: 2 5 8
21
+ 2: 3 6 9
22
+ array narray range
23
+
24
+ # columns with uneven lengths give an error
25
+ >>> ObjectTable.new(a: [1, 2, 3], b: [4, 5, 6, 7])
26
+ RuntimeError: Differing number of rows: [3, 4]
27
+ ```
28
+
29
+ ### With scalar types
30
+
31
+ With all scalar columns, a one-row table is assumed
32
+ ```ruby
33
+ >>> ObjectTable.new(a: 1, b: 2)
34
+ => ObjectTable(1, 2)
35
+ a b
36
+ 0: 1 2
37
+ a b
38
+ ```
39
+
40
+ Otherwise the scalars are extended to match the length of the vector columns
41
+ ```ruby
42
+ >>> ObjectTable.new(a: [1, 2, 3], b: 100)
43
+ => ObjectTable(3, 2)
44
+ a b
45
+ 0: 1 100
46
+ 1: 2 100
47
+ 2: 3 100
48
+ a b
49
+ ```
50
+
51
+ ## Methods
52
+
53
+ - `#ncols` returns the number of columns
54
+ - `#nrows` returns the number of rows
55
+ - `#colnames` returns an array of the column names
56
+ - `#clone` makes a copy of the table
57
+ - `#stack(table1, table2, ...)` appends the supplied tables
58
+ - `#apply(&block)` evaluates `block` in the context of the table
59
+ - `#where(&block)` filters the table
60
+ - `#group(&block)` splits the table into groups
61
+
62
+ ### Getting columns
63
+
64
+ You can get a column by using `#[]` or using the column name as a method.
65
+
66
+ ```ruby
67
+ >>> data = ObjectTable.new(a: [1, 2, 3], b: 100, c: ['a', 'b', 'c'])
68
+
69
+ # using a method
70
+ >>> data.a
71
+ => ObjectTable::Column.int(3):
72
+ [ 1, 2, 3 ]
73
+
74
+ # ... or using []
75
+ >>> data[:a]
76
+ => ObjectTable::Column.int(3):
77
+ [ 1, 2, 3 ]
78
+ ```
79
+
80
+ ### Setting columns
81
+
82
+ You can set/add columns by using `#[]=`.
83
+
84
+ ```ruby
85
+ >>> data = ObjectTable.new(a: [1, 2, 3], b: 100, c: ['a', 'b', 'c'])
86
+
87
+ # replace an old column with a vector
88
+ >>> data[:a] = [4, 5, 6]
89
+ >>> data
90
+ => ObjectTable(3, 3)
91
+ a b c
92
+ 0: 4 100 "a"
93
+ 1: 5 100 "b"
94
+ 2: 6 100 "c"
95
+ a b c
96
+
97
+ # ... or with a scalar
98
+ >>> data[:c] = "scalar string"
99
+ >>> data
100
+ => ObjectTable(3, 3)
101
+ a b c
102
+ 0: 4 100 "scalar string"
103
+ 1: 5 100 "scalar string"
104
+ 2: 6 100 "scalar string"
105
+ a b c
106
+
107
+ # ... and do the same for a new column
108
+ >>> data[:new_column] = 10...13
109
+ >>> data
110
+ => ObjectTable(3, 4)
111
+ a b c new_column
112
+ 0: 4 100 "scalar string" 10
113
+ 1: 5 100 "scalar string" 11
114
+ 2: 6 100 "scalar string" 12
115
+ a b c new_column
116
+
117
+ # ... but make sure they have the right length
118
+ >>> data[:a] = [1, 2, 3, 4]
119
+ IndexError: dst.shape[0]=3 != src.shape[0]=4
120
+ >>> data[:another_column] = [1, 2, 3, 4]
121
+ IndexError: dst.shape[0]=3 != src.shape[0]=4
122
+ ```
123
+
124
+ ### Operating on columns
125
+
126
+ All standard NArray operations apply (addition, subtraction etc.)
127
+ Missing methods are vectorised over the column
128
+
129
+ ```ruby
130
+ >>> data = ObjectTable.new(column: ['abc', 'bcd', 'cde'])
131
+ >>> data.column.match(/bc/)
132
+ => ObjectTable::Column.object(3):
133
+ [ #<MatchData "bc">, #<MatchData "bc">, nil ]
134
+ ```
135
+
136
+ ### `#apply`
137
+
138
+ This is just a convenience method.
139
+ It basically `#instance_eval`s the block passed to it.
140
+
141
+ ```ruby
142
+ >>> data = ObjectTable.new(a: [1, 2, 3], b: [4, 5, 6])
143
+
144
+ # this is exactly the same as (data.a + data.b)
145
+ >>> data.apply{ a + b }
146
+ => ObjectTable::Column.int(3):
147
+ [ 5, 7, 9 ]
148
+
149
+ # you can use self to set/add columns
150
+ >>> data.apply{ self[:c] = a * b }
151
+ >>> data
152
+ => ObjectTable(3, 3)
153
+ a b c
154
+ 0: 1 4 4
155
+ 1: 2 5 10
156
+ 2: 3 6 18
157
+ a b c
158
+ ```
159
+
160
+ ## Filtering
161
+
162
+ Use the `#where` method and pass a filtering block.
163
+ The block is evaluated in the context of the table (like for `#apply`).
164
+ This creates a `TempView` which syncs with the parent table.
165
+ This means any changes made to the parent also affect the view.
166
+
167
+ ```ruby
168
+ >>> data = ObjectTable.new(a: 0...5, b: 5...10)
169
+ >>> a_lt_3 = data.where{ a < 3 }
170
+ => ObjectTable::TempView(3, 2)
171
+ a b
172
+ 0: 0 5
173
+ 1: 1 6
174
+ 2: 2 7
175
+ a b
176
+
177
+ # update the parent table
178
+ >>> data[:b] = data.b.reverse
179
+ # and the view gets updated too
180
+ >>> a_lt_3
181
+ => ObjectTable::TempView(3, 2)
182
+ a b
183
+ 0: 0 9
184
+ 1: 1 8
185
+ 2: 2 7
186
+ a b
187
+
188
+ # you can also chain #where calls
189
+ >>> data.where{ a < 3 }.where{ b > 7 }
190
+ => ObjectTable::TempView(2, 2)
191
+ a b
192
+ 0: 0 9
193
+ 1: 1 8
194
+ a b
195
+ # which is the same as
196
+ >>> data.where{ (a < 3) & (b > 7) }
197
+ ```
198
+
199
+ Changes are propagated to the parent.
200
+ This means any changes made to the view also affect the parent.
201
+
202
+ ```ruby
203
+ >>> data.where{ a < 3 }[:b] = 100
204
+ >>> data
205
+ => ObjectTable(5, 2)
206
+ a b
207
+ 0: 4 5
208
+ 1: 3 6
209
+ 2: 2 100
210
+ 3: 1 100
211
+ 4: 0 100
212
+ a b
213
+
214
+ # changes made to chained filters are propagated too
215
+ >>> data.where{ a > 3 }.where{ b < 100 }[:b] = -100
216
+ >>> data
217
+ => ObjectTable(5, 2)
218
+ a b
219
+ 0: 4 -100
220
+ 1: 3 6
221
+ 2: 2 100
222
+ 3: 1 100
223
+ 4: 0 100
224
+ a b
225
+ ```
226
+
227
+ ### Adding new columns
228
+
229
+ Added columns have a default value of `nil` outside the view.
230
+
231
+ ```ruby
232
+ >>> data = ObjectTable.new(a: 0...5, b: 5...10)
233
+ # where a < 3, c will be 5, elsewhere it will be nil
234
+ >>> data.where{ a < 3 }[:c] = 5
235
+ >>> data
236
+ => ObjectTable(5, 3)
237
+ a b c
238
+ 0: 0 5 5
239
+ 1: 1 6 5
240
+ 2: 2 7 5
241
+ 3: 3 8 nil
242
+ 4: 4 9 nil
243
+ a b c
244
+ ```
245
+
246
+ ### Other notes
247
+
248
+ You can also use `#apply` on a view (as for a table).
249
+
250
+ If you want to filter a table and keep that data (i.e. without it syncing with the parent, propagating changes etc.) just `#clone` it.
251
+
252
+ ## Grouping (and aggregating)
253
+
254
+ Use the `#group` method and pass a block that returns grouping keys.
255
+ Then call `#each` to iterate through the groups or `#apply` to aggregate the results.
256
+ The blocks are evaluated in the context of the table (in the case of `#apply`, the context of the group).
257
+
258
+ The block passed to `#group` should return a hash mapping key name => key. See the example below.
259
+
260
+ ```ruby
261
+ >>> data = ObjectTable.new(name: ['John', 'Tom', 'Jim', 'Tim', 'Jack'], value: 1..5)
262
+ name value
263
+ 0: "John" 1
264
+ 1: "Tom" 2
265
+ 2: "Jim" 3
266
+ 3: "Tim" 4
267
+ 4: "Jack" 5
268
+ name value
269
+
270
+ # group by the first letter of the name and print out each group
271
+ >>> data.group{ {initial: name.map{|n| n[0]}} }.each{ p self; puts }
272
+ ObjectTable::View(3, 2)
273
+ name value
274
+ 0: "John" 1
275
+ 1: "Jim" 3
276
+ 2: "Jack" 5
277
+ name value
278
+
279
+ ObjectTable::View(2, 2)
280
+ name value
281
+ 0: "Tom" 2
282
+ 1: "Tim" 4
283
+ name value
284
+
285
+ # calculate the average 'value' for each group and get the result in a table
286
+ >>> data.group{ {initial: name.map{|n| n[0]}} }.apply{ value.mean }
287
+ => ObjectTable(2, 2)
288
+ initial v_0
289
+ 0: "J" 3.0
290
+ 1: "T" 3.0
291
+ initial v_0
292
+ ```
293
+
294
+ ### Aggregation
295
+
296
+ Normally you can only have one aggregated column with a default name of v_0.
297
+ You can have more columns and set column names by making an `ObjectTable` or using the `@R` shortcut.
298
+
299
+ ```ruby
300
+ >>> data.group{ {initial: name.map{|n| n[0]}} }.apply{ @R[ mean: value.mean, sum: value.sum, std: value.stddev ] }
301
+ => ObjectTable(2, 4)
302
+ initial mean sum std
303
+ 0: "J" 3.0 9 2.0
304
+ 1: "T" 3.0 6 1.4142135623730951
305
+ initial mean sum std
306
+ ```
307
+
308
+ ### Assigning to columns
309
+
310
+ Assigning to columns will assign by group.
311
+
312
+ ```ruby
313
+ >>> data.group{ {initial: name.map{|n| n[0]}} }.each{ self[:num_same_initial] = nrows }
314
+ => ObjectTable(5, 3)
315
+ name value num_same_initial
316
+ 0: "John" 1 3
317
+ 1: "Tom" 2 2
318
+ 2: "Jim" 3 3
319
+ 3: "Tim" 4 2
320
+ 4: "Jack" 5
321
+ name value num_same_initial
322
+ ```
data/Rakefile ADDED
@@ -0,0 +1,5 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rspec/core/rake_task'
3
+ RSpec::Core::RakeTask.new(:spec)
4
+ task :default => :spec
5
+
@@ -0,0 +1,33 @@
1
+ require 'narray'
2
+
3
# A Hash of column name => column data that can normalise its columns so
# that they all describe the same number of rows.
class ObjectTable::BasicGrid < Hash
  # Classes (in addition to NArray) whose instances count as vector columns.
  ARRAY_LIKE = [Array, Range]

  # def self.[](*args)
  #   grid = super
  #   grid._ensure_uniform_columns!
  # end

  # Verifies that every vector column has the same number of rows and
  # replaces each scalar column with an Array repeating the scalar once per
  # row.
  #
  # rows - the expected number of rows. When omitted it is taken from the
  #        vector columns, or defaults to 1 if there are none.
  #
  # Returns self.
  # Raises RuntimeError when the vector columns disagree with each other
  # (or with +rows+, when given).
  def _ensure_uniform_columns!(rows = nil)
    array_counts = []
    narray_counts = []
    scalar_pairs = []

    # Classify every column in a single pass.
    each do |name, column|
      if ARRAY_LIKE.any? { |klass| column.is_a?(klass) }
        array_counts << column.count
      elsif column.is_a?(NArray)
        # the last dimension of an NArray is its row count
        narray_counts << column.shape.last
      else
        scalar_pairs << [name, column]
      end
    end

    # Array/Range counts come first so the error message keeps its ordering.
    row_counts = (array_counts + narray_counts).uniq

    if rows
      consistent = row_counts.empty? || row_counts == [rows]
      raise "Differing number of rows: #{row_counts}" unless consistent
    else
      raise "Differing number of rows: #{row_counts}" if row_counts.length > 1
      rows = row_counts[0] || 1
    end

    # Stretch each scalar to the common row count.
    scalar_pairs.each do |name, scalar|
      self[name] = [scalar] * rows
    end

    self
  end

end
@@ -0,0 +1,80 @@
1
+ require 'narray'
2
+
3
# A single table column. It is an NArray whose slices and comparison results
# are wrapped back into columns, and which applies methods it does not know
# to each of its elements.
class ObjectTable::Column < NArray
  # Builds a column from a vector-like value.
  #
  # value - a Column (returned unchanged), an NArray, a Range or an Array.
  #
  # Returns the column.
  # Raises ArgumentError for any other kind of value.
  def self.make(value)
    case value
    when self
      value
    when NArray
      if value.rank <= 0
        # nothing to cast: create a size-0 column with the same typecode
        new(value.typecode, 0)
      else
        cast(value)
      end
    when Range
      to_na(value.to_a)
    when Array
      to_na(value)
    else
      raise ArgumentError, "Expected NArray or Array, got #{value.class}"
    end
  end

  # Same as the inherited #slice, with the result passed through .make.
  def slice(*)
    self.class.make(super)
  end

  # Same as the inherited #[]; NArray results are passed through .make,
  # anything else (e.g. a single element) is returned untouched.
  def [](*)
    selected = super
    selected.is_a?(NArray) ? self.class.make(selected) : selected
  end

  # Selects rows along the last dimension, taking every index of the
  # leading dimensions.
  #
  # rows  - the row selector handed to #slice / #[].
  # slice - when truthy use #slice, otherwise use #[].
  def get_rows(rows, slice=false)
    leading_dims = [nil] * (rank - 1)
    return self.slice(*leading_dims, rows) if slice

    self[*leading_dims, rows]
  end

  # Returns a column of the distinct elements (computed via Array#uniq).
  def uniq
    self.class.make(to_a.uniq)
  end

  # Re-sends +operator+ to +other+ with this column presented as a plain
  # NArray reference.
  # NOTE(review): presumably invoked by NArray when the column is the
  # right-hand operand - confirm against the narray gem.
  def coerce_rev(other, operator)
    other.send(operator, NArray.refer(self))
  end

  # Vectorises unknown methods: the call (including any block) is sent to
  # every element and the results are collected.
  def method_missing(*args, &block)
    # forward the block too, so calls such as gsub(...) { } keep working
    collect { |element| element.send(*args, &block) }
  end

  # def collect(*)
  #   self.class.make super, name
  # end

  # Presents NArray operands as plain NArray references; other values are
  # returned as they are.
  def _refer(value)
    value.is_a?(NArray) ? NArray.refer(value) : value
  end

  # Arithmetic operators: the operand goes through _refer before the
  # inherited implementation is called; its result is returned as-is.
  %w[+ - * /].each do |op|
    define_method(op) do |other|
      #self.class.make super(_refer(other)), name
      super(_refer(other))
    end
  end

  # Comparison, logical and remaining binary operators: the inherited result
  # is passed through .make.
  %w[xor or and <= >= le ge < > gt lt % ** ne eq & | ^ to_type].each do |op|
    define_method(op) do |other|
      self.class.make(super(other))
    end
  end

  # %w{ not abs -@ ~ }.each do |op|
  #   define_method(op) do
  #     self.class.make super()
  #   # end
  # end

end
@@ -0,0 +1,58 @@
1
+ require_relative 'view'
2
+
3
# The result of grouping a table: it remembers the parent table, the names of
# the grouping keys and the groups themselves, and can iterate over or
# aggregate them.
class ObjectTable::Grouped
  # Prefix of the column name generated for unnamed aggregation results.
  DEFAULT_VALUE_PREFIX = 'v_'

  # A view for one group; the group's keys are exposed through #K.
  class Group < ObjectTable::View
    attr_reader :K

    def initialize(parent, keys, value)
      super(parent, value)
      @K = keys
    end
  end

  # parent - the table that was grouped.
  # names  - the key names.
  # groups - enumerable yielding (key values, group value) pairs.
  def initialize(parent, names, groups)
    @parent = parent
    @names = names
    @groups = groups
  end

  # Applies the block to every group in turn (via Group#apply).
  #
  # Returns the parent table.
  def each(&block)
    @groups.each do |key_values, members|
      key_pairs = @names.zip(key_values)
      group = Group.new(@parent, Hash[key_pairs], members)
      group.apply(&block)
    end
    @parent
  end

  # Applies the block to every group and stacks the per-group results into
  # one table. Each result is prefixed with the group's key columns; results
  # that are not tables/grids are stored under a generated column name.
  def apply(&block)
    value_key = self.class._generate_name(DEFAULT_VALUE_PREFIX, @names).to_sym

    grids = @groups.map do |key_values, members|
      key_pairs = @names.zip(key_values)
      result = Group.new(@parent, Hash[key_pairs], members).apply(&block)

      # tables contribute their underlying columns
      result = result.columns if result.is_a?(ObjectTable::TableMethods)

      grid =
        if result.is_a?(ObjectTable::BasicGrid)
          ObjectTable::BasicGrid[key_pairs].merge!(result)
        else
          ObjectTable::BasicGrid[key_pairs + [[value_key, result]]]
        end
      grid._ensure_uniform_columns!
    end

    ObjectTable.stack(*grids)
  end

  # Returns "<prefix><n>" where n is one greater than the largest number
  # already following +prefix+ in +existing_names+ (0 when there is none).
  def self._generate_name(prefix, existing_names)
    pattern = Regexp.new(Regexp.quote(prefix) + '(\d+)')

    used_numbers = existing_names.map do |name|
      found = pattern.match(name)
      found && found[-1].to_i
    end.compact

    highest = used_numbers.max || -1
    "#{prefix}#{highest + 1}"
  end

end
@@ -0,0 +1,54 @@
1
+ require_relative 'column'
2
+
3
# A column holding a copy of selected rows of a parent column. Element
# assignment and the destructive methods listed below are written back to the
# parent at the masked indices.
class ObjectTable::MaskedColumn < ObjectTable::Column
  attr_accessor :indices, :parent, :padded_dims

  # Builds a masked column over +parent+.
  #
  # parent  - the column being masked.
  # indices - selector for the last dimension of +parent+.
  #
  # Returns the new MaskedColumn with its parent, indices and padded_dims set.
  def self.mask(parent, indices)
    padded_dims = [nil] * (parent.rank - 1)
    masked = parent.slice(*padded_dims, indices)

    column =
      if masked.rank <= 0
        new(masked.typecode, 0)
      else
        # copy with the inherited []= so nothing is written back to the parent
        new(masked.typecode, *masked.shape).tap do |fresh|
          fresh.super_slice_assign(masked)
        end
      end

    column.parent = parent
    column.indices = indices
    column.padded_dims = padded_dims
    column
  end

  # let ObjectTable::Column do this, since we've overridden []=
  def self.make(*args)
    ObjectTable::Column.make(*args)
  end

  # Keep a handle on the inherited []= before it is overridden below.
  alias_method :super_slice_assign, :[]=

  # Assigns into the parent at the corresponding indices first, then into
  # this column.
  def []=(*keys, value)
    parent[*padded_dims, indices[*keys]] = value
    super
  end

  # make destructive methods affect parent
  %w[fill! indgen! indgen random! map! collect! conj! imag= mod! add! div! sbt! mul!].each do |op|
    define_method(op) do |*args, &block|
      outcome = super(*args, &block)
      parent[*padded_dims, indices] = outcome
      outcome
    end
  end

  # Results of these operators are cast to plain columns.
  %w[+ - / * % ** to_type not abs -@ ~].each do |op|
    define_method(op) do |*args|
      ObjectTable::Column.cast(super(*args))
    end
  end

  # Returns a clone of this column cast to a plain ObjectTable::Column.
  def clone
    ObjectTable::Column.cast(self).clone
  end

end