rover-df 0.2.1 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/LICENSE.txt +1 -1
- data/README.md +51 -6
- data/lib/rover/data_frame.rb +82 -15
- data/lib/rover/group.rb +1 -1
- data/lib/rover/vector.rb +28 -6
- data/lib/rover/version.rb +1 -1
- metadata +6 -90
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ca39a558c3c12103f03fed4cb8f007fbd00a1f8e84b839916fd0010aae4613ba
|
4
|
+
data.tar.gz: 43df8cdc415cc036ac383f30b7c91a35b644067a3cb8ea199abd7452b98298d5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2724c7e85ee7921f277be833cf89be638c14cbb37a44411bba86c42cacffe7c0e4b82ea04d4dfb3d694c6429ba41bc8e8c10f7cb40e5d34bf59d14755858735f
|
7
|
+
data.tar.gz: fa860158decbca0a0b35ccb82e6f73d9a513c37b483eca52d140842d5dd255899a2e1ded3ec4375a492b86d3ec09ffa53d4871e05f1fdad39f3d2630215417dc
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,24 @@
|
|
1
|
+
## 0.2.5 (2021-09-25)
|
2
|
+
|
3
|
+
- Fixed column types with joins
|
4
|
+
|
5
|
+
## 0.2.4 (2021-06-03)
|
6
|
+
|
7
|
+
- Added grouping for `std` and `var`
|
8
|
+
- Fixed `==` for data frames
|
9
|
+
- Fixed error with `first` and `last` for data frames
|
10
|
+
- Fixed error with `last` when vector size is smaller than `n`
|
11
|
+
|
12
|
+
## 0.2.3 (2021-02-08)
|
13
|
+
|
14
|
+
- Added `select`, `reject`, and `map!` methods to vectors
|
15
|
+
|
16
|
+
## 0.2.2 (2021-01-01)
|
17
|
+
|
18
|
+
- Added line, pie, area, and bar charts
|
19
|
+
- Added `|` and `^` for vectors
|
20
|
+
- Fixed typecasting with `map`
|
21
|
+
|
1
22
|
## 0.2.1 (2020-11-23)
|
2
23
|
|
3
24
|
- Added `plot` method to data frames
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -20,7 +20,7 @@ gem 'rover-df'
|
|
20
20
|
|
21
21
|
A data frame is an in-memory table. It’s a useful data structure for data analysis and machine learning. It uses columnar storage for fast operations on columns.
|
22
22
|
|
23
|
-
Try it out for forecasting by clicking the button below:
|
23
|
+
Try it out for forecasting by clicking the button below (it can take a few minutes to start):
|
24
24
|
|
25
25
|
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/ankane/ml-stack/master?filepath=Forecasting.ipynb)
|
26
26
|
|
@@ -123,24 +123,53 @@ df[1..3]
|
|
123
123
|
df[[1, 4, 5]]
|
124
124
|
```
|
125
125
|
|
126
|
+
Iterate over rows
|
127
|
+
|
128
|
+
```ruby
|
129
|
+
df.each_row { |row| ... }
|
130
|
+
```
|
131
|
+
|
132
|
+
Iterate over a column
|
133
|
+
|
134
|
+
```ruby
|
135
|
+
df[:a].each { |item| ... }
|
136
|
+
# or
|
137
|
+
df[:a].each_with_index { |item, index| ... }
|
138
|
+
```
|
139
|
+
|
126
140
|
## Filtering
|
127
141
|
|
128
142
|
Filter on a condition
|
129
143
|
|
130
144
|
```ruby
|
145
|
+
df[df[:a] == 100]
|
146
|
+
df[df[:a] != 100]
|
131
147
|
df[df[:a] > 100]
|
148
|
+
df[df[:a] >= 100]
|
149
|
+
df[df[:a] < 100]
|
150
|
+
df[df[:a] <= 100]
|
132
151
|
```
|
133
152
|
|
134
|
-
|
153
|
+
In
|
135
154
|
|
136
155
|
```ruby
|
137
|
-
df[df[:a]
|
156
|
+
df[df[:a].in?([1, 2, 3])]
|
157
|
+
df[df[:a].in?(1..3)]
|
158
|
+
df[df[:a].in?(["a", "b", "c"])]
|
138
159
|
```
|
139
160
|
|
140
|
-
|
161
|
+
Not in
|
141
162
|
|
142
163
|
```ruby
|
143
|
-
df[df[:a]
|
164
|
+
df[!df[:a].in?([1, 2, 3])]
|
165
|
+
```
|
166
|
+
|
167
|
+
And, or, and exclusive or
|
168
|
+
|
169
|
+
```ruby
|
170
|
+
df[(df[:a] > 100) & (df[:b] == "one")] # and
|
171
|
+
df[(df[:a] > 100) | (df[:b] == "one")] # or
|
172
|
+
df[(df[:a] > 100) ^ (df[:b] == "one")] # xor
|
144
173
|
```
|
145
174
|
|
146
175
|
## Operations
|
@@ -166,6 +195,8 @@ df[:a].median
|
|
166
195
|
df[:a].percentile(90)
|
167
196
|
df[:a].min
|
168
197
|
df[:a].max
|
198
|
+
df[:a].std
|
199
|
+
df[:a].var
|
169
200
|
```
|
170
201
|
|
171
202
|
Count occurrences
|
@@ -200,7 +231,7 @@ Multiple groups
|
|
200
231
|
df.group([:a, :b]).count
|
201
232
|
```
|
202
233
|
|
203
|
-
## Visualization
|
234
|
+
## Visualization
|
204
235
|
|
205
236
|
Add [Vega](https://github.com/ankane/vega) to your application’s Gemfile:
|
206
237
|
|
@@ -214,6 +245,12 @@ And use:
|
|
214
245
|
df.plot(:a, :b)
|
215
246
|
```
|
216
247
|
|
248
|
+
Specify the chart type (`line`, `pie`, `column`, `bar`, `area`, or `scatter`)
|
249
|
+
|
250
|
+
```ruby
|
251
|
+
df.plot(:a, :b, type: "pie")
|
252
|
+
```
|
253
|
+
|
217
254
|
## Updating Data
|
218
255
|
|
219
256
|
Add a new column
|
@@ -238,6 +275,14 @@ df[:a][0..2] = 1
|
|
238
275
|
df[:a][0..2] = [1, 2, 3]
|
239
276
|
```
|
240
277
|
|
278
|
+
Update all elements
|
279
|
+
|
280
|
+
```ruby
|
281
|
+
df[:a] = df[:a].map { |v| v.gsub("a", "b") }
|
282
|
+
# or
|
283
|
+
df[:a].map! { |v| v.gsub("a", "b") }
|
284
|
+
```
|
285
|
+
|
241
286
|
Update elements matching a condition
|
242
287
|
|
243
288
|
```ruby
|
data/lib/rover/data_frame.rb
CHANGED
@@ -72,6 +72,7 @@ module Rover
|
|
72
72
|
# multiple columns
|
73
73
|
df = DataFrame.new
|
74
74
|
where.each do |k|
|
75
|
+
check_column(k, true)
|
75
76
|
df[k] = @vectors[k]
|
76
77
|
end
|
77
78
|
df
|
@@ -162,7 +163,7 @@ module Rover
|
|
162
163
|
last(n)
|
163
164
|
end
|
164
165
|
|
165
|
-
def first(n =
|
166
|
+
def first(n = 1)
|
166
167
|
new_vectors = {}
|
167
168
|
@vectors.each do |k, v|
|
168
169
|
new_vectors[k] = v.first(n)
|
@@ -170,7 +171,7 @@ module Rover
|
|
170
171
|
DataFrame.new(new_vectors)
|
171
172
|
end
|
172
173
|
|
173
|
-
def last(n =
|
174
|
+
def last(n = 1)
|
174
175
|
new_vectors = {}
|
175
176
|
@vectors.each do |k, v|
|
176
177
|
new_vectors[k] = v.last(n)
|
@@ -300,7 +301,7 @@ module Rover
|
|
300
301
|
Group.new(self, columns.flatten)
|
301
302
|
end
|
302
303
|
|
303
|
-
[:max, :min, :median, :mean, :percentile, :sum].each do |name|
|
304
|
+
[:max, :min, :median, :mean, :percentile, :sum, :std, :var].each do |name|
|
304
305
|
define_method(name) do |column, *args|
|
305
306
|
check_column(column)
|
306
307
|
self[column].send(name, *args)
|
@@ -359,7 +360,7 @@ module Rover
|
|
359
360
|
def ==(other)
|
360
361
|
size == other.size &&
|
361
362
|
keys == other.keys &&
|
362
|
-
keys.all? { |k| self[k] == other[k] }
|
363
|
+
keys.all? { |k| self[k].to_numo == other[k].to_numo }
|
363
364
|
end
|
364
365
|
|
365
366
|
def plot(x = nil, y = nil, type: nil)
|
@@ -374,22 +375,41 @@ module Rover
|
|
374
375
|
elsif types[x] == :object && self[y].numeric?
|
375
376
|
"column"
|
376
377
|
else
|
377
|
-
raise "Cannot determine type"
|
378
|
+
raise "Cannot determine type. Use the type option."
|
378
379
|
end
|
379
380
|
end
|
380
381
|
data = self[[x, y]]
|
381
382
|
|
382
383
|
case type
|
383
|
-
when "
|
384
|
+
when "line", "area"
|
385
|
+
x_type =
|
386
|
+
if data[x].numeric?
|
387
|
+
"quantitative"
|
388
|
+
elsif data[x].all? { |v| v.is_a?(Date) || v.is_a?(Time) }
|
389
|
+
"temporal"
|
390
|
+
else
|
391
|
+
"nominal"
|
392
|
+
end
|
393
|
+
|
394
|
+
scale = x_type == "temporal" ? {type: "utc"} : {}
|
395
|
+
|
384
396
|
Vega.lite
|
385
397
|
.data(data)
|
386
|
-
.mark(type: "
|
398
|
+
.mark(type: type, tooltip: true, interpolate: "cardinal", point: {size: 60})
|
387
399
|
.encoding(
|
388
|
-
x: {field: x, type:
|
389
|
-
y: {field: y, type: "quantitative"
|
390
|
-
size: {value: 60}
|
400
|
+
x: {field: x, type: x_type, scale: scale},
|
401
|
+
y: {field: y, type: "quantitative"}
|
391
402
|
)
|
392
|
-
.config(axis: {
|
403
|
+
.config(axis: {labelFontSize: 12})
|
404
|
+
when "pie"
|
405
|
+
Vega.lite
|
406
|
+
.data(data)
|
407
|
+
.mark(type: "arc", tooltip: true)
|
408
|
+
.encoding(
|
409
|
+
color: {field: x, type: "nominal", sort: "none", axis: {title: nil}, legend: {labelFontSize: 12}},
|
410
|
+
theta: {field: y, type: "quantitative"}
|
411
|
+
)
|
412
|
+
.view(stroke: nil)
|
393
413
|
when "column"
|
394
414
|
Vega.lite
|
395
415
|
.data(data)
|
@@ -399,7 +419,27 @@ module Rover
|
|
399
419
|
x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
|
400
420
|
y: {field: y, type: "quantitative"}
|
401
421
|
)
|
402
|
-
.config(axis: {
|
422
|
+
.config(axis: {labelFontSize: 12})
|
423
|
+
when "bar"
|
424
|
+
Vega.lite
|
425
|
+
.data(data)
|
426
|
+
.mark(type: "bar", tooltip: true)
|
427
|
+
.encoding(
|
428
|
+
# TODO determine label angle
|
429
|
+
y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
|
430
|
+
x: {field: y, type: "quantitative"}
|
431
|
+
)
|
432
|
+
.config(axis: {labelFontSize: 12})
|
433
|
+
when "scatter"
|
434
|
+
Vega.lite
|
435
|
+
.data(data)
|
436
|
+
.mark(type: "circle", tooltip: true)
|
437
|
+
.encoding(
|
438
|
+
x: {field: x, type: "quantitative", scale: {zero: false}},
|
439
|
+
y: {field: y, type: "quantitative", scale: {zero: false}},
|
440
|
+
size: {value: 60}
|
441
|
+
)
|
442
|
+
.config(axis: {labelFontSize: 12})
|
403
443
|
else
|
404
444
|
raise ArgumentError, "Invalid type: #{type}"
|
405
445
|
end
|
@@ -435,10 +475,12 @@ module Rover
|
|
435
475
|
|
436
476
|
left = how == "left"
|
437
477
|
|
478
|
+
types = {}
|
438
479
|
vectors = {}
|
439
480
|
keys = (self.keys + other.keys).uniq
|
440
481
|
keys.each do |k|
|
441
482
|
vectors[k] = []
|
483
|
+
types[k] = join_type(self.types[k], other.types[k])
|
442
484
|
end
|
443
485
|
|
444
486
|
each_row do |r|
|
@@ -458,7 +500,7 @@ module Rover
|
|
458
500
|
end
|
459
501
|
end
|
460
502
|
|
461
|
-
DataFrame.new(vectors)
|
503
|
+
DataFrame.new(vectors, types: types)
|
462
504
|
end
|
463
505
|
|
464
506
|
def check_join_keys(df, keys)
|
@@ -467,8 +509,33 @@ module Rover
|
|
467
509
|
raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
|
468
510
|
end
|
469
511
|
|
470
|
-
|
471
|
-
|
512
|
+
# TODO in 0.3.0
|
513
|
+
# always use did_you_mean
|
514
|
+
def check_column(key, did_you_mean = false)
|
515
|
+
unless include?(key)
|
516
|
+
if did_you_mean
|
517
|
+
if RUBY_VERSION.to_f >= 2.6
|
518
|
+
raise KeyError.new("Missing column: #{key}", receiver: self, key: key)
|
519
|
+
else
|
520
|
+
raise KeyError.new("Missing column: #{key}")
|
521
|
+
end
|
522
|
+
else
|
523
|
+
raise ArgumentError, "Missing column: #{key}"
|
524
|
+
end
|
525
|
+
end
|
526
|
+
end
|
527
|
+
|
528
|
+
def join_type(a, b)
|
529
|
+
if a.nil?
|
530
|
+
b
|
531
|
+
elsif b.nil?
|
532
|
+
a
|
533
|
+
elsif a == b
|
534
|
+
a
|
535
|
+
else
|
536
|
+
# TODO specify
|
537
|
+
nil
|
538
|
+
end
|
472
539
|
end
|
473
540
|
|
474
541
|
def to_vector(v, size: nil, type: nil)
|
data/lib/rover/group.rb
CHANGED
@@ -9,7 +9,7 @@ module Rover
|
|
9
9
|
Group.new(@df, @columns + columns.flatten)
|
10
10
|
end
|
11
11
|
|
12
|
-
[:count, :max, :min, :mean, :median, :percentile, :sum].each do |name|
|
12
|
+
[:count, :max, :min, :mean, :median, :percentile, :sum, :std, :var].each do |name|
|
13
13
|
define_method(name) do |*args|
|
14
14
|
n = [name, args.first].compact.join("_")
|
15
15
|
|
data/lib/rover/vector.rb
CHANGED
@@ -91,7 +91,7 @@ module Rover
|
|
91
91
|
@data[k] = v
|
92
92
|
end
|
93
93
|
|
94
|
-
%w(+ - * / % ** &).each do |op|
|
94
|
+
%w(+ - * / % ** & | ^).each do |op|
|
95
95
|
define_method(op) do |other|
|
96
96
|
other = other.to_numo if other.is_a?(Vector)
|
97
97
|
# TODO better logic
|
@@ -161,9 +161,22 @@ module Rover
|
|
161
161
|
end
|
162
162
|
|
163
163
|
def map(&block)
|
164
|
-
|
165
|
-
|
166
|
-
Vector.new(
|
164
|
+
# convert to Ruby first to cast properly
|
165
|
+
# https://github.com/ruby-numo/numo-narray/issues/181
|
166
|
+
Vector.new(@data.to_a.map(&block))
|
167
|
+
end
|
168
|
+
|
169
|
+
def map!(&block)
|
170
|
+
@data = cast_data(@data.to_a.map(&block))
|
171
|
+
self
|
172
|
+
end
|
173
|
+
|
174
|
+
def select(&block)
|
175
|
+
Vector.new(@data.to_a.select(&block))
|
176
|
+
end
|
177
|
+
|
178
|
+
def reject(&block)
|
179
|
+
Vector.new(@data.to_a.reject(&block))
|
167
180
|
end
|
168
181
|
|
169
182
|
def tally
|
@@ -250,7 +263,11 @@ module Rover
|
|
250
263
|
end
|
251
264
|
|
252
265
|
def last(n = 1)
|
253
|
-
|
266
|
+
if n >= size
|
267
|
+
Vector.new(@data)
|
268
|
+
else
|
269
|
+
Vector.new(@data[-n..-1])
|
270
|
+
end
|
254
271
|
end
|
255
272
|
|
256
273
|
def take(n)
|
@@ -306,7 +323,12 @@ module Rover
|
|
306
323
|
# for IRuby
|
307
324
|
def to_html
|
308
325
|
require "iruby"
|
309
|
-
|
326
|
+
if size > 7
|
327
|
+
# pass 8 rows so maxrows is applied
|
328
|
+
IRuby::HTML.table(first(4).to_a + last(4).to_a, maxrows: 7)
|
329
|
+
else
|
330
|
+
IRuby::HTML.table(to_a)
|
331
|
+
end
|
310
332
|
end
|
311
333
|
|
312
334
|
private
|
data/lib/rover/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rover-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-09-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -16,100 +16,16 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.9.1.
|
19
|
+
version: 0.9.1.9
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.9.1.
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: bundler
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: rake
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: minitest
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ">="
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '5'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ">="
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '5'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: activerecord
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - ">="
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '5'
|
76
|
-
type: :development
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - ">="
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '5'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: sqlite3
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: iruby
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - ">="
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: '0'
|
104
|
-
type: :development
|
105
|
-
prerelease: false
|
106
|
-
version_requirements: !ruby/object:Gem::Requirement
|
107
|
-
requirements:
|
108
|
-
- - ">="
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
version: '0'
|
26
|
+
version: 0.9.1.9
|
111
27
|
description:
|
112
|
-
email: andrew@
|
28
|
+
email: andrew@ankane.org
|
113
29
|
executables: []
|
114
30
|
extensions: []
|
115
31
|
extra_rdoc_files: []
|
@@ -142,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
142
58
|
- !ruby/object:Gem::Version
|
143
59
|
version: '0'
|
144
60
|
requirements: []
|
145
|
-
rubygems_version: 3.
|
61
|
+
rubygems_version: 3.2.22
|
146
62
|
signing_key:
|
147
63
|
specification_version: 4
|
148
64
|
summary: Simple, powerful data frames for Ruby
|