rover-df 0.2.1 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/LICENSE.txt +1 -1
- data/README.md +51 -6
- data/lib/rover/data_frame.rb +82 -15
- data/lib/rover/group.rb +1 -1
- data/lib/rover/vector.rb +28 -6
- data/lib/rover/version.rb +1 -1
- metadata +6 -90
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ca39a558c3c12103f03fed4cb8f007fbd00a1f8e84b839916fd0010aae4613ba
|
4
|
+
data.tar.gz: 43df8cdc415cc036ac383f30b7c91a35b644067a3cb8ea199abd7452b98298d5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2724c7e85ee7921f277be833cf89be638c14cbb37a44411bba86c42cacffe7c0e4b82ea04d4dfb3d694c6429ba41bc8e8c10f7cb40e5d34bf59d14755858735f
|
7
|
+
data.tar.gz: fa860158decbca0a0b35ccb82e6f73d9a513c37b483eca52d140842d5dd255899a2e1ded3ec4375a492b86d3ec09ffa53d4871e05f1fdad39f3d2630215417dc
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,24 @@
|
|
1
|
+
## 0.2.5 (2021-09-25)
|
2
|
+
|
3
|
+
- Fixed column types with joins
|
4
|
+
|
5
|
+
## 0.2.4 (2021-06-03)
|
6
|
+
|
7
|
+
- Added grouping for `std` and `var`
|
8
|
+
- Fixed `==` for data frames
|
9
|
+
- Fixed error with `first` and `last` for data frames
|
10
|
+
- Fixed error with `last` when vector size is smaller than `n`
|
11
|
+
|
12
|
+
## 0.2.3 (2021-02-08)
|
13
|
+
|
14
|
+
- Added `select`, `reject`, and `map!` methods to vectors
|
15
|
+
|
16
|
+
## 0.2.2 (2021-01-01)
|
17
|
+
|
18
|
+
- Added line, pie, area, and bar charts
|
19
|
+
- Added `|` and `^` for vectors
|
20
|
+
- Fixed typecasting with `map`
|
21
|
+
|
1
22
|
## 0.2.1 (2020-11-23)
|
2
23
|
|
3
24
|
- Added `plot` method to data frames
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -20,7 +20,7 @@ gem 'rover-df'
|
|
20
20
|
|
21
21
|
A data frame is an in-memory table. It’s a useful data structure for data analysis and machine learning. It uses columnar storage for fast operations on columns.
|
22
22
|
|
23
|
-
Try it out for forecasting by clicking the button below:
|
23
|
+
Try it out for forecasting by clicking the button below (it can take a few minutes to start):
|
24
24
|
|
25
25
|
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/ankane/ml-stack/master?filepath=Forecasting.ipynb)
|
26
26
|
|
@@ -123,24 +123,53 @@ df[1..3]
|
|
123
123
|
df[[1, 4, 5]]
|
124
124
|
```
|
125
125
|
|
126
|
+
Iterate over rows
|
127
|
+
|
128
|
+
```ruby
|
129
|
+
df.each_row { |row| ... }
|
130
|
+
```
|
131
|
+
|
132
|
+
Iterate over a column
|
133
|
+
|
134
|
+
```ruby
|
135
|
+
df[:a].each { |item| ... }
|
136
|
+
# or
|
137
|
+
df[:a].each_with_index { |item, index| ... }
|
138
|
+
```
|
139
|
+
|
126
140
|
## Filtering
|
127
141
|
|
128
142
|
Filter on a condition
|
129
143
|
|
130
144
|
```ruby
|
145
|
+
df[df[:a] == 100]
|
146
|
+
df[df[:a] != 100]
|
131
147
|
df[df[:a] > 100]
|
148
|
+
df[df[:a] >= 100]
|
149
|
+
df[df[:a] < 100]
|
150
|
+
df[df[:a] <= 100]
|
132
151
|
```
|
133
152
|
|
134
|
-
|
153
|
+
In
|
135
154
|
|
136
155
|
```ruby
|
137
|
-
df[df[:a]
|
156
|
+
df[df[:a].in?([1, 2, 3])]
|
157
|
+
df[df[:a].in?(1..3)]
|
158
|
+
df[df[:a].in?(["a", "b", "c"])]
|
138
159
|
```
|
139
160
|
|
140
|
-
|
161
|
+
Not in
|
141
162
|
|
142
163
|
```ruby
|
143
|
-
df[df[:a]
|
164
|
+
df[!df[:a].in?([1, 2, 3])]
|
165
|
+
```
|
166
|
+
|
167
|
+
And, or, and exclusive or
|
168
|
+
|
169
|
+
```ruby
|
170
|
+
df[(df[:a] > 100) & (df[:b] == "one")] # and
|
171
|
+
df[(df[:a] > 100) | (df[:b] == "one")] # or
|
172
|
+
df[(df[:a] > 100) ^ (df[:b] == "one")] # xor
|
144
173
|
```
|
145
174
|
|
146
175
|
## Operations
|
@@ -166,6 +195,8 @@ df[:a].median
|
|
166
195
|
df[:a].percentile(90)
|
167
196
|
df[:a].min
|
168
197
|
df[:a].max
|
198
|
+
df[:a].std
|
199
|
+
df[:a].var
|
169
200
|
```
|
170
201
|
|
171
202
|
Count occurrences
|
@@ -200,7 +231,7 @@ Multiple groups
|
|
200
231
|
df.group([:a, :b]).count
|
201
232
|
```
|
202
233
|
|
203
|
-
## Visualization
|
234
|
+
## Visualization
|
204
235
|
|
205
236
|
Add [Vega](https://github.com/ankane/vega) to your application’s Gemfile:
|
206
237
|
|
@@ -214,6 +245,12 @@ And use:
|
|
214
245
|
df.plot(:a, :b)
|
215
246
|
```
|
216
247
|
|
248
|
+
Specify the chart type (`line`, `pie`, `column`, `bar`, `area`, or `scatter`)
|
249
|
+
|
250
|
+
```ruby
|
251
|
+
df.plot(:a, :b, type: "pie")
|
252
|
+
```
|
253
|
+
|
217
254
|
## Updating Data
|
218
255
|
|
219
256
|
Add a new column
|
@@ -238,6 +275,14 @@ df[:a][0..2] = 1
|
|
238
275
|
df[:a][0..2] = [1, 2, 3]
|
239
276
|
```
|
240
277
|
|
278
|
+
Update all elements
|
279
|
+
|
280
|
+
```ruby
|
281
|
+
df[:a] = df[:a].map { |v| v.gsub("a", "b") }
|
282
|
+
# or
|
283
|
+
df[:a].map! { |v| v.gsub("a", "b") }
|
284
|
+
```
|
285
|
+
|
241
286
|
Update elements matching a condition
|
242
287
|
|
243
288
|
```ruby
|
data/lib/rover/data_frame.rb
CHANGED
@@ -72,6 +72,7 @@ module Rover
|
|
72
72
|
# multiple columns
|
73
73
|
df = DataFrame.new
|
74
74
|
where.each do |k|
|
75
|
+
check_column(k, true)
|
75
76
|
df[k] = @vectors[k]
|
76
77
|
end
|
77
78
|
df
|
@@ -162,7 +163,7 @@ module Rover
|
|
162
163
|
last(n)
|
163
164
|
end
|
164
165
|
|
165
|
-
def first(n =
|
166
|
+
def first(n = 1)
|
166
167
|
new_vectors = {}
|
167
168
|
@vectors.each do |k, v|
|
168
169
|
new_vectors[k] = v.first(n)
|
@@ -170,7 +171,7 @@ module Rover
|
|
170
171
|
DataFrame.new(new_vectors)
|
171
172
|
end
|
172
173
|
|
173
|
-
def last(n =
|
174
|
+
def last(n = 1)
|
174
175
|
new_vectors = {}
|
175
176
|
@vectors.each do |k, v|
|
176
177
|
new_vectors[k] = v.last(n)
|
@@ -300,7 +301,7 @@ module Rover
|
|
300
301
|
Group.new(self, columns.flatten)
|
301
302
|
end
|
302
303
|
|
303
|
-
[:max, :min, :median, :mean, :percentile, :sum].each do |name|
|
304
|
+
[:max, :min, :median, :mean, :percentile, :sum, :std, :var].each do |name|
|
304
305
|
define_method(name) do |column, *args|
|
305
306
|
check_column(column)
|
306
307
|
self[column].send(name, *args)
|
@@ -359,7 +360,7 @@ module Rover
|
|
359
360
|
def ==(other)
|
360
361
|
size == other.size &&
|
361
362
|
keys == other.keys &&
|
362
|
-
keys.all? { |k| self[k] == other[k] }
|
363
|
+
keys.all? { |k| self[k].to_numo == other[k].to_numo }
|
363
364
|
end
|
364
365
|
|
365
366
|
def plot(x = nil, y = nil, type: nil)
|
@@ -374,22 +375,41 @@ module Rover
|
|
374
375
|
elsif types[x] == :object && self[y].numeric?
|
375
376
|
"column"
|
376
377
|
else
|
377
|
-
raise "Cannot determine type"
|
378
|
+
raise "Cannot determine type. Use the type option."
|
378
379
|
end
|
379
380
|
end
|
380
381
|
data = self[[x, y]]
|
381
382
|
|
382
383
|
case type
|
383
|
-
when "
|
384
|
+
when "line", "area"
|
385
|
+
x_type =
|
386
|
+
if data[x].numeric?
|
387
|
+
"quantitative"
|
388
|
+
elsif data[x].all? { |v| v.is_a?(Date) || v.is_a?(Time) }
|
389
|
+
"temporal"
|
390
|
+
else
|
391
|
+
"nominal"
|
392
|
+
end
|
393
|
+
|
394
|
+
scale = x_type == "temporal" ? {type: "utc"} : {}
|
395
|
+
|
384
396
|
Vega.lite
|
385
397
|
.data(data)
|
386
|
-
.mark(type: "
|
398
|
+
.mark(type: type, tooltip: true, interpolate: "cardinal", point: {size: 60})
|
387
399
|
.encoding(
|
388
|
-
x: {field: x, type:
|
389
|
-
y: {field: y, type: "quantitative"
|
390
|
-
size: {value: 60}
|
400
|
+
x: {field: x, type: x_type, scale: scale},
|
401
|
+
y: {field: y, type: "quantitative"}
|
391
402
|
)
|
392
|
-
.config(axis: {
|
403
|
+
.config(axis: {labelFontSize: 12})
|
404
|
+
when "pie"
|
405
|
+
Vega.lite
|
406
|
+
.data(data)
|
407
|
+
.mark(type: "arc", tooltip: true)
|
408
|
+
.encoding(
|
409
|
+
color: {field: x, type: "nominal", sort: "none", axis: {title: nil}, legend: {labelFontSize: 12}},
|
410
|
+
theta: {field: y, type: "quantitative"}
|
411
|
+
)
|
412
|
+
.view(stroke: nil)
|
393
413
|
when "column"
|
394
414
|
Vega.lite
|
395
415
|
.data(data)
|
@@ -399,7 +419,27 @@ module Rover
|
|
399
419
|
x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
|
400
420
|
y: {field: y, type: "quantitative"}
|
401
421
|
)
|
402
|
-
.config(axis: {
|
422
|
+
.config(axis: {labelFontSize: 12})
|
423
|
+
when "bar"
|
424
|
+
Vega.lite
|
425
|
+
.data(data)
|
426
|
+
.mark(type: "bar", tooltip: true)
|
427
|
+
.encoding(
|
428
|
+
# TODO determine label angle
|
429
|
+
y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
|
430
|
+
x: {field: y, type: "quantitative"}
|
431
|
+
)
|
432
|
+
.config(axis: {labelFontSize: 12})
|
433
|
+
when "scatter"
|
434
|
+
Vega.lite
|
435
|
+
.data(data)
|
436
|
+
.mark(type: "circle", tooltip: true)
|
437
|
+
.encoding(
|
438
|
+
x: {field: x, type: "quantitative", scale: {zero: false}},
|
439
|
+
y: {field: y, type: "quantitative", scale: {zero: false}},
|
440
|
+
size: {value: 60}
|
441
|
+
)
|
442
|
+
.config(axis: {labelFontSize: 12})
|
403
443
|
else
|
404
444
|
raise ArgumentError, "Invalid type: #{type}"
|
405
445
|
end
|
@@ -435,10 +475,12 @@ module Rover
|
|
435
475
|
|
436
476
|
left = how == "left"
|
437
477
|
|
478
|
+
types = {}
|
438
479
|
vectors = {}
|
439
480
|
keys = (self.keys + other.keys).uniq
|
440
481
|
keys.each do |k|
|
441
482
|
vectors[k] = []
|
483
|
+
types[k] = join_type(self.types[k], other.types[k])
|
442
484
|
end
|
443
485
|
|
444
486
|
each_row do |r|
|
@@ -458,7 +500,7 @@ module Rover
|
|
458
500
|
end
|
459
501
|
end
|
460
502
|
|
461
|
-
DataFrame.new(vectors)
|
503
|
+
DataFrame.new(vectors, types: types)
|
462
504
|
end
|
463
505
|
|
464
506
|
def check_join_keys(df, keys)
|
@@ -467,8 +509,33 @@ module Rover
|
|
467
509
|
raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
|
468
510
|
end
|
469
511
|
|
470
|
-
|
471
|
-
|
512
|
+
# TODO in 0.3.0
|
513
|
+
# always use did_you_mean
|
514
|
+
def check_column(key, did_you_mean = false)
|
515
|
+
unless include?(key)
|
516
|
+
if did_you_mean
|
517
|
+
if RUBY_VERSION.to_f >= 2.6
|
518
|
+
raise KeyError.new("Missing column: #{key}", receiver: self, key: key)
|
519
|
+
else
|
520
|
+
raise KeyError.new("Missing column: #{key}")
|
521
|
+
end
|
522
|
+
else
|
523
|
+
raise ArgumentError, "Missing column: #{key}"
|
524
|
+
end
|
525
|
+
end
|
526
|
+
end
|
527
|
+
|
528
|
+
def join_type(a, b)
|
529
|
+
if a.nil?
|
530
|
+
b
|
531
|
+
elsif b.nil?
|
532
|
+
a
|
533
|
+
elsif a == b
|
534
|
+
a
|
535
|
+
else
|
536
|
+
# TODO specify
|
537
|
+
nil
|
538
|
+
end
|
472
539
|
end
|
473
540
|
|
474
541
|
def to_vector(v, size: nil, type: nil)
|
data/lib/rover/group.rb
CHANGED
@@ -9,7 +9,7 @@ module Rover
|
|
9
9
|
Group.new(@df, @columns + columns.flatten)
|
10
10
|
end
|
11
11
|
|
12
|
-
[:count, :max, :min, :mean, :median, :percentile, :sum].each do |name|
|
12
|
+
[:count, :max, :min, :mean, :median, :percentile, :sum, :std, :var].each do |name|
|
13
13
|
define_method(name) do |*args|
|
14
14
|
n = [name, args.first].compact.join("_")
|
15
15
|
|
data/lib/rover/vector.rb
CHANGED
@@ -91,7 +91,7 @@ module Rover
|
|
91
91
|
@data[k] = v
|
92
92
|
end
|
93
93
|
|
94
|
-
%w(+ - * / % ** &).each do |op|
|
94
|
+
%w(+ - * / % ** & | ^).each do |op|
|
95
95
|
define_method(op) do |other|
|
96
96
|
other = other.to_numo if other.is_a?(Vector)
|
97
97
|
# TODO better logic
|
@@ -161,9 +161,22 @@ module Rover
|
|
161
161
|
end
|
162
162
|
|
163
163
|
def map(&block)
|
164
|
-
|
165
|
-
|
166
|
-
Vector.new(
|
164
|
+
# convert to Ruby first to cast properly
|
165
|
+
# https://github.com/ruby-numo/numo-narray/issues/181
|
166
|
+
Vector.new(@data.to_a.map(&block))
|
167
|
+
end
|
168
|
+
|
169
|
+
def map!(&block)
|
170
|
+
@data = cast_data(@data.to_a.map(&block))
|
171
|
+
self
|
172
|
+
end
|
173
|
+
|
174
|
+
def select(&block)
|
175
|
+
Vector.new(@data.to_a.select(&block))
|
176
|
+
end
|
177
|
+
|
178
|
+
def reject(&block)
|
179
|
+
Vector.new(@data.to_a.reject(&block))
|
167
180
|
end
|
168
181
|
|
169
182
|
def tally
|
@@ -250,7 +263,11 @@ module Rover
|
|
250
263
|
end
|
251
264
|
|
252
265
|
def last(n = 1)
|
253
|
-
|
266
|
+
if n >= size
|
267
|
+
Vector.new(@data)
|
268
|
+
else
|
269
|
+
Vector.new(@data[-n..-1])
|
270
|
+
end
|
254
271
|
end
|
255
272
|
|
256
273
|
def take(n)
|
@@ -306,7 +323,12 @@ module Rover
|
|
306
323
|
# for IRuby
|
307
324
|
def to_html
|
308
325
|
require "iruby"
|
309
|
-
|
326
|
+
if size > 7
|
327
|
+
# pass 8 rows so maxrows is applied
|
328
|
+
IRuby::HTML.table(first(4).to_a + last(4).to_a, maxrows: 7)
|
329
|
+
else
|
330
|
+
IRuby::HTML.table(to_a)
|
331
|
+
end
|
310
332
|
end
|
311
333
|
|
312
334
|
private
|
data/lib/rover/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rover-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-09-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -16,100 +16,16 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.9.1.
|
19
|
+
version: 0.9.1.9
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.9.1.
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: bundler
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: rake
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: minitest
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ">="
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '5'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ">="
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '5'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: activerecord
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - ">="
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '5'
|
76
|
-
type: :development
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - ">="
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '5'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: sqlite3
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: iruby
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - ">="
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: '0'
|
104
|
-
type: :development
|
105
|
-
prerelease: false
|
106
|
-
version_requirements: !ruby/object:Gem::Requirement
|
107
|
-
requirements:
|
108
|
-
- - ">="
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
version: '0'
|
26
|
+
version: 0.9.1.9
|
111
27
|
description:
|
112
|
-
email: andrew@
|
28
|
+
email: andrew@ankane.org
|
113
29
|
executables: []
|
114
30
|
extensions: []
|
115
31
|
extra_rdoc_files: []
|
@@ -142,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
142
58
|
- !ruby/object:Gem::Version
|
143
59
|
version: '0'
|
144
60
|
requirements: []
|
145
|
-
rubygems_version: 3.
|
61
|
+
rubygems_version: 3.2.22
|
146
62
|
signing_key:
|
147
63
|
specification_version: 4
|
148
64
|
summary: Simple, powerful data frames for Ruby
|