rover-df 0.2.7 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +15 -3
- data/lib/rover/data_frame.rb +39 -21
- data/lib/rover/group.rb +10 -0
- data/lib/rover/version.rb +1 -1
- data/lib/rover.rb +52 -26
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 65d2fda186484e920421543e2f0203635054ccb8a23250bd3fc6a9d8c328725f
|
|
4
|
+
data.tar.gz: e4cd1e6d69e1e4f340f6692111476a5be9405f348841cfba6f6c431f04d85347
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c720f3bc45178f938c20546ac1b7279ae047affafce5e06cff4f703e1d8ff7a99c1bca94a3f40cb7d26945d770bf136a2adc3477cf6ffc3cdaad9a15aa6090a1
|
|
7
|
+
data.tar.gz: c44135cc0e70b08b72e1084565ef3479bcb92000bf34662b76a25933e68ad33a584afae071ddebfd5724ad61fe7e7dbc283241d7194c532dd70f36b1358b266d
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
|
@@ -13,7 +13,7 @@ Simple, powerful data frames for Ruby
|
|
|
13
13
|
Add this line to your application’s Gemfile:
|
|
14
14
|
|
|
15
15
|
```ruby
|
|
16
|
-
gem
|
|
16
|
+
gem "rover-df"
|
|
17
17
|
```
|
|
18
18
|
|
|
19
19
|
## Intro
|
|
@@ -236,7 +236,7 @@ df.group(:a).max(:b)
|
|
|
236
236
|
Multiple groups
|
|
237
237
|
|
|
238
238
|
```ruby
|
|
239
|
-
df.group([:a, :b]).count
|
|
239
|
+
df.group(:a, :b).count
|
|
240
240
|
```
|
|
241
241
|
|
|
242
242
|
## Visualization
|
|
@@ -244,7 +244,7 @@ df.group([:a, :b]).count
|
|
|
244
244
|
Add [Vega](https://github.com/ankane/vega) to your application’s Gemfile:
|
|
245
245
|
|
|
246
246
|
```ruby
|
|
247
|
-
gem
|
|
247
|
+
gem "vega"
|
|
248
248
|
```
|
|
249
249
|
|
|
250
250
|
And use:
|
|
@@ -259,6 +259,18 @@ Specify the chart type (`line`, `pie`, `column`, `bar`, `area`, or `scatter`)
|
|
|
259
259
|
df.plot(:a, :b, type: "pie")
|
|
260
260
|
```
|
|
261
261
|
|
|
262
|
+
Group data
|
|
263
|
+
|
|
264
|
+
```ruby
|
|
265
|
+
df.plot(:a, :b, group: :c)
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
Stacked columns or bars
|
|
269
|
+
|
|
270
|
+
```ruby
|
|
271
|
+
df.plot(:a, :b, group: :c, stacked: true)
|
|
272
|
+
```
|
|
273
|
+
|
|
262
274
|
## Updating Data
|
|
263
275
|
|
|
264
276
|
Add a new column
|
data/lib/rover/data_frame.rb
CHANGED
|
@@ -401,7 +401,7 @@ module Rover
|
|
|
401
401
|
keys.all? { |k| self[k].to_numo == other[k].to_numo }
|
|
402
402
|
end
|
|
403
403
|
|
|
404
|
-
def plot(x = nil, y = nil, type: nil)
|
|
404
|
+
def plot(x = nil, y = nil, type: nil, group: nil, stacked: nil)
|
|
405
405
|
require "vega"
|
|
406
406
|
|
|
407
407
|
raise ArgumentError, "Must specify columns" if keys.size != 2 && (!x || !y)
|
|
@@ -416,7 +416,7 @@ module Rover
|
|
|
416
416
|
raise "Cannot determine type. Use the type option."
|
|
417
417
|
end
|
|
418
418
|
end
|
|
419
|
-
data = self[[x, y]]
|
|
419
|
+
data = self[group.nil? ? [x, y] : [x, y, group]]
|
|
420
420
|
|
|
421
421
|
case type
|
|
422
422
|
when "line", "area"
|
|
@@ -430,16 +430,20 @@ module Rover
|
|
|
430
430
|
end
|
|
431
431
|
|
|
432
432
|
scale = x_type == "temporal" ? {type: "utc"} : {}
|
|
433
|
+
encoding = {
|
|
434
|
+
x: {field: x, type: x_type, scale: scale},
|
|
435
|
+
y: {field: y, type: "quantitative"}
|
|
436
|
+
}
|
|
437
|
+
encoding[:color] = {field: group} if group
|
|
433
438
|
|
|
434
439
|
Vega.lite
|
|
435
440
|
.data(data)
|
|
436
441
|
.mark(type: type, tooltip: true, interpolate: "cardinal", point: {size: 60})
|
|
437
|
-
.encoding(
|
|
438
|
-
x: {field: x, type: x_type, scale: scale},
|
|
439
|
-
y: {field: y, type: "quantitative"}
|
|
440
|
-
)
|
|
442
|
+
.encoding(encoding)
|
|
441
443
|
.config(axis: {labelFontSize: 12})
|
|
442
444
|
when "pie"
|
|
445
|
+
raise ArgumentError, "Cannot use group option with pie chart" unless group.nil?
|
|
446
|
+
|
|
443
447
|
Vega.lite
|
|
444
448
|
.data(data)
|
|
445
449
|
.mark(type: "arc", tooltip: true)
|
|
@@ -449,34 +453,48 @@ module Rover
|
|
|
449
453
|
)
|
|
450
454
|
.view(stroke: nil)
|
|
451
455
|
when "column"
|
|
456
|
+
encoding = {
|
|
457
|
+
x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
|
|
458
|
+
y: {field: y, type: "quantitative"}
|
|
459
|
+
}
|
|
460
|
+
if group
|
|
461
|
+
encoding[:color] = {field: group}
|
|
462
|
+
encoding[:xOffset] = {field: group} unless stacked
|
|
463
|
+
end
|
|
464
|
+
|
|
452
465
|
Vega.lite
|
|
453
466
|
.data(data)
|
|
454
467
|
.mark(type: "bar", tooltip: true)
|
|
455
|
-
.encoding(
|
|
456
|
-
# TODO determine label angle
|
|
457
|
-
x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
|
|
458
|
-
y: {field: y, type: "quantitative"}
|
|
459
|
-
)
|
|
468
|
+
.encoding(encoding)
|
|
460
469
|
.config(axis: {labelFontSize: 12})
|
|
461
470
|
when "bar"
|
|
471
|
+
encoding = {
|
|
472
|
+
# TODO determine label angle
|
|
473
|
+
y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
|
|
474
|
+
x: {field: y, type: "quantitative"}
|
|
475
|
+
}
|
|
476
|
+
if group
|
|
477
|
+
encoding[:color] = {field: group}
|
|
478
|
+
encoding[:yOffset] = {field: group} unless stacked
|
|
479
|
+
end
|
|
480
|
+
|
|
462
481
|
Vega.lite
|
|
463
482
|
.data(data)
|
|
464
483
|
.mark(type: "bar", tooltip: true)
|
|
465
|
-
.encoding(
|
|
466
|
-
# TODO determine label angle
|
|
467
|
-
y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
|
|
468
|
-
x: {field: y, type: "quantitative"}
|
|
469
|
-
)
|
|
484
|
+
.encoding(encoding)
|
|
470
485
|
.config(axis: {labelFontSize: 12})
|
|
471
486
|
when "scatter"
|
|
487
|
+
encoding = {
|
|
488
|
+
x: {field: x, type: "quantitative", scale: {zero: false}},
|
|
489
|
+
y: {field: y, type: "quantitative", scale: {zero: false}},
|
|
490
|
+
size: {value: 60}
|
|
491
|
+
}
|
|
492
|
+
encoding[:color] = {field: group} if group
|
|
493
|
+
|
|
472
494
|
Vega.lite
|
|
473
495
|
.data(data)
|
|
474
496
|
.mark(type: "circle", tooltip: true)
|
|
475
|
-
.encoding(
|
|
476
|
-
x: {field: x, type: "quantitative", scale: {zero: false}},
|
|
477
|
-
y: {field: y, type: "quantitative", scale: {zero: false}},
|
|
478
|
-
size: {value: 60}
|
|
479
|
-
)
|
|
497
|
+
.encoding(encoding)
|
|
480
498
|
.config(axis: {labelFontSize: 12})
|
|
481
499
|
else
|
|
482
500
|
raise ArgumentError, "Invalid type: #{type}"
|
data/lib/rover/group.rb
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
module Rover
|
|
2
2
|
class Group
|
|
3
|
+
# TODO raise ArgumentError for empty columns in 0.3.0
|
|
3
4
|
def initialize(df, columns)
|
|
4
5
|
@df = df
|
|
5
6
|
@columns = columns
|
|
6
7
|
end
|
|
7
8
|
|
|
9
|
+
# TODO raise ArgumentError for empty columns in 0.3.0
|
|
8
10
|
def group(*columns)
|
|
9
11
|
Group.new(@df, @columns + columns.flatten)
|
|
10
12
|
end
|
|
@@ -22,6 +24,14 @@ module Rover
|
|
|
22
24
|
end
|
|
23
25
|
end
|
|
24
26
|
|
|
27
|
+
def plot(*args, **options)
|
|
28
|
+
raise ArgumentError, "Multiple groups not supported" if @columns.size > 1
|
|
29
|
+
# same message as Ruby
|
|
30
|
+
raise ArgumentError, "unknown keyword: :group" if options.key?(:group)
|
|
31
|
+
|
|
32
|
+
@df.plot(*args, **options, group: @columns.first)
|
|
33
|
+
end
|
|
34
|
+
|
|
25
35
|
private
|
|
26
36
|
|
|
27
37
|
# TODO make more efficient
|
data/lib/rover/version.rb
CHANGED
data/lib/rover.rb
CHANGED
|
@@ -9,45 +9,57 @@ require "rover/version"
|
|
|
9
9
|
|
|
10
10
|
module Rover
|
|
11
11
|
class << self
|
|
12
|
-
def read_csv(path,
|
|
13
|
-
|
|
14
|
-
|
|
12
|
+
def read_csv(path, **options)
|
|
13
|
+
csv_to_df(**options) do |csv_options|
|
|
14
|
+
CSV.read(path, **csv_options)
|
|
15
|
+
end
|
|
15
16
|
end
|
|
16
17
|
|
|
17
|
-
def parse_csv(str,
|
|
18
|
-
|
|
19
|
-
|
|
18
|
+
def parse_csv(str, **options)
|
|
19
|
+
csv_to_df(**options) do |csv_options|
|
|
20
|
+
CSV.parse(str, **csv_options)
|
|
21
|
+
end
|
|
20
22
|
end
|
|
21
23
|
|
|
22
|
-
def read_parquet(path,
|
|
23
|
-
|
|
24
|
-
|
|
24
|
+
def read_parquet(path, **options)
|
|
25
|
+
parquet_to_df(**options) do
|
|
26
|
+
Arrow::Table.load(path)
|
|
27
|
+
end
|
|
25
28
|
end
|
|
26
29
|
|
|
27
|
-
def parse_parquet(str,
|
|
28
|
-
|
|
29
|
-
|
|
30
|
+
def parse_parquet(str, **options)
|
|
31
|
+
parquet_to_df(**options) do
|
|
32
|
+
Arrow::Table.load(Arrow::Buffer.new(str), format: :parquet)
|
|
33
|
+
end
|
|
30
34
|
end
|
|
31
35
|
|
|
32
36
|
private
|
|
33
37
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
raise ArgumentError, "Must specify headers"
|
|
38
|
-
options
|
|
39
|
-
end
|
|
38
|
+
def csv_to_df(types: nil, headers: nil, **csv_options)
|
|
39
|
+
require "csv"
|
|
40
|
+
|
|
41
|
+
raise ArgumentError, "Must specify headers" if headers == false
|
|
40
42
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
43
|
+
# TODO use date converter
|
|
44
|
+
table = yield({converters: :numeric}.merge(csv_options))
|
|
45
|
+
|
|
46
|
+
headers = nil if headers == true
|
|
47
|
+
if headers && table.first && headers.size < table.first.size
|
|
48
|
+
raise ArgumentError, "Expected #{table.first.size} headers, got #{headers.size}"
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
table_headers = (headers || table.shift || []).dup
|
|
52
|
+
# keep same behavior as headers: true
|
|
53
|
+
if table.first
|
|
54
|
+
while table_headers.size < table.first.size
|
|
55
|
+
table_headers << nil
|
|
56
|
+
end
|
|
44
57
|
end
|
|
45
58
|
|
|
46
|
-
table.by_col!
|
|
47
59
|
data = {}
|
|
48
|
-
keys =
|
|
60
|
+
keys = table_headers.map { |k| [k, true] }.to_h
|
|
49
61
|
unnamed_suffix = 1
|
|
50
|
-
|
|
62
|
+
table_headers.each_with_index do |k, i|
|
|
51
63
|
# TODO do same for empty string in 0.3.0
|
|
52
64
|
if k.nil?
|
|
53
65
|
k = "unnamed"
|
|
@@ -57,7 +69,18 @@ module Rover
|
|
|
57
69
|
end
|
|
58
70
|
keys[k] = true
|
|
59
71
|
end
|
|
60
|
-
|
|
72
|
+
table_headers[i] = k
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
table_headers.each_with_index do |k, i|
|
|
76
|
+
# use first value for duplicate headers like headers: true
|
|
77
|
+
next if data[k]
|
|
78
|
+
|
|
79
|
+
values = []
|
|
80
|
+
table.each do |row|
|
|
81
|
+
values << row[i]
|
|
82
|
+
end
|
|
83
|
+
data[k] = values
|
|
61
84
|
end
|
|
62
85
|
|
|
63
86
|
DataFrame.new(data, types: types)
|
|
@@ -78,7 +101,10 @@ module Rover
|
|
|
78
101
|
"uint64" => Numo::UInt64
|
|
79
102
|
}
|
|
80
103
|
|
|
81
|
-
def parquet_to_df(
|
|
104
|
+
def parquet_to_df(types: nil)
|
|
105
|
+
require "parquet"
|
|
106
|
+
|
|
107
|
+
table = yield
|
|
82
108
|
data = {}
|
|
83
109
|
types ||= {}
|
|
84
110
|
table.each_column do |column|
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rover-df
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.7
|
|
4
|
+
version: 0.2.8
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2022-
|
|
11
|
+
date: 2022-03-15 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: numo-narray
|
|
@@ -58,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
58
58
|
- !ruby/object:Gem::Version
|
|
59
59
|
version: '0'
|
|
60
60
|
requirements: []
|
|
61
|
-
rubygems_version: 3.3.
|
|
61
|
+
rubygems_version: 3.3.7
|
|
62
62
|
signing_key:
|
|
63
63
|
specification_version: 4
|
|
64
64
|
summary: Simple, powerful data frames for Ruby
|