rover-df 0.2.7 → 0.2.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +15 -3
- data/lib/rover/data_frame.rb +39 -21
- data/lib/rover/group.rb +10 -0
- data/lib/rover/version.rb +1 -1
- data/lib/rover.rb +52 -26
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 65d2fda186484e920421543e2f0203635054ccb8a23250bd3fc6a9d8c328725f
|
4
|
+
data.tar.gz: e4cd1e6d69e1e4f340f6692111476a5be9405f348841cfba6f6c431f04d85347
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c720f3bc45178f938c20546ac1b7279ae047affafce5e06cff4f703e1d8ff7a99c1bca94a3f40cb7d26945d770bf136a2adc3477cf6ffc3cdaad9a15aa6090a1
|
7
|
+
data.tar.gz: c44135cc0e70b08b72e1084565ef3479bcb92000bf34662b76a25933e68ad33a584afae071ddebfd5724ad61fe7e7dbc283241d7194c532dd70f36b1358b266d
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -13,7 +13,7 @@ Simple, powerful data frames for Ruby
|
|
13
13
|
Add this line to your application’s Gemfile:
|
14
14
|
|
15
15
|
```ruby
|
16
|
-
gem
|
16
|
+
gem "rover-df"
|
17
17
|
```
|
18
18
|
|
19
19
|
## Intro
|
@@ -236,7 +236,7 @@ df.group(:a).max(:b)
|
|
236
236
|
Multiple groups
|
237
237
|
|
238
238
|
```ruby
|
239
|
-
df.group([:a, :b]).count
|
239
|
+
df.group(:a, :b).count
|
240
240
|
```
|
241
241
|
|
242
242
|
## Visualization
|
@@ -244,7 +244,7 @@ df.group([:a, :b]).count
|
|
244
244
|
Add [Vega](https://github.com/ankane/vega) to your application’s Gemfile:
|
245
245
|
|
246
246
|
```ruby
|
247
|
-
gem
|
247
|
+
gem "vega"
|
248
248
|
```
|
249
249
|
|
250
250
|
And use:
|
@@ -259,6 +259,18 @@ Specify the chart type (`line`, `pie`, `column`, `bar`, `area`, or `scatter`)
|
|
259
259
|
df.plot(:a, :b, type: "pie")
|
260
260
|
```
|
261
261
|
|
262
|
+
Group data
|
263
|
+
|
264
|
+
```ruby
|
265
|
+
df.plot(:a, :b, group: :c)
|
266
|
+
```
|
267
|
+
|
268
|
+
Stacked columns or bars
|
269
|
+
|
270
|
+
```ruby
|
271
|
+
df.plot(:a, :b, group: :c, stacked: true)
|
272
|
+
```
|
273
|
+
|
262
274
|
## Updating Data
|
263
275
|
|
264
276
|
Add a new column
|
data/lib/rover/data_frame.rb
CHANGED
@@ -401,7 +401,7 @@ module Rover
|
|
401
401
|
keys.all? { |k| self[k].to_numo == other[k].to_numo }
|
402
402
|
end
|
403
403
|
|
404
|
-
def plot(x = nil, y = nil, type: nil)
|
404
|
+
def plot(x = nil, y = nil, type: nil, group: nil, stacked: nil)
|
405
405
|
require "vega"
|
406
406
|
|
407
407
|
raise ArgumentError, "Must specify columns" if keys.size != 2 && (!x || !y)
|
@@ -416,7 +416,7 @@ module Rover
|
|
416
416
|
raise "Cannot determine type. Use the type option."
|
417
417
|
end
|
418
418
|
end
|
419
|
-
data = self[[x, y]]
|
419
|
+
data = self[group.nil? ? [x, y] : [x, y, group]]
|
420
420
|
|
421
421
|
case type
|
422
422
|
when "line", "area"
|
@@ -430,16 +430,20 @@ module Rover
|
|
430
430
|
end
|
431
431
|
|
432
432
|
scale = x_type == "temporal" ? {type: "utc"} : {}
|
433
|
+
encoding = {
|
434
|
+
x: {field: x, type: x_type, scale: scale},
|
435
|
+
y: {field: y, type: "quantitative"}
|
436
|
+
}
|
437
|
+
encoding[:color] = {field: group} if group
|
433
438
|
|
434
439
|
Vega.lite
|
435
440
|
.data(data)
|
436
441
|
.mark(type: type, tooltip: true, interpolate: "cardinal", point: {size: 60})
|
437
|
-
.encoding(
|
438
|
-
x: {field: x, type: x_type, scale: scale},
|
439
|
-
y: {field: y, type: "quantitative"}
|
440
|
-
)
|
442
|
+
.encoding(encoding)
|
441
443
|
.config(axis: {labelFontSize: 12})
|
442
444
|
when "pie"
|
445
|
+
raise ArgumentError, "Cannot use group option with pie chart" unless group.nil?
|
446
|
+
|
443
447
|
Vega.lite
|
444
448
|
.data(data)
|
445
449
|
.mark(type: "arc", tooltip: true)
|
@@ -449,34 +453,48 @@ module Rover
|
|
449
453
|
)
|
450
454
|
.view(stroke: nil)
|
451
455
|
when "column"
|
456
|
+
encoding = {
|
457
|
+
x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
|
458
|
+
y: {field: y, type: "quantitative"}
|
459
|
+
}
|
460
|
+
if group
|
461
|
+
encoding[:color] = {field: group}
|
462
|
+
encoding[:xOffset] = {field: group} unless stacked
|
463
|
+
end
|
464
|
+
|
452
465
|
Vega.lite
|
453
466
|
.data(data)
|
454
467
|
.mark(type: "bar", tooltip: true)
|
455
|
-
.encoding(
|
456
|
-
# TODO determine label angle
|
457
|
-
x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
|
458
|
-
y: {field: y, type: "quantitative"}
|
459
|
-
)
|
468
|
+
.encoding(encoding)
|
460
469
|
.config(axis: {labelFontSize: 12})
|
461
470
|
when "bar"
|
471
|
+
encoding = {
|
472
|
+
# TODO determine label angle
|
473
|
+
y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
|
474
|
+
x: {field: y, type: "quantitative"}
|
475
|
+
}
|
476
|
+
if group
|
477
|
+
encoding[:color] = {field: group}
|
478
|
+
encoding[:yOffset] = {field: group} unless stacked
|
479
|
+
end
|
480
|
+
|
462
481
|
Vega.lite
|
463
482
|
.data(data)
|
464
483
|
.mark(type: "bar", tooltip: true)
|
465
|
-
.encoding(
|
466
|
-
# TODO determine label angle
|
467
|
-
y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
|
468
|
-
x: {field: y, type: "quantitative"}
|
469
|
-
)
|
484
|
+
.encoding(encoding)
|
470
485
|
.config(axis: {labelFontSize: 12})
|
471
486
|
when "scatter"
|
487
|
+
encoding = {
|
488
|
+
x: {field: x, type: "quantitative", scale: {zero: false}},
|
489
|
+
y: {field: y, type: "quantitative", scale: {zero: false}},
|
490
|
+
size: {value: 60}
|
491
|
+
}
|
492
|
+
encoding[:color] = {field: group} if group
|
493
|
+
|
472
494
|
Vega.lite
|
473
495
|
.data(data)
|
474
496
|
.mark(type: "circle", tooltip: true)
|
475
|
-
.encoding(
|
476
|
-
x: {field: x, type: "quantitative", scale: {zero: false}},
|
477
|
-
y: {field: y, type: "quantitative", scale: {zero: false}},
|
478
|
-
size: {value: 60}
|
479
|
-
)
|
497
|
+
.encoding(encoding)
|
480
498
|
.config(axis: {labelFontSize: 12})
|
481
499
|
else
|
482
500
|
raise ArgumentError, "Invalid type: #{type}"
|
data/lib/rover/group.rb
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
module Rover
|
2
2
|
class Group
|
3
|
+
# TODO raise ArgumentError for empty columns in 0.3.0
|
3
4
|
def initialize(df, columns)
|
4
5
|
@df = df
|
5
6
|
@columns = columns
|
6
7
|
end
|
7
8
|
|
9
|
+
# TODO raise ArgumentError for empty columns in 0.3.0
|
8
10
|
def group(*columns)
|
9
11
|
Group.new(@df, @columns + columns.flatten)
|
10
12
|
end
|
@@ -22,6 +24,14 @@ module Rover
|
|
22
24
|
end
|
23
25
|
end
|
24
26
|
|
27
|
+
def plot(*args, **options)
|
28
|
+
raise ArgumentError, "Multiple groups not supported" if @columns.size > 1
|
29
|
+
# same message as Ruby
|
30
|
+
raise ArgumentError, "unknown keyword: :group" if options.key?(:group)
|
31
|
+
|
32
|
+
@df.plot(*args, **options, group: @columns.first)
|
33
|
+
end
|
34
|
+
|
25
35
|
private
|
26
36
|
|
27
37
|
# TODO make more efficient
|
data/lib/rover/version.rb
CHANGED
data/lib/rover.rb
CHANGED
@@ -9,45 +9,57 @@ require "rover/version"
|
|
9
9
|
|
10
10
|
module Rover
|
11
11
|
class << self
|
12
|
-
def read_csv(path,
|
13
|
-
|
14
|
-
|
12
|
+
def read_csv(path, **options)
|
13
|
+
csv_to_df(**options) do |csv_options|
|
14
|
+
CSV.read(path, **csv_options)
|
15
|
+
end
|
15
16
|
end
|
16
17
|
|
17
|
-
def parse_csv(str,
|
18
|
-
|
19
|
-
|
18
|
+
def parse_csv(str, **options)
|
19
|
+
csv_to_df(**options) do |csv_options|
|
20
|
+
CSV.parse(str, **csv_options)
|
21
|
+
end
|
20
22
|
end
|
21
23
|
|
22
|
-
def read_parquet(path,
|
23
|
-
|
24
|
-
|
24
|
+
def read_parquet(path, **options)
|
25
|
+
parquet_to_df(**options) do
|
26
|
+
Arrow::Table.load(path)
|
27
|
+
end
|
25
28
|
end
|
26
29
|
|
27
|
-
def parse_parquet(str,
|
28
|
-
|
29
|
-
|
30
|
+
def parse_parquet(str, **options)
|
31
|
+
parquet_to_df(**options) do
|
32
|
+
Arrow::Table.load(Arrow::Buffer.new(str), format: :parquet)
|
33
|
+
end
|
30
34
|
end
|
31
35
|
|
32
36
|
private
|
33
37
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
raise ArgumentError, "Must specify headers"
|
38
|
-
options
|
39
|
-
end
|
38
|
+
def csv_to_df(types: nil, headers: nil, **csv_options)
|
39
|
+
require "csv"
|
40
|
+
|
41
|
+
raise ArgumentError, "Must specify headers" if headers == false
|
40
42
|
|
41
|
-
|
42
|
-
|
43
|
-
|
43
|
+
# TODO use date converter
|
44
|
+
table = yield({converters: :numeric}.merge(csv_options))
|
45
|
+
|
46
|
+
headers = nil if headers == true
|
47
|
+
if headers && table.first && headers.size < table.first.size
|
48
|
+
raise ArgumentError, "Expected #{table.first.size} headers, got #{headers.size}"
|
49
|
+
end
|
50
|
+
|
51
|
+
table_headers = (headers || table.shift || []).dup
|
52
|
+
# keep same behavior as headers: true
|
53
|
+
if table.first
|
54
|
+
while table_headers.size < table.first.size
|
55
|
+
table_headers << nil
|
56
|
+
end
|
44
57
|
end
|
45
58
|
|
46
|
-
table.by_col!
|
47
59
|
data = {}
|
48
|
-
keys =
|
60
|
+
keys = table_headers.map { |k| [k, true] }.to_h
|
49
61
|
unnamed_suffix = 1
|
50
|
-
|
62
|
+
table_headers.each_with_index do |k, i|
|
51
63
|
# TODO do same for empty string in 0.3.0
|
52
64
|
if k.nil?
|
53
65
|
k = "unnamed"
|
@@ -57,7 +69,18 @@ module Rover
|
|
57
69
|
end
|
58
70
|
keys[k] = true
|
59
71
|
end
|
60
|
-
|
72
|
+
table_headers[i] = k
|
73
|
+
end
|
74
|
+
|
75
|
+
table_headers.each_with_index do |k, i|
|
76
|
+
# use first value for duplicate headers like headers: true
|
77
|
+
next if data[k]
|
78
|
+
|
79
|
+
values = []
|
80
|
+
table.each do |row|
|
81
|
+
values << row[i]
|
82
|
+
end
|
83
|
+
data[k] = values
|
61
84
|
end
|
62
85
|
|
63
86
|
DataFrame.new(data, types: types)
|
@@ -78,7 +101,10 @@ module Rover
|
|
78
101
|
"uint64" => Numo::UInt64
|
79
102
|
}
|
80
103
|
|
81
|
-
def parquet_to_df(
|
104
|
+
def parquet_to_df(types: nil)
|
105
|
+
require "parquet"
|
106
|
+
|
107
|
+
table = yield
|
82
108
|
data = {}
|
83
109
|
types ||= {}
|
84
110
|
table.each_column do |column|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rover-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.7
|
4
|
+
version: 0.2.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-03-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -58,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
58
58
|
- !ruby/object:Gem::Version
|
59
59
|
version: '0'
|
60
60
|
requirements: []
|
61
|
-
rubygems_version: 3.3.
|
61
|
+
rubygems_version: 3.3.7
|
62
62
|
signing_key:
|
63
63
|
specification_version: 4
|
64
64
|
summary: Simple, powerful data frames for Ruby
|