rover-df 0.2.7 → 0.2.8

Sign up to get free protection for your applications and access to all features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c75bed3989211e806e54c296290e5f7b3af236a15742daac876e211e3ca5a76f
4
- data.tar.gz: 5865ff8f1d0036423f18cfee867da63214ee50f79d373b0f0f244853d8efbefa
3
+ metadata.gz: 65d2fda186484e920421543e2f0203635054ccb8a23250bd3fc6a9d8c328725f
4
+ data.tar.gz: e4cd1e6d69e1e4f340f6692111476a5be9405f348841cfba6f6c431f04d85347
5
5
  SHA512:
6
- metadata.gz: 11718bc8ade75a605e92cabe05c29e55c6d4dfe427cd5ada0a8a216db678b32a88f4a43843d1e7dcda7b7a64adb63b76969f1d958e91ca57c4f71989632e14aa
7
- data.tar.gz: 16940236090625bef69cb14d6d9f9f50720314edea1b5892f60443799e5389700ddfb0d79a29ee1e193168097add9d7195799e7f049d85f9c9dc9c443843a678
6
+ metadata.gz: c720f3bc45178f938c20546ac1b7279ae047affafce5e06cff4f703e1d8ff7a99c1bca94a3f40cb7d26945d770bf136a2adc3477cf6ffc3cdaad9a15aa6090a1
7
+ data.tar.gz: c44135cc0e70b08b72e1084565ef3479bcb92000bf34662b76a25933e68ad33a584afae071ddebfd5724ad61fe7e7dbc283241d7194c532dd70f36b1358b266d
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.2.8 (2022-03-15)
2
+
3
+ - Added `group` and `stacked` options to `plot`
4
+ - Improved performance of `read_csv` and `parse_csv`
5
+
1
6
  ## 0.2.7 (2022-01-16)
2
7
 
3
8
  - Added support for booleans to Parquet methods
data/README.md CHANGED
@@ -13,7 +13,7 @@ Simple, powerful data frames for Ruby
13
13
  Add this line to your application’s Gemfile:
14
14
 
15
15
  ```ruby
16
- gem 'rover-df'
16
+ gem "rover-df"
17
17
  ```
18
18
 
19
19
  ## Intro
@@ -236,7 +236,7 @@ df.group(:a).max(:b)
236
236
  Multiple groups
237
237
 
238
238
  ```ruby
239
- df.group([:a, :b]).count
239
+ df.group(:a, :b).count
240
240
  ```
241
241
 
242
242
  ## Visualization
@@ -244,7 +244,7 @@ df.group([:a, :b]).count
244
244
  Add [Vega](https://github.com/ankane/vega) to your application’s Gemfile:
245
245
 
246
246
  ```ruby
247
- gem 'vega'
247
+ gem "vega"
248
248
  ```
249
249
 
250
250
  And use:
@@ -259,6 +259,18 @@ Specify the chart type (`line`, `pie`, `column`, `bar`, `area`, or `scatter`)
259
259
  df.plot(:a, :b, type: "pie")
260
260
  ```
261
261
 
262
+ Group data
263
+
264
+ ```ruby
265
+ df.plot(:a, :b, group: :c)
266
+ ```
267
+
268
+ Stacked columns or bars
269
+
270
+ ```ruby
271
+ df.plot(:a, :b, group: :c, stacked: true)
272
+ ```
273
+
262
274
  ## Updating Data
263
275
 
264
276
  Add a new column
@@ -401,7 +401,7 @@ module Rover
401
401
  keys.all? { |k| self[k].to_numo == other[k].to_numo }
402
402
  end
403
403
 
404
- def plot(x = nil, y = nil, type: nil)
404
+ def plot(x = nil, y = nil, type: nil, group: nil, stacked: nil)
405
405
  require "vega"
406
406
 
407
407
  raise ArgumentError, "Must specify columns" if keys.size != 2 && (!x || !y)
@@ -416,7 +416,7 @@ module Rover
416
416
  raise "Cannot determine type. Use the type option."
417
417
  end
418
418
  end
419
- data = self[[x, y]]
419
+ data = self[group.nil? ? [x, y] : [x, y, group]]
420
420
 
421
421
  case type
422
422
  when "line", "area"
@@ -430,16 +430,20 @@ module Rover
430
430
  end
431
431
 
432
432
  scale = x_type == "temporal" ? {type: "utc"} : {}
433
+ encoding = {
434
+ x: {field: x, type: x_type, scale: scale},
435
+ y: {field: y, type: "quantitative"}
436
+ }
437
+ encoding[:color] = {field: group} if group
433
438
 
434
439
  Vega.lite
435
440
  .data(data)
436
441
  .mark(type: type, tooltip: true, interpolate: "cardinal", point: {size: 60})
437
- .encoding(
438
- x: {field: x, type: x_type, scale: scale},
439
- y: {field: y, type: "quantitative"}
440
- )
442
+ .encoding(encoding)
441
443
  .config(axis: {labelFontSize: 12})
442
444
  when "pie"
445
+ raise ArgumentError, "Cannot use group option with pie chart" unless group.nil?
446
+
443
447
  Vega.lite
444
448
  .data(data)
445
449
  .mark(type: "arc", tooltip: true)
@@ -449,34 +453,48 @@ module Rover
449
453
  )
450
454
  .view(stroke: nil)
451
455
  when "column"
456
+ encoding = {
457
+ x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
458
+ y: {field: y, type: "quantitative"}
459
+ }
460
+ if group
461
+ encoding[:color] = {field: group}
462
+ encoding[:xOffset] = {field: group} unless stacked
463
+ end
464
+
452
465
  Vega.lite
453
466
  .data(data)
454
467
  .mark(type: "bar", tooltip: true)
455
- .encoding(
456
- # TODO determine label angle
457
- x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
458
- y: {field: y, type: "quantitative"}
459
- )
468
+ .encoding(encoding)
460
469
  .config(axis: {labelFontSize: 12})
461
470
  when "bar"
471
+ encoding = {
472
+ # TODO determine label angle
473
+ y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
474
+ x: {field: y, type: "quantitative"}
475
+ }
476
+ if group
477
+ encoding[:color] = {field: group}
478
+ encoding[:yOffset] = {field: group} unless stacked
479
+ end
480
+
462
481
  Vega.lite
463
482
  .data(data)
464
483
  .mark(type: "bar", tooltip: true)
465
- .encoding(
466
- # TODO determine label angle
467
- y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
468
- x: {field: y, type: "quantitative"}
469
- )
484
+ .encoding(encoding)
470
485
  .config(axis: {labelFontSize: 12})
471
486
  when "scatter"
487
+ encoding = {
488
+ x: {field: x, type: "quantitative", scale: {zero: false}},
489
+ y: {field: y, type: "quantitative", scale: {zero: false}},
490
+ size: {value: 60}
491
+ }
492
+ encoding[:color] = {field: group} if group
493
+
472
494
  Vega.lite
473
495
  .data(data)
474
496
  .mark(type: "circle", tooltip: true)
475
- .encoding(
476
- x: {field: x, type: "quantitative", scale: {zero: false}},
477
- y: {field: y, type: "quantitative", scale: {zero: false}},
478
- size: {value: 60}
479
- )
497
+ .encoding(encoding)
480
498
  .config(axis: {labelFontSize: 12})
481
499
  else
482
500
  raise ArgumentError, "Invalid type: #{type}"
data/lib/rover/group.rb CHANGED
@@ -1,10 +1,12 @@
1
1
  module Rover
2
2
  class Group
3
+ # TODO raise ArgumentError for empty columns in 0.3.0
3
4
  def initialize(df, columns)
4
5
  @df = df
5
6
  @columns = columns
6
7
  end
7
8
 
9
+ # TODO raise ArgumentError for empty columns in 0.3.0
8
10
  def group(*columns)
9
11
  Group.new(@df, @columns + columns.flatten)
10
12
  end
@@ -22,6 +24,14 @@ module Rover
22
24
  end
23
25
  end
24
26
 
27
+ def plot(*args, **options)
28
+ raise ArgumentError, "Multiple groups not supported" if @columns.size > 1
29
+ # same message as Ruby
30
+ raise ArgumentError, "unknown keyword: :group" if options.key?(:group)
31
+
32
+ @df.plot(*args, **options, group: @columns.first)
33
+ end
34
+
25
35
  private
26
36
 
27
37
  # TODO make more efficient
data/lib/rover/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Rover
2
- VERSION = "0.2.7"
2
+ VERSION = "0.2.8"
3
3
  end
data/lib/rover.rb CHANGED
@@ -9,45 +9,57 @@ require "rover/version"
9
9
 
10
10
  module Rover
11
11
  class << self
12
- def read_csv(path, types: nil, **options)
13
- require "csv"
14
- csv_to_df(CSV.read(path, **csv_options(options)), types: types, headers: options[:headers])
12
+ def read_csv(path, **options)
13
+ csv_to_df(**options) do |csv_options|
14
+ CSV.read(path, **csv_options)
15
+ end
15
16
  end
16
17
 
17
- def parse_csv(str, types: nil, **options)
18
- require "csv"
19
- csv_to_df(CSV.parse(str, **csv_options(options)), types: types, headers: options[:headers])
18
+ def parse_csv(str, **options)
19
+ csv_to_df(**options) do |csv_options|
20
+ CSV.parse(str, **csv_options)
21
+ end
20
22
  end
21
23
 
22
- def read_parquet(path, types: nil)
23
- require "parquet"
24
- parquet_to_df(Arrow::Table.load(path), types: types)
24
+ def read_parquet(path, **options)
25
+ parquet_to_df(**options) do
26
+ Arrow::Table.load(path)
27
+ end
25
28
  end
26
29
 
27
- def parse_parquet(str, types: nil)
28
- require "parquet"
29
- parquet_to_df(Arrow::Table.load(Arrow::Buffer.new(str), format: :parquet), types: types)
30
+ def parse_parquet(str, **options)
31
+ parquet_to_df(**options) do
32
+ Arrow::Table.load(Arrow::Buffer.new(str), format: :parquet)
33
+ end
30
34
  end
31
35
 
32
36
  private
33
37
 
34
- # TODO use date converter
35
- def csv_options(options)
36
- options = {headers: true, converters: :numeric}.merge(options)
37
- raise ArgumentError, "Must specify headers" unless options[:headers]
38
- options
39
- end
38
+ def csv_to_df(types: nil, headers: nil, **csv_options)
39
+ require "csv"
40
+
41
+ raise ArgumentError, "Must specify headers" if headers == false
40
42
 
41
- def csv_to_df(table, types: nil, headers: nil)
42
- if headers && headers.size < table.headers.size
43
- raise ArgumentError, "Expected #{table.headers.size} headers, got #{headers.size}"
43
+ # TODO use date converter
44
+ table = yield({converters: :numeric}.merge(csv_options))
45
+
46
+ headers = nil if headers == true
47
+ if headers && table.first && headers.size < table.first.size
48
+ raise ArgumentError, "Expected #{table.first.size} headers, got #{headers.size}"
49
+ end
50
+
51
+ table_headers = (headers || table.shift || []).dup
52
+ # keep same behavior as headers: true
53
+ if table.first
54
+ while table_headers.size < table.first.size
55
+ table_headers << nil
56
+ end
44
57
  end
45
58
 
46
- table.by_col!
47
59
  data = {}
48
- keys = table.map { |k, _| [k, true] }.to_h
60
+ keys = table_headers.map { |k| [k, true] }.to_h
49
61
  unnamed_suffix = 1
50
- table.each do |k, v|
62
+ table_headers.each_with_index do |k, i|
51
63
  # TODO do same for empty string in 0.3.0
52
64
  if k.nil?
53
65
  k = "unnamed"
@@ -57,7 +69,18 @@ module Rover
57
69
  end
58
70
  keys[k] = true
59
71
  end
60
- data[k] = v
72
+ table_headers[i] = k
73
+ end
74
+
75
+ table_headers.each_with_index do |k, i|
76
+ # use first value for duplicate headers like headers: true
77
+ next if data[k]
78
+
79
+ values = []
80
+ table.each do |row|
81
+ values << row[i]
82
+ end
83
+ data[k] = values
61
84
  end
62
85
 
63
86
  DataFrame.new(data, types: types)
@@ -78,7 +101,10 @@ module Rover
78
101
  "uint64" => Numo::UInt64
79
102
  }
80
103
 
81
- def parquet_to_df(table, types: nil)
104
+ def parquet_to_df(types: nil)
105
+ require "parquet"
106
+
107
+ table = yield
82
108
  data = {}
83
109
  types ||= {}
84
110
  table.each_column do |column|
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rover-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.7
4
+ version: 0.2.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-01-16 00:00:00.000000000 Z
11
+ date: 2022-03-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -58,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
58
58
  - !ruby/object:Gem::Version
59
59
  version: '0'
60
60
  requirements: []
61
- rubygems_version: 3.3.3
61
+ rubygems_version: 3.3.7
62
62
  signing_key:
63
63
  specification_version: 4
64
64
  summary: Simple, powerful data frames for Ruby