rover-df 0.2.7 → 0.2.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c75bed3989211e806e54c296290e5f7b3af236a15742daac876e211e3ca5a76f
4
- data.tar.gz: 5865ff8f1d0036423f18cfee867da63214ee50f79d373b0f0f244853d8efbefa
3
+ metadata.gz: 65d2fda186484e920421543e2f0203635054ccb8a23250bd3fc6a9d8c328725f
4
+ data.tar.gz: e4cd1e6d69e1e4f340f6692111476a5be9405f348841cfba6f6c431f04d85347
5
5
  SHA512:
6
- metadata.gz: 11718bc8ade75a605e92cabe05c29e55c6d4dfe427cd5ada0a8a216db678b32a88f4a43843d1e7dcda7b7a64adb63b76969f1d958e91ca57c4f71989632e14aa
7
- data.tar.gz: 16940236090625bef69cb14d6d9f9f50720314edea1b5892f60443799e5389700ddfb0d79a29ee1e193168097add9d7195799e7f049d85f9c9dc9c443843a678
6
+ metadata.gz: c720f3bc45178f938c20546ac1b7279ae047affafce5e06cff4f703e1d8ff7a99c1bca94a3f40cb7d26945d770bf136a2adc3477cf6ffc3cdaad9a15aa6090a1
7
+ data.tar.gz: c44135cc0e70b08b72e1084565ef3479bcb92000bf34662b76a25933e68ad33a584afae071ddebfd5724ad61fe7e7dbc283241d7194c532dd70f36b1358b266d
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.2.8 (2022-03-15)
2
+
3
+ - Added `group` and `stacked` options to `plot`
4
+ - Improved performance of `read_csv` and `parse_csv`
5
+
1
6
  ## 0.2.7 (2022-01-16)
2
7
 
3
8
  - Added support for booleans to Parquet methods
data/README.md CHANGED
@@ -13,7 +13,7 @@ Simple, powerful data frames for Ruby
13
13
  Add this line to your application’s Gemfile:
14
14
 
15
15
  ```ruby
16
- gem 'rover-df'
16
+ gem "rover-df"
17
17
  ```
18
18
 
19
19
  ## Intro
@@ -236,7 +236,7 @@ df.group(:a).max(:b)
236
236
  Multiple groups
237
237
 
238
238
  ```ruby
239
- df.group([:a, :b]).count
239
+ df.group(:a, :b).count
240
240
  ```
241
241
 
242
242
  ## Visualization
@@ -244,7 +244,7 @@ df.group([:a, :b]).count
244
244
  Add [Vega](https://github.com/ankane/vega) to your application’s Gemfile:
245
245
 
246
246
  ```ruby
247
- gem 'vega'
247
+ gem "vega"
248
248
  ```
249
249
 
250
250
  And use:
@@ -259,6 +259,18 @@ Specify the chart type (`line`, `pie`, `column`, `bar`, `area`, or `scatter`)
259
259
  df.plot(:a, :b, type: "pie")
260
260
  ```
261
261
 
262
+ Group data
263
+
264
+ ```ruby
265
+ df.plot(:a, :b, group: :c)
266
+ ```
267
+
268
+ Stacked columns or bars
269
+
270
+ ```ruby
271
+ df.plot(:a, :b, group: :c, stacked: true)
272
+ ```
273
+
262
274
  ## Updating Data
263
275
 
264
276
  Add a new column
@@ -401,7 +401,7 @@ module Rover
401
401
  keys.all? { |k| self[k].to_numo == other[k].to_numo }
402
402
  end
403
403
 
404
- def plot(x = nil, y = nil, type: nil)
404
+ def plot(x = nil, y = nil, type: nil, group: nil, stacked: nil)
405
405
  require "vega"
406
406
 
407
407
  raise ArgumentError, "Must specify columns" if keys.size != 2 && (!x || !y)
@@ -416,7 +416,7 @@ module Rover
416
416
  raise "Cannot determine type. Use the type option."
417
417
  end
418
418
  end
419
- data = self[[x, y]]
419
+ data = self[group.nil? ? [x, y] : [x, y, group]]
420
420
 
421
421
  case type
422
422
  when "line", "area"
@@ -430,16 +430,20 @@ module Rover
430
430
  end
431
431
 
432
432
  scale = x_type == "temporal" ? {type: "utc"} : {}
433
+ encoding = {
434
+ x: {field: x, type: x_type, scale: scale},
435
+ y: {field: y, type: "quantitative"}
436
+ }
437
+ encoding[:color] = {field: group} if group
433
438
 
434
439
  Vega.lite
435
440
  .data(data)
436
441
  .mark(type: type, tooltip: true, interpolate: "cardinal", point: {size: 60})
437
- .encoding(
438
- x: {field: x, type: x_type, scale: scale},
439
- y: {field: y, type: "quantitative"}
440
- )
442
+ .encoding(encoding)
441
443
  .config(axis: {labelFontSize: 12})
442
444
  when "pie"
445
+ raise ArgumentError, "Cannot use group option with pie chart" unless group.nil?
446
+
443
447
  Vega.lite
444
448
  .data(data)
445
449
  .mark(type: "arc", tooltip: true)
@@ -449,34 +453,48 @@ module Rover
449
453
  )
450
454
  .view(stroke: nil)
451
455
  when "column"
456
+ encoding = {
457
+ x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
458
+ y: {field: y, type: "quantitative"}
459
+ }
460
+ if group
461
+ encoding[:color] = {field: group}
462
+ encoding[:xOffset] = {field: group} unless stacked
463
+ end
464
+
452
465
  Vega.lite
453
466
  .data(data)
454
467
  .mark(type: "bar", tooltip: true)
455
- .encoding(
456
- # TODO determine label angle
457
- x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
458
- y: {field: y, type: "quantitative"}
459
- )
468
+ .encoding(encoding)
460
469
  .config(axis: {labelFontSize: 12})
461
470
  when "bar"
471
+ encoding = {
472
+ # TODO determine label angle
473
+ y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
474
+ x: {field: y, type: "quantitative"}
475
+ }
476
+ if group
477
+ encoding[:color] = {field: group}
478
+ encoding[:yOffset] = {field: group} unless stacked
479
+ end
480
+
462
481
  Vega.lite
463
482
  .data(data)
464
483
  .mark(type: "bar", tooltip: true)
465
- .encoding(
466
- # TODO determine label angle
467
- y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
468
- x: {field: y, type: "quantitative"}
469
- )
484
+ .encoding(encoding)
470
485
  .config(axis: {labelFontSize: 12})
471
486
  when "scatter"
487
+ encoding = {
488
+ x: {field: x, type: "quantitative", scale: {zero: false}},
489
+ y: {field: y, type: "quantitative", scale: {zero: false}},
490
+ size: {value: 60}
491
+ }
492
+ encoding[:color] = {field: group} if group
493
+
472
494
  Vega.lite
473
495
  .data(data)
474
496
  .mark(type: "circle", tooltip: true)
475
- .encoding(
476
- x: {field: x, type: "quantitative", scale: {zero: false}},
477
- y: {field: y, type: "quantitative", scale: {zero: false}},
478
- size: {value: 60}
479
- )
497
+ .encoding(encoding)
480
498
  .config(axis: {labelFontSize: 12})
481
499
  else
482
500
  raise ArgumentError, "Invalid type: #{type}"
data/lib/rover/group.rb CHANGED
@@ -1,10 +1,12 @@
1
1
  module Rover
2
2
  class Group
3
+ # TODO raise ArgumentError for empty columns in 0.3.0
3
4
  def initialize(df, columns)
4
5
  @df = df
5
6
  @columns = columns
6
7
  end
7
8
 
9
+ # TODO raise ArgumentError for empty columns in 0.3.0
8
10
  def group(*columns)
9
11
  Group.new(@df, @columns + columns.flatten)
10
12
  end
@@ -22,6 +24,14 @@ module Rover
22
24
  end
23
25
  end
24
26
 
27
+ def plot(*args, **options)
28
+ raise ArgumentError, "Multiple groups not supported" if @columns.size > 1
29
+ # same message as Ruby
30
+ raise ArgumentError, "unknown keyword: :group" if options.key?(:group)
31
+
32
+ @df.plot(*args, **options, group: @columns.first)
33
+ end
34
+
25
35
  private
26
36
 
27
37
  # TODO make more efficient
data/lib/rover/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Rover
2
- VERSION = "0.2.7"
2
+ VERSION = "0.2.8"
3
3
  end
data/lib/rover.rb CHANGED
@@ -9,45 +9,57 @@ require "rover/version"
9
9
 
10
10
  module Rover
11
11
  class << self
12
- def read_csv(path, types: nil, **options)
13
- require "csv"
14
- csv_to_df(CSV.read(path, **csv_options(options)), types: types, headers: options[:headers])
12
+ def read_csv(path, **options)
13
+ csv_to_df(**options) do |csv_options|
14
+ CSV.read(path, **csv_options)
15
+ end
15
16
  end
16
17
 
17
- def parse_csv(str, types: nil, **options)
18
- require "csv"
19
- csv_to_df(CSV.parse(str, **csv_options(options)), types: types, headers: options[:headers])
18
+ def parse_csv(str, **options)
19
+ csv_to_df(**options) do |csv_options|
20
+ CSV.parse(str, **csv_options)
21
+ end
20
22
  end
21
23
 
22
- def read_parquet(path, types: nil)
23
- require "parquet"
24
- parquet_to_df(Arrow::Table.load(path), types: types)
24
+ def read_parquet(path, **options)
25
+ parquet_to_df(**options) do
26
+ Arrow::Table.load(path)
27
+ end
25
28
  end
26
29
 
27
- def parse_parquet(str, types: nil)
28
- require "parquet"
29
- parquet_to_df(Arrow::Table.load(Arrow::Buffer.new(str), format: :parquet), types: types)
30
+ def parse_parquet(str, **options)
31
+ parquet_to_df(**options) do
32
+ Arrow::Table.load(Arrow::Buffer.new(str), format: :parquet)
33
+ end
30
34
  end
31
35
 
32
36
  private
33
37
 
34
- # TODO use date converter
35
- def csv_options(options)
36
- options = {headers: true, converters: :numeric}.merge(options)
37
- raise ArgumentError, "Must specify headers" unless options[:headers]
38
- options
39
- end
38
+ def csv_to_df(types: nil, headers: nil, **csv_options)
39
+ require "csv"
40
+
41
+ raise ArgumentError, "Must specify headers" if headers == false
40
42
 
41
- def csv_to_df(table, types: nil, headers: nil)
42
- if headers && headers.size < table.headers.size
43
- raise ArgumentError, "Expected #{table.headers.size} headers, got #{headers.size}"
43
+ # TODO use date converter
44
+ table = yield({converters: :numeric}.merge(csv_options))
45
+
46
+ headers = nil if headers == true
47
+ if headers && table.first && headers.size < table.first.size
48
+ raise ArgumentError, "Expected #{table.first.size} headers, got #{headers.size}"
49
+ end
50
+
51
+ table_headers = (headers || table.shift || []).dup
52
+ # keep same behavior as headers: true
53
+ if table.first
54
+ while table_headers.size < table.first.size
55
+ table_headers << nil
56
+ end
44
57
  end
45
58
 
46
- table.by_col!
47
59
  data = {}
48
- keys = table.map { |k, _| [k, true] }.to_h
60
+ keys = table_headers.map { |k| [k, true] }.to_h
49
61
  unnamed_suffix = 1
50
- table.each do |k, v|
62
+ table_headers.each_with_index do |k, i|
51
63
  # TODO do same for empty string in 0.3.0
52
64
  if k.nil?
53
65
  k = "unnamed"
@@ -57,7 +69,18 @@ module Rover
57
69
  end
58
70
  keys[k] = true
59
71
  end
60
- data[k] = v
72
+ table_headers[i] = k
73
+ end
74
+
75
+ table_headers.each_with_index do |k, i|
76
+ # use first value for duplicate headers like headers: true
77
+ next if data[k]
78
+
79
+ values = []
80
+ table.each do |row|
81
+ values << row[i]
82
+ end
83
+ data[k] = values
61
84
  end
62
85
 
63
86
  DataFrame.new(data, types: types)
@@ -78,7 +101,10 @@ module Rover
78
101
  "uint64" => Numo::UInt64
79
102
  }
80
103
 
81
- def parquet_to_df(table, types: nil)
104
+ def parquet_to_df(types: nil)
105
+ require "parquet"
106
+
107
+ table = yield
82
108
  data = {}
83
109
  types ||= {}
84
110
  table.each_column do |column|
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rover-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.7
4
+ version: 0.2.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-01-16 00:00:00.000000000 Z
11
+ date: 2022-03-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -58,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
58
58
  - !ruby/object:Gem::Version
59
59
  version: '0'
60
60
  requirements: []
61
- rubygems_version: 3.3.3
61
+ rubygems_version: 3.3.7
62
62
  signing_key:
63
63
  specification_version: 4
64
64
  summary: Simple, powerful data frames for Ruby