rover-df 0.2.8 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 65d2fda186484e920421543e2f0203635054ccb8a23250bd3fc6a9d8c328725f
4
- data.tar.gz: e4cd1e6d69e1e4f340f6692111476a5be9405f348841cfba6f6c431f04d85347
3
+ metadata.gz: 171a06a05afa4ec4bb09efe7fef53b49220a3d4fa5352621112e29f2b70812b9
4
+ data.tar.gz: 435d8f3d4781f1960236f3c2b7f9fa2c4e38dfc987b53cb9fbe6351a9e8db4e9
5
5
  SHA512:
6
- metadata.gz: c720f3bc45178f938c20546ac1b7279ae047affafce5e06cff4f703e1d8ff7a99c1bca94a3f40cb7d26945d770bf136a2adc3477cf6ffc3cdaad9a15aa6090a1
7
- data.tar.gz: c44135cc0e70b08b72e1084565ef3479bcb92000bf34662b76a25933e68ad33a584afae071ddebfd5724ad61fe7e7dbc283241d7194c532dd70f36b1358b266d
6
+ metadata.gz: 9e5baa8cb051f7c7f06bbe0025ac4d923947b34768461a814e533aa78ec5d2d391a12edc6a9a64abc2fa9b1147255211ff26e8094cfaef67c9d70e393e57bcc0
7
+ data.tar.gz: 55f3438d438326c324c612a92b39a54698123889aeb28bafd93a196f2659208b4b72a1ac02efd407166fc56c8c9f3abff9472804ddfb231220b6adf41ff38df1
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
1
+ ## 0.3.0 (2022-04-03)
2
+
3
+ - Added `deep_dup` method to data frames
4
+ - Changed `:int` to `:int64`, `:uint` to `:uint64`, and `:float` to `:float64` for type methods
5
+ - Changed missing column to raise `KeyError` instead of `ArgumentError` for aggregate methods
6
+ - Changed passing too many headers to `read_csv` and `parse_csv` to raise `ArgumentError`
7
+ - Changed empty string in CSV headers to match behavior of `nil`
8
+ - Fixed `clone` and `dup` method for vectors
9
+ - Dropped support for Ruby < 2.7
10
+
1
11
  ## 0.2.8 (2022-03-15)
2
12
 
3
13
  - Added `group` and `stacked` options to `plot`
data/README.md CHANGED
@@ -424,22 +424,22 @@ df.to_parquet
424
424
  You can specify column types when creating a data frame
425
425
 
426
426
  ```ruby
427
- Rover::DataFrame.new(data, types: {"a" => :int, "b" => :float})
427
+ Rover::DataFrame.new(data, types: {"a" => :int64, "b" => :float64})
428
428
  ```
429
429
 
430
430
  Or
431
431
 
432
432
  ```ruby
433
- Rover.read_csv("data.csv", types: {"a" => :int, "b" => :float})
433
+ Rover.read_csv("data.csv", types: {"a" => :int64, "b" => :float64})
434
434
  ```
435
435
 
436
436
  Supported types are:
437
437
 
438
- - boolean - `bool`
439
- - float - `float`, `float32`
440
- - integer - `int`, `int32`, `int16`, `int8`
441
- - unsigned integer - `uint`, `uint32`, `uint16`, `uint8`
442
- - object - `object`
438
+ - boolean - `:bool`
439
+ - float - `:float64`, `:float32`
440
+ - integer - `:int64`, `:int32`, `:int16`, `:int8`
441
+ - unsigned integer - `:uint64`, `:uint32`, `:uint16`, `:uint8`
442
+ - object - `:object`
443
443
 
444
444
  Get column types
445
445
 
@@ -456,7 +456,7 @@ df[:a].type
456
456
  Change the type of a column
457
457
 
458
458
  ```ruby
459
- df[:a] = df[:a].to(:int)
459
+ df[:a] = df[:a].to(:int32)
460
460
  ```
461
461
 
462
462
  ## History
@@ -72,7 +72,7 @@ module Rover
72
72
  # multiple columns
73
73
  df = DataFrame.new
74
74
  where.each do |k|
75
- check_column(k, true)
75
+ check_column(k)
76
76
  df[k] = @vectors[k]
77
77
  end
78
78
  df
@@ -102,7 +102,7 @@ module Rover
102
102
  def []=(k, v)
103
103
  check_key(k)
104
104
  v = to_vector(v, size: size)
105
- raise ArgumentError, "Size mismatch: expected #{size}, got #{v.size}" if @vectors.any? && v.size != size
105
+ raise ArgumentError, "Size mismatch (given #{v.size}, expected #{size})" if @vectors.any? && v.size != size
106
106
  @vectors[k] = v
107
107
  end
108
108
 
@@ -242,11 +242,11 @@ module Rover
242
242
  types.each do |name, type|
243
243
  schema[name] =
244
244
  case type
245
- when :int
245
+ when :int64
246
246
  :int64
247
- when :uint
247
+ when :uint64
248
248
  :uint64
249
- when :float
249
+ when :float64
250
250
  :double
251
251
  when :float32
252
252
  :float
@@ -346,10 +346,10 @@ module Rover
346
346
  end
347
347
  end
348
348
 
349
- def dup
349
+ def deep_dup
350
350
  df = DataFrame.new
351
351
  @vectors.each do |k, v|
352
- df[k] = v
352
+ df[k] = v.dup
353
353
  end
354
354
  df
355
355
  end
@@ -503,8 +503,20 @@ module Rover
503
503
 
504
504
  private
505
505
 
506
+ # for clone
507
+ def initialize_clone(_)
508
+ @vectors = @vectors.clone
509
+ super
510
+ end
511
+
512
+ # for dup
513
+ def initialize_dup(_)
514
+ @vectors = @vectors.dup
515
+ super
516
+ end
517
+
506
518
  def check_key(key)
507
- raise ArgumentError, "Key must be a string or symbol, got #{key.inspect}" unless key.is_a?(String) || key.is_a?(Symbol)
519
+ raise ArgumentError, "Key must be a String or Symbol, given #{key.class.name}" unless key.is_a?(String) || key.is_a?(Symbol)
508
520
  end
509
521
 
510
522
  # TODO make more efficient
@@ -565,19 +577,9 @@ module Rover
565
577
  raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
566
578
  end
567
579
 
568
- # TODO in 0.3.0
569
- # always use did_you_mean
570
- def check_column(key, did_you_mean = false)
580
+ def check_column(key)
571
581
  unless include?(key)
572
- if did_you_mean
573
- if RUBY_VERSION.to_f >= 2.6
574
- raise KeyError.new("Missing column: #{key}", receiver: self, key: key)
575
- else
576
- raise KeyError.new("Missing column: #{key}")
577
- end
578
- else
579
- raise ArgumentError, "Missing column: #{key}"
580
- end
582
+ raise KeyError.new("Missing column: #{key}", receiver: self, key: key)
581
583
  end
582
584
  end
583
585
 
data/lib/rover/group.rb CHANGED
@@ -1,12 +1,11 @@
1
1
  module Rover
2
2
  class Group
3
- # TODO raise ArgumentError for empty columns in 0.3.0
4
3
  def initialize(df, columns)
5
4
  @df = df
6
5
  @columns = columns
6
+ check_columns
7
7
  end
8
8
 
9
- # TODO raise ArgumentError for empty columns in 0.3.0
10
9
  def group(*columns)
11
10
  Group.new(@df, @columns + columns.flatten)
12
11
  end
@@ -38,10 +37,6 @@ module Rover
38
37
  def grouped_dfs
39
38
  # cache here so we can reuse for multiple calculations if needed
40
39
  @grouped_dfs ||= begin
41
- raise ArgumentError, "No columns given" if @columns.empty?
42
- missing_keys = @columns - @df.keys
43
- raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
44
-
45
40
  groups = Hash.new { |hash, key| hash[key] = [] }
46
41
  i = 0
47
42
  @df.each_row do |row|
@@ -56,5 +51,12 @@ module Rover
56
51
  result
57
52
  end
58
53
  end
54
+
55
+ def check_columns
56
+ raise ArgumentError, "No columns given" if @columns.empty?
57
+
58
+ missing_keys = @columns - @df.keys
59
+ raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
60
+ end
59
61
  end
60
62
  end
data/lib/rover/vector.rb CHANGED
@@ -1,23 +1,23 @@
1
1
  module Rover
2
2
  class Vector
3
3
  # if a user never specifies types,
4
- # the defaults are bool, float, int, and object
5
- # keep these simple
6
- #
7
- # we could create aliases for float64, int64, uint64
8
- # if so, type should still return the simple type
4
+ # the defaults are bool, float64, int64, and object
9
5
  TYPE_CAST_MAPPING = {
10
6
  bool: Numo::Bit,
11
7
  float32: Numo::SFloat,
12
- float: Numo::DFloat,
8
+ float64: Numo::DFloat,
13
9
  int8: Numo::Int8,
14
10
  int16: Numo::Int16,
15
11
  int32: Numo::Int32,
16
- int: Numo::Int64,
12
+ int64: Numo::Int64,
17
13
  object: Numo::RObject,
18
14
  uint8: Numo::UInt8,
19
15
  uint16: Numo::UInt16,
20
16
  uint32: Numo::UInt32,
17
+ uint64: Numo::UInt64,
18
+ # legacy - must come last
19
+ float: Numo::DFloat,
20
+ int: Numo::Int64,
21
21
  uint: Numo::UInt64
22
22
  }
23
23
 
@@ -333,6 +333,18 @@ module Rover
333
333
 
334
334
  private
335
335
 
336
+ # for clone
337
+ def initialize_clone(_)
338
+ @data = @data.clone
339
+ super
340
+ end
341
+
342
+ # for dup
343
+ def initialize_dup(_)
344
+ @data = @data.dup
345
+ super
346
+ end
347
+
336
348
  def cast_data(data, type: nil)
337
349
  numo_type = numo_type(type) if type
338
350
 
data/lib/rover/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Rover
2
- VERSION = "0.2.8"
2
+ VERSION = "0.3.0"
3
3
  end
data/lib/rover.rb CHANGED
@@ -40,12 +40,12 @@ module Rover
40
40
 
41
41
  raise ArgumentError, "Must specify headers" if headers == false
42
42
 
43
- # TODO use date converter
43
+ # TODO use date converter in 0.4.0 - need to test performance
44
44
  table = yield({converters: :numeric}.merge(csv_options))
45
45
 
46
46
  headers = nil if headers == true
47
- if headers && table.first && headers.size < table.first.size
48
- raise ArgumentError, "Expected #{table.first.size} headers, got #{headers.size}"
47
+ if headers && table.first && headers.size != table.first.size
48
+ raise ArgumentError, "Expected #{table.first.size} headers, given #{headers.size}"
49
49
  end
50
50
 
51
51
  table_headers = (headers || table.shift || []).dup
@@ -60,8 +60,7 @@ module Rover
60
60
  keys = table_headers.map { |k| [k, true] }.to_h
61
61
  unnamed_suffix = 1
62
62
  table_headers.each_with_index do |k, i|
63
- # TODO do same for empty string in 0.3.0
64
- if k.nil?
63
+ if k.nil? || k.empty?
65
64
  k = "unnamed"
66
65
  while keys.include?(k)
67
66
  unnamed_suffix += 1
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rover-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.8
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-03-15 00:00:00.000000000 Z
11
+ date: 2022-04-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -51,7 +51,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: '2.4'
54
+ version: '2.7'
55
55
  required_rubygems_version: !ruby/object:Gem::Requirement
56
56
  requirements:
57
57
  - - ">="