rover-df 0.2.8 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 65d2fda186484e920421543e2f0203635054ccb8a23250bd3fc6a9d8c328725f
4
- data.tar.gz: e4cd1e6d69e1e4f340f6692111476a5be9405f348841cfba6f6c431f04d85347
3
+ metadata.gz: 44146e3081c968813848026d2a7f785527a0bb55af0a1978a8087d0dcc7a568a
4
+ data.tar.gz: db64041059937e131d27799739506ad27a78d19be3bdd90d299bcee855b54755
5
5
  SHA512:
6
- metadata.gz: c720f3bc45178f938c20546ac1b7279ae047affafce5e06cff4f703e1d8ff7a99c1bca94a3f40cb7d26945d770bf136a2adc3477cf6ffc3cdaad9a15aa6090a1
7
- data.tar.gz: c44135cc0e70b08b72e1084565ef3479bcb92000bf34662b76a25933e68ad33a584afae071ddebfd5724ad61fe7e7dbc283241d7194c532dd70f36b1358b266d
6
+ metadata.gz: f66190d43258016bc54da2ee42078087784e2c245095fceced4f617b4343130c8e324fd07a4fb0a08b6e23f512268d517e330087959dd7a78187228383189ea8
7
+ data.tar.gz: a1d3a80ff866d72dc32a0067240c32f7a24c602a736d932dba394af8b760566591ff0d2942a61243230016182a3a5fad2e635fa1095f809caf477146d0810868
data/CHANGELOG.md CHANGED
@@ -1,3 +1,25 @@
1
+ ## 0.3.2 (2022-07-10)
2
+
3
+ - Added `sqrt` method to vectors
4
+ - Improved numeric operations between scalars and vectors
5
+ - Improved performance of `tally`
6
+
7
+ ## 0.3.1 (2022-05-18)
8
+
9
+ - Added `to!` to vectors
10
+ - Fixed error with `nil` and `:float64` type
11
+ - Fixed `:header_converters` option with `read_csv` and `parse_csv`
12
+
13
+ ## 0.3.0 (2022-04-03)
14
+
15
+ - Added `deep_dup` method to data frames
16
+ - Changed `:int` to `:int64`, `:uint` to `:uint64`, and `:float` to `:float64` for type methods
17
+ - Changed missing column to raise `KeyError` instead of `ArgumentError` for aggregate methods
18
+ - Changed passing too many headers to `read_csv` and `parse_csv` to raise `ArgumentError`
19
+ - Changed empty string in CSV headers to match behavior of `nil`
20
+ - Fixed `clone` and `dup` method for vectors
21
+ - Dropped support for Ruby < 2.7
22
+
1
23
  ## 0.2.8 (2022-03-15)
2
24
 
3
25
  - Added `group` and `stacked` options to `plot`
data/README.md CHANGED
@@ -424,22 +424,22 @@ df.to_parquet
424
424
  You can specify column types when creating a data frame
425
425
 
426
426
  ```ruby
427
- Rover::DataFrame.new(data, types: {"a" => :int, "b" => :float})
427
+ Rover::DataFrame.new(data, types: {"a" => :int64, "b" => :float64})
428
428
  ```
429
429
 
430
430
  Or
431
431
 
432
432
  ```ruby
433
- Rover.read_csv("data.csv", types: {"a" => :int, "b" => :float})
433
+ Rover.read_csv("data.csv", types: {"a" => :int64, "b" => :float64})
434
434
  ```
435
435
 
436
436
  Supported types are:
437
437
 
438
- - boolean - `bool`
439
- - float - `float`, `float32`
440
- - integer - `int`, `int32`, `int16`, `int8`
441
- - unsigned integer - `uint`, `uint32`, `uint16`, `uint8`
442
- - object - `object`
438
+ - boolean - `:bool`
439
+ - float - `:float64`, `:float32`
440
+ - integer - `:int64`, `:int32`, `:int16`, `:int8`
441
+ - unsigned integer - `:uint64`, `:uint32`, `:uint16`, `:uint8`
442
+ - object - `:object`
443
443
 
444
444
  Get column types
445
445
 
@@ -456,7 +456,7 @@ df[:a].type
456
456
  Change the type of a column
457
457
 
458
458
  ```ruby
459
- df[:a] = df[:a].to(:int)
459
+ df[:a].to!(:int32)
460
460
  ```
461
461
 
462
462
  ## History
data/lib/rover/data_frame.rb CHANGED
@@ -72,7 +72,7 @@ module Rover
72
72
  # multiple columns
73
73
  df = DataFrame.new
74
74
  where.each do |k|
75
- check_column(k, true)
75
+ check_column(k)
76
76
  df[k] = @vectors[k]
77
77
  end
78
78
  df
@@ -102,7 +102,7 @@ module Rover
102
102
  def []=(k, v)
103
103
  check_key(k)
104
104
  v = to_vector(v, size: size)
105
- raise ArgumentError, "Size mismatch: expected #{size}, got #{v.size}" if @vectors.any? && v.size != size
105
+ raise ArgumentError, "Size mismatch (given #{v.size}, expected #{size})" if @vectors.any? && v.size != size
106
106
  @vectors[k] = v
107
107
  end
108
108
 
@@ -242,11 +242,11 @@ module Rover
242
242
  types.each do |name, type|
243
243
  schema[name] =
244
244
  case type
245
- when :int
245
+ when :int64
246
246
  :int64
247
- when :uint
247
+ when :uint64
248
248
  :uint64
249
- when :float
249
+ when :float64
250
250
  :double
251
251
  when :float32
252
252
  :float
@@ -346,10 +346,10 @@ module Rover
346
346
  end
347
347
  end
348
348
 
349
- def dup
349
+ def deep_dup
350
350
  df = DataFrame.new
351
351
  @vectors.each do |k, v|
352
- df[k] = v
352
+ df[k] = v.dup
353
353
  end
354
354
  df
355
355
  end
@@ -503,8 +503,20 @@ module Rover
503
503
 
504
504
  private
505
505
 
506
+ # for clone
507
+ def initialize_clone(_)
508
+ @vectors = @vectors.clone
509
+ super
510
+ end
511
+
512
+ # for dup
513
+ def initialize_dup(_)
514
+ @vectors = @vectors.dup
515
+ super
516
+ end
517
+
506
518
  def check_key(key)
507
- raise ArgumentError, "Key must be a string or symbol, got #{key.inspect}" unless key.is_a?(String) || key.is_a?(Symbol)
519
+ raise ArgumentError, "Key must be a String or Symbol, given #{key.class.name}" unless key.is_a?(String) || key.is_a?(Symbol)
508
520
  end
509
521
 
510
522
  # TODO make more efficient
@@ -565,19 +577,9 @@ module Rover
565
577
  raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
566
578
  end
567
579
 
568
- # TODO in 0.3.0
569
- # always use did_you_mean
570
- def check_column(key, did_you_mean = false)
580
+ def check_column(key)
571
581
  unless include?(key)
572
- if did_you_mean
573
- if RUBY_VERSION.to_f >= 2.6
574
- raise KeyError.new("Missing column: #{key}", receiver: self, key: key)
575
- else
576
- raise KeyError.new("Missing column: #{key}")
577
- end
578
- else
579
- raise ArgumentError, "Missing column: #{key}"
580
- end
582
+ raise KeyError.new("Missing column: #{key}", receiver: self, key: key)
581
583
  end
582
584
  end
583
585
 
data/lib/rover/group.rb CHANGED
@@ -1,12 +1,11 @@
1
1
  module Rover
2
2
  class Group
3
- # TODO raise ArgumentError for empty columns in 0.3.0
4
3
  def initialize(df, columns)
5
4
  @df = df
6
5
  @columns = columns
6
+ check_columns
7
7
  end
8
8
 
9
- # TODO raise ArgumentError for empty columns in 0.3.0
10
9
  def group(*columns)
11
10
  Group.new(@df, @columns + columns.flatten)
12
11
  end
@@ -38,10 +37,6 @@ module Rover
38
37
  def grouped_dfs
39
38
  # cache here so we can reuse for multiple calculations if needed
40
39
  @grouped_dfs ||= begin
41
- raise ArgumentError, "No columns given" if @columns.empty?
42
- missing_keys = @columns - @df.keys
43
- raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
44
-
45
40
  groups = Hash.new { |hash, key| hash[key] = [] }
46
41
  i = 0
47
42
  @df.each_row do |row|
@@ -56,5 +51,12 @@ module Rover
56
51
  result
57
52
  end
58
53
  end
54
+
55
+ def check_columns
56
+ raise ArgumentError, "No columns given" if @columns.empty?
57
+
58
+ missing_keys = @columns - @df.keys
59
+ raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
60
+ end
59
61
  end
60
62
  end
data/lib/rover/vector.rb CHANGED
@@ -1,23 +1,23 @@
1
1
  module Rover
2
2
  class Vector
3
3
  # if a user never specifies types,
4
- # the defaults are bool, float, int, and object
5
- # keep these simple
6
- #
7
- # we could create aliases for float64, int64, uint64
8
- # if so, type should still return the simple type
4
+ # the defaults are bool, float64, int64, and object
9
5
  TYPE_CAST_MAPPING = {
10
6
  bool: Numo::Bit,
11
7
  float32: Numo::SFloat,
12
- float: Numo::DFloat,
8
+ float64: Numo::DFloat,
13
9
  int8: Numo::Int8,
14
10
  int16: Numo::Int16,
15
11
  int32: Numo::Int32,
16
- int: Numo::Int64,
12
+ int64: Numo::Int64,
17
13
  object: Numo::RObject,
18
14
  uint8: Numo::UInt8,
19
15
  uint16: Numo::UInt16,
20
16
  uint32: Numo::UInt32,
17
+ uint64: Numo::UInt64,
18
+ # legacy - must come last
19
+ float: Numo::DFloat,
20
+ int: Numo::Int64,
21
21
  uint: Numo::UInt64
22
22
  }
23
23
 
@@ -31,7 +31,12 @@ module Rover
31
31
  end
32
32
 
33
33
  def to(type)
34
- Vector.new(self, type: type)
34
+ dup.to!(type)
35
+ end
36
+
37
+ def to!(type)
38
+ @data = cast_data(@data, type: type)
39
+ self
35
40
  end
36
41
 
37
42
  def to_numo
@@ -179,13 +184,9 @@ module Rover
179
184
  Vector.new(@data.to_a.reject(&block))
180
185
  end
181
186
 
187
+ # use Ruby tally for performance
182
188
  def tally
183
- result = Hash.new(0)
184
- @data.each do |v|
185
- result[v] += 1
186
- end
187
- result.default = nil
188
- result
189
+ @data.to_a.tally
189
190
  end
190
191
 
191
192
  def sort
@@ -196,6 +197,16 @@ module Rover
196
197
  Vector.new(@data.abs)
197
198
  end
198
199
 
200
+ def sqrt
201
+ data =
202
+ if @data.is_a?(Numo::SFloat)
203
+ Numo::SFloat::Math.sqrt(@data)
204
+ else
205
+ Numo::DFloat::Math.sqrt(@data)
206
+ end
207
+ Vector.new(data)
208
+ end
209
+
199
210
  def each(&block)
200
211
  @data.each(&block)
201
212
  end
@@ -333,6 +344,26 @@ module Rover
333
344
 
334
345
  private
335
346
 
347
+ # for clone
348
+ def initialize_clone(_)
349
+ @data = @data.clone
350
+ super
351
+ end
352
+
353
+ # for dup
354
+ def initialize_dup(_)
355
+ @data = @data.dup
356
+ super
357
+ end
358
+
359
+ def coerce(other)
360
+ if other.is_a?(Numeric)
361
+ [Vector.new([other]), self]
362
+ else
363
+ raise TypeError, "#{self.class} can't be coerced into #{other.class}"
364
+ end
365
+ end
366
+
336
367
  def cast_data(data, type: nil)
337
368
  numo_type = numo_type(type) if type
338
369
 
@@ -359,7 +390,7 @@ module Rover
359
390
  data = data.to_a
360
391
 
361
392
  if type
362
- data = data.map { |v| v || Float::NAN } if [:float, :float32].include?(type)
393
+ data = data.map { |v| v || Float::NAN } if [:float, :float32, :float64].include?(type)
363
394
  data = numo_type.cast(data)
364
395
  else
365
396
  data =
data/lib/rover/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Rover
2
- VERSION = "0.2.8"
2
+ VERSION = "0.3.2"
3
3
  end
data/lib/rover.rb CHANGED
@@ -40,12 +40,12 @@ module Rover
40
40
 
41
41
  raise ArgumentError, "Must specify headers" if headers == false
42
42
 
43
- # TODO use date converter
43
+ # TODO use date converter in 0.4.0 - need to test performance
44
44
  table = yield({converters: :numeric}.merge(csv_options))
45
45
 
46
46
  headers = nil if headers == true
47
- if headers && table.first && headers.size < table.first.size
48
- raise ArgumentError, "Expected #{table.first.size} headers, got #{headers.size}"
47
+ if headers && table.first && headers.size != table.first.size
48
+ raise ArgumentError, "Expected #{table.first.size} headers, given #{headers.size}"
49
49
  end
50
50
 
51
51
  table_headers = (headers || table.shift || []).dup
@@ -55,13 +55,18 @@ module Rover
55
55
  table_headers << nil
56
56
  end
57
57
  end
58
+ # TODO handle date converters
59
+ table_headers = table_headers.map! { |v| v.nil? ? nil : v.to_s }
60
+
61
+ if csv_options[:header_converters]
62
+ table_headers = CSV.parse(CSV.generate_line(table_headers), headers: true, header_converters: csv_options[:header_converters]).headers
63
+ end
58
64
 
59
65
  data = {}
60
66
  keys = table_headers.map { |k| [k, true] }.to_h
61
67
  unnamed_suffix = 1
62
68
  table_headers.each_with_index do |k, i|
63
- # TODO do same for empty string in 0.3.0
64
- if k.nil?
69
+ if k.nil? || k.empty?
65
70
  k = "unnamed"
66
71
  while keys.include?(k)
67
72
  unnamed_suffix += 1
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rover-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.8
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-03-15 00:00:00.000000000 Z
11
+ date: 2022-07-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -51,7 +51,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: '2.4'
54
+ version: '2.7'
55
55
  required_rubygems_version: !ruby/object:Gem::Requirement
56
56
  requirements:
57
57
  - - ">="