rover-df 0.2.8 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 65d2fda186484e920421543e2f0203635054ccb8a23250bd3fc6a9d8c328725f
4
- data.tar.gz: e4cd1e6d69e1e4f340f6692111476a5be9405f348841cfba6f6c431f04d85347
3
+ metadata.gz: 44146e3081c968813848026d2a7f785527a0bb55af0a1978a8087d0dcc7a568a
4
+ data.tar.gz: db64041059937e131d27799739506ad27a78d19be3bdd90d299bcee855b54755
5
5
  SHA512:
6
- metadata.gz: c720f3bc45178f938c20546ac1b7279ae047affafce5e06cff4f703e1d8ff7a99c1bca94a3f40cb7d26945d770bf136a2adc3477cf6ffc3cdaad9a15aa6090a1
7
- data.tar.gz: c44135cc0e70b08b72e1084565ef3479bcb92000bf34662b76a25933e68ad33a584afae071ddebfd5724ad61fe7e7dbc283241d7194c532dd70f36b1358b266d
6
+ metadata.gz: f66190d43258016bc54da2ee42078087784e2c245095fceced4f617b4343130c8e324fd07a4fb0a08b6e23f512268d517e330087959dd7a78187228383189ea8
7
+ data.tar.gz: a1d3a80ff866d72dc32a0067240c32f7a24c602a736d932dba394af8b760566591ff0d2942a61243230016182a3a5fad2e635fa1095f809caf477146d0810868
data/CHANGELOG.md CHANGED
@@ -1,3 +1,25 @@
1
+ ## 0.3.2 (2022-07-10)
2
+
3
+ - Added `sqrt` method to vectors
4
+ - Improved numeric operations between scalars and vectors
5
+ - Improved performance of `tally`
6
+
7
+ ## 0.3.1 (2022-05-18)
8
+
9
+ - Added `to!` to vectors
10
+ - Fixed error with `nil` and `:float64` type
11
+ - Fixed `:header_converters` option with `read_csv` and `parse_csv`
12
+
13
+ ## 0.3.0 (2022-04-03)
14
+
15
+ - Added `deep_dup` method to data frames
16
+ - Changed `:int` to `:int64`, `:uint` to `:uint64`, and `:float` to `:float64` for type methods
17
+ - Changed missing column to raise `KeyError` instead of `ArgumentError` for aggregate methods
18
+ - Changed passing too many headers to `read_csv` and `parse_csv` to raise `ArgumentError`
19
+ - Changed empty string in CSV headers to match behavior of `nil`
20
+ - Fixed `clone` and `dup` method for vectors
21
+ - Dropped support for Ruby < 2.7
22
+
1
23
  ## 0.2.8 (2022-03-15)
2
24
 
3
25
  - Added `group` and `stacked` options to `plot`
data/README.md CHANGED
@@ -424,22 +424,22 @@ df.to_parquet
424
424
  You can specify column types when creating a data frame
425
425
 
426
426
  ```ruby
427
- Rover::DataFrame.new(data, types: {"a" => :int, "b" => :float})
427
+ Rover::DataFrame.new(data, types: {"a" => :int64, "b" => :float64})
428
428
  ```
429
429
 
430
430
  Or
431
431
 
432
432
  ```ruby
433
- Rover.read_csv("data.csv", types: {"a" => :int, "b" => :float})
433
+ Rover.read_csv("data.csv", types: {"a" => :int64, "b" => :float64})
434
434
  ```
435
435
 
436
436
  Supported types are:
437
437
 
438
- - boolean - `bool`
439
- - float - `float`, `float32`
440
- - integer - `int`, `int32`, `int16`, `int8`
441
- - unsigned integer - `uint`, `uint32`, `uint16`, `uint8`
442
- - object - `object`
438
+ - boolean - `:bool`
439
+ - float - `:float64`, `:float32`
440
+ - integer - `:int64`, `:int32`, `:int16`, `:int8`
441
+ - unsigned integer - `:uint64`, `:uint32`, `:uint16`, `:uint8`
442
+ - object - `:object`
443
443
 
444
444
  Get column types
445
445
 
@@ -456,7 +456,7 @@ df[:a].type
456
456
  Change the type of a column
457
457
 
458
458
  ```ruby
459
- df[:a] = df[:a].to(:int)
459
+ df[:a].to!(:int32)
460
460
  ```
461
461
 
462
462
  ## History
@@ -72,7 +72,7 @@ module Rover
72
72
  # multiple columns
73
73
  df = DataFrame.new
74
74
  where.each do |k|
75
- check_column(k, true)
75
+ check_column(k)
76
76
  df[k] = @vectors[k]
77
77
  end
78
78
  df
@@ -102,7 +102,7 @@ module Rover
102
102
  def []=(k, v)
103
103
  check_key(k)
104
104
  v = to_vector(v, size: size)
105
- raise ArgumentError, "Size mismatch: expected #{size}, got #{v.size}" if @vectors.any? && v.size != size
105
+ raise ArgumentError, "Size mismatch (given #{v.size}, expected #{size})" if @vectors.any? && v.size != size
106
106
  @vectors[k] = v
107
107
  end
108
108
 
@@ -242,11 +242,11 @@ module Rover
242
242
  types.each do |name, type|
243
243
  schema[name] =
244
244
  case type
245
- when :int
245
+ when :int64
246
246
  :int64
247
- when :uint
247
+ when :uint64
248
248
  :uint64
249
- when :float
249
+ when :float64
250
250
  :double
251
251
  when :float32
252
252
  :float
@@ -346,10 +346,10 @@ module Rover
346
346
  end
347
347
  end
348
348
 
349
- def dup
349
+ def deep_dup
350
350
  df = DataFrame.new
351
351
  @vectors.each do |k, v|
352
- df[k] = v
352
+ df[k] = v.dup
353
353
  end
354
354
  df
355
355
  end
@@ -503,8 +503,20 @@ module Rover
503
503
 
504
504
  private
505
505
 
506
+ # for clone
507
+ def initialize_clone(_)
508
+ @vectors = @vectors.clone
509
+ super
510
+ end
511
+
512
+ # for dup
513
+ def initialize_dup(_)
514
+ @vectors = @vectors.dup
515
+ super
516
+ end
517
+
506
518
  def check_key(key)
507
- raise ArgumentError, "Key must be a string or symbol, got #{key.inspect}" unless key.is_a?(String) || key.is_a?(Symbol)
519
+ raise ArgumentError, "Key must be a String or Symbol, given #{key.class.name}" unless key.is_a?(String) || key.is_a?(Symbol)
508
520
  end
509
521
 
510
522
  # TODO make more efficient
@@ -565,19 +577,9 @@ module Rover
565
577
  raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
566
578
  end
567
579
 
568
- # TODO in 0.3.0
569
- # always use did_you_mean
570
- def check_column(key, did_you_mean = false)
580
+ def check_column(key)
571
581
  unless include?(key)
572
- if did_you_mean
573
- if RUBY_VERSION.to_f >= 2.6
574
- raise KeyError.new("Missing column: #{key}", receiver: self, key: key)
575
- else
576
- raise KeyError.new("Missing column: #{key}")
577
- end
578
- else
579
- raise ArgumentError, "Missing column: #{key}"
580
- end
582
+ raise KeyError.new("Missing column: #{key}", receiver: self, key: key)
581
583
  end
582
584
  end
583
585
 
data/lib/rover/group.rb CHANGED
@@ -1,12 +1,11 @@
1
1
  module Rover
2
2
  class Group
3
- # TODO raise ArgumentError for empty columns in 0.3.0
4
3
  def initialize(df, columns)
5
4
  @df = df
6
5
  @columns = columns
6
+ check_columns
7
7
  end
8
8
 
9
- # TODO raise ArgumentError for empty columns in 0.3.0
10
9
  def group(*columns)
11
10
  Group.new(@df, @columns + columns.flatten)
12
11
  end
@@ -38,10 +37,6 @@ module Rover
38
37
  def grouped_dfs
39
38
  # cache here so we can reuse for multiple calculations if needed
40
39
  @grouped_dfs ||= begin
41
- raise ArgumentError, "No columns given" if @columns.empty?
42
- missing_keys = @columns - @df.keys
43
- raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
44
-
45
40
  groups = Hash.new { |hash, key| hash[key] = [] }
46
41
  i = 0
47
42
  @df.each_row do |row|
@@ -56,5 +51,12 @@ module Rover
56
51
  result
57
52
  end
58
53
  end
54
+
55
+ def check_columns
56
+ raise ArgumentError, "No columns given" if @columns.empty?
57
+
58
+ missing_keys = @columns - @df.keys
59
+ raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
60
+ end
59
61
  end
60
62
  end
data/lib/rover/vector.rb CHANGED
@@ -1,23 +1,23 @@
1
1
  module Rover
2
2
  class Vector
3
3
  # if a user never specifies types,
4
- # the defaults are bool, float, int, and object
5
- # keep these simple
6
- #
7
- # we could create aliases for float64, int64, uint64
8
- # if so, type should still return the simple type
4
+ # the defaults are bool, float64, int64, and object
9
5
  TYPE_CAST_MAPPING = {
10
6
  bool: Numo::Bit,
11
7
  float32: Numo::SFloat,
12
- float: Numo::DFloat,
8
+ float64: Numo::DFloat,
13
9
  int8: Numo::Int8,
14
10
  int16: Numo::Int16,
15
11
  int32: Numo::Int32,
16
- int: Numo::Int64,
12
+ int64: Numo::Int64,
17
13
  object: Numo::RObject,
18
14
  uint8: Numo::UInt8,
19
15
  uint16: Numo::UInt16,
20
16
  uint32: Numo::UInt32,
17
+ uint64: Numo::UInt64,
18
+ # legacy - must come last
19
+ float: Numo::DFloat,
20
+ int: Numo::Int64,
21
21
  uint: Numo::UInt64
22
22
  }
23
23
 
@@ -31,7 +31,12 @@ module Rover
31
31
  end
32
32
 
33
33
  def to(type)
34
- Vector.new(self, type: type)
34
+ dup.to!(type)
35
+ end
36
+
37
+ def to!(type)
38
+ @data = cast_data(@data, type: type)
39
+ self
35
40
  end
36
41
 
37
42
  def to_numo
@@ -179,13 +184,9 @@ module Rover
179
184
  Vector.new(@data.to_a.reject(&block))
180
185
  end
181
186
 
187
+ # use Ruby tally for performance
182
188
  def tally
183
- result = Hash.new(0)
184
- @data.each do |v|
185
- result[v] += 1
186
- end
187
- result.default = nil
188
- result
189
+ @data.to_a.tally
189
190
  end
190
191
 
191
192
  def sort
@@ -196,6 +197,16 @@ module Rover
196
197
  Vector.new(@data.abs)
197
198
  end
198
199
 
200
+ def sqrt
201
+ data =
202
+ if @data.is_a?(Numo::SFloat)
203
+ Numo::SFloat::Math.sqrt(@data)
204
+ else
205
+ Numo::DFloat::Math.sqrt(@data)
206
+ end
207
+ Vector.new(data)
208
+ end
209
+
199
210
  def each(&block)
200
211
  @data.each(&block)
201
212
  end
@@ -333,6 +344,26 @@ module Rover
333
344
 
334
345
  private
335
346
 
347
+ # for clone
348
+ def initialize_clone(_)
349
+ @data = @data.clone
350
+ super
351
+ end
352
+
353
+ # for dup
354
+ def initialize_dup(_)
355
+ @data = @data.dup
356
+ super
357
+ end
358
+
359
+ def coerce(other)
360
+ if other.is_a?(Numeric)
361
+ [Vector.new([other]), self]
362
+ else
363
+ raise TypeError, "#{self.class} can't be coerced into #{other.class}"
364
+ end
365
+ end
366
+
336
367
  def cast_data(data, type: nil)
337
368
  numo_type = numo_type(type) if type
338
369
 
@@ -359,7 +390,7 @@ module Rover
359
390
  data = data.to_a
360
391
 
361
392
  if type
362
- data = data.map { |v| v || Float::NAN } if [:float, :float32].include?(type)
393
+ data = data.map { |v| v || Float::NAN } if [:float, :float32, :float64].include?(type)
363
394
  data = numo_type.cast(data)
364
395
  else
365
396
  data =
data/lib/rover/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Rover
2
- VERSION = "0.2.8"
2
+ VERSION = "0.3.2"
3
3
  end
data/lib/rover.rb CHANGED
@@ -40,12 +40,12 @@ module Rover
40
40
 
41
41
  raise ArgumentError, "Must specify headers" if headers == false
42
42
 
43
- # TODO use date converter
43
+ # TODO use date converter in 0.4.0 - need to test performance
44
44
  table = yield({converters: :numeric}.merge(csv_options))
45
45
 
46
46
  headers = nil if headers == true
47
- if headers && table.first && headers.size < table.first.size
48
- raise ArgumentError, "Expected #{table.first.size} headers, got #{headers.size}"
47
+ if headers && table.first && headers.size != table.first.size
48
+ raise ArgumentError, "Expected #{table.first.size} headers, given #{headers.size}"
49
49
  end
50
50
 
51
51
  table_headers = (headers || table.shift || []).dup
@@ -55,13 +55,18 @@ module Rover
55
55
  table_headers << nil
56
56
  end
57
57
  end
58
+ # TODO handle date converters
59
+ table_headers = table_headers.map! { |v| v.nil? ? nil : v.to_s }
60
+
61
+ if csv_options[:header_converters]
62
+ table_headers = CSV.parse(CSV.generate_line(table_headers), headers: true, header_converters: csv_options[:header_converters]).headers
63
+ end
58
64
 
59
65
  data = {}
60
66
  keys = table_headers.map { |k| [k, true] }.to_h
61
67
  unnamed_suffix = 1
62
68
  table_headers.each_with_index do |k, i|
63
- # TODO do same for empty string in 0.3.0
64
- if k.nil?
69
+ if k.nil? || k.empty?
65
70
  k = "unnamed"
66
71
  while keys.include?(k)
67
72
  unnamed_suffix += 1
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rover-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.8
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-03-15 00:00:00.000000000 Z
11
+ date: 2022-07-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -51,7 +51,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: '2.4'
54
+ version: '2.7'
55
55
  required_rubygems_version: !ruby/object:Gem::Requirement
56
56
  requirements:
57
57
  - - ">="