rover-df 0.2.8 → 0.3.2
This diff compares the contents of two publicly released versions of a package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -0
- data/README.md +8 -8
- data/lib/rover/data_frame.rb +22 -20
- data/lib/rover/group.rb +8 -6
- data/lib/rover/vector.rb +46 -15
- data/lib/rover/version.rb +1 -1
- data/lib/rover.rb +10 -5
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 44146e3081c968813848026d2a7f785527a0bb55af0a1978a8087d0dcc7a568a
|
4
|
+
data.tar.gz: db64041059937e131d27799739506ad27a78d19be3bdd90d299bcee855b54755
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f66190d43258016bc54da2ee42078087784e2c245095fceced4f617b4343130c8e324fd07a4fb0a08b6e23f512268d517e330087959dd7a78187228383189ea8
|
7
|
+
data.tar.gz: a1d3a80ff866d72dc32a0067240c32f7a24c602a736d932dba394af8b760566591ff0d2942a61243230016182a3a5fad2e635fa1095f809caf477146d0810868
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,25 @@
|
|
1
|
+
## 0.3.2 (2022-07-10)
|
2
|
+
|
3
|
+
- Added `sqrt` method to vectors
|
4
|
+
- Improved numeric operations between scalars and vectors
|
5
|
+
- Improved performance of `tally`
|
6
|
+
|
7
|
+
## 0.3.1 (2022-05-18)
|
8
|
+
|
9
|
+
- Added `to!` to vectors
|
10
|
+
- Fixed error with `nil` and `:float64` type
|
11
|
+
- Fixed `:header_converters` option with `read_csv` and `parse_csv`
|
12
|
+
|
13
|
+
## 0.3.0 (2022-04-03)
|
14
|
+
|
15
|
+
- Added `deep_dup` method to data frames
|
16
|
+
- Changed `:int` to `:int64`, `:uint` to `:uint64`, and `:float` to `:float64` for type methods
|
17
|
+
- Changed missing column to raise `KeyError` instead of `ArgumentError` for aggregate methods
|
18
|
+
- Changed passing too many headers to `read_csv` and `parse_csv` to raise `ArgumentError`
|
19
|
+
- Changed empty string in CSV headers to match behavior of `nil`
|
20
|
+
- Fixed `clone` and `dup` method for vectors
|
21
|
+
- Dropped support for Ruby < 2.7
|
22
|
+
|
1
23
|
## 0.2.8 (2022-03-15)
|
2
24
|
|
3
25
|
- Added `group` and `stacked` options to `plot`
|
data/README.md
CHANGED
@@ -424,22 +424,22 @@ df.to_parquet
|
|
424
424
|
You can specify column types when creating a data frame
|
425
425
|
|
426
426
|
```ruby
|
427
|
-
Rover::DataFrame.new(data, types: {"a" => :int, "b" => :float})
|
427
|
+
Rover::DataFrame.new(data, types: {"a" => :int64, "b" => :float64})
|
428
428
|
```
|
429
429
|
|
430
430
|
Or
|
431
431
|
|
432
432
|
```ruby
|
433
|
-
Rover.read_csv("data.csv", types: {"a" => :int, "b" => :float})
|
433
|
+
Rover.read_csv("data.csv", types: {"a" => :int64, "b" => :float64})
|
434
434
|
```
|
435
435
|
|
436
436
|
Supported types are:
|
437
437
|
|
438
|
-
- boolean -
|
439
|
-
- float -
|
440
|
-
- integer -
|
441
|
-
- unsigned integer -
|
442
|
-
- object -
|
438
|
+
- boolean - `:bool`
|
439
|
+
- float - `:float64`, `:float32`
|
440
|
+
- integer - `:int64`, `:int32`, `:int16`, `:int8`
|
441
|
+
- unsigned integer - `:uint64`, `:uint32`, `:uint16`, `:uint8`
|
442
|
+
- object - `:object`
|
443
443
|
|
444
444
|
Get column types
|
445
445
|
|
@@ -456,7 +456,7 @@ df[:a].type
|
|
456
456
|
Change the type of a column
|
457
457
|
|
458
458
|
```ruby
|
459
|
-
df[:a]
|
459
|
+
df[:a].to!(:int32)
|
460
460
|
```
|
461
461
|
|
462
462
|
## History
|
data/lib/rover/data_frame.rb
CHANGED
@@ -72,7 +72,7 @@ module Rover
|
|
72
72
|
# multiple columns
|
73
73
|
df = DataFrame.new
|
74
74
|
where.each do |k|
|
75
|
-
check_column(k, true)
|
75
|
+
check_column(k)
|
76
76
|
df[k] = @vectors[k]
|
77
77
|
end
|
78
78
|
df
|
@@ -102,7 +102,7 @@ module Rover
|
|
102
102
|
def []=(k, v)
|
103
103
|
check_key(k)
|
104
104
|
v = to_vector(v, size: size)
|
105
|
-
raise ArgumentError, "Size mismatch
|
105
|
+
raise ArgumentError, "Size mismatch (given #{v.size}, expected #{size})" if @vectors.any? && v.size != size
|
106
106
|
@vectors[k] = v
|
107
107
|
end
|
108
108
|
|
@@ -242,11 +242,11 @@ module Rover
|
|
242
242
|
types.each do |name, type|
|
243
243
|
schema[name] =
|
244
244
|
case type
|
245
|
-
when :int
|
245
|
+
when :int64
|
246
246
|
:int64
|
247
|
-
when :uint
|
247
|
+
when :uint64
|
248
248
|
:uint64
|
249
|
-
when :float
|
249
|
+
when :float64
|
250
250
|
:double
|
251
251
|
when :float32
|
252
252
|
:float
|
@@ -346,10 +346,10 @@ module Rover
|
|
346
346
|
end
|
347
347
|
end
|
348
348
|
|
349
|
-
def
|
349
|
+
def deep_dup
|
350
350
|
df = DataFrame.new
|
351
351
|
@vectors.each do |k, v|
|
352
|
-
df[k] = v
|
352
|
+
df[k] = v.dup
|
353
353
|
end
|
354
354
|
df
|
355
355
|
end
|
@@ -503,8 +503,20 @@ module Rover
|
|
503
503
|
|
504
504
|
private
|
505
505
|
|
506
|
+
# for clone
|
507
|
+
def initialize_clone(_)
|
508
|
+
@vectors = @vectors.clone
|
509
|
+
super
|
510
|
+
end
|
511
|
+
|
512
|
+
# for dup
|
513
|
+
def initialize_dup(_)
|
514
|
+
@vectors = @vectors.dup
|
515
|
+
super
|
516
|
+
end
|
517
|
+
|
506
518
|
def check_key(key)
|
507
|
-
raise ArgumentError, "Key must be a
|
519
|
+
raise ArgumentError, "Key must be a String or Symbol, given #{key.class.name}" unless key.is_a?(String) || key.is_a?(Symbol)
|
508
520
|
end
|
509
521
|
|
510
522
|
# TODO make more efficient
|
@@ -565,19 +577,9 @@ module Rover
|
|
565
577
|
raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
|
566
578
|
end
|
567
579
|
|
568
|
-
|
569
|
-
# always use did_you_mean
|
570
|
-
def check_column(key, did_you_mean = false)
|
580
|
+
def check_column(key)
|
571
581
|
unless include?(key)
|
572
|
-
|
573
|
-
if RUBY_VERSION.to_f >= 2.6
|
574
|
-
raise KeyError.new("Missing column: #{key}", receiver: self, key: key)
|
575
|
-
else
|
576
|
-
raise KeyError.new("Missing column: #{key}")
|
577
|
-
end
|
578
|
-
else
|
579
|
-
raise ArgumentError, "Missing column: #{key}"
|
580
|
-
end
|
582
|
+
raise KeyError.new("Missing column: #{key}", receiver: self, key: key)
|
581
583
|
end
|
582
584
|
end
|
583
585
|
|
data/lib/rover/group.rb
CHANGED
@@ -1,12 +1,11 @@
|
|
1
1
|
module Rover
|
2
2
|
class Group
|
3
|
-
# TODO raise ArgumentError for empty columns in 0.3.0
|
4
3
|
def initialize(df, columns)
|
5
4
|
@df = df
|
6
5
|
@columns = columns
|
6
|
+
check_columns
|
7
7
|
end
|
8
8
|
|
9
|
-
# TODO raise ArgumentError for empty columns in 0.3.0
|
10
9
|
def group(*columns)
|
11
10
|
Group.new(@df, @columns + columns.flatten)
|
12
11
|
end
|
@@ -38,10 +37,6 @@ module Rover
|
|
38
37
|
def grouped_dfs
|
39
38
|
# cache here so we can reuse for multiple calculations if needed
|
40
39
|
@grouped_dfs ||= begin
|
41
|
-
raise ArgumentError, "No columns given" if @columns.empty?
|
42
|
-
missing_keys = @columns - @df.keys
|
43
|
-
raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
|
44
|
-
|
45
40
|
groups = Hash.new { |hash, key| hash[key] = [] }
|
46
41
|
i = 0
|
47
42
|
@df.each_row do |row|
|
@@ -56,5 +51,12 @@ module Rover
|
|
56
51
|
result
|
57
52
|
end
|
58
53
|
end
|
54
|
+
|
55
|
+
def check_columns
|
56
|
+
raise ArgumentError, "No columns given" if @columns.empty?
|
57
|
+
|
58
|
+
missing_keys = @columns - @df.keys
|
59
|
+
raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
|
60
|
+
end
|
59
61
|
end
|
60
62
|
end
|
data/lib/rover/vector.rb
CHANGED
@@ -1,23 +1,23 @@
|
|
1
1
|
module Rover
|
2
2
|
class Vector
|
3
3
|
# if a user never specifies types,
|
4
|
-
# the defaults are bool,
|
5
|
-
# keep these simple
|
6
|
-
#
|
7
|
-
# we could create aliases for float64, int64, uint64
|
8
|
-
# if so, type should still return the simple type
|
4
|
+
# the defaults are bool, float64, int64, and object
|
9
5
|
TYPE_CAST_MAPPING = {
|
10
6
|
bool: Numo::Bit,
|
11
7
|
float32: Numo::SFloat,
|
12
|
-
|
8
|
+
float64: Numo::DFloat,
|
13
9
|
int8: Numo::Int8,
|
14
10
|
int16: Numo::Int16,
|
15
11
|
int32: Numo::Int32,
|
16
|
-
|
12
|
+
int64: Numo::Int64,
|
17
13
|
object: Numo::RObject,
|
18
14
|
uint8: Numo::UInt8,
|
19
15
|
uint16: Numo::UInt16,
|
20
16
|
uint32: Numo::UInt32,
|
17
|
+
uint64: Numo::UInt64,
|
18
|
+
# legacy - must come last
|
19
|
+
float: Numo::DFloat,
|
20
|
+
int: Numo::Int64,
|
21
21
|
uint: Numo::UInt64
|
22
22
|
}
|
23
23
|
|
@@ -31,7 +31,12 @@ module Rover
|
|
31
31
|
end
|
32
32
|
|
33
33
|
def to(type)
|
34
|
-
|
34
|
+
dup.to!(type)
|
35
|
+
end
|
36
|
+
|
37
|
+
def to!(type)
|
38
|
+
@data = cast_data(@data, type: type)
|
39
|
+
self
|
35
40
|
end
|
36
41
|
|
37
42
|
def to_numo
|
@@ -179,13 +184,9 @@ module Rover
|
|
179
184
|
Vector.new(@data.to_a.reject(&block))
|
180
185
|
end
|
181
186
|
|
187
|
+
# use Ruby tally for performance
|
182
188
|
def tally
|
183
|
-
|
184
|
-
@data.each do |v|
|
185
|
-
result[v] += 1
|
186
|
-
end
|
187
|
-
result.default = nil
|
188
|
-
result
|
189
|
+
@data.to_a.tally
|
189
190
|
end
|
190
191
|
|
191
192
|
def sort
|
@@ -196,6 +197,16 @@ module Rover
|
|
196
197
|
Vector.new(@data.abs)
|
197
198
|
end
|
198
199
|
|
200
|
+
def sqrt
|
201
|
+
data =
|
202
|
+
if @data.is_a?(Numo::SFloat)
|
203
|
+
Numo::SFloat::Math.sqrt(@data)
|
204
|
+
else
|
205
|
+
Numo::DFloat::Math.sqrt(@data)
|
206
|
+
end
|
207
|
+
Vector.new(data)
|
208
|
+
end
|
209
|
+
|
199
210
|
def each(&block)
|
200
211
|
@data.each(&block)
|
201
212
|
end
|
@@ -333,6 +344,26 @@ module Rover
|
|
333
344
|
|
334
345
|
private
|
335
346
|
|
347
|
+
# for clone
|
348
|
+
def initialize_clone(_)
|
349
|
+
@data = @data.clone
|
350
|
+
super
|
351
|
+
end
|
352
|
+
|
353
|
+
# for dup
|
354
|
+
def initialize_dup(_)
|
355
|
+
@data = @data.dup
|
356
|
+
super
|
357
|
+
end
|
358
|
+
|
359
|
+
def coerce(other)
|
360
|
+
if other.is_a?(Numeric)
|
361
|
+
[Vector.new([other]), self]
|
362
|
+
else
|
363
|
+
raise TypeError, "#{self.class} can't be coerced into #{other.class}"
|
364
|
+
end
|
365
|
+
end
|
366
|
+
|
336
367
|
def cast_data(data, type: nil)
|
337
368
|
numo_type = numo_type(type) if type
|
338
369
|
|
@@ -359,7 +390,7 @@ module Rover
|
|
359
390
|
data = data.to_a
|
360
391
|
|
361
392
|
if type
|
362
|
-
data = data.map { |v| v || Float::NAN } if [:float, :float32].include?(type)
|
393
|
+
data = data.map { |v| v || Float::NAN } if [:float, :float32, :float64].include?(type)
|
363
394
|
data = numo_type.cast(data)
|
364
395
|
else
|
365
396
|
data =
|
data/lib/rover/version.rb
CHANGED
data/lib/rover.rb
CHANGED
@@ -40,12 +40,12 @@ module Rover
|
|
40
40
|
|
41
41
|
raise ArgumentError, "Must specify headers" if headers == false
|
42
42
|
|
43
|
-
# TODO use date converter
|
43
|
+
# TODO use date converter in 0.4.0 - need to test performance
|
44
44
|
table = yield({converters: :numeric}.merge(csv_options))
|
45
45
|
|
46
46
|
headers = nil if headers == true
|
47
|
-
if headers && table.first && headers.size
|
48
|
-
raise ArgumentError, "Expected #{table.first.size} headers,
|
47
|
+
if headers && table.first && headers.size != table.first.size
|
48
|
+
raise ArgumentError, "Expected #{table.first.size} headers, given #{headers.size}"
|
49
49
|
end
|
50
50
|
|
51
51
|
table_headers = (headers || table.shift || []).dup
|
@@ -55,13 +55,18 @@ module Rover
|
|
55
55
|
table_headers << nil
|
56
56
|
end
|
57
57
|
end
|
58
|
+
# TODO handle date converters
|
59
|
+
table_headers = table_headers.map! { |v| v.nil? ? nil : v.to_s }
|
60
|
+
|
61
|
+
if csv_options[:header_converters]
|
62
|
+
table_headers = CSV.parse(CSV.generate_line(table_headers), headers: true, header_converters: csv_options[:header_converters]).headers
|
63
|
+
end
|
58
64
|
|
59
65
|
data = {}
|
60
66
|
keys = table_headers.map { |k| [k, true] }.to_h
|
61
67
|
unnamed_suffix = 1
|
62
68
|
table_headers.each_with_index do |k, i|
|
63
|
-
|
64
|
-
if k.nil?
|
69
|
+
if k.nil? || k.empty?
|
65
70
|
k = "unnamed"
|
66
71
|
while keys.include?(k)
|
67
72
|
unnamed_suffix += 1
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rover-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.8
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -51,7 +51,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
51
51
|
requirements:
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '2.
|
54
|
+
version: '2.7'
|
55
55
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
56
56
|
requirements:
|
57
57
|
- - ">="
|