rover-df 0.2.8 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -0
- data/README.md +8 -8
- data/lib/rover/data_frame.rb +22 -20
- data/lib/rover/group.rb +8 -6
- data/lib/rover/vector.rb +46 -15
- data/lib/rover/version.rb +1 -1
- data/lib/rover.rb +10 -5
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 44146e3081c968813848026d2a7f785527a0bb55af0a1978a8087d0dcc7a568a
|
4
|
+
data.tar.gz: db64041059937e131d27799739506ad27a78d19be3bdd90d299bcee855b54755
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f66190d43258016bc54da2ee42078087784e2c245095fceced4f617b4343130c8e324fd07a4fb0a08b6e23f512268d517e330087959dd7a78187228383189ea8
|
7
|
+
data.tar.gz: a1d3a80ff866d72dc32a0067240c32f7a24c602a736d932dba394af8b760566591ff0d2942a61243230016182a3a5fad2e635fa1095f809caf477146d0810868
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,25 @@
|
|
1
|
+
## 0.3.2 (2022-07-10)
|
2
|
+
|
3
|
+
- Added `sqrt` method to vectors
|
4
|
+
- Improved numeric operations between scalars and vectors
|
5
|
+
- Improved performance of `tally`
|
6
|
+
|
7
|
+
## 0.3.1 (2022-05-18)
|
8
|
+
|
9
|
+
- Added `to!` to vectors
|
10
|
+
- Fixed error with `nil` and `:float64` type
|
11
|
+
- Fixed `:header_converters` option with `read_csv` and `parse_csv`
|
12
|
+
|
13
|
+
## 0.3.0 (2022-04-03)
|
14
|
+
|
15
|
+
- Added `deep_dup` method to data frames
|
16
|
+
- Changed `:int` to `:int64`, `:uint` to `:uint64`, and `:float` to `:float64` for type methods
|
17
|
+
- Changed missing column to raise `KeyError` instead of `ArgumentError` for aggregate methods
|
18
|
+
- Changed passing too many headers to `read_csv` and `parse_csv` to raise `ArgumentError`
|
19
|
+
- Changed empty string in CSV headers to match behavior of `nil`
|
20
|
+
- Fixed `clone` and `dup` method for vectors
|
21
|
+
- Dropped support for Ruby < 2.7
|
22
|
+
|
1
23
|
## 0.2.8 (2022-03-15)
|
2
24
|
|
3
25
|
- Added `group` and `stacked` options to `plot`
|
data/README.md
CHANGED
@@ -424,22 +424,22 @@ df.to_parquet
|
|
424
424
|
You can specify column types when creating a data frame
|
425
425
|
|
426
426
|
```ruby
|
427
|
-
Rover::DataFrame.new(data, types: {"a" => :
|
427
|
+
Rover::DataFrame.new(data, types: {"a" => :int64, "b" => :float64})
|
428
428
|
```
|
429
429
|
|
430
430
|
Or
|
431
431
|
|
432
432
|
```ruby
|
433
|
-
Rover.read_csv("data.csv", types: {"a" => :
|
433
|
+
Rover.read_csv("data.csv", types: {"a" => :int64, "b" => :float64})
|
434
434
|
```
|
435
435
|
|
436
436
|
Supported types are:
|
437
437
|
|
438
|
-
- boolean -
|
439
|
-
- float -
|
440
|
-
- integer -
|
441
|
-
- unsigned integer -
|
442
|
-
- object -
|
438
|
+
- boolean - `:bool`
|
439
|
+
- float - `:float64`, `:float32`
|
440
|
+
- integer - `:int64`, `:int32`, `:int16`, `:int8`
|
441
|
+
- unsigned integer - `:uint64`, `:uint32`, `:uint16`, `:uint8`
|
442
|
+
- object - `:object`
|
443
443
|
|
444
444
|
Get column types
|
445
445
|
|
@@ -456,7 +456,7 @@ df[:a].type
|
|
456
456
|
Change the type of a column
|
457
457
|
|
458
458
|
```ruby
|
459
|
-
df[:a]
|
459
|
+
df[:a].to!(:int32)
|
460
460
|
```
|
461
461
|
|
462
462
|
## History
|
data/lib/rover/data_frame.rb
CHANGED
@@ -72,7 +72,7 @@ module Rover
|
|
72
72
|
# multiple columns
|
73
73
|
df = DataFrame.new
|
74
74
|
where.each do |k|
|
75
|
-
check_column(k
|
75
|
+
check_column(k)
|
76
76
|
df[k] = @vectors[k]
|
77
77
|
end
|
78
78
|
df
|
@@ -102,7 +102,7 @@ module Rover
|
|
102
102
|
def []=(k, v)
|
103
103
|
check_key(k)
|
104
104
|
v = to_vector(v, size: size)
|
105
|
-
raise ArgumentError, "Size mismatch
|
105
|
+
raise ArgumentError, "Size mismatch (given #{v.size}, expected #{size})" if @vectors.any? && v.size != size
|
106
106
|
@vectors[k] = v
|
107
107
|
end
|
108
108
|
|
@@ -242,11 +242,11 @@ module Rover
|
|
242
242
|
types.each do |name, type|
|
243
243
|
schema[name] =
|
244
244
|
case type
|
245
|
-
when :int
|
245
|
+
when :int64
|
246
246
|
:int64
|
247
|
-
when :uint
|
247
|
+
when :uint64
|
248
248
|
:uint64
|
249
|
-
when :float
|
249
|
+
when :float64
|
250
250
|
:double
|
251
251
|
when :float32
|
252
252
|
:float
|
@@ -346,10 +346,10 @@ module Rover
|
|
346
346
|
end
|
347
347
|
end
|
348
348
|
|
349
|
-
def
|
349
|
+
def deep_dup
|
350
350
|
df = DataFrame.new
|
351
351
|
@vectors.each do |k, v|
|
352
|
-
df[k] = v
|
352
|
+
df[k] = v.dup
|
353
353
|
end
|
354
354
|
df
|
355
355
|
end
|
@@ -503,8 +503,20 @@ module Rover
|
|
503
503
|
|
504
504
|
private
|
505
505
|
|
506
|
+
# for clone
|
507
|
+
def initialize_clone(_)
|
508
|
+
@vectors = @vectors.clone
|
509
|
+
super
|
510
|
+
end
|
511
|
+
|
512
|
+
# for dup
|
513
|
+
def initialize_dup(_)
|
514
|
+
@vectors = @vectors.dup
|
515
|
+
super
|
516
|
+
end
|
517
|
+
|
506
518
|
def check_key(key)
|
507
|
-
raise ArgumentError, "Key must be a
|
519
|
+
raise ArgumentError, "Key must be a String or Symbol, given #{key.class.name}" unless key.is_a?(String) || key.is_a?(Symbol)
|
508
520
|
end
|
509
521
|
|
510
522
|
# TODO make more efficient
|
@@ -565,19 +577,9 @@ module Rover
|
|
565
577
|
raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
|
566
578
|
end
|
567
579
|
|
568
|
-
|
569
|
-
# always use did_you_mean
|
570
|
-
def check_column(key, did_you_mean = false)
|
580
|
+
def check_column(key)
|
571
581
|
unless include?(key)
|
572
|
-
|
573
|
-
if RUBY_VERSION.to_f >= 2.6
|
574
|
-
raise KeyError.new("Missing column: #{key}", receiver: self, key: key)
|
575
|
-
else
|
576
|
-
raise KeyError.new("Missing column: #{key}")
|
577
|
-
end
|
578
|
-
else
|
579
|
-
raise ArgumentError, "Missing column: #{key}"
|
580
|
-
end
|
582
|
+
raise KeyError.new("Missing column: #{key}", receiver: self, key: key)
|
581
583
|
end
|
582
584
|
end
|
583
585
|
|
data/lib/rover/group.rb
CHANGED
@@ -1,12 +1,11 @@
|
|
1
1
|
module Rover
|
2
2
|
class Group
|
3
|
-
# TODO raise ArgumentError for empty columns in 0.3.0
|
4
3
|
def initialize(df, columns)
|
5
4
|
@df = df
|
6
5
|
@columns = columns
|
6
|
+
check_columns
|
7
7
|
end
|
8
8
|
|
9
|
-
# TODO raise ArgumentError for empty columns in 0.3.0
|
10
9
|
def group(*columns)
|
11
10
|
Group.new(@df, @columns + columns.flatten)
|
12
11
|
end
|
@@ -38,10 +37,6 @@ module Rover
|
|
38
37
|
def grouped_dfs
|
39
38
|
# cache here so we can reuse for multiple calculations if needed
|
40
39
|
@grouped_dfs ||= begin
|
41
|
-
raise ArgumentError, "No columns given" if @columns.empty?
|
42
|
-
missing_keys = @columns - @df.keys
|
43
|
-
raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
|
44
|
-
|
45
40
|
groups = Hash.new { |hash, key| hash[key] = [] }
|
46
41
|
i = 0
|
47
42
|
@df.each_row do |row|
|
@@ -56,5 +51,12 @@ module Rover
|
|
56
51
|
result
|
57
52
|
end
|
58
53
|
end
|
54
|
+
|
55
|
+
def check_columns
|
56
|
+
raise ArgumentError, "No columns given" if @columns.empty?
|
57
|
+
|
58
|
+
missing_keys = @columns - @df.keys
|
59
|
+
raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
|
60
|
+
end
|
59
61
|
end
|
60
62
|
end
|
data/lib/rover/vector.rb
CHANGED
@@ -1,23 +1,23 @@
|
|
1
1
|
module Rover
|
2
2
|
class Vector
|
3
3
|
# if a user never specifies types,
|
4
|
-
# the defaults are bool,
|
5
|
-
# keep these simple
|
6
|
-
#
|
7
|
-
# we could create aliases for float64, int64, uint64
|
8
|
-
# if so, type should still return the simple type
|
4
|
+
# the defaults are bool, float64, int64, and object
|
9
5
|
TYPE_CAST_MAPPING = {
|
10
6
|
bool: Numo::Bit,
|
11
7
|
float32: Numo::SFloat,
|
12
|
-
|
8
|
+
float64: Numo::DFloat,
|
13
9
|
int8: Numo::Int8,
|
14
10
|
int16: Numo::Int16,
|
15
11
|
int32: Numo::Int32,
|
16
|
-
|
12
|
+
int64: Numo::Int64,
|
17
13
|
object: Numo::RObject,
|
18
14
|
uint8: Numo::UInt8,
|
19
15
|
uint16: Numo::UInt16,
|
20
16
|
uint32: Numo::UInt32,
|
17
|
+
uint64: Numo::UInt64,
|
18
|
+
# legacy - must come last
|
19
|
+
float: Numo::DFloat,
|
20
|
+
int: Numo::Int64,
|
21
21
|
uint: Numo::UInt64
|
22
22
|
}
|
23
23
|
|
@@ -31,7 +31,12 @@ module Rover
|
|
31
31
|
end
|
32
32
|
|
33
33
|
def to(type)
|
34
|
-
|
34
|
+
dup.to!(type)
|
35
|
+
end
|
36
|
+
|
37
|
+
def to!(type)
|
38
|
+
@data = cast_data(@data, type: type)
|
39
|
+
self
|
35
40
|
end
|
36
41
|
|
37
42
|
def to_numo
|
@@ -179,13 +184,9 @@ module Rover
|
|
179
184
|
Vector.new(@data.to_a.reject(&block))
|
180
185
|
end
|
181
186
|
|
187
|
+
# use Ruby tally for performance
|
182
188
|
def tally
|
183
|
-
|
184
|
-
@data.each do |v|
|
185
|
-
result[v] += 1
|
186
|
-
end
|
187
|
-
result.default = nil
|
188
|
-
result
|
189
|
+
@data.to_a.tally
|
189
190
|
end
|
190
191
|
|
191
192
|
def sort
|
@@ -196,6 +197,16 @@ module Rover
|
|
196
197
|
Vector.new(@data.abs)
|
197
198
|
end
|
198
199
|
|
200
|
+
def sqrt
|
201
|
+
data =
|
202
|
+
if @data.is_a?(Numo::SFloat)
|
203
|
+
Numo::SFloat::Math.sqrt(@data)
|
204
|
+
else
|
205
|
+
Numo::DFloat::Math.sqrt(@data)
|
206
|
+
end
|
207
|
+
Vector.new(data)
|
208
|
+
end
|
209
|
+
|
199
210
|
def each(&block)
|
200
211
|
@data.each(&block)
|
201
212
|
end
|
@@ -333,6 +344,26 @@ module Rover
|
|
333
344
|
|
334
345
|
private
|
335
346
|
|
347
|
+
# for clone
|
348
|
+
def initialize_clone(_)
|
349
|
+
@data = @data.clone
|
350
|
+
super
|
351
|
+
end
|
352
|
+
|
353
|
+
# for dup
|
354
|
+
def initialize_dup(_)
|
355
|
+
@data = @data.dup
|
356
|
+
super
|
357
|
+
end
|
358
|
+
|
359
|
+
def coerce(other)
|
360
|
+
if other.is_a?(Numeric)
|
361
|
+
[Vector.new([other]), self]
|
362
|
+
else
|
363
|
+
raise TypeError, "#{self.class} can't be coerced into #{other.class}"
|
364
|
+
end
|
365
|
+
end
|
366
|
+
|
336
367
|
def cast_data(data, type: nil)
|
337
368
|
numo_type = numo_type(type) if type
|
338
369
|
|
@@ -359,7 +390,7 @@ module Rover
|
|
359
390
|
data = data.to_a
|
360
391
|
|
361
392
|
if type
|
362
|
-
data = data.map { |v| v || Float::NAN } if [:float, :float32].include?(type)
|
393
|
+
data = data.map { |v| v || Float::NAN } if [:float, :float32, :float64].include?(type)
|
363
394
|
data = numo_type.cast(data)
|
364
395
|
else
|
365
396
|
data =
|
data/lib/rover/version.rb
CHANGED
data/lib/rover.rb
CHANGED
@@ -40,12 +40,12 @@ module Rover
|
|
40
40
|
|
41
41
|
raise ArgumentError, "Must specify headers" if headers == false
|
42
42
|
|
43
|
-
# TODO use date converter
|
43
|
+
# TODO use date converter in 0.4.0 - need to test performance
|
44
44
|
table = yield({converters: :numeric}.merge(csv_options))
|
45
45
|
|
46
46
|
headers = nil if headers == true
|
47
|
-
if headers && table.first && headers.size
|
48
|
-
raise ArgumentError, "Expected #{table.first.size} headers,
|
47
|
+
if headers && table.first && headers.size != table.first.size
|
48
|
+
raise ArgumentError, "Expected #{table.first.size} headers, given #{headers.size}"
|
49
49
|
end
|
50
50
|
|
51
51
|
table_headers = (headers || table.shift || []).dup
|
@@ -55,13 +55,18 @@ module Rover
|
|
55
55
|
table_headers << nil
|
56
56
|
end
|
57
57
|
end
|
58
|
+
# TODO handle date converters
|
59
|
+
table_headers = table_headers.map! { |v| v.nil? ? nil : v.to_s }
|
60
|
+
|
61
|
+
if csv_options[:header_converters]
|
62
|
+
table_headers = CSV.parse(CSV.generate_line(table_headers), headers: true, header_converters: csv_options[:header_converters]).headers
|
63
|
+
end
|
58
64
|
|
59
65
|
data = {}
|
60
66
|
keys = table_headers.map { |k| [k, true] }.to_h
|
61
67
|
unnamed_suffix = 1
|
62
68
|
table_headers.each_with_index do |k, i|
|
63
|
-
|
64
|
-
if k.nil?
|
69
|
+
if k.nil? || k.empty?
|
65
70
|
k = "unnamed"
|
66
71
|
while keys.include?(k)
|
67
72
|
unnamed_suffix += 1
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rover-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.8
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -51,7 +51,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
51
51
|
requirements:
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '2.
|
54
|
+
version: '2.7'
|
55
55
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
56
56
|
requirements:
|
57
57
|
- - ">="
|