rover-df 0.4.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/LICENSE.txt +1 -1
- data/README.md +0 -8
- data/lib/rover/data_frame.rb +20 -11
- data/lib/rover/group.rb +1 -1
- data/lib/rover/vector.rb +18 -17
- data/lib/rover/version.rb +1 -1
- data/lib/rover.rb +3 -2
- metadata +7 -11
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1b269feff9f961b17f921d168f240e24cea8e02a75438cdfc3e0aff5398d78f4
|
|
4
|
+
data.tar.gz: dcd83d0a8ebe2ed3b13c1fdfca8c1a078d3a2b12df66a1c59ebfecf7030ee443
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 74743dad81d9fd28a96818e909f165ec173241d04ef9daeb49b8fbc48ff02055d1946dddf33d052fe68f59c32ac678225c5eef9f1d03a035c35630fa798fda26
|
|
7
|
+
data.tar.gz: d1b12368ae872abfff95cfffaf209e9ccb05b121aaf1f9b23b11785cc00fa9c3915ec15905275c1c69cbcbaefe1f1cae494af648d3ef3a2c9af58b6b931152d5
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
## 1.0.0 (2026-04-04)
|
|
2
|
+
|
|
3
|
+
- Switched to `numo-narray-alt`
|
|
4
|
+
- Dropped support for Ruby < 3.3
|
|
5
|
+
|
|
6
|
+
## 0.5.0 (2025-06-07)
|
|
7
|
+
|
|
8
|
+
- Strings and symbols are no longer treated as different keys
|
|
9
|
+
- Changed methods that return column names to always use strings
|
|
10
|
+
- Changed `[]` method to return vector instead of Numo array with range or array argument
|
|
11
|
+
- Changed `first` and `last` methods to return element instead of vector when no arguments
|
|
12
|
+
- Changed `round`, `ceil`, and `floor` methods to always return same type as original vector
|
|
13
|
+
- Dropped support for Ruby < 3.2
|
|
14
|
+
|
|
1
15
|
## 0.4.1 (2024-10-07)
|
|
2
16
|
|
|
3
17
|
- Fixed connection leasing for Active Record 7.2+
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
|
@@ -20,12 +20,6 @@ gem "rover-df"
|
|
|
20
20
|
|
|
21
21
|
A data frame is an in-memory table. It’s a useful data structure for data analysis and machine learning. It uses columnar storage for fast operations on columns.
|
|
22
22
|
|
|
23
|
-
Try it out for forecasting by clicking the button below (it can take a few minutes to start):
|
|
24
|
-
|
|
25
|
-
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/ankane/ml-stack/master?filepath=Forecasting.ipynb)
|
|
26
|
-
|
|
27
|
-
Use the `Run` button (or `SHIFT` + `ENTER`) to run each line.
|
|
28
|
-
|
|
29
23
|
## Creating Data Frames
|
|
30
24
|
|
|
31
25
|
From an array
|
|
@@ -97,8 +91,6 @@ Select a column
|
|
|
97
91
|
df[:a]
|
|
98
92
|
```
|
|
99
93
|
|
|
100
|
-
> Note that strings and symbols are different keys, just like hashes. Creating a data frame from Active Record, a CSV, or Parquet uses strings.
|
|
101
|
-
|
|
102
94
|
Select multiple columns
|
|
103
95
|
|
|
104
96
|
```ruby
|
data/lib/rover/data_frame.rb
CHANGED
|
@@ -4,7 +4,7 @@ module Rover
|
|
|
4
4
|
data, options = process_args(args)
|
|
5
5
|
|
|
6
6
|
@vectors = {}
|
|
7
|
-
types = options[:types] || {}
|
|
7
|
+
types = (options[:types] || {}).transform_keys(&:to_s)
|
|
8
8
|
|
|
9
9
|
if data.is_a?(DataFrame)
|
|
10
10
|
data.vectors.each do |k, v|
|
|
@@ -14,7 +14,7 @@ module Rover
|
|
|
14
14
|
data.to_h.each do |k, v|
|
|
15
15
|
@vectors[k] =
|
|
16
16
|
if v.respond_to?(:to_a)
|
|
17
|
-
Vector.new(v, type: types[k])
|
|
17
|
+
Vector.new(v, type: types[k.to_s])
|
|
18
18
|
else
|
|
19
19
|
v
|
|
20
20
|
end
|
|
@@ -23,7 +23,7 @@ module Rover
|
|
|
23
23
|
# handle scalars
|
|
24
24
|
size = @vectors.values.find { |v| v.is_a?(Vector) }&.size || 1
|
|
25
25
|
@vectors.each_key do |k|
|
|
26
|
-
@vectors[k] = to_vector(@vectors[k], size: size, type: types[k])
|
|
26
|
+
@vectors[k] = to_vector(@vectors[k], size: size, type: types[k.to_s])
|
|
27
27
|
end
|
|
28
28
|
elsif data.is_a?(Array)
|
|
29
29
|
vectors = {}
|
|
@@ -38,12 +38,12 @@ module Rover
|
|
|
38
38
|
end
|
|
39
39
|
end
|
|
40
40
|
vectors.each do |k, v|
|
|
41
|
-
@vectors[k] = to_vector(v, type: types[k])
|
|
41
|
+
@vectors[k] = to_vector(v, type: types[k.to_s])
|
|
42
42
|
end
|
|
43
43
|
elsif defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || (data.is_a?(Class) && data < ActiveRecord::Base) || data.is_a?(ActiveRecord::Result))
|
|
44
44
|
result = data.is_a?(ActiveRecord::Result) ? data : data.connection_pool.with_connection { |c| c.select_all(data.all.to_sql) }
|
|
45
45
|
result.columns.each_with_index do |k, i|
|
|
46
|
-
@vectors[k] = to_vector(result.rows.map { |r| r[i] }, type: types[k])
|
|
46
|
+
@vectors[k] = to_vector(result.rows.map { |r| r[i] }, type: types[k.to_s])
|
|
47
47
|
end
|
|
48
48
|
else
|
|
49
49
|
raise ArgumentError, "Cannot cast to data frame: #{data.class.name}"
|
|
@@ -54,6 +54,9 @@ module Rover
|
|
|
54
54
|
check_key(k)
|
|
55
55
|
end
|
|
56
56
|
|
|
57
|
+
# TODO check for duplicate keys
|
|
58
|
+
@vectors.transform_keys!(&:to_s)
|
|
59
|
+
|
|
57
60
|
# check sizes
|
|
58
61
|
sizes = @vectors.values.map(&:size).uniq
|
|
59
62
|
if sizes.size > 1
|
|
@@ -73,12 +76,12 @@ module Rover
|
|
|
73
76
|
df = DataFrame.new
|
|
74
77
|
where.each do |k|
|
|
75
78
|
check_column(k)
|
|
76
|
-
df[k] = @vectors[k]
|
|
79
|
+
df[k] = @vectors[k.to_s]
|
|
77
80
|
end
|
|
78
81
|
df
|
|
79
82
|
else
|
|
80
83
|
# single column
|
|
81
|
-
@vectors[where]
|
|
84
|
+
@vectors[where.to_s]
|
|
82
85
|
end
|
|
83
86
|
end
|
|
84
87
|
|
|
@@ -103,7 +106,7 @@ module Rover
|
|
|
103
106
|
check_key(k)
|
|
104
107
|
v = to_vector(v, size: size)
|
|
105
108
|
raise ArgumentError, "Size mismatch (given #{v.size}, expected #{size})" if @vectors.any? && v.size != size
|
|
106
|
-
@vectors[k] = v
|
|
109
|
+
@vectors[k.to_s] = v
|
|
107
110
|
end
|
|
108
111
|
|
|
109
112
|
def size
|
|
@@ -140,6 +143,7 @@ module Rover
|
|
|
140
143
|
mapping.each_key do |k|
|
|
141
144
|
check_column(k)
|
|
142
145
|
end
|
|
146
|
+
mapping = mapping.to_h { |k, v| [k.to_s, v.to_s] }
|
|
143
147
|
# use transform_keys! to preserve order
|
|
144
148
|
@vectors.transform_keys! do |k|
|
|
145
149
|
mapping[k] || k
|
|
@@ -148,7 +152,7 @@ module Rover
|
|
|
148
152
|
end
|
|
149
153
|
|
|
150
154
|
def delete(key)
|
|
151
|
-
@vectors.delete(key)
|
|
155
|
+
@vectors.delete(key.to_s)
|
|
152
156
|
end
|
|
153
157
|
|
|
154
158
|
def except(*keys)
|
|
@@ -163,7 +167,7 @@ module Rover
|
|
|
163
167
|
end
|
|
164
168
|
|
|
165
169
|
def include?(key)
|
|
166
|
-
@vectors.include?(key)
|
|
170
|
+
@vectors.include?(key.to_s)
|
|
167
171
|
end
|
|
168
172
|
|
|
169
173
|
def head(n = 5)
|
|
@@ -237,6 +241,7 @@ module Rover
|
|
|
237
241
|
|
|
238
242
|
def to_csv
|
|
239
243
|
require "csv"
|
|
244
|
+
|
|
240
245
|
CSV.generate do |csv|
|
|
241
246
|
csv << keys
|
|
242
247
|
numo = vectors.values.map(&:to_numo)
|
|
@@ -287,6 +292,7 @@ module Rover
|
|
|
287
292
|
# for IRuby
|
|
288
293
|
def to_html
|
|
289
294
|
require "iruby"
|
|
295
|
+
|
|
290
296
|
if size > 7
|
|
291
297
|
# pass 8 rows so maxrows is applied
|
|
292
298
|
IRuby::HTML.table((self[0..4] + self[-4..-1]).to_h, maxrows: 7)
|
|
@@ -347,7 +353,7 @@ module Rover
|
|
|
347
353
|
end
|
|
348
354
|
|
|
349
355
|
def group(*columns)
|
|
350
|
-
Group.new(self, columns.flatten)
|
|
356
|
+
Group.new(self, columns.flatten.map(&:to_s))
|
|
351
357
|
end
|
|
352
358
|
|
|
353
359
|
[:max, :min, :median, :mean, :percentile, :sum, :std, :var].each do |name|
|
|
@@ -549,6 +555,9 @@ module Rover
|
|
|
549
555
|
check_join_keys(self, self_on)
|
|
550
556
|
check_join_keys(other, other_on)
|
|
551
557
|
|
|
558
|
+
self_on.map!(&:to_s)
|
|
559
|
+
other_on.map!(&:to_s)
|
|
560
|
+
|
|
552
561
|
indexed = other.to_a.group_by { |r| r.values_at(*other_on) }
|
|
553
562
|
indexed.default = []
|
|
554
563
|
|
data/lib/rover/group.rb
CHANGED
|
@@ -35,7 +35,7 @@ module Rover
|
|
|
35
35
|
|
|
36
36
|
# TODO make more efficient
|
|
37
37
|
def grouped_dfs
|
|
38
|
-
# cache here so we can reuse for multiple
|
|
38
|
+
# cache here so we can reuse for multiple calculations if needed
|
|
39
39
|
@grouped_dfs ||= begin
|
|
40
40
|
groups = Hash.new { |hash, key| hash[key] = [] }
|
|
41
41
|
i = 0
|
data/lib/rover/vector.rb
CHANGED
|
@@ -88,9 +88,10 @@ module Rover
|
|
|
88
88
|
def [](v)
|
|
89
89
|
if v.is_a?(Vector)
|
|
90
90
|
Vector.new(v.to_numo.mask(@data))
|
|
91
|
-
|
|
92
|
-
# TODO return vector unless v is an integer in 0.4.0
|
|
91
|
+
elsif v.is_a?(Numeric)
|
|
93
92
|
@data[v]
|
|
93
|
+
else
|
|
94
|
+
Vector.new(@data[v])
|
|
94
95
|
end
|
|
95
96
|
end
|
|
96
97
|
|
|
@@ -171,6 +172,7 @@ module Rover
|
|
|
171
172
|
def map(&block)
|
|
172
173
|
# convert to Ruby first to cast properly
|
|
173
174
|
# https://github.com/ruby-numo/numo-narray/issues/181
|
|
175
|
+
# numo-narray-alt has same behavior
|
|
174
176
|
Vector.new(@data.to_a.map(&block))
|
|
175
177
|
end
|
|
176
178
|
|
|
@@ -204,8 +206,7 @@ module Rover
|
|
|
204
206
|
if ndigits == 0
|
|
205
207
|
Vector.new(@data.round)
|
|
206
208
|
else
|
|
207
|
-
|
|
208
|
-
Vector.new(@data.to_a.map { |v| v.round(ndigits) })
|
|
209
|
+
Vector.new(@data.map { |v| v.round(ndigits) })
|
|
209
210
|
end
|
|
210
211
|
end
|
|
211
212
|
|
|
@@ -213,8 +214,7 @@ module Rover
|
|
|
213
214
|
if ndigits == 0
|
|
214
215
|
Vector.new(@data.ceil)
|
|
215
216
|
else
|
|
216
|
-
|
|
217
|
-
Vector.new(@data.to_a.map { |v| v.ceil(ndigits) })
|
|
217
|
+
Vector.new(@data.map { |v| v.ceil(ndigits) })
|
|
218
218
|
end
|
|
219
219
|
end
|
|
220
220
|
|
|
@@ -222,8 +222,7 @@ module Rover
|
|
|
222
222
|
if ndigits == 0
|
|
223
223
|
Vector.new(@data.floor)
|
|
224
224
|
else
|
|
225
|
-
|
|
226
|
-
Vector.new(@data.to_a.map { |v| v.floor(ndigits) })
|
|
225
|
+
Vector.new(@data.map { |v| v.floor(ndigits) })
|
|
227
226
|
end
|
|
228
227
|
end
|
|
229
228
|
|
|
@@ -278,14 +277,13 @@ module Rover
|
|
|
278
277
|
end
|
|
279
278
|
|
|
280
279
|
def mean
|
|
281
|
-
|
|
282
|
-
# https://github.com/ruby-numo/numo-narray/issues/79
|
|
283
|
-
@data.cast_to(Numo::DFloat).mean
|
|
280
|
+
@data.mean
|
|
284
281
|
end
|
|
285
282
|
|
|
286
283
|
def median
|
|
287
284
|
# need to cast to get correct result
|
|
288
285
|
# https://github.com/ruby-numo/numo-narray/issues/165
|
|
286
|
+
# numo-narray-alt has same behavior
|
|
289
287
|
@data.cast_to(Numo::DFloat).median
|
|
290
288
|
end
|
|
291
289
|
|
|
@@ -327,18 +325,20 @@ module Rover
|
|
|
327
325
|
to_a.zip(other.to_a, &block)
|
|
328
326
|
end
|
|
329
327
|
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
328
|
+
def first(n = NOT_SET)
|
|
329
|
+
if n == NOT_SET
|
|
330
|
+
@data[0]
|
|
331
|
+
elsif n >= size
|
|
333
332
|
Vector.new(@data)
|
|
334
333
|
else
|
|
335
334
|
Vector.new(@data[0...n])
|
|
336
335
|
end
|
|
337
336
|
end
|
|
338
337
|
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
338
|
+
def last(n = NOT_SET)
|
|
339
|
+
if n == NOT_SET
|
|
340
|
+
@data[-1]
|
|
341
|
+
elsif n >= size
|
|
342
342
|
Vector.new(@data)
|
|
343
343
|
else
|
|
344
344
|
Vector.new(@data[-n..-1])
|
|
@@ -398,6 +398,7 @@ module Rover
|
|
|
398
398
|
# for IRuby
|
|
399
399
|
def to_html
|
|
400
400
|
require "iruby"
|
|
401
|
+
|
|
401
402
|
if size > 7
|
|
402
403
|
# pass 8 rows so maxrows is applied
|
|
403
404
|
IRuby::HTML.table(first(4).to_a + last(4).to_a, maxrows: 7)
|
data/lib/rover/version.rb
CHANGED
data/lib/rover.rb
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# dependencies
|
|
2
|
-
require "numo/narray"
|
|
2
|
+
require "numo/narray/alt"
|
|
3
3
|
|
|
4
4
|
# modules
|
|
5
5
|
require_relative "rover/data_frame"
|
|
@@ -40,7 +40,7 @@ module Rover
|
|
|
40
40
|
|
|
41
41
|
raise ArgumentError, "Must specify headers" if headers == false
|
|
42
42
|
|
|
43
|
-
# TODO use date converter
|
|
43
|
+
# TODO use date converter? need to test performance
|
|
44
44
|
table = yield({converters: :numeric}.merge(csv_options))
|
|
45
45
|
|
|
46
46
|
headers = nil if headers == true
|
|
@@ -112,6 +112,7 @@ module Rover
|
|
|
112
112
|
table = yield
|
|
113
113
|
data = {}
|
|
114
114
|
types ||= {}
|
|
115
|
+
types = types.transform_keys(&:to_s)
|
|
115
116
|
table.each_column do |column|
|
|
116
117
|
k = column.field.name
|
|
117
118
|
if types[k]
|
metadata
CHANGED
|
@@ -1,30 +1,28 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rover-df
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.1
|
|
4
|
+
version: 1.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
|
-
name: numo-narray
|
|
13
|
+
name: numo-narray-alt
|
|
15
14
|
requirement: !ruby/object:Gem::Requirement
|
|
16
15
|
requirements:
|
|
17
16
|
- - ">="
|
|
18
17
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: 0.
|
|
18
|
+
version: '0.10'
|
|
20
19
|
type: :runtime
|
|
21
20
|
prerelease: false
|
|
22
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
22
|
requirements:
|
|
24
23
|
- - ">="
|
|
25
24
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: 0.
|
|
27
|
-
description:
|
|
25
|
+
version: '0.10'
|
|
28
26
|
email: andrew@ankane.org
|
|
29
27
|
executables: []
|
|
30
28
|
extensions: []
|
|
@@ -43,7 +41,6 @@ homepage: https://github.com/ankane/rover
|
|
|
43
41
|
licenses:
|
|
44
42
|
- MIT
|
|
45
43
|
metadata: {}
|
|
46
|
-
post_install_message:
|
|
47
44
|
rdoc_options: []
|
|
48
45
|
require_paths:
|
|
49
46
|
- lib
|
|
@@ -51,15 +48,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
51
48
|
requirements:
|
|
52
49
|
- - ">="
|
|
53
50
|
- !ruby/object:Gem::Version
|
|
54
|
-
version: '3.
|
|
51
|
+
version: '3.3'
|
|
55
52
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
56
53
|
requirements:
|
|
57
54
|
- - ">="
|
|
58
55
|
- !ruby/object:Gem::Version
|
|
59
56
|
version: '0'
|
|
60
57
|
requirements: []
|
|
61
|
-
rubygems_version:
|
|
62
|
-
signing_key:
|
|
58
|
+
rubygems_version: 4.0.6
|
|
63
59
|
specification_version: 4
|
|
64
60
|
summary: Simple, powerful data frames for Ruby
|
|
65
61
|
test_files: []
|