rover-df 0.4.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 843e2b3015dd7ad6a9d6a0bb604876a18f3a2f29c37359b50c86087bce64b257
4
- data.tar.gz: 4317f3694ee5fe5c4f0a1dd1dcd55aefa51575ad141779e619b80b4f65790057
3
+ metadata.gz: 1b269feff9f961b17f921d168f240e24cea8e02a75438cdfc3e0aff5398d78f4
4
+ data.tar.gz: dcd83d0a8ebe2ed3b13c1fdfca8c1a078d3a2b12df66a1c59ebfecf7030ee443
5
5
  SHA512:
6
- metadata.gz: 72d33c6f391854f538271090663316e991baa23d2341d227f135802836891867b0bce0096fc6756a88c3cc121a32725d7c7705ad5fe9f9519587761bd86e3d87
7
- data.tar.gz: f487cd44ed31a8a79bf23fbf06dde3b9e5f8a77bb9ef3d56b7f36e7d5df5c3ea4ac3e386907a183afa8c973448d420a827dd3354cc35ea86d4e0625287b0182e
6
+ metadata.gz: 74743dad81d9fd28a96818e909f165ec173241d04ef9daeb49b8fbc48ff02055d1946dddf33d052fe68f59c32ac678225c5eef9f1d03a035c35630fa798fda26
7
+ data.tar.gz: d1b12368ae872abfff95cfffaf209e9ccb05b121aaf1f9b23b11785cc00fa9c3915ec15905275c1c69cbcbaefe1f1cae494af648d3ef3a2c9af58b6b931152d5
data/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ ## 1.0.0 (2026-04-04)
2
+
3
+ - Switched to `numo-narray-alt`
4
+ - Dropped support for Ruby < 3.3
5
+
6
+ ## 0.5.0 (2025-06-07)
7
+
8
+ - Strings and symbols are no longer treated as different keys
9
+ - Changed methods that return column names to always use strings
10
+ - Changed `[]` method to return vector instead of Numo array with range or array argument
11
+ - Changed `first` and `last` methods to return element instead of vector when no arguments
12
+ - Changed `round`, `ceil`, and `floor` methods to always return same type as original vector
13
+ - Dropped support for Ruby < 3.2
14
+
1
15
  ## 0.4.1 (2024-10-07)
2
16
 
3
17
  - Fixed connection leasing for Active Record 7.2+
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2020-2022 Andrew Kane
1
+ Copyright (c) 2020-2026 Andrew Kane
2
2
 
3
3
  MIT License
4
4
 
data/README.md CHANGED
@@ -20,12 +20,6 @@ gem "rover-df"
20
20
 
21
21
  A data frame is an in-memory table. It’s a useful data structure for data analysis and machine learning. It uses columnar storage for fast operations on columns.
22
22
 
23
- Try it out for forecasting by clicking the button below (it can take a few minutes to start):
24
-
25
- [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/ankane/ml-stack/master?filepath=Forecasting.ipynb)
26
-
27
- Use the `Run` button (or `SHIFT` + `ENTER`) to run each line.
28
-
29
23
  ## Creating Data Frames
30
24
 
31
25
  From an array
@@ -97,8 +91,6 @@ Select a column
97
91
  df[:a]
98
92
  ```
99
93
 
100
- > Note that strings and symbols are different keys, just like hashes. Creating a data frame from Active Record, a CSV, or Parquet uses strings.
101
-
102
94
  Select multiple columns
103
95
 
104
96
  ```ruby
@@ -4,7 +4,7 @@ module Rover
4
4
  data, options = process_args(args)
5
5
 
6
6
  @vectors = {}
7
- types = options[:types] || {}
7
+ types = (options[:types] || {}).transform_keys(&:to_s)
8
8
 
9
9
  if data.is_a?(DataFrame)
10
10
  data.vectors.each do |k, v|
@@ -14,7 +14,7 @@ module Rover
14
14
  data.to_h.each do |k, v|
15
15
  @vectors[k] =
16
16
  if v.respond_to?(:to_a)
17
- Vector.new(v, type: types[k])
17
+ Vector.new(v, type: types[k.to_s])
18
18
  else
19
19
  v
20
20
  end
@@ -23,7 +23,7 @@ module Rover
23
23
  # handle scalars
24
24
  size = @vectors.values.find { |v| v.is_a?(Vector) }&.size || 1
25
25
  @vectors.each_key do |k|
26
- @vectors[k] = to_vector(@vectors[k], size: size, type: types[k])
26
+ @vectors[k] = to_vector(@vectors[k], size: size, type: types[k.to_s])
27
27
  end
28
28
  elsif data.is_a?(Array)
29
29
  vectors = {}
@@ -38,12 +38,12 @@ module Rover
38
38
  end
39
39
  end
40
40
  vectors.each do |k, v|
41
- @vectors[k] = to_vector(v, type: types[k])
41
+ @vectors[k] = to_vector(v, type: types[k.to_s])
42
42
  end
43
43
  elsif defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || (data.is_a?(Class) && data < ActiveRecord::Base) || data.is_a?(ActiveRecord::Result))
44
44
  result = data.is_a?(ActiveRecord::Result) ? data : data.connection_pool.with_connection { |c| c.select_all(data.all.to_sql) }
45
45
  result.columns.each_with_index do |k, i|
46
- @vectors[k] = to_vector(result.rows.map { |r| r[i] }, type: types[k])
46
+ @vectors[k] = to_vector(result.rows.map { |r| r[i] }, type: types[k.to_s])
47
47
  end
48
48
  else
49
49
  raise ArgumentError, "Cannot cast to data frame: #{data.class.name}"
@@ -54,6 +54,9 @@ module Rover
54
54
  check_key(k)
55
55
  end
56
56
 
57
+ # TODO check for duplicate keys
58
+ @vectors.transform_keys!(&:to_s)
59
+
57
60
  # check sizes
58
61
  sizes = @vectors.values.map(&:size).uniq
59
62
  if sizes.size > 1
@@ -73,12 +76,12 @@ module Rover
73
76
  df = DataFrame.new
74
77
  where.each do |k|
75
78
  check_column(k)
76
- df[k] = @vectors[k]
79
+ df[k] = @vectors[k.to_s]
77
80
  end
78
81
  df
79
82
  else
80
83
  # single column
81
- @vectors[where]
84
+ @vectors[where.to_s]
82
85
  end
83
86
  end
84
87
 
@@ -103,7 +106,7 @@ module Rover
103
106
  check_key(k)
104
107
  v = to_vector(v, size: size)
105
108
  raise ArgumentError, "Size mismatch (given #{v.size}, expected #{size})" if @vectors.any? && v.size != size
106
- @vectors[k] = v
109
+ @vectors[k.to_s] = v
107
110
  end
108
111
 
109
112
  def size
@@ -140,6 +143,7 @@ module Rover
140
143
  mapping.each_key do |k|
141
144
  check_column(k)
142
145
  end
146
+ mapping = mapping.to_h { |k, v| [k.to_s, v.to_s] }
143
147
  # use transform_keys! to preserve order
144
148
  @vectors.transform_keys! do |k|
145
149
  mapping[k] || k
@@ -148,7 +152,7 @@ module Rover
148
152
  end
149
153
 
150
154
  def delete(key)
151
- @vectors.delete(key)
155
+ @vectors.delete(key.to_s)
152
156
  end
153
157
 
154
158
  def except(*keys)
@@ -163,7 +167,7 @@ module Rover
163
167
  end
164
168
 
165
169
  def include?(key)
166
- @vectors.include?(key)
170
+ @vectors.include?(key.to_s)
167
171
  end
168
172
 
169
173
  def head(n = 5)
@@ -237,6 +241,7 @@ module Rover
237
241
 
238
242
  def to_csv
239
243
  require "csv"
244
+
240
245
  CSV.generate do |csv|
241
246
  csv << keys
242
247
  numo = vectors.values.map(&:to_numo)
@@ -287,6 +292,7 @@ module Rover
287
292
  # for IRuby
288
293
  def to_html
289
294
  require "iruby"
295
+
290
296
  if size > 7
291
297
  # pass 8 rows so maxrows is applied
292
298
  IRuby::HTML.table((self[0..4] + self[-4..-1]).to_h, maxrows: 7)
@@ -347,7 +353,7 @@ module Rover
347
353
  end
348
354
 
349
355
  def group(*columns)
350
- Group.new(self, columns.flatten)
356
+ Group.new(self, columns.flatten.map(&:to_s))
351
357
  end
352
358
 
353
359
  [:max, :min, :median, :mean, :percentile, :sum, :std, :var].each do |name|
@@ -549,6 +555,9 @@ module Rover
549
555
  check_join_keys(self, self_on)
550
556
  check_join_keys(other, other_on)
551
557
 
558
+ self_on.map!(&:to_s)
559
+ other_on.map!(&:to_s)
560
+
552
561
  indexed = other.to_a.group_by { |r| r.values_at(*other_on) }
553
562
  indexed.default = []
554
563
 
data/lib/rover/group.rb CHANGED
@@ -35,7 +35,7 @@ module Rover
35
35
 
36
36
  # TODO make more efficient
37
37
  def grouped_dfs
38
- # cache here so we can reuse for multiple calcuations if needed
38
+ # cache here so we can reuse for multiple calculations if needed
39
39
  @grouped_dfs ||= begin
40
40
  groups = Hash.new { |hash, key| hash[key] = [] }
41
41
  i = 0
data/lib/rover/vector.rb CHANGED
@@ -88,9 +88,10 @@ module Rover
88
88
  def [](v)
89
89
  if v.is_a?(Vector)
90
90
  Vector.new(v.to_numo.mask(@data))
91
- else
92
- # TODO return vector unless v is an integer in 0.4.0
91
+ elsif v.is_a?(Numeric)
93
92
  @data[v]
93
+ else
94
+ Vector.new(@data[v])
94
95
  end
95
96
  end
96
97
 
@@ -171,6 +172,7 @@ module Rover
171
172
  def map(&block)
172
173
  # convert to Ruby first to cast properly
173
174
  # https://github.com/ruby-numo/numo-narray/issues/181
175
+ # numo-narray-alt has same behavior
174
176
  Vector.new(@data.to_a.map(&block))
175
177
  end
176
178
 
@@ -204,8 +206,7 @@ module Rover
204
206
  if ndigits == 0
205
207
  Vector.new(@data.round)
206
208
  else
207
- # TODO pass type
208
- Vector.new(@data.to_a.map { |v| v.round(ndigits) })
209
+ Vector.new(@data.map { |v| v.round(ndigits) })
209
210
  end
210
211
  end
211
212
 
@@ -213,8 +214,7 @@ module Rover
213
214
  if ndigits == 0
214
215
  Vector.new(@data.ceil)
215
216
  else
216
- # TODO pass type
217
- Vector.new(@data.to_a.map { |v| v.ceil(ndigits) })
217
+ Vector.new(@data.map { |v| v.ceil(ndigits) })
218
218
  end
219
219
  end
220
220
 
@@ -222,8 +222,7 @@ module Rover
222
222
  if ndigits == 0
223
223
  Vector.new(@data.floor)
224
224
  else
225
- # TODO pass type
226
- Vector.new(@data.to_a.map { |v| v.floor(ndigits) })
225
+ Vector.new(@data.map { |v| v.floor(ndigits) })
227
226
  end
228
227
  end
229
228
 
@@ -278,14 +277,13 @@ module Rover
278
277
  end
279
278
 
280
279
  def mean
281
- # currently only floats have mean in Numo
282
- # https://github.com/ruby-numo/numo-narray/issues/79
283
- @data.cast_to(Numo::DFloat).mean
280
+ @data.mean
284
281
  end
285
282
 
286
283
  def median
287
284
  # need to cast to get correct result
288
285
  # https://github.com/ruby-numo/numo-narray/issues/165
286
+ # numo-narray-alt has same behavior
289
287
  @data.cast_to(Numo::DFloat).median
290
288
  end
291
289
 
@@ -327,18 +325,20 @@ module Rover
327
325
  to_a.zip(other.to_a, &block)
328
326
  end
329
327
 
330
- # TODO return element instead of vector if no argument in 0.4.0
331
- def first(n = 1)
332
- if n >= size
328
+ def first(n = NOT_SET)
329
+ if n == NOT_SET
330
+ @data[0]
331
+ elsif n >= size
333
332
  Vector.new(@data)
334
333
  else
335
334
  Vector.new(@data[0...n])
336
335
  end
337
336
  end
338
337
 
339
- # TODO return element instead of vector if no argument in 0.4.0
340
- def last(n = 1)
341
- if n >= size
338
+ def last(n = NOT_SET)
339
+ if n == NOT_SET
340
+ @data[-1]
341
+ elsif n >= size
342
342
  Vector.new(@data)
343
343
  else
344
344
  Vector.new(@data[-n..-1])
@@ -398,6 +398,7 @@ module Rover
398
398
  # for IRuby
399
399
  def to_html
400
400
  require "iruby"
401
+
401
402
  if size > 7
402
403
  # pass 8 rows so maxrows is applied
403
404
  IRuby::HTML.table(first(4).to_a + last(4).to_a, maxrows: 7)
data/lib/rover/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Rover
2
- VERSION = "0.4.1"
2
+ VERSION = "1.0.0"
3
3
  end
data/lib/rover.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # dependencies
2
- require "numo/narray"
2
+ require "numo/narray/alt"
3
3
 
4
4
  # modules
5
5
  require_relative "rover/data_frame"
@@ -40,7 +40,7 @@ module Rover
40
40
 
41
41
  raise ArgumentError, "Must specify headers" if headers == false
42
42
 
43
- # TODO use date converter in 0.4.0 - need to test performance
43
+ # TODO use date converter? need to test performance
44
44
  table = yield({converters: :numeric}.merge(csv_options))
45
45
 
46
46
  headers = nil if headers == true
@@ -112,6 +112,7 @@ module Rover
112
112
  table = yield
113
113
  data = {}
114
114
  types ||= {}
115
+ types = types.transform_keys(&:to_s)
115
116
  table.each_column do |column|
116
117
  k = column.field.name
117
118
  if types[k]
metadata CHANGED
@@ -1,30 +1,28 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rover-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-10-08 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
- name: numo-narray
13
+ name: numo-narray-alt
15
14
  requirement: !ruby/object:Gem::Requirement
16
15
  requirements:
17
16
  - - ">="
18
17
  - !ruby/object:Gem::Version
19
- version: 0.9.1.9
18
+ version: '0.10'
20
19
  type: :runtime
21
20
  prerelease: false
22
21
  version_requirements: !ruby/object:Gem::Requirement
23
22
  requirements:
24
23
  - - ">="
25
24
  - !ruby/object:Gem::Version
26
- version: 0.9.1.9
27
- description:
25
+ version: '0.10'
28
26
  email: andrew@ankane.org
29
27
  executables: []
30
28
  extensions: []
@@ -43,7 +41,6 @@ homepage: https://github.com/ankane/rover
43
41
  licenses:
44
42
  - MIT
45
43
  metadata: {}
46
- post_install_message:
47
44
  rdoc_options: []
48
45
  require_paths:
49
46
  - lib
@@ -51,15 +48,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
51
48
  requirements:
52
49
  - - ">="
53
50
  - !ruby/object:Gem::Version
54
- version: '3.1'
51
+ version: '3.3'
55
52
  required_rubygems_version: !ruby/object:Gem::Requirement
56
53
  requirements:
57
54
  - - ">="
58
55
  - !ruby/object:Gem::Version
59
56
  version: '0'
60
57
  requirements: []
61
- rubygems_version: 3.5.16
62
- signing_key:
58
+ rubygems_version: 4.0.6
63
59
  specification_version: 4
64
60
  summary: Simple, powerful data frames for Ruby
65
61
  test_files: []