rover-df 0.2.1 → 0.2.5

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1c750c49e4cb313565a3b52a273c4820ae406471a1007ddb8e83742f3f2640f1
4
- data.tar.gz: 2a7469abd672a87c9b4c874469baf07f42b22067ac2281c424eeb7b60841bbfe
3
+ metadata.gz: ca39a558c3c12103f03fed4cb8f007fbd00a1f8e84b839916fd0010aae4613ba
4
+ data.tar.gz: 43df8cdc415cc036ac383f30b7c91a35b644067a3cb8ea199abd7452b98298d5
5
5
  SHA512:
6
- metadata.gz: 24e7874a67e498cadb8c2b4773f00c5cf53ab76e8231b7c762b8f403c2d4783809beb44ba3b37cb50b801032c3a3a9c140b18e1a10967eb331c517b5dd5251aa
7
- data.tar.gz: b62f4bfd7591e9c4869ab13e4e6efeba5c4e121c92470bc81d517ec85045a2fdc080a7a6ca3f61a15e7c98335543fcb11893d00d374cb5a3cf1294e12ed65130
6
+ metadata.gz: 2724c7e85ee7921f277be833cf89be638c14cbb37a44411bba86c42cacffe7c0e4b82ea04d4dfb3d694c6429ba41bc8e8c10f7cb40e5d34bf59d14755858735f
7
+ data.tar.gz: fa860158decbca0a0b35ccb82e6f73d9a513c37b483eca52d140842d5dd255899a2e1ded3ec4375a492b86d3ec09ffa53d4871e05f1fdad39f3d2630215417dc
data/CHANGELOG.md CHANGED
@@ -1,3 +1,24 @@
1
+ ## 0.2.5 (2021-09-25)
2
+
3
+ - Fixed column types with joins
4
+
5
+ ## 0.2.4 (2021-06-03)
6
+
7
+ - Added grouping for `std` and `var`
8
+ - Fixed `==` for data frames
9
+ - Fixed error with `first` and `last` for data frames
10
+ - Fixed error with `last` when vector size is smaller than `n`
11
+
12
+ ## 0.2.3 (2021-02-08)
13
+
14
+ - Added `select`, `reject`, and `map!` methods to vectors
15
+
16
+ ## 0.2.2 (2021-01-01)
17
+
18
+ - Added line, pie, area, and bar charts
19
+ - Added `|` and `^` for vectors
20
+ - Fixed typecasting with `map`
21
+
1
22
  ## 0.2.1 (2020-11-23)
2
23
 
3
24
  - Added `plot` method to data frames
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2020 Andrew Kane
1
+ Copyright (c) 2020-2021 Andrew Kane
2
2
 
3
3
  MIT License
4
4
 
data/README.md CHANGED
@@ -20,7 +20,7 @@ gem 'rover-df'
20
20
 
21
21
  A data frame is an in-memory table. It’s a useful data structure for data analysis and machine learning. It uses columnar storage for fast operations on columns.
22
22
 
23
- Try it out for forecasting by clicking the button below:
23
+ Try it out for forecasting by clicking the button below (it can take a few minutes to start):
24
24
 
25
25
  [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/ankane/ml-stack/master?filepath=Forecasting.ipynb)
26
26
 
@@ -123,24 +123,53 @@ df[1..3]
123
123
  df[[1, 4, 5]]
124
124
  ```
125
125
 
126
+ Iterate over rows
127
+
128
+ ```ruby
129
+ df.each_row { |row| ... }
130
+ ```
131
+
132
+ Iterate over a column
133
+
134
+ ```ruby
135
+ df[:a].each { |item| ... }
136
+ # or
137
+ df[:a].each_with_index { |item, index| ... }
138
+ ```
139
+
126
140
  ## Filtering
127
141
 
128
142
  Filter on a condition
129
143
 
130
144
  ```ruby
145
+ df[df[:a] == 100]
146
+ df[df[:a] != 100]
131
147
  df[df[:a] > 100]
148
+ df[df[:a] >= 100]
149
+ df[df[:a] < 100]
150
+ df[df[:a] <= 100]
132
151
  ```
133
152
 
134
- And
153
+ In
135
154
 
136
155
  ```ruby
137
- df[df[:a] > 100 & df[:b] == "one"]
156
+ df[df[:a].in?([1, 2, 3])]
157
+ df[df[:a].in?(1..3)]
158
+ df[df[:a].in?(["a", "b", "c"])]
138
159
  ```
139
160
 
140
- Or
161
+ Not in
141
162
 
142
163
  ```ruby
143
- df[df[:a] > 100 | df[:b] == "one"]
164
+ df[!df[:a].in?([1, 2, 3])]
165
+ ```
166
+
167
+ And, or, and exclusive or
168
+
169
+ ```ruby
170
+ df[(df[:a] > 100) & (df[:b] == "one")] # and
171
+ df[(df[:a] > 100) | (df[:b] == "one")] # or
172
+ df[(df[:a] > 100) ^ (df[:b] == "one")] # xor
144
173
  ```
145
174
 
146
175
  ## Operations
@@ -166,6 +195,8 @@ df[:a].median
166
195
  df[:a].percentile(90)
167
196
  df[:a].min
168
197
  df[:a].max
198
+ df[:a].std
199
+ df[:a].var
169
200
  ```
170
201
 
171
202
  Count occurrences
@@ -200,7 +231,7 @@ Multiple groups
200
231
  df.group([:a, :b]).count
201
232
  ```
202
233
 
203
- ## Visualization [master]
234
+ ## Visualization
204
235
 
205
236
  Add [Vega](https://github.com/ankane/vega) to your application’s Gemfile:
206
237
 
@@ -214,6 +245,12 @@ And use:
214
245
  df.plot(:a, :b)
215
246
  ```
216
247
 
248
+ Specify the chart type (`line`, `pie`, `column`, `bar`, `area`, or `scatter`)
249
+
250
+ ```ruby
251
+ df.plot(:a, :b, type: "pie")
252
+ ```
253
+
217
254
  ## Updating Data
218
255
 
219
256
  Add a new column
@@ -238,6 +275,14 @@ df[:a][0..2] = 1
238
275
  df[:a][0..2] = [1, 2, 3]
239
276
  ```
240
277
 
278
+ Update all elements
279
+
280
+ ```ruby
281
+ df[:a] = df[:a].map { |v| v.gsub("a", "b") }
282
+ # or
283
+ df[:a].map! { |v| v.gsub("a", "b") }
284
+ ```
285
+
241
286
  Update elements matching a condition
242
287
 
243
288
  ```ruby
@@ -72,6 +72,7 @@ module Rover
72
72
  # multiple columns
73
73
  df = DataFrame.new
74
74
  where.each do |k|
75
+ check_column(k, true)
75
76
  df[k] = @vectors[k]
76
77
  end
77
78
  df
@@ -162,7 +163,7 @@ module Rover
162
163
  last(n)
163
164
  end
164
165
 
165
- def first(n = nil)
166
+ def first(n = 1)
166
167
  new_vectors = {}
167
168
  @vectors.each do |k, v|
168
169
  new_vectors[k] = v.first(n)
@@ -170,7 +171,7 @@ module Rover
170
171
  DataFrame.new(new_vectors)
171
172
  end
172
173
 
173
- def last(n = nil)
174
+ def last(n = 1)
174
175
  new_vectors = {}
175
176
  @vectors.each do |k, v|
176
177
  new_vectors[k] = v.last(n)
@@ -300,7 +301,7 @@ module Rover
300
301
  Group.new(self, columns.flatten)
301
302
  end
302
303
 
303
- [:max, :min, :median, :mean, :percentile, :sum].each do |name|
304
+ [:max, :min, :median, :mean, :percentile, :sum, :std, :var].each do |name|
304
305
  define_method(name) do |column, *args|
305
306
  check_column(column)
306
307
  self[column].send(name, *args)
@@ -359,7 +360,7 @@ module Rover
359
360
  def ==(other)
360
361
  size == other.size &&
361
362
  keys == other.keys &&
362
- keys.all? { |k| self[k] == other[k] }
363
+ keys.all? { |k| self[k].to_numo == other[k].to_numo }
363
364
  end
364
365
 
365
366
  def plot(x = nil, y = nil, type: nil)
@@ -374,22 +375,41 @@ module Rover
374
375
  elsif types[x] == :object && self[y].numeric?
375
376
  "column"
376
377
  else
377
- raise "Cannot determine type"
378
+ raise "Cannot determine type. Use the type option."
378
379
  end
379
380
  end
380
381
  data = self[[x, y]]
381
382
 
382
383
  case type
383
- when "scatter"
384
+ when "line", "area"
385
+ x_type =
386
+ if data[x].numeric?
387
+ "quantitative"
388
+ elsif data[x].all? { |v| v.is_a?(Date) || v.is_a?(Time) }
389
+ "temporal"
390
+ else
391
+ "nominal"
392
+ end
393
+
394
+ scale = x_type == "temporal" ? {type: "utc"} : {}
395
+
384
396
  Vega.lite
385
397
  .data(data)
386
- .mark(type: "circle", tooltip: true)
398
+ .mark(type: type, tooltip: true, interpolate: "cardinal", point: {size: 60})
387
399
  .encoding(
388
- x: {field: x, type: "quantitative", scale: {zero: false}},
389
- y: {field: y, type: "quantitative", scale: {zero: false}},
390
- size: {value: 60}
400
+ x: {field: x, type: x_type, scale: scale},
401
+ y: {field: y, type: "quantitative"}
391
402
  )
392
- .config(axis: {title: nil, labelFontSize: 12})
403
+ .config(axis: {labelFontSize: 12})
404
+ when "pie"
405
+ Vega.lite
406
+ .data(data)
407
+ .mark(type: "arc", tooltip: true)
408
+ .encoding(
409
+ color: {field: x, type: "nominal", sort: "none", axis: {title: nil}, legend: {labelFontSize: 12}},
410
+ theta: {field: y, type: "quantitative"}
411
+ )
412
+ .view(stroke: nil)
393
413
  when "column"
394
414
  Vega.lite
395
415
  .data(data)
@@ -399,7 +419,27 @@ module Rover
399
419
  x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
400
420
  y: {field: y, type: "quantitative"}
401
421
  )
402
- .config(axis: {title: nil, labelFontSize: 12})
422
+ .config(axis: {labelFontSize: 12})
423
+ when "bar"
424
+ Vega.lite
425
+ .data(data)
426
+ .mark(type: "bar", tooltip: true)
427
+ .encoding(
428
+ # TODO determine label angle
429
+ y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
430
+ x: {field: y, type: "quantitative"}
431
+ )
432
+ .config(axis: {labelFontSize: 12})
433
+ when "scatter"
434
+ Vega.lite
435
+ .data(data)
436
+ .mark(type: "circle", tooltip: true)
437
+ .encoding(
438
+ x: {field: x, type: "quantitative", scale: {zero: false}},
439
+ y: {field: y, type: "quantitative", scale: {zero: false}},
440
+ size: {value: 60}
441
+ )
442
+ .config(axis: {labelFontSize: 12})
403
443
  else
404
444
  raise ArgumentError, "Invalid type: #{type}"
405
445
  end
@@ -435,10 +475,12 @@ module Rover
435
475
 
436
476
  left = how == "left"
437
477
 
478
+ types = {}
438
479
  vectors = {}
439
480
  keys = (self.keys + other.keys).uniq
440
481
  keys.each do |k|
441
482
  vectors[k] = []
483
+ types[k] = join_type(self.types[k], other.types[k])
442
484
  end
443
485
 
444
486
  each_row do |r|
@@ -458,7 +500,7 @@ module Rover
458
500
  end
459
501
  end
460
502
 
461
- DataFrame.new(vectors)
503
+ DataFrame.new(vectors, types: types)
462
504
  end
463
505
 
464
506
  def check_join_keys(df, keys)
@@ -467,8 +509,33 @@ module Rover
467
509
  raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
468
510
  end
469
511
 
470
- def check_column(key)
471
- raise ArgumentError, "Missing column: #{key}" unless include?(key)
512
+ # TODO in 0.3.0
513
+ # always use did_you_mean
514
+ def check_column(key, did_you_mean = false)
515
+ unless include?(key)
516
+ if did_you_mean
517
+ if RUBY_VERSION.to_f >= 2.6
518
+ raise KeyError.new("Missing column: #{key}", receiver: self, key: key)
519
+ else
520
+ raise KeyError.new("Missing column: #{key}")
521
+ end
522
+ else
523
+ raise ArgumentError, "Missing column: #{key}"
524
+ end
525
+ end
526
+ end
527
+
528
+ def join_type(a, b)
529
+ if a.nil?
530
+ b
531
+ elsif b.nil?
532
+ a
533
+ elsif a == b
534
+ a
535
+ else
536
+ # TODO specify
537
+ nil
538
+ end
472
539
  end
473
540
 
474
541
  def to_vector(v, size: nil, type: nil)
data/lib/rover/group.rb CHANGED
@@ -9,7 +9,7 @@ module Rover
9
9
  Group.new(@df, @columns + columns.flatten)
10
10
  end
11
11
 
12
- [:count, :max, :min, :mean, :median, :percentile, :sum].each do |name|
12
+ [:count, :max, :min, :mean, :median, :percentile, :sum, :std, :var].each do |name|
13
13
  define_method(name) do |*args|
14
14
  n = [name, args.first].compact.join("_")
15
15
 
data/lib/rover/vector.rb CHANGED
@@ -91,7 +91,7 @@ module Rover
91
91
  @data[k] = v
92
92
  end
93
93
 
94
- %w(+ - * / % ** &).each do |op|
94
+ %w(+ - * / % ** & | ^).each do |op|
95
95
  define_method(op) do |other|
96
96
  other = other.to_numo if other.is_a?(Vector)
97
97
  # TODO better logic
@@ -161,9 +161,22 @@ module Rover
161
161
  end
162
162
 
163
163
  def map(&block)
164
- mapped = @data.map(&block)
165
- mapped = mapped.to_a if mapped.is_a?(Numo::RObject) # re-evaluate cast
166
- Vector.new(mapped)
164
+ # convert to Ruby first to cast properly
165
+ # https://github.com/ruby-numo/numo-narray/issues/181
166
+ Vector.new(@data.to_a.map(&block))
167
+ end
168
+
169
+ def map!(&block)
170
+ @data = cast_data(@data.to_a.map(&block))
171
+ self
172
+ end
173
+
174
+ def select(&block)
175
+ Vector.new(@data.to_a.select(&block))
176
+ end
177
+
178
+ def reject(&block)
179
+ Vector.new(@data.to_a.reject(&block))
167
180
  end
168
181
 
169
182
  def tally
@@ -250,7 +263,11 @@ module Rover
250
263
  end
251
264
 
252
265
  def last(n = 1)
253
- Vector.new(@data[-n..-1])
266
+ if n >= size
267
+ Vector.new(@data)
268
+ else
269
+ Vector.new(@data[-n..-1])
270
+ end
254
271
  end
255
272
 
256
273
  def take(n)
@@ -306,7 +323,12 @@ module Rover
306
323
  # for IRuby
307
324
  def to_html
308
325
  require "iruby"
309
- IRuby::HTML.table(to_a)
326
+ if size > 7
327
+ # pass 8 rows so maxrows is applied
328
+ IRuby::HTML.table(first(4).to_a + last(4).to_a, maxrows: 7)
329
+ else
330
+ IRuby::HTML.table(to_a)
331
+ end
310
332
  end
311
333
 
312
334
  private
data/lib/rover/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Rover
2
- VERSION = "0.2.1"
2
+ VERSION = "0.2.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rover-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-24 00:00:00.000000000 Z
11
+ date: 2021-09-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -16,100 +16,16 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 0.9.1.7
19
+ version: 0.9.1.9
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 0.9.1.7
27
- - !ruby/object:Gem::Dependency
28
- name: bundler
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: rake
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: minitest
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- version: '5'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
67
- - !ruby/object:Gem::Version
68
- version: '5'
69
- - !ruby/object:Gem::Dependency
70
- name: activerecord
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: '5'
76
- type: :development
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: '5'
83
- - !ruby/object:Gem::Dependency
84
- name: sqlite3
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - ">="
95
- - !ruby/object:Gem::Version
96
- version: '0'
97
- - !ruby/object:Gem::Dependency
98
- name: iruby
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- version: '0'
104
- type: :development
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - ">="
109
- - !ruby/object:Gem::Version
110
- version: '0'
26
+ version: 0.9.1.9
111
27
  description:
112
- email: andrew@chartkick.com
28
+ email: andrew@ankane.org
113
29
  executables: []
114
30
  extensions: []
115
31
  extra_rdoc_files: []
@@ -142,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
142
58
  - !ruby/object:Gem::Version
143
59
  version: '0'
144
60
  requirements: []
145
- rubygems_version: 3.1.4
61
+ rubygems_version: 3.2.22
146
62
  signing_key:
147
63
  specification_version: 4
148
64
  summary: Simple, powerful data frames for Ruby