rover-df 0.2.1 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1c750c49e4cb313565a3b52a273c4820ae406471a1007ddb8e83742f3f2640f1
4
- data.tar.gz: 2a7469abd672a87c9b4c874469baf07f42b22067ac2281c424eeb7b60841bbfe
3
+ metadata.gz: ca39a558c3c12103f03fed4cb8f007fbd00a1f8e84b839916fd0010aae4613ba
4
+ data.tar.gz: 43df8cdc415cc036ac383f30b7c91a35b644067a3cb8ea199abd7452b98298d5
5
5
  SHA512:
6
- metadata.gz: 24e7874a67e498cadb8c2b4773f00c5cf53ab76e8231b7c762b8f403c2d4783809beb44ba3b37cb50b801032c3a3a9c140b18e1a10967eb331c517b5dd5251aa
7
- data.tar.gz: b62f4bfd7591e9c4869ab13e4e6efeba5c4e121c92470bc81d517ec85045a2fdc080a7a6ca3f61a15e7c98335543fcb11893d00d374cb5a3cf1294e12ed65130
6
+ metadata.gz: 2724c7e85ee7921f277be833cf89be638c14cbb37a44411bba86c42cacffe7c0e4b82ea04d4dfb3d694c6429ba41bc8e8c10f7cb40e5d34bf59d14755858735f
7
+ data.tar.gz: fa860158decbca0a0b35ccb82e6f73d9a513c37b483eca52d140842d5dd255899a2e1ded3ec4375a492b86d3ec09ffa53d4871e05f1fdad39f3d2630215417dc
data/CHANGELOG.md CHANGED
@@ -1,3 +1,24 @@
1
+ ## 0.2.5 (2021-09-25)
2
+
3
+ - Fixed column types with joins
4
+
5
+ ## 0.2.4 (2021-06-03)
6
+
7
+ - Added grouping for `std` and `var`
8
+ - Fixed `==` for data frames
9
+ - Fixed error with `first` and `last` for data frames
10
+ - Fixed error with `last` when vector size is smaller than `n`
11
+
12
+ ## 0.2.3 (2021-02-08)
13
+
14
+ - Added `select`, `reject`, and `map!` methods to vectors
15
+
16
+ ## 0.2.2 (2021-01-01)
17
+
18
+ - Added line, pie, area, and bar charts
19
+ - Added `|` and `^` for vectors
20
+ - Fixed typecasting with `map`
21
+
1
22
  ## 0.2.1 (2020-11-23)
2
23
 
3
24
  - Added `plot` method to data frames
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2020 Andrew Kane
1
+ Copyright (c) 2020-2021 Andrew Kane
2
2
 
3
3
  MIT License
4
4
 
data/README.md CHANGED
@@ -20,7 +20,7 @@ gem 'rover-df'
20
20
 
21
21
  A data frame is an in-memory table. It’s a useful data structure for data analysis and machine learning. It uses columnar storage for fast operations on columns.
22
22
 
23
- Try it out for forecasting by clicking the button below:
23
+ Try it out for forecasting by clicking the button below (it can take a few minutes to start):
24
24
 
25
25
  [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/ankane/ml-stack/master?filepath=Forecasting.ipynb)
26
26
 
@@ -123,24 +123,53 @@ df[1..3]
123
123
  df[[1, 4, 5]]
124
124
  ```
125
125
 
126
+ Iterate over rows
127
+
128
+ ```ruby
129
+ df.each_row { |row| ... }
130
+ ```
131
+
132
+ Iterate over a column
133
+
134
+ ```ruby
135
+ df[:a].each { |item| ... }
136
+ # or
137
+ df[:a].each_with_index { |item, index| ... }
138
+ ```
139
+
126
140
  ## Filtering
127
141
 
128
142
  Filter on a condition
129
143
 
130
144
  ```ruby
145
+ df[df[:a] == 100]
146
+ df[df[:a] != 100]
131
147
  df[df[:a] > 100]
148
+ df[df[:a] >= 100]
149
+ df[df[:a] < 100]
150
+ df[df[:a] <= 100]
132
151
  ```
133
152
 
134
- And
153
+ In
135
154
 
136
155
  ```ruby
137
- df[df[:a] > 100 & df[:b] == "one"]
156
+ df[df[:a].in?([1, 2, 3])]
157
+ df[df[:a].in?(1..3)]
158
+ df[df[:a].in?(["a", "b", "c"])]
138
159
  ```
139
160
 
140
- Or
161
+ Not in
141
162
 
142
163
  ```ruby
143
- df[df[:a] > 100 | df[:b] == "one"]
164
+ df[!df[:a].in?([1, 2, 3])]
165
+ ```
166
+
167
+ And, or, and exclusive or
168
+
169
+ ```ruby
170
+ df[(df[:a] > 100) & (df[:b] == "one")] # and
171
+ df[(df[:a] > 100) | (df[:b] == "one")] # or
172
+ df[(df[:a] > 100) ^ (df[:b] == "one")] # xor
144
173
  ```
145
174
 
146
175
  ## Operations
@@ -166,6 +195,8 @@ df[:a].median
166
195
  df[:a].percentile(90)
167
196
  df[:a].min
168
197
  df[:a].max
198
+ df[:a].std
199
+ df[:a].var
169
200
  ```
170
201
 
171
202
  Count occurrences
@@ -200,7 +231,7 @@ Multiple groups
200
231
  df.group([:a, :b]).count
201
232
  ```
202
233
 
203
- ## Visualization [master]
234
+ ## Visualization
204
235
 
205
236
  Add [Vega](https://github.com/ankane/vega) to your application’s Gemfile:
206
237
 
@@ -214,6 +245,12 @@ And use:
214
245
  df.plot(:a, :b)
215
246
  ```
216
247
 
248
+ Specify the chart type (`line`, `pie`, `column`, `bar`, `area`, or `scatter`)
249
+
250
+ ```ruby
251
+ df.plot(:a, :b, type: "pie")
252
+ ```
253
+
217
254
  ## Updating Data
218
255
 
219
256
  Add a new column
@@ -238,6 +275,14 @@ df[:a][0..2] = 1
238
275
  df[:a][0..2] = [1, 2, 3]
239
276
  ```
240
277
 
278
+ Update all elements
279
+
280
+ ```ruby
281
+ df[:a] = df[:a].map { |v| v.gsub("a", "b") }
282
+ # or
283
+ df[:a].map! { |v| v.gsub("a", "b") }
284
+ ```
285
+
241
286
  Update elements matching a condition
242
287
 
243
288
  ```ruby
@@ -72,6 +72,7 @@ module Rover
72
72
  # multiple columns
73
73
  df = DataFrame.new
74
74
  where.each do |k|
75
+ check_column(k, true)
75
76
  df[k] = @vectors[k]
76
77
  end
77
78
  df
@@ -162,7 +163,7 @@ module Rover
162
163
  last(n)
163
164
  end
164
165
 
165
- def first(n = nil)
166
+ def first(n = 1)
166
167
  new_vectors = {}
167
168
  @vectors.each do |k, v|
168
169
  new_vectors[k] = v.first(n)
@@ -170,7 +171,7 @@ module Rover
170
171
  DataFrame.new(new_vectors)
171
172
  end
172
173
 
173
- def last(n = nil)
174
+ def last(n = 1)
174
175
  new_vectors = {}
175
176
  @vectors.each do |k, v|
176
177
  new_vectors[k] = v.last(n)
@@ -300,7 +301,7 @@ module Rover
300
301
  Group.new(self, columns.flatten)
301
302
  end
302
303
 
303
- [:max, :min, :median, :mean, :percentile, :sum].each do |name|
304
+ [:max, :min, :median, :mean, :percentile, :sum, :std, :var].each do |name|
304
305
  define_method(name) do |column, *args|
305
306
  check_column(column)
306
307
  self[column].send(name, *args)
@@ -359,7 +360,7 @@ module Rover
359
360
  def ==(other)
360
361
  size == other.size &&
361
362
  keys == other.keys &&
362
- keys.all? { |k| self[k] == other[k] }
363
+ keys.all? { |k| self[k].to_numo == other[k].to_numo }
363
364
  end
364
365
 
365
366
  def plot(x = nil, y = nil, type: nil)
@@ -374,22 +375,41 @@ module Rover
374
375
  elsif types[x] == :object && self[y].numeric?
375
376
  "column"
376
377
  else
377
- raise "Cannot determine type"
378
+ raise "Cannot determine type. Use the type option."
378
379
  end
379
380
  end
380
381
  data = self[[x, y]]
381
382
 
382
383
  case type
383
- when "scatter"
384
+ when "line", "area"
385
+ x_type =
386
+ if data[x].numeric?
387
+ "quantitative"
388
+ elsif data[x].all? { |v| v.is_a?(Date) || v.is_a?(Time) }
389
+ "temporal"
390
+ else
391
+ "nominal"
392
+ end
393
+
394
+ scale = x_type == "temporal" ? {type: "utc"} : {}
395
+
384
396
  Vega.lite
385
397
  .data(data)
386
- .mark(type: "circle", tooltip: true)
398
+ .mark(type: type, tooltip: true, interpolate: "cardinal", point: {size: 60})
387
399
  .encoding(
388
- x: {field: x, type: "quantitative", scale: {zero: false}},
389
- y: {field: y, type: "quantitative", scale: {zero: false}},
390
- size: {value: 60}
400
+ x: {field: x, type: x_type, scale: scale},
401
+ y: {field: y, type: "quantitative"}
391
402
  )
392
- .config(axis: {title: nil, labelFontSize: 12})
403
+ .config(axis: {labelFontSize: 12})
404
+ when "pie"
405
+ Vega.lite
406
+ .data(data)
407
+ .mark(type: "arc", tooltip: true)
408
+ .encoding(
409
+ color: {field: x, type: "nominal", sort: "none", axis: {title: nil}, legend: {labelFontSize: 12}},
410
+ theta: {field: y, type: "quantitative"}
411
+ )
412
+ .view(stroke: nil)
393
413
  when "column"
394
414
  Vega.lite
395
415
  .data(data)
@@ -399,7 +419,27 @@ module Rover
399
419
  x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
400
420
  y: {field: y, type: "quantitative"}
401
421
  )
402
- .config(axis: {title: nil, labelFontSize: 12})
422
+ .config(axis: {labelFontSize: 12})
423
+ when "bar"
424
+ Vega.lite
425
+ .data(data)
426
+ .mark(type: "bar", tooltip: true)
427
+ .encoding(
428
+ # TODO determine label angle
429
+ y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
430
+ x: {field: y, type: "quantitative"}
431
+ )
432
+ .config(axis: {labelFontSize: 12})
433
+ when "scatter"
434
+ Vega.lite
435
+ .data(data)
436
+ .mark(type: "circle", tooltip: true)
437
+ .encoding(
438
+ x: {field: x, type: "quantitative", scale: {zero: false}},
439
+ y: {field: y, type: "quantitative", scale: {zero: false}},
440
+ size: {value: 60}
441
+ )
442
+ .config(axis: {labelFontSize: 12})
403
443
  else
404
444
  raise ArgumentError, "Invalid type: #{type}"
405
445
  end
@@ -435,10 +475,12 @@ module Rover
435
475
 
436
476
  left = how == "left"
437
477
 
478
+ types = {}
438
479
  vectors = {}
439
480
  keys = (self.keys + other.keys).uniq
440
481
  keys.each do |k|
441
482
  vectors[k] = []
483
+ types[k] = join_type(self.types[k], other.types[k])
442
484
  end
443
485
 
444
486
  each_row do |r|
@@ -458,7 +500,7 @@ module Rover
458
500
  end
459
501
  end
460
502
 
461
- DataFrame.new(vectors)
503
+ DataFrame.new(vectors, types: types)
462
504
  end
463
505
 
464
506
  def check_join_keys(df, keys)
@@ -467,8 +509,33 @@ module Rover
467
509
  raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
468
510
  end
469
511
 
470
- def check_column(key)
471
- raise ArgumentError, "Missing column: #{key}" unless include?(key)
512
+ # TODO in 0.3.0
513
+ # always use did_you_mean
514
+ def check_column(key, did_you_mean = false)
515
+ unless include?(key)
516
+ if did_you_mean
517
+ if RUBY_VERSION.to_f >= 2.6
518
+ raise KeyError.new("Missing column: #{key}", receiver: self, key: key)
519
+ else
520
+ raise KeyError.new("Missing column: #{key}")
521
+ end
522
+ else
523
+ raise ArgumentError, "Missing column: #{key}"
524
+ end
525
+ end
526
+ end
527
+
528
+ def join_type(a, b)
529
+ if a.nil?
530
+ b
531
+ elsif b.nil?
532
+ a
533
+ elsif a == b
534
+ a
535
+ else
536
+ # TODO specify
537
+ nil
538
+ end
472
539
  end
473
540
 
474
541
  def to_vector(v, size: nil, type: nil)
data/lib/rover/group.rb CHANGED
@@ -9,7 +9,7 @@ module Rover
9
9
  Group.new(@df, @columns + columns.flatten)
10
10
  end
11
11
 
12
- [:count, :max, :min, :mean, :median, :percentile, :sum].each do |name|
12
+ [:count, :max, :min, :mean, :median, :percentile, :sum, :std, :var].each do |name|
13
13
  define_method(name) do |*args|
14
14
  n = [name, args.first].compact.join("_")
15
15
 
data/lib/rover/vector.rb CHANGED
@@ -91,7 +91,7 @@ module Rover
91
91
  @data[k] = v
92
92
  end
93
93
 
94
- %w(+ - * / % ** &).each do |op|
94
+ %w(+ - * / % ** & | ^).each do |op|
95
95
  define_method(op) do |other|
96
96
  other = other.to_numo if other.is_a?(Vector)
97
97
  # TODO better logic
@@ -161,9 +161,22 @@ module Rover
161
161
  end
162
162
 
163
163
  def map(&block)
164
- mapped = @data.map(&block)
165
- mapped = mapped.to_a if mapped.is_a?(Numo::RObject) # re-evaluate cast
166
- Vector.new(mapped)
164
+ # convert to Ruby first to cast properly
165
+ # https://github.com/ruby-numo/numo-narray/issues/181
166
+ Vector.new(@data.to_a.map(&block))
167
+ end
168
+
169
+ def map!(&block)
170
+ @data = cast_data(@data.to_a.map(&block))
171
+ self
172
+ end
173
+
174
+ def select(&block)
175
+ Vector.new(@data.to_a.select(&block))
176
+ end
177
+
178
+ def reject(&block)
179
+ Vector.new(@data.to_a.reject(&block))
167
180
  end
168
181
 
169
182
  def tally
@@ -250,7 +263,11 @@ module Rover
250
263
  end
251
264
 
252
265
  def last(n = 1)
253
- Vector.new(@data[-n..-1])
266
+ if n >= size
267
+ Vector.new(@data)
268
+ else
269
+ Vector.new(@data[-n..-1])
270
+ end
254
271
  end
255
272
 
256
273
  def take(n)
@@ -306,7 +323,12 @@ module Rover
306
323
  # for IRuby
307
324
  def to_html
308
325
  require "iruby"
309
- IRuby::HTML.table(to_a)
326
+ if size > 7
327
+ # pass 8 rows so maxrows is applied
328
+ IRuby::HTML.table(first(4).to_a + last(4).to_a, maxrows: 7)
329
+ else
330
+ IRuby::HTML.table(to_a)
331
+ end
310
332
  end
311
333
 
312
334
  private
data/lib/rover/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Rover
2
- VERSION = "0.2.1"
2
+ VERSION = "0.2.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rover-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-24 00:00:00.000000000 Z
11
+ date: 2021-09-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -16,100 +16,16 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 0.9.1.7
19
+ version: 0.9.1.9
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 0.9.1.7
27
- - !ruby/object:Gem::Dependency
28
- name: bundler
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: rake
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: minitest
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- version: '5'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
67
- - !ruby/object:Gem::Version
68
- version: '5'
69
- - !ruby/object:Gem::Dependency
70
- name: activerecord
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: '5'
76
- type: :development
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: '5'
83
- - !ruby/object:Gem::Dependency
84
- name: sqlite3
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - ">="
95
- - !ruby/object:Gem::Version
96
- version: '0'
97
- - !ruby/object:Gem::Dependency
98
- name: iruby
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- version: '0'
104
- type: :development
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - ">="
109
- - !ruby/object:Gem::Version
110
- version: '0'
26
+ version: 0.9.1.9
111
27
  description:
112
- email: andrew@chartkick.com
28
+ email: andrew@ankane.org
113
29
  executables: []
114
30
  extensions: []
115
31
  extra_rdoc_files: []
@@ -142,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
142
58
  - !ruby/object:Gem::Version
143
59
  version: '0'
144
60
  requirements: []
145
- rubygems_version: 3.1.4
61
+ rubygems_version: 3.2.22
146
62
  signing_key:
147
63
  specification_version: 4
148
64
  summary: Simple, powerful data frames for Ruby