rover-df 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b003d311b623fdd38fee4c6fa76129ad4bba042e8193c1872928cb05085daad3
4
- data.tar.gz: d0c8c04b2a8aec3ea5b7616cbcda61f03a12c96fc8f9a0c7aa29fc898948b759
3
+ metadata.gz: 2fb7c518e0c1d61e601012d0beff064fe04b6cfcfb852fd97d28b999f1173445
4
+ data.tar.gz: b1a3f177d05095799dc2f02082d921d87774375e3f91db468539540c0c5d9482
5
5
  SHA512:
6
- metadata.gz: 8033d8ae9e5fb8c8e767ba68897d37745cc5d35a7a82cb2847e2b1d2c3adf8eeb406914cd47949d8f4c3f21307617ab550f435e7a5c257fe1187ed47dd943829
7
- data.tar.gz: acfdca4ad081e2722c4b5269824de123d26aadab28532837d2bfc717c2ca263f73dc4335963beae654b24d4c17cebbefdcce761a946f391340a306c9ca2a8c9b
6
+ metadata.gz: 5a98bce5cdb1cd8ed2442dd54cdcc0bcb93f39ff0daa4c4386a5805b285bd898a10278aad2522947bc944e4f995c833ebbe96f08b29421e048fd850cb2cf7903
7
+ data.tar.gz: a64bdc8da3a2202f0b4583d02eb162e07d8ae387bacf4f434ab8b6377a8752da0919c24506616df0bb90667dc1eab9f7b4acd6076ef932d1fb69f57c395e6e00
@@ -1,3 +1,11 @@
1
+ ## 0.2.0 (2020-08-17)
2
+
3
+ - Added `numeric?` and `zip` methods to vectors
4
+ - Changed group calculations to return a data frame instead of a hash
5
+ - Changed `each_row` to return enumerator
6
+ - Improved inspect
7
+ - Fixed `any?`, `all?`, and `uniq` for boolean vectors
8
+
1
9
  ## 0.1.1 (2020-06-10)
2
10
 
3
11
  - Added methods and options for types
data/README.md CHANGED
@@ -141,12 +141,6 @@ Or
141
141
  df[df[:a] > 100 | df[:b] == "one"]
142
142
  ```
143
143
 
144
- Not
145
-
146
- ```ruby
147
- df[df[:a] != 100]
148
- ```
149
-
150
144
  ## Operations
151
145
 
152
146
  Basic operations
@@ -340,7 +334,7 @@ df.to_csv
340
334
 
341
335
  ## Types
342
336
 
343
- Pass column types when creating a data frame
337
+ You can specify column types when creating a data frame
344
338
 
345
339
  ```ruby
346
340
  Rover::DataFrame.new(data, types: {"a" => :int, "b" => :float})
@@ -81,8 +81,9 @@ module Rover
81
81
  end
82
82
  end
83
83
 
84
- # return each row as a hash
85
84
  def each_row
85
+ return enum_for(:each_row) unless block_given?
86
+
86
87
  size.times do |i|
87
88
  yield @vectors.map { |k, v| [k, v[i]] }.to_h
88
89
  end
@@ -247,18 +248,19 @@ module Rover
247
248
  line_start = 0
248
249
  spaces = 2
249
250
 
251
+ summarize = size >= 30
252
+
250
253
  @vectors.each do |k, v|
251
- v = v.first(5).to_a
254
+ v = summarize ? v.first(5).to_a + ["..."] + v.last(5).to_a : v.to_a
252
255
  width = ([k] + v).map(&:to_s).map(&:size).max
253
256
  width = 3 if width < 3
254
257
 
255
258
  if lines.empty? || lines[-2].map { |l| l.size + spaces }.sum + width > 120
256
259
  line_start = lines.size
257
260
  lines << []
258
- [size, 5].min.times do |i|
261
+ v.size.times do |i|
259
262
  lines << []
260
263
  end
261
- lines << [] if size > 5
262
264
  lines << []
263
265
  end
264
266
 
@@ -266,7 +268,6 @@ module Rover
266
268
  v.each_with_index do |v2, i|
267
269
  lines[line_start + 1 + i] << "%#{width}s" % v2.to_s
268
270
  end
269
- lines[line_start + 6] << "%#{width}s" % "..." if size > 5
270
271
  end
271
272
 
272
273
  lines.pop
@@ -7,11 +7,14 @@ module Rover
7
7
 
8
8
  [:count, :max, :min, :mean, :median, :percentile, :sum].each do |name|
9
9
  define_method(name) do |*args|
10
- result = {}
10
+ n = [name, args.first].compact.join("_")
11
+
12
+ rows = []
11
13
  grouped_dfs.each do |k, df|
12
- result[k] = df.send(name, *args)
14
+ rows << k.merge(n => df.send(name, *args))
13
15
  end
14
- result
16
+
17
+ DataFrame.new(rows)
15
18
  end
16
19
  end
17
20
 
@@ -26,21 +29,15 @@ module Rover
26
29
  raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
27
30
 
28
31
  groups = Hash.new { |hash, key| hash[key] = [] }
29
- if @columns.size == 1
30
- @df[@columns.first].each_with_index do |v, i|
31
- groups[v] << i
32
- end
33
- else
34
- i = 0
35
- @df.each_row do |row|
36
- groups[@columns.map { |c| row[c] }] << i
37
- i += 1
38
- end
32
+ i = 0
33
+ @df.each_row do |row|
34
+ groups[row.slice(*@columns)] << i
35
+ i += 1
39
36
  end
40
37
 
41
38
  result = {}
42
- groups.each do |k, indexes|
43
- result[k] = @df[indexes]
39
+ groups.keys.each do |k|
40
+ result[k] = @df[groups[k]]
44
41
  end
45
42
  result
46
43
  end
@@ -44,6 +44,10 @@ module Rover
44
44
  a
45
45
  end
46
46
 
47
+ def numeric?
48
+ ![:object, :bool].include?(type)
49
+ end
50
+
47
51
  def size
48
52
  @data.size
49
53
  end
@@ -51,7 +55,7 @@ module Rover
51
55
  alias_method :count, :size
52
56
 
53
57
  def uniq
54
- Vector.new(@data.to_a.uniq)
58
+ Vector.new(to_a.uniq)
55
59
  end
56
60
 
57
61
  def missing
@@ -226,11 +230,15 @@ module Rover
226
230
  end
227
231
 
228
232
  def all?(&block)
229
- @data.to_a.all?(&block)
233
+ to_a.all?(&block)
230
234
  end
231
235
 
232
236
  def any?(&block)
233
- @data.to_a.any?(&block)
237
+ to_a.any?(&block)
238
+ end
239
+
240
+ def zip(other, &block)
241
+ to_a.zip(other.to_a, &block)
234
242
  end
235
243
 
236
244
  def first(n = 1)
@@ -1,3 +1,3 @@
1
1
  module Rover
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rover-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-10 00:00:00.000000000 Z
11
+ date: 2020-08-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray