rover-df 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b003d311b623fdd38fee4c6fa76129ad4bba042e8193c1872928cb05085daad3
4
- data.tar.gz: d0c8c04b2a8aec3ea5b7616cbcda61f03a12c96fc8f9a0c7aa29fc898948b759
3
+ metadata.gz: 2fb7c518e0c1d61e601012d0beff064fe04b6cfcfb852fd97d28b999f1173445
4
+ data.tar.gz: b1a3f177d05095799dc2f02082d921d87774375e3f91db468539540c0c5d9482
5
5
  SHA512:
6
- metadata.gz: 8033d8ae9e5fb8c8e767ba68897d37745cc5d35a7a82cb2847e2b1d2c3adf8eeb406914cd47949d8f4c3f21307617ab550f435e7a5c257fe1187ed47dd943829
7
- data.tar.gz: acfdca4ad081e2722c4b5269824de123d26aadab28532837d2bfc717c2ca263f73dc4335963beae654b24d4c17cebbefdcce761a946f391340a306c9ca2a8c9b
6
+ metadata.gz: 5a98bce5cdb1cd8ed2442dd54cdcc0bcb93f39ff0daa4c4386a5805b285bd898a10278aad2522947bc944e4f995c833ebbe96f08b29421e048fd850cb2cf7903
7
+ data.tar.gz: a64bdc8da3a2202f0b4583d02eb162e07d8ae387bacf4f434ab8b6377a8752da0919c24506616df0bb90667dc1eab9f7b4acd6076ef932d1fb69f57c395e6e00
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 0.2.0 (2020-08-17)
2
+
3
+ - Added `numeric?` and `zip` methods to vectors
4
+ - Changed group calculations to return a data frame instead of a hash
5
+ - Changed `each_row` to return an enumerator when called without a block
6
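+ - Improved `inspect` to show every row for data frames with fewer than 30 rows, and the first and last 5 rows otherwise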
7
+ - Fixed `any?`, `all?`, and `uniq` for boolean vectors
8
+
1
9
  ## 0.1.1 (2020-06-10)
2
10
 
3
11
  - Added methods and options for types
data/README.md CHANGED
@@ -141,12 +141,6 @@ Or
141
141
  df[(df[:a] > 100) | (df[:b] == "one")]
142
142
  ```
143
143
 
144
- Not
145
-
146
- ```ruby
147
- df[df[:a] != 100]
148
- ```
149
-
150
144
  ## Operations
151
145
 
152
146
  Basic operations
@@ -340,7 +334,7 @@ df.to_csv
340
334
 
341
335
  ## Types
342
336
 
343
- Pass column types when creating a data frame
337
+ You can specify column types when creating a data frame
344
338
 
345
339
  ```ruby
346
340
  Rover::DataFrame.new(data, types: {"a" => :int, "b" => :float})
data/lib/rover/data_frame.rb CHANGED
@@ -81,8 +81,9 @@ module Rover
81
81
  end
82
82
  end
83
83
 
84
- # return each row as a hash
85
84
  def each_row
85
+ return enum_for(:each_row) unless block_given?
86
+
86
87
  size.times do |i|
87
88
  yield @vectors.map { |k, v| [k, v[i]] }.to_h
88
89
  end
@@ -247,18 +248,19 @@ module Rover
247
248
  line_start = 0
248
249
  spaces = 2
249
250
 
251
+ summarize = size >= 30
252
+
250
253
  @vectors.each do |k, v|
251
- v = v.first(5).to_a
254
+ v = summarize ? v.first(5).to_a + ["..."] + v.last(5).to_a : v.to_a
252
255
  width = ([k] + v).map(&:to_s).map(&:size).max
253
256
  width = 3 if width < 3
254
257
 
255
258
  if lines.empty? || lines[-2].map { |l| l.size + spaces }.sum + width > 120
256
259
  line_start = lines.size
257
260
  lines << []
258
- [size, 5].min.times do |i|
261
+ v.size.times do |i|
259
262
  lines << []
260
263
  end
261
- lines << [] if size > 5
262
264
  lines << []
263
265
  end
264
266
 
@@ -266,7 +268,6 @@ module Rover
266
268
  v.each_with_index do |v2, i|
267
269
  lines[line_start + 1 + i] << "%#{width}s" % v2.to_s
268
270
  end
269
- lines[line_start + 6] << "%#{width}s" % "..." if size > 5
270
271
  end
271
272
 
272
273
  lines.pop
data/lib/rover/group.rb CHANGED
@@ -7,11 +7,14 @@ module Rover
7
7
 
8
8
  [:count, :max, :min, :mean, :median, :percentile, :sum].each do |name|
9
9
  define_method(name) do |*args|
10
- result = {}
10
+ n = [name, args.first].compact.join("_")
11
+
12
+ rows = []
11
13
  grouped_dfs.each do |k, df|
12
- result[k] = df.send(name, *args)
14
+ rows << k.merge(n => df.send(name, *args))
13
15
  end
14
- result
16
+
17
+ DataFrame.new(rows)
15
18
  end
16
19
  end
17
20
 
@@ -26,21 +29,15 @@ module Rover
26
29
  raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
27
30
 
28
31
  groups = Hash.new { |hash, key| hash[key] = [] }
29
- if @columns.size == 1
30
- @df[@columns.first].each_with_index do |v, i|
31
- groups[v] << i
32
- end
33
- else
34
- i = 0
35
- @df.each_row do |row|
36
- groups[@columns.map { |c| row[c] }] << i
37
- i += 1
38
- end
32
+ i = 0
33
+ @df.each_row do |row|
34
+ groups[row.slice(*@columns)] << i
35
+ i += 1
39
36
  end
40
37
 
41
38
  result = {}
42
- groups.each do |k, indexes|
43
- result[k] = @df[indexes]
39
+ groups.keys.each do |k|
40
+ result[k] = @df[groups[k]]
44
41
  end
45
42
  result
46
43
  end
data/lib/rover/vector.rb CHANGED
@@ -44,6 +44,10 @@ module Rover
44
44
  a
45
45
  end
46
46
 
47
+ def numeric?
48
+ ![:object, :bool].include?(type)
49
+ end
50
+
47
51
  def size
48
52
  @data.size
49
53
  end
@@ -51,7 +55,7 @@ module Rover
51
55
  alias_method :count, :size
52
56
 
53
57
  def uniq
54
- Vector.new(@data.to_a.uniq)
58
+ Vector.new(to_a.uniq)
55
59
  end
56
60
 
57
61
  def missing
@@ -226,11 +230,15 @@ module Rover
226
230
  end
227
231
 
228
232
  def all?(&block)
229
- @data.to_a.all?(&block)
233
+ to_a.all?(&block)
230
234
  end
231
235
 
232
236
  def any?(&block)
233
- @data.to_a.any?(&block)
237
+ to_a.any?(&block)
238
+ end
239
+
240
+ def zip(other, &block)
241
+ to_a.zip(other.to_a, &block)
234
242
  end
235
243
 
236
244
  def first(n = 1)
data/lib/rover/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Rover
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rover-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-10 00:00:00.000000000 Z
11
+ date: 2020-08-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray