rover-df 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +1 -7
- data/lib/rover/data_frame.rb +6 -5
- data/lib/rover/group.rb +12 -15
- data/lib/rover/vector.rb +11 -3
- data/lib/rover/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2fb7c518e0c1d61e601012d0beff064fe04b6cfcfb852fd97d28b999f1173445
|
4
|
+
data.tar.gz: b1a3f177d05095799dc2f02082d921d87774375e3f91db468539540c0c5d9482
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5a98bce5cdb1cd8ed2442dd54cdcc0bcb93f39ff0daa4c4386a5805b285bd898a10278aad2522947bc944e4f995c833ebbe96f08b29421e048fd850cb2cf7903
|
7
|
+
data.tar.gz: a64bdc8da3a2202f0b4583d02eb162e07d8ae387bacf4f434ab8b6377a8752da0919c24506616df0bb90667dc1eab9f7b4acd6076ef932d1fb69f57c395e6e00
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
## 0.2.0 (2020-08-17)
|
2
|
+
|
3
|
+
- Added `numeric?` and `zip` methods to vectors
|
4
|
+
- Changed group calculations to return a data frame instead of a hash
|
5
|
+
- Changed `each_row` to return enumerator
|
6
|
+
- Improved inspect
|
7
|
+
- Fixed `any?`, `all?`, and `uniq` for boolean vectors
|
8
|
+
|
1
9
|
## 0.1.1 (2020-06-10)
|
2
10
|
|
3
11
|
- Added methods and options for types
|
data/README.md
CHANGED
@@ -141,12 +141,6 @@ Or
|
|
141
141
|
df[df[:a] > 100 | df[:b] == "one"]
|
142
142
|
```
|
143
143
|
|
144
|
-
Not
|
145
|
-
|
146
|
-
```ruby
|
147
|
-
df[df[:a] != 100]
|
148
|
-
```
|
149
|
-
|
150
144
|
## Operations
|
151
145
|
|
152
146
|
Basic operations
|
@@ -340,7 +334,7 @@ df.to_csv
|
|
340
334
|
|
341
335
|
## Types
|
342
336
|
|
343
|
-
|
337
|
+
You can specify column types when creating a data frame
|
344
338
|
|
345
339
|
```ruby
|
346
340
|
Rover::DataFrame.new(data, types: {"a" => :int, "b" => :float})
|
data/lib/rover/data_frame.rb
CHANGED
@@ -81,8 +81,9 @@ module Rover
|
|
81
81
|
end
|
82
82
|
end
|
83
83
|
|
84
|
-
# return each row as a hash
|
85
84
|
def each_row
|
85
|
+
return enum_for(:each_row) unless block_given?
|
86
|
+
|
86
87
|
size.times do |i|
|
87
88
|
yield @vectors.map { |k, v| [k, v[i]] }.to_h
|
88
89
|
end
|
@@ -247,18 +248,19 @@ module Rover
|
|
247
248
|
line_start = 0
|
248
249
|
spaces = 2
|
249
250
|
|
251
|
+
summarize = size >= 30
|
252
|
+
|
250
253
|
@vectors.each do |k, v|
|
251
|
-
v = v.first(5).to_a
|
254
|
+
v = summarize ? v.first(5).to_a + ["..."] + v.last(5).to_a : v.to_a
|
252
255
|
width = ([k] + v).map(&:to_s).map(&:size).max
|
253
256
|
width = 3 if width < 3
|
254
257
|
|
255
258
|
if lines.empty? || lines[-2].map { |l| l.size + spaces }.sum + width > 120
|
256
259
|
line_start = lines.size
|
257
260
|
lines << []
|
258
|
-
|
261
|
+
v.size.times do |i|
|
259
262
|
lines << []
|
260
263
|
end
|
261
|
-
lines << [] if size > 5
|
262
264
|
lines << []
|
263
265
|
end
|
264
266
|
|
@@ -266,7 +268,6 @@ module Rover
|
|
266
268
|
v.each_with_index do |v2, i|
|
267
269
|
lines[line_start + 1 + i] << "%#{width}s" % v2.to_s
|
268
270
|
end
|
269
|
-
lines[line_start + 6] << "%#{width}s" % "..." if size > 5
|
270
271
|
end
|
271
272
|
|
272
273
|
lines.pop
|
data/lib/rover/group.rb
CHANGED
@@ -7,11 +7,14 @@ module Rover
|
|
7
7
|
|
8
8
|
[:count, :max, :min, :mean, :median, :percentile, :sum].each do |name|
|
9
9
|
define_method(name) do |*args|
|
10
|
-
|
10
|
+
n = [name, args.first].compact.join("_")
|
11
|
+
|
12
|
+
rows = []
|
11
13
|
grouped_dfs.each do |k, df|
|
12
|
-
|
14
|
+
rows << k.merge(n => df.send(name, *args))
|
13
15
|
end
|
14
|
-
|
16
|
+
|
17
|
+
DataFrame.new(rows)
|
15
18
|
end
|
16
19
|
end
|
17
20
|
|
@@ -26,21 +29,15 @@ module Rover
|
|
26
29
|
raise ArgumentError, "Missing keys: #{missing_keys.join(", ")}" if missing_keys.any?
|
27
30
|
|
28
31
|
groups = Hash.new { |hash, key| hash[key] = [] }
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
else
|
34
|
-
i = 0
|
35
|
-
@df.each_row do |row|
|
36
|
-
groups[@columns.map { |c| row[c] }] << i
|
37
|
-
i += 1
|
38
|
-
end
|
32
|
+
i = 0
|
33
|
+
@df.each_row do |row|
|
34
|
+
groups[row.slice(*@columns)] << i
|
35
|
+
i += 1
|
39
36
|
end
|
40
37
|
|
41
38
|
result = {}
|
42
|
-
groups.each do |k
|
43
|
-
result[k] = @df[
|
39
|
+
groups.keys.each do |k|
|
40
|
+
result[k] = @df[groups[k]]
|
44
41
|
end
|
45
42
|
result
|
46
43
|
end
|
data/lib/rover/vector.rb
CHANGED
@@ -44,6 +44,10 @@ module Rover
|
|
44
44
|
a
|
45
45
|
end
|
46
46
|
|
47
|
+
def numeric?
|
48
|
+
![:object, :bool].include?(type)
|
49
|
+
end
|
50
|
+
|
47
51
|
def size
|
48
52
|
@data.size
|
49
53
|
end
|
@@ -51,7 +55,7 @@ module Rover
|
|
51
55
|
alias_method :count, :size
|
52
56
|
|
53
57
|
def uniq
|
54
|
-
Vector.new(
|
58
|
+
Vector.new(to_a.uniq)
|
55
59
|
end
|
56
60
|
|
57
61
|
def missing
|
@@ -226,11 +230,15 @@ module Rover
|
|
226
230
|
end
|
227
231
|
|
228
232
|
def all?(&block)
|
229
|
-
|
233
|
+
to_a.all?(&block)
|
230
234
|
end
|
231
235
|
|
232
236
|
def any?(&block)
|
233
|
-
|
237
|
+
to_a.any?(&block)
|
238
|
+
end
|
239
|
+
|
240
|
+
def zip(other, &block)
|
241
|
+
to_a.zip(other.to_a, &block)
|
234
242
|
end
|
235
243
|
|
236
244
|
def first(n = 1)
|
data/lib/rover/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rover-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-06-10 00:00:00.000000000 Z
|
11
|
+
date: 2020-08-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|