daru 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.build.sh +6 -6
- data/.gitignore +2 -0
- data/CONTRIBUTING.md +7 -3
- data/History.md +36 -0
- data/README.md +21 -13
- data/Rakefile +16 -1
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +44 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru.gemspec +29 -5
- data/lib/daru.rb +30 -1
- data/lib/daru/accessors/array_wrapper.rb +2 -2
- data/lib/daru/accessors/nmatrix_wrapper.rb +6 -6
- data/lib/daru/core/group_by.rb +112 -31
- data/lib/daru/core/merge.rb +170 -0
- data/lib/daru/core/query.rb +95 -0
- data/lib/daru/dataframe.rb +335 -223
- data/lib/daru/date_time/index.rb +550 -0
- data/lib/daru/date_time/offsets.rb +397 -0
- data/lib/daru/index.rb +266 -54
- data/lib/daru/io/io.rb +1 -2
- data/lib/daru/maths/arithmetic/dataframe.rb +2 -2
- data/lib/daru/maths/arithmetic/vector.rb +2 -2
- data/lib/daru/maths/statistics/dataframe.rb +58 -8
- data/lib/daru/maths/statistics/vector.rb +229 -0
- data/lib/daru/vector.rb +230 -80
- data/lib/daru/version.rb +1 -1
- data/spec/core/group_by_spec.rb +16 -16
- data/spec/core/merge_spec.rb +52 -0
- data/spec/core/query_spec.rb +171 -0
- data/spec/dataframe_spec.rb +278 -280
- data/spec/date_time/data_spec.rb +199 -0
- data/spec/date_time/index_spec.rb +433 -0
- data/spec/date_time/offsets_spec.rb +371 -0
- data/spec/fixtures/stock_data.csv +500 -0
- data/spec/index_spec.rb +317 -11
- data/spec/io/io_spec.rb +18 -17
- data/spec/math/arithmetic/dataframe_spec.rb +3 -3
- data/spec/math/statistics/dataframe_spec.rb +39 -1
- data/spec/math/statistics/vector_spec.rb +163 -1
- data/spec/monkeys_spec.rb +4 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/vector_spec.rb +125 -60
- metadata +71 -14
- data/lib/daru/accessors/dataframe_by_vector.rb +0 -17
- data/lib/daru/multi_index.rb +0 -216
- data/spec/multi_index_spec.rb +0 -216
@@ -0,0 +1,31 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
df = Daru::DataFrame.new({
|
7
|
+
a: 10000.times.map { rand },
|
8
|
+
b: 10000.times.map { rand },
|
9
|
+
c: 10000.times.map { rand }
|
10
|
+
})
|
11
|
+
|
12
|
+
Benchmark.bm do |x|
|
13
|
+
x.report("Single Vector access") do
|
14
|
+
df[:a]
|
15
|
+
end
|
16
|
+
|
17
|
+
x.report("Access as range") do
|
18
|
+
df[:a..:c]
|
19
|
+
end
|
20
|
+
|
21
|
+
x.report("Access with commas") do
|
22
|
+
df[:a, :c]
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
# ======== Benchmarks =======
|
27
|
+
#
|
28
|
+
# user system total real
|
29
|
+
# Single Vector access 0.000000 0.000000 0.000000 ( 0.000012)
|
30
|
+
# Access as range 0.090000 0.000000 0.090000 ( 0.084584)
|
31
|
+
# Access with commas 0.050000 0.000000 0.050000 ( 0.051951)
|
@@ -0,0 +1,42 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
df = Daru::DataFrame.new({
|
7
|
+
a: 100000.times.map { rand },
|
8
|
+
b: 100000.times.map { rand },
|
9
|
+
c: 100000.times.map { rand }
|
10
|
+
})
|
11
|
+
|
12
|
+
index = Daru::Index.new((0...100000).to_a.shuffle)
|
13
|
+
|
14
|
+
Benchmark.bm do |x|
|
15
|
+
x.report("Assign new vector as Array") do
|
16
|
+
df[:d] = 100000.times.map { rand }
|
17
|
+
end
|
18
|
+
|
19
|
+
x.report("Reassign same vector as Array") do
|
20
|
+
df[:a] = 100000.times.map { rand }
|
21
|
+
end
|
22
|
+
|
23
|
+
x.report("Assign new Vector as Daru::Vector") do
|
24
|
+
df[:e] = Daru::Vector.new(100000.times.map { rand })
|
25
|
+
end
|
26
|
+
|
27
|
+
x.report("Reassign same Vector as Daru::Vector") do
|
28
|
+
df[:b] = Daru::Vector.new(100000.times.map { rand })
|
29
|
+
end
|
30
|
+
|
31
|
+
x.report("Reassgin differently indexed Daru::Vector") do
|
32
|
+
df[:b] = Daru::Vector.new(100000.times.map { rand }, index: index)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# ===== Benchmarks =====
|
37
|
+
# user system total real
|
38
|
+
# Assign new vector as Array 0.370000 0.000000 0.370000 (0.364515)
|
39
|
+
# Reassign same vector as Array 0.470000 0.000000 0.470000 (0.471408)
|
40
|
+
# Assign new Vector as Daru::Vector 0.940000 0.000000 0.940000 (0.947879)
|
41
|
+
# Reassign same Vector as Daru::Vector 0.760000 0.020000 0.780000 (0.769969)
|
42
|
+
# Reassgin differently indexed Daru::Vector <Too embarrassingly slow.>
|
@@ -0,0 +1,48 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
df = Daru::DataFrame.new({
|
7
|
+
a: 100000.times.map { |i| i },
|
8
|
+
b: 100000.times.map { |i| i },
|
9
|
+
c: 100000.times.map { |i| i }
|
10
|
+
}, index: Daru::Index.new(100000.times.map.to_a.shuffle))
|
11
|
+
|
12
|
+
puts "Benchmarking DataFrame#where\n"
|
13
|
+
Benchmark.bm do |x|
|
14
|
+
x.report("Basic one liner") do
|
15
|
+
df.where(df[:a].mt(2341))
|
16
|
+
end
|
17
|
+
|
18
|
+
x.report("Little complex statement") do
|
19
|
+
df.where(df[:a].lt(235) | df[:b].eq(2341) | df[:c].in([35,355,22]))
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
puts "Benchmarking Vector#where\n"
|
24
|
+
v = Daru::Vector.new(
|
25
|
+
100000.times.map { |i| i }, index: 100000.times.map.to_a.shuffle)
|
26
|
+
|
27
|
+
Benchmark.bm do |x|
|
28
|
+
x.report("Basic one liner") do
|
29
|
+
v.where(v.mteq(1000))
|
30
|
+
end
|
31
|
+
|
32
|
+
x.report("Little complex statement") do
|
33
|
+
v.where(v.lt(235) & v.eq(2341) | v.in([23,511,55]))
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
# ====== Benchmarks ======
|
38
|
+
#
|
39
|
+
# Benchmarking DataFrame#where
|
40
|
+
#
|
41
|
+
# user system total real
|
42
|
+
# Basic one liner 0.700000 0.000000 0.700000 (0.703532)
|
43
|
+
# Little complex statement 0.120000 0.000000 0.120000 (0.121765)
|
44
|
+
#
|
45
|
+
# Benchmarking Vector#where
|
46
|
+
# user system total real
|
47
|
+
# Basic one liner 0.240000 0.000000 0.240000 (0.245787)
|
48
|
+
# Little complex statement 0.100000 0.000000 0.100000 (0.094423)
|
@@ -0,0 +1,28 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
df = Daru::DataFrame.new({
|
7
|
+
a: [1,2,3,4,5,6]*100,
|
8
|
+
b: ['a','b','c','d','e','f']*100,
|
9
|
+
c: [11,22,33,44,55,66]*100
|
10
|
+
}, index: (1..600).to_a.shuffle)
|
11
|
+
|
12
|
+
Benchmark.bm do |x|
|
13
|
+
x.report("where") do
|
14
|
+
df.where(df[:a].eq(2) | df[:c].eq(55))
|
15
|
+
end
|
16
|
+
|
17
|
+
x.report("filter_rows") do
|
18
|
+
df.filter(:row) do |r|
|
19
|
+
r[:a] == 2 or r[:c] == 55
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
# ===== Benchmarks =====
|
25
|
+
#
|
26
|
+
# user system total real
|
27
|
+
# where 0.000000 0.000000 0.000000 ( 0.002575)
|
28
|
+
# filter_rows 0.210000 0.000000 0.210000 ( 0.205403)
|
data/daru.gemspec
CHANGED
@@ -5,11 +5,11 @@ require 'daru/version.rb'
|
|
5
5
|
|
6
6
|
Daru::DESCRIPTION = <<MSG
|
7
7
|
Daru (Data Analysis in RUby) is a library for analysis, manipulation and visualization
|
8
|
-
of data.
|
8
|
+
of data. Daru works seamlessly across interpreters and leverages interpreter-specific
|
9
|
+
optimizations whenever they are available.
|
9
10
|
|
10
|
-
|
11
|
-
|
12
|
-
making working with data super simple and intuitive.
|
11
|
+
It is the default data storage gem for all the statsample gems (glm, timeseries, etc.)
|
12
|
+
and can be used with many others like mixed_models, gnuplotrb, nyaplot and iruby.
|
13
13
|
MSG
|
14
14
|
|
15
15
|
Gem::Specification.new do |spec|
|
@@ -27,16 +27,40 @@ Gem::Specification.new do |spec|
|
|
27
27
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
28
28
|
spec.require_paths = ["lib"]
|
29
29
|
|
30
|
+
spec.post_install_message = <<-EOF
|
31
|
+
*************************************************************************
|
32
|
+
Thank you for installing daru!
|
33
|
+
|
34
|
+
oOOOOOo
|
35
|
+
,| oO
|
36
|
+
//| |
|
37
|
+
\\| |
|
38
|
+
`| |
|
39
|
+
`-----`
|
40
|
+
|
41
|
+
|
42
|
+
Hope you love daru! For enhanced interactivity and better visualizations,
|
43
|
+
consider using gnuplotrb and nyaplot with iruby. For statistics use the
|
44
|
+
statsample family.
|
45
|
+
|
46
|
+
Read the README for interesting use cases and examples.
|
47
|
+
|
48
|
+
Cheers!
|
49
|
+
*************************************************************************
|
50
|
+
EOF
|
51
|
+
|
30
52
|
spec.add_runtime_dependency 'reportbuilder', '~> 1.4'
|
31
53
|
spec.add_runtime_dependency 'spreadsheet', '~> 1.0.3'
|
32
54
|
|
33
55
|
spec.add_development_dependency 'bundler', '~> 1.10'
|
34
56
|
spec.add_development_dependency 'rake'
|
57
|
+
spec.add_development_dependency 'pry', '~> 0.10'
|
58
|
+
spec.add_development_dependency 'pry-byebug'
|
35
59
|
spec.add_development_dependency 'rserve-client', '~> 0.3'
|
36
60
|
spec.add_development_dependency 'rspec'
|
37
61
|
spec.add_development_dependency 'awesome_print'
|
38
62
|
spec.add_development_dependency 'nyaplot', '~> 0.1.5'
|
39
63
|
spec.add_development_dependency 'nmatrix', '~> 0.1.0'
|
40
64
|
spec.add_development_dependency 'distribution', '~> 0.7'
|
41
|
-
spec.add_development_dependency 'gsl
|
65
|
+
spec.add_development_dependency 'rb-gsl', '~>1.16'
|
42
66
|
end
|
data/lib/daru.rb
CHANGED
@@ -3,6 +3,31 @@ def jruby?
|
|
3
3
|
end
|
4
4
|
|
5
5
|
module Daru
|
6
|
+
DAYS_OF_WEEK = {
|
7
|
+
'SUN' => 0,
|
8
|
+
'MON' => 1,
|
9
|
+
'TUE' => 2,
|
10
|
+
'WED' => 3,
|
11
|
+
'THU' => 4,
|
12
|
+
'FRI' => 5,
|
13
|
+
'SAT' => 6
|
14
|
+
}
|
15
|
+
|
16
|
+
MONTH_DAYS = {
|
17
|
+
1 => 31,
|
18
|
+
2 => 28,
|
19
|
+
3 => 31,
|
20
|
+
4 => 30,
|
21
|
+
5 => 31,
|
22
|
+
6 => 30,
|
23
|
+
7 => 31,
|
24
|
+
8 => 31,
|
25
|
+
9 => 30,
|
26
|
+
10 => 31,
|
27
|
+
11 => 30,
|
28
|
+
12 => 31
|
29
|
+
}
|
30
|
+
|
6
31
|
SPLIT_TOKEN = ','
|
7
32
|
class << self
|
8
33
|
@@lazy_update = false
|
@@ -42,9 +67,13 @@ require 'reportbuilder'
|
|
42
67
|
|
43
68
|
require 'daru/version.rb'
|
44
69
|
require 'daru/index.rb'
|
45
|
-
require 'daru/multi_index.rb'
|
46
70
|
require 'daru/vector.rb'
|
47
71
|
require 'daru/dataframe.rb'
|
48
72
|
require 'daru/monkeys.rb'
|
49
73
|
|
50
74
|
require 'daru/core/group_by.rb'
|
75
|
+
require 'daru/core/query.rb'
|
76
|
+
require 'daru/core/merge.rb'
|
77
|
+
|
78
|
+
require 'daru/date_time/offsets.rb'
|
79
|
+
require 'daru/date_time/index.rb'
|
@@ -34,6 +34,8 @@ module Daru
|
|
34
34
|
end
|
35
35
|
|
36
36
|
def []= index, value
|
37
|
+
raise ArgumentError, "Index #{index} does not exist" if
|
38
|
+
index > @size and index < @data.size
|
37
39
|
resize if index >= @data.size
|
38
40
|
@size += 1 if index == @size
|
39
41
|
|
@@ -42,7 +44,7 @@ module Daru
|
|
42
44
|
end
|
43
45
|
|
44
46
|
def == other
|
45
|
-
@data == other and @size == other.size
|
47
|
+
@data[0...@size] == other[0...@size] and @size == other.size
|
46
48
|
end
|
47
49
|
|
48
50
|
def delete_at index
|
@@ -59,8 +61,6 @@ module Daru
|
|
59
61
|
def << element
|
60
62
|
resize if @size >= @data.size
|
61
63
|
self[@size] = element
|
62
|
-
|
63
|
-
@size += 1
|
64
64
|
end
|
65
65
|
|
66
66
|
def to_a
|
@@ -74,7 +74,7 @@ module Daru
|
|
74
74
|
def resize size = @size*2
|
75
75
|
raise ArgumentError, "Size must be greater than current size" if size < @size
|
76
76
|
|
77
|
-
@data = NMatrix.new [size], @data.to_a
|
77
|
+
@data = NMatrix.new [size], @data.to_a, dtype: @nm_dtype
|
78
78
|
end
|
79
79
|
|
80
80
|
def mean
|
@@ -90,11 +90,11 @@ module Daru
|
|
90
90
|
end
|
91
91
|
|
92
92
|
def max
|
93
|
-
@data.max
|
93
|
+
@data[0...@size].max
|
94
94
|
end
|
95
95
|
|
96
96
|
def min
|
97
|
-
@data.min
|
97
|
+
@data[0...@size].min
|
98
98
|
end
|
99
99
|
end
|
100
100
|
end
|
data/lib/daru/core/group_by.rb
CHANGED
@@ -4,11 +4,19 @@ module Daru
|
|
4
4
|
|
5
5
|
attr_reader :groups
|
6
6
|
|
7
|
+
# Iterate over each group created by group_by. Each group is yielded to the
|
8
|
+
# block as a DataFrame.
|
9
|
+
def each_group &block
|
10
|
+
groups.keys.each do |k|
|
11
|
+
yield get_group(k)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
7
15
|
def initialize context, names
|
8
16
|
@groups = {}
|
9
17
|
@non_group_vectors = context.vectors.to_a - names
|
10
18
|
@context = context
|
11
|
-
vectors = names.map { |vec| context
|
19
|
+
vectors = names.map { |vec| context[vec].to_a }
|
12
20
|
tuples = vectors[0].zip(*vectors[1..-1])
|
13
21
|
keys = tuples.uniq.sort
|
14
22
|
|
@@ -18,35 +26,92 @@ module Daru
|
|
18
26
|
@groups.freeze
|
19
27
|
end
|
20
28
|
|
29
|
+
# Get a Daru::Vector of the size of each group.
|
21
30
|
def size
|
22
31
|
index =
|
23
32
|
if multi_indexed_grouping?
|
24
|
-
Daru::MultiIndex.
|
33
|
+
Daru::MultiIndex.from_tuples @groups.keys
|
25
34
|
else
|
26
|
-
Daru::Index.new
|
35
|
+
Daru::Index.new @groups.keys.flatten
|
27
36
|
end
|
28
37
|
|
29
38
|
values = @groups.values.map { |e| e.size }
|
30
39
|
Daru::Vector.new(values, index: index, name: :size)
|
31
40
|
end
|
32
41
|
|
42
|
+
# Get the first row of each group
|
33
43
|
def first
|
34
44
|
head(1)
|
35
45
|
end
|
36
46
|
|
47
|
+
# Get the last row of each group
|
37
48
|
def last
|
38
49
|
tail(1)
|
39
50
|
end
|
40
51
|
|
52
|
+
# Get the top 'n' rows of each group
|
53
|
+
# @param quantity [Fixnum] (5) The number of rows to take from each group.
|
54
|
+
# @example Usage of head
|
55
|
+
# df = Daru::DataFrame.new({
|
56
|
+
# a: %w{foo bar foo bar foo bar foo foo},
|
57
|
+
# b: %w{one one two three two two one three},
|
58
|
+
# c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
|
59
|
+
# d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
|
60
|
+
# })
|
61
|
+
# df.group_by([:a, :b]).head(1)
|
62
|
+
# # =>
|
63
|
+
# # #<Daru::DataFrame:82745170 @name = d7003f75-5eb9-4967-9303-c08dd9160224 @size = 6>
|
64
|
+
# # a b c d
|
65
|
+
# # 1 bar one 2 22
|
66
|
+
# # 3 bar three 1 44
|
67
|
+
# # 5 bar two 6 66
|
68
|
+
# # 0 foo one 1 11
|
69
|
+
# # 7 foo three 8 88
|
70
|
+
# # 2 foo two 3 33
|
41
71
|
def head quantity=5
|
42
72
|
select_groups_from :first, quantity
|
43
73
|
end
|
44
74
|
|
75
|
+
# Get the bottom 'n' rows of each group
|
76
|
+
# @param quantity [Fixnum] (5) The number of rows to take from each group.
|
77
|
+
# @example Usage of tail
|
78
|
+
# df = Daru::DataFrame.new({
|
79
|
+
# a: %w{foo bar foo bar foo bar foo foo},
|
80
|
+
# b: %w{one one two three two two one three},
|
81
|
+
# c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
|
82
|
+
# d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
|
83
|
+
# })
|
84
|
+
# df.group_by([:a, :b]).tail(1)
|
85
|
+
# # =>
|
86
|
+
# # #<Daru::DataFrame:82378270 @name = 0623db46-5425-41bd-a843-99baac3d1d9a @size = 6>
|
87
|
+
# # a b c d
|
88
|
+
# # 1 bar one 2 22
|
89
|
+
# # 3 bar three 1 44
|
90
|
+
# # 5 bar two 6 66
|
91
|
+
# # 6 foo one 3 77
|
92
|
+
# # 7 foo three 8 88
|
93
|
+
# # 4 foo two 3 55
|
45
94
|
def tail quantity=5
|
46
95
|
select_groups_from :last, quantity
|
47
96
|
end
|
48
97
|
|
49
98
|
# Calculate mean of numeric groups, excluding missing values.
|
99
|
+
# @example Usage of mean
|
100
|
+
# df = Daru::DataFrame.new({
|
101
|
+
# a: %w{foo bar foo bar foo bar foo foo},
|
102
|
+
# b: %w{one one two three two two one three},
|
103
|
+
# c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
|
104
|
+
# d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88] })
|
105
|
+
# df.group_by([:a, :b]).mean
|
106
|
+
# # =>
|
107
|
+
# # #<Daru::DataFrame:81097450 @name = 0c32983f-3e06-451f-a9c9-051cadfe7371 @size = 6>
|
108
|
+
# # c d
|
109
|
+
# # ["bar", "one"] 2 22
|
110
|
+
# # ["bar", "three"] 1 44
|
111
|
+
# # ["bar", "two"] 6 66
|
112
|
+
# # ["foo", "one"] 2.0 44.0
|
113
|
+
# # ["foo", "three"] 8 88
|
114
|
+
# # ["foo", "two"] 3.0 44.0
|
50
115
|
def mean
|
51
116
|
apply_method :numeric, :mean
|
52
117
|
end
|
@@ -61,6 +126,24 @@ module Daru
|
|
61
126
|
apply_method :numeric, :sum
|
62
127
|
end
|
63
128
|
|
129
|
+
# Count groups, excludes missing values.
|
130
|
+
# @example Using count
|
131
|
+
# df = Daru::DataFrame.new({
|
132
|
+
# a: %w{foo bar foo bar foo bar foo foo},
|
133
|
+
# b: %w{one one two three two two one three},
|
134
|
+
# c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
|
135
|
+
# d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
|
136
|
+
# })
|
137
|
+
# df.group_by([:a, :b]).count
|
138
|
+
# # =>
|
139
|
+
# # #<Daru::DataFrame:76900210 @name = 7b9cf55d-17f8-48c7-b03a-2586c6e5ec5a @size = 6>
|
140
|
+
# # c d
|
141
|
+
# # ["bar", "one"] 1 1
|
142
|
+
# # ["bar", "two"] 1 1
|
143
|
+
# # ["bar", "three"] 1 1
|
144
|
+
# # ["foo", "one"] 2 2
|
145
|
+
# # ["foo", "three"] 1 1
|
146
|
+
# # ["foo", "two"] 2 2
|
64
147
|
def count
|
65
148
|
width = @non_group_vectors.size
|
66
149
|
Daru::DataFrame.new([size]*width, order: @non_group_vectors)
|
@@ -83,6 +166,21 @@ module Daru
|
|
83
166
|
end
|
84
167
|
|
85
168
|
# Returns one of the selected groups as a DataFrame.
|
169
|
+
# @param group [Array] The group that is to be selected from those grouped.
|
170
|
+
#
|
171
|
+
# @example Getting a group
|
172
|
+
#
|
173
|
+
# df = Daru::DataFrame.new({
|
174
|
+
# a: %w{foo bar foo bar foo bar foo foo},
|
175
|
+
# b: %w{one one two three two two one three},
|
176
|
+
# c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
|
177
|
+
# d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
|
178
|
+
# })
|
179
|
+
# df.group_by([:a, :b]).get_group ['bar','two']
|
180
|
+
# #=>
|
181
|
+
# ##<Daru::DataFrame:83258980 @name = 687ee3f6-8874-4899-97fa-9b31d84fa1d5 @size = 1>
|
182
|
+
# # a b c d
|
183
|
+
# # 5 bar two 6 66
|
86
184
|
def get_group group
|
87
185
|
indexes = @groups[group]
|
88
186
|
elements = []
|
@@ -96,7 +194,8 @@ module Daru
|
|
96
194
|
indexes.each do |idx|
|
97
195
|
rows << transpose[idx]
|
98
196
|
end
|
99
|
-
Daru::DataFrame.rows(
|
197
|
+
Daru::DataFrame.rows(
|
198
|
+
rows, index: @context.index[indexes], order: @context.vectors)
|
100
199
|
end
|
101
200
|
|
102
201
|
private
|
@@ -123,28 +222,24 @@ module Daru
|
|
123
222
|
@groups.each do |group, indexes|
|
124
223
|
single_row = []
|
125
224
|
@non_group_vectors.each do |ngvector|
|
126
|
-
|
127
|
-
if method_type == :numeric and
|
128
|
-
slice =
|
129
|
-
|
225
|
+
vec = @context[ngvector]
|
226
|
+
if method_type == :numeric and vec.type == :numeric
|
227
|
+
slice = vec[*indexes]
|
130
228
|
single_row << (slice.is_a?(Numeric) ? slice : slice.send(method))
|
131
|
-
order << ngvector
|
132
229
|
end
|
133
230
|
end
|
134
231
|
|
135
232
|
rows << single_row
|
136
233
|
end
|
137
234
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
order =
|
142
|
-
if order.all?{ |e| e.is_a?(Array) }
|
143
|
-
Daru::MultiIndex.new(order)
|
144
|
-
else
|
145
|
-
Daru::Index.new(order)
|
235
|
+
@non_group_vectors.each do |ngvec|
|
236
|
+
order << ngvec if
|
237
|
+
(method_type == :numeric and @context[ngvec].type == :numeric)
|
146
238
|
end
|
147
239
|
|
240
|
+
index = @groups.keys
|
241
|
+
index = multi_index ? Daru::MultiIndex.from_tuples(index) : Daru::Index.new(index.flatten)
|
242
|
+
order = Daru::Index.new(order)
|
148
243
|
Daru::DataFrame.new(rows.transpose, index: index, order: order)
|
149
244
|
end
|
150
245
|
|
@@ -160,20 +255,6 @@ module Daru
|
|
160
255
|
indexes
|
161
256
|
end
|
162
257
|
|
163
|
-
def symbolize arry
|
164
|
-
symbolized_arry =
|
165
|
-
if arry.all? { |e| e.is_a?(Array) }
|
166
|
-
arry.map do |sub_arry|
|
167
|
-
sub_arry.map do |e|
|
168
|
-
e.is_a?(Numeric) ? e : e.to_sym
|
169
|
-
end
|
170
|
-
end
|
171
|
-
else
|
172
|
-
arry.map { |e| e.is_a?(Numeric) ? e : e.to_sym }
|
173
|
-
end
|
174
|
-
symbolized_arry
|
175
|
-
end
|
176
|
-
|
177
258
|
def multi_indexed_grouping?
|
178
259
|
@groups.keys[0][1] ? true : false
|
179
260
|
end
|