daru 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.build.sh +6 -6
- data/.gitignore +2 -0
- data/CONTRIBUTING.md +7 -3
- data/History.md +36 -0
- data/README.md +21 -13
- data/Rakefile +16 -1
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +44 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru.gemspec +29 -5
- data/lib/daru.rb +30 -1
- data/lib/daru/accessors/array_wrapper.rb +2 -2
- data/lib/daru/accessors/nmatrix_wrapper.rb +6 -6
- data/lib/daru/core/group_by.rb +112 -31
- data/lib/daru/core/merge.rb +170 -0
- data/lib/daru/core/query.rb +95 -0
- data/lib/daru/dataframe.rb +335 -223
- data/lib/daru/date_time/index.rb +550 -0
- data/lib/daru/date_time/offsets.rb +397 -0
- data/lib/daru/index.rb +266 -54
- data/lib/daru/io/io.rb +1 -2
- data/lib/daru/maths/arithmetic/dataframe.rb +2 -2
- data/lib/daru/maths/arithmetic/vector.rb +2 -2
- data/lib/daru/maths/statistics/dataframe.rb +58 -8
- data/lib/daru/maths/statistics/vector.rb +229 -0
- data/lib/daru/vector.rb +230 -80
- data/lib/daru/version.rb +1 -1
- data/spec/core/group_by_spec.rb +16 -16
- data/spec/core/merge_spec.rb +52 -0
- data/spec/core/query_spec.rb +171 -0
- data/spec/dataframe_spec.rb +278 -280
- data/spec/date_time/data_spec.rb +199 -0
- data/spec/date_time/index_spec.rb +433 -0
- data/spec/date_time/offsets_spec.rb +371 -0
- data/spec/fixtures/stock_data.csv +500 -0
- data/spec/index_spec.rb +317 -11
- data/spec/io/io_spec.rb +18 -17
- data/spec/math/arithmetic/dataframe_spec.rb +3 -3
- data/spec/math/statistics/dataframe_spec.rb +39 -1
- data/spec/math/statistics/vector_spec.rb +163 -1
- data/spec/monkeys_spec.rb +4 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/vector_spec.rb +125 -60
- metadata +71 -14
- data/lib/daru/accessors/dataframe_by_vector.rb +0 -17
- data/lib/daru/multi_index.rb +0 -216
- data/spec/multi_index_spec.rb +0 -216
@@ -0,0 +1,31 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
df = Daru::DataFrame.new({
|
7
|
+
a: 10000.times.map { rand },
|
8
|
+
b: 10000.times.map { rand },
|
9
|
+
c: 10000.times.map { rand }
|
10
|
+
})
|
11
|
+
|
12
|
+
Benchmark.bm do |x|
|
13
|
+
x.report("Single Vector access") do
|
14
|
+
df[:a]
|
15
|
+
end
|
16
|
+
|
17
|
+
x.report("Access as range") do
|
18
|
+
df[:a..:c]
|
19
|
+
end
|
20
|
+
|
21
|
+
x.report("Access with commas") do
|
22
|
+
df[:a, :c]
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
# ======== Benchmarks =======
|
27
|
+
#
|
28
|
+
# user system total real
|
29
|
+
# Single Vector access 0.000000 0.000000 0.000000 ( 0.000012)
|
30
|
+
# Access as range 0.090000 0.000000 0.090000 ( 0.084584)
|
31
|
+
# Access with commas 0.050000 0.000000 0.050000 ( 0.051951)
|
@@ -0,0 +1,42 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
df = Daru::DataFrame.new({
|
7
|
+
a: 100000.times.map { rand },
|
8
|
+
b: 100000.times.map { rand },
|
9
|
+
c: 100000.times.map { rand }
|
10
|
+
})
|
11
|
+
|
12
|
+
index = Daru::Index.new((0...100000).to_a.shuffle)
|
13
|
+
|
14
|
+
Benchmark.bm do |x|
|
15
|
+
x.report("Assign new vector as Array") do
|
16
|
+
df[:d] = 100000.times.map { rand }
|
17
|
+
end
|
18
|
+
|
19
|
+
x.report("Reassign same vector as Array") do
|
20
|
+
df[:a] = 100000.times.map { rand }
|
21
|
+
end
|
22
|
+
|
23
|
+
x.report("Assign new Vector as Daru::Vector") do
|
24
|
+
df[:e] = Daru::Vector.new(100000.times.map { rand })
|
25
|
+
end
|
26
|
+
|
27
|
+
x.report("Reassign same Vector as Daru::Vector") do
|
28
|
+
df[:b] = Daru::Vector.new(100000.times.map { rand })
|
29
|
+
end
|
30
|
+
|
31
|
+
x.report("Reassgin differently indexed Daru::Vector") do
|
32
|
+
df[:b] = Daru::Vector.new(100000.times.map { rand }, index: index)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# ===== Benchmarks =====
|
37
|
+
# user system total real
|
38
|
+
# Assign new vector as Array 0.370000 0.000000 0.370000 (0.364515)
|
39
|
+
# Reassign same vector as Array 0.470000 0.000000 0.470000 (0.471408)
|
40
|
+
# Assign new Vector as Daru::Vector 0.940000 0.000000 0.940000 (0.947879)
|
41
|
+
# Reassign same Vector as Daru::Vector 0.760000 0.020000 0.780000 (0.769969)
|
42
|
+
# Reassgin differently indexed Daru::Vector <Too embarrassingly slow.>
|
@@ -0,0 +1,48 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
df = Daru::DataFrame.new({
|
7
|
+
a: 100000.times.map { |i| i },
|
8
|
+
b: 100000.times.map { |i| i },
|
9
|
+
c: 100000.times.map { |i| i }
|
10
|
+
}, index: Daru::Index.new(100000.times.map.to_a.shuffle))
|
11
|
+
|
12
|
+
puts "Benchmarking DataFrame#where\n"
|
13
|
+
Benchmark.bm do |x|
|
14
|
+
x.report("Basic one liner") do
|
15
|
+
df.where(df[:a].mt(2341))
|
16
|
+
end
|
17
|
+
|
18
|
+
x.report("Little complex statement") do
|
19
|
+
df.where(df[:a].lt(235) | df[:b].eq(2341) | df[:c].in([35,355,22]))
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
puts "Benchmarking Vector#where\n"
|
24
|
+
v = Daru::Vector.new(
|
25
|
+
100000.times.map { |i| i }, index: 100000.times.map.to_a.shuffle)
|
26
|
+
|
27
|
+
Benchmark.bm do |x|
|
28
|
+
x.report("Basic one liner") do
|
29
|
+
v.where(v.mteq(1000))
|
30
|
+
end
|
31
|
+
|
32
|
+
x.report("Little complex statement") do
|
33
|
+
v.where(v.lt(235) & v.eq(2341) | v.in([23,511,55]))
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
# ====== Benchmarks ======
|
38
|
+
#
|
39
|
+
# Benchmarking DataFrame#where
|
40
|
+
#
|
41
|
+
# user system total real
|
42
|
+
# Basic one liner 0.700000 0.000000 0.700000 (0.703532)
|
43
|
+
# Little complex statement 0.120000 0.000000 0.120000 (0.121765)
|
44
|
+
#
|
45
|
+
# Benchmarking Vector#where
|
46
|
+
# user system total real
|
47
|
+
# Basic one liner 0.240000 0.000000 0.240000 (0.245787)
|
48
|
+
# Little complex statement 0.100000 0.000000 0.100000 (0.094423)
|
@@ -0,0 +1,28 @@
|
|
1
|
+
$:.unshift File.expand_path("../../lib", __FILE__)
|
2
|
+
|
3
|
+
require 'benchmark'
|
4
|
+
require 'daru'
|
5
|
+
|
6
|
+
df = Daru::DataFrame.new({
|
7
|
+
a: [1,2,3,4,5,6]*100,
|
8
|
+
b: ['a','b','c','d','e','f']*100,
|
9
|
+
c: [11,22,33,44,55,66]*100
|
10
|
+
}, index: (1..600).to_a.shuffle)
|
11
|
+
|
12
|
+
Benchmark.bm do |x|
|
13
|
+
x.report("where") do
|
14
|
+
df.where(df[:a].eq(2) | df[:c].eq(55))
|
15
|
+
end
|
16
|
+
|
17
|
+
x.report("filter_rows") do
|
18
|
+
df.filter(:row) do |r|
|
19
|
+
r[:a] == 2 or r[:c] == 55
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
# ===== Benchmarks =====
|
25
|
+
#
|
26
|
+
# user system total real
|
27
|
+
# where 0.000000 0.000000 0.000000 ( 0.002575)
|
28
|
+
# filter_rows 0.210000 0.000000 0.210000 ( 0.205403)
|
data/daru.gemspec
CHANGED
@@ -5,11 +5,11 @@ require 'daru/version.rb'
|
|
5
5
|
|
6
6
|
Daru::DESCRIPTION = <<MSG
|
7
7
|
Daru (Data Analysis in RUby) is a library for analysis, manipulation and visualization
|
8
|
-
of data.
|
8
|
+
of data. Daru works seamlessly across interpreters and leverages interpreter-specific
|
9
|
+
optimizations whenever they are available.
|
9
10
|
|
10
|
-
|
11
|
-
|
12
|
-
making working with data super simple and intuitive.
|
11
|
+
It is the default data storage gem for all the statsample gems (glm, timeseries, etc.)
|
12
|
+
and can be used with many others like mixed_models, gnuplotrb, nyaplot and iruby.
|
13
13
|
MSG
|
14
14
|
|
15
15
|
Gem::Specification.new do |spec|
|
@@ -27,16 +27,40 @@ Gem::Specification.new do |spec|
|
|
27
27
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
28
28
|
spec.require_paths = ["lib"]
|
29
29
|
|
30
|
+
spec.post_install_message = <<-EOF
|
31
|
+
*************************************************************************
|
32
|
+
Thank you for installing daru!
|
33
|
+
|
34
|
+
oOOOOOo
|
35
|
+
,| oO
|
36
|
+
//| |
|
37
|
+
\\| |
|
38
|
+
`| |
|
39
|
+
`-----`
|
40
|
+
|
41
|
+
|
42
|
+
Hope you love daru! For enhanced interactivity and better visualizations,
|
43
|
+
consider using gnuplotrb and nyaplot with iruby. For statistics use the
|
44
|
+
statsample family.
|
45
|
+
|
46
|
+
Read the README for interesting use cases and examples.
|
47
|
+
|
48
|
+
Cheers!
|
49
|
+
*************************************************************************
|
50
|
+
EOF
|
51
|
+
|
30
52
|
spec.add_runtime_dependency 'reportbuilder', '~> 1.4'
|
31
53
|
spec.add_runtime_dependency 'spreadsheet', '~> 1.0.3'
|
32
54
|
|
33
55
|
spec.add_development_dependency 'bundler', '~> 1.10'
|
34
56
|
spec.add_development_dependency 'rake'
|
57
|
+
spec.add_development_dependency 'pry', '~> 0.10'
|
58
|
+
spec.add_development_dependency 'pry-byebug'
|
35
59
|
spec.add_development_dependency 'rserve-client', '~> 0.3'
|
36
60
|
spec.add_development_dependency 'rspec'
|
37
61
|
spec.add_development_dependency 'awesome_print'
|
38
62
|
spec.add_development_dependency 'nyaplot', '~> 0.1.5'
|
39
63
|
spec.add_development_dependency 'nmatrix', '~> 0.1.0'
|
40
64
|
spec.add_development_dependency 'distribution', '~> 0.7'
|
41
|
-
spec.add_development_dependency 'gsl
|
65
|
+
spec.add_development_dependency 'rb-gsl', '~>1.16'
|
42
66
|
end
|
data/lib/daru.rb
CHANGED
@@ -3,6 +3,31 @@ def jruby?
|
|
3
3
|
end
|
4
4
|
|
5
5
|
module Daru
|
6
|
+
DAYS_OF_WEEK = {
|
7
|
+
'SUN' => 0,
|
8
|
+
'MON' => 1,
|
9
|
+
'TUE' => 2,
|
10
|
+
'WED' => 3,
|
11
|
+
'THU' => 4,
|
12
|
+
'FRI' => 5,
|
13
|
+
'SAT' => 6
|
14
|
+
}
|
15
|
+
|
16
|
+
MONTH_DAYS = {
|
17
|
+
1 => 31,
|
18
|
+
2 => 28,
|
19
|
+
3 => 31,
|
20
|
+
4 => 30,
|
21
|
+
5 => 31,
|
22
|
+
6 => 30,
|
23
|
+
7 => 31,
|
24
|
+
8 => 31,
|
25
|
+
9 => 30,
|
26
|
+
10 => 31,
|
27
|
+
11 => 30,
|
28
|
+
12 => 31
|
29
|
+
}
|
30
|
+
|
6
31
|
SPLIT_TOKEN = ','
|
7
32
|
class << self
|
8
33
|
@@lazy_update = false
|
@@ -42,9 +67,13 @@ require 'reportbuilder'
|
|
42
67
|
|
43
68
|
require 'daru/version.rb'
|
44
69
|
require 'daru/index.rb'
|
45
|
-
require 'daru/multi_index.rb'
|
46
70
|
require 'daru/vector.rb'
|
47
71
|
require 'daru/dataframe.rb'
|
48
72
|
require 'daru/monkeys.rb'
|
49
73
|
|
50
74
|
require 'daru/core/group_by.rb'
|
75
|
+
require 'daru/core/query.rb'
|
76
|
+
require 'daru/core/merge.rb'
|
77
|
+
|
78
|
+
require 'daru/date_time/offsets.rb'
|
79
|
+
require 'daru/date_time/index.rb'
|
@@ -34,6 +34,8 @@ module Daru
|
|
34
34
|
end
|
35
35
|
|
36
36
|
def []= index, value
|
37
|
+
raise ArgumentError, "Index #{index} does not exist" if
|
38
|
+
index > @size and index < @data.size
|
37
39
|
resize if index >= @data.size
|
38
40
|
@size += 1 if index == @size
|
39
41
|
|
@@ -42,7 +44,7 @@ module Daru
|
|
42
44
|
end
|
43
45
|
|
44
46
|
def == other
|
45
|
-
@data == other and @size == other.size
|
47
|
+
@data[0...@size] == other[0...@size] and @size == other.size
|
46
48
|
end
|
47
49
|
|
48
50
|
def delete_at index
|
@@ -59,8 +61,6 @@ module Daru
|
|
59
61
|
def << element
|
60
62
|
resize if @size >= @data.size
|
61
63
|
self[@size] = element
|
62
|
-
|
63
|
-
@size += 1
|
64
64
|
end
|
65
65
|
|
66
66
|
def to_a
|
@@ -74,7 +74,7 @@ module Daru
|
|
74
74
|
def resize size = @size*2
|
75
75
|
raise ArgumentError, "Size must be greater than current size" if size < @size
|
76
76
|
|
77
|
-
@data = NMatrix.new [size], @data.to_a
|
77
|
+
@data = NMatrix.new [size], @data.to_a, dtype: @nm_dtype
|
78
78
|
end
|
79
79
|
|
80
80
|
def mean
|
@@ -90,11 +90,11 @@ module Daru
|
|
90
90
|
end
|
91
91
|
|
92
92
|
def max
|
93
|
-
@data.max
|
93
|
+
@data[0...@size].max
|
94
94
|
end
|
95
95
|
|
96
96
|
def min
|
97
|
-
@data.min
|
97
|
+
@data[0...@size].min
|
98
98
|
end
|
99
99
|
end
|
100
100
|
end
|
data/lib/daru/core/group_by.rb
CHANGED
@@ -4,11 +4,19 @@ module Daru
|
|
4
4
|
|
5
5
|
attr_reader :groups
|
6
6
|
|
7
|
+
# Iterate over each group created by group_by. A DataFrame is yielded in
|
8
|
+
# block.
|
9
|
+
def each_group &block
|
10
|
+
groups.keys.each do |k|
|
11
|
+
yield get_group(k)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
7
15
|
def initialize context, names
|
8
16
|
@groups = {}
|
9
17
|
@non_group_vectors = context.vectors.to_a - names
|
10
18
|
@context = context
|
11
|
-
vectors = names.map { |vec| context
|
19
|
+
vectors = names.map { |vec| context[vec].to_a }
|
12
20
|
tuples = vectors[0].zip(*vectors[1..-1])
|
13
21
|
keys = tuples.uniq.sort
|
14
22
|
|
@@ -18,35 +26,92 @@ module Daru
|
|
18
26
|
@groups.freeze
|
19
27
|
end
|
20
28
|
|
29
|
+
# Get a Daru::Vector of the size of each group.
|
21
30
|
def size
|
22
31
|
index =
|
23
32
|
if multi_indexed_grouping?
|
24
|
-
Daru::MultiIndex.
|
33
|
+
Daru::MultiIndex.from_tuples @groups.keys
|
25
34
|
else
|
26
|
-
Daru::Index.new
|
35
|
+
Daru::Index.new @groups.keys.flatten
|
27
36
|
end
|
28
37
|
|
29
38
|
values = @groups.values.map { |e| e.size }
|
30
39
|
Daru::Vector.new(values, index: index, name: :size)
|
31
40
|
end
|
32
41
|
|
42
|
+
# Get the first group
|
33
43
|
def first
|
34
44
|
head(1)
|
35
45
|
end
|
36
46
|
|
47
|
+
# Get the last group
|
37
48
|
def last
|
38
49
|
tail(1)
|
39
50
|
end
|
40
51
|
|
52
|
+
# Get the top 'n' groups
|
53
|
+
# @param quantity [Fixnum] (5) The number of groups.
|
54
|
+
# @example Usage of head
|
55
|
+
# df = Daru::DataFrame.new({
|
56
|
+
# a: %w{foo bar foo bar foo bar foo foo},
|
57
|
+
# b: %w{one one two three two two one three},
|
58
|
+
# c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
|
59
|
+
# d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
|
60
|
+
# })
|
61
|
+
# df.group_by([:a, :b]).head(1)
|
62
|
+
# # =>
|
63
|
+
# # #<Daru::DataFrame:82745170 @name = d7003f75-5eb9-4967-9303-c08dd9160224 @size = 6>
|
64
|
+
# # a b c d
|
65
|
+
# # 1 bar one 2 22
|
66
|
+
# # 3 bar three 1 44
|
67
|
+
# # 5 bar two 6 66
|
68
|
+
# # 0 foo one 1 11
|
69
|
+
# # 7 foo three 8 88
|
70
|
+
# # 2 foo two 3 33
|
41
71
|
def head quantity=5
|
42
72
|
select_groups_from :first, quantity
|
43
73
|
end
|
44
74
|
|
75
|
+
# Get the bottom 'n' groups
|
76
|
+
# @param quantity [Fixnum] (5) The number of groups.
|
77
|
+
# @example Usage of tail
|
78
|
+
# df = Daru::DataFrame.new({
|
79
|
+
# a: %w{foo bar foo bar foo bar foo foo},
|
80
|
+
# b: %w{one one two three two two one three},
|
81
|
+
# c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
|
82
|
+
# d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
|
83
|
+
# })
|
84
|
+
# df.group_by([:a, :b]).tail(1)
|
85
|
+
# # =>
|
86
|
+
# # #<Daru::DataFrame:82378270 @name = 0623db46-5425-41bd-a843-99baac3d1d9a @size = 6>
|
87
|
+
# # a b c d
|
88
|
+
# # 1 bar one 2 22
|
89
|
+
# # 3 bar three 1 44
|
90
|
+
# # 5 bar two 6 66
|
91
|
+
# # 6 foo one 3 77
|
92
|
+
# # 7 foo three 8 88
|
93
|
+
# # 4 foo two 3 55
|
45
94
|
def tail quantity=5
|
46
95
|
select_groups_from :last, quantity
|
47
96
|
end
|
48
97
|
|
49
98
|
# Calculate mean of numeric groups, excluding missing values.
|
99
|
+
# @example Usage of mean
|
100
|
+
# df = Daru::DataFrame.new({
|
101
|
+
# a: %w{foo bar foo bar foo bar foo foo},
|
102
|
+
# b: %w{one one two three two two one three},
|
103
|
+
# c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
|
104
|
+
# d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
|
105
|
+
# df.group_by([:a, :b]).mean
|
106
|
+
# # =>
|
107
|
+
# # #<Daru::DataFrame:81097450 @name = 0c32983f-3e06-451f-a9c9-051cadfe7371 @size = 6>
|
108
|
+
# # c d
|
109
|
+
# # ["bar", "one"] 2 22
|
110
|
+
# # ["bar", "three"] 1 44
|
111
|
+
# # ["bar", "two"] 6 66
|
112
|
+
# # ["foo", "one"] 2.0 44.0
|
113
|
+
# # ["foo", "three"] 8 88
|
114
|
+
# # ["foo", "two"] 3.0 44.0
|
50
115
|
def mean
|
51
116
|
apply_method :numeric, :mean
|
52
117
|
end
|
@@ -61,6 +126,24 @@ module Daru
|
|
61
126
|
apply_method :numeric, :sum
|
62
127
|
end
|
63
128
|
|
129
|
+
# Count groups, excludes missing values.
|
130
|
+
# @example Using count
|
131
|
+
# df = Daru::DataFrame.new({
|
132
|
+
# a: %w{foo bar foo bar foo bar foo foo},
|
133
|
+
# b: %w{one one two three two two one three},
|
134
|
+
# c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
|
135
|
+
# d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
|
136
|
+
# })
|
137
|
+
# df.group_by([:a, :b]).count
|
138
|
+
# # =>
|
139
|
+
# # #<Daru::DataFrame:76900210 @name = 7b9cf55d-17f8-48c7-b03a-2586c6e5ec5a @size = 6>
|
140
|
+
# # c d
|
141
|
+
# # ["bar", "one"] 1 1
|
142
|
+
# # ["bar", "three"] 1 1
|
143
|
+
# # ["bar", "two"] 1 1
|
144
|
+
# # ["foo", "one"] 2 2
|
145
|
+
# # ["foo", "three"] 1 1
|
146
|
+
# # ["foo", "two"] 2 2
|
64
147
|
def count
|
65
148
|
width = @non_group_vectors.size
|
66
149
|
Daru::DataFrame.new([size]*width, order: @non_group_vectors)
|
@@ -83,6 +166,21 @@ module Daru
|
|
83
166
|
end
|
84
167
|
|
85
168
|
# Returns one of the selected groups as a DataFrame.
|
169
|
+
# @param group [Array] The group that is to be selected from those grouped.
|
170
|
+
#
|
171
|
+
# @example Getting a group
|
172
|
+
#
|
173
|
+
# df = Daru::DataFrame.new({
|
174
|
+
# a: %w{foo bar foo bar foo bar foo foo},
|
175
|
+
# b: %w{one one two three two two one three},
|
176
|
+
# c: [1 ,2 ,3 ,1 ,3 ,6 ,3 ,8],
|
177
|
+
# d: [11 ,22 ,33 ,44 ,55 ,66 ,77 ,88]
|
178
|
+
# })
|
179
|
+
# df.group_by([:a, :b]).get_group ['bar','two']
|
180
|
+
# #=>
|
181
|
+
# ##<Daru::DataFrame:83258980 @name = 687ee3f6-8874-4899-97fa-9b31d84fa1d5 @size = 1>
|
182
|
+
# # a b c d
|
183
|
+
# # 5 bar two 6 66
|
86
184
|
def get_group group
|
87
185
|
indexes = @groups[group]
|
88
186
|
elements = []
|
@@ -96,7 +194,8 @@ module Daru
|
|
96
194
|
indexes.each do |idx|
|
97
195
|
rows << transpose[idx]
|
98
196
|
end
|
99
|
-
Daru::DataFrame.rows(
|
197
|
+
Daru::DataFrame.rows(
|
198
|
+
rows, index: @context.index[indexes], order: @context.vectors)
|
100
199
|
end
|
101
200
|
|
102
201
|
private
|
@@ -123,28 +222,24 @@ module Daru
|
|
123
222
|
@groups.each do |group, indexes|
|
124
223
|
single_row = []
|
125
224
|
@non_group_vectors.each do |ngvector|
|
126
|
-
|
127
|
-
if method_type == :numeric and
|
128
|
-
slice =
|
129
|
-
|
225
|
+
vec = @context[ngvector]
|
226
|
+
if method_type == :numeric and vec.type == :numeric
|
227
|
+
slice = vec[*indexes]
|
130
228
|
single_row << (slice.is_a?(Numeric) ? slice : slice.send(method))
|
131
|
-
order << ngvector
|
132
229
|
end
|
133
230
|
end
|
134
231
|
|
135
232
|
rows << single_row
|
136
233
|
end
|
137
234
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
order =
|
142
|
-
if order.all?{ |e| e.is_a?(Array) }
|
143
|
-
Daru::MultiIndex.new(order)
|
144
|
-
else
|
145
|
-
Daru::Index.new(order)
|
235
|
+
@non_group_vectors.each do |ngvec|
|
236
|
+
order << ngvec if
|
237
|
+
(method_type == :numeric and @context[ngvec].type == :numeric)
|
146
238
|
end
|
147
239
|
|
240
|
+
index = @groups.keys
|
241
|
+
index = multi_index ? Daru::MultiIndex.from_tuples(index) : Daru::Index.new(index.flatten)
|
242
|
+
order = Daru::Index.new(order)
|
148
243
|
Daru::DataFrame.new(rows.transpose, index: index, order: order)
|
149
244
|
end
|
150
245
|
|
@@ -160,20 +255,6 @@ module Daru
|
|
160
255
|
indexes
|
161
256
|
end
|
162
257
|
|
163
|
-
def symbolize arry
|
164
|
-
symbolized_arry =
|
165
|
-
if arry.all? { |e| e.is_a?(Array) }
|
166
|
-
arry.map do |sub_arry|
|
167
|
-
sub_arry.map do |e|
|
168
|
-
e.is_a?(Numeric) ? e : e.to_sym
|
169
|
-
end
|
170
|
-
end
|
171
|
-
else
|
172
|
-
arry.map { |e| e.is_a?(Numeric) ? e : e.to_sym }
|
173
|
-
end
|
174
|
-
symbolized_arry
|
175
|
-
end
|
176
|
-
|
177
258
|
def multi_indexed_grouping?
|
178
259
|
@groups.keys[0][1] ? true : false
|
179
260
|
end
|