daru_lite 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +35 -33
- data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
- data/lib/daru_lite/data_frame/calculatable.rb +140 -0
- data/lib/daru_lite/data_frame/convertible.rb +107 -0
- data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
- data/lib/daru_lite/data_frame/fetchable.rb +301 -0
- data/lib/daru_lite/data_frame/filterable.rb +144 -0
- data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
- data/lib/daru_lite/data_frame/indexable.rb +168 -0
- data/lib/daru_lite/data_frame/iterable.rb +339 -0
- data/lib/daru_lite/data_frame/joinable.rb +152 -0
- data/lib/daru_lite/data_frame/missable.rb +75 -0
- data/lib/daru_lite/data_frame/pivotable.rb +108 -0
- data/lib/daru_lite/data_frame/queryable.rb +67 -0
- data/lib/daru_lite/data_frame/setable.rb +109 -0
- data/lib/daru_lite/data_frame/sortable.rb +241 -0
- data/lib/daru_lite/dataframe.rb +138 -2353
- data/lib/daru_lite/index/index.rb +13 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1 -1
- data/lib/daru_lite/vector/aggregatable.rb +9 -0
- data/lib/daru_lite/vector/calculatable.rb +78 -0
- data/lib/daru_lite/vector/convertible.rb +77 -0
- data/lib/daru_lite/vector/duplicatable.rb +17 -0
- data/lib/daru_lite/vector/fetchable.rb +175 -0
- data/lib/daru_lite/vector/filterable.rb +128 -0
- data/lib/daru_lite/vector/indexable.rb +77 -0
- data/lib/daru_lite/vector/iterable.rb +95 -0
- data/lib/daru_lite/vector/joinable.rb +17 -0
- data/lib/daru_lite/vector/missable.rb +124 -0
- data/lib/daru_lite/vector/queryable.rb +45 -0
- data/lib/daru_lite/vector/setable.rb +47 -0
- data/lib/daru_lite/vector/sortable.rb +113 -0
- data/lib/daru_lite/vector.rb +36 -932
- data/lib/daru_lite/version.rb +1 -1
- data/spec/data_frame/aggregatable_example.rb +65 -0
- data/spec/data_frame/buildable_example.rb +109 -0
- data/spec/data_frame/calculatable_example.rb +135 -0
- data/spec/data_frame/convertible_example.rb +180 -0
- data/spec/data_frame/duplicatable_example.rb +111 -0
- data/spec/data_frame/fetchable_example.rb +476 -0
- data/spec/data_frame/filterable_example.rb +250 -0
- data/spec/data_frame/indexable_example.rb +221 -0
- data/spec/data_frame/iterable_example.rb +465 -0
- data/spec/data_frame/joinable_example.rb +106 -0
- data/spec/data_frame/missable_example.rb +47 -0
- data/spec/data_frame/pivotable_example.rb +297 -0
- data/spec/data_frame/queryable_example.rb +92 -0
- data/spec/data_frame/setable_example.rb +482 -0
- data/spec/data_frame/sortable_example.rb +350 -0
- data/spec/dataframe_spec.rb +181 -3289
- data/spec/index/index_spec.rb +8 -0
- data/spec/vector/aggregatable_example.rb +27 -0
- data/spec/vector/calculatable_example.rb +82 -0
- data/spec/vector/convertible_example.rb +126 -0
- data/spec/vector/duplicatable_example.rb +48 -0
- data/spec/vector/fetchable_example.rb +463 -0
- data/spec/vector/filterable_example.rb +165 -0
- data/spec/vector/indexable_example.rb +201 -0
- data/spec/vector/iterable_example.rb +111 -0
- data/spec/vector/joinable_example.rb +25 -0
- data/spec/vector/missable_example.rb +88 -0
- data/spec/vector/queryable_example.rb +91 -0
- data/spec/vector/setable_example.rb +300 -0
- data/spec/vector/sortable_example.rb +242 -0
- data/spec/vector_spec.rb +111 -1805
- metadata +86 -2
data/lib/daru_lite/version.rb
CHANGED
@@ -0,0 +1,65 @@
|
|
1
|
+
shared_examples_for 'an aggregatable DataFrame' do
|
2
|
+
# Grouping a frame that holds exactly one row.
describe "#group_by" do
  context "on a single row DataFrame" do
    subject { df.group_by([:city]) }

    let(:df) do
      DaruLite::DataFrame.new(city: ['Kyiv'], year: [2015], value: [1])
    end

    it "returns a groupby object" do
      is_expected.to be_a(DaruLite::Core::GroupBy)
    end

    it "has the correct index" do
      # The only group is keyed by the city value and points at row 0.
      expected_groups = { ['Kyiv'] => [0] }

      expect(subject.groups).to eq(expected_groups)
    end
  end
end
|
17
|
+
|
18
|
+
# Exercises DataFrame#aggregate with a lambda aggregator on a regular frame
# and with a named aggregation (:sum) on a frame indexed by a CategoricalIndex.
describe '#aggregate' do
  let(:cat_idx) { DaruLite::CategoricalIndex.new [:a, :b, :a, :a, :c] }

  # Plain decimal literals: a leading zero (as in `07` / `01`) makes Ruby read
  # the number as octal, which only equals the intended value for digits < 8.
  let(:df) { DaruLite::DataFrame.new(num: [52, 12, 7, 17, 1], cat_index: cat_idx) }
  let(:df_cat_idx) do
    DaruLite::DataFrame.new({ num: [52, 12, 7, 17, 1] }, index: cat_idx)
  end

  it 'lambda function on particular column' do
    # The lambda parameter is not called `df`, to avoid shadowing the helper above.
    expect(df.aggregate(num_100_times: ->(frame) { (frame.num * 100).first })).to eq(
      DaruLite::DataFrame.new(num_100_times: [5200, 1200, 700, 1700, 100])
    )
  end

  it 'aggregate sum on particular column' do
    expect(df_cat_idx.aggregate(num: :sum)).to eq(
      DaruLite::DataFrame.new({ num: [76, 12, 1] }, index: [:a, :b, :c])
    )
  end
end
|
37
|
+
|
38
|
+
# group_by_and_aggregate is compared against calling group_by and aggregate separately.
describe '#group_by_and_aggregate' do
  let(:spending_df) do
    records = [
      [2010, 'dev', 50, 1],
      [2010, 'dev', 150, 1],
      [2010, 'dev', 200, 1],
      [2011, 'dev', 50, 1],
      [2012, 'dev', 150, 1],

      [2011, 'office', 300, 1],

      [2010, 'market', 50, 1],
      [2011, 'market', 500, 1],
      [2012, 'market', 500, 1],
      [2012, 'market', 300, 1],

      [2012, 'R&D', 10, 1]
    ]

    DaruLite::DataFrame.rows(records, order: %i[year category spending nb_spending])
  end

  it 'works as group_by + aggregate' do
    # Single grouping key.
    combined = spending_df.group_by_and_aggregate(:year, spending: :sum)
    two_step = spending_df.group_by(:year).aggregate(spending: :sum)
    expect(combined).to eq(two_step)

    # Several grouping keys and several aggregations.
    combined = spending_df.group_by_and_aggregate(%i[year category], spending: :sum, nb_spending: :size)
    two_step = spending_df.group_by(%i[year category]).aggregate(spending: :sum, nb_spending: :size)
    expect(combined).to eq(two_step)
  end
end
|
65
|
+
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
shared_examples_for 'a buildable DataFrame' do
|
2
|
+
describe "::rows" do
|
3
|
+
# Four identical rows of five integers, used as input by the ::rows examples.
let(:rows) { Array.new(4) { [1, 2, 3, 4, 5] } }
|
11
|
+
|
12
|
+
# Row-wise construction where the resulting row labels form a plain DaruLite::Index.
context DaruLite::Index do
  it "creates a DataFrame from Array rows" do
    frame = DaruLite::DataFrame.rows(rows, order: %i[a b c d e])

    expect(frame.index).to eq(DaruLite::Index.new([0, 1, 2, 3]))
    expect(frame.vectors).to eq(DaruLite::Index.new(%i[a b c d e]))
    expect(frame[:a]).to eq(DaruLite::Vector.new([1, 1, 1, 1]))
  end

  it "creates empty dataframe" do
    frame = DaruLite::DataFrame.rows([], order: %i[a b c])

    expect(frame.vectors).to eq(DaruLite::Index.new(%i[a b c]))
    expect(frame.index).to be_empty
  end

  it "creates a DataFrame from Vector rows" do
    vector_rows = rows.map do |row|
      DaruLite::Vector.new(row, index: %i[a b c d e])
    end

    frame = DaruLite::DataFrame.rows(vector_rows, order: %i[a b c d e])

    expect(frame.index).to eq(DaruLite::Index.new([0, 1, 2, 3]))
    expect(frame.vectors).to eq(DaruLite::Index.new(%i[a b c d e]))
    expect(frame[:a]).to eq(DaruLite::Vector.new([1, 1, 1, 1]))
  end

  it 'derives index & order from arrays' do
    frame = DaruLite::DataFrame.rows(rows)

    expect(frame.index).to eq(DaruLite::Index.new([0, 1, 2, 3]))
    expect(frame.vectors).to eq(DaruLite::Index.new(%w[0 1 2 3 4]))
  end

  it 'derives index & order from vectors' do
    # Each row vector carries its own name.
    vector_rows = rows.zip(%w[w x y z]).map do |row, label|
      DaruLite::Vector.new(row, index: %i[a b c d e], name: label)
    end
    frame = DaruLite::DataFrame.rows(vector_rows)

    expect(frame.index).to eq(DaruLite::Index.new(%w[w x y z]))
    expect(frame.vectors).to eq(DaruLite::Index.new(%i[a b c d e]))
  end

  it 'behaves, when rows are repeated' do
    # Two rows share the name "w"; the expected index carries numbered variants.
    vector_rows = rows.zip(%w[w w y z]).map do |row, label|
      DaruLite::Vector.new(row, index: %i[a b c d e], name: label)
    end
    frame = DaruLite::DataFrame.rows(vector_rows)

    expect(frame.index).to eq(DaruLite::Index.new(%w[w_1 w_2 y z]))
    expect(frame.vectors).to eq(DaruLite::Index.new(%i[a b c d e]))
  end

  it 'behaves, when vectors are unnamed' do
    vector_rows = rows.map do |row|
      DaruLite::Vector.new(row, index: %i[a b c d e])
    end
    frame = DaruLite::DataFrame.rows(vector_rows)

    expect(frame.index).to eq(DaruLite::Index.new([0, 1, 2, 3]))
    expect(frame.vectors).to eq(DaruLite::Index.new(%i[a b c d e]))
  end
end
|
65
|
+
|
66
|
+
# Row-wise construction where the row index and/or the vector order is a MultiIndex.
# `multi_index` and `order_mi` are not defined in this block; they have to be
# supplied by the spec that includes these shared examples.
context DaruLite::MultiIndex do
  it "creates a DataFrame from rows" do
    df = DaruLite::DataFrame.rows(rows * 3, index: multi_index, order: %i[a b c d e])

    expect(df.index).to eq(multi_index)
    expect(df.vectors).to eq(DaruLite::Index.new(%i[a b c d e]))
    expect(df[:a]).to eq(DaruLite::Vector.new([1] * 12, index: multi_index))
  end

  it "creates a DataFrame from rows (MultiIndex order)" do
    # Deliberately not called `rows`, so the shared `rows` helper is not shadowed.
    mi_rows = [
      [11, 1, 11, 1],
      [12, 2, 12, 2],
      [13, 3, 13, 3],
      [14, 4, 14, 4]
    ]
    index = DaruLite::MultiIndex.from_tuples(
      [
        %i[one bar],
        %i[one baz],
        %i[two foo],
        %i[two bar]
      ]
    )

    df = DaruLite::DataFrame.rows(mi_rows, index: index, order: order_mi)

    expect(df.index).to eq(index)
    expect(df.vectors).to eq(order_mi)
    expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new([11, 12, 13, 14], index: index))
  end

  it "creates a DataFrame from Vector rows" do
    # `rows * 3` already builds a new array, so the helper's value is not mutated.
    rows3 = (rows * 3).map { |r| DaruLite::Vector.new(r, index: multi_index) }

    df = DaruLite::DataFrame.rows(rows3, order: multi_index)

    expect(df.index).to eq(DaruLite::Index.new(Array.new(rows3.size) { |i| i }))
    expect(df.vectors).to eq(multi_index)
    expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new([1] * 12))
  end
end
|
108
|
+
end
|
109
|
+
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
shared_examples_for 'a calculatable DataFrame' do
|
2
|
+
# Row-wise sums across all vectors, or across a chosen subset of vectors.
# Opened with `describe` (an alias of `context`) to match the other method
# groups in these shared examples.
describe "#vector_sum" do
  let(:df) do
    a1 = DaruLite::Vector.new [1, 2, 3, 4, 5, nil, nil]
    a2 = DaruLite::Vector.new [10, 10, 20, 20, 20, 30, nil]
    b1 = DaruLite::Vector.new [nil, 1, 1, 1, 1, 2, nil]
    b2 = DaruLite::Vector.new [2, 2, 2, nil, 2, 3, nil]
    DaruLite::DataFrame.new({ a1:, a2:, b1:, b2: })
  end

  it "calculates complete vector sum" do
    # Every row that contains a nil is expected to sum to nil.
    expect(df.vector_sum).to eq(DaruLite::Vector.new([nil, 15, 26, nil, 28, nil, nil]))
  end

  it "ignores nils if skipnil is true" do
    # The last row is all nils and is expected to sum to 0.
    expect(df.vector_sum(skipnil: true)).to eq(DaruLite::Vector.new([13, 15, 26, 25, 28, 35, 0]))
  end

  it "calculates partial vector sum" do
    a = df.vector_sum([:a1, :a2])
    b = df.vector_sum([:b1, :b2])

    expect(a).to eq(DaruLite::Vector.new([11, 12, 23, 24, 25, nil, nil]))
    expect(b).to eq(DaruLite::Vector.new([nil, 3, 3, nil, 3, 5, nil]))
  end
end
|
27
|
+
|
28
|
+
# Row-wise mean over five vectors, several of which contain nils.
describe "#vector_mean" do
  let(:df) do
    columns = {
      a1: DaruLite::Vector.new([1, 2, 3, 4, 5, nil]),
      a2: DaruLite::Vector.new([10, 10, 20, 20, 20, 30]),
      b1: DaruLite::Vector.new([nil, 1, 1, 1, 1, 2]),
      b2: DaruLite::Vector.new([2, 2, 2, nil, 2, 3]),
      c: DaruLite::Vector.new([nil, 2, 4, 2, 2, 2])
    }

    DaruLite::DataFrame.new(columns)
  end

  it "calculates complete vector mean" do
    expected_means = DaruLite::Vector.new([nil, 3.4, 6, nil, 6.0, nil])

    expect(df.vector_mean).to eq(expected_means)
  end
end
|
44
|
+
|
45
|
+
# Evaluates arithmetic expressions, given as strings, over the vectors v1, v2 and v3.
describe "#compute" do
  let(:vnumeric) do
    DaruLite::Vector.new([0, 0, 1, 4])
  end
  let(:vsum) do
    DaruLite::Vector.new([1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0])
  end
  let(:vmult) do
    DaruLite::Vector.new([1 * 4, 2 * 3, 3 * 2, 4 * 1])
  end
  let(:df) do
    DaruLite::DataFrame.new(
      {
        v1: DaruLite::Vector.new([1, 2, 3, 4]),
        v2: DaruLite::Vector.new([4, 3, 2, 1]),
        v3: DaruLite::Vector.new([10, 20, 30, 40])
      }
    )
  end

  it "performs a computation when supplied in a string" do
    expectations = {
      "v1/v2" => vnumeric,
      "v1+v2+v3" => vsum,
      "v1*v2" => vmult
    }

    expectations.each do |expression, expected|
      expect(df.compute(expression)).to eq(expected)
    end
  end
end
|
63
|
+
|
64
|
+
# The block passed to vector_by_calculation refers to the vectors a, b and c by bare name.
describe "#vector_by_calculation" do
  subject { df.vector_by_calculation { a + b + c } }

  let(:df) do
    DaruLite::DataFrame.new(
      {
        a: DaruLite::Vector.new([1, 2, 3, 4, 5, 6, 7]),
        b: DaruLite::Vector.new([10, 20, 30, 40, 50, 60, 70]),
        c: DaruLite::Vector.new([100, 200, 300, 400, 500, 600, 700])
      }
    )
  end

  it "DSL for returning vector of each calculation" do
    is_expected.to eq(DaruLite::Vector.new([111, 222, 333, 444, 555, 666, 777]))
  end
end
|
78
|
+
|
79
|
+
# Per-row character counts over a frame mixing integers, floats, strings and nils.
describe "#vector_count_characters" do
  subject { df.vector_count_characters }

  let(:df) do
    columns = {
      a1: DaruLite::Vector.new([1, 'abcde', 3, 4, 5, nil]),
      a2: DaruLite::Vector.new([10, 20.3, 20, 20, 20, 30]),
      b1: DaruLite::Vector.new([nil, '343434', 1, 1, 1, 2]),
      b2: DaruLite::Vector.new([2, 2, 2, nil, 2, 3]),
      c: DaruLite::Vector.new([nil, 2, 'This is a nice example', 2, 2, 2])
    }

    DaruLite::DataFrame.new(columns)
  end

  it "returns correct values" do
    is_expected.to eq(DaruLite::Vector.new([4, 17, 27, 5, 6, 5]))
  end
end
|
95
|
+
|
96
|
+
describe "#summary" do
|
97
|
+
subject { df.summary }
|
98
|
+
|
99
|
+
context "DataFrame" do
|
100
|
+
# Named frame with one all-integer column (:a) and one mixed integer/string column (:b).
let(:df) do
  columns = { a: [1, 2, 5], b: [1, 2, "string"] }

  DaruLite::DataFrame.new(columns, order: %i[a b], index: %i[one two three], name: 'frame')
end
|
108
|
+
|
109
|
+
# Expects the group's subject (`df.summary`) to equal the literal below after
# `unindent` has been applied to it.
# NOTE(review): the literal is whitespace-sensitive, and this rendering appears
# to have stripped/collapsed the original spacing -- confirm against the real
# file before editing any of the expected lines.
it { is_expected.to eq %Q{
|
110
|
+
|= frame
|
111
|
+
| Number of rows: 3
|
112
|
+
| Element:[a]
|
113
|
+
| == a
|
114
|
+
| n :3
|
115
|
+
| non-missing:3
|
116
|
+
| median: 2
|
117
|
+
| mean: 2.6667
|
118
|
+
| std.dev.: 2.0817
|
119
|
+
| std.err.: 1.2019
|
120
|
+
| skew: 0.2874
|
121
|
+
| kurtosis: -2.3333
|
122
|
+
| Element:[b]
|
123
|
+
| == b
|
124
|
+
| n :3
|
125
|
+
| non-missing:3
|
126
|
+
| factors: 1,2,string
|
127
|
+
| mode: 1,2,string
|
128
|
+
| Distribution
|
129
|
+
| 1 1 100.00%
|
130
|
+
| 2 1 100.00%
|
131
|
+
| string 1 100.00%
|
132
|
+
}.unindent }
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
@@ -0,0 +1,180 @@
|
|
1
|
+
shared_examples_for 'a convertible DataFrame' do
|
2
|
+
describe '#create_sql' do
|
3
|
+
subject { df.create_sql('foo') }
|
4
|
+
|
5
|
+
# Frame with an integer column, a text column and a column of date-like strings.
let(:df) do
  columns = {
    a: [1, 2, 3],
    b: %w[test me please],
    c: %w[2015-06-01 2015-06-02 2015-06-03]
  }

  DaruLite::DataFrame.new(columns, name: 'test')
end
|
15
|
+
|
16
|
+
# Expects the group's subject (`df.create_sql('foo')`) to equal the literal
# below after `unindent` has been applied to it.
# NOTE(review): the literal is whitespace-sensitive, and this rendering appears
# to have stripped/collapsed the original spacing -- confirm against the real
# file before editing any of the expected lines.
it { is_expected.to eq %Q{
|
17
|
+
|CREATE TABLE foo (a INTEGER,
|
18
|
+
| b VARCHAR (255),
|
19
|
+
| c DATE) CHARACTER SET=UTF8;
|
20
|
+
}.unindent }
|
21
|
+
end
|
22
|
+
|
23
|
+
# Converting a DataFrame to a DataFrame is expected to give back an equal frame.
# `df` is not defined in this group; it comes from the including spec.
describe '#to_df' do
  subject { df.to_df }

  it do
    is_expected.to eq(df)
  end
end
|
28
|
+
|
29
|
+
# The expected Matrix holds columns a, b, c and d (including d's nil) and
# leaves out the string column e.
describe "#to_matrix" do
  subject { df.to_matrix }

  let(:df) do
    columns = {
      b: [11, 12, 13, 14, 15],
      a: [1, 2, 3, 4, 5],
      c: [11, 22, 33, 44, 55],
      d: [5, 4, nil, 2, 1],
      e: %w[this has string data too]
    }

    DaruLite::DataFrame.new(columns, order: %i[a b c d e], index: %i[one two three four five])
  end

  it "concats numeric non-nil vectors to Matrix" do
    expected_rows = [
      [1, 11, 11, 5],
      [2, 12, 22, 4],
      [3, 13, 33, nil],
      [4, 14, 44, 2],
      [5, 15, 55, 1]
    ]

    is_expected.to eq(Matrix.rows(expected_rows))
  end
end
|
56
|
+
|
57
|
+
# The expected value of to_a is a pair: the rows as hashes, then the index labels.
# `df` is not defined in this group; it comes from the including spec.
describe "#to_a" do
  subject { df.to_a }

  context DaruLite::Index do
    it "converts DataFrame into array of hashes" do
      row_hashes = [
        { a: 1, b: 11, c: 11 },
        { a: 2, b: 12, c: 22 },
        { a: 3, b: 13, c: 33 },
        { a: 4, b: 14, c: 44 },
        { a: 5, b: 15, c: 55 }
      ]
      index_labels = %i[one two three four five]

      is_expected.to eq([row_hashes, index_labels])
    end
  end

  context DaruLite::MultiIndex do
    pending
  end
end
|
82
|
+
|
83
|
+
# Checks the parsed JSON produced by to_json(false) and to_json(true).
describe '#to_json' do
  subject { JSON.parse(json) }

  let(:df) do
    columns = { a: [1, 2, 3], b: [3, 4, 5], c: [6, 7, 8] }

    DaruLite::DataFrame.new(columns, index: %i[one two three], name: 'test')
  end

  context 'with index' do
    let(:json) { df.to_json(false) }

    # FIXME: is this the most reasonable output we can produce? -- zverok
    # A more reasonable shape, to me, would be something like
    #
    #   [
    #     {"index" => "one"  , "a"=>1, "b"=>3, "c"=>6},
    #     {"index" => "two"  , "a"=>2, "b"=>4, "c"=>7},
    #     {"index" => "three", "a"=>3, "b"=>5, "c"=>8}
    #   ]
    #
    # or maybe
    #
    #   [
    #     ["one"  , {"a"=>1, "b"=>3, "c"=>6}],
    #     ["two"  , {"a"=>2, "b"=>4, "c"=>7}],
    #     ["three", {"a"=>3, "b"=>5, "c"=>8}]
    #   ]
    #
    # or even
    #
    #   {
    #     "one"   => {"a"=>1, "b"=>3, "c"=>6},
    #     "two"   => {"a"=>2, "b"=>4, "c"=>7},
    #     "three" => {"a"=>3, "b"=>5, "c"=>8}
    #   }
    #
    it do
      records = [
        { "a" => 1, "b" => 3, "c" => 6 },
        { "a" => 2, "b" => 4, "c" => 7 },
        { "a" => 3, "b" => 5, "c" => 8 }
      ]

      is_expected.to eq([records, %w[one two three]])
    end
  end

  context 'without index' do
    let(:json) { df.to_json(true) }

    it do
      records = [
        { "a" => 1, "b" => 3, "c" => 6 },
        { "a" => 2, "b" => 4, "c" => 7 },
        { "a" => 3, "b" => 5, "c" => 8 }
      ]

      is_expected.to eq(records)
    end
  end
end
|
145
|
+
|
146
|
+
# to_h is expected to map each vector name to its Vector, each carrying the frame's index.
# `df` is not defined in this group; it comes from the including spec.
describe "#to_h" do
  subject { df.to_h }

  it "converts to a hash" do
    # Builds one expected column; the index literal is re-created on every call.
    column = lambda do |values|
      DaruLite::Vector.new(values, index: %i[one two three four five])
    end

    is_expected.to eq(
      {
        a: column.call([1, 2, 3, 4, 5]),
        b: column.call([11, 12, 13, 14, 15]),
        c: column.call([11, 22, 33, 44, 55])
      }
    )
  end
end
|
162
|
+
|
163
|
+
# Short textual description: class, optional name, and the size shown as (5x3).
# `df` is not defined in this group; it comes from the including spec.
describe '#to_s' do
  subject { df.to_s }

  it 'produces a class, size description' do
    is_expected.to eq("#<DaruLite::DataFrame(5x3)>")
  end

  it 'produces a class, name, size description' do
    # The name has to be assigned before the lazily evaluated subject is first read.
    df.name = "Test"

    is_expected.to eq("#<DaruLite::DataFrame: Test(5x3)>")
  end

  it 'produces a class, name, size description when the name is a symbol' do
    df.name = :Test

    is_expected.to eq("#<DaruLite::DataFrame: Test(5x3)>")
  end
end
|
180
|
+
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
shared_examples_for 'a duplicatable DataFrame' do
|
2
|
+
# dup is expected to hand back new objects for the frame, its indexes and its vectors.
# `df` and `df_mi` are not defined in this group; they come from the including spec.
describe "#dup" do
  context DaruLite::Index do
    subject { df.dup }

    it "dups every data structure inside DataFrame" do
      # The frame itself and both of its indexes must be different objects.
      expect(subject.object_id).not_to eq(df.object_id)
      expect(subject.vectors.object_id).not_to eq(df.vectors.object_id)
      expect(subject.index.object_id).not_to eq(df.index.object_id)

      # So must every vector, and the array each vector converts to.
      df.each_vector_with_index do |source_vector, name|
        expect(source_vector.object_id).not_to eq(subject[name].object_id)
        expect(source_vector.to_a.object_id).not_to eq(subject[name].to_a.object_id)
      end
    end
  end

  context DaruLite::MultiIndex do
    subject { df_mi.dup }

    it "duplicates with multi index" do
      is_expected.to eq(df_mi)
      expect(subject.vectors.object_id).not_to eq(df_mi.vectors.object_id)
      expect(subject.index.object_id).not_to eq(df_mi.index.object_id)
    end
  end
end
|
28
|
+
|
29
|
+
# clone_structure is expected to keep index and vector names while every value of :a is nil.
# `df` is not defined in this group; it comes from the including spec.
describe "#clone_structure" do
  subject { df.clone_structure }

  it "clones only the index and vector structures of the data frame" do
    expect(subject.vectors).to eq(df.vectors)
    expect(subject.index).to eq(df.index)

    all_nil = DaruLite::Vector.new([nil] * subject[:a].size, index: df.index)
    expect(subject[:a]).to eq(all_nil)
  end
end
|
38
|
+
|
39
|
+
# clone is expected to return a new frame object that shares the original's
# vector objects, for all vectors or for a selection of them.
# `df` is not defined in this group; it comes from the including spec.
describe "#clone" do
  subject { df.clone }

  context 'no argument is passed' do
    # Uses the group-level subject (`df.clone`); no override is needed here.
    it "returns a view of the whole dataframe" do
      expect(df.object_id).not_to eq(subject.object_id)
      expect(df[:a].object_id).to eq(subject[:a].object_id)
      expect(df[:b].object_id).to eq(subject[:b].object_id)
      expect(df[:c].object_id).to eq(subject[:c].object_id)
    end
  end

  context 'vector names are passed' do
    subject { df.clone(:a, :b) }

    it "returns a view of selected vectors" do
      expect(subject.object_id).not_to eq(df.object_id)
      expect(subject[:a].object_id).to eq(df[:a].object_id)
      expect(subject[:b].object_id).to eq(df[:b].object_id)
    end
  end

  context 'array of vector names is passed' do
    subject { df.clone([:a, :b]) }

    it "clones properly when supplied array" do
      expect(subject.object_id).not_to eq(df.object_id)
      expect(subject[:a].object_id).to eq(df[:a].object_id)
      expect(subject[:b].object_id).to eq(df[:b].object_id)
    end
  end

  it "original dataframe remains unaffected when operations are applied on subject data frame" do
    # Snapshot the source frame, then change the clone only.
    original = df.dup
    subject.delete_vector :a

    expect(df).to eq(original)
  end
end
|
80
|
+
|
81
|
+
# clone_only_valid is compared with reject_values when values are missing,
# and with a plain clone when nothing is missing.
describe "#clone_only_valid" do
  subject { df.clone_only_valid }

  context 'df has missing values' do
    let(:df) do
      columns = {
        a: [1, 2, 3, nil, 4, nil, 5],
        b: [nil, 2, 3, nil, 4, nil, 5],
        c: [1, 2, 3, 43, 4, nil, 5]
      }

      DaruLite::DataFrame.new(columns)
    end

    it 'clones only valid values' do
      is_expected.to eq(df.reject_values(*DaruLite::MISSING_VALUES))
    end
  end

  context 'df has no missing values' do
    let(:df) do
      columns = {
        a: [2, 3, 4, 5],
        c: [2, 3, 4, 5]
      }

      DaruLite::DataFrame.new(columns)
    end

    it 'clones all values' do
      is_expected.to eq(df.clone)
    end
  end
end
|
111
|
+
end
|