daru_lite 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +35 -33
  3. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  4. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  5. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  6. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  7. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  8. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  9. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  10. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  11. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  12. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  13. data/lib/daru_lite/data_frame/missable.rb +75 -0
  14. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  15. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  16. data/lib/daru_lite/data_frame/setable.rb +109 -0
  17. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  18. data/lib/daru_lite/dataframe.rb +138 -2353
  19. data/lib/daru_lite/index/index.rb +13 -0
  20. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  21. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  22. data/lib/daru_lite/vector/calculatable.rb +78 -0
  23. data/lib/daru_lite/vector/convertible.rb +77 -0
  24. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  25. data/lib/daru_lite/vector/fetchable.rb +175 -0
  26. data/lib/daru_lite/vector/filterable.rb +128 -0
  27. data/lib/daru_lite/vector/indexable.rb +77 -0
  28. data/lib/daru_lite/vector/iterable.rb +95 -0
  29. data/lib/daru_lite/vector/joinable.rb +17 -0
  30. data/lib/daru_lite/vector/missable.rb +124 -0
  31. data/lib/daru_lite/vector/queryable.rb +45 -0
  32. data/lib/daru_lite/vector/setable.rb +47 -0
  33. data/lib/daru_lite/vector/sortable.rb +113 -0
  34. data/lib/daru_lite/vector.rb +36 -932
  35. data/lib/daru_lite/version.rb +1 -1
  36. data/spec/data_frame/aggregatable_example.rb +65 -0
  37. data/spec/data_frame/buildable_example.rb +109 -0
  38. data/spec/data_frame/calculatable_example.rb +135 -0
  39. data/spec/data_frame/convertible_example.rb +180 -0
  40. data/spec/data_frame/duplicatable_example.rb +111 -0
  41. data/spec/data_frame/fetchable_example.rb +476 -0
  42. data/spec/data_frame/filterable_example.rb +250 -0
  43. data/spec/data_frame/indexable_example.rb +221 -0
  44. data/spec/data_frame/iterable_example.rb +465 -0
  45. data/spec/data_frame/joinable_example.rb +106 -0
  46. data/spec/data_frame/missable_example.rb +47 -0
  47. data/spec/data_frame/pivotable_example.rb +297 -0
  48. data/spec/data_frame/queryable_example.rb +92 -0
  49. data/spec/data_frame/setable_example.rb +482 -0
  50. data/spec/data_frame/sortable_example.rb +350 -0
  51. data/spec/dataframe_spec.rb +181 -3289
  52. data/spec/index/index_spec.rb +8 -0
  53. data/spec/vector/aggregatable_example.rb +27 -0
  54. data/spec/vector/calculatable_example.rb +82 -0
  55. data/spec/vector/convertible_example.rb +126 -0
  56. data/spec/vector/duplicatable_example.rb +48 -0
  57. data/spec/vector/fetchable_example.rb +463 -0
  58. data/spec/vector/filterable_example.rb +165 -0
  59. data/spec/vector/indexable_example.rb +201 -0
  60. data/spec/vector/iterable_example.rb +111 -0
  61. data/spec/vector/joinable_example.rb +25 -0
  62. data/spec/vector/missable_example.rb +88 -0
  63. data/spec/vector/queryable_example.rb +91 -0
  64. data/spec/vector/setable_example.rb +300 -0
  65. data/spec/vector/sortable_example.rb +242 -0
  66. data/spec/vector_spec.rb +111 -1805
  67. metadata +86 -2
@@ -1,3 +1,3 @@
1
1
  module DaruLite
2
- VERSION = '0.1.1'.freeze
2
+ VERSION = '0.1.2'.freeze
3
3
  end
@@ -0,0 +1,65 @@
1
+ shared_examples_for 'an aggregatable DataFrame' do
2
+ describe "#group_by" do
3
+ context "on a single row DataFrame" do
4
+ subject { df.group_by([:city]) }
5
+
6
+ let(:df){ DaruLite::DataFrame.new(city: %w[Kyiv], year: [2015], value: [1]) }
7
+
8
+ it "returns a groupby object" do
9
+ expect(subject).to be_a(DaruLite::Core::GroupBy)
10
+ end
11
+
12
+ it "has the correct index" do
13
+ expect(subject.groups).to eq({["Kyiv"]=>[0]})
14
+ end
15
+ end
16
+ end
17
+
18
+ describe '#aggregate' do
19
+ let(:cat_idx) { DaruLite::CategoricalIndex.new [:a, :b, :a, :a, :c] }
20
+ let(:df) { DaruLite::DataFrame.new(num: [52,12,07,17,01], cat_index: cat_idx) }
21
+ let(:df_cat_idx) do
22
+ DaruLite::DataFrame.new({num: [52,12,07,17,01]}, index: cat_idx)
23
+ end
24
+
25
+ it 'lambda function on particular column' do
26
+ expect(df.aggregate(num_100_times: ->(df) { (df.num*100).first })).to eq(
27
+ DaruLite::DataFrame.new(num_100_times: [5200, 1200, 700, 1700, 100])
28
+ )
29
+ end
30
+
31
+ it 'aggregate sum on particular column' do
32
+ expect(df_cat_idx.aggregate(num: :sum)).to eq(
33
+ DaruLite::DataFrame.new({num: [76, 12, 1]}, index: [:a, :b, :c])
34
+ )
35
+ end
36
+ end
37
+
38
+ describe '#group_by_and_aggregate' do
39
+ let(:spending_df) do
40
+ DaruLite::DataFrame.rows([
41
+ [2010, 'dev', 50, 1],
42
+ [2010, 'dev', 150, 1],
43
+ [2010, 'dev', 200, 1],
44
+ [2011, 'dev', 50, 1],
45
+ [2012, 'dev', 150, 1],
46
+
47
+ [2011, 'office', 300, 1],
48
+
49
+ [2010, 'market', 50, 1],
50
+ [2011, 'market', 500, 1],
51
+ [2012, 'market', 500, 1],
52
+ [2012, 'market', 300, 1],
53
+
54
+ [2012, 'R&D', 10, 1],],
55
+ order: [:year, :category, :spending, :nb_spending])
56
+ end
57
+
58
+ it 'works as group_by + aggregate' do
59
+ expect(spending_df.group_by_and_aggregate(:year, spending: :sum)).to eq(
60
+ spending_df.group_by(:year).aggregate(spending: :sum))
61
+ expect(spending_df.group_by_and_aggregate([:year, :category], spending: :sum, nb_spending: :size)).to eq(
62
+ spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending: :size))
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,109 @@
1
+ shared_examples_for 'a buildable DataFrame' do
2
+ describe "::rows" do
3
+ let(:rows) do
4
+ [
5
+ [1,2,3,4,5],
6
+ [1,2,3,4,5],
7
+ [1,2,3,4,5],
8
+ [1,2,3,4,5]
9
+ ]
10
+ end
11
+
12
+ context DaruLite::Index do
13
+ it "creates a DataFrame from Array rows" do
14
+ df = DaruLite::DataFrame.rows(rows, order: [:a,:b,:c,:d,:e])
15
+
16
+ expect(df.index).to eq(DaruLite::Index.new [0,1,2,3])
17
+ expect(df.vectors).to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
18
+ expect(df[:a]).to eq(DaruLite::Vector.new [1,1,1,1])
19
+ end
20
+
21
+ it "creates empty dataframe" do
22
+ df = DaruLite::DataFrame.rows([], order: [:a, :b, :c])
23
+
24
+ expect(df.vectors).to eq(DaruLite::Index.new [:a,:b,:c])
25
+ expect(df.index).to be_empty
26
+ end
27
+
28
+ it "creates a DataFrame from Vector rows" do
29
+ vector_rows = rows.map { |r| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e] }
30
+
31
+ df = DaruLite::DataFrame.rows(vector_rows, order: [:a,:b,:c,:d,:e])
32
+
33
+ expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
34
+ expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
35
+ expect(df[:a]) .to eq(DaruLite::Vector.new [1,1,1,1])
36
+ end
37
+
38
+ it 'derives index & order from arrays' do
39
+ df = DaruLite::DataFrame.rows(rows)
40
+ expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
41
+ expect(df.vectors) .to eq(DaruLite::Index.new %w[0 1 2 3 4])
42
+ end
43
+
44
+ it 'derives index & order from vectors' do
45
+ vector_rows = rows.zip(%w[w x y z]).map { |r, n| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e], name: n }
46
+ df = DaruLite::DataFrame.rows(vector_rows)
47
+ expect(df.index) .to eq(DaruLite::Index.new %w[w x y z])
48
+ expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
49
+ end
50
+
51
+ it 'behaves, when rows are repeated' do
52
+ vector_rows = rows.zip(%w[w w y z]).map { |r, n| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e], name: n }
53
+ df = DaruLite::DataFrame.rows(vector_rows)
54
+ expect(df.index) .to eq(DaruLite::Index.new %w[w_1 w_2 y z])
55
+ expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
56
+ end
57
+
58
+ it 'behaves, when vectors are unnamed' do
59
+ vector_rows = rows.map { |r| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e] }
60
+ df = DaruLite::DataFrame.rows(vector_rows)
61
+ expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
62
+ expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
63
+ end
64
+ end
65
+
66
+ context DaruLite::MultiIndex do
67
+ it "creates a DataFrame from rows" do
68
+ df = DaruLite::DataFrame.rows(
69
+ rows*3, index: multi_index, order: [:a,:b,:c,:d,:e])
70
+
71
+ expect(df.index).to eq(multi_index)
72
+ expect(df.vectors).to eq(DaruLite::Index.new([:a,:b,:c,:d,:e]))
73
+ expect(df[:a]).to eq(DaruLite::Vector.new([1]*12, index: multi_index))
74
+ end
75
+
76
+ it "crates a DataFrame from rows (MultiIndex order)" do
77
+ rows = [
78
+ [11, 1, 11, 1],
79
+ [12, 2, 12, 2],
80
+ [13, 3, 13, 3],
81
+ [14, 4, 14, 4]
82
+ ]
83
+ index = DaruLite::MultiIndex.from_tuples([
84
+ [:one,:bar],
85
+ [:one,:baz],
86
+ [:two,:foo],
87
+ [:two,:bar]
88
+ ])
89
+
90
+ df = DaruLite::DataFrame.rows(rows, index: index, order: order_mi)
91
+ expect(df.index).to eq(index)
92
+ expect(df.vectors).to eq(order_mi)
93
+ expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new([11,12,13,14],
94
+ index: index))
95
+ end
96
+
97
+ it "creates a DataFrame from Vector rows" do
98
+ rows3 = rows*3
99
+ rows3.map! { |r| DaruLite::Vector.new(r, index: multi_index) }
100
+
101
+ df = DaruLite::DataFrame.rows(rows3, order: multi_index)
102
+
103
+ expect(df.index).to eq(DaruLite::Index.new(Array.new(rows3.size) { |i| i }))
104
+ expect(df.vectors).to eq(multi_index)
105
+ expect(df[:a,:one,:bar]).to eq(DaruLite::Vector.new([1]*12))
106
+ end
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,135 @@
1
+ shared_examples_for 'a calculatable DataFrame' do
2
+ context "#vector_sum" do
3
+ let(:df) do
4
+ a1 = DaruLite::Vector.new [1, 2, 3, 4, 5, nil, nil]
5
+ a2 = DaruLite::Vector.new [10, 10, 20, 20, 20, 30, nil]
6
+ b1 = DaruLite::Vector.new [nil, 1, 1, 1, 1, 2, nil]
7
+ b2 = DaruLite::Vector.new [2, 2, 2, nil, 2, 3, nil]
8
+ DaruLite::DataFrame.new({ a1:, a2:, b1:, b2: })
9
+ end
10
+
11
+ it "calculates complete vector sum" do
12
+ expect(df.vector_sum).to eq(DaruLite::Vector.new [nil, 15, 26, nil, 28, nil, nil])
13
+ end
14
+
15
+ it "ignores nils if skipnil is true" do
16
+ expect(df.vector_sum skipnil: true).to eq(DaruLite::Vector.new [13, 15, 26, 25, 28, 35, 0])
17
+ end
18
+
19
+ it "calculates partial vector sum" do
20
+ a = df.vector_sum([:a1, :a2])
21
+ b = df.vector_sum([:b1, :b2])
22
+
23
+ expect(a).to eq(DaruLite::Vector.new [11, 12, 23, 24, 25, nil, nil])
24
+ expect(b).to eq(DaruLite::Vector.new [nil, 3, 3, nil, 3, 5, nil])
25
+ end
26
+ end
27
+
28
+ describe "#vector_mean" do
29
+ let(:df) do
30
+ a1 = DaruLite::Vector.new [1, 2, 3, 4, 5, nil]
31
+ a2 = DaruLite::Vector.new [10, 10, 20, 20, 20, 30]
32
+ b1 = DaruLite::Vector.new [nil, 1, 1, 1, 1, 2]
33
+ b2 = DaruLite::Vector.new [2, 2, 2, nil, 2, 3]
34
+ c = DaruLite::Vector.new [nil, 2, 4, 2, 2, 2]
35
+ DaruLite::DataFrame.new({ a1:, a2:, b1:, b2:, c: })
36
+ end
37
+
38
+ it "calculates complete vector mean" do
39
+ expect(df.vector_mean).to eq(
40
+ DaruLite::Vector.new [nil, 3.4, 6, nil, 6.0, nil]
41
+ )
42
+ end
43
+ end
44
+
45
+ describe "#compute" do
46
+ let(:vnumeric) { DaruLite::Vector.new [0, 0, 1, 4] }
47
+ let(:vsum) { DaruLite::Vector.new [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0] }
48
+ let(:vmult) { DaruLite::Vector.new [1 * 4, 2 * 3, 3 * 2, 4 * 1] }
49
+ let(:df) do
50
+ v1 = DaruLite::Vector.new [1, 2, 3, 4]
51
+ v2 = DaruLite::Vector.new [4, 3, 2, 1]
52
+ v3 = DaruLite::Vector.new [10, 20, 30, 40]
53
+
54
+ DaruLite::DataFrame.new({ v1:, v2:, v3: })
55
+ end
56
+
57
+ it "performs a computation when supplied in a string" do
58
+ expect(df.compute("v1/v2")).to eq(vnumeric)
59
+ expect(df.compute("v1+v2+v3")).to eq(vsum)
60
+ expect(df.compute("v1*v2")).to eq(vmult)
61
+ end
62
+ end
63
+
64
+ describe "#vector_by_calculation" do
65
+ subject { df.vector_by_calculation { a + b + c } }
66
+
67
+ let(:df) do
68
+ a1 = DaruLite::Vector.new([1, 2, 3, 4, 5, 6, 7])
69
+ a2 = DaruLite::Vector.new([10, 20, 30, 40, 50, 60, 70])
70
+ a3 = DaruLite::Vector.new([100, 200, 300, 400, 500, 600, 700])
71
+ DaruLite::DataFrame.new({ :a => a1, :b => a2, :c => a3 })
72
+ end
73
+
74
+ it "DSL for returning vector of each calculation" do
75
+ expect(subject).to eq(DaruLite::Vector.new([111, 222, 333, 444, 555, 666, 777]))
76
+ end
77
+ end
78
+
79
+ describe "#vector_count_characters" do
80
+ subject { df.vector_count_characters }
81
+ let(:df) do
82
+ a1 = DaruLite::Vector.new( [1, 'abcde', 3, 4, 5, nil])
83
+ a2 = DaruLite::Vector.new( [10, 20.3, 20, 20, 20, 30])
84
+ b1 = DaruLite::Vector.new( [nil, '343434', 1, 1, 1, 2])
85
+ b2 = DaruLite::Vector.new( [2, 2, 2, nil, 2, 3])
86
+ c = DaruLite::Vector.new([nil, 2, 'This is a nice example', 2, 2, 2])
87
+
88
+ DaruLite::DataFrame.new({ a1:, a2:, b1:, b2:, c: })
89
+ end
90
+
91
+ it "returns correct values" do
92
+ expect(subject).to eq(DaruLite::Vector.new([4, 17, 27, 5, 6, 5]))
93
+ end
94
+ end
95
+
96
+ describe "#summary" do
97
+ subject { df.summary }
98
+
99
+ context "DataFrame" do
100
+ let(:df) do
101
+ DaruLite::DataFrame.new(
102
+ { a: [1,2,5], b: [1,2,"string"] },
103
+ order: [:a, :b],
104
+ index: [:one, :two, :three],
105
+ name: 'frame'
106
+ )
107
+ end
108
+
109
+ it { is_expected.to eq %Q{
110
+ |= frame
111
+ | Number of rows: 3
112
+ | Element:[a]
113
+ | == a
114
+ | n :3
115
+ | non-missing:3
116
+ | median: 2
117
+ | mean: 2.6667
118
+ | std.dev.: 2.0817
119
+ | std.err.: 1.2019
120
+ | skew: 0.2874
121
+ | kurtosis: -2.3333
122
+ | Element:[b]
123
+ | == b
124
+ | n :3
125
+ | non-missing:3
126
+ | factors: 1,2,string
127
+ | mode: 1,2,string
128
+ | Distribution
129
+ | 1 1 100.00%
130
+ | 2 1 100.00%
131
+ | string 1 100.00%
132
+ }.unindent }
133
+ end
134
+ end
135
+ end
@@ -0,0 +1,180 @@
1
+ shared_examples_for 'a convertible DataFrame' do
2
+ describe '#create_sql' do
3
+ subject { df.create_sql('foo') }
4
+
5
+ let(:df) do
6
+ DaruLite::DataFrame.new(
7
+ {
8
+ a: [1,2,3],
9
+ b: ['test', 'me', 'please'],
10
+ c: ['2015-06-01', '2015-06-02', '2015-06-03']
11
+ },
12
+ name: 'test'
13
+ )
14
+ end
15
+
16
+ it { is_expected.to eq %Q{
17
+ |CREATE TABLE foo (a INTEGER,
18
+ | b VARCHAR (255),
19
+ | c DATE) CHARACTER SET=UTF8;
20
+ }.unindent }
21
+ end
22
+
23
+ describe '#to_df' do
24
+ subject { df.to_df }
25
+
26
+ it { is_expected.to eq(df) }
27
+ end
28
+
29
+ describe "#to_matrix" do
30
+ subject { df.to_matrix }
31
+
32
+ let(:df) do
33
+ DaruLite::DataFrame.new(
34
+ {
35
+ b: [11,12,13,14,15],
36
+ a: [1,2,3,4,5],
37
+ c: [11,22,33,44,55],
38
+ d: [5,4,nil,2,1],
39
+ e: ['this', 'has', 'string','data','too']
40
+ },
41
+ order: [:a, :b, :c,:d,:e],
42
+ index: [:one, :two, :three, :four, :five]
43
+ )
44
+ end
45
+
46
+ it "concats numeric non-nil vectors to Matrix" do
47
+ expect(subject).to eq(Matrix[
48
+ [1,11,11,5],
49
+ [2,12,22,4],
50
+ [3,13,33,nil],
51
+ [4,14,44,2],
52
+ [5,15,55,1]
53
+ ])
54
+ end
55
+ end
56
+
57
+ describe "#to_a" do
58
+ subject { df.to_a }
59
+
60
+ context DaruLite::Index do
61
+ it "converts DataFrame into array of hashes" do
62
+ expect(subject).to eq(
63
+ [
64
+ [
65
+ {a: 1, b: 11, c: 11},
66
+ {a: 2, b: 12, c: 22},
67
+ {a: 3, b: 13, c: 33},
68
+ {a: 4, b: 14, c: 44},
69
+ {a: 5, b: 15, c: 55}
70
+ ],
71
+ [
72
+ :one, :two, :three, :four, :five
73
+ ]
74
+ ])
75
+ end
76
+ end
77
+
78
+ context DaruLite::MultiIndex do
79
+ pending
80
+ end
81
+ end
82
+
83
+ describe '#to_json' do
84
+ subject { JSON.parse(json) }
85
+
86
+ let(:df) do
87
+ DaruLite::DataFrame.new(
88
+ { a: [1,2,3], b: [3,4,5], c: [6,7,8]},
89
+ index: [:one, :two, :three],
90
+ name: 'test'
91
+ )
92
+ end
93
+
94
+ context 'with index' do
95
+ let(:json) { df.to_json(false) }
96
+ # FIXME: is this the most reasonable thing we can do?.. -- zverok
97
+ # For me, a more reasonable thing would be something like
98
+ #
99
+ # [
100
+ # {"index" => "one" , "a"=>1, "b"=>3, "c"=>6},
101
+ # {"index" => "two" , "a"=>2, "b"=>4, "c"=>7},
102
+ # {"index" => "three", "a"=>3, "b"=>5, "c"=>8}
103
+ # ]
104
+ #
105
+ # Or maybe
106
+ #
107
+ # [
108
+ # ["one" , {"a"=>1, "b"=>3, "c"=>6}],
109
+ # ["two" , {"a"=>2, "b"=>4, "c"=>7}],
110
+ # ["three", {"a"=>3, "b"=>5, "c"=>8}]
111
+ # ]
112
+ #
113
+ # Or even
114
+ #
115
+ # {
116
+ # "one" => {"a"=>1, "b"=>3, "c"=>6},
117
+ # "two" => {"a"=>2, "b"=>4, "c"=>7},
118
+ # "three" => {"a"=>3, "b"=>5, "c"=>8}
119
+ # }
120
+ #
121
+ it { is_expected.to eq(
122
+ [
123
+ [
124
+ {"a"=>1, "b"=>3, "c"=>6},
125
+ {"a"=>2, "b"=>4, "c"=>7},
126
+ {"a"=>3, "b"=>5, "c"=>8}
127
+ ],
128
+ ["one", "two", "three"]
129
+ ]
130
+ )}
131
+ end
132
+
133
+ context 'without index' do
134
+ let(:json) { df.to_json(true) }
135
+
136
+ it { is_expected.to eq(
137
+ [
138
+ {"a"=>1, "b"=>3, "c"=>6},
139
+ {"a"=>2, "b"=>4, "c"=>7},
140
+ {"a"=>3, "b"=>5, "c"=>8}
141
+ ]
142
+ )}
143
+ end
144
+ end
145
+
146
+ describe "#to_h" do
147
+ subject { df.to_h }
148
+
149
+ it "converts to a hash" do
150
+ expect(subject).to eq(
151
+ {
152
+ a: DaruLite::Vector.new([1,2,3,4,5],
153
+ index: [:one, :two, :three, :four, :five]),
154
+ b: DaruLite::Vector.new([11,12,13,14,15],
155
+ index: [:one, :two, :three, :four, :five]),
156
+ c: DaruLite::Vector.new([11,22,33,44,55],
157
+ index: [:one, :two, :three, :four, :five])
158
+ }
159
+ )
160
+ end
161
+ end
162
+
163
+ describe '#to_s' do
164
+ subject { df.to_s }
165
+
166
+ it 'produces a class, size description' do
167
+ expect(subject).to eq "#<DaruLite::DataFrame(5x3)>"
168
+ end
169
+
170
+ it 'produces a class, name, size description' do
171
+ df.name = "Test"
172
+ expect(subject).to eq "#<DaruLite::DataFrame: Test(5x3)>"
173
+ end
174
+
175
+ it 'produces a class, name, size description when the name is a symbol' do
176
+ df.name = :Test
177
+ expect(subject).to eq "#<DaruLite::DataFrame: Test(5x3)>"
178
+ end
179
+ end
180
+ end
@@ -0,0 +1,111 @@
1
+ shared_examples_for 'a duplicatable DataFrame' do
2
+ describe "#dup" do
3
+ context DaruLite::Index do
4
+ subject { df.dup }
5
+
6
+ it "dups every data structure inside DataFrame" do
7
+ expect(subject.object_id).not_to eq(df.object_id)
8
+ expect(subject.vectors.object_id).not_to eq(df.vectors.object_id)
9
+ expect(subject.index.object_id).not_to eq(df.index.object_id)
10
+
11
+ df.each_vector_with_index do |vector, index|
12
+ expect(vector.object_id).not_to eq(subject[index].object_id)
13
+ expect(vector.to_a.object_id).not_to eq(subject[index].to_a.object_id)
14
+ end
15
+ end
16
+ end
17
+
18
+ context DaruLite::MultiIndex do
19
+ subject { df_mi.dup }
20
+
21
+ it "duplicates with multi index" do
22
+ expect(subject).to eq(df_mi)
23
+ expect(subject.vectors.object_id).not_to eq(df_mi.vectors.object_id)
24
+ expect(subject.index.object_id).not_to eq(df_mi.index.object_id)
25
+ end
26
+ end
27
+ end
28
+
29
+ describe "#clone_structure" do
30
+ subject { df.clone_structure }
31
+
32
+ it "clones only the index and vector structures of the data frame" do
33
+ expect(subject.vectors).to eq(df.vectors)
34
+ expect(subject.index).to eq(df.index)
35
+ expect(subject[:a]).to eq(DaruLite::Vector.new([nil] * subject[:a].size, index: df.index))
36
+ end
37
+ end
38
+
39
+ describe "#clone" do
40
+ subject { df.clone }
41
+
42
+ context 'no argument is passed' do
43
+ subject { df.clone }
44
+
45
+ it "returns a view of the whole dataframe" do
46
+ expect(df.object_id).to_not eq(subject.object_id)
47
+ expect(df[:a].object_id).to eq(subject[:a].object_id)
48
+ expect(df[:b].object_id).to eq(subject[:b].object_id)
49
+ expect(df[:c].object_id).to eq(subject[:c].object_id)
50
+ end
51
+ end
52
+
53
+ context 'vector names are passed' do
54
+ subject { df.clone(:a, :b) }
55
+
56
+ it "returns a view of selected vectors" do
57
+ expect(subject.object_id).to_not eq(df.object_id)
58
+ expect(subject[:a].object_id).to eq(df[:a].object_id)
59
+ expect(subject[:b].object_id).to eq(df[:b].object_id)
60
+ end
61
+ end
62
+
63
+ context 'array of vector names is passed' do
64
+ subject { df.clone([:a, :b]) }
65
+
66
+ it "clones properly when supplied array" do
67
+ expect(subject.object_id).to_not eq(df.object_id)
68
+ expect(subject[:a].object_id).to eq(df[:a].object_id)
69
+ expect(subject[:b].object_id).to eq(df[:b].object_id)
70
+ end
71
+ end
72
+
73
+ it "original dataframe remains unaffected when operations are applied on subject data frame" do
74
+ original = df.dup
75
+ subject.delete_vector :a
76
+
77
+ expect(df).to eq(original)
78
+ end
79
+ end
80
+
81
+ describe "#clone_only_valid" do
82
+ subject { df.clone_only_valid }
83
+
84
+ context 'df has missing values' do
85
+ let(:df) do
86
+ DaruLite::DataFrame.new({
87
+ a: [1 , 2, 3, nil, 4, nil, 5],
88
+ b: [nil, 2, 3, nil, 4, nil, 5],
89
+ c: [1, 2, 3, 43 , 4, nil, 5]
90
+ })
91
+ end
92
+
93
+ it 'clones only valid values' do
94
+ expect(subject).to eq(df.reject_values(*DaruLite::MISSING_VALUES))
95
+ end
96
+ end
97
+
98
+ context 'df has no missing values' do
99
+ let(:df) do
100
+ DaruLite::DataFrame.new({
101
+ a: [2,3,4,5],
102
+ c: [2,3,4,5]
103
+ })
104
+ end
105
+
106
+ it 'clones all values' do
107
+ expect(subject).to eq(df.clone)
108
+ end
109
+ end
110
+ end
111
+ end