daru_lite 0.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
  3. data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  4. data/.github/workflows/ci.yml +20 -0
  5. data/.rubocop_todo.yml +35 -33
  6. data/README.md +19 -115
  7. data/daru_lite.gemspec +1 -0
  8. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  9. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  10. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  11. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  12. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  13. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  14. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  15. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  16. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  17. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  18. data/lib/daru_lite/data_frame/missable.rb +75 -0
  19. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  20. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  21. data/lib/daru_lite/data_frame/setable.rb +109 -0
  22. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  23. data/lib/daru_lite/dataframe.rb +142 -2355
  24. data/lib/daru_lite/index/index.rb +13 -0
  25. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  26. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  27. data/lib/daru_lite/vector/calculatable.rb +78 -0
  28. data/lib/daru_lite/vector/convertible.rb +77 -0
  29. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  30. data/lib/daru_lite/vector/fetchable.rb +175 -0
  31. data/lib/daru_lite/vector/filterable.rb +128 -0
  32. data/lib/daru_lite/vector/indexable.rb +77 -0
  33. data/lib/daru_lite/vector/iterable.rb +95 -0
  34. data/lib/daru_lite/vector/joinable.rb +17 -0
  35. data/lib/daru_lite/vector/missable.rb +124 -0
  36. data/lib/daru_lite/vector/queryable.rb +45 -0
  37. data/lib/daru_lite/vector/setable.rb +47 -0
  38. data/lib/daru_lite/vector/sortable.rb +113 -0
  39. data/lib/daru_lite/vector.rb +36 -932
  40. data/lib/daru_lite/version.rb +1 -1
  41. data/spec/data_frame/aggregatable_example.rb +65 -0
  42. data/spec/data_frame/buildable_example.rb +109 -0
  43. data/spec/data_frame/calculatable_example.rb +135 -0
  44. data/spec/data_frame/convertible_example.rb +180 -0
  45. data/spec/data_frame/duplicatable_example.rb +111 -0
  46. data/spec/data_frame/fetchable_example.rb +476 -0
  47. data/spec/data_frame/filterable_example.rb +250 -0
  48. data/spec/data_frame/indexable_example.rb +221 -0
  49. data/spec/data_frame/iterable_example.rb +465 -0
  50. data/spec/data_frame/joinable_example.rb +106 -0
  51. data/spec/data_frame/missable_example.rb +47 -0
  52. data/spec/data_frame/pivotable_example.rb +297 -0
  53. data/spec/data_frame/queryable_example.rb +92 -0
  54. data/spec/data_frame/setable_example.rb +482 -0
  55. data/spec/data_frame/sortable_example.rb +350 -0
  56. data/spec/dataframe_spec.rb +181 -3243
  57. data/spec/index/index_spec.rb +8 -0
  58. data/spec/vector/aggregatable_example.rb +27 -0
  59. data/spec/vector/calculatable_example.rb +82 -0
  60. data/spec/vector/convertible_example.rb +126 -0
  61. data/spec/vector/duplicatable_example.rb +48 -0
  62. data/spec/vector/fetchable_example.rb +463 -0
  63. data/spec/vector/filterable_example.rb +165 -0
  64. data/spec/vector/indexable_example.rb +201 -0
  65. data/spec/vector/iterable_example.rb +111 -0
  66. data/spec/vector/joinable_example.rb +25 -0
  67. data/spec/vector/missable_example.rb +88 -0
  68. data/spec/vector/queryable_example.rb +91 -0
  69. data/spec/vector/setable_example.rb +300 -0
  70. data/spec/vector/sortable_example.rb +242 -0
  71. data/spec/vector_spec.rb +111 -1805
  72. metadata +102 -3
  73. data/.github/ISSUE_TEMPLATE.md +0 -18
@@ -1,3 +1,3 @@
1
1
  module DaruLite
2
- VERSION = '0.1'.freeze
2
+ VERSION = '0.1.2'.freeze
3
3
  end
@@ -0,0 +1,65 @@
1
+ shared_examples_for 'an aggregatable DataFrame' do
2
+ describe "#group_by" do
3
+ context "on a single row DataFrame" do
4
+ subject { df.group_by([:city]) }
5
+
6
+ let(:df){ DaruLite::DataFrame.new(city: %w[Kyiv], year: [2015], value: [1]) }
7
+
8
+ it "returns a groupby object" do
9
+ expect(subject).to be_a(DaruLite::Core::GroupBy)
10
+ end
11
+
12
+ it "has the correct index" do
13
+ expect(subject.groups).to eq({["Kyiv"]=>[0]})
14
+ end
15
+ end
16
+ end
17
+
18
+ describe '#aggregate' do
19
+ let(:cat_idx) { DaruLite::CategoricalIndex.new [:a, :b, :a, :a, :c] }
20
+ let(:df) { DaruLite::DataFrame.new(num: [52,12,07,17,01], cat_index: cat_idx) }
21
+ let(:df_cat_idx) do
22
+ DaruLite::DataFrame.new({num: [52,12,07,17,01]}, index: cat_idx)
23
+ end
24
+
25
+ it 'lambda function on particular column' do
26
+ expect(df.aggregate(num_100_times: ->(df) { (df.num*100).first })).to eq(
27
+ DaruLite::DataFrame.new(num_100_times: [5200, 1200, 700, 1700, 100])
28
+ )
29
+ end
30
+
31
+ it 'aggregate sum on particular column' do
32
+ expect(df_cat_idx.aggregate(num: :sum)).to eq(
33
+ DaruLite::DataFrame.new({num: [76, 12, 1]}, index: [:a, :b, :c])
34
+ )
35
+ end
36
+ end
37
+
38
+ describe '#group_by_and_aggregate' do
39
+ let(:spending_df) do
40
+ DaruLite::DataFrame.rows([
41
+ [2010, 'dev', 50, 1],
42
+ [2010, 'dev', 150, 1],
43
+ [2010, 'dev', 200, 1],
44
+ [2011, 'dev', 50, 1],
45
+ [2012, 'dev', 150, 1],
46
+
47
+ [2011, 'office', 300, 1],
48
+
49
+ [2010, 'market', 50, 1],
50
+ [2011, 'market', 500, 1],
51
+ [2012, 'market', 500, 1],
52
+ [2012, 'market', 300, 1],
53
+
54
+ [2012, 'R&D', 10, 1],],
55
+ order: [:year, :category, :spending, :nb_spending])
56
+ end
57
+
58
+ it 'works as group_by + aggregate' do
59
+ expect(spending_df.group_by_and_aggregate(:year, spending: :sum)).to eq(
60
+ spending_df.group_by(:year).aggregate(spending: :sum))
61
+ expect(spending_df.group_by_and_aggregate([:year, :category], spending: :sum, nb_spending: :size)).to eq(
62
+ spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending: :size))
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,109 @@
1
+ shared_examples_for 'a buildable DataFrame' do
2
+ describe "::rows" do
3
+ let(:rows) do
4
+ [
5
+ [1,2,3,4,5],
6
+ [1,2,3,4,5],
7
+ [1,2,3,4,5],
8
+ [1,2,3,4,5]
9
+ ]
10
+ end
11
+
12
+ context DaruLite::Index do
13
+ it "creates a DataFrame from Array rows" do
14
+ df = DaruLite::DataFrame.rows(rows, order: [:a,:b,:c,:d,:e])
15
+
16
+ expect(df.index).to eq(DaruLite::Index.new [0,1,2,3])
17
+ expect(df.vectors).to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
18
+ expect(df[:a]).to eq(DaruLite::Vector.new [1,1,1,1])
19
+ end
20
+
21
+ it "creates empty dataframe" do
22
+ df = DaruLite::DataFrame.rows([], order: [:a, :b, :c])
23
+
24
+ expect(df.vectors).to eq(DaruLite::Index.new [:a,:b,:c])
25
+ expect(df.index).to be_empty
26
+ end
27
+
28
+ it "creates a DataFrame from Vector rows" do
29
+ vector_rows = rows.map { |r| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e] }
30
+
31
+ df = DaruLite::DataFrame.rows(vector_rows, order: [:a,:b,:c,:d,:e])
32
+
33
+ expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
34
+ expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
35
+ expect(df[:a]) .to eq(DaruLite::Vector.new [1,1,1,1])
36
+ end
37
+
38
+ it 'derives index & order from arrays' do
39
+ df = DaruLite::DataFrame.rows(rows)
40
+ expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
41
+ expect(df.vectors) .to eq(DaruLite::Index.new %w[0 1 2 3 4])
42
+ end
43
+
44
+ it 'derives index & order from vectors' do
45
+ vector_rows = rows.zip(%w[w x y z]).map { |r, n| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e], name: n }
46
+ df = DaruLite::DataFrame.rows(vector_rows)
47
+ expect(df.index) .to eq(DaruLite::Index.new %w[w x y z])
48
+ expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
49
+ end
50
+
51
+ it 'behaves, when rows are repeated' do
52
+ vector_rows = rows.zip(%w[w w y z]).map { |r, n| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e], name: n }
53
+ df = DaruLite::DataFrame.rows(vector_rows)
54
+ expect(df.index) .to eq(DaruLite::Index.new %w[w_1 w_2 y z])
55
+ expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
56
+ end
57
+
58
+ it 'behaves, when vectors are unnamed' do
59
+ vector_rows = rows.map { |r| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e] }
60
+ df = DaruLite::DataFrame.rows(vector_rows)
61
+ expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
62
+ expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
63
+ end
64
+ end
65
+
66
+ context DaruLite::MultiIndex do
67
+ it "creates a DataFrame from rows" do
68
+ df = DaruLite::DataFrame.rows(
69
+ rows*3, index: multi_index, order: [:a,:b,:c,:d,:e])
70
+
71
+ expect(df.index).to eq(multi_index)
72
+ expect(df.vectors).to eq(DaruLite::Index.new([:a,:b,:c,:d,:e]))
73
+ expect(df[:a]).to eq(DaruLite::Vector.new([1]*12, index: multi_index))
74
+ end
75
+
76
+ it "crates a DataFrame from rows (MultiIndex order)" do
77
+ rows = [
78
+ [11, 1, 11, 1],
79
+ [12, 2, 12, 2],
80
+ [13, 3, 13, 3],
81
+ [14, 4, 14, 4]
82
+ ]
83
+ index = DaruLite::MultiIndex.from_tuples([
84
+ [:one,:bar],
85
+ [:one,:baz],
86
+ [:two,:foo],
87
+ [:two,:bar]
88
+ ])
89
+
90
+ df = DaruLite::DataFrame.rows(rows, index: index, order: order_mi)
91
+ expect(df.index).to eq(index)
92
+ expect(df.vectors).to eq(order_mi)
93
+ expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new([11,12,13,14],
94
+ index: index))
95
+ end
96
+
97
+ it "creates a DataFrame from Vector rows" do
98
+ rows3 = rows*3
99
+ rows3.map! { |r| DaruLite::Vector.new(r, index: multi_index) }
100
+
101
+ df = DaruLite::DataFrame.rows(rows3, order: multi_index)
102
+
103
+ expect(df.index).to eq(DaruLite::Index.new(Array.new(rows3.size) { |i| i }))
104
+ expect(df.vectors).to eq(multi_index)
105
+ expect(df[:a,:one,:bar]).to eq(DaruLite::Vector.new([1]*12))
106
+ end
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,135 @@
1
+ shared_examples_for 'a calculatable DataFrame' do
2
+ context "#vector_sum" do
3
+ let(:df) do
4
+ a1 = DaruLite::Vector.new [1, 2, 3, 4, 5, nil, nil]
5
+ a2 = DaruLite::Vector.new [10, 10, 20, 20, 20, 30, nil]
6
+ b1 = DaruLite::Vector.new [nil, 1, 1, 1, 1, 2, nil]
7
+ b2 = DaruLite::Vector.new [2, 2, 2, nil, 2, 3, nil]
8
+ DaruLite::DataFrame.new({ a1:, a2:, b1:, b2: })
9
+ end
10
+
11
+ it "calculates complete vector sum" do
12
+ expect(df.vector_sum).to eq(DaruLite::Vector.new [nil, 15, 26, nil, 28, nil, nil])
13
+ end
14
+
15
+ it "ignores nils if skipnil is true" do
16
+ expect(df.vector_sum skipnil: true).to eq(DaruLite::Vector.new [13, 15, 26, 25, 28, 35, 0])
17
+ end
18
+
19
+ it "calculates partial vector sum" do
20
+ a = df.vector_sum([:a1, :a2])
21
+ b = df.vector_sum([:b1, :b2])
22
+
23
+ expect(a).to eq(DaruLite::Vector.new [11, 12, 23, 24, 25, nil, nil])
24
+ expect(b).to eq(DaruLite::Vector.new [nil, 3, 3, nil, 3, 5, nil])
25
+ end
26
+ end
27
+
28
+ describe "#vector_mean" do
29
+ let(:df) do
30
+ a1 = DaruLite::Vector.new [1, 2, 3, 4, 5, nil]
31
+ a2 = DaruLite::Vector.new [10, 10, 20, 20, 20, 30]
32
+ b1 = DaruLite::Vector.new [nil, 1, 1, 1, 1, 2]
33
+ b2 = DaruLite::Vector.new [2, 2, 2, nil, 2, 3]
34
+ c = DaruLite::Vector.new [nil, 2, 4, 2, 2, 2]
35
+ DaruLite::DataFrame.new({ a1:, a2:, b1:, b2:, c: })
36
+ end
37
+
38
+ it "calculates complete vector mean" do
39
+ expect(df.vector_mean).to eq(
40
+ DaruLite::Vector.new [nil, 3.4, 6, nil, 6.0, nil]
41
+ )
42
+ end
43
+ end
44
+
45
+ describe "#compute" do
46
+ let(:vnumeric) { DaruLite::Vector.new [0, 0, 1, 4] }
47
+ let(:vsum) { DaruLite::Vector.new [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0] }
48
+ let(:vmult) { DaruLite::Vector.new [1 * 4, 2 * 3, 3 * 2, 4 * 1] }
49
+ let(:df) do
50
+ v1 = DaruLite::Vector.new [1, 2, 3, 4]
51
+ v2 = DaruLite::Vector.new [4, 3, 2, 1]
52
+ v3 = DaruLite::Vector.new [10, 20, 30, 40]
53
+
54
+ DaruLite::DataFrame.new({ v1:, v2:, v3: })
55
+ end
56
+
57
+ it "performs a computation when supplied in a string" do
58
+ expect(df.compute("v1/v2")).to eq(vnumeric)
59
+ expect(df.compute("v1+v2+v3")).to eq(vsum)
60
+ expect(df.compute("v1*v2")).to eq(vmult)
61
+ end
62
+ end
63
+
64
+ describe "#vector_by_calculation" do
65
+ subject { df.vector_by_calculation { a + b + c } }
66
+
67
+ let(:df) do
68
+ a1 = DaruLite::Vector.new([1, 2, 3, 4, 5, 6, 7])
69
+ a2 = DaruLite::Vector.new([10, 20, 30, 40, 50, 60, 70])
70
+ a3 = DaruLite::Vector.new([100, 200, 300, 400, 500, 600, 700])
71
+ DaruLite::DataFrame.new({ :a => a1, :b => a2, :c => a3 })
72
+ end
73
+
74
+ it "DSL for returning vector of each calculation" do
75
+ expect(subject).to eq(DaruLite::Vector.new([111, 222, 333, 444, 555, 666, 777]))
76
+ end
77
+ end
78
+
79
+ describe "#vector_count_characters" do
80
+ subject { df.vector_count_characters }
81
+ let(:df) do
82
+ a1 = DaruLite::Vector.new( [1, 'abcde', 3, 4, 5, nil])
83
+ a2 = DaruLite::Vector.new( [10, 20.3, 20, 20, 20, 30])
84
+ b1 = DaruLite::Vector.new( [nil, '343434', 1, 1, 1, 2])
85
+ b2 = DaruLite::Vector.new( [2, 2, 2, nil, 2, 3])
86
+ c = DaruLite::Vector.new([nil, 2, 'This is a nice example', 2, 2, 2])
87
+
88
+ DaruLite::DataFrame.new({ a1:, a2:, b1:, b2:, c: })
89
+ end
90
+
91
+ it "returns correct values" do
92
+ expect(subject).to eq(DaruLite::Vector.new([4, 17, 27, 5, 6, 5]))
93
+ end
94
+ end
95
+
96
+ describe "#summary" do
97
+ subject { df.summary }
98
+
99
+ context "DataFrame" do
100
+ let(:df) do
101
+ DaruLite::DataFrame.new(
102
+ { a: [1,2,5], b: [1,2,"string"] },
103
+ order: [:a, :b],
104
+ index: [:one, :two, :three],
105
+ name: 'frame'
106
+ )
107
+ end
108
+
109
+ it { is_expected.to eq %Q{
110
+ |= frame
111
+ | Number of rows: 3
112
+ | Element:[a]
113
+ | == a
114
+ | n :3
115
+ | non-missing:3
116
+ | median: 2
117
+ | mean: 2.6667
118
+ | std.dev.: 2.0817
119
+ | std.err.: 1.2019
120
+ | skew: 0.2874
121
+ | kurtosis: -2.3333
122
+ | Element:[b]
123
+ | == b
124
+ | n :3
125
+ | non-missing:3
126
+ | factors: 1,2,string
127
+ | mode: 1,2,string
128
+ | Distribution
129
+ | 1 1 100.00%
130
+ | 2 1 100.00%
131
+ | string 1 100.00%
132
+ }.unindent }
133
+ end
134
+ end
135
+ end
@@ -0,0 +1,180 @@
1
+ shared_examples_for 'a convertible DataFrame' do
2
+ describe '#create_sql' do
3
+ subject { df.create_sql('foo') }
4
+
5
+ let(:df) do
6
+ DaruLite::DataFrame.new(
7
+ {
8
+ a: [1,2,3],
9
+ b: ['test', 'me', 'please'],
10
+ c: ['2015-06-01', '2015-06-02', '2015-06-03']
11
+ },
12
+ name: 'test'
13
+ )
14
+ end
15
+
16
+ it { is_expected.to eq %Q{
17
+ |CREATE TABLE foo (a INTEGER,
18
+ | b VARCHAR (255),
19
+ | c DATE) CHARACTER SET=UTF8;
20
+ }.unindent }
21
+ end
22
+
23
+ describe '#to_df' do
24
+ subject { df.to_df }
25
+
26
+ it { is_expected.to eq(df) }
27
+ end
28
+
29
+ describe "#to_matrix" do
30
+ subject { df.to_matrix }
31
+
32
+ let(:df) do
33
+ DaruLite::DataFrame.new(
34
+ {
35
+ b: [11,12,13,14,15],
36
+ a: [1,2,3,4,5],
37
+ c: [11,22,33,44,55],
38
+ d: [5,4,nil,2,1],
39
+ e: ['this', 'has', 'string','data','too']
40
+ },
41
+ order: [:a, :b, :c,:d,:e],
42
+ index: [:one, :two, :three, :four, :five]
43
+ )
44
+ end
45
+
46
+ it "concats numeric non-nil vectors to Matrix" do
47
+ expect(subject).to eq(Matrix[
48
+ [1,11,11,5],
49
+ [2,12,22,4],
50
+ [3,13,33,nil],
51
+ [4,14,44,2],
52
+ [5,15,55,1]
53
+ ])
54
+ end
55
+ end
56
+
57
+ describe "#to_a" do
58
+ subject { df.to_a }
59
+
60
+ context DaruLite::Index do
61
+ it "converts DataFrame into array of hashes" do
62
+ expect(subject).to eq(
63
+ [
64
+ [
65
+ {a: 1, b: 11, c: 11},
66
+ {a: 2, b: 12, c: 22},
67
+ {a: 3, b: 13, c: 33},
68
+ {a: 4, b: 14, c: 44},
69
+ {a: 5, b: 15, c: 55}
70
+ ],
71
+ [
72
+ :one, :two, :three, :four, :five
73
+ ]
74
+ ])
75
+ end
76
+ end
77
+
78
+ context DaruLite::MultiIndex do
79
+ pending
80
+ end
81
+ end
82
+
83
+ describe '#to_json' do
84
+ subject { JSON.parse(json) }
85
+
86
+ let(:df) do
87
+ DaruLite::DataFrame.new(
88
+ { a: [1,2,3], b: [3,4,5], c: [6,7,8]},
89
+ index: [:one, :two, :three],
90
+ name: 'test'
91
+ )
92
+ end
93
+
94
+ context 'with index' do
95
+ let(:json) { df.to_json(false) }
96
+ # FIXME: is this the most reasonable thing we can do?.. -- zverok
97
+ # For me, a more reasonable thing would be something like
98
+ #
99
+ # [
100
+ # {"index" => "one" , "a"=>1, "b"=>3, "c"=>6},
101
+ # {"index" => "two" , "a"=>2, "b"=>4, "c"=>7},
102
+ # {"index" => "three", "a"=>3, "b"=>5, "c"=>8}
103
+ # ]
104
+ #
105
+ # Or maybe
106
+ #
107
+ # [
108
+ # ["one" , {"a"=>1, "b"=>3, "c"=>6}],
109
+ # ["two" , {"a"=>2, "b"=>4, "c"=>7}],
110
+ # ["three", {"a"=>3, "b"=>5, "c"=>8}]
111
+ # ]
112
+ #
113
+ # Or even
114
+ #
115
+ # {
116
+ # "one" => {"a"=>1, "b"=>3, "c"=>6},
117
+ # "two" => {"a"=>2, "b"=>4, "c"=>7},
118
+ # "three" => {"a"=>3, "b"=>5, "c"=>8}
119
+ # }
120
+ #
121
+ it { is_expected.to eq(
122
+ [
123
+ [
124
+ {"a"=>1, "b"=>3, "c"=>6},
125
+ {"a"=>2, "b"=>4, "c"=>7},
126
+ {"a"=>3, "b"=>5, "c"=>8}
127
+ ],
128
+ ["one", "two", "three"]
129
+ ]
130
+ )}
131
+ end
132
+
133
+ context 'without index' do
134
+ let(:json) { df.to_json(true) }
135
+
136
+ it { is_expected.to eq(
137
+ [
138
+ {"a"=>1, "b"=>3, "c"=>6},
139
+ {"a"=>2, "b"=>4, "c"=>7},
140
+ {"a"=>3, "b"=>5, "c"=>8}
141
+ ]
142
+ )}
143
+ end
144
+ end
145
+
146
+ describe "#to_h" do
147
+ subject { df.to_h }
148
+
149
+ it "converts to a hash" do
150
+ expect(subject).to eq(
151
+ {
152
+ a: DaruLite::Vector.new([1,2,3,4,5],
153
+ index: [:one, :two, :three, :four, :five]),
154
+ b: DaruLite::Vector.new([11,12,13,14,15],
155
+ index: [:one, :two, :three, :four, :five]),
156
+ c: DaruLite::Vector.new([11,22,33,44,55],
157
+ index: [:one, :two, :three, :four, :five])
158
+ }
159
+ )
160
+ end
161
+ end
162
+
163
+ describe '#to_s' do
164
+ subject { df.to_s }
165
+
166
+ it 'produces a class, size description' do
167
+ expect(subject).to eq "#<DaruLite::DataFrame(5x3)>"
168
+ end
169
+
170
+ it 'produces a class, name, size description' do
171
+ df.name = "Test"
172
+ expect(subject).to eq "#<DaruLite::DataFrame: Test(5x3)>"
173
+ end
174
+
175
+ it 'produces a class, name, size description when the name is a symbol' do
176
+ df.name = :Test
177
+ expect(subject).to eq "#<DaruLite::DataFrame: Test(5x3)>"
178
+ end
179
+ end
180
+ end
@@ -0,0 +1,111 @@
1
+ shared_examples_for 'a duplicatable DataFrame' do
2
+ describe "#dup" do
3
+ context DaruLite::Index do
4
+ subject { df.dup }
5
+
6
+ it "dups every data structure inside DataFrame" do
7
+ expect(subject.object_id).not_to eq(df.object_id)
8
+ expect(subject.vectors.object_id).not_to eq(df.vectors.object_id)
9
+ expect(subject.index.object_id).not_to eq(df.index.object_id)
10
+
11
+ df.each_vector_with_index do |vector, index|
12
+ expect(vector.object_id).not_to eq(subject[index].object_id)
13
+ expect(vector.to_a.object_id).not_to eq(subject[index].to_a.object_id)
14
+ end
15
+ end
16
+ end
17
+
18
+ context DaruLite::MultiIndex do
19
+ subject { df_mi.dup }
20
+
21
+ it "duplicates with multi index" do
22
+ expect(subject).to eq(df_mi)
23
+ expect(subject.vectors.object_id).not_to eq(df_mi.vectors.object_id)
24
+ expect(subject.index.object_id).not_to eq(df_mi.index.object_id)
25
+ end
26
+ end
27
+ end
28
+
29
+ describe "#clone_structure" do
30
+ subject { df.clone_structure }
31
+
32
+ it "clones only the index and vector structures of the data frame" do
33
+ expect(subject.vectors).to eq(df.vectors)
34
+ expect(subject.index).to eq(df.index)
35
+ expect(subject[:a]).to eq(DaruLite::Vector.new([nil] * subject[:a].size, index: df.index))
36
+ end
37
+ end
38
+
39
+ describe "#clone" do
40
+ subject { df.clone }
41
+
42
+ context 'no argument is passed' do
43
+ subject { df.clone }
44
+
45
+ it "returns a view of the whole dataframe" do
46
+ expect(df.object_id).to_not eq(subject.object_id)
47
+ expect(df[:a].object_id).to eq(subject[:a].object_id)
48
+ expect(df[:b].object_id).to eq(subject[:b].object_id)
49
+ expect(df[:c].object_id).to eq(subject[:c].object_id)
50
+ end
51
+ end
52
+
53
+ context 'vector names are passed' do
54
+ subject { df.clone(:a, :b) }
55
+
56
+ it "returns a view of selected vectors" do
57
+ expect(subject.object_id).to_not eq(df.object_id)
58
+ expect(subject[:a].object_id).to eq(df[:a].object_id)
59
+ expect(subject[:b].object_id).to eq(df[:b].object_id)
60
+ end
61
+ end
62
+
63
+ context 'array of vector names is passed' do
64
+ subject { df.clone([:a, :b]) }
65
+
66
+ it "clones properly when supplied array" do
67
+ expect(subject.object_id).to_not eq(df.object_id)
68
+ expect(subject[:a].object_id).to eq(df[:a].object_id)
69
+ expect(subject[:b].object_id).to eq(df[:b].object_id)
70
+ end
71
+ end
72
+
73
+ it "original dataframe remains unaffected when operations are applied on subject data frame" do
74
+ original = df.dup
75
+ subject.delete_vector :a
76
+
77
+ expect(df).to eq(original)
78
+ end
79
+ end
80
+
81
+ describe "#clone_only_valid" do
82
+ subject { df.clone_only_valid }
83
+
84
+ context 'df has missing values' do
85
+ let(:df) do
86
+ DaruLite::DataFrame.new({
87
+ a: [1 , 2, 3, nil, 4, nil, 5],
88
+ b: [nil, 2, 3, nil, 4, nil, 5],
89
+ c: [1, 2, 3, 43 , 4, nil, 5]
90
+ })
91
+ end
92
+
93
+ it 'clones only valid values' do
94
+ expect(subject).to eq(df.reject_values(*DaruLite::MISSING_VALUES))
95
+ end
96
+ end
97
+
98
+ context 'df has no missing values' do
99
+ let(:df) do
100
+ DaruLite::DataFrame.new({
101
+ a: [2,3,4,5],
102
+ c: [2,3,4,5]
103
+ })
104
+ end
105
+
106
+ it 'clones all values' do
107
+ expect(subject).to eq(df.clone)
108
+ end
109
+ end
110
+ end
111
+ end