daru_lite 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (149) hide show
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
@@ -0,0 +1,4330 @@
1
+ describe DaruLite::DataFrame do
2
# Shared fixtures rebuilt before every example: a small frame with a plain
# Index (@data_frame), a three-level row MultiIndex (@multi_index), two raw
# data arrays, a column MultiIndex (@order_mi) and a frame using both
# MultiIndexes (@df_mi).
before(:each) do
  @data_frame = DaruLite::DataFrame.new(
    {
      b: [11, 12, 13, 14, 15],
      a: [1, 2, 3, 4, 5],
      c: [11, 22, 33, 44, 55]
    },
    order: %i[a b c],
    index: %i[one two three four five]
  )

  @multi_index = DaruLite::MultiIndex.from_tuples(
    [
      %i[a one bar],
      %i[a one baz],
      %i[a two bar],
      %i[a two baz],
      %i[b one bar],
      %i[b two bar],
      %i[b two baz],
      %i[b one foo],
      %i[c one bar],
      %i[c one baz],
      %i[c two foo],
      %i[c two bar]
    ]
  )

  # Twelve values each, one per row of @multi_index.
  @vector_arry1 = [11, 12, 13, 14] * 3
  @vector_arry2 = [1, 2, 3, 4] * 3

  @order_mi = DaruLite::MultiIndex.from_tuples(
    [
      %i[a one bar],
      %i[a two baz],
      %i[b two foo],
      %i[b one foo]
    ]
  )

  @df_mi = DaruLite::DataFrame.new(
    [@vector_arry1, @vector_arry2, @vector_arry1, @vector_arry2],
    order: @order_mi,
    index: @multi_index
  )
end
38
+
39
+ context ".rows" do
40
# Four identical five-element rows used as input by the `.rows` examples.
before do
  @rows = Array.new(4) { [1, 2, 3, 4, 5] }
end
48
+
49
+ context DaruLite::Index do
50
# Array rows plus an explicit column order: checks the row index, the column
# index and the contents of the first column.
it "creates a DataFrame from Array rows" do
  df = DaruLite::DataFrame.rows(@rows, order: %i[a b c d e])

  expect(df.index).to eq(DaruLite::Index.new((0..3).to_a))
  expect(df.vectors).to eq(DaruLite::Index.new(%i[a b c d e]))
  expect(df[:a]).to eq(DaruLite::Vector.new([1, 1, 1, 1]))
end
57
+
58
# No rows but an explicit order: the columns exist and the row index is empty.
it "creates empty dataframe" do
  df = DaruLite::DataFrame.rows([], order: %i[a b c])

  expect(df.vectors).to eq(DaruLite::Index.new(%i[a b c]))
  expect(df.index).to be_empty
end
64
+
65
# Same expectations as the Array-row example, but each row is a Vector
# indexed by the column names.
it "creates a DataFrame from Vector rows" do
  vector_rows = @rows.map do |values|
    DaruLite::Vector.new(values, index: %i[a b c d e])
  end

  df = DaruLite::DataFrame.rows(vector_rows, order: %i[a b c d e])

  expect(df.index).to eq(DaruLite::Index.new((0..3).to_a))
  expect(df.vectors).to eq(DaruLite::Index.new(%i[a b c d e]))
  expect(df[:a]).to eq(DaruLite::Vector.new([1, 1, 1, 1]))
end
74
+
75
# With neither :index nor :order given, both are derived from the Array rows.
# The expected column labels are the Strings "0".."4", exactly as asserted here.
it 'derives index & order from arrays' do
  df = DaruLite::DataFrame.rows(@rows)

  expect(df.index).to eq(DaruLite::Index.new((0..3).to_a))
  expect(df.vectors).to eq(DaruLite::Index.new(%w[0 1 2 3 4]))
end
80
+
81
# Named Vector rows: the row index comes from the vector names and the
# columns come from the vectors' own index.
it 'derives index & order from vectors' do
  named_rows = @rows.zip(%w[w x y z]).map do |values, label|
    DaruLite::Vector.new(values, index: %i[a b c d e], name: label)
  end

  df = DaruLite::DataFrame.rows(named_rows)

  expect(df.index).to eq(DaruLite::Index.new(%w[w x y z]))
  expect(df.vectors).to eq(DaruLite::Index.new(%i[a b c d e]))
end
87
+
88
# Two rows share the name "w"; the expected row index disambiguates them as
# "w_1" and "w_2".
it 'behaves, when rows are repeated' do
  named_rows = @rows.zip(%w[w w y z]).map do |values, label|
    DaruLite::Vector.new(values, index: %i[a b c d e], name: label)
  end

  df = DaruLite::DataFrame.rows(named_rows)

  expect(df.index).to eq(DaruLite::Index.new(%w[w_1 w_2 y z]))
  expect(df.vectors).to eq(DaruLite::Index.new(%i[a b c d e]))
end
94
+
95
# Unnamed Vector rows: the expected row index is the positional 0..3.
it 'behaves, when vectors are unnamed' do
  unnamed_rows = @rows.map do |values|
    DaruLite::Vector.new(values, index: %i[a b c d e])
  end

  df = DaruLite::DataFrame.rows(unnamed_rows)

  expect(df.index).to eq(DaruLite::Index.new((0..3).to_a))
  expect(df.vectors).to eq(DaruLite::Index.new(%i[a b c d e]))
end
101
+ end
102
+
103
+ context DaruLite::MultiIndex do
104
# Twelve Array rows (the four fixture rows repeated three times) indexed by
# the twelve-entry row MultiIndex.
it "creates a DataFrame from rows" do
  df = DaruLite::DataFrame.rows(
    @rows * 3,
    index: @multi_index,
    order: %i[a b c d e]
  )

  expect(df.index).to eq(@multi_index)
  expect(df.vectors).to eq(DaruLite::Index.new(%i[a b c d e]))
  expect(df[:a]).to eq(DaruLite::Vector.new(Array.new(12, 1), index: @multi_index))
end
112
+
113
# Rows combined with a MultiIndex for both the row index and the column
# order; the first column is then fetched by its full three-level tuple.
# (Description typo "crates" corrected to "creates".)
it "creates a DataFrame from rows (MultiIndex order)" do
  rows = [
    [11, 1, 11, 1],
    [12, 2, 12, 2],
    [13, 3, 13, 3],
    [14, 4, 14, 4]
  ]
  index = DaruLite::MultiIndex.from_tuples([
    [:one, :bar],
    [:one, :baz],
    [:two, :foo],
    [:two, :bar]
  ])

  df = DaruLite::DataFrame.rows(rows, index: index, order: @order_mi)

  expect(df.index).to eq(index)
  expect(df.vectors).to eq(@order_mi)
  expect(df[:a, :one, :bar]).to eq(
    DaruLite::Vector.new([11, 12, 13, 14], index: index)
  )
end
133
+
134
# Vector rows carrying the MultiIndex as their own index; the same MultiIndex
# is passed as the column order, so the row index is expected to be positional.
it "creates a DataFrame from Vector rows" do
  rows = (@rows * 3).map do |values|
    DaruLite::Vector.new(values, index: @multi_index)
  end

  df = DaruLite::DataFrame.rows(rows, order: @multi_index)

  expect(df.index).to eq(DaruLite::Index.new((0...rows.size).to_a))
  expect(df.vectors).to eq(@multi_index)
  expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new(Array.new(12, 1)))
end
144
+ end
145
+ end
146
+
147
+ context "#initialize" do
148
+
149
# A frame built without arguments reports zero rows and zero columns.
it "initializes an empty DataFrame with no arguments" do
  blank = DaruLite::DataFrame.new

  expect(blank.nrows).to eq(0)
  expect(blank.ncols).to eq(0)
end
154
+
155
+ context DaruLite::Index do
156
# Empty Hash source with an explicit order: the columns exist and are empty
# Vectors reachable through the dynamic accessor.
it "initializes an empty DataFrame with empty source arg" do
  df = DaruLite::DataFrame.new({}, order: %i[a b])

  expect(df.vectors).to eq(DaruLite::Index.new(%i[a b]))
  expect(df.a.class).to eq(DaruLite::Vector)
  expect(df.a).to eq([].dv(:a))
end
163
+
164
# Hash-of-Arrays source with an explicit column order and row index.
it "initializes from a Hash" do
  df = DaruLite::DataFrame.new(
    { b: [11, 12, 13, 14, 15], a: [1, 2, 3, 4, 5] },
    order: %i[a b],
    index: %i[one two three four five]
  )

  expect(df.index).to eq(DaruLite::Index.new(%i[one two three four five]))
  expect(df.vectors).to eq(DaruLite::Index.new(%i[a b]))
  expect(df.a.class).to eq(DaruLite::Vector)
  expect(df.a).to eq([1, 2, 3, 4, 5].dv(:a, df.index))
end
173
+
174
# Without :order, the columns are expected in the Hash's own key order.
it "initializes from a Hash and preserves default order" do
  df = DaruLite::DataFrame.new(
    { b: [11, 12, 13, 14, 15], a: [1, 2, 3, 4, 5] },
    index: %i[one two three four five]
  )

  expect(df.vectors).to eq(DaruLite::Index.new(%i[b a]))
end
180
+
181
# Hash-of-Vectors source; both vectors already carry the target row index.
it "initializes from a Hash of Vectors" do
  va = DaruLite::Vector.new([1, 2, 3, 4, 5], index: %i[one two three four five])
  vb = DaruLite::Vector.new([11, 12, 13, 14, 15], index: %i[one two three four five])

  df = DaruLite::DataFrame.new(
    { b: vb, a: va },
    order: %i[a b],
    index: %i[one two three four five]
  )

  expect(df.index).to eq(DaruLite::Index.new(%i[one two three four five]))
  expect(df.vectors).to eq(DaruLite::Index.new(%i[a b]))
  expect(df.a.class).to eq(DaruLite::Vector)
  expect(df.a).to eq([1, 2, 3, 4, 5].dv(:a, %i[one two three four five]))
end
192
+
193
# Array-of-Hashes source, one Hash per row; one cell deliberately holds
# `false` and is expected to be kept as-is.
it "initializes from an Array of Hashes" do
  records = [
    { a: 1, b: 11 },
    { a: false, b: 12 },
    { a: 3, b: 13 },
    { a: 4, b: 14 },
    { a: 5, b: 15 }
  ]
  df = DaruLite::DataFrame.new(
    records,
    order: %i[b a],
    index: %i[one two three four five]
  )

  expect(df.index).to eq(DaruLite::Index.new(%i[one two three four five]))
  expect(df.vectors).to eq(DaruLite::Index.new(%i[b a]))
  expect(df.a.class).to eq(DaruLite::Vector)
  expect(df.a).to eq([1, false, 3, 4, 5].dv(:a, %i[one two three four five]))
end
203
+
204
# Array-of-Arrays source: each inner Array is a column, named through :order.
it "initializes from Array of Arrays" do
  columns = [Array.new(5, 1), Array.new(5, 2), Array.new(5, 3)]
  df = DaruLite::DataFrame.new(columns, order: %i[b a c])

  expect(df.index).to eq(DaruLite::Index.new(5))
  expect(df.vectors).to eq(DaruLite::Index.new(%i[b a c]))
  expect(df.a).to eq(DaruLite::Vector.new(Array.new(5, 2)))
end
211
+
212
# Array-of-Vectors source: each Vector is a column, named through :order.
it "initializes from Array of Vectors" do
  columns = [1, 2, 3].map { |value| DaruLite::Vector.new(Array.new(5, value)) }
  df = DaruLite::DataFrame.new(columns, order: %i[b a c])

  expect(df.index).to eq(DaruLite::Index.new(5))
  expect(df.vectors).to eq(DaruLite::Index.new(%i[b a c]))
  expect(df.a).to eq(DaruLite::Vector.new(Array.new(5, 2)))
end
220
+
221
# Row and column labels may be passed as ready-made Index objects rather than
# Arrays.
it "accepts Index objects for row/col" do
  rows = DaruLite::Index.new [:one, :two, :three, :four, :five]
  cols = DaruLite::Index.new [:a, :b]

  df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]}, order: cols,
    index: rows)

  # The expected :a vector was built with `order: [:a]`, which is a DataFrame
  # option; `name: :a` mirrors the :b expectation on the next line.
  # NOTE(review): presumably Vector.new ignored the stray :order key, so the
  # outcome of the comparison should be unchanged — confirm.
  expect(df.a)      .to eq(DaruLite::Vector.new([1,2,3,4,5], name: :a, index: rows))
  expect(df.b)      .to eq(DaruLite::Vector.new([11,12,13,14,15], name: :b, index: rows))
  expect(df.index)  .to eq(DaruLite::Index.new [:one, :two, :three, :four, :five])
  expect(df.vectors).to eq(DaruLite::Index.new [:a, :b])
end
233
+
234
# With neither :index nor :order, rows are expected to be numbered from 0 and
# columns to follow the Hash key order.
it "initializes without specifying row/col index" do
  df = DaruLite::DataFrame.new({ b: [11, 12, 13, 14, 15], a: [1, 2, 3, 4, 5] })

  expect(df.index).to eq(DaruLite::Index.new((0..4).to_a))
  expect(df.vectors).to eq(DaruLite::Index.new(%i[b a]))
end
240
+
241
# Two vectors with the same labels in different orders; the expected frame
# has both aligned on the labels sorted as :five, :four, :one, :three, :two.
it "aligns indexes properly" do
  df = DaruLite::DataFrame.new(
    {
      b: [11, 12, 13, 14, 15].dv(:b, %i[two one four five three]),
      a: [1, 2, 3, 4, 5].dv(:a, %i[two one three four five])
    },
    order: %i[a b]
  )

  expected = DaruLite::DataFrame.new(
    {
      b: [14, 13, 12, 15, 11].dv(:b, %i[five four one three two]),
      a: [5, 4, 2, 3, 1].dv(:a, %i[five four one three two])
    },
    order: %i[a b]
  )

  expect(df).to eq(expected)
end
255
+
256
# Vector :a lacks the :four and :five labels present in :b; the expected
# frame holds nil in :a at those labels.
it "adds nil values for missing indexes and aligns by index" do
  df = DaruLite::DataFrame.new(
    {
      b: [11, 12, 13, 14, 15].dv(:b, %i[two one four five three]),
      a: [1, 2, 3].dv(:a, %i[two one three])
    },
    order: %i[a b]
  )

  expected = DaruLite::DataFrame.new(
    {
      b: [14, 13, 12, 15, 11].dv(:b, %i[five four one three two]),
      a: [nil, nil, 2, 3, 1].dv(:a, %i[five four one three two])
    },
    order: %i[a b]
  )

  expect(df).to eq(expected)
end
271
+
272
# Four vectors of growing length against a six-label frame index; shorter
# vectors are expected to be padded with nil.
it "adds nils in first vector when other vectors have many extra indexes" do
  df = DaruLite::DataFrame.new(
    {
      b: [11].dv(nil, %i[one]),
      a: [1, 2, 3].dv(nil, %i[one two three]),
      c: [11, 22, 33, 44, 55].dv(nil, %i[one two three four five]),
      d: [49, 69, 89, 99, 108, 44].dv(nil, %i[one two three four five six])
    },
    order: %i[a b c d],
    index: %i[one two three four five six]
  )

  expected = DaruLite::DataFrame.new(
    {
      b: [11, nil, nil, nil, nil, nil].dv(nil, %i[one two three four five six]),
      a: [1, 2, 3, nil, nil, nil].dv(nil, %i[one two three four five six]),
      c: [11, 22, 33, 44, 55, nil].dv(nil, %i[one two three four five six]),
      d: [49, 69, 89, 99, 108, 44].dv(nil, %i[one two three four five six])
    },
    order: %i[a b c d],
    index: %i[one two three four five six]
  )

  expect(df).to eq(expected)
end
290
+
291
# The vectors carry labels outside the supplied frame index; only values at
# :one, :two and :three are expected to survive, with nil where a vector has
# no such label.
it "correctly matches the supplied DataFrame index with the individual vector indexes" do
  df = DaruLite::DataFrame.new(
    {
      b: [11, 12, 13].dv(nil, %i[one bleh blah]),
      a: [1, 2, 3, 4, 5].dv(nil, %i[one two booh baah three]),
      c: [11, 22, 33, 44, 55].dv(nil, [0, 1, 3, :three, :two])
    },
    order: %i[a b c],
    index: %i[one two three]
  )

  expected = DaruLite::DataFrame.new(
    {
      b: [11, nil, nil].dv(nil, %i[one two three]),
      a: [1, 2, 5].dv(nil, %i[one two three]),
      c: [nil, 55, 44].dv(nil, %i[one two three])
    },
    order: %i[a b c],
    index: %i[one two three]
  )

  expect(df).to eq(expected)
end
307
+
308
# :order names only :a and :c; the omitted :b is expected to be appended last.
it "completes incomplete vectors" do
  df = DaruLite::DataFrame.new(
    {
      b: [11, 12, 13, 14, 15],
      a: [1, 2, 3, 4, 5],
      c: [11, 22, 33, 44, 55]
    },
    order: %i[a c]
  )

  expect(df.vectors).to eq(%i[a c b].to_index)
end
314
+
315
# With clone: false each column is expected to be the very object passed in,
# verified by comparing object ids.
it "does not copy vectors when clone: false" do
  a = DaruLite::Vector.new([1, 2, 3, 4, 5])
  b = DaruLite::Vector.new([1, 2, 3, 4, 5])
  c = DaruLite::Vector.new([1, 2, 3, 4, 5])
  df = DaruLite::DataFrame.new({ a: a, b: b, c: c }, clone: false)

  { a: a, b: b, c: c }.each do |column, original|
    expect(df[column].object_id).to eq(original.object_id)
  end
end
325
+
326
# Assigning one column to an order-only frame sizes the frame; the other
# declared columns are expected to be filled with nil.
it "allows creation of empty dataframe with only order" do
  df = DaruLite::DataFrame.new({}, order: %i[a b c])
  df[:a] = DaruLite::Vector.new([1, 2, 3, 4, 5, 6])

  expect(df.size).to eq(6)
  expect(df[:a]).to eq(DaruLite::Vector.new([1, 2, 3, 4, 5, 6]))
  %i[b c].each do |column|
    expect(df[column]).to eq(DaruLite::Vector.new(Array.new(6)))
  end
end
335
+
336
# A frame created from an empty Hash takes its size, row index and single
# column from the first assigned vector.
it "allows creation of dataframe without specifying order or index" do
  df = DaruLite::DataFrame.new({})
  df[:a] = DaruLite::Vector.new([1, 2, 3, 4, 5])

  expect(df.size).to eq(5)
  expect(df.index.to_a).to eq((0..4).to_a)
  expect(df.vectors.to_a).to eq([:a])
  expect(df[:a]).to eq(DaruLite::Vector.new([1, 2, 3, 4, 5]))
end
345
+
346
# Array sources without :order are expected to get positional column names.
it "allows creation of dataframe with a default order" do
  raw_columns = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

  arr_of_arrs_df = DaruLite::DataFrame.new(raw_columns)
  arr_of_vectors_df = DaruLite::DataFrame.new(
    [
      DaruLite::Vector.new([1, 2, 3]),
      DaruLite::Vector.new([4, 5, 6]),
      DaruLite::Vector.new([7, 8, 9])
    ]
  )

  expect(arr_of_arrs_df.vectors.to_a).to eq([0, 1, 2])
  expect(arr_of_vectors_df.vectors.to_a).to eq([0, 1, 2])
end
353
+
354
# Five-element columns with a three-label row index must be rejected.
it "raises error for incomplete DataFrame index" do
  # The result is never used, so it is no longer assigned to a local.
  # NOTE(review): bare `raise_error` accepts any exception (including
  # NameError) — pin the concrete error class once it is confirmed.
  expect {
    DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
      c: [11,22,33,44,55]}, order: [:a, :b, :c],
      index: [:one, :two, :three])
  }.to raise_error
end
361
+
362
# Columns of different lengths (3 vs 5) must be rejected.
it "raises error for unequal sized vectors/arrays" do
  # The result is never used, so it is no longer assigned to a local.
  # NOTE(review): bare `raise_error` accepts any exception (including
  # NameError) — pin the concrete error class once it is confirmed.
  expect {
    DaruLite::DataFrame.new({b: [11,12,13], a: [1,2,3,4,5],
      c: [11,22,33,44,55]}, order: [:a, :b, :c],
      index: [:one, :two, :three])
  }.to raise_error
end
369
+ end
370
+
371
+ context DaruLite::MultiIndex do
372
# Empty source with a MultiIndex column order: the columns exist and a full
# tuple lookup returns an empty Vector.
it "creates empty DataFrame" do
  df = DaruLite::DataFrame.new({}, order: @order_mi)

  expect(df.vectors).to eq(@order_mi)
  expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new([]))
end
378
+
379
# Hash source keyed by column tuples, with MultiIndexes for rows and columns.
it "creates from Hash" do
  source = {
    %i[a one bar] => @vector_arry1,
    %i[a two baz] => @vector_arry2,
    %i[b one foo] => @vector_arry1,
    %i[b two foo] => @vector_arry2
  }
  df = DaruLite::DataFrame.new(source, order: @order_mi, index: @multi_index)

  expect(df.index).to eq(@multi_index)
  expect(df.vectors).to eq(@order_mi)
  expect(df[:a, :one, :bar]).to eq(
    DaruLite::Vector.new(@vector_arry1, index: @multi_index)
  )
end
392
+
393
# Not written yet. Skipped explicitly so the gap is reported as pending
# instead of an empty example that passes without asserting anything.
it "creates from Array of Hashes" do
  skip "TODO: example not implemented yet"
end
396
+
397
# Array-of-Arrays source with MultiIndexes for both rows and columns.
it "creates from Array of Arrays" do
  columns = [@vector_arry1, @vector_arry2, @vector_arry1, @vector_arry2]
  df = DaruLite::DataFrame.new(columns, index: @multi_index, order: @order_mi)

  expect(df.index).to eq(@multi_index)
  expect(df.vectors).to eq(@order_mi)
  expect(df[:a, :one, :bar]).to eq(
    DaruLite::Vector.new(@vector_arry1, index: @multi_index)
  )
end
406
+
407
# Two columns against a four-entry column MultiIndex must be rejected.
it "raises error for order MultiIndex of different size than supplied Array" do
  # The result is never used, so it is no longer assigned to a local.
  # NOTE(review): bare `raise_error` accepts any exception (including
  # NameError) — pin the concrete error class once it is confirmed.
  expect {
    DaruLite::DataFrame.new([@vector_arry1, @vector_arry2], order: @order_mi,
      index: @multi_index)
  }.to raise_error
end
413
+
414
# Pending example: two vectors with differently ordered MultiIndexes should
# come out aligned on a common sorted MultiIndex.
it "aligns MultiIndexes properly" do
  pending
  mi_a = @order_mi
  mi_b = DaruLite::MultiIndex.from_tuples([
    [:b,:one,:foo],
    [:a,:one,:bar],
    [:b,:two,:foo],
    [:a,:one,:baz]
  ])
  mi_sorted = DaruLite::MultiIndex.from_tuples([
    [:a, :one, :bar],
    [:a, :one, :baz],
    [:b, :one, :foo],
    [:b, :two, :foo]
  ])
  order = DaruLite::MultiIndex.from_tuples([
    [:pee, :que],
    [:pee, :poo]
  ])
  a = DaruLite::Vector.new([1,2,3,4], index: mi_a)
  b = DaruLite::Vector.new([11,12,13,14], index: mi_b)
  df = DaruLite::DataFrame.new([b,a], order: order)

  # The expected frame used the undefined local `order_mi`, so the example
  # could only fail with a NameError; it now uses the `order` defined above.
  # NOTE(review): mi_a contains [:a, :two, :baz], which mi_sorted lacks — the
  # expected data probably needs revisiting before `pending` is removed.
  expect(df).to eq(DaruLite::DataFrame.new({
    [:pee, :que] => DaruLite::Vector.new([1,2,4,3], index: mi_sorted),
    [:pee, :poo] => DaruLite::Vector.new([12,14,11,13], index: mi_sorted)
    }, order: order))
end
442
+
443
# Not written yet. Skipped explicitly so the gap is reported as pending
# instead of an empty example that passes without asserting anything.
it "adds nils in case of missing values" do
  skip "TODO: example not implemented yet"
end
446
+
447
# Not written yet. Skipped explicitly so the gap is reported as pending
# instead of an empty example that passes without asserting anything.
it "matches individual vector indexing with supplied DataFrame index" do
  skip "TODO: example not implemented yet"
end
450
+ end
451
+ end
452
+
453
+ context "#[]" do
454
+ context DaruLite::Index do
455
# Fresh three-column frame (@df) for every `#[]` example in this context.
before(:each) do
  @df = DaruLite::DataFrame.new(
    {
      b: [11, 12, 13, 14, 15],
      a: [1, 2, 3, 4, 5],
      c: [11, 22, 33, 44, 55]
    },
    order: %i[a b c],
    index: %i[one two three four five]
  )
end
460
+
461
# A single column name yields that column as a Vector.
it "returns a Vector" do
  expected = [1, 2, 3, 4, 5].dv(:a, %i[one two three four five])

  expect(@df[:a]).to eq(expected)
end
464
+
465
# Same lookup as above, with the expectation built through Vector.new.
it "returns a Vector by default" do
  expected = DaruLite::Vector.new(
    [1, 2, 3, 4, 5],
    name: :a,
    index: %i[one two three four five]
  )

  expect(@df[:a]).to eq(expected)
end
469
+
470
# Two column names yield a DataFrame holding just those columns.
it "returns a DataFrame" do
  two_columns = DaruLite::DataFrame.new(
    { b: [11, 12, 13, 14, 15], a: [1, 2, 3, 4, 5] },
    order: %i[a b],
    index: %i[one two three four five]
  )

  expect(@df[:a, :b]).to eq(two_columns)
end
476
+
477
# Position 0 is expected to resolve to the first column, :a.
it "accesses vector with Integer index" do
  expected = [1, 2, 3, 4, 5].dv(:a, %i[one two three four five])

  expect(@df[0]).to eq(expected)
end
480
+
481
# A Range of column names yields the columns from :b through :c.
it "returns a subset of DataFrame when specified range" do
  subset = @df[:b..:c]
  expected = DaruLite::DataFrame.new(
    {
      b: [11, 12, 13, 14, 15],
      c: [11, 22, 33, 44, 55]
    },
    index: %i[one two three four five]
  )

  expect(subset).to eq(expected)
end
488
+
489
# A trailing :vector or :row argument selects the axis of the lookup.
it 'accepts axis parameter as a last argument' do
  expect(@df[:a, :vector]).to eq(@df[:a])
  expect(@df[:one, :row]).to eq([1, 11, 11].dv(:one, %i[a b c]))
end
493
+ end
494
+
495
+ context DaruLite::MultiIndex do
496
# Position 0 of the MultiIndex-ordered frame is its first column.
it "accesses vector with an integer index" do
  first_column = DaruLite::Vector.new(@vector_arry1, index: @multi_index)

  expect(@df_mi[0]).to eq(first_column)
end
500
+
501
# A complete three-level tuple identifies exactly one column.
it "returns a vector when specifying full tuple" do
  expected = DaruLite::Vector.new(@vector_arry1, index: @multi_index)

  expect(@df_mi[:a, :one, :bar]).to eq(expected)
end
505
+
506
# Only the top level (:a) is given: the result is a DataFrame whose columns
# are keyed by the remaining two levels.
it "returns DataFrame when specified first layer of MultiIndex" do
  sub_order = DaruLite::MultiIndex.from_tuples(
    [
      %i[one bar],
      %i[two baz]
    ]
  )
  expected = DaruLite::DataFrame.new(
    [@vector_arry1, @vector_arry2],
    index: @multi_index,
    order: sub_order
  )

  expect(@df_mi[:a]).to eq(expected)
end
516
+
517
# Naming every level down to the last yields a Vector, not a DataFrame.
it "returns a Vector if the last level of MultiIndex is tracked" do
  expected = DaruLite::Vector.new(@vector_arry1, index: @multi_index)

  expect(@df_mi[:a, :one, :bar]).to eq(expected)
end
521
+ end
522
+ end
523
+
524
+ context "#[]=" do
525
+ context DaruLite::Index do
526
# Fresh three-column frame (@df) for every `#[]=` example in this context.
before(:each) do
  @df = DaruLite::DataFrame.new(
    {
      b: [11, 12, 13, 14, 15],
      a: [1, 2, 3, 4, 5],
      c: [11, 22, 33, 44, 55]
    },
    order: %i[a b c],
    index: %i[one two three four five]
  )
end
531
+
532
# Replaces column :a through `[]=` and compares the whole frame.
# Uses @df, the fixture this context builds in its own `before` block, like
# every sibling example; the outer @data_frame has the same contents.
it "assigns directly with the []= operator" do
  @df[:a] = [100,200,300,400,500]
  expect(@df).to eq(DaruLite::DataFrame.new({
    b: [11,12,13,14,15],
    a: [100,200,300,400,500],
    c: [11,22,33,44,55]}, order: [:a, :b, :c],
    index: [:one, :two, :three, :four, :five]))
end
540
+
541
# Assigning a scalar to a new column fills every row with that value.
it "assigns new vector with default length if given just a value" do
  @df[:d] = 1.0

  expected = DaruLite::Vector.new(
    Array.new(5, 1.0),
    index: %i[one two three four five],
    name: :d
  )
  expect(@df[:d]).to eq(expected)
end
546
+
547
# Assigning a scalar to an existing column overwrites every row with it.
it "updates vector with default length if given just a value" do
  @df[:c] = 1.0

  expected = DaruLite::Vector.new(
    Array.new(5, 1.0),
    index: %i[one two three four five],
    name: :c
  )
  expect(@df[:c]).to eq(expected)
end
552
+
553
# An Array assigned to a new column is stored as a DaruLite::Vector.
it "appends an Array as a DaruLite::Vector" do
  @df[:d] = [69, 99, 108, 85, 49]

  stored_class = @df.d.class
  expect(stored_class).to eq(DaruLite::Vector)
end
558
+
559
# A non-Array enumerable (here a Set) assigned to a new column is stored as
# a DaruLite::Vector aligned to the frame's row index.
it "appends an arbitrary enumerable as a DaruLite::Vector" do
  @df[:d] = Set.new([69,99,108,85,49])

  # The expected vector was named :c although the column under test is :d.
  # NOTE(review): presumably Vector equality ignores the name, so this only
  # corrects a misleading expectation — confirm.
  expect(@df[:d]).to eq(DaruLite::Vector.new([69, 99, 108, 85, 49],
    index: [:one, :two, :three, :four, :five], name: :d))
end
565
+
566
# Assigning to an existing column name replaces that column's values.
it "replaces an already present vector" do
  @df[:a] = [69, 99, 108, 85, 49].dv(nil, %i[one two three four five])

  expected = [69, 99, 108, 85, 49].dv(nil, %i[one two three four five])
  expect(@df.a).to eq(expected)
end
571
+
572
# Assigning to an unknown column name appends it after the existing columns.
it "appends a new vector to the DataFrame" do
  @df[:woo] = [69, 99, 108, 85, 49].dv(nil, %i[one two three four five])

  expect(@df.vectors).to eq(%i[a b c woo].to_index)
end
577
+
578
# A plain Array has no index of its own; the new column takes the frame's.
it "creates an index for the new vector if not specified" do
  @df[:woo] = [69, 99, 108, 85, 49]

  expect(@df.woo.index).to eq(%i[one two three four five].to_index)
end
583
+
584
# The inserted vector lists the frame's labels in a different order; its
# values are expected to be rearranged to the frame's label order.
it "matches index of vector to be inserted with the DataFrame index" do
  @df[:shankar] = [69, 99, 108, 85, 49].dv(:shankar, %i[two one three five four])

  expected = [99, 69, 108, 49, 85].dv(:shankar, %i[one two three four five])
  expect(@df.shankar).to eq(expected)
end
590
+
591
# Only :one matches a frame label; every other row is expected to be nil.
it "matches index of vector to be inserted, inserting nils where no match found" do
  @df[:shankar] = [1, 2, 3].dv(:shankar, %i[one james hetfield])

  expected = [1, nil, nil, nil, nil].dv(:shankar, %i[one two three four five])
  expect(@df.shankar).to eq(expected)
end
596
+
597
# A three-element Array cannot be assigned to a five-row frame.
it "raises error for Array assignment of wrong length" do
  too_short = -> { @df[:shiva] = [1, 2, 3] }

  expect(&too_short).to raise_error
end
602
+
603
# Columns assigned to an initially empty frame are named after their keys.
it "assigns correct name given empty dataframe" do
  df_empty = DaruLite::DataFrame.new({})
  df_empty[:a] = (1..5)
  df_empty[:b] = (1..5)

  expect(df_empty[:a].name).to equal(:a)
  expect(df_empty[:b].name).to equal(:b)
end
611
+
612
# Not written yet. Skipped explicitly so the gap is reported as pending
# instead of an empty example that passes without asserting anything.
it "appends multiple vectors at a time" do
  skip "TODO: example not implemented yet"
end
615
+ end
616
+
617
+ context DaruLite::MultiIndex do
618
# :d is not an existing top-level key and is not a full tuple, so the
# assignment is expected to raise.
it "raises error when incomplete index specified but index is absent" do
  partial_assignment = -> { @df_mi[:d] = [100, 200, 300, 400] * 3 }

  expect(&partial_assignment).to raise_error
end
623
+
624
# Assigning to the top-level key :a is expected to overwrite both columns
# underneath it and leave the :b columns untouched.
it "assigns all sub-indexes when a top level index is specified" do
  @df_mi[:a] = [100, 200, 300, 400] * 3

  expected = DaruLite::DataFrame.new(
    [
      [100, 200, 300, 400] * 3,
      [100, 200, 300, 400] * 3,
      @vector_arry1,
      @vector_arry2
    ],
    index: @multi_index,
    order: @order_mi
  )
  expect(@df_mi).to eq(expected)
end
633
+
634
# Assigning with a complete, previously unknown tuple appends a fifth column.
# (Description typo "specfied" corrected to "specified".)
it "creates a new vector when full index specified" do
  order = DaruLite::MultiIndex.from_tuples([
    [:a,:one,:bar],
    [:a,:two,:baz],
    [:b,:two,:foo],
    [:b,:one,:foo],
    [:c,:one,:bar]])
  answer = DaruLite::DataFrame.new([
    @vector_arry1,
    @vector_arry2,
    @vector_arry1,
    @vector_arry2,
    [100,200,300,400,100,200,300,400,100,200,300,400]
    ], index: @multi_index, order: order)
  @df_mi[:c,:one,:bar] = [100,200,300,400,100,200,300,400,100,200,300,400]

  expect(@df_mi).to eq(answer)
end
652
+
653
# A column assigned by tuple to an empty MultiIndex frame is expected to be
# named with the tuple's parts joined together ("conebar").
it "assigns correct name given empty dataframe" do
  df_empty = DaruLite::DataFrame.new([], index: @multi_index, order: @order_mi)
  df_empty[:c, :one, :bar] = (1..12)

  expect(df_empty[:c, :one, :bar].name).to eq("conebar")
end
659
+ end
660
+ end
661
+
662
# Dynamic column accessors: `df.name` reads a column and `df.name = values`
# writes one, for both Symbol (:a) and String ('b') column names.
context '#method_missing' do
  let(:df) do
    DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, 5],
        'b' => [5, 4, 3, 2, 1]
      },
      index: 11..15
    )
  end

  context 'get vector' do
    context 'by string' do
      subject { df.b }

      it { is_expected.to be_a(DaruLite::Vector) }
      its(:to_a) { is_expected.to eq([5, 4, 3, 2, 1]) }
      its(:'index.to_a') { is_expected.to eq([11, 12, 13, 14, 15]) }
    end

    context 'by symbol' do
      subject { df.a }

      it { is_expected.to be_a(DaruLite::Vector) }
      its(:to_a) { is_expected.to eq([1, 2, 3, 4, 5]) }
      its(:'index.to_a') { is_expected.to eq([11, 12, 13, 14, 15]) }
    end
  end

  context 'set existing vector' do
    context 'by string' do
      before { df.b = %i[a b c d e] }
      subject { df }

      it { is_expected.to be_a(DaruLite::DataFrame) }
      its(:'vectors.to_a') { is_expected.to eq([:a, 'b']) }
      its(:'b.to_a') { is_expected.to eq(%i[a b c d e]) }
      its(:'index.to_a') { is_expected.to eq([11, 12, 13, 14, 15]) }
    end

    context 'by symbol' do
      before { df.a = %i[a b c d e] }
      subject { df }

      it { is_expected.to be_a(DaruLite::DataFrame) }
      its(:'vectors.to_a') { is_expected.to eq([:a, 'b']) }
      its(:'a.to_a') { is_expected.to eq(%i[a b c d e]) }
      its(:'index.to_a') { is_expected.to eq([11, 12, 13, 14, 15]) }
    end
  end

  context 'set new vector' do
    before { df.c = [5, 5, 5, 5, 5] }
    subject { df }

    it { is_expected.to be_a(DaruLite::DataFrame) }
    its(:'vectors.to_a') { is_expected.to eq([:a, 'b', :c]) }
    its(:'c.to_a') { is_expected.to eq([5, 5, 5, 5, 5]) }
    its(:'index.to_a') { is_expected.to eq([11, 12, 13, 14, 15]) }
  end

  context 'reference invalid vector' do
    # Reading a column that does not exist must surface as NoMethodError.
    it { expect { df.d }.to raise_error(NoMethodError) }
  end
end
722
+
723
# Adding a vector under the existing name :a replaces that vector's data;
# the expected frame keeps the same order and index.
context '#add_vector' do
  subject(:data_frame) do
    DaruLite::DataFrame.new(
      { b: [11, 12, 13, 14, 15], a: [1, 2, 3, 4, 5], c: [11, 22, 33, 44, 55] },
      order: %i[a b c],
      index: %i[one two three four five]
    )
  end

  let(:expected_frame) do
    DaruLite::DataFrame.new(
      { b: [11, 12, 13, 14, 15], a: [100, 200, 300, 400, 500], c: [11, 22, 33, 44, 55] },
      order: %i[a b c],
      index: %i[one two three four five]
    )
  end

  before { data_frame.add_vector(:a, [100, 200, 300, 400, 500]) }

  it { is_expected.to eq(expected_frame) }
end
740
+
741
# #insert_vector(position, name, source): happy path plus the three
# rejection cases (oversized data, out-of-range slot, non-collection source).
context "#insert_vector" do
  subject(:data_frame) do
    DaruLite::DataFrame.new(
      { b: [11, 12, 13, 14, 15], a: [1, 2, 3, 4, 5], c: [11, 22, 33, 44, 55] },
      order: %i[a b c],
      index: %i[one two three four five]
    )
  end

  it "insert a new vector at the desired slot" do
    expected = DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, 5],
        d: [710, 720, 730, 740, 750],
        b: [11, 12, 13, 14, 15],
        c: [11, 22, 33, 44, 55]
      },
      order: %i[a d b c],
      index: %i[one two three four five]
    )

    data_frame.insert_vector(1, :d, [710, 720, 730, 740, 750])

    expect(data_frame).to eq(expected)
  end

  it "raises error for data array being too big" do
    # Eight values for a five-row frame.
    oversized = (1..8).to_a

    expect { data_frame.insert_vector(1, :d, oversized) }.to raise_error(IndexError)
  end

  it "raises error for invalid index value" do
    # Slot 4 lies beyond the three existing vectors.
    values = (1..5).to_a

    expect { data_frame.insert_vector(4, :d, values) }.to raise_error(ArgumentError)
  end

  it "raises error for invalid source type" do
    scalar = 14

    expect { data_frame.insert_vector(3, :d, scalar) }.to raise_error(ArgumentError)
  end
end
781
+
782
+ context "#row[]=" do
783
# Row assignment through df.row[label] = ... on a plain Index:
# overwriting rows, appending new rows, and aligning an assigned Vector
# to the frame's vectors by its own index.
context DaruLite::Index do
  let(:vector_names) { [:a, :b, :c] }

  before(:each) do
    @df = DaruLite::DataFrame.new(
      { b: [11, 12, 13, 14, 15], a: [1, 2, 3, 4, 5], c: [11, 22, 33, 44, 55] },
      order: [:a, :b, :c],
      index: [:one, :two, :three, :four, :five]
    )
  end

  it "assigns specified row when Array" do
    @df.row[:one] = [49, 99, 59]

    expect(@df.row[:one]).to eq([49, 99, 59].dv(:one, vector_names))
    expect(@df.row[:one].index).to eq(vector_names.to_index)
    expect(@df.row[:one].name).to eq(:one)
  end

  it "assigns specified row when DV" do
    @df.row[:one] = [49, 99, 59].dv(nil, vector_names)

    expect(@df.row[:one]).to eq([49, 99, 59].dv(:one, vector_names))
  end

  it "assigns correct elements when Vector of different index" do
    # :f matches no vector of the frame and :c is absent from the source.
    @df.row[:one] = DaruLite::Vector.new([44, 62, 11], index: [:b, :f, :a])

    expect(@df.row[:one]).to eq(DaruLite::Vector.new([11, 44, nil], index: vector_names))
  end

  it "creates a new row from an Array" do
    @df.row[:patekar] = [9, 2, 11]

    expect(@df.row[:patekar]).to eq([9, 2, 11].dv(:patekar, vector_names))
  end

  it "creates a new row from a DV" do
    @df.row[:patekar] = [9, 2, 11].dv(nil, vector_names)

    expect(@df.row[:patekar]).to eq([9, 2, 11].dv(:patekar, vector_names))
  end

  it "creates a new row from numeric row index and named DV" do
    @df.row[2] = [9, 2, 11].dv(nil, vector_names)

    expect(@df.row[2]).to eq([9, 2, 11].dv(nil, vector_names))
  end

  it "correctly aligns assigned DV by index" do
    @df.row[:two] = [9, 2, 11].dv(nil, [:b, :a, :c])

    expect(@df.row[:two]).to eq([2, 9, 11].dv(:two, vector_names))
  end

  it "correctlu aligns assinged DV by index for new rows" do
    @df.row[:latest] = DaruLite::Vector.new([2, 3, 1], index: [:b, :c, :a])

    expect(@df.row[:latest]).to eq(DaruLite::Vector.new([1, 2, 3], index: vector_names))
  end

  it "inserts nils for indexes that dont exist in the DataFrame" do
    @df.row[:two] = [49, 99, 59].dv(nil, [:oo, :aah, :gaah])

    expect(@df.row[:two]).to eq([nil, nil, nil].dv(nil, vector_names))
  end

  it "correctly inserts row of a different length by matching indexes" do
    @df.row[:four] = [5, 4, 3, 2, 1, 3].dv(nil, [:you, :have, :a, :big, :appetite, :spock])

    expect(@df.row[:four]).to eq([3, nil, nil].dv(:four, vector_names))
  end

  it "raises error for row insertion by Array of wrong length" do
    # A bare `raise_error` also matches NoMethodError/NameError, so a typo in
    # this example would pass silently; pin the expected class instead.
    # NOTE(review): SizeError is what the sibling set_at specs expect for
    # size mismatches - confirm it is also what row assignment raises.
    expect { @df.row[:one] = [1, 2, 3, 4, 5, 6, 7] }.to raise_error(SizeError)
  end
end
858
+
859
context DaruLite::MultiIndex do
  # TODO: no examples yet for row assignment on a MultiIndex-ed DataFrame;
  # the pending marker keeps the gap visible in the spec output.
  pending
end
863
+
864
# Row assignment on a CategoricalIndex: one assignment writes to every
# row addressed by the given categories or positions.
context DaruLite::CategoricalIndex do
  let(:idx) { DaruLite::CategoricalIndex.new([:a, 1, :a, 1, :c]) }
  let(:df) { DaruLite::DataFrame.new({ a: 'a'..'e', b: 1..5 }, index: idx) }
  let(:new_values) { %w[x y] }

  context "modify exiting row" do
    context "single category" do
      subject { df }

      # Category :a labels positions 0 and 2.
      before { df.row[:a] = new_values }

      it { is_expected.to be_a(DaruLite::DataFrame) }
      its(:index) { is_expected.to eq(idx) }
      its(:vectors) { is_expected.to eq(DaruLite::Index.new([:a, :b])) }
      its(:'a.to_a') { is_expected.to eq(['x', 'b', 'x', 'd', 'e']) }
      its(:'b.to_a') { is_expected.to eq(['y', 2, 'y', 4, 5]) }
    end

    context "multiple categories" do
      subject { df }

      # Categories :a and 1 together cover positions 0..3.
      before { df.row[:a, 1] = new_values }

      it { is_expected.to be_a(DaruLite::DataFrame) }
      its(:index) { is_expected.to eq(idx) }
      its(:vectors) { is_expected.to eq(DaruLite::Index.new([:a, :b])) }
      its(:'a.to_a') { is_expected.to eq(['x', 'x', 'x', 'x', 'e']) }
      its(:'b.to_a') { is_expected.to eq(['y', 'y', 'y', 'y', 5]) }
    end

    context "positional index" do
      subject { df }

      # 0 and 2 are not categories of idx; the expectations below show them
      # being applied as positions.
      before { df.row[0, 2] = new_values }

      it { is_expected.to be_a(DaruLite::DataFrame) }
      its(:index) { is_expected.to eq(idx) }
      its(:vectors) { is_expected.to eq(DaruLite::Index.new([:a, :b])) }
      its(:'a.to_a') { is_expected.to eq(['x', 'b', 'x', 'd', 'e']) }
      its(:'b.to_a') { is_expected.to eq(['y', 2, 'y', 4, 5]) }
    end
  end

  context "add new row" do
    # TODO
  end
end
912
+ end
913
+
914
+ context "#row.at" do
915
# Positional row access on a plain Index. The labels (1, 0, :c) are chosen
# so that integer labels differ from positions.
context DaruLite::Index do
  let(:idx) { DaruLite::Index.new([1, 0, :c]) }
  let(:df) { DaruLite::DataFrame.new({ a: 1..3, b: 'a'..'c' }, index: idx) }

  context "single position" do
    subject { df.row.at(1) }

    it { is_expected.to be_a(DaruLite::Vector) }
    its(:size) { is_expected.to eq(2) }
    its(:to_a) { is_expected.to eq([2, 'b']) }
    its(:'index.to_a') { is_expected.to eq([:a, :b]) }
  end

  context "multiple positions" do
    subject { df.row.at(0, 2) }

    it { is_expected.to be_a(DaruLite::DataFrame) }
    its(:size) { is_expected.to eq(2) }
    its(:'index.to_a') { is_expected.to eq([1, :c]) }
    its(:'a.to_a') { is_expected.to eq([1, 3]) }
    its(:'b.to_a') { is_expected.to eq(%w[a c]) }
  end

  context "invalid position" do
    it { expect { df.row.at(3) }.to raise_error(IndexError) }
  end

  context "invalid positions" do
    it { expect { df.row.at(2, 3) }.to raise_error(IndexError) }
  end

  context "range" do
    subject { df.row.at(0..1) }

    it { is_expected.to be_a(DaruLite::DataFrame) }
    its(:size) { is_expected.to eq(2) }
    its(:'index.to_a') { is_expected.to eq([1, 0]) }
    its(:'a.to_a') { is_expected.to eq([1, 2]) }
    its(:'b.to_a') { is_expected.to eq(%w[a b]) }
  end

  context "range with negative end" do
    subject { df.row.at(0..-2) }

    it { is_expected.to be_a(DaruLite::DataFrame) }
    its(:size) { is_expected.to eq(2) }
    its(:'index.to_a') { is_expected.to eq([1, 0]) }
    its(:'a.to_a') { is_expected.to eq([1, 2]) }
    its(:'b.to_a') { is_expected.to eq(%w[a b]) }
  end

  context "range with single element" do
    subject { df.row.at(0..0) }

    # A one-element range still yields a DataFrame, not a Vector.
    it { is_expected.to be_a(DaruLite::DataFrame) }
    its(:size) { is_expected.to eq(1) }
    its(:'index.to_a') { is_expected.to eq([1]) }
    its(:'a.to_a') { is_expected.to eq([1]) }
    its(:'b.to_a') { is_expected.to eq(['a']) }
  end
end
981
+
982
# Positional row access on a four-row MultiIndex; selected rows keep their
# full index tuples.
context DaruLite::MultiIndex do
  let(:idx) do
    DaruLite::MultiIndex.from_tuples(
      [
        [:a, :one, :bar],
        [:a, :one, :baz],
        [:b, :two, :bar],
        [:a, :two, :baz]
      ]
    )
  end
  let(:df) { DaruLite::DataFrame.new({ a: 1..4, b: 'a'..'d' }, index: idx) }
  let(:first_two_tuples) { [[:a, :one, :bar], [:a, :one, :baz]] }

  context "single position" do
    subject { df.row.at(1) }

    it { is_expected.to be_a(DaruLite::Vector) }
    its(:size) { is_expected.to eq(2) }
    its(:to_a) { is_expected.to eq([2, 'b']) }
    its(:'index.to_a') { is_expected.to eq([:a, :b]) }
  end

  context "multiple positions" do
    subject { df.row.at(0, 2) }

    it { is_expected.to be_a(DaruLite::DataFrame) }
    its(:size) { is_expected.to eq(2) }
    its(:'index.to_a') { is_expected.to eq([[:a, :one, :bar], [:b, :two, :bar]]) }
    its(:'a.to_a') { is_expected.to eq([1, 3]) }
    its(:'a.index.to_a') { is_expected.to eq([[:a, :one, :bar], [:b, :two, :bar]]) }
    its(:'b.to_a') { is_expected.to eq(%w[a c]) }
  end

  context "invalid position" do
    it { expect { df.row.at(4) }.to raise_error(IndexError) }
  end

  context "invalid positions" do
    it { expect { df.row.at(3, 4) }.to raise_error(IndexError) }
  end

  context "range" do
    subject { df.row.at(0..1) }

    it { is_expected.to be_a(DaruLite::DataFrame) }
    its(:size) { is_expected.to eq(2) }
    its(:'index.to_a') { is_expected.to eq(first_two_tuples) }
    its(:'a.to_a') { is_expected.to eq([1, 2]) }
    its(:'a.index.to_a') { is_expected.to eq(first_two_tuples) }
    its(:'b.to_a') { is_expected.to eq(%w[a b]) }
  end

  context "range with negative end" do
    # With four rows, 0..-3 addresses the same rows as 0..1.
    subject { df.row.at(0..-3) }

    it { is_expected.to be_a(DaruLite::DataFrame) }
    its(:size) { is_expected.to eq(2) }
    its(:'index.to_a') { is_expected.to eq(first_two_tuples) }
    its(:'a.to_a') { is_expected.to eq([1, 2]) }
    its(:'a.index.to_a') { is_expected.to eq(first_two_tuples) }
    its(:'b.to_a') { is_expected.to eq(%w[a b]) }
  end

  context " range with single element" do
    subject { df.row.at(0..0) }

    it { is_expected.to be_a(DaruLite::DataFrame) }
    its(:size) { is_expected.to eq(1) }
    its(:'index.to_a') { is_expected.to eq([[:a, :one, :bar]]) }
    its(:'a.to_a') { is_expected.to eq([1]) }
    its(:'a.index.to_a') { is_expected.to eq([[:a, :one, :bar]]) }
    its(:'b.to_a') { is_expected.to eq(['a']) }
  end
end
1065
+
1066
# Positional row access on a five-row CategoricalIndex.
context DaruLite::CategoricalIndex do
  let(:idx) { DaruLite::CategoricalIndex.new [:a, 1, 1, :a, :c] }
  let(:df) do
    DaruLite::DataFrame.new({ a: 1..5, b: 'a'..'e' }, index: idx)
  end

  context "single positional index" do
    subject { df.row.at 1 }

    it { is_expected.to be_a DaruLite::Vector }
    its(:size) { is_expected.to eq 2 }
    its(:to_a) { is_expected.to eq [2, 'b'] }
    its(:'index.to_a') { is_expected.to eq [:a, :b] }
  end

  context "multiple positional indexes" do
    subject { df.row.at 0, 2 }

    it { is_expected.to be_a DaruLite::DataFrame }
    its(:size) { is_expected.to eq 2 }
    its(:'index.to_a') { is_expected.to eq [:a, 1] }
    its(:'a.to_a') { is_expected.to eq [1, 3] }
    its(:'a.index.to_a') { is_expected.to eq [:a, 1] }
    its(:'b.to_a') { is_expected.to eq ['a', 'c'] }
    its(:'b.index.to_a') { is_expected.to eq [:a, 1] }
  end

  # The two examples below previously called df.at (vector access). With only
  # two vectors in df they raised IndexError for the wrong reason and never
  # exercised row access. They now go through df.row.at like every other
  # example in this group; the frame has five rows, so position 5 is the
  # first invalid one.
  context "invalid position" do
    it { expect { df.row.at 5 }.to raise_error IndexError }
  end

  context "invalid positions" do
    it { expect { df.row.at 4, 5 }.to raise_error IndexError }
  end

  context "range" do
    subject { df.row.at 0..1 }

    it { is_expected.to be_a DaruLite::DataFrame }
    its(:size) { is_expected.to eq 2 }
    its(:'index.to_a') { is_expected.to eq [:a, 1] }
    its(:'a.to_a') { is_expected.to eq [1, 2] }
    its(:'a.index.to_a') { is_expected.to eq [:a, 1] }
    its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
    its(:'b.index.to_a') { is_expected.to eq [:a, 1] }
  end

  context "range with negative end" do
    # With five rows, 0..-4 addresses the same rows as 0..1.
    subject { df.row.at 0..-4 }

    it { is_expected.to be_a DaruLite::DataFrame }
    its(:size) { is_expected.to eq 2 }
    its(:'index.to_a') { is_expected.to eq [:a, 1] }
    its(:'a.to_a') { is_expected.to eq [1, 2] }
    its(:'a.index.to_a') { is_expected.to eq [:a, 1] }
    its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
    its(:'b.index.to_a') { is_expected.to eq [:a, 1] }
  end

  context " range with single element" do
    subject { df.row.at 0..0 }

    it { is_expected.to be_a DaruLite::DataFrame }
    its(:size) { is_expected.to eq 1 }
    its(:'index.to_a') { is_expected.to eq [:a] }
    its(:'a.to_a') { is_expected.to eq [1] }
    its(:'a.index.to_a') { is_expected.to eq [:a] }
    its(:'b.to_a') { is_expected.to eq ['a'] }
    its(:'b.index.to_a') { is_expected.to eq [:a] }
  end
end
1140
+ end
1141
+
1142
# Positional row assignment: the same values are written to every listed row.
context "#row.set_at" do
  let(:df) { DaruLite::DataFrame.new({ a: 1..3, b: 'a'..'c' }) }

  context "single position" do
    subject { df }

    before { df.row.set_at([1], %w[x y]) }

    its(:size) { is_expected.to eq(3) }
    its(:'a.to_a') { is_expected.to eq([1, 'x', 3]) }
    its(:'b.to_a') { is_expected.to eq(%w[a y c]) }
  end

  context "multiple position" do
    subject { df }

    before { df.row.set_at([0, 2], %w[x y]) }

    its(:size) { is_expected.to eq(3) }
    its(:'a.to_a') { is_expected.to eq(['x', 2, 'x']) }
    its(:'b.to_a') { is_expected.to eq(%w[y b y]) }
  end

  context "invalid position" do
    it { expect { df.row.set_at([3], %w[x y]) }.to raise_error(IndexError) }
  end

  context "invalid positions" do
    it { expect { df.row.set_at([2, 3], %w[x y]) }.to raise_error(IndexError) }
  end

  context "incorrect size" do
    # Three values for a two-vector frame.
    it { expect { df.row.set_at([1], %w[x y z]) }.to raise_error(SizeError) }
  end
end
1180
+
1181
+ context "#at" do
1182
# Positional vector access on a frame with a plain Index. The first vector
# is named with the Integer 1 so that names and positions differ.
context DaruLite::Index do
  let(:idx) { DaruLite::Index.new [:a, :b, :c] }
  let(:df) do
    DaruLite::DataFrame.new({ 1 => 1..3, a: 'a'..'c', b: 11..13 }, index: idx)
  end

  context "single position" do
    subject { df.at 1 }

    it { is_expected.to be_a DaruLite::Vector }
    its(:size) { is_expected.to eq 3 }
    its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
    its(:index) { is_expected.to eq idx }
  end

  context "multiple positions" do
    subject { df.at 0, 2 }

    it { is_expected.to be_a DaruLite::DataFrame }
    its(:shape) { is_expected.to eq [3, 2] }
    its(:index) { is_expected.to eq idx }
    # Inspect the returned frame; checking df[1] on the source frame would
    # pass regardless of what #at returned.
    it { expect(subject[1].to_a).to eq [1, 2, 3] }
    its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
  end

  context "single invalid position" do
    it { expect { df.at 3 }.to raise_error IndexError }
  end

  context "multiple invalid positions" do
    it { expect { df.at 2, 3 }.to raise_error IndexError }
  end

  context "range" do
    subject { df.at 0..1 }

    it { is_expected.to be_a DaruLite::DataFrame }
    its(:shape) { is_expected.to eq [3, 2] }
    its(:index) { is_expected.to eq idx }
    it { expect(subject[1].to_a).to eq [1, 2, 3] }
    its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
  end

  context "range with negative end" do
    subject { df.at 0..-2 }

    it { is_expected.to be_a DaruLite::DataFrame }
    its(:shape) { is_expected.to eq [3, 2] }
    its(:index) { is_expected.to eq idx }
    it { expect(subject[1].to_a).to eq [1, 2, 3] }
    its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
  end

  context "range with single element" do
    subject { df.at 1..1 }

    it { is_expected.to be_a DaruLite::DataFrame }
    its(:shape) { is_expected.to eq [3, 1] }
    its(:index) { is_expected.to eq idx }
    its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
  end
end
1248
+
1249
# Positional vector access on a frame whose rows carry a MultiIndex.
context DaruLite::MultiIndex do
  let(:idx) do
    DaruLite::MultiIndex.from_tuples [
      [:a, :one, :bar],
      [:a, :one, :baz],
      [:b, :two, :bar]
    ]
  end
  let(:df) do
    DaruLite::DataFrame.new({ 1 => 1..3, a: 'a'..'c', b: 11..13 }, index: idx)
  end

  context "single position" do
    subject { df.at 1 }

    it { is_expected.to be_a DaruLite::Vector }
    its(:size) { is_expected.to eq 3 }
    its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
    its(:index) { is_expected.to eq idx }
  end

  context "multiple positions" do
    subject { df.at 0, 2 }

    it { is_expected.to be_a DaruLite::DataFrame }
    its(:shape) { is_expected.to eq [3, 2] }
    its(:index) { is_expected.to eq idx }
    # Inspect the returned frame; checking df[1] on the source frame would
    # pass regardless of what #at returned.
    it { expect(subject[1].to_a).to eq [1, 2, 3] }
    its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
  end

  context "single invalid position" do
    it { expect { df.at 3 }.to raise_error IndexError }
  end

  context "multiple invalid positions" do
    it { expect { df.at 2, 3 }.to raise_error IndexError }
  end

  context "range" do
    subject { df.at 0..1 }

    it { is_expected.to be_a DaruLite::DataFrame }
    its(:shape) { is_expected.to eq [3, 2] }
    its(:index) { is_expected.to eq idx }
    it { expect(subject[1].to_a).to eq [1, 2, 3] }
    its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
  end

  context "range with negative end" do
    subject { df.at 0..-2 }

    it { is_expected.to be_a DaruLite::DataFrame }
    its(:shape) { is_expected.to eq [3, 2] }
    its(:index) { is_expected.to eq idx }
    it { expect(subject[1].to_a).to eq [1, 2, 3] }
    its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
  end

  context "range with single element" do
    subject { df.at 1..1 }

    it { is_expected.to be_a DaruLite::DataFrame }
    its(:shape) { is_expected.to eq [3, 1] }
    its(:index) { is_expected.to eq idx }
    its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
  end
end
1321
+
1322
# Positional vector access on a frame whose rows carry a CategoricalIndex.
context DaruLite::CategoricalIndex do
  let(:idx) { DaruLite::CategoricalIndex.new [:a, 1, 1] }
  let(:df) do
    DaruLite::DataFrame.new({ 1 => 1..3, a: 'a'..'c', b: 11..13 }, index: idx)
  end

  context "single position" do
    subject { df.at 1 }

    it { is_expected.to be_a DaruLite::Vector }
    its(:size) { is_expected.to eq 3 }
    its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
    its(:index) { is_expected.to eq idx }
  end

  context "multiple positions" do
    subject { df.at 0, 2 }

    it { is_expected.to be_a DaruLite::DataFrame }
    its(:shape) { is_expected.to eq [3, 2] }
    its(:index) { is_expected.to eq idx }
    # Inspect the returned frame; checking df[1] on the source frame would
    # pass regardless of what #at returned.
    it { expect(subject[1].to_a).to eq [1, 2, 3] }
    its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
  end

  context "single invalid position" do
    it { expect { df.at 3 }.to raise_error IndexError }
  end

  context "multiple invalid positions" do
    it { expect { df.at 2, 3 }.to raise_error IndexError }
  end

  context "range" do
    subject { df.at 0..1 }

    it { is_expected.to be_a DaruLite::DataFrame }
    its(:shape) { is_expected.to eq [3, 2] }
    its(:index) { is_expected.to eq idx }
    it { expect(subject[1].to_a).to eq [1, 2, 3] }
    its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
  end

  context "range with negative index" do
    subject { df.at 0..-2 }

    it { is_expected.to be_a DaruLite::DataFrame }
    its(:shape) { is_expected.to eq [3, 2] }
    its(:index) { is_expected.to eq idx }
    it { expect(subject[1].to_a).to eq [1, 2, 3] }
    its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
  end

  context "range with single element" do
    subject { df.at 1..1 }

    it { is_expected.to be_a DaruLite::DataFrame }
    its(:shape) { is_expected.to eq [3, 1] }
    its(:index) { is_expected.to eq idx }
    its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
  end
end
1388
+ end
1389
+
1390
# Positional vector assignment: the same values are written to every listed
# vector position; the vector named 1 (position 0) is never targeted here.
context "#set_at" do
  let(:df) { DaruLite::DataFrame.new({ 1 => 1..3, a: 'a'..'c', b: 11..13 }) }
  let(:letters) { %w[x y z] }

  context "single position" do
    subject { df }

    before { df.set_at([1], letters) }

    its(:shape) { is_expected.to eq([3, 3]) }
    it { expect(df[1].to_a).to eq([1, 2, 3]) }
    its(:'a.to_a') { is_expected.to eq(%w[x y z]) }
    its(:'b.to_a') { is_expected.to eq([11, 12, 13]) }
  end

  context "multiple position" do
    subject { df }

    before { df.set_at([1, 2], letters) }

    its(:shape) { is_expected.to eq([3, 3]) }
    it { expect(df[1].to_a).to eq([1, 2, 3]) }
    its(:'a.to_a') { is_expected.to eq(%w[x y z]) }
    its(:'b.to_a') { is_expected.to eq(%w[x y z]) }
  end

  context "invalid position" do
    it { expect { df.set_at([3], letters) }.to raise_error(IndexError) }
  end

  context "invalid positions" do
    it { expect { df.set_at([2, 3], letters) }.to raise_error(IndexError) }
  end

  context "incorrect size" do
    # Two values for a three-row frame.
    it { expect { df.set_at([1], %w[x y]) }.to raise_error(SizeError) }
  end
end
1431
+
1432
+ context "#row[]" do
1433
# Row lookup through df.row[...] on a plain Index: by label, by integer,
# and by numeric or symbolic Range.
context DaruLite::Index do
  let(:full_data) do
    { b: [11, 12, 13, 14, 15], a: [1, 2, 3, 4, 5], c: [11, 22, 33, 44, 55] }
  end
  let(:first_three_rows) do
    DaruLite::DataFrame.new(
      { b: [11, 12, 13], a: [1, 2, 3], c: [11, 22, 33] },
      order: %i[a b c],
      index: %i[one two three]
    )
  end

  before(:each) do
    @df = DaruLite::DataFrame.new(
      full_data,
      order: %i[a b c],
      index: %i[one two three four five]
    )
  end

  it "creates an index for assignment if not already specified" do
    @df.row[:one] = [49, 99, 59]

    expect(@df[:one, :row]).to eq([49, 99, 59].dv(:one, %i[a b c]))
    expect(@df[:one, :row].index).to eq(%i[a b c].to_index)
    expect(@df[:one, :row].name).to eq(:one)
  end

  it "returns a DataFrame when specifying numeric Range" do
    expect(@df.row[0..2]).to eq(first_three_rows)
  end

  it "returns a DataFrame when specifying symbolic Range" do
    expect(@df.row[:one..:three]).to eq(first_three_rows)
  end

  it "returns a row with the given index" do
    expect(@df.row[:one]).to eq([1, 11, 11].dv(:one, %i[a b c]))
  end

  it "returns a row with given Integer index" do
    expect(@df.row[0]).to eq([1, 11, 11].dv(:one, %i[a b c]))
  end

  it "returns a row with given Integer index for default index-less DataFrame" do
    unindexed = DaruLite::DataFrame.new(full_data, order: %i[a b c])

    expect(unindexed.row[0]).to eq([1, 11, 11].dv(nil, %i[a b c]))
    expect(unindexed.row[3]).to eq([4, 14, 44].dv(nil, %i[a b c]))
  end

  it "returns a row with given Integer index for numerical index DataFrame" do
    numbered = DaruLite::DataFrame.new(full_data, order: %i[a b c], index: [1, 2, 3, 4, 5])

    # 0 is not a label of this index and yields the first row;
    # 3 is a label and yields the row labelled 3.
    expect(numbered.row[0]).to eq([1, 11, 11].dv(nil, %i[a b c]))
    expect(numbered.row[3]).to eq([3, 13, 33].dv(nil, %i[a b c]))
  end
end
1488
+
1489
# Row lookup on the shared MultiIndex fixture (@df_mi / @order_mi are set up
# outside this group). Partial tuples return a DataFrame; a complete tuple or
# an integer returns a Vector.
context DaruLite::MultiIndex do
  let(:two_row_data) { [[11, 12], [1, 2], [11, 12], [1, 2]] }

  it "returns a Vector when specifying integer index" do
    expected = DaruLite::Vector.new([11, 1, 11, 1], index: @order_mi)

    expect(@df_mi.row[0]).to eq(expected)
  end

  it "returns a DataFrame whecn specifying numeric range" do
    sub_index = DaruLite::MultiIndex.from_tuples(
      [[:a, :one, :bar], [:a, :one, :baz]]
    )
    expected = DaruLite::DataFrame.new(
      two_row_data, order: @order_mi, index: sub_index, name: :numeric_range
    )

    expect(@df_mi.row[0..1]).to eq(expected)
  end

  it "returns a Vector when specifying complete tuple" do
    expected = DaruLite::Vector.new([13, 3, 13, 3], index: @order_mi)

    expect(@df_mi.row[:c, :two, :foo]).to eq(expected)
  end

  it "returns DataFrame when specifying first layer of MultiIndex" do
    # The expected index holds only the remaining two levels of each tuple.
    sub_index = DaruLite::MultiIndex.from_tuples(
      [[:one, :bar], [:one, :baz], [:two, :foo], [:two, :bar]]
    )
    expected = DaruLite::DataFrame.new(
      [[11, 12, 13, 14], [1, 2, 3, 4], [11, 12, 13, 14], [1, 2, 3, 4]],
      index: sub_index, order: @order_mi
    )

    expect(@df_mi.row[:c]).to eq(expected)
  end

  it "returns DataFrame when specifying first and second layer of MultiIndex" do
    sub_index = DaruLite::MultiIndex.from_tuples([[:bar], [:baz]])
    expected = DaruLite::DataFrame.new(two_row_data, index: sub_index, order: @order_mi)

    expect(@df_mi.row[:c, :one]).to eq(expected)
  end
end
1540
+
1541
# Row lookup on a CategoricalIndex: a category with several rows returns a
# DataFrame, a category with one row (or a single position) returns a Vector.
context DaruLite::CategoricalIndex do
  let(:idx) { DaruLite::CategoricalIndex.new [:a, 1, :a, 1, :c] }
  let(:df) do
    DaruLite::DataFrame.new({ a: 'a'..'e', b: 1..5 }, index: idx)
  end

  context "single category" do
    context "multiple instances" do
      subject { df.row[:a] }

      it { is_expected.to be_a DaruLite::DataFrame }
      its(:index) { is_expected.to eq DaruLite::CategoricalIndex.new [:a, :a] }
      its(:vectors) { is_expected.to eq DaruLite::Index.new [:a, :b] }
      # These two examples used to build a Vector without asserting anything.
      # Category :a labels rows 0 and 2 of the source frame.
      its(:'a.to_a') { is_expected.to eq ['a', 'c'] }
      its(:'b.to_a') { is_expected.to eq [1, 3] }
    end

    context "single instance" do
      subject { df.row[:c] }

      it { is_expected.to be_a DaruLite::Vector }
      its(:index) { is_expected.to eq DaruLite::Index.new [:a, :b] }
      its(:to_a) { is_expected.to eq ['e', 5] }
    end
  end

  context "multiple categories" do
    subject { df.row[:a, 1] }

    it { is_expected.to be_a DaruLite::DataFrame }
    its(:index) { is_expected.to eq DaruLite::CategoricalIndex.new([:a, 1, :a, 1]) }
    its(:vectors) { is_expected.to eq DaruLite::Index.new [:a, :b] }
    # These two examples used to build a Vector without asserting anything.
    # Categories :a and 1 together cover rows 0..3; match_array pins the
    # selected values without pinning their order.
    # NOTE(review): tighten to `eq` once the row order is confirmed.
    its(:'a.to_a') { is_expected.to match_array ['a', 'b', 'c', 'd'] }
    its(:'b.to_a') { is_expected.to match_array [1, 2, 3, 4] }
  end

  context "positional index" do
    subject { df.row[0] }

    it { is_expected.to be_a DaruLite::Vector }
    its(:index) { is_expected.to eq DaruLite::Index.new [:a, :b] }
    its(:to_a) { is_expected.to eq ['a', 1] }
  end

  context "invalid positional index" do
    it { expect { df.row[5] }.to raise_error IndexError }
  end

  context "invalid category" do
    it { expect { df.row[:d] }.to raise_error IndexError }
  end
end
1597
+ end
1598
+
1599
# Appending rows: with an explicit label, without one, with a MultiIndex
# tuple, and onto a frame created from an order alone.
context "#add_row" do
  subject(:data_frame) do
    DaruLite::DataFrame.new(
      { b: [11, 12, 13, 14, 15], a: [1, 2, 3, 4, 5], c: [11, 22, 33, 44, 55] },
      order: %i[a b c],
      index: %i[one two three four five]
    )
  end

  let(:grown_data) do
    {
      a: [1, 2, 3, 4, 5, 100],
      b: [11, 12, 13, 14, 15, 200],
      c: [11, 22, 33, 44, 55, 300]
    }
  end

  context 'named' do
    before { data_frame.add_row([100, 200, 300], :six) }

    it do
      is_expected.to eq(
        DaruLite::DataFrame.new(
          grown_data,
          order: %i[a b c],
          index: %i[one two three four five six]
        )
      )
    end
  end

  context 'unnamed' do
    before { data_frame.add_row([100, 200, 300]) }

    it do
      # The expected label of the unlabelled row is the Integer 5.
      is_expected.to eq(
        DaruLite::DataFrame.new(
          grown_data,
          order: %i[a b c],
          index: [:one, :two, :three, :four, :five, 5]
        )
      )
    end
  end

  context 'with mulitiindex DF' do
    subject(:data_frame) do
      DaruLite::DataFrame.new(
        { b: [11, 12, 13], a: [1, 2, 3], c: [11, 22, 33] },
        order: %i[a b c],
        index: DaruLite::MultiIndex.from_tuples(
          [[:one, :two], [:one, :three], [:two, :four]]
        )
      )
    end

    before { data_frame.add_row([100, 200, 300], [:two, :five]) }

    it do
      is_expected.to eq(
        DaruLite::DataFrame.new(
          { b: [11, 12, 13, 200], a: [1, 2, 3, 100], c: [11, 22, 33, 300] },
          order: %i[a b c],
          index: DaruLite::MultiIndex.from_tuples(
            [[:one, :two], [:one, :three], [:two, :four], [:two, :five]]
          )
        )
      )
    end
  end

  it "allows adding rows after making empty DF by specfying only order" do
    empty = DaruLite::DataFrame.new({}, order: %i[a b c])
    [[1, 2, 3], [5, 6, 7]].each { |values| empty.add_row(values) }

    expect(empty[:a]).to eq(DaruLite::Vector.new([1, 5]))
    expect(empty[:b]).to eq(DaruLite::Vector.new([2, 6]))
    expect(empty[:c]).to eq(DaruLite::Vector.new([3, 7]))
    expect(empty.index).to eq(DaruLite::Index.new([0, 1]))
  end
end
1658
+
1659
# #first(n) on the shared @data_frame fixture (set up outside this group),
# plus #head on a DateTimeIndex-ed frame.
context "#first" do
  it 'works' do
    leading_two = DaruLite::DataFrame.new(
      { b: [11, 12], a: [1, 2], c: [11, 22] },
      order: %i[a b c],
      index: %i[one two]
    )

    expect(@data_frame.first(2)).to eq(leading_two)
  end

  it 'works with too large values' do
    # Asking for more rows than exist yields a frame equal to the original.
    expect(@data_frame.first(200)).to eq(@data_frame)
  end

  it 'has synonym' do
    expect(@data_frame.first(2)).to eq(@data_frame.head(2))
  end

  it 'works on DateTime indexes' do
    dates = DaruLite::DateTimeIndex.new(%w[2017-01-01 2017-02-01 2017-03-01])
    dated = DaruLite::DataFrame.new({ col1: %w[a b c] }, index: dates)
    expected = DaruLite::DataFrame.new(
      { col1: ['a'] },
      index: DaruLite::DateTimeIndex.new(['2017-01-01'])
    )

    expect(dated.head(1)).to eq(expected)
  end
end
1682
+
1683
# #last(n) on the shared @data_frame fixture (set up outside this group).
context "#last" do
  it 'works' do
    trailing_two = DaruLite::DataFrame.new(
      { b: [14, 15], a: [4, 5], c: [44, 55] },
      order: %i[a b c],
      index: %i[four five]
    )

    expect(@data_frame.last(2)).to eq(trailing_two)
  end

  it 'works with too large values' do
    # Asking for more rows than exist yields a frame equal to the original.
    expect(@data_frame.last(200)).to eq(@data_frame)
  end

  it 'has synonym' do
    expect(@data_frame.last(2)).to eq(@data_frame.tail(2))
  end
end
1699
+
1700
+ context "#==" do
1701
+ it "compares by vectors, index and values of a DataFrame (ignores name)" do
1702
+ a = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
1703
+ order: [:a, :b], index: [:one, :two, :three, :four, :five])
1704
+
1705
+ b = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
1706
+ order: [:a, :b], index: [:one, :two, :three, :four, :five])
1707
+
1708
+ expect(a).to eq(b)
1709
+ end
1710
+ end
1711
+
1712
+ context '#rename' do
1713
+ subject { @data_frame.rename 'other' }
1714
+
1715
+ it { is_expected.to be_a DaruLite::DataFrame }
1716
+ its(:name) { is_expected.to eq 'other' }
1717
+ end
1718
+
1719
+ context "#dup" do
1720
+ context DaruLite::Index do
1721
+ it "dups every data structure inside DataFrame" do
1722
+ clo = @data_frame.dup
1723
+
1724
+ expect(clo.object_id) .not_to eq(@data_frame.object_id)
1725
+ expect(clo.vectors.object_id).not_to eq(@data_frame.vectors.object_id)
1726
+ expect(clo.index.object_id) .not_to eq(@data_frame.index.object_id)
1727
+
1728
+ @data_frame.each_vector_with_index do |vector, index|
1729
+ expect(vector.object_id).not_to eq(clo[index].object_id)
1730
+ expect(vector.to_a.object_id).not_to eq(clo[index].to_a.object_id)
1731
+ end
1732
+ end
1733
+ end
1734
+
1735
+ context DaruLite::MultiIndex do
1736
+ it "duplicates with multi index" do
1737
+ clo = @df_mi.dup
1738
+
1739
+ expect(clo) .to eq(@df_mi)
1740
+ expect(clo.vectors.object_id).not_to eq(@df_mi.vectors.object_id)
1741
+ expect(clo.index.object_id) .not_to eq(@df_mi.index.object_id)
1742
+ end
1743
+ end
1744
+ end
1745
+
1746
+ context '#reject_values' do
1747
+ let(:df) do
1748
+ DaruLite::DataFrame.new({
1749
+ a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1750
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
1751
+ c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
1752
+ }, index: 11..18)
1753
+ end
1754
+ before { df.to_category :b }
1755
+
1756
+ context 'remove nils only' do
1757
+ subject { df.reject_values nil }
1758
+ it { is_expected.to be_a DaruLite::DataFrame }
1759
+ its(:'b.type') { is_expected.to eq :category }
1760
+ its(:'a.to_a') { is_expected.to eq [1, 2, 7] }
1761
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 8] }
1762
+ its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 7] }
1763
+ its(:'index.to_a') { is_expected.to eq [11, 12, 18] }
1764
+ end
1765
+
1766
+ context 'remove Float::NAN only' do
1767
+ subject { df.reject_values Float::NAN }
1768
+ it { is_expected.to be_a DaruLite::DataFrame }
1769
+ its(:'b.type') { is_expected.to eq :category }
1770
+ its(:'a.to_a') { is_expected.to eq [1, 3, nil, 1, 7] }
1771
+ its(:'b.to_a') { is_expected.to eq [:a, nil, 3, 5, 8] }
1772
+ its(:'c.to_a') { is_expected.to eq ['a', 3, 5, nil, 7] }
1773
+ its(:'index.to_a') { is_expected.to eq [11, 13, 16, 17, 18] }
1774
+ end
1775
+
1776
+ context 'remove both nil and Float::NAN' do
1777
+ subject { df.reject_values nil, Float::NAN }
1778
+ it { is_expected.to be_a DaruLite::DataFrame }
1779
+ its(:'b.type') { is_expected.to eq :category }
1780
+ its(:'a.to_a') { is_expected.to eq [1, 7] }
1781
+ its(:'b.to_a') { is_expected.to eq [:a, 8] }
1782
+ its(:'c.to_a') { is_expected.to eq ['a', 7] }
1783
+ its(:'index.to_a') { is_expected.to eq [11, 18] }
1784
+ end
1785
+
1786
+ context 'any other values' do
1787
+ subject { df.reject_values 1, 5 }
1788
+ it { is_expected.to be_a DaruLite::DataFrame }
1789
+ its(:'b.type') { is_expected.to eq :category }
1790
+ its(:'a.to_a') { is_expected.to eq [2, 3, nil, Float::NAN, 7] }
1791
+ its(:'b.to_a') { is_expected.to eq [:b, nil, Float::NAN, nil, 8] }
1792
+ its(:'c.to_a') { is_expected.to eq [Float::NAN, 3, 4, 3, 7] }
1793
+ its(:'index.to_a') { is_expected.to eq [12, 13, 14, 15, 18] }
1794
+ end
1795
+
1796
+ context 'when resultant dataframe has one row' do
1797
+ subject { df.reject_values 1, 2, 3, 4, 5, nil, Float::NAN }
1798
+ it { is_expected.to be_a DaruLite::DataFrame }
1799
+ its(:'b.type') { is_expected.to eq :category }
1800
+ its(:'a.to_a') { is_expected.to eq [7] }
1801
+ its(:'b.to_a') { is_expected.to eq [8] }
1802
+ its(:'c.to_a') { is_expected.to eq [7] }
1803
+ its(:'index.to_a') { is_expected.to eq [18] }
1804
+ end
1805
+
1806
+ context 'when resultant dataframe is empty' do
1807
+ subject { df.reject_values 1, 2, 3, 4, 5, 6, 7, nil, Float::NAN }
1808
+ it { is_expected.to be_a DaruLite::DataFrame }
1809
+ its(:'b.type') { is_expected.to eq :category }
1810
+ its(:'a.to_a') { is_expected.to eq [] }
1811
+ its(:'b.to_a') { is_expected.to eq [] }
1812
+ its(:'c.to_a') { is_expected.to eq [] }
1813
+ its(:'index.to_a') { is_expected.to eq [] }
1814
+ end
1815
+ end
1816
+
1817
+ context '#replace_values' do
1818
+ subject do
1819
+ DaruLite::DataFrame.new({
1820
+ a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1821
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
1822
+ c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
1823
+ })
1824
+ end
1825
+ before { subject.to_category :b }
1826
+
1827
+ context 'replace nils only' do
1828
+ before { subject.replace_values nil, 10 }
1829
+ it { is_expected.to be_a DaruLite::DataFrame }
1830
+ its(:'b.type') { is_expected.to eq :category }
1831
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 10, Float::NAN, 10, 1, 7] }
1832
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 10, Float::NAN, 10, 3, 5, 8] }
1833
+ its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 5, 10, 7] }
1834
+ end
1835
+
1836
+ context 'replace Float::NAN only' do
1837
+ before { subject.replace_values Float::NAN, 10 }
1838
+ it { is_expected.to be_a DaruLite::DataFrame }
1839
+ its(:'b.type') { is_expected.to eq :category }
1840
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, nil, 10, nil, 1, 7] }
1841
+ its(:'b.to_a') { is_expected.to eq [:a, :b, nil, 10, nil, 3, 5, 8] }
1842
+ its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, nil, 7] }
1843
+ end
1844
+
1845
+ context 'replace both nil and Float::NAN' do
1846
+ before { subject.replace_values [nil, Float::NAN], 10 }
1847
+ it { is_expected.to be_a DaruLite::DataFrame }
1848
+ its(:'b.type') { is_expected.to eq :category }
1849
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 10, 10, 10, 1, 7] }
1850
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 10, 10, 10, 3, 5, 8] }
1851
+ its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, 10, 7] }
1852
+ end
1853
+
1854
+ context 'replace other values' do
1855
+ before { subject.replace_values [1, 5], 10 }
1856
+ it { is_expected.to be_a DaruLite::DataFrame }
1857
+ its(:'b.type') { is_expected.to eq :category }
1858
+ its(:'a.to_a') { is_expected.to eq [10, 2, 3, nil, Float::NAN, nil, 10, 7] }
1859
+ its(:'b.to_a') { is_expected.to eq [:a, :b, nil, Float::NAN, nil, 3, 10, 8] }
1860
+ its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 10, nil, 7] }
1861
+ end
1862
+ end
1863
+
1864
+ describe 'uniq' do
1865
+ let(:df) do
1866
+ DaruLite::DataFrame.from_csv 'spec/fixtures/duplicates.csv'
1867
+ end
1868
+
1869
+ context 'with no args' do
1870
+ it do
1871
+ result = df.uniq
1872
+ expect(result.shape.first).to eq 30
1873
+ end
1874
+ end
1875
+
1876
+ context 'given a vector' do
1877
+ it do
1878
+ result = df.uniq("color")
1879
+ expect(result.shape.first).to eq 2
1880
+ end
1881
+ end
1882
+
1883
+ context 'given an array of vectors' do
1884
+ it do
1885
+ result = df.uniq("color", "director_name")
1886
+ expect(result.shape.first).to eq 29
1887
+ end
1888
+ end
1889
+ end
1890
+
1891
+ context '#rolling_fillna!' do
1892
+ subject do
1893
+ DaruLite::DataFrame.new({
1894
+ a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1895
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5, nil],
1896
+ c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
1897
+ })
1898
+ end
1899
+
1900
+ context 'rolling_fillna! forwards' do
1901
+ before { subject.rolling_fillna!(:forward) }
1902
+ it { expect(subject.rolling_fillna!(:forward)).to eq(subject) }
1903
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 3, 3, 3, 1, 7] }
1904
+ its(:'b.to_a') { is_expected.to eq [:a, :b, :b, :b, :b, 3, 5, 5] }
1905
+ its(:'c.to_a') { is_expected.to eq ['a', 'a', 3, 4, 3, 5, 5, 7] }
1906
+ end
1907
+
1908
+ context 'rolling_fillna! backwards' do
1909
+ before { subject.rolling_fillna!(:backward) }
1910
+ it { expect(subject.rolling_fillna!(:backward)).to eq(subject) }
1911
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 1, 1, 1, 1, 7] }
1912
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 3, 3, 3, 3, 5, 0] }
1913
+ its(:'c.to_a') { is_expected.to eq ['a', 3, 3, 4, 3, 5, 7, 7] }
1914
+ end
1915
+ end
1916
+
1917
+ context "#clone" do
1918
+ it "returns a view of the whole dataframe" do
1919
+ cloned = @data_frame.clone
1920
+ expect(@data_frame.object_id).to_not eq(cloned.object_id)
1921
+ expect(@data_frame[:a].object_id).to eq(cloned[:a].object_id)
1922
+ expect(@data_frame[:b].object_id).to eq(cloned[:b].object_id)
1923
+ expect(@data_frame[:c].object_id).to eq(cloned[:c].object_id)
1924
+ end
1925
+
1926
+ it "returns a view of selected vectors" do
1927
+ cloned = @data_frame.clone(:a, :b)
1928
+ expect(cloned.object_id).to_not eq(@data_frame.object_id)
1929
+ expect(cloned[:a].object_id).to eq(@data_frame[:a].object_id)
1930
+ expect(cloned[:b].object_id).to eq(@data_frame[:b].object_id)
1931
+ end
1932
+
1933
+ it "clones properly when supplied array" do
1934
+ cloned = @data_frame.clone([:a, :b])
1935
+ expect(cloned.object_id).to_not eq(@data_frame.object_id)
1936
+ expect(cloned[:a].object_id).to eq(@data_frame[:a].object_id)
1937
+ expect(cloned[:b].object_id).to eq(@data_frame[:b].object_id)
1938
+ end
1939
+
1940
+ it "original dataframe remains unaffected when operations are applied
1941
+ on cloned data frame" do
1942
+ original = @data_frame.dup
1943
+ cloned = @data_frame.clone
1944
+ cloned.delete_vector :a
1945
+
1946
+ expect(@data_frame).to eq(original)
1947
+ end
1948
+
1949
+ end
1950
+
1951
+ context "#clone_only_valid" do
1952
+ let(:df_with_missing) {
1953
+ DaruLite::DataFrame.new({
1954
+ a: [1 , 2, 3, nil, 4, nil, 5],
1955
+ b: [nil, 2, 3, nil, 4, nil, 5],
1956
+ c: [1, 2, 3, 43 , 4, nil, 5]
1957
+ })
1958
+ }
1959
+
1960
+ let(:df_without_missing) {
1961
+ DaruLite::DataFrame.new({
1962
+ a: [2,3,4,5],
1963
+ c: [2,3,4,5]
1964
+ })
1965
+ }
1966
+ it 'does the most reasonable thing' do
1967
+ expect(df_with_missing.clone_only_valid).to eq(df_with_missing.reject_values(*DaruLite::MISSING_VALUES))
1968
+ expect(df_without_missing.clone_only_valid).to eq(df_without_missing.clone)
1969
+ end
1970
+ end
1971
+
1972
+ context "#clone_structure" do
1973
+ it "clones only the index and vector structures of the data frame" do
1974
+ cs = @data_frame.clone_structure
1975
+
1976
+ expect(cs.vectors).to eq(@data_frame.vectors)
1977
+ expect(cs.index).to eq(@data_frame.index)
1978
+ expect(cs[:a]).to eq(DaruLite::Vector.new([nil] * cs[:a].size, index: @data_frame.index))
1979
+ end
1980
+ end
1981
+
1982
+ context "#each_index" do
1983
+ it "iterates over index" do
1984
+ idxs = []
1985
+ ret = @data_frame.each_index do |index|
1986
+ idxs << index
1987
+ end
1988
+
1989
+ expect(idxs).to eq([:one, :two, :three, :four, :five])
1990
+
1991
+ expect(ret).to eq(@data_frame)
1992
+ end
1993
+ end
1994
+
1995
+ context "#each_vector_with_index" do
1996
+ it "iterates over vectors with index" do
1997
+ idxs = []
1998
+ ret = @data_frame.each_vector_with_index do |vector, index|
1999
+ idxs << index
2000
+ expect(vector.index).to eq([:one, :two, :three, :four, :five].to_index)
2001
+ expect(vector.class).to eq(DaruLite::Vector)
2002
+ end
2003
+
2004
+ expect(idxs).to eq([:a, :b, :c])
2005
+
2006
+ expect(ret).to eq(@data_frame)
2007
+ end
2008
+ end
2009
+
2010
+ context "#each_row_with_index" do
2011
+ it "iterates over rows with indexes" do
2012
+ idxs = []
2013
+ ret = @data_frame.each_row_with_index do |row, idx|
2014
+ idxs << idx
2015
+ expect(row.index).to eq([:a, :b, :c].to_index)
2016
+ expect(row.class).to eq(DaruLite::Vector)
2017
+ end
2018
+
2019
+ expect(idxs).to eq([:one, :two, :three, :four, :five])
2020
+ expect(ret) .to eq(@data_frame)
2021
+ end
2022
+ end
2023
+
2024
+ context "#each" do
2025
+ it "iterates over rows" do
2026
+ ret = @data_frame.each(:row) do |row|
2027
+ expect(row.index).to eq([:a, :b, :c].to_index)
2028
+ expect(row.class).to eq(DaruLite::Vector)
2029
+ end
2030
+
2031
+ expect(ret).to eq(@data_frame)
2032
+ end
2033
+
2034
+ it "iterates over all vectors" do
2035
+ ret = @data_frame.each do |vector|
2036
+ expect(vector.index).to eq([:one, :two, :three, :four, :five].to_index)
2037
+ expect(vector.class).to eq(DaruLite::Vector)
2038
+ end
2039
+
2040
+ expect(ret).to eq(@data_frame)
2041
+ end
2042
+
2043
+ it "returns Enumerable if no block specified" do
2044
+ ret = @data_frame.each
2045
+ expect(ret.is_a?(Enumerator)).to eq(true)
2046
+ end
2047
+
2048
+ it "raises on unknown axis" do
2049
+ expect { @data_frame.each(:kitten) }.to raise_error(ArgumentError, /axis/)
2050
+ end
2051
+ end
2052
+
2053
+ context "#recode" do
2054
+ before do
2055
+ @ans_vector = DaruLite::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
2056
+ c: [21,32,43,54,65]}, order: [:a, :b, :c],
2057
+ index: [:one, :two, :three, :four, :five])
2058
+
2059
+ @ans_rows = DaruLite::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
2060
+ c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
2061
+ index: [:one, :two, :three, :four, :five])
2062
+
2063
+ @data_frame_date_time = @data_frame.dup
2064
+ @data_frame_date_time.index = DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5)
2065
+
2066
+ @ans_vector_date_time = DaruLite::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
2067
+ c: [21,32,43,54,65]}, order: [:a, :b, :c],
2068
+ index: DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5))
2069
+
2070
+ @ans_rows_date_time = DaruLite::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
2071
+ c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
2072
+ index: DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5))
2073
+ end
2074
+
2075
+ it "maps over the vectors of a DataFrame and returns a DataFrame" do
2076
+ ret = @data_frame.recode do |vector|
2077
+ vector.map! { |e| e += 10}
2078
+ end
2079
+
2080
+ expect(ret).to eq(@ans_vector)
2081
+ end
2082
+
2083
+ it "maps over the rows of a DataFrame and returns a DataFrame" do
2084
+ ret = @data_frame.recode(:row) do |row|
2085
+ expect(row.class).to eq(DaruLite::Vector)
2086
+ row.map! { |e| e*e }
2087
+ end
2088
+
2089
+ expect(ret).to eq(@ans_rows)
2090
+ end
2091
+
2092
+ it "maps over the vectors of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
2093
+ ret = @data_frame_date_time.recode do |vector|
2094
+ vector.map! { |e| e += 10}
2095
+ end
2096
+
2097
+ expect(ret).to eq(@ans_vector_date_time)
2098
+ end
2099
+
2100
+ it "maps over the rows of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
2101
+ ret = @data_frame_date_time.recode(:row) do |row|
2102
+ expect(row.class).to eq(DaruLite::Vector)
2103
+ row.map! { |e| e*e }
2104
+ end
2105
+
2106
+ expect(ret).to eq(@ans_rows_date_time)
2107
+ end
2108
+
2109
+ end
2110
+
2111
+ context "#collect" do
2112
+ before do
2113
+ @df = DaruLite::DataFrame.new({
2114
+ a: [1,2,3,4,5],
2115
+ b: [11,22,33,44,55],
2116
+ c: [1,2,3,4,5]
2117
+ })
2118
+ end
2119
+
2120
+ it "collects calculation over rows and returns a Vector from the results" do
2121
+ expect(@df.collect(:row) { |row| (row[:a] + row[:c]) * row[:c] }).to eq(
2122
+ DaruLite::Vector.new([2,8,18,32,50])
2123
+ )
2124
+ end
2125
+
2126
+ it "collects calculation over vectors and returns a Vector from the results" do
2127
+ expect(@df.collect { |v| v[0] * v[1] + v[4] }).to eq(
2128
+ DaruLite::Vector.new([7,297,7], index: [:a, :b, :c])
2129
+ )
2130
+ end
2131
+ end
2132
+
2133
+ context "#map" do
2134
+ it "iterates over rows and returns an Array" do
2135
+ ret = @data_frame.map(:row) do |row|
2136
+ expect(row.class).to eq(DaruLite::Vector)
2137
+ row[:a] * row[:c]
2138
+ end
2139
+
2140
+ expect(ret).to eq([11, 44, 99, 176, 275])
2141
+ expect(@data_frame.vectors.to_a).to eq([:a, :b, :c])
2142
+ end
2143
+
2144
+ it "iterates over vectors and returns an Array" do
2145
+ ret = @data_frame.map do |vector|
2146
+ vector.mean
2147
+ end
2148
+ expect(ret).to eq([3.0, 13.0, 33.0])
2149
+ end
2150
+ end
2151
+
2152
+ context "#map!" do
2153
+ before do
2154
+ @ans_vector = DaruLite::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
2155
+ c: [21,32,43,54,65]}, order: [:a, :b, :c],
2156
+ index: [:one, :two, :three, :four, :five])
2157
+
2158
+ @ans_row = DaruLite::DataFrame.new({b: [12,13,14,15,16], a: [2,3,4,5,6],
2159
+ c: [12,23,34,45,56]}, order: [:a, :b, :c],
2160
+ index: [:one, :two, :three, :four, :five])
2161
+ end
2162
+
2163
+ it "destructively maps over the vectors and changes the DF" do
2164
+ @data_frame.map! do |vector|
2165
+ vector + 10
2166
+ end
2167
+ expect(@data_frame).to eq(@ans_vector)
2168
+ end
2169
+
2170
+ it "destructively maps over the rows and changes the DF" do
2171
+ @data_frame.map!(:row) do |row|
2172
+ row + 1
2173
+ end
2174
+
2175
+ expect(@data_frame).to eq(@ans_row)
2176
+ end
2177
+ end
2178
+
2179
+ context "#map_vectors_with_index" do
2180
+ it "iterates over vectors with index and returns an Array" do
2181
+ idx = []
2182
+ ret = @data_frame.map_vectors_with_index do |vector, index|
2183
+ idx << index
2184
+ vector.recode { |e| e += 10}
2185
+ end
2186
+
2187
+ expect(ret).to eq([
2188
+ DaruLite::Vector.new([11,12,13,14,15],index: [:one, :two, :three, :four, :five]),
2189
+ DaruLite::Vector.new([21,22,23,24,25],index: [:one, :two, :three, :four, :five]),
2190
+ DaruLite::Vector.new([21,32,43,54,65],index: [:one, :two, :three, :four, :five])])
2191
+ expect(idx).to eq([:a, :b, :c])
2192
+ end
2193
+ end
2194
+
2195
+ # FIXME: map_VECTORS_with_index, but collect_VECTOR_with_index -- ??? -- zverok
2195
+ # (Not to mention the unfortunate difference between them: the former returns an Array, the latter a Vector...)
2197
+ context "#collect_vector_with_index" do
2198
+ it "iterates over vectors with index and returns an Array" do
2199
+ idx = []
2200
+ ret = @data_frame.collect_vector_with_index do |vector, index|
2201
+ idx << index
2202
+ vector.sum
2203
+ end
2204
+
2205
+ expect(ret).to eq(DaruLite::Vector.new([15, 65, 165], index: [:a, :b, :c]))
2206
+ expect(idx).to eq([:a, :b, :c])
2207
+ end
2208
+ end
2209
+
2210
+ context "#map_rows_with_index" do
2211
+ it "iterates over rows with index and returns an Array" do
2212
+ idx = []
2213
+ ret = @data_frame.map_rows_with_index do |row, index|
2214
+ idx << index
2215
+ expect(row.class).to eq(DaruLite::Vector)
2216
+ row[:a] * row[:c]
2217
+ end
2218
+
2219
+ expect(ret).to eq([11, 44, 99, 176, 275])
2220
+ expect(idx).to eq([:one, :two, :three, :four, :five])
2221
+ end
2222
+ end
2223
+
2224
+ context '#collect_row_with_index' do
2225
+ it "iterates over rows with index and returns a Vector" do
2226
+ idx = []
2227
+ ret = @data_frame.collect_row_with_index do |row, index|
2228
+ idx << index
2229
+ expect(row.class).to eq(DaruLite::Vector)
2230
+ row[:a] * row[:c]
2231
+ end
2232
+
2233
+ expected = DaruLite::Vector.new([11, 44, 99, 176, 275], index: @data_frame.index)
2234
+ expect(ret).to eq(expected)
2235
+ expect(idx).to eq([:one, :two, :three, :four, :five])
2236
+ end
2237
+ end
2238
+
2239
+ context "#delete_vector" do
2240
+ context DaruLite::Index do
2241
+ it "deletes the specified vector" do
2242
+ @data_frame.delete_vector :a
2243
+
2244
+ expect(@data_frame).to eq(DaruLite::DataFrame.new({b: [11,12,13,14,15],
2245
+ c: [11,22,33,44,55]}, order: [:b, :c],
2246
+ index: [:one, :two, :three, :four, :five]))
2247
+ end
2248
+ end
2249
+ end
2250
+
2251
+ context "#delete_vectors" do
2252
+ context DaruLite::Index do
2253
+ it "deletes the specified vectors" do
2254
+ @data_frame.delete_vectors :a, :b
2255
+
2256
+ expect(@data_frame).to eq(DaruLite::DataFrame.new({
2257
+ c: [11,22,33,44,55]}, order: [:c],
2258
+ index: [:one, :two, :three, :four, :five]))
2259
+ end
2260
+ end
2261
+ end
2262
+
2263
+ context "#delete_row" do
2264
+ it "deletes the specified row" do
2265
+ @data_frame.delete_row :three
2266
+
2267
+ expect(@data_frame).to eq(DaruLite::DataFrame.new({b: [11,12,14,15], a: [1,2,4,5],
2268
+ c: [11,22,44,55]}, order: [:a, :b, :c], index: [:one, :two, :four, :five]))
2269
+ end
2270
+ end
2271
+
2272
+ context "#keep_row_if" do
2273
+ pending "changing row from under the iterator trips this"
2274
+ it "keeps row if block evaluates to true" do
2275
+ df = DaruLite::DataFrame.new({b: [10,12,20,23,30], a: [50,30,30,1,5],
2276
+ c: [10,20,30,40,50]}, order: [:a, :b, :c],
2277
+ index: [:one, :two, :three, :four, :five])
2278
+
2279
+ df.keep_row_if do |row|
2280
+ row[:a] % 10 == 0
2281
+ end
2282
+ # TODO: write expectation
2283
+ end
2284
+ end
2285
+
2286
+ context "#keep_vector_if" do
2287
+ it "keeps vector if block evaluates to true" do
2288
+ @data_frame.keep_vector_if do |vector|
2289
+ vector == [1,2,3,4,5].dv(nil, [:one, :two, :three, :four, :five])
2290
+ end
2291
+
2292
+ expect(@data_frame).to eq(DaruLite::DataFrame.new({a: [1,2,3,4,5]}, order: [:a],
2293
+ index: [:one, :two, :three, :four, :five]))
2294
+ end
2295
+ end
2296
+
2297
+ context "#filter_field" do
2298
+ before do
2299
+ @df = DaruLite::DataFrame.new({
2300
+ :id => DaruLite::Vector.new([1, 2, 3, 4, 5]),
2301
+ :name => DaruLite::Vector.new(%w(Alex Claude Peter Franz George)),
2302
+ :age => DaruLite::Vector.new([20, 23, 25, 27, 5]),
2303
+ :city => DaruLite::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
2304
+ :a1 => DaruLite::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) },
2305
+ order: [:id, :name, :age, :city, :a1])
2306
+ end
2307
+
2308
+ it "creates new vector with the data of a given field for which block returns true" do
2309
+ filtered = @df.filter_vector(:id) { |c| c[:id] == 2 or c[:id] == 4 }
2310
+ expect(filtered).to eq(DaruLite::Vector.new([2,4]))
2311
+ end
2312
+ end
2313
+
2314
+ context "#filter_rows" do
2315
+ context DaruLite::Index do
2316
+ context "when specified no index" do
2317
+ it "filters rows" do
2318
+ df = DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]})
2319
+
2320
+ a = df.filter_rows do |row|
2321
+ row[:a] % 2 == 0
2322
+ end
2323
+
2324
+ expect(a).to eq(DaruLite::DataFrame.new({a: [2], b: [3]}, order: [:a, :b], index: [1]))
2325
+ end
2326
+ end
2327
+
2328
+ context "when specified numerical index" do
2329
+ it "filters rows" do
2330
+ df = DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]}, index: [1,2,3])
2331
+
2332
+ a = df.filter_rows do |row|
2333
+ row[:a] % 2 == 0
2334
+ end
2335
+
2336
+ expect(a).to eq(DaruLite::DataFrame.new({a: [2], b: [3]}, order: [:a, :b], index: [2]))
2337
+ end
2338
+ end
2339
+
2340
+ it "preserves names of vectors" do
2341
+ df = DaruLite::DataFrame.new a: 1..3, b: 4..6
2342
+ df1 = df.filter_rows { |r| r[:a] != 2 }
2343
+
2344
+ expect(df1[:a].name).to eq(df[:a].name)
2345
+ end
2346
+ end
2347
+ end
2348
+
2349
+ context "#filter_vectors" do
2350
+ context DaruLite::Index do
2351
+ it "filters vectors" do
2352
+ df = DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]})
2353
+
2354
+ a = df.filter_vectors do |vector|
2355
+ vector[0] == 1
2356
+ end
2357
+
2358
+ expect(a).to eq(DaruLite::DataFrame.new({a: [1,2,3]}))
2359
+ end
2360
+ end
2361
+ end
2362
+
2363
+ context "#filter" do
2364
+ let(:df) { DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]}) }
2365
+ it "dispatches" do
2366
+ expect(df.filter(:row){|r| r[:a] % 2 == 0 }).to \
2367
+ eq df.filter_rows{|r| r[:a] % 2 == 0 }
2368
+
2369
+ expect(df.filter(:vector){|v| v[0] == 1}).to \
2370
+ eq df.filter_vectors{|v| v[0] == 1}
2371
+
2372
+ expect { df.filter(:kitten){} }.to raise_error ArgumentError, /axis/
2373
+ end
2374
+ end
2375
+
2376
+ context "#to_a" do
2377
+ context DaruLite::Index do
2378
+ it "converts DataFrame into array of hashes" do
2379
+ arry = @data_frame.to_a
2380
+
2381
+ expect(arry).to eq(
2382
+ [
2383
+ [
2384
+ {a: 1, b: 11, c: 11},
2385
+ {a: 2, b: 12, c: 22},
2386
+ {a: 3, b: 13, c: 33},
2387
+ {a: 4, b: 14, c: 44},
2388
+ {a: 5, b: 15, c: 55}
2389
+ ],
2390
+ [
2391
+ :one, :two, :three, :four, :five
2392
+ ]
2393
+ ])
2394
+ end
2395
+ end
2396
+
2397
+ context DaruLite::MultiIndex do
2398
+ pending
2399
+ end
2400
+ end
2401
+
2402
+ context "#to_h" do
2403
+ it "converts to a hash" do
2404
+ expect(@data_frame.to_h).to eq(
2405
+ {
2406
+ a: DaruLite::Vector.new([1,2,3,4,5],
2407
+ index: [:one, :two, :three, :four, :five]),
2408
+ b: DaruLite::Vector.new([11,12,13,14,15],
2409
+ index: [:one, :two, :three, :four, :five]),
2410
+ c: DaruLite::Vector.new([11,22,33,44,55],
2411
+ index: [:one, :two, :three, :four, :five])
2412
+ }
2413
+ )
2414
+ end
2415
+ end
2416
+
2417
+ context "#sort" do
2418
+ context DaruLite::Index do
2419
+ before :each do
2420
+ @df = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [-2,-1,5,3,9,1], c: ['a','aa','aaa','aaaa','aaaaa','aaaaaa']})
2421
+ end
2422
+
2423
+ it "sorts according to given vector order (bang)" do
2424
+ a_sorter = lambda { |a| a }
2425
+ ans = @df.sort([:a], by: { a: a_sorter })
2426
+
2427
+ expect(ans).to eq(
2428
+ DaruLite::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,9,1,3], c: ['aaa','aa','a','aaaaa','aaaaaa','aaaa']},
2429
+ index: [2,1,0,4,5,3])
2430
+ )
2431
+ expect(ans).to_not eq(@df)
2432
+ end
2433
+
2434
+ it "sorts according to vector order using default lambdas (index re ordered according to the last vector) (bang)" do
2435
+ ans = @df.sort([:a, :b])
2436
+ expect(ans).to eq(
2437
+ DaruLite::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,1,9,3], c: ['aaa','aa','a','aaaaaa','aaaaa','aaaa']},
2438
+ index: [2,1,0,5,4,3])
2439
+ )
2440
+ expect(ans).to_not eq(@df)
2441
+ end
2442
+ end
2443
+
2444
+ context DaruLite::MultiIndex do
2445
+ pending
2446
+ end
2447
+
2448
+ context DaruLite::CategoricalIndex do
2449
+ let(:idx) { DaruLite::CategoricalIndex.new [:a, 1, :a, 1, :c] }
2450
+ let(:df) do
2451
+ DaruLite::DataFrame.new({
2452
+ a: [2, -1, 3, 4, 5],
2453
+ b: ['x', 'y', 'x', 'a', 'y'],
2454
+ c: [nil, nil, -2, 2, 1]
2455
+ }, index: idx)
2456
+ end
2457
+
2458
+ context "ascending order" do
2459
+ context "single vector" do
2460
+ subject { df.sort [:a] }
2461
+
2462
+ its(:'index.to_a') { is_expected.to eq [1, :a, :a, 1, :c] }
2463
+ its(:'a.to_a') { is_expected.to eq [-1, 2, 3, 4, 5] }
2464
+ its(:'b.to_a') { is_expected.to eq ['y', 'x', 'x', 'a', 'y'] }
2465
+ its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2466
+ end
2467
+
2468
+ context "multiple vectors" do
2469
+ subject { df.sort [:c, :b] }
2470
+
2471
+ its(:'index.to_a') { is_expected.to eq [:a, 1, :a, :c, 1] }
2472
+ its(:'a.to_a') { is_expected.to eq [2, -1, 3, 5, 4] }
2473
+ its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'y', 'a'] }
2474
+ its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 1, 2] }
2475
+ end
2476
+
2477
+ context "block" do
2478
+ context "automatic handle nils" do
2479
+ subject do
2480
+ df.sort [:c], by: {c: lambda { |a| a.abs } }, handle_nils: true
2481
+ end
2482
+
2483
+ its(:'index.to_a') { is_expected.to eq [:a, 1, :c, :a, 1] }
2484
+ its(:'a.to_a') { is_expected.to eq [2, -1, 5, 3, 4] }
2485
+ its(:'b.to_a') { is_expected.to eq ['x', 'y', 'y', 'x', 'a'] }
2486
+ its(:'c.to_a') { is_expected.to eq [nil, nil, 1, -2, 2] }
2487
+ end
2488
+
2489
+ context "manually handle nils" do
2490
+ subject do
2491
+ df.sort [:c], by: {c: lambda { |a| (a.nil?)?[1]:[0,a.abs] } }
2492
+ end
2493
+
2494
+ its(:'index.to_a') { is_expected.to eq [:c, :a, 1, :a, 1] }
2495
+ its(:'a.to_a') { is_expected.to eq [5, 3, 4, 2, -1] }
2496
+ its(:'b.to_a') { is_expected.to eq ['y', 'x', 'a', 'x', 'y'] }
2497
+ its(:'c.to_a') { is_expected.to eq [1, -2, 2, nil, nil] }
2498
+ end
2499
+ end
2500
+ end
2501
+
2502
+ context "descending order" do
2503
+ context "single vector" do
2504
+ subject { df.sort [:a], ascending: false }
2505
+
2506
+ its(:'index.to_a') { is_expected.to eq [:c, 1, :a, :a, 1] }
2507
+ its(:'a.to_a') { is_expected.to eq [5, 4, 3, 2, -1] }
2508
+ its(:'b.to_a') { is_expected.to eq ['y', 'a', 'x', 'x', 'y'] }
2509
+ its(:'c.to_a') { is_expected.to eq [1, 2, -2, nil, nil] }
2510
+ end
2511
+
2512
+ context "multiple vectors" do
2513
+ subject { df.sort [:c, :b], ascending: false }
2514
+
2515
+ its(:'index.to_a') { is_expected.to eq [1, :a, 1, :c, :a] }
2516
+ its(:'a.to_a') { is_expected.to eq [-1, 2, 4, 5, 3] }
2517
+ its(:'b.to_a') { is_expected.to eq ['y', 'x', 'a', 'y', 'x'] }
2518
+ its(:'c.to_a') { is_expected.to eq [nil, nil, 2, 1, -2] }
2519
+ end
2520
+
2521
+ context "block" do
2522
+ context "automatic handle nils" do
2523
+ subject do
2524
+ df.sort [:c],
2525
+ by: {c: lambda { |a| a.abs } },
2526
+ handle_nils: true,
2527
+ ascending: false
2528
+ end
2529
+
2530
+ its(:'index.to_a') { is_expected.to eq [:a, 1, :a, 1, :c] }
2531
+ its(:'a.to_a') { is_expected.to eq [2, -1, 3, 4, 5] }
2532
+ its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'a', 'y'] }
2533
+ its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2534
+ end
2535
+
2536
+ context "manually handle nils" do
2537
+ subject do
2538
+ df.sort [:c],
2539
+ by: {c: lambda { |a| (a.nil?)?[1]:[0,a.abs] } },
2540
+ ascending: false
2541
+ end
2542
+
2543
+ its(:'index.to_a') { is_expected.to eq [:a, 1, :a, 1, :c] }
2544
+ its(:'a.to_a') { is_expected.to eq [2, -1, 3, 4, 5] }
2545
+ its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'a', 'y'] }
2546
+ its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2547
+ end
2548
+ end
2549
+ end
2550
+ end
2551
+ end
2552
+
2553
# Specs for DataFrame#sort!, the in-place sort. Each expectation compares the
# value returned by sort! with a frame holding the reordered data, with the
# original row labels passed through `index:`.
context '#sort!' do
  context DaruLite::Index do
    before :each do
      @df = DaruLite::DataFrame.new(
        {
          a: [5, 1, -6, 7, 5, 5],
          b: [-2, -1, 5, 3, 9, 1],
          c: %w[a aa aaa aaaa aaaaa aaaaaa]
        }
      )
    end

    # Frame whose first sort key (:a) contains nils.
    let(:nil_keyed) do
      DaruLite::DataFrame.new(
        {
          a: [1, 1, 1, nil, 44, 5, 5, nil],
          b: [44, 44, 333, 222, 111, 554, 22, 3],
          c: [3, 2, 5, 3, 3, 1, 5, 5]
        }
      )
    end

    # Frame with nils in both a numeric (:b) and a string (:c) vector.
    let(:non_numeric) do
      DaruLite::DataFrame.new(
        {
          a: [5, 1, -6, 7, 5, 5],
          b: [nil, -1, 1, nil, -1, 1],
          c: ['aaa', 'aaa', nil, 'baaa', 'xxx', nil]
        }
      )
    end

    it 'sorts according to given vector order (bang)' do
      identity = ->(value) { value }
      expected = DaruLite::DataFrame.new(
        {
          a: [-6, 1, 5, 5, 5, 7],
          b: [5, -1, -2, 9, 1, 3],
          c: %w[aaa aa a aaaaa aaaaaa aaaa]
        },
        index: [2, 1, 0, 4, 5, 3]
      )

      expect(@df.sort!([:a], by: { a: identity })).to eq(expected)
    end

    it 'sorts according to vector order using default lambdas (index re ordered according to the last vector) (bang)' do
      expected = DaruLite::DataFrame.new(
        {
          a: [-6, 1, 5, 5, 5, 7],
          b: [5, -1, -2, 1, 9, 3],
          c: %w[aaa aa a aaaaaa aaaaa aaaa]
        },
        index: [2, 1, 0, 5, 4, 3]
      )

      expect(@df.sort!(%i[a b])).to eq(expected)
    end

    it 'sorts both vectors in descending order' do
      expected = DaruLite::DataFrame.new(
        {
          a: [7, 5, 5, 5, 1, -6],
          b: [3, 9, 1, -2, -1, 5],
          c: %w[aaaa aaaaa aaaaaa a aa aaa]
        },
        index: [3, 4, 5, 0, 1, 2]
      )

      expect(@df.sort!(%i[a b], ascending: [false, false])).to eq(expected)
    end

    it 'sorts one vector in desc and other is asc' do
      expected = DaruLite::DataFrame.new(
        {
          a: [7, 5, 5, 5, 1, -6],
          b: [3, -2, 1, 9, -1, 5],
          c: %w[aaaa a aaaaaa aaaaa aa aaa]
        },
        index: [3, 0, 5, 4, 1, 2]
      )

      expect(@df.sort!(%i[a b], ascending: [false, true])).to eq(expected)
    end

    it 'sorts many vectors' do
      frame = DaruLite::DataFrame.new(
        {
          a: [1, 1, 1, 222, 44, 5, 5, 544],
          b: [44, 44, 333, 222, 111, 554, 22, 3],
          c: [3, 2, 5, 3, 3, 1, 5, 5]
        }
      )
      expected = DaruLite::DataFrame.new(
        {
          a: [544, 222, 44, 5, 5, 1, 1, 1],
          b: [3, 222, 111, 22, 554, 44, 44, 333],
          c: [5, 3, 3, 5, 1, 3, 2, 5]
        },
        index: [7, 3, 4, 6, 5, 0, 1, 2]
      )

      expect(frame.sort!(%i[a b c], ascending: [false, true, false])).to eq(expected)
    end

    it 'places nils at the beginning when sorting ascedingly' do
      expected = DaruLite::DataFrame.new(
        {
          a: [nil, nil, 1, 1, 1, 5, 5, 44],
          b: [3, 222, 44, 44, 333, 22, 554, 111],
          c: [5, 3, 3, 2, 5, 5, 1, 3]
        },
        index: [7, 3, 0, 1, 2, 6, 5, 4]
      )

      expect(nil_keyed.sort!(%i[a b c], ascending: [true, true, false])).to eq(expected)
    end

    it 'places nils at the beginning when sorting decendingly' do
      expected = DaruLite::DataFrame.new(
        {
          a: [nil, nil, 44, 5, 5, 1, 1, 1],
          b: [3, 222, 111, 22, 554, 44, 44, 333],
          c: [5, 3, 3, 5, 1, 3, 2, 5]
        },
        index: [7, 3, 4, 6, 5, 0, 1, 2]
      )

      expect(nil_keyed.sort!(%i[a b c], ascending: [false, true, false])).to eq(expected)
    end

    it 'sorts vectors of non-numeric types with nils in ascending order' do
      expected = DaruLite::DataFrame.new(
        {
          a: [-6, 5, 5, 1, 7, 5],
          b: [1, 1, nil, -1, nil, -1],
          c: [nil, nil, 'aaa', 'aaa', 'baaa', 'xxx']
        },
        index: [2, 5, 0, 1, 3, 4]
      )

      expect(non_numeric.sort!([:c], ascending: [true])).to eq(expected)
    end

    it 'sorts vectors of non-numeric types with nils in descending order' do
      expected = DaruLite::DataFrame.new(
        {
          a: [-6, 5, 5, 7, 5, 1],
          b: [1, 1, -1, nil, nil, -1],
          c: [nil, nil, 'xxx', 'baaa', 'aaa', 'aaa']
        },
        index: [2, 5, 4, 3, 0, 1]
      )

      expect(non_numeric.sort!([:c], ascending: [false])).to eq(expected)
    end

    it 'sorts vectors with block provided and handle nils automatically' do
      magnitude = ->(value) { value.abs }
      expected = DaruLite::DataFrame.new(
        {
          a: [5, 7, 1, -6, 5, 5],
          b: [nil, nil, -1, 1, -1, 1],
          c: ['aaa', 'baaa', 'aaa', nil, 'xxx', nil]
        },
        index: [0, 3, 1, 2, 4, 5]
      )

      expect(non_numeric.sort!([:b], by: { b: magnitude }, handle_nils: true)).to eq(expected)
    end

    it 'sorts vectors with block provided and nils handled manually' do
      # The key is an array: [1] for nil, [0, magnitude] for everything else.
      nils_last = ->(value) { value.nil? ? [1] : [0, value.abs] }
      expected = DaruLite::DataFrame.new(
        {
          a: [1, -6, 5, 5, 5, 7],
          b: [-1, 1, -1, 1, nil, nil],
          c: ['aaa', nil, 'xxx', nil, 'aaa', 'baaa']
        },
        index: [1, 2, 4, 5, 0, 3]
      )

      expect(non_numeric.sort!([:b], by: { b: nils_last }, handle_nils: false)).to eq(expected)
    end
  end

  context DaruLite::MultiIndex do
    pending
    # NOTE(review): this example calls the non-bang `sort` and makes no
    # expectation, so it only checks that the call does not raise - confirm
    # whether `sort!` plus an assertion was intended.
    it 'sorts the DataFrame when specified full tuple' do
      @df_mi.sort([%i[a one bar]])
    end
  end
end
2669
+
2670
# Specs for DataFrame#index=: replacing the row index of an existing frame.
context '#index=' do
  before :each do
    @df = DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, 5],
        b: [11, 22, 33, 44, 55],
        c: %w[a b c d e]
      }
    )
  end

  it 'simply reassigns the index' do
    @df.index = DaruLite::Index.new(['4', 'foo', :bar, 0, 23])

    # 'foo' is the second label, so it must address the second row.
    second_row = DaruLite::Vector.new([2, 22, 'b'], index: %i[a b c])
    expect(@df.row['foo']).to eq(second_row)
  end

  it 'raises error for improper length index' do
    too_short = DaruLite::Index.new([1, 2])

    expect { @df.index = too_short }.to raise_error(ArgumentError)
  end

  it 'is able to accept array' do
    labels = (1..5).to_a
    @df.index = labels

    expect(@df.index).to eq(DaruLite::Index.new((1..5).to_a))
  end
end
2695
+
2696
# Specs for DataFrame#order=: reordering the vectors of a frame.
context '#order=' do
  let(:df) do
    DaruLite::DataFrame.new({ a: [1, 2, 3], b: [4, 5, 6] }, order: %i[a b])
  end

  context 'correct order' do
    subject { df }

    before { df.order = %i[b a] }

    # The vector order flips while each vector keeps its own data.
    its(:'vectors.to_a') { is_expected.to eq %i[b a] }
    its(:'b.to_a') { is_expected.to eq [4, 5, 6] }
    its(:'a.to_a') { is_expected.to eq [1, 2, 3] }
  end

  # NOTE(review): both failure cases use a bare `raise_error`, which accepts
  # any exception; the expected error class is not visible here.
  context 'insufficient vectors' do
    it do
      expect { df.order = [:a] }.to raise_error
    end
  end

  context 'wrong vectors' do
    it do
      expect { df.order = [:a, :b, 'b'] }.to raise_error
    end
  end
end
2721
+
2722
# Specs for DataFrame#vectors=: replacing the vector (column) index.
context '#vectors=' do
  before :each do
    @df = DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, 5],
        b: [11, 22, 33, 44, 55],
        c: %w[a b c d e]
      }
    )
  end

  it 'simply reassigns vectors' do
    replacement = ['b', 0, 'm']
    @df.vectors = DaruLite::Index.new(replacement)

    expect(@df.vectors).to eq(DaruLite::Index.new(['b', 0, 'm']))
    # Data stays in place; only the labels addressing it change.
    expect(@df['b']).to eq(DaruLite::Vector.new([1, 2, 3, 4, 5]))
    expect(@df[0]).to eq(DaruLite::Vector.new([11, 22, 33, 44, 55]))
    expect(@df['m']).to eq(DaruLite::Vector.new(%w[a b c d e]))
  end

  it 'raises error for improper length index' do
    # Five labels for a three-vector frame.
    oversized = DaruLite::Index.new([1, 2, '3', 4, '5'])

    expect { @df.vectors = oversized }.to raise_error(ArgumentError)
  end

  it 'change name of vectors in @data' do
    new_index_array = %i[k l m]
    @df.vectors = DaruLite::Index.new(new_index_array)

    expect(@df.data.map(&:name)).to eq(new_index_array)
  end
end
2753
+
2754
# Specs for DataFrame#rename_vectors!, the variant that returns the frame.
context '#rename_vectors!' do
  before do
    @df = DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, 5],
        b: [11, 22, 33, 44, 55],
        c: %w[a b c d e]
      }
    )
  end

  it 'returns self as modified dataframe' do
    result = @df.rename_vectors!(a: :alpha)

    expect(result).to eq(@df)
  end

  it 're-uses rename_vectors method' do
    name_map = { a: :alpha, c: :gamma }

    # The message expectation must be set before the call under test.
    expect(@df).to receive(:rename_vectors).with(name_map)
    @df.rename_vectors!(name_map)
  end
end
2773
+
2774
# Specs for DataFrame#rename_vectors: renaming vectors through an
# old-name => new-name mapping.
context '#rename_vectors' do
  before do
    @df = DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, 5],
        b: [11, 22, 33, 44, 55],
        c: %w[a b c d e]
      }
    )
  end

  it 'returns DaruLite::Index' do
    expect(@df.rename_vectors(a: :alpha)).to be_kind_of(DaruLite::Index)
  end

  it 'renames vectors using a hash map' do
    @df.rename_vectors(a: :alpha, c: :gamma)

    expect(@df.vectors.to_a).to eq(%i[alpha b gamma])
  end

  it 'overwrites vectors if the new name already exists' do
    original_a = @df[:a].dup

    @df.rename_vectors(a: :b)

    # The old :b is gone and :b now carries what used to be :a.
    expect(@df.vectors.to_a).to eq(%i[b c])
    expect(@df[:b]).to eq(original_a)
  end

  it 'makes no changes if the old and new names are the same' do
    original_a = @df[:a].dup

    @df.rename_vectors(a: :a)

    expect(@df.vectors.to_a).to eq(%i[a b c])
    expect(@df[:a]).to eq(original_a)
  end
end
2808
+
2809
# Specs for DataFrame#add_level_to_vectors: prefixing every vector name with
# a new top-level label, which turns the vectors index into a MultiIndex.
context '#add_level_to_vectors' do
  subject { df.add_level_to_vectors(top_level_label) }

  let(:top_level_label) { :percentages }
  let(:df) do
    DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, 5],
        b: [11, 22, 33, 44, 55],
        c: %w[a b c d e]
      }
    )
  end
  # One [:percentages, <vector>] tuple per original vector, in order.
  let(:expected_index) do
    tuples = %i[a b c].map { |vector_name| [:percentages, vector_name] }
    DaruLite::MultiIndex.from_tuples(tuples)
  end

  it 'returns expected Multi::Index' do
    expect(subject).to eq(expected_index)
  end

  it 'updates dataframe vectors to the expected Multi::Index' do
    expect { subject }.to change { df.vectors }.to(expected_index)
  end
end
2834
+
2835
# Specs for DataFrame#reindex: building a frame aligned to a new row index.
context '#reindex' do
  it 're indexes and aligns accordingly' do
    df = DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, 5],
        b: [11, 22, 33, 44, 55],
        c: %w[a b c d e]
      }
    )
    new_labels = [1, 3, 0, 8, 2]

    ans = df.reindex(DaruLite::Index.new(new_labels))

    # Label 8 does not exist in the source frame, so its row is all nil.
    aligned = DaruLite::DataFrame.new(
      {
        a: [2, 4, 1, nil, 3],
        b: [22, 44, 11, nil, 33],
        c: ['b', 'd', 'a', nil, 'c']
      },
      index: DaruLite::Index.new([1, 3, 0, 8, 2])
    )
    expect(ans).to eq(aligned)
    expect(ans).to_not eq(df)
  end
end
2852
+
2853
# Specs for DataFrame#reindex_vectors: building a frame aligned to a new
# vector index.
context '#reindex_vectors' do
  it 're indexes vectors and aligns accordingly' do
    df = DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, 5],
        b: [11, 22, 33, 44, 55],
        c: %w[a b c d e]
      }
    )
    # The String 'a' is a different label from the Symbol :a.
    wanted = [:b, 'a', :a]

    ans = df.reindex_vectors(DaruLite::Index.new(wanted))

    aligned = DaruLite::DataFrame.new(
      {
        b: [11, 22, 33, 44, 55],
        'a' => [nil, nil, nil, nil, nil],
        a: [1, 2, 3, 4, 5]
      },
      order: [:b, 'a', :a]
    )
    expect(ans).to eq(aligned)
  end

  it 'raises ArgumentError if argument was not an index' do
    df = DaruLite::DataFrame.new([])

    expect { df.reindex_vectors([]) }.to raise_error(ArgumentError)
  end
end
2874
+
2875
# Specs for DataFrame#to_matrix: conversion of a frame to a stdlib Matrix.
context '#to_matrix' do
  before do
    @df = DaruLite::DataFrame.new(
      {
        b: [11, 12, 13, 14, 15],
        a: [1, 2, 3, 4, 5],
        c: [11, 22, 33, 44, 55],
        d: [5, 4, nil, 2, 1],
        e: %w[this has string data too]
      },
      order: %i[a b c d e],
      index: %i[one two three four five]
    )
  end

  it 'concats numeric non-nil vectors to Matrix' do
    # Columns follow the frame order a, b, c, d; the string vector :e is
    # absent from the expected matrix.
    expected = Matrix[
      [1, 11, 11, 5],
      [2, 12, 22, 4],
      [3, 13, 33, nil],
      [4, 14, 44, 2],
      [5, 15, 55, 1]
    ]

    expect(@df.to_matrix).to eq(expected)
  end
end
2893
+
2894
# Specs for DataFrame#transpose. The frames under test (@data_frame, @df_mi)
# and the @vector_arry*, @order_mi and @multi_index fixtures are set up
# outside this context.
context '#transpose' do
  context DaruLite::Index do
    it 'transposes a DataFrame including row and column indexing' do
      flipped = DaruLite::DataFrame.new(
        {
          one: [1, 11, 11],
          two: [2, 12, 22],
          three: [3, 13, 33],
          four: [4, 14, 44],
          five: [5, 15, 55]
        },
        index: %i[a b c],
        order: %i[one two three four five]
      )

      expect(@data_frame.transpose).to eq(flipped)
    end
  end

  context DaruLite::MultiIndex do
    it 'transposes a DataFrame including row and column indexing' do
      source_columns = [@vector_arry1, @vector_arry2, @vector_arry1, @vector_arry2]
      # Row and vector indexes swap roles in the expected frame.
      flipped = DaruLite::DataFrame.new(
        source_columns.transpose,
        index: @order_mi,
        order: @multi_index
      )

      expect(@df_mi.transpose).to eq(flipped)
    end
  end
end
2919
+
2920
# Specs for DataFrame#pivot_table: grouping rows by `index:`, spreading the
# groups across `vectors:`, and aggregating `values:` with `agg:` (the
# examples below show the default aggregate is the mean).
context '#pivot_table' do
  before do
    @df = DaruLite::DataFrame.new(
      {
        a: %w[foo foo foo foo foo bar bar bar bar],
        b: %w[one one one two two one one two two],
        c: %w[small large large small small large small large small],
        d: [1, 2, 2, 3, 3, 4, 5, 6, 7],
        e: [2, 4, 4, 6, 6, 8, 10, 12, 14]
      }
    )
  end

  # Row index produced when grouping by :a together with `vectors:`;
  # each key is a one-element tuple.
  let(:pivot_bar_foo_index) do
    DaruLite::MultiIndex.from_tuples([['bar'], ['foo']])
  end

  # Vector index for :d and :e spread over the values of :b.
  let(:pivot_d_e_by_b) do
    DaruLite::MultiIndex.from_tuples(
      [
        [:d, 'one'],
        [:d, 'two'],
        [:e, 'one'],
        [:e, 'two']
      ]
    )
  end

  # Vector index for :e spread over the values of :b and :c.
  let(:pivot_e_by_b_c) do
    DaruLite::MultiIndex.from_tuples(
      [
        [:e, 'one', 'large'],
        [:e, 'one', 'small'],
        [:e, 'two', 'large'],
        [:e, 'two', 'small']
      ]
    )
  end

  # Same layout as @df, but with an extra 'ice' group and nils in :e.
  let(:pivot_frame_with_nils) do
    DaruLite::DataFrame.new(
      {
        a: %w[foo foo foo foo foo bar bar bar ice],
        b: %w[one one one two two one one two two],
        c: %w[small large large small small large small large small],
        d: [1, 2, 2, 3, 3, 4, 5, 6, 7],
        e: [2, nil, 4, 6, 6, 8, 10, 12, nil]
      }
    )
  end

  it 'creates row index as per (single) index argument and default aggregates to mean' do
    expected = DaruLite::DataFrame.new(
      {
        d: [5.5, 2.2],
        e: [11.0, 4.4]
      },
      index: ['bar', 'foo']
    )

    expect(@df.pivot_table(index: [:a])).to eq(expected)
  end

  it 'creates row index as per (double) index argument and default aggregates to mean' do
    agg_mi = DaruLite::MultiIndex.from_tuples(
      [
        ['bar', 'large'],
        ['bar', 'small'],
        ['foo', 'large'],
        ['foo', 'small']
      ]
    )
    expected = DaruLite::DataFrame.new(
      {
        d: [5.0, 6.0, 2.0, 2.33],
        e: [10.0, 12.0, 4.0, 4.67]
      },
      index: agg_mi
    )

    expect(@df.pivot_table(index: %i[a c]).round(2)).to eq(expected)
  end

  it 'creates row and vector index as per (single) index and (single) vectors args' do
    expected = DaruLite::DataFrame.new(
      [
        [4.5, 1.67],
        [6.5, 3.0],
        [9.0, 3.33],
        [13, 6]
      ],
      order: pivot_d_e_by_b,
      index: pivot_bar_foo_index
    )

    expect(@df.pivot_table(index: [:a], vectors: [:b]).round(2)).to eq(expected)
  end

  it 'creates row and vector index as per (single) index and (double) vector args' do
    agg_vectors = DaruLite::MultiIndex.from_tuples(
      [
        [:d, 'one', 'large'],
        [:d, 'one', 'small'],
        [:d, 'two', 'large'],
        [:d, 'two', 'small'],
        [:e, 'one', 'large'],
        [:e, 'one', 'small'],
        [:e, 'two', 'large'],
        [:e, 'two', 'small']
      ]
    )
    # nil marks the (two, large) combination, which has no 'foo' row.
    expected = DaruLite::DataFrame.new(
      [
        [4.0, 2.0],
        [5.0, 1.0],
        [6.0, nil],
        [7.0, 3.0],
        [8.0, 4.0],
        [10.0, 2.0],
        [12.0, nil],
        [14.0, 6.0]
      ],
      order: agg_vectors,
      index: pivot_bar_foo_index
    )

    expect(@df.pivot_table(index: [:a], vectors: %i[b c])).to eq(expected)
  end

  it 'creates row and vector index with (double) index and (double) vector args' do
    agg_index = DaruLite::MultiIndex.from_tuples(
      [
        ['bar', 4],
        ['bar', 5],
        ['bar', 6],
        ['bar', 7],
        ['foo', 1],
        ['foo', 2],
        ['foo', 3]
      ]
    )
    expected = DaruLite::DataFrame.new(
      [
        [8, nil, nil, nil, nil, 4, nil],
        [nil, 10, nil, nil, 2, nil, nil],
        [nil, nil, 12, nil, nil, nil, nil],
        [nil, nil, nil, 14, nil, nil, 6]
      ],
      index: agg_index,
      order: pivot_e_by_b_c
    )

    expect(@df.pivot_table(index: %i[a d], vectors: %i[b c])).to eq(expected)
  end

  it 'only aggregates over the vector specified in the values argument' do
    # A single value vector given as a bare symbol.
    only_e = DaruLite::DataFrame.new(
      [
        [8, 4],
        [10, 2],
        [12, nil],
        [14, 6]
      ],
      order: pivot_e_by_b_c,
      index: pivot_bar_foo_index
    )
    expect(@df.pivot_table(index: [:a], vectors: %i[b c], values: :e)).to eq(only_e)

    # Several value vectors given as an array.
    d_and_e = DaruLite::DataFrame.new(
      [
        [4.5, 5.0 / 3],
        [6.5, 3.0],
        [9.0, 10.0 / 3],
        [13.0, 6.0]
      ],
      order: pivot_d_e_by_b,
      index: pivot_bar_foo_index
    )
    expect(@df.pivot_table(index: [:a], vectors: [:b], values: %i[d e])).to eq(d_and_e)
  end

  it 'overrides default aggregate function to aggregate over sum' do
    expected = DaruLite::DataFrame.new(
      [
        [8, 8],
        [10, 2],
        [12, nil],
        [14, 12]
      ],
      order: pivot_e_by_b_c,
      index: pivot_bar_foo_index
    )

    expect(@df.pivot_table(index: [:a], vectors: %i[b c], values: :e, agg: :sum)).to eq(expected)
  end

  # The frame below holds only string vectors, i.e. nothing numeric to
  # aggregate. NOTE(review): bare `raise_error` accepts any exception; the
  # expected error class is not visible here.
  it 'raises error if no numeric vectors are present' do
    df = DaruLite::DataFrame.new({ a: %w[a b c], b: %w[b e d] })

    expect { df.pivot_table(index: [:a]) }.to raise_error
  end

  it 'raises error if atleast a row index is not specified' do
    expect { @df.pivot_table }.to raise_error
  end

  it 'aggregates when nils are present in value vector' do
    expected = DaruLite::DataFrame.new(
      {
        d: [5.0, 2.2, 7],
        e: [10.0, 4.5, nil]
      },
      index: DaruLite::Index.new(['bar', 'foo', 'ice'])
    )

    expect(pivot_frame_with_nils.pivot_table(index: [:a])).to eq(expected)
  end

  it 'works when nils are present in value vector' do
    agg_vectors = DaruLite::MultiIndex.from_tuples(
      [
        [:e, 'one'],
        [:e, 'two']
      ]
    )
    agg_index = DaruLite::MultiIndex.from_tuples(
      [
        ['bar'],
        ['foo'],
        ['ice']
      ]
    )
    expected = DaruLite::DataFrame.new(
      [
        [9, 3, nil],
        [12, 6, nil]
      ],
      order: agg_vectors,
      index: agg_index
    )

    expect(pivot_frame_with_nils.pivot_table(index: [:a], vectors: [:b], values: :e)).to eq(expected)
  end

  it 'performs date pivoting' do
    categories = %i[jan feb mar apr may jun jul aug sep oct nov dec]
    df = DaruLite::DataFrame.rows(
      [
        [2014, 2, 1600.0, 20.0],
        [2014, 3, 1680.0, 21.0],
        [2016, 2, 1600.0, 20.0],
        [2016, 4, 1520.0, 19.0]
      ],
      order: %i[year month visitors days]
    )
    df[:averages] = df[:visitors] / df[:days]
    # Month numbers are 1-based; `categories` is 0-based.
    df[:month] = df[:month].map { |month_number| categories[month_number - 1] }
    actual = df.pivot_table(index: :month, vectors: [:year], values: :averages)

    # NB: some parts of the expected layout look illogical:
    # months are sorted lexicographically and then wrapped into a
    # multi-index with one element per tuple, and the order of the columns
    # depends on which month is lexicographically first (it is :apr, so the
    # apr-2016 row is gathered first and 2016 becomes the first column).
    #
    # All of this is inherited from the group_by implementation (which
    # always sorts results and always makes array keys). Fixing group_by,
    # even to the extent described at https://github.com/v0dro/daru/issues/152,
    # should fix this case as well.
    expected = DaruLite::DataFrame.new(
      [
        [80.0, 80.0, nil],
        [nil, 80.0, 80.0]
      ],
      index: DaruLite::MultiIndex.from_tuples([[:apr], [:feb], [:mar]]),
      order: DaruLite::MultiIndex.from_tuples([[:averages, 2016], [:averages, 2014]])
    )

    # Comparing the parts before the full frames makes complicated
    # differences easier to locate.
    expect(actual.vectors).to eq expected.vectors
    expect(actual.index).to eq expected.index
    expect(actual).to eq expected
  end
end
3215
+
3216
# Specs for DataFrame#shape. @data_frame is built outside this context.
context '#shape' do
  it 'returns an array containing number of rows and columns' do
    dimensions = @data_frame.shape

    expect(dimensions).to eq([5, 3])
  end
end
3221
+
3222
# Specs for DataFrame#nest: grouping rows into nested hashes keyed by the
# values of the given vectors.
context '#nest' do
  it 'nests in a hash' do
    df = DaruLite::DataFrame.new(
      {
        a: DaruLite::Vector.new(%w[a a a b b b]),
        b: DaruLite::Vector.new(%w[c c d d e e]),
        c: DaruLite::Vector.new(%w[f g h i j k])
      }
    )

    nest = df.nest(:a, :b)

    # Leaves are arrays of row hashes holding only the un-nested vector :c.
    expect(nest['a']['c']).to eq([{ c: 'f' }, { c: 'g' }])
    expect(nest['a']['d']).to eq([{ c: 'h' }])
    expect(nest['b']['e']).to eq([{ c: 'j' }, { c: 'k' }])
  end
end
3235
+
3236
+ context "#summary" do # Specs for DataFrame#summary, compared as one multi-line string.
3237
+ subject { df.summary } # `df` is supplied by the nested context below
3238
+
3239
+ context "DataFrame" do
3240
+ let(:df) { DaruLite::DataFrame.new({a: [1,2,5], b: [1,2,"string"]}, order: [:a, :b], index: [:one, :two, :three], name: 'frame') } # :a is all numeric, :b mixes numbers and a string
3241
+ it { is_expected.to eq %Q{
3242
+ |= frame
3243
+ | Number of rows: 3
3244
+ | Element:[a]
3245
+ | == a
3246
+ | n :3
3247
+ | non-missing:3
3248
+ | median: 2
3249
+ | mean: 2.6667
3250
+ | std.dev.: 2.0817
3251
+ | std.err.: 1.2019
3252
+ | skew: 0.2874
3253
+ | kurtosis: -2.3333
3254
+ | Element:[b]
3255
+ | == b
3256
+ | n :3
3257
+ | non-missing:3
3258
+ | factors: 1,2,string
3259
+ | mode: 1,2,string
3260
+ | Distribution
3261
+ | 1 1 100.00%
3262
+ | 2 1 100.00%
3263
+ | string 1 100.00%
3264
+ }.unindent } # NOTE(review): `unindent` is defined elsewhere; presumably it strips the margin up to each `|` - confirm
3265
+ end
3266
+ end
3267
+
3268
# Specs for DataFrame#to_df. @data_frame is built outside this context.
context '#to_df' do
  # The comparison has to go through `expect`: a bare `a == b` is evaluated
  # and thrown away, so the example would pass whatever to_df returned.
  it 'returns the dataframe' do
    expect(@data_frame.to_df).to eq(@data_frame)
  end
end
3273
+
3274
# Specs for DataFrame#merge: combining the vectors of two frames.
context '#merge' do
  it 'merges one dataframe with another' do
    a = DaruLite::Vector.new([1, 2, 3])
    b = DaruLite::Vector.new([3, 4, 5])
    c = DaruLite::Vector.new([4, 5, 6])
    d = DaruLite::Vector.new([7, 8, 9])
    e = DaruLite::Vector.new([10, 20, 30])
    ds1 = DaruLite::DataFrame.new({ a: a, b: b })
    ds2 = DaruLite::DataFrame.new({ c: c, d: d })

    # Vectors of the receiver come first, whichever way round the merge is.
    exp = DaruLite::DataFrame.new({ a: a, b: b, c: c, d: d })
    expect(ds1.merge(ds2)).to eq(exp)

    reversed = DaruLite::DataFrame.new({ c: c, d: d, a: a, b: b }, order: %i[c d a b])
    expect(ds2.merge(ds1)).to eq(reversed)

    # A name present in both frames gets _1 / _2 suffixes.
    ds3 = DaruLite::DataFrame.new({ a: e })
    exp = DaruLite::DataFrame.new({ a_1: a, a_2: e, b: b }, order: %i[a_1 b a_2])
    expect(ds1.merge(ds3)).to eq(exp)
  end

  context 'preserves type of vector names' do
    subject { df1.merge df2 }

    let(:df1) { DaruLite::DataFrame.new({ 'a' => [1, 2, 3] }) }
    let(:df2) { DaruLite::DataFrame.new({ b: [4, 5, 6] }) }

    it { is_expected.to be_a DaruLite::DataFrame }

    it 'keeps the String-named vector addressable by String' do
      expect(subject['a'].to_a).to eq [1, 2, 3]
    end

    it 'keeps the Symbol-named vector addressable by Symbol' do
      expect(subject[:b].to_a).to eq [4, 5, 6]
    end
  end

  context 'preserves indices for dataframes with same index' do
    subject { df1.merge df2 }

    let(:index) { %w[one two three] }
    let(:df1) { DaruLite::DataFrame.new({ 'a' => [1, 2, 3], 'b' => [3, 4, 5] }, index: index) }
    let(:df2) { DaruLite::DataFrame.new({ 'c' => [4, 5, 6], 'd' => [7, 8, 9] }, index: index) }

    its(:index) { is_expected.to eq DaruLite::Index.new(index) }
  end
end
3315
+
3316
# Specs for DataFrame#vector_by_calculation: the block refers to vectors by
# bare name (a, b, c) and the result is a single vector.
context '#vector_by_calculation' do
  it 'DSL for returning vector of each calculation' do
    units = DaruLite::Vector.new([1, 2, 3, 4, 5, 6, 7])
    tens = DaruLite::Vector.new([10, 20, 30, 40, 50, 60, 70])
    hundreds = DaruLite::Vector.new([100, 200, 300, 400, 500, 600, 700])
    ds = DaruLite::DataFrame.new({ a: units, b: tens, c: hundreds })

    total = ds.vector_by_calculation { a + b + c }

    expect(total).to eq(DaruLite::Vector.new([111, 222, 333, 444, 555, 666, 777]))
  end
end
3327
+
3328
# Specs for DataFrame#group_by on the smallest possible input.
context 'group_by' do
  context 'on a single row DataFrame' do
    let(:df) { DaruLite::DataFrame.new(city: %w[Kyiv], year: [2015], value: [1]) }

    it 'returns a groupby object' do
      grouping = df.group_by([:city])

      expect(grouping).to be_a(DaruLite::Core::GroupBy)
    end

    it 'has the correct index' do
      # The group key is an array even for a single grouping vector.
      grouping = df.group_by([:city])

      expect(grouping.groups).to eq({ ['Kyiv'] => [0] })
    end
  end
end
3339
+
3340
# Specs for DataFrame#vector_sum: row-wise sums across vectors.
context '#vector_sum' do
  before do
    a1 = DaruLite::Vector.new([1, 2, 3, 4, 5, nil, nil])
    a2 = DaruLite::Vector.new([10, 10, 20, 20, 20, 30, nil])
    b1 = DaruLite::Vector.new([nil, 1, 1, 1, 1, 2, nil])
    b2 = DaruLite::Vector.new([2, 2, 2, nil, 2, 3, nil])
    @df = DaruLite::DataFrame.new({ a1: a1, a2: a2, b1: b1, b2: b2 })
  end

  it 'calculates complete vector sum' do
    # A row containing any nil sums to nil.
    all_vectors = DaruLite::Vector.new([nil, 15, 26, nil, 28, nil, nil])

    expect(@df.vector_sum).to eq(all_vectors)
  end

  it 'ignores nils if skipnil is true' do
    # The last row is all nil and sums to 0 here.
    nils_skipped = DaruLite::Vector.new([13, 15, 26, 25, 28, 35, 0])

    expect(@df.vector_sum(skipnil: true)).to eq(nils_skipped)
  end

  it 'calculates partial vector sum' do
    a = @df.vector_sum(%i[a1 a2])
    b = @df.vector_sum(%i[b1 b2])

    expect(a).to eq(DaruLite::Vector.new([11, 12, 23, 24, 25, nil, nil]))
    expect(b).to eq(DaruLite::Vector.new([nil, 3, 3, nil, 3, 5, nil]))
  end
end
3365
+
3366
# Specs for DataFrame#missing_values_rows: per-row count of nil cells.
context '#missing_values_rows' do
  it 'returns number of missing values in each row' do
    columns = {
      a1: DaruLite::Vector.new([1, nil, 3, 4, 5, nil]),
      a2: DaruLite::Vector.new([10, nil, 20, 20, 20, 30]),
      b1: DaruLite::Vector.new([nil, nil, 1, 1, 1, 2]),
      b2: DaruLite::Vector.new([2, 2, 2, nil, 2, 3]),
      c: DaruLite::Vector.new([nil, 2, 4, 2, 2, 2])
    }
    df = DaruLite::DataFrame.new(columns)

    nil_counts = DaruLite::Vector.new([2, 3, 0, 1, 0, 1])
    expect(df.missing_values_rows).to eq(nil_counts)
  end
end
3379
+
3380
# Specs for DataFrame#vector_count_characters: per-row character totals over
# a frame mixing integers, floats, strings and nils.
context '#vector_count_characters' do
  it '' do
    columns = {
      a1: DaruLite::Vector.new([1, 'abcde', 3, 4, 5, nil]),
      a2: DaruLite::Vector.new([10, 20.3, 20, 20, 20, 30]),
      b1: DaruLite::Vector.new([nil, '343434', 1, 1, 1, 2]),
      b2: DaruLite::Vector.new([2, 2, 2, nil, 2, 3]),
      c: DaruLite::Vector.new([nil, 2, 'This is a nice example', 2, 2, 2])
    }
    ds = DaruLite::DataFrame.new(columns)

    character_totals = DaruLite::Vector.new([4, 17, 27, 5, 6, 5])
    expect(ds.vector_count_characters).to eq(character_totals)
  end
end
3392
+
3393
# Specs for DataFrame#include_values?: whether any of the given values
# occurs anywhere in the frame.
context '#include_values?' do
  let(:df) do
    DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, Float::NAN, 6, 1],
        b: [:a, :b, nil, Float::NAN, nil, 3, 5],
        c: ['a', 6, 3, 4, 3, 5, 3],
        d: [1, 2, 3, 5, 1, 2, 5]
      }
    )
  end

  # :b is converted to a category vector before every example.
  before { df.to_category :b }

  context 'true' do
    it { expect(df.include_values?(nil)).to eq true }
    it { expect(df.include_values?(Float::NAN)).to eq true }
    it { expect(df.include_values?(nil, Float::NAN)).to eq true }
    # One match is enough: 1 is present, 30 is not.
    it { expect(df.include_values?(1, 30)).to eq true }
  end

  context 'false' do
    # Sub-frames chosen so that the searched value is absent from them.
    it { expect(df[:a, :c].include_values?(nil)).to eq false }
    it { expect(df[:c, :d].include_values?(Float::NAN)).to eq false }
    it { expect(df[:c, :d].include_values?(nil, Float::NAN)).to eq false }
    it { expect(df.include_values?(10, 20)).to eq false }
  end
end
3418
+
3419
# Specs for DataFrame#vector_mean: row-wise mean across all vectors.
context '#vector_mean' do
  before do
    columns = {
      a1: DaruLite::Vector.new([1, 2, 3, 4, 5, nil]),
      a2: DaruLite::Vector.new([10, 10, 20, 20, 20, 30]),
      b1: DaruLite::Vector.new([nil, 1, 1, 1, 1, 2]),
      b2: DaruLite::Vector.new([2, 2, 2, nil, 2, 3]),
      c: DaruLite::Vector.new([nil, 2, 4, 2, 2, 2])
    }
    @df = DaruLite::DataFrame.new(columns)
  end

  it 'calculates complete vector mean' do
    # Rows containing a nil yield nil.
    row_means = DaruLite::Vector.new([nil, 3.4, 6, nil, 6.0, nil])

    expect(@df.vector_mean).to eq(row_means)
  end
end
3435
+
3436
# Specs for DataFrame#add_vectors_by_split_recode: the comma-separated
# values of :a1 become indicator vectors named with numeric suffixes.
context '#add_vectors_by_split_recode' do
  before do
    columns = {
      id: DaruLite::Vector.new([1, 2, 3, 4, 5]),
      name: DaruLite::Vector.new(%w[Alex Claude Peter Franz George]),
      age: DaruLite::Vector.new([20, 23, 25, 27, 5]),
      city: DaruLite::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
      a1: DaruLite::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c'])
    }
    @ds = DaruLite::DataFrame.new(columns, order: %i[id name age city a1])
  end

  it '' do
    @ds.add_vectors_by_split_recode(:a1, '_')

    expect(@ds.vectors.to_a).to eq(%i[id name age city a1 a1_1 a1_2 a1_3])
    # The nil source cell stays nil in every generated vector.
    expect(@ds[:a1_1].to_a).to eq([1, 0, 1, nil, 1])
    expect(@ds[:a1_2].to_a).to eq([1, 1, 0, nil, 1])
    expect(@ds[:a1_3].to_a).to eq([0, 1, 0, nil, 1])
  end
end
3455
+
3456
# Specs for DataFrame#add_vectors_by_split: the comma-separated values of
# :a1 become indicator vectors named after the values themselves.
context '#add_vectors_by_split' do
  before do
    columns = {
      id: DaruLite::Vector.new([1, 2, 3, 4, 5]),
      name: DaruLite::Vector.new(%w[Alex Claude Peter Franz George]),
      age: DaruLite::Vector.new([20, 23, 25, 27, 5]),
      city: DaruLite::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
      a1: DaruLite::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c'])
    }
    @ds = DaruLite::DataFrame.new(columns, order: %i[id name age city a1])
  end

  it '' do
    @ds.add_vectors_by_split(:a1, '_')

    expect(@ds.vectors.to_a).to eq(%i[id name age city a1 a1_a a1_b a1_c])
    # The nil source cell stays nil in every generated vector.
    expect(@ds[:a1_a].to_a).to eq([1, 0, 1, nil, 1])
    expect(@ds[:a1_b].to_a).to eq([1, 1, 0, nil, 1])
    expect(@ds[:a1_c].to_a).to eq([0, 1, 0, nil, 1])
  end
end
3475
+
3476
+ context "#verify" do
3477
# Bundles one verification rule in the shape the examples pass to
# DataFrame#verify: the first positional argument is the description, any
# remaining arguments are extra field names, and the block is the predicate.
#
# @return [Array] three elements: description, array of fields, block (or nil)
def create_test(*args, &proc)
  description, *fields = args
  [description, fields, proc]
end
3482
+
3483
# Fixture: four rows labelled r1..r4 in :id, with numeric vectors v1..v3 and
# an alternating 'a'/'b' vector v4.
before do
  name = DaruLite::Vector.new(%w[r1 r2 r3 r4])
  v1 = DaruLite::Vector.new([1, 2, 3, 4])
  v2 = DaruLite::Vector.new([4, 3, 2, 1])
  v3 = DaruLite::Vector.new([10, 20, 30, 40])
  v4 = DaruLite::Vector.new(%w[a b a b])
  @df = DaruLite::DataFrame.new(
    { v1: v1, v2: v2, v3: v3, v4: v4, id: name },
    order: %i[v1 v2 v3 v4 id]
  )
end

it 'correctly verifies data as per the block' do
  # Rules that hold for every row. `||`/`&&` replace `or`/`and`, which are
  # control-flow operators with lower precedence.
  t1 = create_test('If v4=a, v1 odd') do |r|
    r[:v4] == 'b' || (r[:v4] == 'a' && r[:v1].odd?)
  end
  t2 = create_test('v3=v1*10') { |r| r[:v3] == r[:v1] * 10 }
  # Rule that fails on the two rows where v4 is 'a'.
  t3 = create_test("v4='b'") { |r| r[:v4] == 'b' }

  # No id field is passed to verify, so the bracketed part of each message
  # repeats the row number.
  exp1 = ["1 [1]: v4='b'", "3 [3]: v4='b'"]

  dataf = @df.verify(t3, t1, t2)
  expect(dataf).to eq(exp1)
end

it 'uses additional fields to extend error messages' do
  t = create_test("v4='b'", :v2, :v3) { |r| r[:v4] == 'b' }

  # With :id passed first, the bracketed part shows the :id value, and the
  # extra fields are appended in parentheses.
  dataf = @df.verify(:id, t)
  expect(dataf).to eq(["1 [r1]: v4='b' (v2=4, v3=10)", "3 [r3]: v4='b' (v2=2, v3=30)"])
end
3515
+ end
3516
+
3517
+ context "#compute" do
3518
+ it "performs a computation when supplied in a string" do
3519
+ v1 = DaruLite::Vector.new [1, 2, 3, 4]
3520
+ v2 = DaruLite::Vector.new [4, 3, 2, 1]
3521
+ v3 = DaruLite::Vector.new [10, 20, 30, 40]
3522
+ vnumeric = DaruLite::Vector.new [0, 0, 1, 4]
3523
+ vsum = DaruLite::Vector.new [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0]
3524
+ vmult = DaruLite::Vector.new [1 * 4, 2 * 3, 3 * 2, 4 * 1]
3525
+
3526
+ df = DaruLite::DataFrame.new({:v1 => v1, :v2 => v2, :v3 => v3})
3527
+
3528
+ expect(df.compute("v1/v2")).to eq(vnumeric)
3529
+ expect(df.compute("v1+v2+v3")).to eq(vsum)
3530
+ expect(df.compute("v1*v2")).to eq(vmult)
3531
+ end
3532
+ end
3533
+
3534
# Specs for DataFrame.crosstab_by_assignation: builds a cross table from a
# row-label vector, a column-label vector and a value vector.
context '.crosstab_by_assignation' do
  it '' do
    row_labels = DaruLite::Vector.new(%w[a a a b b b c c c])
    column_labels = DaruLite::Vector.new(%w[a b c a b c a b c])
    cell_values = DaruLite::Vector.new([0, 1, 0, 0, 1, 1, 0, 0, 1])

    df = DaruLite::DataFrame.crosstab_by_assignation(row_labels, column_labels, cell_values)

    expect(df[:_id].type).to eq(:object)
    expect(df['a'].type).to eq(:numeric)
    expect(df['b'].type).to eq(:numeric)

    # Row labels end up in the :_id vector, which is ordered last.
    df2 = DaruLite::DataFrame.new(
      {
        _id: DaruLite::Vector.new(%w[a b c]),
        'a' => DaruLite::Vector.new([0, 0, 0]),
        'b' => DaruLite::Vector.new([1, 1, 0]),
        'c' => DaruLite::Vector.new([0, 1, 1])
      },
      order: ['a', 'b', 'c', :_id]
    )
    expect(df2).to eq(df)
  end
end
3556
+
3557
# DataFrame#one_to_many(parent_fields, pattern): with the pattern 'car_%v%n'
# the numbered column groups (car_color1/car_value1, car_color2/car_value2, ...)
# are expected to become separate rows, identified by '_col_id'.
context "#one_to_many" do
  it "emits one row per non-empty numbered column group" do
    wide = DaruLite::DataFrame.rows(
      [
        ['1', 'george', 'red', 10, 'blue', 20, nil, nil],
        ['2', 'fred', 'green', 15, 'orange', 30, 'white', 20],
        # Every car_* cell is nil, so this parent contributes no rows at all.
        ['3', 'alfred', nil, nil, nil, nil, nil, nil]
      ],
      order: %w[id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3]
    )

    expected = DaruLite::DataFrame.new(
      {
        'id' => DaruLite::Vector.new(%w[1 1 2 2 2]),
        '_col_id' => DaruLite::Vector.new([1, 2, 1, 2, 3]),
        'color' => DaruLite::Vector.new(%w[red blue green orange white]),
        'value' => DaruLite::Vector.new([10, 20, 15, 30, 20])
      },
      order: %w[id _col_id color value]
    )

    expect(wide.one_to_many(['id'], 'car_%v%n')).to eq(expected)
  end
end
3581
+
3582
# DataFrame#any? is exercised without an argument (the block receives vectors)
# and with :row (the block receives rows); an unknown axis must raise
# ArgumentError mentioning "axis".
context "#any?" do
  before do
    @df = DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, 5],
        b: [10, 20, 30, 40, 50],
        c: [11, 22, 33, 44, 55]
      }
    )
  end

  it "returns true if any one of the vectors satisfy condition" do
    expect(@df.any? { |vector| vector[0] == 1 }).to eq(true)
  end

  it "returns false if none of the vectors satisfy the condition" do
    expect(@df.any? { |vector| vector.mean > 100 }).to eq(false)
  end

  it "returns true if any one of the rows satisfy condition" do
    # `&&` instead of `and`: same result here, without the low-precedence trap.
    expect(@df.any?(:row) { |row| row[:a] == 1 && row[:c] == 11 }).to eq(true)
  end

  it "returns false if none of the rows satisfy the condition" do
    expect(@df.any?(:row) { |row| row.mean > 100 }).to eq(false)
  end

  it 'fails on unknown axis' do
    expect { @df.any?(:kitten) { |row| row.mean > 100 } }.to raise_error(ArgumentError, /axis/)
  end
end
3610
+
3611
# DataFrame#all? mirrors the #any? examples: vectors are yielded when no axis
# is given, rows when :row is given, and an unknown axis raises ArgumentError.
context "#all?" do
  before do
    columns = {
      a: [1, 2, 3, 4, 5],
      b: [10, 20, 30, 40, 50],
      c: [11, 22, 33, 44, 55]
    }
    @df = DaruLite::DataFrame.new(columns)
  end

  it "returns true if all of the vectors satisfy condition" do
    verdict = @df.all? { |vector| vector.mean < 40 }
    expect(verdict).to eq(true)
  end

  it "returns false if any one of the vectors does not satisfy condition" do
    verdict = @df.all? { |vector| vector.mean == 30 }
    expect(verdict).to eq(false)
  end

  it "returns true if all of the rows satisfy condition" do
    verdict = @df.all?(:row) { |row| row.mean < 70 }
    expect(verdict).to eq(true)
  end

  it "returns false if any one of the rows does not satisfy condition" do
    verdict = @df.all?(:row) { |row| row.mean == 30 }
    expect(verdict).to eq(false)
  end

  it 'fails on unknown axis' do
    expect do
      @df.all?(:kitten) { |row| row.mean > 100 }
    end.to raise_error(ArgumentError, /axis/)
  end
end
3639
+
3640
# DataFrame#only_numerics keeps the numeric vectors of a frame. The examples
# check that `clone: false` hands back the very same Vector objects, while the
# default call returns different objects.
context "#only_numerics" do
  before do
    @v1 = DaruLite::Vector.new([1, 2, 3, 4, 5])
    @v2 = DaruLite::Vector.new(%w[one two three four five])
    @v3 = DaruLite::Vector.new([11, 22, 33, 44, 55])
    @df = DaruLite::DataFrame.new({ a: @v1, b: @v2, c: @v3 }, clone: false)
  end

  it "returns a view of only the numeric vectors" do
    numeric_view = @df.only_numerics(clone: false)
    expected = DaruLite::DataFrame.new({ a: @v1, c: @v3 }, clone: false)

    expect(numeric_view).to eq(expected)
    expect(numeric_view[:a].object_id).to eq(@v1.object_id)
  end

  it "returns a clone of numeric vectors" do
    numeric_copy = @df.only_numerics
    expected = DaruLite::DataFrame.new({ a: @v1, c: @v3 }, clone: false)

    expect(numeric_copy).to eq(expected)
    expect(numeric_copy[:a].object_id).not_to eq(@v1.object_id)
  end

  context DaruLite::MultiIndex do
    before do
      # Each entry pairs a vector tuple with that vector's data; the two
      # %w[a b] vectors are the non-numeric ones.
      columns = [
        [%i[d one large], [4.112, 2.234]],
        [%i[d one small], %w[a b]],
        [%i[d two large], [6.342, nil]],
        [%i[d two small], [7.2344, 3.23214]],
        [%i[e one large], [8.234, 4.533]],
        [%i[e one small], [10.342, 2.3432]],
        [%i[e two large], [12.0, nil]],
        [%i[e two small], %w[a b]]
      ]
      tuples, data = columns.transpose

      @df = DaruLite::DataFrame.new(
        data,
        order: DaruLite::MultiIndex.from_tuples(tuples),
        index: DaruLite::MultiIndex.from_tuples([[:bar], [:foo]])
      )
    end

    it "returns numeric vectors" do
      # The tuples below are listed against the same data arrays, in the same
      # order, as in the original expectation.
      numeric_columns = [
        [%i[d one large], [4.112, 2.234]],
        [%i[d two large], [6.342, nil]],
        [%i[d two small], [7.2344, 3.23214]],
        [%i[e one large], [8.234, 4.533]],
        [%i[e one small], [10.342, 2.3432]],
        [%i[e two large], [12.0, nil]]
      ]
      tuples, data = numeric_columns.transpose

      answer = DaruLite::DataFrame.new(
        data,
        order: DaruLite::MultiIndex.from_tuples(tuples),
        index: DaruLite::MultiIndex.from_tuples([[:bar], [:foo]])
      )

      expect(@df.only_numerics).to eq(answer)
    end
  end
end
3734
+
3735
# DataFrame#reset_index: the expectations show the named index (or each named
# level of a MultiIndex) turning into a regular vector placed before the
# existing vectors.
context '#reset_index' do
  context 'when Index' do
    let(:named_index) { DaruLite::Index.new(%w[a b c d e], name: 'indices') }
    let(:expected) do
      DaruLite::DataFrame.new(
        'indices' => %w[a b c d e],
        'vals' => [1, 2, 3, 4, 5]
      )
    end

    subject do
      DaruLite::DataFrame.new({ 'vals' => [1, 2, 3, 4, 5] }, index: named_index).reset_index
    end

    it { is_expected.to eq expected }
  end

  context 'when MultiIndex' do
    let(:named_multi_index) do
      tuples = [[0, 'a'], [0, 'b'], [1, 'a'], [1, 'b']]
      DaruLite::MultiIndex.from_tuples(tuples).tap do |multi_index|
        multi_index.name = %w[nums alphas]
      end
    end
    let(:expected) do
      DaruLite::DataFrame.new(
        'nums' => [0, 0, 1, 1],
        'alphas' => %w[a b a b],
        'vals' => [1, 2, 3, 4]
      )
    end

    subject do
      DaruLite::DataFrame.new({ 'vals' => [1, 2, 3, 4] }, index: named_multi_index).reset_index
    end

    it { is_expected.to eq expected }
  end
end
3769
+
3770
# DataFrame#set_index promotes one vector (or, given an Array, several vectors)
# to the frame's index. The examples cover dropping the source vector (default),
# keeping it (`keep: true`), building a CategoricalIndex (`categorical: true`),
# rejecting non-unique values, and building a MultiIndex.
context "#set_index" do
  before(:each) do
    @df = DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, 5],
        b: ['a', 'b', 'c', 'd', 'e'],
        c: [11, 22, 33, 44, 55]
      }
    )
  end

  it "sets a particular column as the index and deletes that column" do
    @df.set_index(:b)

    expected = DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, 5],
        c: [11, 22, 33, 44, 55]
      },
      index: ['a', 'b', 'c', 'd', 'e']
    )
    expect(@df).to eq(expected)
  end

  it "sets a particular column as index but keeps that column" do
    expected = DaruLite::DataFrame.new(
      {
        a: [1, 2, 3, 4, 5],
        b: ['a', 'b', 'c', 'd', 'e'],
        c: [11, 22, 33, 44, 55]
      },
      index: [11, 22, 33, 44, 55]
    )

    expect(@df.set_index(:c, keep: true)).to eq(expected)
    # Compare against literal values: comparing @df[:c] with itself can never
    # fail and therefore verified nothing about the kept column.
    expect(@df[:c].to_a).to eq([11, 22, 33, 44, 55])
  end

  it "sets categorical index if categorical is true" do
    data = {
      a: [1, 2, 3, 4, 5],
      b: [:a, 1, :a, 1, 'c'],
      c: %w[a b c d e]
    }
    df = DaruLite::DataFrame.new(data)
    df.set_index(:b, categorical: true)

    expected = DaruLite::DataFrame.new(
      data.slice(:a, :c),
      index: DaruLite::CategoricalIndex.new(data[:b])
    )
    expect(df).to eq(expected)
  end

  it "raises error if all elements in the column aren't unique" do
    with_duplicates = DaruLite::DataFrame.new(
      {
        a: ['a', 'b', 'a'],
        b: [1, 2, 4]
      }
    )

    expect { with_duplicates.set_index(:a) }.to raise_error(ArgumentError)
  end

  it "sets multiindex if array is given" do
    df = DaruLite::DataFrame.new(
      {
        a: %w[a a b b],
        b: [1, 2, 1, 2],
        c: %w[a b c d]
      }
    )
    df.set_index(%i[a b])

    expected_index = DaruLite::MultiIndex.from_tuples(
      [['a', 1], ['a', 2], ['b', 1], ['b', 2]]
    )
    # `tap` already returns its receiver; the block parameter gets its own name
    # so it does not shadow the frame under test.
    expected = DaruLite::DataFrame.new({ c: %w[a b c d] }, index: expected_index).tap do |frame|
      frame.index.name = %i[a b]
    end

    expect(df).to eq(expected)
  end
end
3845
+
3846
# DataFrame#concat: the examples check that both operands are left untouched,
# that a shared vector is the first frame's values followed by the second's,
# and that vectors present in only one frame are padded with nil.
context "#concat" do
  before do
    @df1 = DaruLite::DataFrame.new(
      {
        a: [1, 2, 3],
        b: [1, 2, 3]
      }
    )

    @df2 = DaruLite::DataFrame.new(
      {
        a: [4, 5, 6],
        c: [4, 5, 6]
      }
    )
  end

  it 'does not modify the original dataframes' do
    df1_a = @df1[:a].to_a.dup
    df2_a = @df2[:a].to_a.dup

    # Only the effect on the operands matters here, so the result is discarded
    # instead of being bound to an unused local.
    @df1.concat(@df2)

    expect(@df1[:a].to_a).to eq df1_a
    expect(@df2[:a].to_a).to eq df2_a
  end

  it 'creates a new dataframe that is a concatenation of the two dataframe arguments' do
    df1_a = @df1[:a].to_a.dup
    df2_a = @df2[:a].to_a.dup

    df_concat = @df1.concat(@df2)

    expect(df_concat[:a].to_a).to eq df1_a + df2_a
  end

  it 'fills in missing vectors with nils' do
    df1_b = @df1[:b].to_a.dup
    df2_c = @df2[:c].to_a.dup

    df_concat = @df1.concat(@df2)

    expect(df_concat[:b].to_a).to eq df1_b + [nil] * @df2.size
    expect(df_concat[:c].to_a).to eq [nil] * @df1.size + df2_c
  end
end
3886
+
3887
# DataFrame#union: same expectations as #concat for frames with disjoint
# indices (@df1 and @df2), plus the case of a shared index label (@df1 and @df3
# both contain 5) and the ordering of the resulting index.
context "#union" do
  before do
    @df1 = DaruLite::DataFrame.new({ a: [1, 2, 3], b: [1, 2, 3] }, index: [1, 3, 5])
    @df2 = DaruLite::DataFrame.new({ a: [4, 5, 6], c: [4, 5, 6] }, index: [7, 9, 11])
    @df3 = DaruLite::DataFrame.new({ a: [4, 5, 6], c: [4, 5, 6] }, index: [5, 7, 9])
  end

  it 'does not modify the original dataframes' do
    a_before_in_df1 = @df1[:a].to_a.dup
    a_before_in_df2 = @df2[:a].to_a.dup

    @df1.union(@df2)

    expect(@df1[:a].to_a).to eq a_before_in_df1
    expect(@df2[:a].to_a).to eq a_before_in_df2
  end

  it 'creates a new dataframe that is a concatenation of the two dataframe arguments' do
    combined_a = @df1[:a].to_a.dup + @df2[:a].to_a.dup

    expect(@df1.union(@df2)[:a].to_a).to eq combined_a
  end

  it 'fills in missing vectors with nils' do
    b_of_df1 = @df1[:b].to_a.dup
    c_of_df2 = @df2[:c].to_a.dup

    merged = @df1.union(@df2)

    expect(merged[:b].to_a).to eq b_of_df1 + [nil] * @df2.size
    expect(merged[:c].to_a).to eq [nil] * @df1.size + c_of_df2
  end

  it 'overwrites part of the first dataframe if there are double indices' do
    # Row 5 exists in both frames; the expected row carries @df3's values and
    # nil for :b, which @df3 does not have.
    row_from_second = DaruLite::Vector.new({ a: 4, b: nil, c: 4 })

    expect(@df1.union(@df3).row[5]).to eq row_from_second
  end

  it 'concats the indices' do
    joined_labels = @df1.index.to_a + @df2.index.to_a

    expect(@df1.union(@df2).index.to_a).to eq joined_labels
  end
end
3944
+
3945
# Specs for DataFrame#inspect: every nested context defines `df`, and the shared
# subject (`df.inspect`) is compared with a literal table that is passed
# through `.unindent`.
# NOTE(review): this text is a diff rendering ("+" gutter, interleaved line
# numbers) in which runs of spaces appear collapsed; confirm the column
# alignment of the expected tables against the real spec file before editing.
# NOTE(review): the last two contexts ('vectors is a MultiIndex' and 'spacing
# and threshold settings') contain no examples.
+ context '#inspect' do
3946
+ subject { df.inspect }
3947
+
3948
+ context 'empty' do
3949
+ let(:df) { DaruLite::DataFrame.new({}, order: %w[a b c])}
3950
+ it { is_expected.to eq %Q{
3951
+ |#<DaruLite::DataFrame(0x3)>
3952
+ | a b c
3953
+ }.unindent}
3954
+ end
3955
+
3956
+ context 'simple' do
3957
+ let(:df) { DaruLite::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]}, name: 'test')}
3958
+ it { should == %Q{
3959
+ |#<DaruLite::DataFrame: test (3x3)>
3960
+ | a b c
3961
+ | 0 1 3 6
3962
+ | 1 2 4 7
3963
+ | 2 3 5 8
3964
+ }.unindent}
3965
+ end
3966
+
3967
+ context 'if index name is set' do
3968
+ context 'single index with name' do
3969
+ let(:df) { DaruLite::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]},
3970
+ name: 'test')}
3971
+ before { df.index.name = 'index_name' }
3972
+ it { should == %Q{
3973
+ |#<DaruLite::DataFrame: test (3x3)>
3974
+ | index_name a b c
3975
+ | 0 1 3 6
3976
+ | 1 2 4 7
3977
+ | 2 3 5 8
3978
+ }.unindent}
3979
+ end
3980
+
3981
+ context 'MultiIndex with name' do
3982
+ let(:mi) { DaruLite::MultiIndex.new(
3983
+ levels: [[:a,:b,:c], [:one, :two]],
3984
+ labels: [[0,0,1,1,2,2], [0,1,0,1,0,1]], name: ['s1', 's2']) }
3985
+ let(:df) { DaruLite::DataFrame.new({
3986
+ a: [11, 12, 13, 14, 15, 16], b: [21, 22, 23, 24, 25, 26]},
3987
+ name: 'test', index: mi)}
3988
+ it { should == %Q{
3989
+ |#<DaruLite::DataFrame: test (6x2)>
3990
+ | s1 s2 a b
3991
+ | a one 11 21
3992
+ | two 12 22
3993
+ | b one 13 23
3994
+ | two 14 24
3995
+ | c one 15 25
3996
+ | two 16 26
3997
+ }.unindent}
3998
+ end
3999
+
4000
+ end
4001
+
4002
+ context 'no name' do
4003
+ let(:df) { DaruLite::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]})}
4004
+ it { should == %Q{
4005
+ |#<DaruLite::DataFrame(3x3)>
4006
+ | a b c
4007
+ | 0 1 3 6
4008
+ | 1 2 4 7
4009
+ | 2 3 5 8
4010
+ }.unindent}
4011
+ end
4012
+
4013
+ context 'with nils' do
4014
+ let(:df) { DaruLite::DataFrame.new({a: [1,nil,3], b: [3,4,5], c: [6,7,nil]}, name: 'test')}
4015
+ it { is_expected.to eq %Q{
4016
+ |#<DaruLite::DataFrame: test (3x3)>
4017
+ | a b c
4018
+ | 0 1 3 6
4019
+ | 1 nil 4 7
4020
+ | 2 3 5 nil
4021
+ }.unindent}
4022
+ end
4023
+
4024
+ context 'very long' do
4025
+ let(:df) { DaruLite::DataFrame.new({a: [1,1,1]*20, b: [1,1,1]*20, c: [1,1,1]*20}, name: 'test')}
4026
+ it { is_expected.to eq %Q{
4027
+ |#<DaruLite::DataFrame: test (60x3)>
4028
+ | a b c
4029
+ | 0 1 1 1
4030
+ | 1 1 1 1
4031
+ | 2 1 1 1
4032
+ | 3 1 1 1
4033
+ | 4 1 1 1
4034
+ | 5 1 1 1
4035
+ | 6 1 1 1
4036
+ | 7 1 1 1
4037
+ | 8 1 1 1
4038
+ | 9 1 1 1
4039
+ | 10 1 1 1
4040
+ | 11 1 1 1
4041
+ | 12 1 1 1
4042
+ | 13 1 1 1
4043
+ | 14 1 1 1
4044
+ | 15 1 1 1
4045
+ | 16 1 1 1
4046
+ | 17 1 1 1
4047
+ | 18 1 1 1
4048
+ | 19 1 1 1
4049
+ | 20 1 1 1
4050
+ | 21 1 1 1
4051
+ | 22 1 1 1
4052
+ | 23 1 1 1
4053
+ | 24 1 1 1
4054
+ | 25 1 1 1
4055
+ | 26 1 1 1
4056
+ | 27 1 1 1
4057
+ | 28 1 1 1
4058
+ | 29 1 1 1
4059
+ | ... ... ... ...
4060
+ }.unindent}
4061
+ end
4062
+
4063
+ context 'long data lines' do
4064
+ let(:df) { DaruLite::DataFrame.new({a: [1,2,3], b: [4,5,6], c: ['this is ridiculously long',nil,nil]}, name: 'test')}
4065
+ it { is_expected.to eq %Q{
4066
+ |#<DaruLite::DataFrame: test (3x3)>
4067
+ | a b c
4068
+ | 0 1 4 this is ri
4069
+ | 1 2 5 nil
4070
+ | 2 3 6 nil
4071
+ }.unindent}
4072
+ end
4073
+
4074
+ context 'index is a MultiIndex' do
4075
+ let(:df) {
4076
+ DaruLite::DataFrame.new(
4077
+ {
4078
+ a: [1,2,3,4,5,6,7],
4079
+ b: %w[a b c d e f g]
4080
+ }, index: DaruLite::MultiIndex.from_tuples([
4081
+ %w[foo one],
4082
+ %w[foo two],
4083
+ %w[foo three],
4084
+ %w[bar one],
4085
+ %w[bar two],
4086
+ %w[bar three],
4087
+ %w[baz one],
4088
+ ]),
4089
+ name: 'test'
4090
+ )
4091
+ }
4092
+
4093
+ it { is_expected.to eq %Q{
4094
+ |#<DaruLite::DataFrame: test (7x2)>
4095
+ | a b
4096
+ | foo one 1 a
4097
+ | two 2 b
4098
+ | three 3 c
4099
+ | bar one 4 d
4100
+ | two 5 e
4101
+ | three 6 f
4102
+ | baz one 7 g
4103
+ }.unindent}
4104
+ end
4105
+
4106
+ context 'vectors is a MultiIndex' do
4107
+ end
4108
+
4109
+ context 'spacing and threshold settings' do
4110
+ end
4111
+ end
4112
+
4113
# DataFrame#to_s: a one-line description made of the class, the optional name
# and the dimensions. @data_frame is assigned outside this context.
context '#to_s' do
  it 'produces a class, size description' do
    description = @data_frame.to_s

    expect(description).to eq '#<DaruLite::DataFrame(5x3)>'
  end

  it 'produces a class, name, size description' do
    @data_frame.name = 'Test'
    description = @data_frame.to_s

    expect(description).to eq '#<DaruLite::DataFrame: Test(5x3)>'
  end

  it 'produces a class, name, size description when the name is a symbol' do
    @data_frame.name = :Test
    description = @data_frame.to_s

    expect(description).to eq '#<DaruLite::DataFrame: Test(5x3)>'
  end
end
4128
+
4129
# DataFrame#to_json: the boolean argument decides whether the parsed JSON is
# only the list of row hashes (true) or a pair [row hashes, index labels]
# (false).
context '#to_json' do
  let(:df) do
    DaruLite::DataFrame.new(
      { a: [1, 2, 3], b: [3, 4, 5], c: [6, 7, 8] },
      index: %i[one two three],
      name: 'test'
    )
  end
  # The row hashes are identical in both output shapes.
  let(:row_hashes) do
    [
      { "a" => 1, "b" => 3, "c" => 6 },
      { "a" => 2, "b" => 4, "c" => 7 },
      { "a" => 3, "b" => 5, "c" => 8 }
    ]
  end

  subject { JSON.parse(json) }

  context 'with index' do
    let(:json) { df.to_json(false) }

    # FIXME: is this the most reasonable output we can produce? -- zverok
    # A more reasonable shape might be one of the following.
    #
    # Index label merged into every row:
    #
    #   [
    #     {"index" => "one",   "a"=>1, "b"=>3, "c"=>6},
    #     {"index" => "two",   "a"=>2, "b"=>4, "c"=>7},
    #     {"index" => "three", "a"=>3, "b"=>5, "c"=>8}
    #   ]
    #
    # Or [label, row] pairs:
    #
    #   [
    #     ["one",   {"a"=>1, "b"=>3, "c"=>6}],
    #     ["two",   {"a"=>2, "b"=>4, "c"=>7}],
    #     ["three", {"a"=>3, "b"=>5, "c"=>8}]
    #   ]
    #
    # Or even a hash keyed by label:
    #
    #   {
    #     "one"   => {"a"=>1, "b"=>3, "c"=>6},
    #     "two"   => {"a"=>2, "b"=>4, "c"=>7},
    #     "three" => {"a"=>3, "b"=>5, "c"=>8}
    #   }
    #
    it { is_expected.to eq([row_hashes, ["one", "two", "three"]]) }
  end

  context 'without index' do
    let(:json) { df.to_json(true) }

    it { is_expected.to eq(row_hashes) }
  end
end
4183
+
4184
# DataFrame#access_row_tuples_by_indexs(*indexes) returns the selected rows as
# an Array of value tuples. The examples cover no argument, default integer
# labels, custom labels and (partial or full) MultiIndex keys.
context '#access_row_tuples_by_indexs' do
  # Plain decimal literals: a leading zero (07, 01) makes Ruby read the number
  # as octal, which only works by accident for digits below 8.
  let(:df) do
    DaruLite::DataFrame.new({ col: [:a, :b, :c, :d, :e], num: [52, 12, 7, 17, 1] })
  end
  let(:df_idx) do
    DaruLite::DataFrame.new({ a: [52, 12, 7], b: [1, 2, 3] }, index: [:one, :two, :three])
  end
  let(:mi_idx) do
    DaruLite::MultiIndex.from_tuples [
      [:a, :one, :bar],
      [:a, :one, :baz],
      [:b, :two, :bar],
      [:a, :two, :baz]
    ]
  end
  let(:df_mi) do
    DaruLite::DataFrame.new({ a: 1..4, b: 'a'..'d' }, index: mi_idx)
  end

  context 'when no index is given' do
    it 'returns empty Array' do
      expect(df.access_row_tuples_by_indexs).to eq([])
    end
  end

  context 'when index(s) are given' do
    it 'returns Array of row tuples' do
      expect(df.access_row_tuples_by_indexs(1)).to eq([[:b, 12]])
      expect(df.access_row_tuples_by_indexs(0, 3)).to eq([[:a, 52], [:d, 17]])
    end
  end

  context 'when custom index(s) are given' do
    it 'returns Array of row tuples' do
      expect(df_idx.access_row_tuples_by_indexs(:one, :three)).to eq(
        [[52, 1], [7, 3]]
      )
    end
  end

  context 'when multi index is given' do
    it 'returns Array of row tuples' do
      # A partial key (:a) selects every row under that first level ...
      expect(df_mi.access_row_tuples_by_indexs(:a)).to eq(
        [[1, "a"], [2, "b"], [4, "d"]]
      )
      # ... while a full key selects exactly one row.
      expect(df_mi.access_row_tuples_by_indexs(:a, :one, :baz)).to eq(
        [[2, "b"]]
      )
    end
  end
end
4233
+
4234
# DataFrame#aggregate, called directly on a frame: with the default index the
# lambda example yields one result row per source row; with a CategoricalIndex
# the :sum example yields one row per category.
context '#aggregate' do
  let(:cat_idx) { DaruLite::CategoricalIndex.new [:a, :b, :a, :a, :c] }
  # Plain decimal literals instead of the octal-looking 07 / 01.
  let(:df) { DaruLite::DataFrame.new(num: [52, 12, 7, 17, 1], cat_index: cat_idx) }
  let(:df_cat_idx) do
    DaruLite::DataFrame.new({ num: [52, 12, 7, 17, 1] }, index: cat_idx)
  end

  it 'lambda function on particular column' do
    # The lambda parameter has its own name so it does not shadow `df` above.
    hundredfold = ->(group) { (group.num * 100).first }

    expect(df.aggregate(num_100_times: hundredfold)).to eq(
      DaruLite::DataFrame.new(num_100_times: [5200, 1200, 700, 1700, 100])
    )
  end

  it 'aggregate sum on particular column' do
    # :a covers 52 + 7 + 17 = 76; :b and :c have a single value each.
    expect(df_cat_idx.aggregate(num: :sum)).to eq(
      DaruLite::DataFrame.new({ num: [76, 12, 1] }, index: [:a, :b, :c])
    )
  end
end
4251
+
4252
# DataFrame#group_by_and_aggregate must return the same frame as calling
# #group_by followed by #aggregate with the same keys and aggregation map.
context '#group_by_and_aggregate' do
  let(:spending_df) do
    DaruLite::DataFrame.rows(
      [
        [2010, 'dev', 50, 1],
        [2010, 'dev', 150, 1],
        [2010, 'dev', 200, 1],
        [2011, 'dev', 50, 1],
        [2012, 'dev', 150, 1],

        [2011, 'office', 300, 1],

        [2010, 'market', 50, 1],
        [2011, 'market', 500, 1],
        [2012, 'market', 500, 1],
        [2012, 'market', 300, 1],

        [2012, 'R&D', 10, 1]
      ],
      order: %i[year category spending nb_spending]
    )
  end

  it 'works as group_by + aggregate' do
    # [grouping keys, aggregation map] pairs checked against the two-step form.
    cases = [
      [:year, { spending: :sum }],
      [%i[year category], { spending: :sum, nb_spending: :size }]
    ]

    cases.each do |keys, aggregation|
      one_step = spending_df.group_by_and_aggregate(keys, **aggregation)
      two_steps = spending_df.group_by(keys).aggregate(**aggregation)

      expect(one_step).to eq(two_steps)
    end
  end
end
4279
+
4280
# Spec for DataFrame#create_sql('foo'): the result is compared with a literal
# CREATE TABLE statement (passed through `.unindent`) listing one column per
# vector of `df`.
# NOTE(review): this text is a diff rendering in which runs of spaces appear
# collapsed; confirm the exact spacing of the expected SQL against the real
# spec file before editing.
+ context '#create_sql' do
4281
+ let(:df) { DaruLite::DataFrame.new({
4282
+ a: [1,2,3],
4283
+ b: ['test', 'me', 'please'],
4284
+ c: ['2015-06-01', '2015-06-02', '2015-06-03']
4285
+ },
4286
+ name: 'test'
4287
+ )}
4288
+ subject { df.create_sql('foo') }
4289
+ it { is_expected.to eq %Q{
4290
+ |CREATE TABLE foo (a INTEGER,
4291
+ | b VARCHAR (255),
4292
+ | c DATE) CHARACTER SET=UTF8;
4293
+ }.unindent}
4294
+ end
4295
+
4296
# Looking up a vector name that the frame does not contain must raise
# IndexError with a message naming the missing vector.
context "#by_single_key" do
  let(:df) do
    DaruLite::DataFrame.new(a: [1, 2, 3], b: [4, 5, 6])
  end

  it 'raise error when vector is missing from dataframe' do
    lookup_missing = -> { df[:c] }

    expect(&lookup_missing).to raise_error(IndexError, /Specified vector c does not exist/)
  end
end
4303
+
4304
# DataFrame#rotate_vectors(-1): per the examples, the last vector moves to the
# front, and a single-vector frame compares equal to the original.
context "#rotate_vectors" do
  subject(:rotated) { df.rotate_vectors(-1) }

  context "several vectors in the dataframe" do
    let(:df) do
      DaruLite::DataFrame.new(
        {
          a: [1, 2, 3],
          b: [4, 5, 6],
          total: [5, 7, 9]
        }
      )
    end
    let(:new_order) { %i[total a b] }

    it "return the dataframe with the position of the last vector change to first" do
      vector_names = rotated.vectors.to_a

      expect(vector_names).to eq(new_order)
    end
  end

  context "only one vector in the dataframe" do
    let(:df) do
      DaruLite::DataFrame.new({ a: [1, 2, 3] })
    end

    it "return the dataframe without any change" do
      expect(rotated).to eq(df)
    end
  end
end
4330
+ end if mri?