daru_lite 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
@@ -0,0 +1,4330 @@
1
+ describe DaruLite::DataFrame do
2
+ before :each do
3
+ @data_frame = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
4
+ c: [11,22,33,44,55]},
5
+ order: [:a, :b, :c],
6
+ index: [:one, :two, :three, :four, :five])
7
+ tuples = [
8
+ [:a,:one,:bar],
9
+ [:a,:one,:baz],
10
+ [:a,:two,:bar],
11
+ [:a,:two,:baz],
12
+ [:b,:one,:bar],
13
+ [:b,:two,:bar],
14
+ [:b,:two,:baz],
15
+ [:b,:one,:foo],
16
+ [:c,:one,:bar],
17
+ [:c,:one,:baz],
18
+ [:c,:two,:foo],
19
+ [:c,:two,:bar]
20
+ ]
21
+ @multi_index = DaruLite::MultiIndex.from_tuples(tuples)
22
+
23
+ @vector_arry1 = [11,12,13,14,11,12,13,14,11,12,13,14]
24
+ @vector_arry2 = [1,2,3,4,1,2,3,4,1,2,3,4]
25
+
26
+ @order_mi = DaruLite::MultiIndex.from_tuples([
27
+ [:a,:one,:bar],
28
+ [:a,:two,:baz],
29
+ [:b,:two,:foo],
30
+ [:b,:one,:foo]])
31
+
32
+ @df_mi = DaruLite::DataFrame.new([
33
+ @vector_arry1,
34
+ @vector_arry2,
35
+ @vector_arry1,
36
+ @vector_arry2], order: @order_mi, index: @multi_index)
37
+ end
38
+
39
+ context ".rows" do
40
+ before do
41
+ @rows = [
42
+ [1,2,3,4,5],
43
+ [1,2,3,4,5],
44
+ [1,2,3,4,5],
45
+ [1,2,3,4,5]
46
+ ]
47
+ end
48
+
49
+ context DaruLite::Index do
50
+ it "creates a DataFrame from Array rows" do
51
+ df = DaruLite::DataFrame.rows @rows, order: [:a,:b,:c,:d,:e]
52
+
53
+ expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
54
+ expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
55
+ expect(df[:a]) .to eq(DaruLite::Vector.new [1,1,1,1])
56
+ end
57
+
58
+ it "creates empty dataframe" do
59
+ df = DaruLite::DataFrame.rows [], order: [:a, :b, :c]
60
+
61
+ expect(df.vectors).to eq(DaruLite::Index.new [:a,:b,:c])
62
+ expect(df.index).to be_empty
63
+ end
64
+
65
+ it "creates a DataFrame from Vector rows" do
66
+ rows = @rows.map { |r| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e] }
67
+
68
+ df = DaruLite::DataFrame.rows rows, order: [:a,:b,:c,:d,:e]
69
+
70
+ expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
71
+ expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
72
+ expect(df[:a]) .to eq(DaruLite::Vector.new [1,1,1,1])
73
+ end
74
+
75
+ it 'derives index & order from arrays' do
76
+ df = DaruLite::DataFrame.rows @rows
77
+ expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
78
+ expect(df.vectors) .to eq(DaruLite::Index.new %w[0 1 2 3 4])
79
+ end
80
+
81
+ it 'derives index & order from vectors' do
82
+ rows = @rows.zip(%w[w x y z]).map { |r, n| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e], name: n }
83
+ df = DaruLite::DataFrame.rows rows
84
+ expect(df.index) .to eq(DaruLite::Index.new %w[w x y z])
85
+ expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
86
+ end
87
+
88
+ it 'behaves, when rows are repeated' do
89
+ rows = @rows.zip(%w[w w y z]).map { |r, n| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e], name: n }
90
+ df = DaruLite::DataFrame.rows rows
91
+ expect(df.index) .to eq(DaruLite::Index.new %w[w_1 w_2 y z])
92
+ expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
93
+ end
94
+
95
+ it 'behaves, when vectors are unnamed' do
96
+ rows = @rows.map { |r| DaruLite::Vector.new r, index: [:a,:b,:c,:d,:e] }
97
+ df = DaruLite::DataFrame.rows rows
98
+ expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3])
99
+ expect(df.vectors) .to eq(DaruLite::Index.new [:a,:b,:c,:d,:e])
100
+ end
101
+ end
102
+
103
+ context DaruLite::MultiIndex do
104
+ it "creates a DataFrame from rows" do
105
+ df = DaruLite::DataFrame.rows(
106
+ @rows*3, index: @multi_index, order: [:a,:b,:c,:d,:e])
107
+
108
+ expect(df.index) .to eq(@multi_index)
109
+ expect(df.vectors) .to eq(DaruLite::Index.new([:a,:b,:c,:d,:e]))
110
+ expect(df[:a]).to eq(DaruLite::Vector.new([1]*12, index: @multi_index))
111
+ end
112
+
113
+ it "crates a DataFrame from rows (MultiIndex order)" do
114
+ rows = [
115
+ [11, 1, 11, 1],
116
+ [12, 2, 12, 2],
117
+ [13, 3, 13, 3],
118
+ [14, 4, 14, 4]
119
+ ]
120
+ index = DaruLite::MultiIndex.from_tuples([
121
+ [:one,:bar],
122
+ [:one,:baz],
123
+ [:two,:foo],
124
+ [:two,:bar]
125
+ ])
126
+
127
+ df = DaruLite::DataFrame.rows(rows, index: index, order: @order_mi)
128
+ expect(df.index) .to eq(index)
129
+ expect(df.vectors).to eq(@order_mi)
130
+ expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new([11,12,13,14],
131
+ index: index))
132
+ end
133
+
134
+ it "creates a DataFrame from Vector rows" do
135
+ rows = @rows*3
136
+ rows.map! { |r| DaruLite::Vector.new(r, index: @multi_index) }
137
+
138
+ df = DaruLite::DataFrame.rows rows, order: @multi_index
139
+
140
+ expect(df.index).to eq(DaruLite::Index.new(Array.new(rows.size) { |i| i }))
141
+ expect(df.vectors).to eq(@multi_index)
142
+ expect(df[:a,:one,:bar]).to eq(DaruLite::Vector.new([1]*12))
143
+ end
144
+ end
145
+ end
146
+
147
+ context "#initialize" do
148
+
149
+ it "initializes an empty DataFrame with no arguments" do
150
+ df = DaruLite::DataFrame.new
151
+ expect(df.nrows).to eq(0)
152
+ expect(df.ncols).to eq(0)
153
+ end
154
+
155
+ context DaruLite::Index do
156
+ it "initializes an empty DataFrame with empty source arg" do
157
+ df = DaruLite::DataFrame.new({}, order: [:a, :b])
158
+
159
+ expect(df.vectors).to eq(DaruLite::Index.new [:a, :b])
160
+ expect(df.a.class).to eq(DaruLite::Vector)
161
+ expect(df.a) .to eq([].dv(:a))
162
+ end
163
+
164
+ it "initializes from a Hash" do
165
+ df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]}, order: [:a, :b],
166
+ index: [:one, :two, :three, :four, :five])
167
+
168
+ expect(df.index) .to eq(DaruLite::Index.new [:one, :two, :three, :four, :five])
169
+ expect(df.vectors).to eq(DaruLite::Index.new [:a, :b])
170
+ expect(df.a.class).to eq(DaruLite::Vector)
171
+ expect(df.a) .to eq([1,2,3,4,5].dv(:a, df.index))
172
+ end
173
+
174
+ it "initializes from a Hash and preserves default order" do
175
+ df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
176
+ index: [:one, :two, :three, :four, :five])
177
+
178
+ expect(df.vectors).to eq(DaruLite::Index.new [:b, :a])
179
+ end
180
+
181
+ it "initializes from a Hash of Vectors" do
182
+ va = DaruLite::Vector.new([1,2,3,4,5], index: [:one, :two, :three, :four, :five])
183
+ vb = DaruLite::Vector.new([11,12,13,14,15], index: [:one, :two, :three, :four, :five])
184
+
185
+ df = DaruLite::DataFrame.new({ b: vb, a: va }, order: [:a, :b], index: [:one, :two, :three, :four, :five])
186
+
187
+ expect(df.index) .to eq(DaruLite::Index.new [:one, :two, :three, :four, :five])
188
+ expect(df.vectors).to eq(DaruLite::Index.new [:a, :b])
189
+ expect(df.a.class).to eq(DaruLite::Vector)
190
+ expect(df.a) .to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
191
+ end
192
+
193
+ it "initializes from an Array of Hashes" do
194
+ df = DaruLite::DataFrame.new([{a: 1, b: 11}, {a: false, b: 12}, {a: 3, b: 13},
195
+ {a: 4, b: 14}, {a: 5, b: 15}], order: [:b, :a],
196
+ index: [:one, :two, :three, :four, :five])
197
+
198
+ expect(df.index) .to eq(DaruLite::Index.new [:one, :two, :three, :four, :five])
199
+ expect(df.vectors).to eq(DaruLite::Index.new [:b, :a])
200
+ expect(df.a.class).to eq(DaruLite::Vector)
201
+ expect(df.a) .to eq([1,false,3,4,5].dv(:a,[:one, :two, :three, :four, :five]))
202
+ end
203
+
204
+ it "initializes from Array of Arrays" do
205
+ df = DaruLite::DataFrame.new([[1]*5, [2]*5, [3]*5], order: [:b, :a, :c])
206
+
207
+ expect(df.index) .to eq(DaruLite::Index.new(5))
208
+ expect(df.vectors).to eq(DaruLite::Index.new([:b, :a, :c]))
209
+ expect(df.a) .to eq(DaruLite::Vector.new([2]*5))
210
+ end
211
+
212
+ it "initializes from Array of Vectors" do
213
+ df = DaruLite::DataFrame.new([DaruLite::Vector.new([1]*5), DaruLite::Vector.new([2]*5),
214
+ DaruLite::Vector.new([3]*5)], order: [:b, :a, :c])
215
+
216
+ expect(df.index) .to eq(DaruLite::Index.new(5))
217
+ expect(df.vectors).to eq(DaruLite::Index.new([:b, :a, :c]))
218
+ expect(df.a) .to eq(DaruLite::Vector.new([2]*5))
219
+ end
220
+
221
+ it "accepts Index objects for row/col" do
222
+ rows = DaruLite::Index.new [:one, :two, :three, :four, :five]
223
+ cols = DaruLite::Index.new [:a, :b]
224
+
225
+ df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]}, order: cols,
226
+ index: rows)
227
+
228
+ expect(df.a) .to eq(DaruLite::Vector.new([1,2,3,4,5], order: [:a], index: rows))
229
+ expect(df.b) .to eq(DaruLite::Vector.new([11,12,13,14,15], name: :b, index: rows))
230
+ expect(df.index) .to eq(DaruLite::Index.new [:one, :two, :three, :four, :five])
231
+ expect(df.vectors).to eq(DaruLite::Index.new [:a, :b])
232
+ end
233
+
234
+ it "initializes without specifying row/col index" do
235
+ df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]})
236
+
237
+ expect(df.index) .to eq(DaruLite::Index.new [0,1,2,3,4])
238
+ expect(df.vectors).to eq(DaruLite::Index.new [:b, :a])
239
+ end
240
+
241
+ it "aligns indexes properly" do
242
+ df = DaruLite::DataFrame.new({
243
+ b: [11,12,13,14,15].dv(:b, [:two, :one, :four, :five, :three]),
244
+ a: [1,2,3,4,5].dv(:a, [:two,:one,:three, :four, :five])
245
+ },
246
+ order: [:a, :b]
247
+ )
248
+
249
+ expect(df).to eq(DaruLite::DataFrame.new({
250
+ b: [14,13,12,15,11].dv(:b, [:five, :four, :one, :three, :two]),
251
+ a: [5,4,2,3,1].dv(:a, [:five, :four, :one, :three, :two])
252
+ }, order: [:a, :b])
253
+ )
254
+ end
255
+
256
+ it "adds nil values for missing indexes and aligns by index" do
257
+ df = DaruLite::DataFrame.new({
258
+ b: [11,12,13,14,15].dv(:b, [:two, :one, :four, :five, :three]),
259
+ a: [1,2,3] .dv(:a, [:two,:one,:three])
260
+ },
261
+ order: [:a, :b]
262
+ )
263
+
264
+ expect(df).to eq(DaruLite::DataFrame.new({
265
+ b: [14,13,12,15,11].dv(:b, [:five, :four, :one, :three, :two]),
266
+ a: [nil,nil,2,3,1].dv(:a, [:five, :four, :one, :three, :two])
267
+ },
268
+ order: [:a, :b])
269
+ )
270
+ end
271
+
272
+ it "adds nils in first vector when other vectors have many extra indexes" do
273
+ df = DaruLite::DataFrame.new({
274
+ b: [11] .dv(nil, [:one]),
275
+ a: [1,2,3] .dv(nil, [:one, :two, :three]),
276
+ c: [11,22,33,44,55] .dv(nil, [:one, :two, :three, :four, :five]),
277
+ d: [49,69,89,99,108,44].dv(nil, [:one, :two, :three, :four, :five, :six])
278
+ }, order: [:a, :b, :c, :d],
279
+ index: [:one, :two, :three, :four, :five, :six])
280
+
281
+ expect(df).to eq(DaruLite::DataFrame.new({
282
+ b: [11,nil,nil,nil,nil,nil].dv(nil, [:one, :two, :three, :four, :five, :six]),
283
+ a: [1,2,3,nil,nil,nil] .dv(nil, [:one, :two, :three, :four, :five, :six]),
284
+ c: [11,22,33,44,55,nil] .dv(nil, [:one, :two, :three, :four, :five, :six]),
285
+ d: [49,69,89,99,108,44] .dv(nil, [:one, :two, :three, :four, :five, :six])
286
+ }, order: [:a, :b, :c, :d],
287
+ index: [:one, :two, :three, :four, :five, :six])
288
+ )
289
+ end
290
+
291
+ it "correctly matches the supplied DataFrame index with the individual vector indexes" do
292
+ df = DaruLite::DataFrame.new({
293
+ b: [11,12,13] .dv(nil, [:one, :bleh, :blah]),
294
+ a: [1,2,3,4,5].dv(nil, [:one, :two, :booh, :baah, :three]),
295
+ c: [11,22,33,44,55].dv(nil, [0,1,3,:three, :two])
296
+ }, order: [:a, :b, :c], index: [:one, :two, :three])
297
+
298
+ expect(df).to eq(DaruLite::DataFrame.new({
299
+ b: [11,nil,nil].dv(nil, [:one, :two, :three]),
300
+ a: [1,2,5] .dv(nil, [:one, :two, :three]),
301
+ c: [nil,55,44] .dv(nil, [:one, :two, :three]),
302
+ },
303
+ order: [:a, :b, :c], index: [:one, :two, :three]
304
+ )
305
+ )
306
+ end
307
+
308
+ it "completes incomplete vectors" do
309
+ df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
310
+ c: [11,22,33,44,55]}, order: [:a, :c])
311
+
312
+ expect(df.vectors).to eq([:a,:c,:b].to_index)
313
+ end
314
+
315
+ it "does not copy vectors when clone: false" do
316
+ a = DaruLite::Vector.new([1,2,3,4,5])
317
+ b = DaruLite::Vector.new([1,2,3,4,5])
318
+ c = DaruLite::Vector.new([1,2,3,4,5])
319
+ df = DaruLite::DataFrame.new({a: a, b: b, c: c}, clone: false)
320
+
321
+ expect(df[:a].object_id).to eq(a.object_id)
322
+ expect(df[:b].object_id).to eq(b.object_id)
323
+ expect(df[:c].object_id).to eq(c.object_id)
324
+ end
325
+
326
+ it "allows creation of empty dataframe with only order" do
327
+ df = DaruLite::DataFrame.new({}, order: [:a, :b, :c])
328
+ df[:a] = DaruLite::Vector.new([1,2,3,4,5,6])
329
+
330
+ expect(df.size).to eq(6)
331
+ expect(df[:a]).to eq(DaruLite::Vector.new([1,2,3,4,5,6]))
332
+ expect(df[:b]).to eq(DaruLite::Vector.new([nil,nil,nil,nil,nil,nil]))
333
+ expect(df[:c]).to eq(DaruLite::Vector.new([nil,nil,nil,nil,nil,nil]))
334
+ end
335
+
336
+ it "allows creation of dataframe without specifying order or index" do
337
+ df = DaruLite::DataFrame.new({})
338
+ df[:a] = DaruLite::Vector.new([1,2,3,4,5])
339
+
340
+ expect(df.size) .to eq(5)
341
+ expect(df.index.to_a) .to eq([0,1,2,3,4])
342
+ expect(df.vectors.to_a).to eq([:a])
343
+ expect(df[:a]) .to eq(DaruLite::Vector.new([1,2,3,4,5]))
344
+ end
345
+
346
+ it "allows creation of dataframe with a default order" do
347
+ arr_of_arrs_df = DaruLite::DataFrame.new([[1,2,3], [4,5,6], [7,8,9]])
348
+ arr_of_vectors_df = DaruLite::DataFrame.new([DaruLite::Vector.new([1,2,3]), DaruLite::Vector.new([4,5,6]), DaruLite::Vector.new([7,8,9])])
349
+
350
+ expect(arr_of_arrs_df.vectors.to_a).to eq([0,1,2])
351
+ expect(arr_of_vectors_df.vectors.to_a).to eq([0,1,2])
352
+ end
353
+
354
+ it "raises error for incomplete DataFrame index" do
355
+ expect {
356
+ df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
357
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
358
+ index: [:one, :two, :three])
359
+ }.to raise_error
360
+ end
361
+
362
+ it "raises error for unequal sized vectors/arrays" do
363
+ expect {
364
+ df = DaruLite::DataFrame.new({b: [11,12,13], a: [1,2,3,4,5],
365
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
366
+ index: [:one, :two, :three])
367
+ }.to raise_error
368
+ end
369
+ end
370
+
371
+ context DaruLite::MultiIndex do
372
+ it "creates empty DataFrame" do
373
+ df = DaruLite::DataFrame.new({}, order: @order_mi)
374
+
375
+ expect(df.vectors).to eq(@order_mi)
376
+ expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new([]))
377
+ end
378
+
379
+ it "creates from Hash" do
380
+ df = DaruLite::DataFrame.new({
381
+ [:a,:one,:bar] => @vector_arry1,
382
+ [:a,:two,:baz] => @vector_arry2,
383
+ [:b,:one,:foo] => @vector_arry1,
384
+ [:b,:two,:foo] => @vector_arry2
385
+ }, order: @order_mi, index: @multi_index)
386
+
387
+ expect(df.index) .to eq(@multi_index)
388
+ expect(df.vectors) .to eq(@order_mi)
389
+ expect(df[:a,:one,:bar]).to eq(DaruLite::Vector.new(@vector_arry1,
390
+ index: @multi_index))
391
+ end
392
+
393
+ it "creates from Array of Hashes" do
394
+ # TODO
395
+ end
396
+
397
+ it "creates from Array of Arrays" do
398
+ df = DaruLite::DataFrame.new([@vector_arry1, @vector_arry2, @vector_arry1,
399
+ @vector_arry2], index: @multi_index, order: @order_mi)
400
+
401
+ expect(df.index) .to eq(@multi_index)
402
+ expect(df.vectors).to eq(@order_mi)
403
+ expect(df[:a, :one, :bar]).to eq(DaruLite::Vector.new(@vector_arry1,
404
+ index: @multi_index))
405
+ end
406
+
407
+ it "raises error for order MultiIndex of different size than supplied Array" do
408
+ expect {
409
+ df = DaruLite::DataFrame.new([@vector_arry1, @vector_arry2], order: @order_mi,
410
+ index: @multi_index)
411
+ }.to raise_error
412
+ end
413
+
414
+ it "aligns MultiIndexes properly" do
415
+ pending
416
+ mi_a = @order_mi
417
+ mi_b = DaruLite::MultiIndex.from_tuples([
418
+ [:b,:one,:foo],
419
+ [:a,:one,:bar],
420
+ [:b,:two,:foo],
421
+ [:a,:one,:baz]
422
+ ])
423
+ mi_sorted = DaruLite::MultiIndex.from_tuples([
424
+ [:a, :one, :bar],
425
+ [:a, :one, :baz],
426
+ [:b, :one, :foo],
427
+ [:b, :two, :foo]
428
+ ])
429
+ order = DaruLite::MultiIndex.from_tuples([
430
+ [:pee, :que],
431
+ [:pee, :poo]
432
+ ])
433
+ a = DaruLite::Vector.new([1,2,3,4], index: mi_a)
434
+ b = DaruLite::Vector.new([11,12,13,14], index: mi_b)
435
+ df = DaruLite::DataFrame.new([b,a], order: order)
436
+
437
+ expect(df).to eq(DaruLite::DataFrame.new({
438
+ [:pee, :que] => DaruLite::Vector.new([1,2,4,3], index: mi_sorted),
439
+ [:pee, :poo] => DaruLite::Vector.new([12,14,11,13], index: mi_sorted)
440
+ }, order: order_mi))
441
+ end
442
+
443
+ it "adds nils in case of missing values" do
444
+ # TODO
445
+ end
446
+
447
+ it "matches individual vector indexing with supplied DataFrame index" do
448
+ # TODO
449
+ end
450
+ end
451
+ end
452
+
453
+ context "#[]" do
454
+ context DaruLite::Index do
455
+ before :each do
456
+ @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
457
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
458
+ index: [:one, :two, :three, :four, :five])
459
+ end
460
+
461
+ it "returns a Vector" do
462
+ expect(@df[:a]).to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
463
+ end
464
+
465
+ it "returns a Vector by default" do
466
+ expect(@df[:a]).to eq(DaruLite::Vector.new([1,2,3,4,5], name: :a,
467
+ index: [:one, :two, :three, :four, :five]))
468
+ end
469
+
470
+ it "returns a DataFrame" do
471
+ temp = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
472
+ order: [:a, :b], index: [:one, :two, :three, :four, :five])
473
+
474
+ expect(@df[:a, :b]).to eq(temp)
475
+ end
476
+
477
+ it "accesses vector with Integer index" do
478
+ expect(@df[0]).to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
479
+ end
480
+
481
+ it "returns a subset of DataFrame when specified range" do
482
+ subset = @df[:b..:c]
483
+ expect(subset).to eq(DaruLite::DataFrame.new({
484
+ b: [11,12,13,14,15],
485
+ c: [11,22,33,44,55]
486
+ }, index: [:one, :two, :three, :four, :five]))
487
+ end
488
+
489
+ it 'accepts axis parameter as a last argument' do
490
+ expect(@df[:a, :vector]).to eq @df[:a]
491
+ expect(@df[:one, :row]).to eq [1, 11, 11].dv(:one, [:a, :b, :c])
492
+ end
493
+ end
494
+
495
+ context DaruLite::MultiIndex do
496
+ it "accesses vector with an integer index" do
497
+ expect(@df_mi[0]).to eq(
498
+ DaruLite::Vector.new(@vector_arry1, index: @multi_index))
499
+ end
500
+
501
+ it "returns a vector when specifying full tuple" do
502
+ expect(@df_mi[:a, :one, :bar]).to eq(
503
+ DaruLite::Vector.new(@vector_arry1, index: @multi_index))
504
+ end
505
+
506
+ it "returns DataFrame when specified first layer of MultiIndex" do
507
+ sub_order = DaruLite::MultiIndex.from_tuples([
508
+ [:one, :bar],
509
+ [:two, :baz]
510
+ ])
511
+ expect(@df_mi[:a]).to eq(DaruLite::DataFrame.new([
512
+ @vector_arry1,
513
+ @vector_arry2
514
+ ], index: @multi_index, order: sub_order))
515
+ end
516
+
517
+ it "returns a Vector if the last level of MultiIndex is tracked" do
518
+ expect(@df_mi[:a, :one, :bar]).to eq(
519
+ DaruLite::Vector.new(@vector_arry1, index: @multi_index))
520
+ end
521
+ end
522
+ end
523
+
524
+ context "#[]=" do
525
+ context DaruLite::Index do
526
+ before :each do
527
+ @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
528
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
529
+ index: [:one, :two, :three, :four, :five])
530
+ end
531
+
532
+ it "assigns directly with the []= operator" do
533
+ @data_frame[:a] = [100,200,300,400,500]
534
+ expect(@data_frame).to eq(DaruLite::DataFrame.new({
535
+ b: [11,12,13,14,15],
536
+ a: [100,200,300,400,500],
537
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
538
+ index: [:one, :two, :three, :four, :five]))
539
+ end
540
+
541
+ it "assigns new vector with default length if given just a value" do
542
+ @df[:d] = 1.0
543
+ expect(@df[:d]).to eq(DaruLite::Vector.new([1.0, 1.0, 1.0, 1.0, 1.0],
544
+ index: [:one, :two, :three, :four, :five], name: :d))
545
+ end
546
+
547
+ it "updates vector with default length if given just a value" do
548
+ @df[:c] = 1.0
549
+ expect(@df[:c]).to eq(DaruLite::Vector.new([1.0, 1.0, 1.0, 1.0, 1.0],
550
+ index: [:one, :two, :three, :four, :five], name: :c))
551
+ end
552
+
553
+ it "appends an Array as a DaruLite::Vector" do
554
+ @df[:d] = [69,99,108,85,49]
555
+
556
+ expect(@df.d.class).to eq(DaruLite::Vector)
557
+ end
558
+
559
+ it "appends an arbitrary enumerable as a DaruLite::Vector" do
560
+ @df[:d] = Set.new([69,99,108,85,49])
561
+
562
+ expect(@df[:d]).to eq(DaruLite::Vector.new([69, 99, 108, 85, 49],
563
+ index: [:one, :two, :three, :four, :five], name: :c))
564
+ end
565
+
566
+ it "replaces an already present vector" do
567
+ @df[:a] = [69,99,108,85,49].dv(nil, [:one, :two, :three, :four, :five])
568
+
569
+ expect(@df.a).to eq([69,99,108,85,49].dv(nil, [:one, :two, :three, :four, :five]))
570
+ end
571
+
572
+ it "appends a new vector to the DataFrame" do
573
+ @df[:woo] = [69,99,108,85,49].dv(nil, [:one, :two, :three, :four, :five])
574
+
575
+ expect(@df.vectors).to eq([:a, :b, :c, :woo].to_index)
576
+ end
577
+
578
+ it "creates an index for the new vector if not specified" do
579
+ @df[:woo] = [69,99,108,85,49]
580
+
581
+ expect(@df.woo.index).to eq([:one, :two, :three, :four, :five].to_index)
582
+ end
583
+
584
+ it "matches index of vector to be inserted with the DataFrame index" do
585
+ @df[:shankar] = [69,99,108,85,49].dv(:shankar, [:two, :one, :three, :five, :four])
586
+
587
+ expect(@df.shankar).to eq([99,69,108,49,85].dv(:shankar,
588
+ [:one, :two, :three, :four, :five]))
589
+ end
590
+
591
+ it "matches index of vector to be inserted, inserting nils where no match found" do
592
+ @df[:shankar] = [1,2,3].dv(:shankar, [:one, :james, :hetfield])
593
+
594
+ expect(@df.shankar).to eq([1,nil,nil,nil,nil].dv(:shankar, [:one, :two, :three, :four, :five]))
595
+ end
596
+
597
+ it "raises error for Array assignment of wrong length" do
598
+ expect{
599
+ @df[:shiva] = [1,2,3]
600
+ }.to raise_error
601
+ end
602
+
603
+ it "assigns correct name given empty dataframe" do
604
+ df_empty = DaruLite::DataFrame.new({})
605
+ df_empty[:a] = 1..5
606
+ df_empty[:b] = 1..5
607
+
608
+ expect(df_empty[:a].name).to equal(:a)
609
+ expect(df_empty[:b].name).to equal(:b)
610
+ end
611
+
612
+ it "appends multiple vectors at a time" do
613
+ # TODO
614
+ end
615
+ end
616
+
617
+ context DaruLite::MultiIndex do
618
+ it "raises error when incomplete index specified but index is absent" do
619
+ expect {
620
+ @df_mi[:d] = [100,200,300,400,100,200,300,400,100,200,300,400]
621
+ }.to raise_error
622
+ end
623
+
624
+ it "assigns all sub-indexes when a top level index is specified" do
625
+ @df_mi[:a] = [100,200,300,400,100,200,300,400,100,200,300,400]
626
+
627
+ expect(@df_mi).to eq(DaruLite::DataFrame.new([
628
+ [100,200,300,400,100,200,300,400,100,200,300,400],
629
+ [100,200,300,400,100,200,300,400,100,200,300,400],
630
+ @vector_arry1,
631
+ @vector_arry2], index: @multi_index, order: @order_mi))
632
+ end
633
+
634
+ it "creates a new vector when full index specfied" do
635
+ order = DaruLite::MultiIndex.from_tuples([
636
+ [:a,:one,:bar],
637
+ [:a,:two,:baz],
638
+ [:b,:two,:foo],
639
+ [:b,:one,:foo],
640
+ [:c,:one,:bar]])
641
+ answer = DaruLite::DataFrame.new([
642
+ @vector_arry1,
643
+ @vector_arry2,
644
+ @vector_arry1,
645
+ @vector_arry2,
646
+ [100,200,300,400,100,200,300,400,100,200,300,400]
647
+ ], index: @multi_index, order: order)
648
+ @df_mi[:c,:one,:bar] = [100,200,300,400,100,200,300,400,100,200,300,400]
649
+
650
+ expect(@df_mi).to eq(answer)
651
+ end
652
+
653
+ it "assigns correct name given empty dataframe" do
654
+ df_empty = DaruLite::DataFrame.new([], index: @multi_index, order: @order_mi)
655
+ df_empty[:c, :one, :bar] = 1..12
656
+
657
+ expect(df_empty[:c, :one, :bar].name).to eq "conebar"
658
+ end
659
+ end
660
+ end
661
+
662
+ context '#method_missing' do
663
+ let(:df) { DaruLite::DataFrame.new({
664
+ :a => [1, 2, 3, 4, 5],
665
+ 'b' => [5, 4, 3, 2, 1]
666
+ }, index: 11..15)}
667
+
668
+ context 'get vector' do
669
+ context 'by string' do
670
+ subject { df.b }
671
+
672
+ it { is_expected.to be_a DaruLite::Vector }
673
+ its(:to_a) { is_expected.to eq [5, 4, 3, 2, 1] }
674
+ its(:'index.to_a') { is_expected.to eq [11, 12, 13, 14, 15] }
675
+ end
676
+
677
+ context 'by symbol' do
678
+ subject { df.a }
679
+
680
+ it { is_expected.to be_a DaruLite::Vector }
681
+ its(:to_a) { is_expected.to eq [1, 2, 3, 4, 5] }
682
+ its(:'index.to_a') { is_expected.to eq [11, 12, 13, 14, 15] }
683
+ end
684
+ end
685
+
686
+ context 'set existing vector' do
687
+ context 'by string' do
688
+ before { df.b = [:a, :b, :c, :d, :e] }
689
+ subject { df }
690
+
691
+ it { is_expected.to be_a DaruLite::DataFrame }
692
+ its(:'vectors.to_a') { is_expected.to eq [:a, 'b'] }
693
+ its(:'b.to_a') { is_expected.to eq [:a, :b, :c, :d, :e] }
694
+ its(:'index.to_a') { is_expected.to eq [11, 12, 13, 14, 15] }
695
+ end
696
+
697
+ context 'by symbol' do
698
+ before { df.a = [:a, :b, :c, :d, :e] }
699
+ subject { df }
700
+
701
+ it { is_expected.to be_a DaruLite::DataFrame }
702
+ its(:'vectors.to_a') { is_expected.to eq [:a, 'b'] }
703
+ its(:'a.to_a') { is_expected.to eq [:a, :b, :c, :d, :e] }
704
+ its(:'index.to_a') { is_expected.to eq [11, 12, 13, 14, 15] }
705
+ end
706
+ end
707
+
708
+ context 'set new vector' do
709
+ before { df.c = [5, 5, 5, 5, 5] }
710
+ subject { df }
711
+
712
+ it { is_expected.to be_a DaruLite::DataFrame }
713
+ its(:'vectors.to_a') { is_expected.to eq [:a, 'b', :c] }
714
+ its(:'c.to_a') { is_expected.to eq [5, 5, 5, 5, 5] }
715
+ its(:'index.to_a') { is_expected.to eq [11, 12, 13, 14, 15] }
716
+ end
717
+
718
+ context 'reference invalid vector' do
719
+ it { expect { df.d }.to raise_error NoMethodError }
720
+ end
721
+ end
722
+
723
+ context '#add_vector' do
724
+ subject(:data_frame) {
725
+ DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
726
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
727
+ index: [:one, :two, :three, :four, :five])
728
+ }
729
+ before {
730
+ data_frame.add_vector :a, [100,200,300,400,500]
731
+ }
732
+
733
+ it { is_expected.to eq(DaruLite::DataFrame.new({
734
+ b: [11,12,13,14,15],
735
+ a: [100,200,300,400,500],
736
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
737
+ index: [:one, :two, :three, :four, :five]))
738
+ }
739
+ end
740
+
741
+ context "#insert_vector" do
742
+ subject(:data_frame) {
743
+ DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
744
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
745
+ index: [:one, :two, :three, :four, :five])
746
+ }
747
+
748
+ it "insert a new vector at the desired slot" do
749
+ df = DaruLite::DataFrame.new({
750
+ a: [1,2,3,4,5],
751
+ d: [710, 720, 730, 740, 750],
752
+ b: [11, 12, 13, 14, 15],
753
+ c: [11,22,33,44,55]}, order: [:a, :d, :b, :c],
754
+ index: [:one, :two, :three, :four, :five]
755
+ )
756
+ data_frame.insert_vector 1, :d, [710, 720, 730, 740, 750]
757
+ expect(subject).to eq df
758
+ end
759
+
760
+ it "raises error for data array being too big" do
761
+ expect {
762
+ source = (1..8).to_a
763
+ data_frame.insert_vector 1, :d, source
764
+ }.to raise_error(IndexError)
765
+ end
766
+
767
+ it "raises error for invalid index value" do
768
+ expect {
769
+ source = (1..5).to_a
770
+ data_frame.insert_vector 4, :d, source
771
+ }.to raise_error(ArgumentError)
772
+ end
773
+
774
+ it "raises error for invalid source type" do
775
+ expect {
776
+ source = 14
777
+ data_frame.insert_vector 3, :d, source
778
+ }.to raise_error(ArgumentError)
779
+ end
780
+ end
781
+
782
+ context "#row[]=" do
783
+ context DaruLite::Index do
784
+ before :each do
785
+ @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
786
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
787
+ index: [:one, :two, :three, :four, :five])
788
+ end
789
+
790
+ it "assigns specified row when Array" do
791
+ @df.row[:one] = [49, 99, 59]
792
+
793
+ expect(@df.row[:one]) .to eq([49, 99, 59].dv(:one, [:a, :b, :c]))
794
+ expect(@df.row[:one].index).to eq([:a, :b, :c].to_index)
795
+ expect(@df.row[:one].name) .to eq(:one)
796
+ end
797
+
798
+ it "assigns specified row when DV" do
799
+ @df.row[:one] = [49, 99, 59].dv(nil, [:a, :b, :c])
800
+
801
+ expect(@df.row[:one]).to eq([49, 99, 59].dv(:one, [:a, :b, :c]))
802
+ end
803
+
804
+ it "assigns correct elements when Vector of different index" do
805
+ @df.row[:one] = DaruLite::Vector.new([44,62,11], index: [:b,:f,:a])
806
+
807
+ expect(@df.row[:one]).to eq(DaruLite::Vector.new([11,44,nil], index: [:a,:b,:c]))
808
+ end
809
+
810
+ it "creates a new row from an Array" do
811
+ @df.row[:patekar] = [9,2,11]
812
+
813
+ expect(@df.row[:patekar]).to eq([9,2,11].dv(:patekar, [:a, :b, :c]))
814
+ end
815
+
816
+ it "creates a new row from a DV" do
817
+ @df.row[:patekar] = [9,2,11].dv(nil, [:a, :b, :c])
818
+
819
+ expect(@df.row[:patekar]).to eq([9,2,11].dv(:patekar, [:a, :b, :c]))
820
+ end
821
+
822
+ it "creates a new row from numeric row index and named DV" do
823
+ @df.row[2] = [9,2,11].dv(nil, [:a, :b, :c])
824
+
825
+ expect(@df.row[2]).to eq([9,2,11].dv(nil, [:a, :b, :c]))
826
+ end
827
+
828
+ it "correctly aligns assigned DV by index" do
829
+ @df.row[:two] = [9,2,11].dv(nil, [:b, :a, :c])
830
+
831
+ expect(@df.row[:two]).to eq([2,9,11].dv(:two, [:a, :b, :c]))
832
+ end
833
+
834
+ it "correctlu aligns assinged DV by index for new rows" do
835
+ @df.row[:latest] = DaruLite::Vector.new([2,3,1], index: [:b,:c,:a])
836
+
837
+ expect(@df.row[:latest]).to eq(DaruLite::Vector.new([1,2,3], index: [:a,:b,:c]))
838
+ end
839
+
840
+ it "inserts nils for indexes that dont exist in the DataFrame" do
841
+ @df.row[:two] = [49, 99, 59].dv(nil, [:oo, :aah, :gaah])
842
+
843
+ expect(@df.row[:two]).to eq([nil,nil,nil].dv(nil, [:a, :b, :c]))
844
+ end
845
+
846
+ it "correctly inserts row of a different length by matching indexes" do
847
+ @df.row[:four] = [5,4,3,2,1,3].dv(nil, [:you, :have, :a, :big, :appetite, :spock])
848
+
849
+ expect(@df.row[:four]).to eq([3,nil,nil].dv(:four, [:a, :b, :c]))
850
+ end
851
+
852
+ it "raises error for row insertion by Array of wrong length" do
853
+ expect{
854
+ @df.row[:one] = [1,2,3,4,5,6,7] # 7 values for a frame ordered [:a, :b, :c]
855
+ }.to raise_error # NOTE(review): bare raise_error accepts any exception (even a NoMethodError from a typo); pin the class once confirmed - possibly SizeError, verify against the implementation
856
+ end
857
+ end
858
+
859
+ context DaruLite::MultiIndex do
860
+ pending
861
+ # TO DO
862
+ end
863
+
864
+ context DaruLite::CategoricalIndex do
865
+ let(:idx) { DaruLite::CategoricalIndex.new [:a, 1, :a, 1, :c] }
866
+ let(:df) do
867
+ DaruLite::DataFrame.new({
868
+ a: 'a'..'e',
869
+ b: 1..5
870
+ }, index: idx)
871
+ end
872
+
873
+ context "modify exiting row" do
874
+ context "single category" do
875
+ subject { df }
876
+ before { df.row[:a] = ['x', 'y'] }
877
+
878
+ it { is_expected.to be_a DaruLite::DataFrame }
879
+ its(:index) { is_expected.to eq idx }
880
+ its(:vectors) { is_expected.to eq DaruLite::Index.new [:a, :b] }
881
+ its(:'a.to_a') { is_expected.to eq ['x', 'b', 'x', 'd', 'e'] }
882
+ its(:'b.to_a') { is_expected.to eq ['y', 2, 'y', 4, 5] }
883
+ end
884
+
885
+ context "multiple categories" do
886
+ subject { df }
887
+ before { df.row[:a, 1] = ['x', 'y'] }
888
+
889
+ it { is_expected.to be_a DaruLite::DataFrame }
890
+ its(:index) { is_expected.to eq idx }
891
+ its(:vectors) { is_expected.to eq DaruLite::Index.new [:a, :b] }
892
+ its(:'a.to_a') { is_expected.to eq ['x', 'x', 'x', 'x', 'e'] }
893
+ its(:'b.to_a') { is_expected.to eq ['y', 'y', 'y', 'y', 5] }
894
+ end
895
+
896
+ context "positional index" do
897
+ subject { df }
898
+ before { df.row[0, 2] = ['x', 'y'] }
899
+
900
+ it { is_expected.to be_a DaruLite::DataFrame }
901
+ its(:index) { is_expected.to eq idx }
902
+ its(:vectors) { is_expected.to eq DaruLite::Index.new [:a, :b] }
903
+ its(:'a.to_a') { is_expected.to eq ['x', 'b', 'x', 'd', 'e'] }
904
+ its(:'b.to_a') { is_expected.to eq ['y', 2, 'y', 4, 5] }
905
+ end
906
+ end
907
+
908
+ context "add new row" do
909
+ # TODO
910
+ end
911
+ end
912
+ end
913
+
914
+ context "#row.at" do
915
+ context DaruLite::Index do
916
+ let(:idx) { DaruLite::Index.new [1, 0, :c] }
917
+ let(:df) do
918
+ DaruLite::DataFrame.new({
919
+ a: 1..3,
920
+ b: 'a'..'c'
921
+ }, index: idx)
922
+ end
923
+
924
+ context "single position" do
925
+ subject { df.row.at 1 }
926
+
927
+ it { is_expected.to be_a DaruLite::Vector }
928
+ its(:size) { is_expected.to eq 2 }
929
+ its(:to_a) { is_expected.to eq [2, 'b'] }
930
+ its(:'index.to_a') { is_expected.to eq [:a, :b] }
931
+ end
932
+
933
+ context "multiple positions" do
934
+ subject { df.row.at 0, 2 }
935
+
936
+ it { is_expected.to be_a DaruLite::DataFrame }
937
+ its(:size) { is_expected.to eq 2 }
938
+ its(:'index.to_a') { is_expected.to eq [1, :c] }
939
+ its(:'a.to_a') { is_expected.to eq [1, 3] }
940
+ its(:'b.to_a') { is_expected.to eq ['a', 'c'] }
941
+ end
942
+
943
+ context "invalid position" do
944
+ it { expect { df.row.at 3 }.to raise_error IndexError }
945
+ end
946
+
947
+ context "invalid positions" do
948
+ it { expect { df.row.at 2, 3 }.to raise_error IndexError }
949
+ end
950
+
951
+ context "range" do
952
+ subject { df.row.at 0..1 }
953
+
954
+ it { is_expected.to be_a DaruLite::DataFrame }
955
+ its(:size) { is_expected.to eq 2 }
956
+ its(:'index.to_a') { is_expected.to eq [1, 0] }
957
+ its(:'a.to_a') { is_expected.to eq [1, 2] }
958
+ its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
959
+ end
960
+
961
+ context "range with negative end" do
962
+ subject { df.row.at 0..-2 }
963
+
964
+ it { is_expected.to be_a DaruLite::DataFrame }
965
+ its(:size) { is_expected.to eq 2 }
966
+ its(:'index.to_a') { is_expected.to eq [1, 0] }
967
+ its(:'a.to_a') { is_expected.to eq [1, 2] }
968
+ its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
969
+ end
970
+
971
+ context "range with single element" do
972
+ subject { df.row.at 0..0 }
973
+
974
+ it { is_expected.to be_a DaruLite::DataFrame }
975
+ its(:size) { is_expected.to eq 1 }
976
+ its(:'index.to_a') { is_expected.to eq [1] }
977
+ its(:'a.to_a') { is_expected.to eq [1] }
978
+ its(:'b.to_a') { is_expected.to eq ['a'] }
979
+ end
980
+ end
981
+
982
+ context DaruLite::MultiIndex do
983
+ let (:idx) do
984
+ DaruLite::MultiIndex.from_tuples [
985
+ [:a,:one,:bar],
986
+ [:a,:one,:baz],
987
+ [:b,:two,:bar],
988
+ [:a,:two,:baz],
989
+ ]
990
+ end
991
+ let (:df) do
992
+ DaruLite::DataFrame.new({
993
+ a: 1..4,
994
+ b: 'a'..'d'
995
+ }, index: idx )
996
+ end
997
+
998
+ context "single position" do
999
+ subject { df.row.at 1 }
1000
+
1001
+ it { is_expected.to be_a DaruLite::Vector }
1002
+ its(:size) { is_expected.to eq 2 }
1003
+ its(:to_a) { is_expected.to eq [2, 'b'] }
1004
+ its(:'index.to_a') { is_expected.to eq [:a, :b] }
1005
+ end
1006
+
1007
+ context "multiple positions" do
1008
+ subject { df.row.at 0, 2 }
1009
+
1010
+ it { is_expected.to be_a DaruLite::DataFrame }
1011
+ its(:size) { is_expected.to eq 2 }
1012
+ its(:'index.to_a') { is_expected.to eq [[:a, :one, :bar],
1013
+ [:b, :two, :bar]] }
1014
+ its(:'a.to_a') { is_expected.to eq [1, 3] }
1015
+ its(:'a.index.to_a') { is_expected.to eq [[:a, :one, :bar],
1016
+ [:b, :two, :bar]] }
1017
+ its(:'b.to_a') { is_expected.to eq ['a', 'c'] }
1018
+ end
1019
+
1020
+ context "invalid position" do
1021
+ it { expect { df.row.at 4 }.to raise_error IndexError }
1022
+ end
1023
+
1024
+ context "invalid positions" do
1025
+ it { expect { df.row.at 3, 4 }.to raise_error IndexError }
1026
+ end
1027
+
1028
+ context "range" do
1029
+ subject { df.row.at 0..1 }
1030
+
1031
+ it { is_expected.to be_a DaruLite::DataFrame }
1032
+ its(:size) { is_expected.to eq 2 }
1033
+ its(:'index.to_a') { is_expected.to eq [[:a, :one, :bar],
1034
+ [:a, :one, :baz]] }
1035
+ its(:'a.to_a') { is_expected.to eq [1, 2] }
1036
+ its(:'a.index.to_a') { is_expected.to eq [[:a, :one, :bar],
1037
+ [:a, :one, :baz]] }
1038
+ its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
1039
+ end
1040
+
1041
+ context "range with negative end" do
1042
+ subject { df.row.at 0..-3 }
1043
+
1044
+ it { is_expected.to be_a DaruLite::DataFrame }
1045
+ its(:size) { is_expected.to eq 2 }
1046
+ its(:'index.to_a') { is_expected.to eq [[:a, :one, :bar],
1047
+ [:a, :one, :baz]] }
1048
+ its(:'a.to_a') { is_expected.to eq [1, 2] }
1049
+ its(:'a.index.to_a') { is_expected.to eq [[:a, :one, :bar],
1050
+ [:a, :one, :baz]] }
1051
+ its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
1052
+ end
1053
+
1054
+ context " range with single element" do
1055
+ subject { df.row.at 0..0 }
1056
+
1057
+ it { is_expected.to be_a DaruLite::DataFrame }
1058
+ its(:size) { is_expected.to eq 1 }
1059
+ its(:'index.to_a') { is_expected.to eq [[:a, :one, :bar]] }
1060
+ its(:'a.to_a') { is_expected.to eq [1] }
1061
+ its(:'a.index.to_a') { is_expected.to eq [[:a, :one, :bar]] }
1062
+ its(:'b.to_a') { is_expected.to eq ['a'] }
1063
+ end
1064
+ end
1065
+
1066
+ context DaruLite::CategoricalIndex do
1067
+ let (:idx) { DaruLite::CategoricalIndex.new [:a, 1, 1, :a, :c] }
1068
+ let (:df) do
1069
+ DaruLite::DataFrame.new({
1070
+ a: 1..5,
1071
+ b: 'a'..'e'
1072
+ }, index: idx )
1073
+ end
1074
+
1075
+ context "single positional index" do
1076
+ subject { df.row.at 1 }
1077
+
1078
+ it { is_expected.to be_a DaruLite::Vector }
1079
+ its(:size) { is_expected.to eq 2 }
1080
+ its(:to_a) { is_expected.to eq [2, 'b'] }
1081
+ its(:'index.to_a') { is_expected.to eq [:a, :b] }
1082
+ end
1083
+
1084
+ context "multiple positional indexes" do
1085
+ subject { df.row.at 0, 2 }
1086
+
1087
+ it { is_expected.to be_a DaruLite::DataFrame }
1088
+ its(:size) { is_expected.to eq 2 }
1089
+ its(:'index.to_a') { is_expected.to eq [:a, 1] }
1090
+ its(:'a.to_a') { is_expected.to eq [1, 3] }
1091
+ its(:'a.index.to_a') { is_expected.to eq [:a, 1] }
1092
+ its(:'b.to_a') { is_expected.to eq ['a', 'c'] }
1093
+ its(:'b.index.to_a') { is_expected.to eq [:a, 1] }
1094
+ end
1095
+
1096
+ context "invalid position" do
1097
+ it { expect { df.row.at 5 }.to raise_error IndexError } # exercise row.at (not the vector accessor #at): the 5 rows occupy positions 0..4
1098
+ end
1099
+
1100
+ context "invalid positions" do
1101
+ it { expect { df.row.at 4, 5 }.to raise_error IndexError } # exercise row.at: position 4 is the last valid row, 5 is out of range
1102
+ end
1103
+
1104
+ context "range" do
1105
+ subject { df.row.at 0..1 }
1106
+
1107
+ it { is_expected.to be_a DaruLite::DataFrame }
1108
+ its(:size) { is_expected.to eq 2 }
1109
+ its(:'index.to_a') { is_expected.to eq [:a, 1] }
1110
+ its(:'a.to_a') { is_expected.to eq [1, 2] }
1111
+ its(:'a.index.to_a') { is_expected.to eq [:a, 1] }
1112
+ its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
1113
+ its(:'b.index.to_a') { is_expected.to eq [:a, 1] }
1114
+ end
1115
+
1116
+ context "range with negative end" do
1117
+ subject { df.row.at 0..-4 }
1118
+
1119
+ it { is_expected.to be_a DaruLite::DataFrame }
1120
+ its(:size) { is_expected.to eq 2 }
1121
+ its(:'index.to_a') { is_expected.to eq [:a, 1] }
1122
+ its(:'a.to_a') { is_expected.to eq [1, 2] }
1123
+ its(:'a.index.to_a') { is_expected.to eq [:a, 1] }
1124
+ its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
1125
+ its(:'b.index.to_a') { is_expected.to eq [:a, 1] }
1126
+ end
1127
+
1128
+ context " range with single element" do
1129
+ subject { df.row.at 0..0 }
1130
+
1131
+ it { is_expected.to be_a DaruLite::DataFrame }
1132
+ its(:size) { is_expected.to eq 1 }
1133
+ its(:'index.to_a') { is_expected.to eq [:a] }
1134
+ its(:'a.to_a') { is_expected.to eq [1] }
1135
+ its(:'a.index.to_a') { is_expected.to eq [:a] }
1136
+ its(:'b.to_a') { is_expected.to eq ['a'] }
1137
+ its(:'b.index.to_a') { is_expected.to eq [:a] }
1138
+ end
1139
+ end
1140
+ end
1141
+
1142
+ context "#row.set_at" do
1143
+ let(:df) do
1144
+ DaruLite::DataFrame.new({
1145
+ a: 1..3,
1146
+ b: 'a'..'c'
1147
+ })
1148
+ end
1149
+
1150
+ context "single position" do
1151
+ subject { df }
1152
+ before { df.row.set_at [1], ['x', 'y'] }
1153
+
1154
+ its(:size) { is_expected.to eq 3 }
1155
+ its(:'a.to_a') { is_expected.to eq [1, 'x', 3] }
1156
+ its(:'b.to_a') { is_expected.to eq ['a', 'y', 'c'] }
1157
+ end
1158
+
1159
+ context "multiple position" do
1160
+ subject { df }
1161
+ before { df.row.set_at [0, 2], ['x', 'y'] }
1162
+
1163
+ its(:size) { is_expected.to eq 3 }
1164
+ its(:'a.to_a') { is_expected.to eq ['x', 2, 'x'] }
1165
+ its(:'b.to_a') { is_expected.to eq ['y', 'b', 'y'] }
1166
+ end
1167
+
1168
+ context "invalid position" do
1169
+ it { expect { df.row.set_at [3], ['x', 'y'] }.to raise_error IndexError }
1170
+ end
1171
+
1172
+ context "invalid positions" do
1173
+ it { expect { df.row.set_at [2, 3], ['x', 'y'] }.to raise_error IndexError }
1174
+ end
1175
+
1176
+ context "incorrect size" do
1177
+ it { expect { df.row.set_at [1], ['x', 'y', 'z'] }.to raise_error SizeError }
1178
+ end
1179
+ end
1180
+
1181
+ context "#at" do
1182
+ context DaruLite::Index do
1183
+ let(:idx) { DaruLite::Index.new [:a, :b, :c] }
1184
+ let(:df) do
1185
+ DaruLite::DataFrame.new({
1186
+ 1 => 1..3,
1187
+ a: 'a'..'c',
1188
+ b: 11..13
1189
+ }, index: idx)
1190
+ end
1191
+
1192
+ context "single position" do
1193
+ subject { df.at 1 }
1194
+
1195
+ it { is_expected.to be_a DaruLite::Vector }
1196
+ its(:size) { is_expected.to eq 3 }
1197
+ its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
1198
+ its(:index) { is_expected.to eq idx }
1199
+ end
1200
+
1201
+ context "multiple positions" do
1202
+ subject { df.at 0, 2 }
1203
+
1204
+ it { is_expected.to be_a DaruLite::DataFrame }
1205
+ its(:shape) { is_expected.to eq [3, 2] }
1206
+ its(:index) { is_expected.to eq idx }
1207
+ it { expect(subject[1].to_a).to eq [1, 2, 3] } # assert on the frame returned by #at, not on the fixture; the vector is named with the Integer 1
1208
+ its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1209
+ end
1210
+
1211
+ context "single invalid position" do
1212
+ it { expect { df. at 3 }.to raise_error IndexError }
1213
+ end
1214
+
1215
+ context "multiple invalid positions" do
1216
+ it { expect { df.at 2, 3 }.to raise_error IndexError }
1217
+ end
1218
+
1219
+ context "range" do
1220
+ subject { df.at 0..1 }
1221
+
1222
+ it { is_expected.to be_a DaruLite::DataFrame }
1223
+ its(:shape) { is_expected.to eq [3, 2] }
1224
+ its(:index) { is_expected.to eq idx }
1225
+ it { expect(subject[1].to_a).to eq [1, 2, 3] } # assert on the frame returned by #at, not on the fixture; the vector is named with the Integer 1
1226
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1227
+ end
1228
+
1229
+ context "range with negative end" do
1230
+ subject { df.at 0..-2 }
1231
+
1232
+ it { is_expected.to be_a DaruLite::DataFrame }
1233
+ its(:shape) { is_expected.to eq [3, 2] }
1234
+ its(:index) { is_expected.to eq idx }
1235
+ it { expect(subject[1].to_a).to eq [1, 2, 3] } # assert on the frame returned by #at, not on the fixture; the vector is named with the Integer 1
1236
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1237
+ end
1238
+
1239
+ context "range with single element" do
1240
+ subject { df.at 1..1 }
1241
+
1242
+ it { is_expected.to be_a DaruLite::DataFrame }
1243
+ its(:shape) { is_expected.to eq [3, 1] }
1244
+ its(:index) { is_expected.to eq idx }
1245
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1246
+ end
1247
+ end
1248
+
1249
+ context DaruLite::MultiIndex do
1250
+ let (:idx) do
1251
+ DaruLite::MultiIndex.from_tuples [
1252
+ [:a,:one,:bar],
1253
+ [:a,:one,:baz],
1254
+ [:b,:two,:bar],
1255
+ ]
1256
+ end
1257
+ let(:df) do
1258
+ DaruLite::DataFrame.new({
1259
+ 1 => 1..3,
1260
+ a: 'a'..'c',
1261
+ b: 11..13
1262
+ }, index: idx)
1263
+ end
1264
+
1265
+ context "single position" do
1266
+ subject { df.at 1 }
1267
+
1268
+ it { is_expected.to be_a DaruLite::Vector }
1269
+ its(:size) { is_expected.to eq 3 }
1270
+ its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
1271
+ its(:index) { is_expected.to eq idx }
1272
+ end
1273
+
1274
+ context "multiple positions" do
1275
+ subject { df.at 0, 2 }
1276
+
1277
+ it { is_expected.to be_a DaruLite::DataFrame }
1278
+ its(:shape) { is_expected.to eq [3, 2] }
1279
+ its(:index) { is_expected.to eq idx }
1280
+ it { expect(subject[1].to_a).to eq [1, 2, 3] } # assert on the frame returned by #at, not on the fixture; the vector is named with the Integer 1
1281
+ its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1282
+ end
1283
+
1284
+ context "single invalid position" do
1285
+ it { expect { df. at 3 }.to raise_error IndexError }
1286
+ end
1287
+
1288
+ context "multiple invalid positions" do
1289
+ it { expect { df.at 2, 3 }.to raise_error IndexError }
1290
+ end
1291
+
1292
+ context "range" do
1293
+ subject { df.at 0..1 }
1294
+
1295
+ it { is_expected.to be_a DaruLite::DataFrame }
1296
+ its(:shape) { is_expected.to eq [3, 2] }
1297
+ its(:index) { is_expected.to eq idx }
1298
+ it { expect(subject[1].to_a).to eq [1, 2, 3] } # assert on the frame returned by #at, not on the fixture; the vector is named with the Integer 1
1299
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1300
+ end
1301
+
1302
+ context "range with negative end" do
1303
+ subject { df.at 0..-2 }
1304
+
1305
+ it { is_expected.to be_a DaruLite::DataFrame }
1306
+ its(:shape) { is_expected.to eq [3, 2] }
1307
+ its(:index) { is_expected.to eq idx }
1308
+ it { expect(subject[1].to_a).to eq [1, 2, 3] } # assert on the frame returned by #at, not on the fixture; the vector is named with the Integer 1
1309
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1310
+ end
1311
+
1312
+ context "range with single element" do
1313
+ subject { df.at 1..1 }
1314
+
1315
+ it { is_expected.to be_a DaruLite::DataFrame }
1316
+ its(:shape) { is_expected.to eq [3, 1] }
1317
+ its(:index) { is_expected.to eq idx }
1318
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1319
+ end
1320
+ end
1321
+
1322
+ context DaruLite::CategoricalIndex do
1323
+ let (:idx) { DaruLite::CategoricalIndex.new [:a, 1, 1] }
1324
+ let(:df) do
1325
+ DaruLite::DataFrame.new({
1326
+ 1 => 1..3,
1327
+ a: 'a'..'c',
1328
+ b: 11..13
1329
+ }, index: idx)
1330
+ end
1331
+
1332
+ context "single position" do
1333
+ subject { df.at 1 }
1334
+
1335
+ it { is_expected.to be_a DaruLite::Vector }
1336
+ its(:size) { is_expected.to eq 3 }
1337
+ its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
1338
+ its(:index) { is_expected.to eq idx }
1339
+ end
1340
+
1341
+ context "multiple positions" do
1342
+ subject { df.at 0, 2 }
1343
+
1344
+ it { is_expected.to be_a DaruLite::DataFrame }
1345
+ its(:shape) { is_expected.to eq [3, 2] }
1346
+ its(:index) { is_expected.to eq idx }
1347
+ it { expect(subject[1].to_a).to eq [1, 2, 3] } # assert on the frame returned by #at, not on the fixture; the vector is named with the Integer 1
1348
+ its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1349
+ end
1350
+
1351
+ context "single invalid position" do
1352
+ it { expect { df. at 3 }.to raise_error IndexError }
1353
+ end
1354
+
1355
+ context "multiple invalid positions" do
1356
+ it { expect { df.at 2, 3 }.to raise_error IndexError }
1357
+ end
1358
+
1359
+ context "range" do
1360
+ subject { df.at 0..1 }
1361
+
1362
+ it { is_expected.to be_a DaruLite::DataFrame }
1363
+ its(:shape) { is_expected.to eq [3, 2] }
1364
+ its(:index) { is_expected.to eq idx }
1365
+ it { expect(subject[1].to_a).to eq [1, 2, 3] } # assert on the frame returned by #at, not on the fixture; the vector is named with the Integer 1
1366
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1367
+ end
1368
+
1369
+ context "range with negative index" do
1370
+ subject { df.at 0..-2 }
1371
+
1372
+ it { is_expected.to be_a DaruLite::DataFrame }
1373
+ its(:shape) { is_expected.to eq [3, 2] }
1374
+ its(:index) { is_expected.to eq idx }
1375
+ it { expect(subject[1].to_a).to eq [1, 2, 3] } # assert on the frame returned by #at, not on the fixture; the vector is named with the Integer 1
1376
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1377
+ end
1378
+
1379
+ context "range with single element" do
1380
+ subject { df.at 1..1 }
1381
+
1382
+ it { is_expected.to be_a DaruLite::DataFrame }
1383
+ its(:shape) { is_expected.to eq [3, 1] }
1384
+ its(:index) { is_expected.to eq idx }
1385
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1386
+ end
1387
+ end
1388
+ end
1389
+
1390
+ context "#set_at" do
1391
+ let(:df) do
1392
+ DaruLite::DataFrame.new({
1393
+ 1 => 1..3,
1394
+ a: 'a'..'c',
1395
+ b: 11..13
1396
+ })
1397
+ end
1398
+
1399
+ context "single position" do
1400
+ subject { df }
1401
+ before { df.set_at [1], ['x', 'y', 'z'] }
1402
+
1403
+ its(:shape) { is_expected.to eq [3, 3] }
1404
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1405
+ its(:'a.to_a') { is_expected.to eq ['x', 'y', 'z'] }
1406
+ its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1407
+ end
1408
+
1409
+ context "multiple position" do
1410
+ subject { df }
1411
+ before { df.set_at [1, 2], ['x', 'y', 'z'] }
1412
+
1413
+ its(:shape) { is_expected.to eq [3, 3] }
1414
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1415
+ its(:'a.to_a') { is_expected.to eq ['x', 'y', 'z'] }
1416
+ its(:'b.to_a') { is_expected.to eq ['x', 'y', 'z'] }
1417
+ end
1418
+
1419
+ context "invalid position" do
1420
+ it { expect { df.set_at [3], ['x', 'y', 'z'] }.to raise_error IndexError }
1421
+ end
1422
+
1423
+ context "invalid positions" do
1424
+ it { expect { df.set_at [2, 3], ['x', 'y', 'z'] }.to raise_error IndexError }
1425
+ end
1426
+
1427
+ context "incorrect size" do
1428
+ it { expect { df.set_at [1], ['x', 'y'] }.to raise_error SizeError }
1429
+ end
1430
+ end
1431
+
1432
+ context "#row[]" do
1433
+ context DaruLite::Index do
1434
+ before :each do
1435
+ @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
1436
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
1437
+ index: [:one, :two, :three, :four, :five])
1438
+ end
1439
+
1440
+ it "creates an index for assignment if not already specified" do
1441
+ @df.row[:one] = [49, 99, 59]
1442
+
1443
+ expect(@df[:one, :row]) .to eq([49, 99, 59].dv(:one, [:a, :b, :c]))
1444
+ expect(@df[:one, :row].index).to eq([:a, :b, :c].to_index)
1445
+ expect(@df[:one, :row].name) .to eq(:one)
1446
+ end
1447
+
1448
+ it "returns a DataFrame when specifying numeric Range" do
1449
+ expect(@df.row[0..2]).to eq(
1450
+ DaruLite::DataFrame.new({b: [11,12,13], a: [1,2,3],
1451
+ c: [11,22,33]}, order: [:a, :b, :c],
1452
+ index: [:one, :two, :three])
1453
+ )
1454
+ end
1455
+
1456
+ it "returns a DataFrame when specifying symbolic Range" do
1457
+ expect(@df.row[:one..:three]).to eq(
1458
+ DaruLite::DataFrame.new({b: [11,12,13], a: [1,2,3],
1459
+ c: [11,22,33]}, order: [:a, :b, :c],
1460
+ index: [:one, :two, :three])
1461
+ )
1462
+ end
1463
+
1464
+ it "returns a row with the given index" do
1465
+ expect(@df.row[:one]).to eq([1,11,11].dv(:one, [:a, :b, :c]))
1466
+ end
1467
+
1468
+ it "returns a row with given Integer index" do
1469
+ expect(@df.row[0]).to eq([1,11,11].dv(:one, [:a, :b, :c]))
1470
+ end
1471
+
1472
+ it "returns a row with given Integer index for default index-less DataFrame" do
1473
+ df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
1474
+ c: [11,22,33,44,55]}, order: [:a, :b, :c])
1475
+
1476
+ expect(df.row[0]).to eq([1,11,11].dv(nil, [:a, :b, :c]))
1477
+ expect(df.row[3]).to eq([4,14,44].dv(nil, [:a, :b, :c]))
1478
+ end
1479
+
1480
+ it "returns a row with given Integer index for numerical index DataFrame" do
1481
+ df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
1482
+ c: [11,22,33,44,55]}, order: [:a, :b, :c], index: [1,2,3,4,5])
1483
+
1484
+ expect(df.row[0]).to eq([1,11,11].dv(nil, [:a, :b, :c]))
1485
+ expect(df.row[3]).to eq([3,13,33].dv(nil, [:a, :b, :c]))
1486
+ end
1487
+ end
1488
+
1489
+ context DaruLite::MultiIndex do
1490
+ it "returns a Vector when specifying integer index" do
1491
+ expect(@df_mi.row[0]).to eq(DaruLite::Vector.new([11,1,11,1], index: @order_mi))
1492
+ end
1493
+
1494
+ it "returns a DataFrame whecn specifying numeric range" do
1495
+ sub_index = DaruLite::MultiIndex.from_tuples([
1496
+ [:a,:one,:bar],
1497
+ [:a,:one,:baz]
1498
+ ])
1499
+
1500
+ expect(@df_mi.row[0..1]).to eq(DaruLite::DataFrame.new([
1501
+ [11,12],
1502
+ [1,2],
1503
+ [11,12],
1504
+ [1,2]
1505
+ ], order: @order_mi, index: sub_index, name: :numeric_range))
1506
+ end
1507
+
1508
+ it "returns a Vector when specifying complete tuple" do
1509
+ expect(@df_mi.row[:c,:two,:foo]).to eq(DaruLite::Vector.new([13,3,13,3], index: @order_mi))
1510
+ end
1511
+
1512
+ it "returns DataFrame when specifying first layer of MultiIndex" do
1513
+ sub_index = DaruLite::MultiIndex.from_tuples([
1514
+ [:one,:bar],
1515
+ [:one,:baz],
1516
+ [:two,:foo],
1517
+ [:two,:bar]
1518
+ ])
1519
+ expect(@df_mi.row[:c]).to eq(DaruLite::DataFrame.new([
1520
+ [11,12,13,14],
1521
+ [1,2,3,4],
1522
+ [11,12,13,14],
1523
+ [1,2,3,4]
1524
+ ], index: sub_index, order: @order_mi))
1525
+ end
1526
+
1527
+ it "returns DataFrame when specifying first and second layer of MultiIndex" do
1528
+ sub_index = DaruLite::MultiIndex.from_tuples([
1529
+ [:bar],
1530
+ [:baz]
1531
+ ])
1532
+ expect(@df_mi.row[:c,:one]).to eq(DaruLite::DataFrame.new([
1533
+ [11,12],
1534
+ [1,2],
1535
+ [11,12],
1536
+ [1,2]
1537
+ ], index: sub_index, order: @order_mi))
1538
+ end
1539
+ end
1540
+
1541
+ context DaruLite::CategoricalIndex do
1542
+ let(:idx) { DaruLite::CategoricalIndex.new [:a, 1, :a, 1, :c] }
1543
+ let(:df) do
1544
+ DaruLite::DataFrame.new({
1545
+ a: 'a'..'e',
1546
+ b: 1..5
1547
+ }, index: idx)
1548
+ end
1549
+
1550
+ context "single category" do
1551
+ context "multiple instances" do
1552
+ subject { df.row[:a] }
1553
+
1554
+ it { is_expected.to be_a DaruLite::DataFrame }
1555
+ its(:index) { is_expected.to eq DaruLite::CategoricalIndex.new [:a, :a] }
1556
+ its(:vectors) { is_expected.to eq DaruLite::Index.new [:a, :b] }
1557
+ its(:'a.to_a') { is_expected.to eq ['a', 'c'] } # category :a labels rows 0 and 2 of a: 'a'..'e'; the old block built a Vector but asserted nothing
1558
+ its(:'b.to_a') { is_expected.to eq [1, 3] } # category :a labels rows 0 and 2 of b: 1..5; the old block built a Vector but asserted nothing
1559
+ end
1560
+
1561
+ context "single instance" do
1562
+ subject { df.row[:c] }
1563
+
1564
+ it { is_expected.to be_a DaruLite::Vector }
1565
+ its(:index) { is_expected.to eq DaruLite::Index.new [:a, :b] }
1566
+ its(:to_a) { is_expected.to eq ['e', 5] }
1567
+ end
1568
+ end
1569
+
1570
+ context "multiple categories" do
1571
+ subject { df.row[:a, 1] }
1572
+
1573
+ it { is_expected.to be_a DaruLite::DataFrame }
1574
+ its(:index) { is_expected.to eq DaruLite::CategoricalIndex.new(
1575
+ [:a, 1, :a, 1 ]) }
1576
+ its(:vectors) { is_expected.to eq DaruLite::Index.new [:a, :b] }
1577
+ its(:'a.to_a') { is_expected.to match_array ['a', 'b', 'c', 'd'] } # categories :a and 1 cover rows 0..3; order-insensitive because row order is pinned by the index expectation; the old block asserted nothing
1578
+ its(:'b.to_a') { is_expected.to match_array [1, 2, 3, 4] } # categories :a and 1 cover rows 0..3; order-insensitive because row order is pinned by the index expectation; the old block asserted nothing
1579
+ end
1580
+
1581
+ context "positional index" do
1582
+ subject { df.row[0] }
1583
+
1584
+ it { is_expected.to be_a DaruLite::Vector }
1585
+ its(:index) { is_expected.to eq DaruLite::Index.new [:a, :b] }
1586
+ its(:to_a) { is_expected.to eq ['a', 1] }
1587
+ end
1588
+
1589
+ context "invalid positional index" do
1590
+ it { expect { df.row[5] }.to raise_error IndexError }
1591
+ end
1592
+
1593
+ context "invalid category" do
1594
+ it { expect { df.row[:d] }.to raise_error IndexError }
1595
+ end
1596
+ end
1597
+ end
1598
+
1599
+ context "#add_row" do
1600
+ subject(:data_frame) {
1601
+ DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
1602
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
1603
+ index: [:one, :two, :three, :four, :five])
1604
+ }
1605
+ context 'named' do
1606
+ before {
1607
+ data_frame.add_row [100,200,300], :six
1608
+ }
1609
+
1610
+ it { is_expected.to eq(DaruLite::DataFrame.new({
1611
+ a: [1,2,3,4,5,100],
1612
+ b: [11,12,13,14,15,200],
1613
+ c: [11,22,33,44,55,300]}, order: [:a, :b, :c],
1614
+ index: [:one, :two, :three, :four, :five, :six]))
1615
+ }
1616
+ end
1617
+
1618
+ context 'unnamed' do
1619
+ before {
1620
+ data_frame.add_row [100,200,300]
1621
+ }
1622
+
1623
+ it { is_expected.to eq(DaruLite::DataFrame.new({
1624
+ a: [1,2,3,4,5,100],
1625
+ b: [11,12,13,14,15,200],
1626
+ c: [11,22,33,44,55,300]}, order: [:a, :b, :c],
1627
+ index: [:one, :two, :three, :four, :five, 5]))
1628
+ }
1629
+ end
1630
+
1631
+ context 'with mulitiindex DF' do
1632
+ subject(:data_frame) {
1633
+ DaruLite::DataFrame.new({b: [11,12,13], a: [1,2,3],
1634
+ c: [11,22,33]}, order: [:a, :b, :c],
1635
+ index: DaruLite::MultiIndex.from_tuples([[:one, :two], [:one, :three], [:two, :four]]))
1636
+ }
1637
+
1638
+ before { data_frame.add_row [100,200,300], [:two, :five] }
1639
+
1640
+ it { is_expected.to eq(DaruLite::DataFrame.new({
1641
+ b: [11,12,13,200], a: [1,2,3,100],
1642
+ c: [11,22,33,300]}, order: [:a, :b, :c],
1643
+ index: DaruLite::MultiIndex.from_tuples([[:one, :two], [:one, :three], [:two, :four], [:two, :five]])))
1644
+ }
1645
+ end
1646
+
1647
+ it "allows adding rows after making empty DF by specfying only order" do
1648
+ df = DaruLite::DataFrame.new({}, order: [:a, :b, :c])
1649
+ df.add_row [1,2,3]
1650
+ df.add_row [5,6,7]
1651
+
1652
+ expect(df[:a]).to eq(DaruLite::Vector.new([1,5]))
1653
+ expect(df[:b]).to eq(DaruLite::Vector.new([2,6]))
1654
+ expect(df[:c]).to eq(DaruLite::Vector.new([3,7]))
1655
+ expect(df.index).to eq(DaruLite::Index.new([0,1]))
1656
+ end
1657
+ end
1658
+
1659
+ context "#first" do
1660
+ it 'works' do
1661
+ expect(@data_frame.first(2)).to eq(
1662
+ DaruLite::DataFrame.new({b: [11,12], a: [1,2], c: [11,22]},
1663
+ order: [:a, :b, :c],
1664
+ index: [:one, :two]))
1665
+ end
1666
+
1667
+ it 'works with too large values' do
1668
+ expect(@data_frame.first(200)).to eq(@data_frame)
1669
+ end
1670
+
1671
+ it 'has synonym' do
1672
+ expect(@data_frame.first(2)).to eq(@data_frame.head(2))
1673
+ end
1674
+
1675
+ it 'works on DateTime indexes' do
1676
+ idx = DaruLite::DateTimeIndex.new(['2017-01-01', '2017-02-01', '2017-03-01'])
1677
+ df = DaruLite::DataFrame.new({col1: ['a', 'b', 'c']}, index: idx)
1678
+ first = DaruLite::DataFrame.new({col1: ['a']}, index: DaruLite::DateTimeIndex.new(['2017-01-01']))
1679
+ expect(df.head(1)).to eq(first)
1680
+ end
1681
+ end
1682
+
1683
+ context "#last" do
1684
+ it 'works' do
1685
+ expect(@data_frame.last(2)).to eq(
1686
+ DaruLite::DataFrame.new({b: [14,15], a: [4,5], c: [44,55]},
1687
+ order: [:a, :b, :c],
1688
+ index: [:four, :five]))
1689
+ end
1690
+
1691
+ it 'works with too large values' do
1692
+ expect(@data_frame.last(200)).to eq(@data_frame)
1693
+ end
1694
+
1695
+ it 'has synonym' do
1696
+ expect(@data_frame.last(2)).to eq(@data_frame.tail(2))
1697
+ end
1698
+ end
1699
+
1700
+ context "#==" do
1701
+ it "compares by vectors, index and values of a DataFrame (ignores name)" do
1702
+ a = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
1703
+ order: [:a, :b], index: [:one, :two, :three, :four, :five])
1704
+
1705
+ b = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
1706
+ order: [:a, :b], index: [:one, :two, :three, :four, :five])
1707
+
1708
+ expect(a).to eq(b)
1709
+ end
1710
+ end
1711
+
1712
+ context '#rename' do
1713
+ subject { @data_frame.rename 'other' }
1714
+
1715
+ it { is_expected.to be_a DaruLite::DataFrame }
1716
+ its(:name) { is_expected.to eq 'other' }
1717
+ end
1718
+
1719
+ context "#dup" do
1720
+ context DaruLite::Index do
1721
+ it "dups every data structure inside DataFrame" do
1722
+ clo = @data_frame.dup
1723
+
1724
+ expect(clo.object_id) .not_to eq(@data_frame.object_id)
1725
+ expect(clo.vectors.object_id).not_to eq(@data_frame.vectors.object_id)
1726
+ expect(clo.index.object_id) .not_to eq(@data_frame.index.object_id)
1727
+
1728
+ @data_frame.each_vector_with_index do |vector, index|
1729
+ expect(vector.object_id).not_to eq(clo[index].object_id)
1730
+ expect(vector.to_a.object_id).not_to eq(clo[index].to_a.object_id)
1731
+ end
1732
+ end
1733
+ end
1734
+
1735
+ context DaruLite::MultiIndex do
1736
+ it "duplicates with multi index" do
1737
+ clo = @df_mi.dup
1738
+
1739
+ expect(clo) .to eq(@df_mi)
1740
+ expect(clo.vectors.object_id).not_to eq(@df_mi.vectors.object_id)
1741
+ expect(clo.index.object_id) .not_to eq(@df_mi.index.object_id)
1742
+ end
1743
+ end
1744
+ end
1745
+
1746
+ context '#reject_values' do
1747
+ let(:df) do
1748
+ DaruLite::DataFrame.new({
1749
+ a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1750
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
1751
+ c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
1752
+ }, index: 11..18)
1753
+ end
1754
+ before { df.to_category :b }
1755
+
1756
+ context 'remove nils only' do
1757
+ subject { df.reject_values nil }
1758
+ it { is_expected.to be_a DaruLite::DataFrame }
1759
+ its(:'b.type') { is_expected.to eq :category }
1760
+ its(:'a.to_a') { is_expected.to eq [1, 2, 7] }
1761
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 8] }
1762
+ its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 7] }
1763
+ its(:'index.to_a') { is_expected.to eq [11, 12, 18] }
1764
+ end
1765
+
1766
+ context 'remove Float::NAN only' do
1767
+ subject { df.reject_values Float::NAN }
1768
+ it { is_expected.to be_a DaruLite::DataFrame }
1769
+ its(:'b.type') { is_expected.to eq :category }
1770
+ its(:'a.to_a') { is_expected.to eq [1, 3, nil, 1, 7] }
1771
+ its(:'b.to_a') { is_expected.to eq [:a, nil, 3, 5, 8] }
1772
+ its(:'c.to_a') { is_expected.to eq ['a', 3, 5, nil, 7] }
1773
+ its(:'index.to_a') { is_expected.to eq [11, 13, 16, 17, 18] }
1774
+ end
1775
+
1776
+ context 'remove both nil and Float::NAN' do
1777
+ subject { df.reject_values nil, Float::NAN }
1778
+ it { is_expected.to be_a DaruLite::DataFrame }
1779
+ its(:'b.type') { is_expected.to eq :category }
1780
+ its(:'a.to_a') { is_expected.to eq [1, 7] }
1781
+ its(:'b.to_a') { is_expected.to eq [:a, 8] }
1782
+ its(:'c.to_a') { is_expected.to eq ['a', 7] }
1783
+ its(:'index.to_a') { is_expected.to eq [11, 18] }
1784
+ end
1785
+
1786
+ context 'any other values' do
1787
+ subject { df.reject_values 1, 5 }
1788
+ it { is_expected.to be_a DaruLite::DataFrame }
1789
+ its(:'b.type') { is_expected.to eq :category }
1790
+ its(:'a.to_a') { is_expected.to eq [2, 3, nil, Float::NAN, 7] }
1791
+ its(:'b.to_a') { is_expected.to eq [:b, nil, Float::NAN, nil, 8] }
1792
+ its(:'c.to_a') { is_expected.to eq [Float::NAN, 3, 4, 3, 7] }
1793
+ its(:'index.to_a') { is_expected.to eq [12, 13, 14, 15, 18] }
1794
+ end
1795
+
1796
+ context 'when resultant dataframe has one row' do
1797
+ subject { df.reject_values 1, 2, 3, 4, 5, nil, Float::NAN }
1798
+ it { is_expected.to be_a DaruLite::DataFrame }
1799
+ its(:'b.type') { is_expected.to eq :category }
1800
+ its(:'a.to_a') { is_expected.to eq [7] }
1801
+ its(:'b.to_a') { is_expected.to eq [8] }
1802
+ its(:'c.to_a') { is_expected.to eq [7] }
1803
+ its(:'index.to_a') { is_expected.to eq [18] }
1804
+ end
1805
+
1806
+ context 'when resultant dataframe is empty' do
1807
+ subject { df.reject_values 1, 2, 3, 4, 5, 6, 7, nil, Float::NAN }
1808
+ it { is_expected.to be_a DaruLite::DataFrame }
1809
+ its(:'b.type') { is_expected.to eq :category }
1810
+ its(:'a.to_a') { is_expected.to eq [] }
1811
+ its(:'b.to_a') { is_expected.to eq [] }
1812
+ its(:'c.to_a') { is_expected.to eq [] }
1813
+ its(:'index.to_a') { is_expected.to eq [] }
1814
+ end
1815
+ end
1816
+
1817
+ context '#replace_values' do
1818
+ subject do
1819
+ DaruLite::DataFrame.new({
1820
+ a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1821
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
1822
+ c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
1823
+ })
1824
+ end
1825
+ before { subject.to_category :b }
1826
+
1827
+ context 'replace nils only' do
1828
+ before { subject.replace_values nil, 10 }
1829
+ it { is_expected.to be_a DaruLite::DataFrame }
1830
+ its(:'b.type') { is_expected.to eq :category }
1831
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 10, Float::NAN, 10, 1, 7] }
1832
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 10, Float::NAN, 10, 3, 5, 8] }
1833
+ its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 5, 10, 7] }
1834
+ end
1835
+
1836
+ context 'replace Float::NAN only' do
1837
+ before { subject.replace_values Float::NAN, 10 }
1838
+ it { is_expected.to be_a DaruLite::DataFrame }
1839
+ its(:'b.type') { is_expected.to eq :category }
1840
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, nil, 10, nil, 1, 7] }
1841
+ its(:'b.to_a') { is_expected.to eq [:a, :b, nil, 10, nil, 3, 5, 8] }
1842
+ its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, nil, 7] }
1843
+ end
1844
+
1845
+ context 'replace both nil and Float::NAN' do
1846
+ before { subject.replace_values [nil, Float::NAN], 10 }
1847
+ it { is_expected.to be_a DaruLite::DataFrame }
1848
+ its(:'b.type') { is_expected.to eq :category }
1849
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 10, 10, 10, 1, 7] }
1850
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 10, 10, 10, 3, 5, 8] }
1851
+ its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, 10, 7] }
1852
+ end
1853
+
1854
+ context 'replace other values' do
1855
+ before { subject.replace_values [1, 5], 10 }
1856
+ it { is_expected.to be_a DaruLite::DataFrame }
1857
+ its(:'b.type') { is_expected.to eq :category }
1858
+ its(:'a.to_a') { is_expected.to eq [10, 2, 3, nil, Float::NAN, nil, 10, 7] }
1859
+ its(:'b.to_a') { is_expected.to eq [:a, :b, nil, Float::NAN, nil, 3, 10, 8] }
1860
+ its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 10, nil, 7] }
1861
+ end
1862
+ end
1863
+
1864
+ describe 'uniq' do
1865
+ let(:df) do
1866
+ DaruLite::DataFrame.from_csv 'spec/fixtures/duplicates.csv'
1867
+ end
1868
+
1869
+ context 'with no args' do
1870
+ it do
1871
+ result = df.uniq
1872
+ expect(result.shape.first).to eq 30
1873
+ end
1874
+ end
1875
+
1876
+ context 'given a vector' do
1877
+ it do
1878
+ result = df.uniq("color")
1879
+ expect(result.shape.first).to eq 2
1880
+ end
1881
+ end
1882
+
1883
+ context 'given an array of vectors' do
1884
+ it do
1885
+ result = df.uniq("color", "director_name")
1886
+ expect(result.shape.first).to eq 29
1887
+ end
1888
+ end
1889
+ end
1890
+
1891
+ context '#rolling_fillna!' do
1892
+ subject do
1893
+ DaruLite::DataFrame.new({
1894
+ a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1895
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5, nil],
1896
+ c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
1897
+ })
1898
+ end
1899
+
1900
+ context 'rolling_fillna! forwards' do
1901
+ before { subject.rolling_fillna!(:forward) }
1902
+ it { expect(subject.rolling_fillna!(:forward)).to eq(subject) }
1903
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 3, 3, 3, 1, 7] }
1904
+ its(:'b.to_a') { is_expected.to eq [:a, :b, :b, :b, :b, 3, 5, 5] }
1905
+ its(:'c.to_a') { is_expected.to eq ['a', 'a', 3, 4, 3, 5, 5, 7] }
1906
+ end
1907
+
1908
+ context 'rolling_fillna! backwards' do # each nil/NaN slot is expected to take the next non-missing value after it
1909
+ before { subject.rolling_fillna!(:backward) }
1910
+ it { expect(subject.rolling_fillna!(:backward)).to eq(subject) }
1911
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 1, 1, 1, 1, 7] }
1912
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 3, 3, 3, 3, 5, 0] } # trailing nil has no later value to copy; this spec pins the fill to 0 (presumably the implementation's initial fill value -- confirm)
1913
+ its(:'c.to_a') { is_expected.to eq ['a', 3, 3, 4, 3, 5, 7, 7] }
1914
+ end
1915
+ end
1916
+
1917
+ context "#clone" do
1918
+ it "returns a view of the whole dataframe" do
1919
+ cloned = @data_frame.clone
1920
+ expect(@data_frame.object_id).to_not eq(cloned.object_id)
1921
+ expect(@data_frame[:a].object_id).to eq(cloned[:a].object_id)
1922
+ expect(@data_frame[:b].object_id).to eq(cloned[:b].object_id)
1923
+ expect(@data_frame[:c].object_id).to eq(cloned[:c].object_id)
1924
+ end
1925
+
1926
+ it "returns a view of selected vectors" do
1927
+ cloned = @data_frame.clone(:a, :b)
1928
+ expect(cloned.object_id).to_not eq(@data_frame.object_id)
1929
+ expect(cloned[:a].object_id).to eq(@data_frame[:a].object_id)
1930
+ expect(cloned[:b].object_id).to eq(@data_frame[:b].object_id)
1931
+ end
1932
+
1933
+ it "clones properly when supplied array" do
1934
+ cloned = @data_frame.clone([:a, :b])
1935
+ expect(cloned.object_id).to_not eq(@data_frame.object_id)
1936
+ expect(cloned[:a].object_id).to eq(@data_frame[:a].object_id)
1937
+ expect(cloned[:b].object_id).to eq(@data_frame[:b].object_id)
1938
+ end
1939
+
1940
+ it "original dataframe remains unaffected when operations are applied
1941
+ on cloned data frame" do
1942
+ original = @data_frame.dup
1943
+ cloned = @data_frame.clone
1944
+ cloned.delete_vector :a
1945
+
1946
+ expect(@data_frame).to eq(original)
1947
+ end
1948
+
1949
+ end
1950
+
1951
+ context "#clone_only_valid" do
1952
+ let(:df_with_missing) {
1953
+ DaruLite::DataFrame.new({
1954
+ a: [1 , 2, 3, nil, 4, nil, 5],
1955
+ b: [nil, 2, 3, nil, 4, nil, 5],
1956
+ c: [1, 2, 3, 43 , 4, nil, 5]
1957
+ })
1958
+ }
1959
+
1960
+ let(:df_without_missing) {
1961
+ DaruLite::DataFrame.new({
1962
+ a: [2,3,4,5],
1963
+ c: [2,3,4,5]
1964
+ })
1965
+ }
1966
+ it 'does the most reasonable thing' do
1967
+ expect(df_with_missing.clone_only_valid).to eq(df_with_missing.reject_values(*DaruLite::MISSING_VALUES))
1968
+ expect(df_without_missing.clone_only_valid).to eq(df_without_missing.clone)
1969
+ end
1970
+ end
1971
+
1972
+ context "#clone_structure" do
1973
+ it "clones only the index and vector structures of the data frame" do
1974
+ cs = @data_frame.clone_structure
1975
+
1976
+ expect(cs.vectors).to eq(@data_frame.vectors)
1977
+ expect(cs.index).to eq(@data_frame.index)
1978
+ expect(cs[:a]).to eq(DaruLite::Vector.new([nil] * cs[:a].size, index: @data_frame.index))
1979
+ end
1980
+ end
1981
+
1982
+ context "#each_index" do
1983
+ it "iterates over index" do
1984
+ idxs = []
1985
+ ret = @data_frame.each_index do |index|
1986
+ idxs << index
1987
+ end
1988
+
1989
+ expect(idxs).to eq([:one, :two, :three, :four, :five])
1990
+
1991
+ expect(ret).to eq(@data_frame)
1992
+ end
1993
+ end
1994
+
1995
+ context "#each_vector_with_index" do
1996
+ it "iterates over vectors with index" do
1997
+ idxs = []
1998
+ ret = @data_frame.each_vector_with_index do |vector, index|
1999
+ idxs << index
2000
+ expect(vector.index).to eq([:one, :two, :three, :four, :five].to_index)
2001
+ expect(vector.class).to eq(DaruLite::Vector)
2002
+ end
2003
+
2004
+ expect(idxs).to eq([:a, :b, :c])
2005
+
2006
+ expect(ret).to eq(@data_frame)
2007
+ end
2008
+ end
2009
+
2010
+ context "#each_row_with_index" do
2011
+ it "iterates over rows with indexes" do
2012
+ idxs = []
2013
+ ret = @data_frame.each_row_with_index do |row, idx|
2014
+ idxs << idx
2015
+ expect(row.index).to eq([:a, :b, :c].to_index)
2016
+ expect(row.class).to eq(DaruLite::Vector)
2017
+ end
2018
+
2019
+ expect(idxs).to eq([:one, :two, :three, :four, :five])
2020
+ expect(ret) .to eq(@data_frame)
2021
+ end
2022
+ end
2023
+
2024
+ context "#each" do
2025
+ it "iterates over rows" do
2026
+ ret = @data_frame.each(:row) do |row|
2027
+ expect(row.index).to eq([:a, :b, :c].to_index)
2028
+ expect(row.class).to eq(DaruLite::Vector)
2029
+ end
2030
+
2031
+ expect(ret).to eq(@data_frame)
2032
+ end
2033
+
2034
+ it "iterates over all vectors" do
2035
+ ret = @data_frame.each do |vector|
2036
+ expect(vector.index).to eq([:one, :two, :three, :four, :five].to_index)
2037
+ expect(vector.class).to eq(DaruLite::Vector)
2038
+ end
2039
+
2040
+ expect(ret).to eq(@data_frame)
2041
+ end
2042
+
2043
+ it "returns Enumerable if no block specified" do
2044
+ ret = @data_frame.each
2045
+ expect(ret.is_a?(Enumerator)).to eq(true)
2046
+ end
2047
+
2048
+ it "raises on unknown axis" do
2049
+ expect { @data_frame.each(:kitten) }.to raise_error(ArgumentError, /axis/)
2050
+ end
2051
+ end
2052
+
2053
+ context "#recode" do
2054
+ before do
2055
+ @ans_vector = DaruLite::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
2056
+ c: [21,32,43,54,65]}, order: [:a, :b, :c],
2057
+ index: [:one, :two, :three, :four, :five])
2058
+
2059
+ @ans_rows = DaruLite::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
2060
+ c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
2061
+ index: [:one, :two, :three, :four, :five])
2062
+
2063
+ @data_frame_date_time = @data_frame.dup
2064
+ @data_frame_date_time.index = DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5)
2065
+
2066
+ @ans_vector_date_time = DaruLite::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
2067
+ c: [21,32,43,54,65]}, order: [:a, :b, :c],
2068
+ index: DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5))
2069
+
2070
+ @ans_rows_date_time = DaruLite::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
2071
+ c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
2072
+ index: DaruLite::DateTimeIndex.date_range(start:"2016-02-11", periods:5))
2073
+ end
2074
+
2075
+ it "maps over the vectors of a DataFrame and returns a DataFrame" do
2076
+ ret = @data_frame.recode do |vector|
2077
+ vector.map! { |e| e += 10}
2078
+ end
2079
+
2080
+ expect(ret).to eq(@ans_vector)
2081
+ end
2082
+
2083
+ it "maps over the rows of a DataFrame and returns a DataFrame" do
2084
+ ret = @data_frame.recode(:row) do |row|
2085
+ expect(row.class).to eq(DaruLite::Vector)
2086
+ row.map! { |e| e*e }
2087
+ end
2088
+
2089
+ expect(ret).to eq(@ans_rows)
2090
+ end
2091
+
2092
+ it "maps over the vectors of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
2093
+ ret = @data_frame_date_time.recode do |vector|
2094
+ vector.map! { |e| e += 10}
2095
+ end
2096
+
2097
+ expect(ret).to eq(@ans_vector_date_time)
2098
+ end
2099
+
2100
+ it "maps over the rows of a DataFrame with DateTimeIndex and returns a DataFrame with DateTimeIndex" do
2101
+ ret = @data_frame_date_time.recode(:row) do |row|
2102
+ expect(row.class).to eq(DaruLite::Vector)
2103
+ row.map! { |e| e*e }
2104
+ end
2105
+
2106
+ expect(ret).to eq(@ans_rows_date_time)
2107
+ end
2108
+
2109
+ end
2110
+
2111
+ context "#collect" do
2112
+ before do
2113
+ @df = DaruLite::DataFrame.new({
2114
+ a: [1,2,3,4,5],
2115
+ b: [11,22,33,44,55],
2116
+ c: [1,2,3,4,5]
2117
+ })
2118
+ end
2119
+
2120
+ it "collects calculation over rows and returns a Vector from the results" do
2121
+ expect(@df.collect(:row) { |row| (row[:a] + row[:c]) * row[:c] }).to eq(
2122
+ DaruLite::Vector.new([2,8,18,32,50])
2123
+ )
2124
+ end
2125
+
2126
+ it "collects calculation over vectors and returns a Vector from the results" do
2127
+ expect(@df.collect { |v| v[0] * v[1] + v[4] }).to eq(
2128
+ DaruLite::Vector.new([7,297,7], index: [:a, :b, :c])
2129
+ )
2130
+ end
2131
+ end
2132
+
2133
+ context "#map" do
2134
+ it "iterates over rows and returns an Array" do
2135
+ ret = @data_frame.map(:row) do |row|
2136
+ expect(row.class).to eq(DaruLite::Vector)
2137
+ row[:a] * row[:c]
2138
+ end
2139
+
2140
+ expect(ret).to eq([11, 44, 99, 176, 275])
2141
+ expect(@data_frame.vectors.to_a).to eq([:a, :b, :c])
2142
+ end
2143
+
2144
+ it "iterates over vectors and returns an Array" do
2145
+ ret = @data_frame.map do |vector|
2146
+ vector.mean
2147
+ end
2148
+ expect(ret).to eq([3.0, 13.0, 33.0])
2149
+ end
2150
+ end
2151
+
2152
+ context "#map!" do
2153
+ before do
2154
+ @ans_vector = DaruLite::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
2155
+ c: [21,32,43,54,65]}, order: [:a, :b, :c],
2156
+ index: [:one, :two, :three, :four, :five])
2157
+
2158
+ @ans_row = DaruLite::DataFrame.new({b: [12,13,14,15,16], a: [2,3,4,5,6],
2159
+ c: [12,23,34,45,56]}, order: [:a, :b, :c],
2160
+ index: [:one, :two, :three, :four, :five])
2161
+ end
2162
+
2163
+ it "destructively maps over the vectors and changes the DF" do
2164
+ @data_frame.map! do |vector|
2165
+ vector + 10
2166
+ end
2167
+ expect(@data_frame).to eq(@ans_vector)
2168
+ end
2169
+
2170
+ it "destructively maps over the rows and changes the DF" do
2171
+ @data_frame.map!(:row) do |row|
2172
+ row + 1
2173
+ end
2174
+
2175
+ expect(@data_frame).to eq(@ans_row)
2176
+ end
2177
+ end
2178
+
2179
+ context "#map_vectors_with_index" do
2180
+ it "iterates over vectors with index and returns an Array" do
2181
+ idx = []
2182
+ ret = @data_frame.map_vectors_with_index do |vector, index|
2183
+ idx << index
2184
+ vector.recode { |e| e += 10}
2185
+ end
2186
+
2187
+ expect(ret).to eq([
2188
+ DaruLite::Vector.new([11,12,13,14,15],index: [:one, :two, :three, :four, :five]),
2189
+ DaruLite::Vector.new([21,22,23,24,25],index: [:one, :two, :three, :four, :five]),
2190
+ DaruLite::Vector.new([21,32,43,54,65],index: [:one, :two, :three, :four, :five])])
2191
+ expect(idx).to eq([:a, :b, :c])
2192
+ end
2193
+ end
2194
+
2195
+ # FIXME: map_VECTORS_with_index, but collect_VECTOR_with_index -- ??? -- zverok
2196
+ # (Not to mention the unfortunate difference between them...)
2197
+ context "#collect_vector_with_index" do
2198
+ it "iterates over vectors with index and returns an Array" do
2199
+ idx = []
2200
+ ret = @data_frame.collect_vector_with_index do |vector, index|
2201
+ idx << index
2202
+ vector.sum
2203
+ end
2204
+
2205
+ expect(ret).to eq(DaruLite::Vector.new([15, 65, 165], index: [:a, :b, :c]))
2206
+ expect(idx).to eq([:a, :b, :c])
2207
+ end
2208
+ end
2209
+
2210
+ context "#map_rows_with_index" do
2211
+ it "iterates over rows with index and returns an Array" do
2212
+ idx = []
2213
+ ret = @data_frame.map_rows_with_index do |row, index|
2214
+ idx << index
2215
+ expect(row.class).to eq(DaruLite::Vector)
2216
+ row[:a] * row[:c]
2217
+ end
2218
+
2219
+ expect(ret).to eq([11, 44, 99, 176, 275])
2220
+ expect(idx).to eq([:one, :two, :three, :four, :five])
2221
+ end
2222
+ end
2223
+
2224
+ context '#collect_row_with_index' do
2225
+ it "iterates over rows with index and returns a Vector" do
2226
+ idx = []
2227
+ ret = @data_frame.collect_row_with_index do |row, index|
2228
+ idx << index
2229
+ expect(row.class).to eq(DaruLite::Vector)
2230
+ row[:a] * row[:c]
2231
+ end
2232
+
2233
+ expected = DaruLite::Vector.new([11, 44, 99, 176, 275], index: @data_frame.index)
2234
+ expect(ret).to eq(expected)
2235
+ expect(idx).to eq([:one, :two, :three, :four, :five])
2236
+ end
2237
+ end
2238
+
2239
+ context "#delete_vector" do
2240
+ context DaruLite::Index do
2241
+ it "deletes the specified vector" do
2242
+ @data_frame.delete_vector :a
2243
+
2244
+ expect(@data_frame).to eq(DaruLite::DataFrame.new({b: [11,12,13,14,15],
2245
+ c: [11,22,33,44,55]}, order: [:b, :c],
2246
+ index: [:one, :two, :three, :four, :five]))
2247
+ end
2248
+ end
2249
+ end
2250
+
2251
+ context "#delete_vectors" do
2252
+ context DaruLite::Index do
2253
+ it "deletes the specified vectors" do
2254
+ @data_frame.delete_vectors :a, :b
2255
+
2256
+ expect(@data_frame).to eq(DaruLite::DataFrame.new({
2257
+ c: [11,22,33,44,55]}, order: [:c],
2258
+ index: [:one, :two, :three, :four, :five]))
2259
+ end
2260
+ end
2261
+ end
2262
+
2263
+ context "#delete_row" do
2264
+ it "deletes the specified row" do
2265
+ @data_frame.delete_row :three
2266
+
2267
+ expect(@data_frame).to eq(DaruLite::DataFrame.new({b: [11,12,14,15], a: [1,2,4,5],
2268
+ c: [11,22,44,55]}, order: [:a, :b, :c], index: [:one, :two, :four, :five]))
2269
+ end
2270
+ end
2271
+
2272
+ context "#keep_row_if" do
2273
+ pending "changing row from under the iterator trips this"
2274
+ it "keeps row if block evaluates to true" do
2275
+ df = DaruLite::DataFrame.new({b: [10,12,20,23,30], a: [50,30,30,1,5],
2276
+ c: [10,20,30,40,50]}, order: [:a, :b, :c],
2277
+ index: [:one, :two, :three, :four, :five])
2278
+
2279
+ df.keep_row_if do |row|
2280
+ row[:a] % 10 == 0
2281
+ end
2282
+ # TODO: write expectation
2283
+ end
2284
+ end
2285
+
2286
+ context "#keep_vector_if" do
2287
+ it "keeps vector if block evaluates to true" do
2288
+ @data_frame.keep_vector_if do |vector|
2289
+ vector == [1,2,3,4,5].dv(nil, [:one, :two, :three, :four, :five])
2290
+ end
2291
+
2292
+ expect(@data_frame).to eq(DaruLite::DataFrame.new({a: [1,2,3,4,5]}, order: [:a],
2293
+ index: [:one, :two, :three, :four, :five]))
2294
+ end
2295
+ end
2296
+
2297
+ context "#filter_field" do
2298
+ before do
2299
+ @df = DaruLite::DataFrame.new({
2300
+ :id => DaruLite::Vector.new([1, 2, 3, 4, 5]),
2301
+ :name => DaruLite::Vector.new(%w(Alex Claude Peter Franz George)),
2302
+ :age => DaruLite::Vector.new([20, 23, 25, 27, 5]),
2303
+ :city => DaruLite::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
2304
+ :a1 => DaruLite::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) },
2305
+ order: [:id, :name, :age, :city, :a1])
2306
+ end
2307
+
2308
+ it "creates new vector with the data of a given field for which block returns true" do
2309
+ filtered = @df.filter_vector(:id) { |c| c[:id] == 2 or c[:id] == 4 }
2310
+ expect(filtered).to eq(DaruLite::Vector.new([2,4]))
2311
+ end
2312
+ end
2313
+
2314
+ context "#filter_rows" do
2315
+ context DaruLite::Index do
2316
+ context "when specified no index" do
2317
+ it "filters rows" do
2318
+ df = DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]})
2319
+
2320
+ a = df.filter_rows do |row|
2321
+ row[:a] % 2 == 0
2322
+ end
2323
+
2324
+ expect(a).to eq(DaruLite::DataFrame.new({a: [2], b: [3]}, order: [:a, :b], index: [1]))
2325
+ end
2326
+ end
2327
+
2328
+ context "when specified numerical index" do
2329
+ it "filters rows" do
2330
+ df = DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]}, index: [1,2,3])
2331
+
2332
+ a = df.filter_rows do |row|
2333
+ row[:a] % 2 == 0
2334
+ end
2335
+
2336
+ expect(a).to eq(DaruLite::DataFrame.new({a: [2], b: [3]}, order: [:a, :b], index: [2]))
2337
+ end
2338
+ end
2339
+
2340
+ it "preserves names of vectors" do
2341
+ df = DaruLite::DataFrame.new a: 1..3, b: 4..6
2342
+ df1 = df.filter_rows { |r| r[:a] != 2 }
2343
+
2344
+ expect(df1[:a].name).to eq(df[:a].name)
2345
+ end
2346
+ end
2347
+ end
2348
+
2349
+ context "#filter_vectors" do
2350
+ context DaruLite::Index do
2351
+ it "filters vectors" do
2352
+ df = DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]})
2353
+
2354
+ a = df.filter_vectors do |vector|
2355
+ vector[0] == 1
2356
+ end
2357
+
2358
+ expect(a).to eq(DaruLite::DataFrame.new({a: [1,2,3]}))
2359
+ end
2360
+ end
2361
+ end
2362
+
2363
+ context "#filter" do
2364
+ let(:df) { DaruLite::DataFrame.new({a: [1,2,3], b: [2,3,4]}) }
2365
+ it "dispatches" do
2366
+ expect(df.filter(:row){|r| r[:a] % 2 == 0 }).to \
2367
+ eq df.filter_rows{|r| r[:a] % 2 == 0 }
2368
+
2369
+ expect(df.filter(:vector){|v| v[0] == 1}).to \
2370
+ eq df.filter_vectors{|v| v[0] == 1}
2371
+
2372
+ expect { df.filter(:kitten){} }.to raise_error ArgumentError, /axis/
2373
+ end
2374
+ end
2375
+
2376
+ context "#to_a" do
2377
+ context DaruLite::Index do
2378
+ it "converts DataFrame into array of hashes" do
2379
+ arry = @data_frame.to_a
2380
+
2381
+ expect(arry).to eq(
2382
+ [
2383
+ [
2384
+ {a: 1, b: 11, c: 11},
2385
+ {a: 2, b: 12, c: 22},
2386
+ {a: 3, b: 13, c: 33},
2387
+ {a: 4, b: 14, c: 44},
2388
+ {a: 5, b: 15, c: 55}
2389
+ ],
2390
+ [
2391
+ :one, :two, :three, :four, :five
2392
+ ]
2393
+ ])
2394
+ end
2395
+ end
2396
+
2397
+ context DaruLite::MultiIndex do
2398
+ pending
2399
+ end
2400
+ end
2401
+
2402
+ context "#to_h" do
2403
+ it "converts to a hash" do
2404
+ expect(@data_frame.to_h).to eq(
2405
+ {
2406
+ a: DaruLite::Vector.new([1,2,3,4,5],
2407
+ index: [:one, :two, :three, :four, :five]),
2408
+ b: DaruLite::Vector.new([11,12,13,14,15],
2409
+ index: [:one, :two, :three, :four, :five]),
2410
+ c: DaruLite::Vector.new([11,22,33,44,55],
2411
+ index: [:one, :two, :three, :four, :five])
2412
+ }
2413
+ )
2414
+ end
2415
+ end
2416
+
2417
+ context "#sort" do
2418
+ context DaruLite::Index do
2419
+ before :each do
2420
+ @df = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [-2,-1,5,3,9,1], c: ['a','aa','aaa','aaaa','aaaaa','aaaaaa']})
2421
+ end
2422
+
2423
+ it "sorts according to given vector order (bang)" do
2424
+ a_sorter = lambda { |a| a }
2425
+ ans = @df.sort([:a], by: { a: a_sorter })
2426
+
2427
+ expect(ans).to eq(
2428
+ DaruLite::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,9,1,3], c: ['aaa','aa','a','aaaaa','aaaaaa','aaaa']},
2429
+ index: [2,1,0,4,5,3])
2430
+ )
2431
+ expect(ans).to_not eq(@df)
2432
+ end
2433
+
2434
+ it "sorts according to vector order using default lambdas (index re ordered according to the last vector) (bang)" do
2435
+ ans = @df.sort([:a, :b])
2436
+ expect(ans).to eq(
2437
+ DaruLite::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,1,9,3], c: ['aaa','aa','a','aaaaaa','aaaaa','aaaa']},
2438
+ index: [2,1,0,5,4,3])
2439
+ )
2440
+ expect(ans).to_not eq(@df)
2441
+ end
2442
+ end
2443
+
2444
+ context DaruLite::MultiIndex do
2445
+ pending
2446
+ end
2447
+
2448
+ context DaruLite::CategoricalIndex do
2449
+ let(:idx) { DaruLite::CategoricalIndex.new [:a, 1, :a, 1, :c] }
2450
+ let(:df) do
2451
+ DaruLite::DataFrame.new({
2452
+ a: [2, -1, 3, 4, 5],
2453
+ b: ['x', 'y', 'x', 'a', 'y'],
2454
+ c: [nil, nil, -2, 2, 1]
2455
+ }, index: idx)
2456
+ end
2457
+
2458
+ context "ascending order" do
2459
+ context "single vector" do
2460
+ subject { df.sort [:a] }
2461
+
2462
+ its(:'index.to_a') { is_expected.to eq [1, :a, :a, 1, :c] }
2463
+ its(:'a.to_a') { is_expected.to eq [-1, 2, 3, 4, 5] }
2464
+ its(:'b.to_a') { is_expected.to eq ['y', 'x', 'x', 'a', 'y'] }
2465
+ its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2466
+ end
2467
+
2468
+ context "multiple vectors" do
2469
+ subject { df.sort [:c, :b] }
2470
+
2471
+ its(:'index.to_a') { is_expected.to eq [:a, 1, :a, :c, 1] }
2472
+ its(:'a.to_a') { is_expected.to eq [2, -1, 3, 5, 4] }
2473
+ its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'y', 'a'] }
2474
+ its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 1, 2] }
2475
+ end
2476
+
2477
+ context "block" do
2478
+ context "automatic handle nils" do
2479
+ subject do
2480
+ df.sort [:c], by: {c: lambda { |a| a.abs } }, handle_nils: true
2481
+ end
2482
+
2483
+ its(:'index.to_a') { is_expected.to eq [:a, 1, :c, :a, 1] }
2484
+ its(:'a.to_a') { is_expected.to eq [2, -1, 5, 3, 4] }
2485
+ its(:'b.to_a') { is_expected.to eq ['x', 'y', 'y', 'x', 'a'] }
2486
+ its(:'c.to_a') { is_expected.to eq [nil, nil, 1, -2, 2] }
2487
+ end
2488
+
2489
+ context "manually handle nils" do
2490
+ subject do
2491
+ df.sort [:c], by: {c: lambda { |a| (a.nil?)?[1]:[0,a.abs] } }
2492
+ end
2493
+
2494
+ its(:'index.to_a') { is_expected.to eq [:c, :a, 1, :a, 1] }
2495
+ its(:'a.to_a') { is_expected.to eq [5, 3, 4, 2, -1] }
2496
+ its(:'b.to_a') { is_expected.to eq ['y', 'x', 'a', 'x', 'y'] }
2497
+ its(:'c.to_a') { is_expected.to eq [1, -2, 2, nil, nil] }
2498
+ end
2499
+ end
2500
+ end
2501
+
2502
+ context "descending order" do
2503
+ context "single vector" do
2504
+ subject { df.sort [:a], ascending: false }
2505
+
2506
+ its(:'index.to_a') { is_expected.to eq [:c, 1, :a, :a, 1] }
2507
+ its(:'a.to_a') { is_expected.to eq [5, 4, 3, 2, -1] }
2508
+ its(:'b.to_a') { is_expected.to eq ['y', 'a', 'x', 'x', 'y'] }
2509
+ its(:'c.to_a') { is_expected.to eq [1, 2, -2, nil, nil] }
2510
+ end
2511
+
2512
+ context "multiple vectors" do
2513
+ subject { df.sort [:c, :b], ascending: false }
2514
+
2515
+ its(:'index.to_a') { is_expected.to eq [1, :a, 1, :c, :a] }
2516
+ its(:'a.to_a') { is_expected.to eq [-1, 2, 4, 5, 3] }
2517
+ its(:'b.to_a') { is_expected.to eq ['y', 'x', 'a', 'y', 'x'] }
2518
+ its(:'c.to_a') { is_expected.to eq [nil, nil, 2, 1, -2] }
2519
+ end
2520
+
2521
+ context "block" do
2522
+ context "automatic handle nils" do
2523
+ subject do
2524
+ df.sort [:c],
2525
+ by: {c: lambda { |a| a.abs } },
2526
+ handle_nils: true,
2527
+ ascending: false
2528
+ end
2529
+
2530
+ its(:'index.to_a') { is_expected.to eq [:a, 1, :a, 1, :c] }
2531
+ its(:'a.to_a') { is_expected.to eq [2, -1, 3, 4, 5] }
2532
+ its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'a', 'y'] }
2533
+ its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2534
+ end
2535
+
2536
+ context "manually handle nils" do
2537
+ subject do
2538
+ df.sort [:c],
2539
+ by: {c: lambda { |a| (a.nil?)?[1]:[0,a.abs] } },
2540
+ ascending: false
2541
+ end
2542
+
2543
+ its(:'index.to_a') { is_expected.to eq [:a, 1, :a, 1, :c] }
2544
+ its(:'a.to_a') { is_expected.to eq [2, -1, 3, 4, 5] }
2545
+ its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'a', 'y'] }
2546
+ its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2547
+ end
2548
+ end
2549
+ end
2550
+ end
2551
+ end
2552
+
2553
+ context "#sort!" do
2554
+ context DaruLite::Index do
2555
+ before :each do
2556
+ @df = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [-2,-1,5,3,9,1],
2557
+ c: ['a','aa','aaa','aaaa','aaaaa','aaaaaa']})
2558
+ end
2559
+
2560
+ it "sorts according to given vector order (bang)" do
2561
+ a_sorter = lambda { |a| a }
2562
+
2563
+ expect(@df.sort!([:a], by: { a: a_sorter })).to eq(
2564
+ DaruLite::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,9,1,3],
2565
+ c: ['aaa','aa','a','aaaaa','aaaaaa','aaaa']}, index: [2,1,0,4,5,3])
2566
+ )
2567
+ end
2568
+
2569
+ it "sorts according to vector order using default lambdas (index re ordered according to the last vector) (bang)" do
2570
+ expect(@df.sort!([:a, :b])).to eq(
2571
+ DaruLite::DataFrame.new({a: [-6,1,5,5,5,7], b: [5,-1,-2,1,9,3], c: ['aaa','aa','a','aaaaaa','aaaaa','aaaa']},
2572
+ index: [2,1,0,5,4,3])
2573
+ )
2574
+ end
2575
+
2576
+ it "sorts both vectors in descending order" do
2577
+ expect(@df.sort!([:a,:b], ascending: [false, false])).to eq(
2578
+ DaruLite::DataFrame.new({a: [7,5,5,5,1,-6], b: [3,9,1,-2,-1,5], c: ['aaaa','aaaaa','aaaaaa', 'a','aa', 'aaa'] },
2579
+ index: [3,4,5,0,1,2])
2580
+ )
2581
+ end
2582
+
2583
+ it "sorts one vector in desc and other is asc" do
2584
+ expect(@df.sort!([:a, :b], ascending: [false, true])).to eq(
2585
+ DaruLite::DataFrame.new({a: [7,5,5,5,1,-6], b: [3,-2,1,9,-1,5], c: ['aaaa','a','aaaaaa','aaaaa','aa','aaa']},
2586
+ index: [3,0,5,4,1,2])
2587
+ )
2588
+ end
2589
+
2590
+ it "sorts many vectors" do
2591
+ d = DaruLite::DataFrame.new({a: [1,1,1,222,44,5,5,544], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
2592
+
2593
+ expect(d.sort!([:a, :b, :c], ascending: [false, true, false])).to eq(
2594
+ DaruLite::DataFrame.new({a: [544,222,44,5,5,1,1,1], b: [3,222,111,22,554,44,44,333], c: [5,3,3,5,1,3,2,5]},
2595
+ index: [7,3,4,6,5,0,1,2])
2596
+ )
2597
+ end
2598
+
2599
+ it "places nils at the beginning when sorting ascedingly" do
2600
+ d = DaruLite::DataFrame.new({a: [1,1,1,nil,44,5,5,nil], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
2601
+
2602
+ expect(d.sort!([:a, :b, :c], ascending: [true, true, false])).to eq(
2603
+ DaruLite::DataFrame.new({a: [nil,nil,1,1,1,5,5,44], b: [3,222,44,44,333,22,554,111], c: [5,3,3,2,5,5,1,3]},
2604
+ index: [7,3,0,1,2,6,5,4])
2605
+ )
2606
+ end
2607
+
2608
+ it "places nils at the beginning when sorting decendingly" do
2609
+ d = DaruLite::DataFrame.new({a: [1,1,1,nil,44,5,5,nil], b: [44,44,333,222,111,554,22,3], c: [3,2,5,3,3,1,5,5]})
2610
+
2611
+ expect(d.sort!([:a, :b, :c], ascending: [false, true, false])).to eq(
2612
+ DaruLite::DataFrame.new({a: [nil,nil,44,5,5,1,1,1], b: [3,222,111,22,554,44,44,333], c: [5,3,3,5,1,3,2,5]},
2613
+ index: [7,3,4,6,5,0,1,2])
2614
+ )
2615
+ end
2616
+
2617
+ it "sorts vectors of non-numeric types with nils in ascending order" do
2618
+ non_numeric = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
2619
+ c: ['aaa','aaa',nil,'baaa','xxx',nil]})
2620
+
2621
+ expect(non_numeric.sort!([:c], ascending: [true])).to eq(
2622
+ DaruLite::DataFrame.new({a: [-6, 5, 5, 1, 7, 5], b: [1, 1, nil, -1, nil, -1],
2623
+ c: [nil, nil, "aaa", "aaa", "baaa", "xxx"]},
2624
+ index: [2, 5, 0, 1, 3, 4])
2625
+ )
2626
+ end
2627
+
2628
+ it "sorts vectors of non-numeric types with nils in descending order" do
2629
+ non_numeric = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
2630
+ c: ['aaa','aaa',nil,'baaa','xxx',nil]})
2631
+
2632
+ expect(non_numeric.sort!([:c], ascending: [false])).to eq(
2633
+ DaruLite::DataFrame.new({a: [-6, 5, 5, 7, 5, 1], b: [1, 1, -1, nil, nil, -1],
2634
+ c: [nil, nil, "xxx", "baaa", "aaa", "aaa"]},
2635
+ index: [2, 5, 4, 3, 0, 1])
2636
+ )
2637
+ end
2638
+
2639
+ it "sorts vectors with block provided and handle nils automatically" do
2640
+ non_numeric = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
2641
+ c: ['aaa','aaa',nil,'baaa','xxx',nil]})
2642
+
2643
+ expect(non_numeric.sort!([:b], by: {b: lambda { |a| a.abs } }, handle_nils: true)).to eq(
2644
+ DaruLite::DataFrame.new({a: [5, 7, 1, -6, 5, 5], b: [nil, nil, -1, 1, -1, 1],
2645
+ c: ["aaa", "baaa", "aaa", nil, "xxx", nil]},
2646
+ index: [0, 3, 1, 2, 4, 5])
2647
+ )
2648
+ end
2649
+
2650
+ it "sorts vectors with block provided and nils handled manually" do
2651
+ non_numeric = DaruLite::DataFrame.new({a: [5,1,-6,7,5,5], b: [nil,-1,1,nil,-1,1],
2652
+ c: ['aaa','aaa',nil,'baaa','xxx',nil]})
2653
+
2654
+ expect(non_numeric.sort!([:b], by: {b: lambda { |a| (a.nil?)?[1]:[0, a.abs]} }, handle_nils: false)).to eq(
2655
+ DaruLite::DataFrame.new({a: [1, -6, 5, 5, 5, 7], b: [-1, 1, -1, 1, nil, nil],
2656
+ c: ["aaa", nil, "xxx", nil, "aaa", "baaa"]},
2657
+ index: [1, 2, 4, 5, 0, 3])
2658
+ )
2659
+ end
2660
+ end
2661
+
2662
+ context DaruLite::MultiIndex do
2663
+ pending
2664
+ it "sorts the DataFrame when specified full tuple" do
2665
+ @df_mi.sort([[:a,:one,:bar]])
2666
+ end
2667
+ end
2668
+ end
2669
+
2670
+ context "#index=" do
2671
+ before :each do
2672
+ @df = DaruLite::DataFrame.new({
2673
+ a: [1,2,3,4,5],
2674
+ b: [11,22,33,44,55],
2675
+ c: %w(a b c d e)
2676
+ })
2677
+ end
2678
+
2679
+ it "simply reassigns the index" do
2680
+ @df.index = DaruLite::Index.new(['4','foo', :bar, 0, 23])
2681
+ expect(@df.row['foo']).to eq(DaruLite::Vector.new([2,22,'b'], index: [:a,:b,:c]))
2682
+ end
2683
+
2684
+ it "raises error for improper length index" do
2685
+ expect {
2686
+ @df.index = DaruLite::Index.new([1,2])
2687
+ }.to raise_error(ArgumentError)
2688
+ end
2689
+
2690
+ it "is able to accept array" do
2691
+ @df.index = (1..5).to_a
2692
+ expect(@df.index).to eq DaruLite::Index.new (1..5).to_a
2693
+ end
2694
+ end
2695
+
2696
+ context '#order=' do
2697
+ let(:df) do
2698
+ DaruLite::DataFrame.new({
2699
+ a: [1, 2, 3],
2700
+ b: [4, 5, 6]
2701
+ }, order: [:a, :b])
2702
+ end
2703
+
2704
+ context 'correct order' do
2705
+ before { df.order = [:b, :a] }
2706
+ subject { df }
2707
+
2708
+ its(:'vectors.to_a') { is_expected.to eq [:b, :a] }
2709
+ its(:'b.to_a') { is_expected.to eq [4, 5, 6] }
2710
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3] }
2711
+ end
2712
+
2713
+ context 'insufficient vectors' do
2714
+ it { expect { df.order = [:a] }.to raise_error }
2715
+ end
2716
+
2717
+ context 'wrong vectors' do
2718
+ it { expect { df.order = [:a, :b, 'b'] }.to raise_error }
2719
+ end
2720
+ end
2721
+
2722
+ context "#vectors=" do
2723
+ before :each do
2724
+ @df = DaruLite::DataFrame.new({
2725
+ a: [1,2,3,4,5],
2726
+ b: [11,22,33,44,55],
2727
+ c: %w(a b c d e)
2728
+ })
2729
+ end
2730
+
2731
+ it "simply reassigns vectors" do
2732
+ @df.vectors = DaruLite::Index.new(['b',0,'m'])
2733
+
2734
+ expect(@df.vectors).to eq(DaruLite::Index.new(['b',0,'m']))
2735
+ expect(@df['b']).to eq(DaruLite::Vector.new([1,2,3,4,5]))
2736
+ expect(@df[0]).to eq(DaruLite::Vector.new([11,22,33,44,55]))
2737
+ expect(@df['m']).to eq(DaruLite::Vector.new(%w(a b c d e)))
2738
+ end
2739
+
2740
+ it "raises error for improper length index" do
2741
+ expect {
2742
+ @df.vectors = DaruLite::Index.new([1,2,'3',4,'5'])
2743
+ }.to raise_error(ArgumentError)
2744
+ end
2745
+
2746
+ it "change name of vectors in @data" do
2747
+ new_index_array = [:k, :l, :m]
2748
+ @df.vectors = DaruLite::Index.new(new_index_array)
2749
+
2750
+ expect(@df.data.map { |vector| vector.name }).to eq(new_index_array)
2751
+ end
2752
+ end
2753
+
2754
+ context "#rename_vectors!" do
2755
+ before do
2756
+ @df = DaruLite::DataFrame.new({
2757
+ a: [1,2,3,4,5],
2758
+ b: [11,22,33,44,55],
2759
+ c: %w(a b c d e)
2760
+ })
2761
+ end
2762
+
2763
+ it "returns self as modified dataframe" do
2764
+ expect(@df.rename_vectors!(:a => :alpha)).to eq(@df)
2765
+ end
2766
+
2767
+ it "re-uses rename_vectors method" do
2768
+ name_map = { :a => :alpha, :c => :gamma }
2769
+ expect(@df).to receive(:rename_vectors).with(name_map)
2770
+ @df.rename_vectors! name_map
2771
+ end
2772
+ end
2773
+
2774
+ context "#rename_vectors" do
2775
+ before do
2776
+ @df = DaruLite::DataFrame.new({
2777
+ a: [1,2,3,4,5],
2778
+ b: [11,22,33,44,55],
2779
+ c: %w(a b c d e)
2780
+ })
2781
+ end
2782
+
2783
+ it "returns DaruLite::Index" do
2784
+ expect(@df.rename_vectors(:a => :alpha)).to be_kind_of(DaruLite::Index)
2785
+ end
2786
+
2787
+ it "renames vectors using a hash map" do
2788
+ @df.rename_vectors :a => :alpha, :c => :gamma
2789
+ expect(@df.vectors.to_a).to eq([:alpha, :b, :gamma])
2790
+ end
2791
+
2792
+ it "overwrites vectors if the new name already exists" do
2793
+ saved_vector = @df[:a].dup
2794
+
2795
+ @df.rename_vectors :a => :b
2796
+ expect(@df.vectors.to_a).to eq([:b, :c])
2797
+ expect(@df[:b]).to eq saved_vector
2798
+ end
2799
+
2800
+ it "makes no changes if the old and new names are the same" do
2801
+ saved_vector = @df[:a].dup
2802
+
2803
+ @df.rename_vectors :a => :a
2804
+ expect(@df.vectors.to_a).to eq([:a, :b, :c])
2805
+ expect(@df[:a]).to eq saved_vector
2806
+ end
2807
+ end
2808
+
2809
+ context "#add_level_to_vectors" do
2810
+ subject { df.add_level_to_vectors(top_level_label) }
2811
+
2812
+ let(:df) do
2813
+ DaruLite::DataFrame.new({
2814
+ a: [1, 2, 3, 4, 5],
2815
+ b: [11, 22, 33, 44, 55],
2816
+ c: %w(a b c d e)
2817
+ })
2818
+ end
2819
+ let(:top_level_label) { :percentages }
2820
+ let(:expected_index) do
2821
+ DaruLite::MultiIndex.from_tuples([
2822
+ [:percentages, :a], [:percentages, :b],[:percentages, :c],
2823
+ ])
2824
+ end
2825
+
2826
+ it 'returns expected Multi::Index' do
2827
+ expect(subject).to eq(expected_index)
2828
+ end
2829
+
2830
+ it 'updates dataframe vectors to the expected Multi::Index' do
2831
+ expect { subject }.to change { df.vectors }.to(expected_index)
2832
+ end
2833
+ end
2834
+
2835
+ context "#reindex" do
2836
+ it "re indexes and aligns accordingly" do
2837
+ df = DaruLite::DataFrame.new({
2838
+ a: [1,2,3,4,5],
2839
+ b: [11,22,33,44,55],
2840
+ c: %w(a b c d e)
2841
+ })
2842
+
2843
+ ans = df.reindex(DaruLite::Index.new([1,3,0,8,2]))
2844
+ expect(ans).to eq(DaruLite::DataFrame.new({
2845
+ a: [2,4,1,nil,3],
2846
+ b: [22,44,11,nil,33],
2847
+ c: ['b','d','a',nil,'c']
2848
+ }, index: DaruLite::Index.new([1,3,0,8,2])))
2849
+ expect(ans).to_not eq(df)
2850
+ end
2851
+ end
2852
+
2853
+ context "#reindex_vectors" do
2854
+ it "re indexes vectors and aligns accordingly" do
2855
+ df = DaruLite::DataFrame.new({
2856
+ a: [1,2,3,4,5],
2857
+ b: [11,22,33,44,55],
2858
+ c: %w(a b c d e)
2859
+ })
2860
+
2861
+ ans = df.reindex_vectors(DaruLite::Index.new([:b, 'a', :a]))
2862
+ expect(ans).to eq(DaruLite::DataFrame.new({
2863
+ :b => [11,22,33,44,55],
2864
+ 'a' => [nil, nil, nil, nil, nil],
2865
+ :a => [1,2,3,4,5]
2866
+ }, order: [:b, 'a', :a]))
2867
+ end
2868
+
2869
+ it 'raises ArgumentError if argument was not an index' do
2870
+ df = DaruLite::DataFrame.new([])
2871
+ expect { df.reindex_vectors([]) }.to raise_error(ArgumentError)
2872
+ end
2873
+ end
2874
+
2875
+ context "#to_matrix" do
2876
+ before do
2877
+ @df = DaruLite::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
2878
+ c: [11,22,33,44,55], d: [5,4,nil,2,1], e: ['this', 'has', 'string','data','too']},
2879
+ order: [:a, :b, :c,:d,:e],
2880
+ index: [:one, :two, :three, :four, :five])
2881
+ end
2882
+
2883
+ it "concats numeric non-nil vectors to Matrix" do
2884
+ expect(@df.to_matrix).to eq(Matrix[
2885
+ [1,11,11,5],
2886
+ [2,12,22,4],
2887
+ [3,13,33,nil],
2888
+ [4,14,44,2],
2889
+ [5,15,55,1]
2890
+ ])
2891
+ end
2892
+ end
2893
+
2894
+ context "#transpose" do
2895
+ context DaruLite::Index do
2896
+ it "transposes a DataFrame including row and column indexing" do
2897
+ expect(@data_frame.transpose).to eq(DaruLite::DataFrame.new({
2898
+ one: [1,11,11],
2899
+ two: [2,12,22],
2900
+ three: [3,13,33],
2901
+ four: [4,14,44],
2902
+ five: [5,15,55]
2903
+ }, index: [:a, :b, :c],
2904
+ order: [:one, :two, :three, :four, :five])
2905
+ )
2906
+ end
2907
+ end
2908
+
2909
+ context DaruLite::MultiIndex do
2910
+ it "transposes a DataFrame including row and column indexing" do
2911
+ expect(@df_mi.transpose).to eq(DaruLite::DataFrame.new([
2912
+ @vector_arry1,
2913
+ @vector_arry2,
2914
+ @vector_arry1,
2915
+ @vector_arry2].transpose, index: @order_mi, order: @multi_index))
2916
+ end
2917
+ end
2918
+ end
2919
+
2920
+ context "#pivot_table" do
2921
+ before do
2922
+ @df = DaruLite::DataFrame.new({
2923
+ a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
2924
+ b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
2925
+ c: ['small','large','large','small','small','large','small','large','small'],
2926
+ d: [1,2,2,3,3,4,5,6,7],
2927
+ e: [2,4,4,6,6,8,10,12,14]
2928
+ })
2929
+ end
2930
+
2931
+ it "creates row index as per (single) index argument and default aggregates to mean" do
2932
+ expect(@df.pivot_table(index: [:a])).to eq(DaruLite::DataFrame.new({
2933
+ d: [5.5,2.2],
2934
+ e: [11.0,4.4]
2935
+ }, index: ['bar', 'foo']))
2936
+ end
2937
+
2938
+ it "creates row index as per (double) index argument and default aggregates to mean" do
2939
+ agg_mi = DaruLite::MultiIndex.from_tuples(
2940
+ [
2941
+ ['bar', 'large'],
2942
+ ['bar', 'small'],
2943
+ ['foo', 'large'],
2944
+ ['foo', 'small']
2945
+ ]
2946
+ )
2947
+ expect(@df.pivot_table(index: [:a, :c]).round(2)).to eq(DaruLite::DataFrame.new({
2948
+ d: [5.0 , 6.0, 2.0, 2.33],
2949
+ e: [10.0, 12.0, 4.0, 4.67]
2950
+ }, index: agg_mi))
2951
+ end
2952
+
2953
+ it "creates row and vector index as per (single) index and (single) vectors args" do
2954
+ agg_vectors = DaruLite::MultiIndex.from_tuples([
2955
+ [:d, 'one'],
2956
+ [:d, 'two'],
2957
+ [:e, 'one'],
2958
+ [:e, 'two']
2959
+ ])
2960
+ agg_index = DaruLite::MultiIndex.from_tuples(
2961
+ [
2962
+ ['bar'],
2963
+ ['foo']
2964
+ ]
2965
+ )
2966
+
2967
+ expect(@df.pivot_table(index: [:a], vectors: [:b]).round(2)).to eq(
2968
+ DaruLite::DataFrame.new(
2969
+ [
2970
+ [4.5, 1.67],
2971
+ [6.5, 3.0],
2972
+ [9.0, 3.33],
2973
+ [13, 6]
2974
+ ], order: agg_vectors, index: agg_index)
2975
+ )
2976
+ end
2977
+
2978
+ it "creates row and vector index as per (single) index and (double) vector args" do
2979
+ agg_vectors = DaruLite::MultiIndex.from_tuples(
2980
+ [
2981
+ [:d, 'one', 'large'],
2982
+ [:d, 'one', 'small'],
2983
+ [:d, 'two', 'large'],
2984
+ [:d, 'two', 'small'],
2985
+ [:e, 'one', 'large'],
2986
+ [:e, 'one', 'small'],
2987
+ [:e, 'two', 'large'],
2988
+ [:e, 'two', 'small']
2989
+ ]
2990
+ )
2991
+
2992
+ agg_index = DaruLite::MultiIndex.from_tuples(
2993
+ [
2994
+ ['bar'],
2995
+ ['foo']
2996
+ ]
2997
+ )
2998
+
2999
+ expect(@df.pivot_table(index: [:a], vectors: [:b, :c])).to eq(DaruLite::DataFrame.new(
3000
+ [
3001
+ [4.0,2.0],
3002
+ [5.0,1.0],
3003
+ [6.0,nil],
3004
+ [7.0,3.0],
3005
+ [8.0,4.0],
3006
+ [10.0,2.0],
3007
+ [12.0,nil],
3008
+ [14.0,6.0]
3009
+ ], order: agg_vectors, index: agg_index
3010
+ ))
3011
+ end
3012
+
3013
+ it "creates row and vector index with (double) index and (double) vector args" do
3014
+ agg_index = DaruLite::MultiIndex.from_tuples([
3015
+ ['bar', 4],
3016
+ ['bar', 5],
3017
+ ['bar', 6],
3018
+ ['bar', 7],
3019
+ ['foo', 1],
3020
+ ['foo', 2],
3021
+ ['foo', 3]
3022
+ ])
3023
+
3024
+ agg_vectors = DaruLite::MultiIndex.from_tuples([
3025
+ [:e, 'one', 'large'],
3026
+ [:e, 'one', 'small'],
3027
+ [:e, 'two', 'large'],
3028
+ [:e, 'two', 'small']
3029
+ ])
3030
+
3031
+ expect(@df.pivot_table(index: [:a, :d], vectors: [:b, :c])).to eq(
3032
+ DaruLite::DataFrame.new(
3033
+ [
3034
+ [8 ,nil,nil,nil,nil, 4,nil],
3035
+ [nil, 10,nil,nil, 2,nil,nil],
3036
+ [nil,nil, 12,nil,nil,nil,nil],
3037
+ [nil,nil,nil, 14,nil,nil, 6],
3038
+ ], index: agg_index, order: agg_vectors)
3039
+ )
3040
+ end
3041
+
3042
+ it "only aggregates over the vector specified in the values argument" do
3043
+ agg_vectors = DaruLite::MultiIndex.from_tuples(
3044
+ [
3045
+ [:e, 'one', 'large'],
3046
+ [:e, 'one', 'small'],
3047
+ [:e, 'two', 'large'],
3048
+ [:e, 'two', 'small']
3049
+ ]
3050
+ )
3051
+ agg_index = DaruLite::MultiIndex.from_tuples(
3052
+ [
3053
+ ['bar'],
3054
+ ['foo']
3055
+ ]
3056
+ )
3057
+ expect(@df.pivot_table(index: [:a], vectors: [:b, :c], values: :e)).to eq(
3058
+ DaruLite::DataFrame.new(
3059
+ [
3060
+ [8, 4],
3061
+ [10, 2],
3062
+ [12,nil],
3063
+ [14, 6]
3064
+ ], order: agg_vectors, index: agg_index
3065
+ )
3066
+ )
3067
+
3068
+ agg_vectors = DaruLite::MultiIndex.from_tuples(
3069
+ [
3070
+ [:d, 'one'],
3071
+ [:d, 'two'],
3072
+ [:e, 'one'],
3073
+ [:e, 'two']
3074
+ ]
3075
+ )
3076
+ expect(@df.pivot_table(index: [:a], vectors: [:b], values: [:d, :e])).to eq(
3077
+ DaruLite::DataFrame.new(
3078
+ [
3079
+ [4.5, 5.0/3],
3080
+ [6.5, 3.0],
3081
+ [9.0, 10.0/3],
3082
+ [13.0, 6.0]
3083
+ ], order: agg_vectors, index: agg_index
3084
+ )
3085
+ )
3086
+ end
3087
+
3088
+ it "overrides default aggregate function to aggregate over sum" do
3089
+ agg_vectors = DaruLite::MultiIndex.from_tuples(
3090
+ [
3091
+ [:e, 'one', 'large'],
3092
+ [:e, 'one', 'small'],
3093
+ [:e, 'two', 'large'],
3094
+ [:e, 'two', 'small']
3095
+ ]
3096
+ )
3097
+ agg_index = DaruLite::MultiIndex.from_tuples(
3098
+ [
3099
+ ['bar'],
3100
+ ['foo']
3101
+ ]
3102
+ )
3103
+ expect(@df.pivot_table(index: [:a], vectors: [:b, :c], values: :e, agg: :sum)).to eq(
3104
+ DaruLite::DataFrame.new(
3105
+ [
3106
+ [8, 8],
3107
+ [10, 2],
3108
+ [12,nil],
3109
+ [14, 12]
3110
+ ], order: agg_vectors, index: agg_index
3111
+ )
3112
+ )
3113
+ end
3114
+
3115
+ it "raises error if no non-numeric vectors are present" do
3116
+ df = DaruLite::DataFrame.new({a: ['a', 'b', 'c'], b: ['b', 'e', 'd']})
3117
+ expect {
3118
+ df.pivot_table(index: [:a])
3119
+ }.to raise_error
3120
+ end
3121
+
3122
+ it "raises error if atleast a row index is not specified" do
3123
+ expect {
3124
+ @df.pivot_table
3125
+ }.to raise_error
3126
+ end
3127
+
3128
+ it "aggregates when nils are present in value vector" do
3129
+ df = DaruLite::DataFrame.new({
3130
+ a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
3131
+ b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
3132
+ c: ['small','large','large','small','small','large','small','large','small'],
3133
+ d: [1,2,2,3,3,4,5,6,7],
3134
+ e: [2,nil,4,6,6,8,10,12,nil]
3135
+ })
3136
+
3137
+ expect(df.pivot_table index: [:a]).to eq(
3138
+ DaruLite::DataFrame.new({
3139
+ d: [5.0, 2.2, 7],
3140
+ e: [10.0, 4.5, nil]
3141
+ }, index: DaruLite::Index.new(['bar', 'foo', 'ice'])))
3142
+ end
3143
+
3144
+ it "works when nils are present in value vector" do
3145
+ df = DaruLite::DataFrame.new({
3146
+ a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'ice'],
3147
+ b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
3148
+ c: ['small','large','large','small','small','large','small','large','small'],
3149
+ d: [1,2,2,3,3,4,5,6,7],
3150
+ e: [2,nil,4,6,6,8,10,12,nil]
3151
+ })
3152
+
3153
+ agg_vectors = DaruLite::MultiIndex.from_tuples(
3154
+ [
3155
+ [:e, 'one'],
3156
+ [:e, 'two']
3157
+ ]
3158
+ )
3159
+
3160
+ agg_index = DaruLite::MultiIndex.from_tuples(
3161
+ [
3162
+ ['bar'],
3163
+ ['foo'],
3164
+ ['ice']
3165
+ ]
3166
+ )
3167
+
3168
+ expect(df.pivot_table index: [:a], vectors: [:b], values: :e).to eq(
3169
+ DaruLite::DataFrame.new(
3170
+ [
3171
+ [9, 3, nil],
3172
+ [12, 6, nil]
3173
+ ], order: agg_vectors, index: agg_index
3174
+ )
3175
+ )
3176
+ end
3177
+
3178
+ it 'performs date pivoting' do
3179
+ categories = %i[jan feb mar apr may jun jul aug sep oct nov dec]
3180
+ df = DaruLite::DataFrame.rows([
3181
+ [2014, 2, 1600.0, 20.0],
3182
+ [2014, 3, 1680.0, 21.0],
3183
+ [2016, 2, 1600.0, 20.0],
3184
+ [2016, 4, 1520.0, 19.0],
3185
+ ], order: [:year, :month, :visitors, :days])
3186
+ df[:averages] = df[:visitors] / df[:days]
3187
+ df[:month] = df[:month].map{|i| categories[i - 1]}
3188
+ actual = df.pivot_table(index: :month, vectors: [:year], values: :averages)
3189
+
3190
+ # NB: As you can see, there are some "illogical" parts:
3191
+ # months are sorted lexicographically, then made into a multi-index
3192
+ # with one element per tuple, and the order of columns then depends
3193
+ # on which month is lexicographically first (it's apr, so apr-2016
3194
+ # is the first row to be gathered, so 2016 is the first column).
3195
+ #
3196
+ # All of this is a consequence of our group_by implementation (which
3197
+ # always sorts results & always makes array keys). I hope that fixing
3198
+ # group_by, even to the extent described at https://github.com/v0dro/daru/issues/152,
3199
+ # will fix this case as well.
3200
+ expected =
3201
+ DaruLite::DataFrame.new(
3202
+ [
3203
+ [80.0, 80.0, nil],
3204
+ [nil, 80.0, 80.0],
3205
+ ], index: DaruLite::MultiIndex.from_tuples([[:apr], [:feb], [:mar]]),
3206
+ order: DaruLite::MultiIndex.from_tuples([[:averages, 2016], [:averages, 2014]])
3207
+ )
3208
+ # Comparing their parts previous to full comparison allows to
3209
+ # find complicated differences.
3210
+ expect(actual.vectors).to eq expected.vectors
3211
+ expect(actual.index).to eq expected.index
3212
+ expect(actual).to eq expected
3213
+ end
3214
+ end
3215
+
3216
+ context "#shape" do
3217
+ it "returns an array containing number of rows and columns" do
3218
+ expect(@data_frame.shape).to eq([5,3])
3219
+ end
3220
+ end
3221
+
3222
+ context "#nest" do
3223
+ it "nests in a hash" do
3224
+ df = DaruLite::DataFrame.new({
3225
+ :a => DaruLite::Vector.new(%w(a a a b b b)),
3226
+ :b => DaruLite::Vector.new(%w(c c d d e e)),
3227
+ :c => DaruLite::Vector.new(%w(f g h i j k))
3228
+ })
3229
+ nest = df.nest :a, :b
3230
+ expect(nest['a']['c']).to eq([{ :c => 'f' }, { :c => 'g' }])
3231
+ expect(nest['a']['d']).to eq([{ :c => 'h' }])
3232
+ expect(nest['b']['e']).to eq([{ :c => 'j' }, { :c => 'k' }])
3233
+ end
3234
+ end
3235
+
3236
+ context "#summary" do
3237
+ subject { df.summary }
3238
+
3239
+ context "DataFrame" do
3240
+ let(:df) { DaruLite::DataFrame.new({a: [1,2,5], b: [1,2,"string"]}, order: [:a, :b], index: [:one, :two, :three], name: 'frame') }
3241
+ it { is_expected.to eq %Q{
3242
+ |= frame
3243
+ | Number of rows: 3
3244
+ | Element:[a]
3245
+ | == a
3246
+ | n :3
3247
+ | non-missing:3
3248
+ | median: 2
3249
+ | mean: 2.6667
3250
+ | std.dev.: 2.0817
3251
+ | std.err.: 1.2019
3252
+ | skew: 0.2874
3253
+ | kurtosis: -2.3333
3254
+ | Element:[b]
3255
+ | == b
3256
+ | n :3
3257
+ | non-missing:3
3258
+ | factors: 1,2,string
3259
+ | mode: 1,2,string
3260
+ | Distribution
3261
+ | 1 1 100.00%
3262
+ | 2 1 100.00%
3263
+ | string 1 100.00%
3264
+ }.unindent }
3265
+ end
3266
+ end
3267
+
3268
+ context '#to_df' do
3269
+ it 'returns the dataframe' do
3270
+ @data_frame.to_df == @data_frame
3271
+ end
3272
+ end
3273
+
3274
+ context "#merge" do
3275
+ it "merges one dataframe with another" do
3276
+ a = DaruLite::Vector.new [1, 2, 3]
3277
+ b = DaruLite::Vector.new [3, 4, 5]
3278
+ c = DaruLite::Vector.new [4, 5, 6]
3279
+ d = DaruLite::Vector.new [7, 8, 9]
3280
+ e = DaruLite::Vector.new [10, 20, 30]
3281
+ ds1 = DaruLite::DataFrame.new({ :a => a, :b => b })
3282
+ ds2 = DaruLite::DataFrame.new({ :c => c, :d => d })
3283
+ exp = DaruLite::DataFrame.new({ :a => a, :b => b, :c => c, :d => d })
3284
+
3285
+ expect(ds1.merge(ds2)).to eq(exp)
3286
+ expect(ds2.merge(ds1)).to eq(
3287
+ DaruLite::DataFrame.new({c: c, d: d, a: a, b: b}, order: [:c, :d, :a, :b]))
3288
+
3289
+ ds3 = DaruLite::DataFrame.new({ :a => e })
3290
+ exp = DaruLite::DataFrame.new({ :a_1 => a, :a_2 => e, :b => b },
3291
+ order: [:a_1, :b, :a_2])
3292
+
3293
+ expect(ds1.merge(ds3)).to eq(exp)
3294
+ end
3295
+
3296
+ context "preserves type of vector names" do
3297
+ let(:df1) { DaruLite::DataFrame.new({'a'=> [1, 2, 3]}) }
3298
+ let(:df2) { DaruLite::DataFrame.new({:b=> [4, 5, 6]}) }
3299
+ subject { df1.merge df2 }
3300
+
3301
+ it { is_expected.to be_a DaruLite::DataFrame }
3302
+ it { expect(subject['a'].to_a).to eq [1, 2, 3] }
3303
+ it { expect(subject[:b].to_a).to eq [4, 5, 6] }
3304
+ end
3305
+
3306
+ context "preserves indices for dataframes with same index" do
3307
+ let(:index) { ['one','two','three'] }
3308
+ let(:df1) { DaruLite::DataFrame.new({ 'a' => [1, 2, 3], 'b' => [3, 4, 5] }, index: index) }
3309
+ let(:df2) { DaruLite::DataFrame.new({ 'c' => [4, 5, 6], 'd' => [7, 8, 9] }, index: index) }
3310
+ subject { df1.merge df2 }
3311
+
3312
+ its(:index) { is_expected.to eq DaruLite::Index.new(index) }
3313
+ end
3314
+ end
3315
+
3316
+ context "#vector_by_calculation" do
3317
+ it "DSL for returning vector of each calculation" do
3318
+ a1 = DaruLite::Vector.new([1, 2, 3, 4, 5, 6, 7])
3319
+ a2 = DaruLite::Vector.new([10, 20, 30, 40, 50, 60, 70])
3320
+ a3 = DaruLite::Vector.new([100, 200, 300, 400, 500, 600, 700])
3321
+ ds = DaruLite::DataFrame.new({ :a => a1, :b => a2, :c => a3 })
3322
+ total = ds.vector_by_calculation { a + b + c }
3323
+ expected = DaruLite::Vector.new([111, 222, 333, 444, 555, 666, 777])
3324
+ expect(total).to eq(expected)
3325
+ end
3326
+ end
3327
+
3328
+ context "group_by" do
3329
+ context "on a single row DataFrame" do
3330
+ let(:df){ DaruLite::DataFrame.new(city: %w[Kyiv], year: [2015], value: [1]) }
3331
+ it "returns a groupby object" do
3332
+ expect(df.group_by([:city])).to be_a(DaruLite::Core::GroupBy)
3333
+ end
3334
+ it "has the correct index" do
3335
+ expect(df.group_by([:city]).groups).to eq({["Kyiv"]=>[0]})
3336
+ end
3337
+ end
3338
+ end
3339
+
3340
+ context "#vector_sum" do
3341
+ before do
3342
+ a1 = DaruLite::Vector.new [1, 2, 3, 4, 5, nil, nil]
3343
+ a2 = DaruLite::Vector.new [10, 10, 20, 20, 20, 30, nil]
3344
+ b1 = DaruLite::Vector.new [nil, 1, 1, 1, 1, 2, nil]
3345
+ b2 = DaruLite::Vector.new [2, 2, 2, nil, 2, 3, nil]
3346
+ @df = DaruLite::DataFrame.new({ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2 })
3347
+ end
3348
+
3349
+ it "calculates complete vector sum" do
3350
+ expect(@df.vector_sum).to eq(DaruLite::Vector.new [nil, 15, 26, nil, 28, nil, nil])
3351
+ end
3352
+
3353
+ it "ignores nils if skipnil is true" do
3354
+ expect(@df.vector_sum skipnil: true).to eq(DaruLite::Vector.new [13, 15, 26, 25, 28, 35, 0])
3355
+ end
3356
+
3357
+ it "calculates partial vector sum" do
3358
+ a = @df.vector_sum([:a1, :a2])
3359
+ b = @df.vector_sum([:b1, :b2])
3360
+
3361
+ expect(a).to eq(DaruLite::Vector.new [11, 12, 23, 24, 25, nil, nil])
3362
+ expect(b).to eq(DaruLite::Vector.new [nil, 3, 3, nil, 3, 5, nil])
3363
+ end
3364
+ end
3365
+
3366
+ context "#missing_values_rows" do
3367
+ it "returns number of missing values in each row" do
3368
+ a1 = DaruLite::Vector.new [1, nil, 3, 4, 5, nil]
3369
+ a2 = DaruLite::Vector.new [10, nil, 20, 20, 20, 30]
3370
+ b1 = DaruLite::Vector.new [nil, nil, 1, 1, 1, 2]
3371
+ b2 = DaruLite::Vector.new [2, 2, 2, nil, 2, 3]
3372
+ c = DaruLite::Vector.new [nil, 2, 4, 2, 2, 2]
3373
+ df = DaruLite::DataFrame.new({
3374
+ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2, :c => c })
3375
+
3376
+ expect(df.missing_values_rows).to eq(DaruLite::Vector.new [2, 3, 0, 1, 0, 1])
3377
+ end
3378
+ end
3379
+
3380
+ context "#vector_count_characters" do
3381
+ it "" do
3382
+ a1 = DaruLite::Vector.new( [1, 'abcde', 3, 4, 5, nil])
3383
+ a2 = DaruLite::Vector.new( [10, 20.3, 20, 20, 20, 30])
3384
+ b1 = DaruLite::Vector.new( [nil, '343434', 1, 1, 1, 2])
3385
+ b2 = DaruLite::Vector.new( [2, 2, 2, nil, 2, 3])
3386
+ c = DaruLite::Vector.new([nil, 2, 'This is a nice example', 2, 2, 2])
3387
+ ds = DaruLite::DataFrame.new({ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2, :c => c })
3388
+
3389
+ expect(ds.vector_count_characters).to eq(DaruLite::Vector.new([4, 17, 27, 5, 6, 5]))
3390
+ end
3391
+ end
3392
+
3393
+ context '#include_values?' do
3394
+ let(:df) do
3395
+ DaruLite::DataFrame.new({
3396
+ a: [1, 2, 3, 4, Float::NAN, 6, 1],
3397
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5],
3398
+ c: ['a', 6, 3, 4, 3, 5, 3],
3399
+ d: [1, 2, 3, 5, 1, 2, 5]
3400
+ })
3401
+ end
3402
+ before { df.to_category :b }
3403
+
3404
+ context 'true' do
3405
+ it { expect(df.include_values? nil).to eq true }
3406
+ it { expect(df.include_values? Float::NAN).to eq true }
3407
+ it { expect(df.include_values? nil, Float::NAN).to eq true }
3408
+ it { expect(df.include_values? 1, 30).to eq true }
3409
+ end
3410
+
3411
+ context 'false' do
3412
+ it { expect(df[:a, :c].include_values? nil).to eq false }
3413
+ it { expect(df[:c, :d].include_values? Float::NAN).to eq false }
3414
+ it { expect(df[:c, :d].include_values? nil, Float::NAN).to eq false }
3415
+ it { expect(df.include_values? 10, 20).to eq false }
3416
+ end
3417
+ end
3418
+
3419
+ context "#vector_mean" do
3420
+ before do
3421
+ a1 = DaruLite::Vector.new [1, 2, 3, 4, 5, nil]
3422
+ a2 = DaruLite::Vector.new [10, 10, 20, 20, 20, 30]
3423
+ b1 = DaruLite::Vector.new [nil, 1, 1, 1, 1, 2]
3424
+ b2 = DaruLite::Vector.new [2, 2, 2, nil, 2, 3]
3425
+ c = DaruLite::Vector.new [nil, 2, 4, 2, 2, 2]
3426
+ @df = DaruLite::DataFrame.new({
3427
+ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2, :c => c })
3428
+ end
3429
+
3430
+ it "calculates complete vector mean" do
3431
+ expect(@df.vector_mean).to eq(
3432
+ DaruLite::Vector.new [nil, 3.4, 6, nil, 6.0, nil])
3433
+ end
3434
+ end
3435
+
3436
+ context "#add_vectors_by_split_recode" do
3437
+ before do
3438
+ @ds = DaruLite::DataFrame.new({
3439
+ :id => DaruLite::Vector.new([1, 2, 3, 4, 5]),
3440
+ :name => DaruLite::Vector.new(%w(Alex Claude Peter Franz George)),
3441
+ :age => DaruLite::Vector.new([20, 23, 25, 27, 5]),
3442
+ :city => DaruLite::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
3443
+ :a1 => DaruLite::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) },
3444
+ order: [:id, :name, :age, :city, :a1])
3445
+ end
3446
+
3447
+ it "" do
3448
+ @ds.add_vectors_by_split_recode(:a1, '_')
3449
+ expect(@ds.vectors.to_a) .to eq([:id, :name, :age, :city ,:a1, :a1_1, :a1_2, :a1_3])
3450
+ expect(@ds[:a1_1].to_a).to eq([1, 0, 1, nil, 1])
3451
+ expect(@ds[:a1_2].to_a).to eq([1, 1, 0, nil, 1])
3452
+ expect(@ds[:a1_3].to_a).to eq([0, 1, 0, nil, 1])
3453
+ end
3454
+ end
3455
+
3456
+ context "#add_vectors_by_split" do
3457
+ before do
3458
+ @ds = DaruLite::DataFrame.new({
3459
+ :id => DaruLite::Vector.new([1, 2, 3, 4, 5]),
3460
+ :name => DaruLite::Vector.new(%w(Alex Claude Peter Franz George)),
3461
+ :age => DaruLite::Vector.new([20, 23, 25, 27, 5]),
3462
+ :city => DaruLite::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
3463
+ :a1 => DaruLite::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c'])
3464
+ }, order: [:id, :name, :age, :city, :a1])
3465
+ end
3466
+
3467
+ it "" do
3468
+ @ds.add_vectors_by_split(:a1, '_')
3469
+ expect(@ds.vectors.to_a).to eq([:id, :name, :age, :city, :a1, :a1_a, :a1_b, :a1_c])
3470
+ expect(@ds[:a1_a].to_a).to eq([1, 0, 1, nil, 1])
3471
+ expect(@ds[:a1_b].to_a).to eq([1, 1, 0, nil, 1])
3472
+ expect(@ds[:a1_c].to_a).to eq([0, 1, 0, nil, 1])
3473
+ end
3474
+ end
3475
+
3476
+ context "#verify" do
3477
+ def create_test(*args, &proc)
3478
+ description = args.shift
3479
+ fields = args
3480
+ [description, fields, proc]
3481
+ end
3482
+
3483
+ before do
3484
+ name = DaruLite::Vector.new %w(r1 r2 r3 r4)
3485
+ v1 = DaruLite::Vector.new [1, 2, 3, 4]
3486
+ v2 = DaruLite::Vector.new [4, 3, 2, 1]
3487
+ v3 = DaruLite::Vector.new [10, 20, 30, 40]
3488
+ v4 = DaruLite::Vector.new %w(a b a b)
3489
+ @df = DaruLite::DataFrame.new({
3490
+ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :id => name
3491
+ }, order: [:v1, :v2, :v3, :v4, :id])
3492
+ end
3493
+
3494
+ it "correctly verifies data as per the block" do
3495
+ # Correct
3496
+ t1 = create_test('If v4=a, v1 odd') do |r|
3497
+ r[:v4] == 'b' or (r[:v4] == 'a' and r[:v1].odd?)
3498
+ end
3499
+ t2 = create_test('v3=v1*10') { |r| r[:v3] == r[:v1] * 10 }
3500
+ # Fail!
3501
+ t3 = create_test("v4='b'") { |r| r[:v4] == 'b' }
3502
+ exp1 = ["1 [1]: v4='b'", "3 [3]: v4='b'"]
3503
+ exp2 = ["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
3504
+
3505
+ dataf = @df.verify(t3, t1, t2)
3506
+ expect(dataf).to eq(exp1)
3507
+ end
3508
+
3509
+ it "uses additional fields to extend error messages" do
3510
+ t = create_test("v4='b'", :v2, :v3) { |r| r[:v4] == 'b' }
3511
+
3512
+ dataf = @df.verify(:id, t)
3513
+ expect(dataf).to eq(["1 [r1]: v4='b' (v2=4, v3=10)", "3 [r3]: v4='b' (v2=2, v3=30)"])
3514
+ end
3515
+ end
3516
+
3517
+ context "#compute" do
3518
+ it "performs a computation when supplied in a string" do
3519
+ v1 = DaruLite::Vector.new [1, 2, 3, 4]
3520
+ v2 = DaruLite::Vector.new [4, 3, 2, 1]
3521
+ v3 = DaruLite::Vector.new [10, 20, 30, 40]
3522
+ vnumeric = DaruLite::Vector.new [0, 0, 1, 4]
3523
+ vsum = DaruLite::Vector.new [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0]
3524
+ vmult = DaruLite::Vector.new [1 * 4, 2 * 3, 3 * 2, 4 * 1]
3525
+
3526
+ df = DaruLite::DataFrame.new({:v1 => v1, :v2 => v2, :v3 => v3})
3527
+
3528
+ expect(df.compute("v1/v2")).to eq(vnumeric)
3529
+ expect(df.compute("v1+v2+v3")).to eq(vsum)
3530
+ expect(df.compute("v1*v2")).to eq(vmult)
3531
+ end
3532
+ end
3533
+
3534
# DataFrame.crosstab_by_assignation: pivots three parallel vectors into a
# frame. In this example the first vector supplies the row ids (exposed as
# the :_id vector), the second supplies the column names, and the third the
# cell values.
context ".crosstab_by_assignation" do
  # Previously this example had an empty description, which made it
  # unidentifiable in spec output.
  it "pivots row, column and value vectors into a dataframe" do
    v1 = DaruLite::Vector.new %w(a a a b b b c c c)
    v2 = DaruLite::Vector.new %w(a b c a b c a b c)
    v3 = DaruLite::Vector.new [0, 1, 0, 0, 1, 1, 0, 0, 1]
    df = DaruLite::DataFrame.crosstab_by_assignation(v1, v2, v3)

    expect(df[:_id].type).to eq(:object)
    expect(df['a'].type).to eq(:numeric)
    expect(df['b'].type).to eq(:numeric)

    ev_id = DaruLite::Vector.new %w(a b c)
    ev_a  = DaruLite::Vector.new [0, 0, 0]
    ev_b  = DaruLite::Vector.new [1, 1, 0]
    ev_c  = DaruLite::Vector.new [0, 1, 1]
    expected = DaruLite::DataFrame.new({
      :_id => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c },
      order: ['a', 'b', 'c', :_id])

    # Actual value goes inside expect(...) so failure output labels
    # "expected" and "got" the right way round.
    expect(df).to eq(expected)
  end
end
3556
+
3557
# DataFrame#one_to_many: given key fields and a column-name pattern
# ('car_%v%n' here), the numbered car_color<n>/car_value<n> column groups are
# turned into separate rows carrying 'color', 'value' and a '_col_id' number.
context "#one_to_many" do
  # Previously this example had an empty description, which made it
  # unidentifiable in spec output.
  it "expands numbered column groups into one row per non-nil group" do
    rows = [
      ['1', 'george', 'red', 10, 'blue', 20, nil, nil],
      ['2', 'fred', 'green', 15, 'orange', 30, 'white', 20],
      ['3', 'alfred', nil, nil, nil, nil, nil, nil]
    ]

    df = DaruLite::DataFrame.rows(rows,
      order: ['id', 'name', 'car_color1', 'car_value1', 'car_color2',
        'car_value2', 'car_color3', 'car_value3'])

    # 'alfred' (id 3) has only nil groups, so no row with id '3' is expected.
    ids     = DaruLite::Vector.new %w(1 1 2 2 2)
    colors  = DaruLite::Vector.new %w(red blue green orange white)
    values  = DaruLite::Vector.new [10, 20, 15, 30, 20]
    col_ids = DaruLite::Vector.new [1, 2, 1, 2, 3]

    df_expected = DaruLite::DataFrame.new({
      'id' => ids, '_col_id' => col_ids, 'color' => colors, 'value' => values
    }, order: ['id', '_col_id', 'color', 'value'])

    expect(df.one_to_many(['id'], 'car_%v%n')).to eq(df_expected)
  end
end
3581
+
3582
# DataFrame#any?: the block is applied to vectors by default, or to rows when
# :row is passed; any other axis is expected to raise ArgumentError.
context "#any?" do
  before do
    columns = {
      a: [1, 2, 3, 4, 5],
      b: [10, 20, 30, 40, 50],
      c: [11, 22, 33, 44, 55]
    }
    @df = DaruLite::DataFrame.new(columns)
  end

  it "returns true if any one of the vectors satisfy condition" do
    outcome = @df.any? { |vector| vector[0] == 1 }
    expect(outcome).to eq(true)
  end

  it "returns false if none of the vectors satisfy the condition" do
    outcome = @df.any? { |vector| vector.mean > 100 }
    expect(outcome).to eq(false)
  end

  it "returns true if any one of the rows satisfy condition" do
    outcome = @df.any?(:row) { |row| row[:a] == 1 && row[:c] == 11 }
    expect(outcome).to eq(true)
  end

  it "returns false if none of the rows satisfy the condition" do
    outcome = @df.any?(:row) { |row| row.mean > 100 }
    expect(outcome).to eq(false)
  end

  it 'fails on unknown axis' do
    expect do
      @df.any?(:kitten) { |row| row.mean > 100 }
    end.to raise_error(ArgumentError, /axis/)
  end
end
3610
+
3611
# DataFrame#all?: the block is applied to vectors by default, or to rows when
# :row is passed; any other axis is expected to raise ArgumentError.
context "#all?" do
  before do
    columns = {
      a: [1, 2, 3, 4, 5],
      b: [10, 20, 30, 40, 50],
      c: [11, 22, 33, 44, 55]
    }
    @df = DaruLite::DataFrame.new(columns)
  end

  it "returns true if all of the vectors satisfy condition" do
    outcome = @df.all? { |vector| vector.mean < 40 }
    expect(outcome).to eq(true)
  end

  it "returns false if any one of the vectors does not satisfy condition" do
    outcome = @df.all? { |vector| vector.mean == 30 }
    expect(outcome).to eq(false)
  end

  it "returns true if all of the rows satisfy condition" do
    outcome = @df.all?(:row) { |row| row.mean < 70 }
    expect(outcome).to eq(true)
  end

  it "returns false if any one of the rows does not satisfy condition" do
    outcome = @df.all?(:row) { |row| row.mean == 30 }
    expect(outcome).to eq(false)
  end

  it 'fails on unknown axis' do
    expect do
      @df.all?(:kitten) { |row| row.mean > 100 }
    end.to raise_error(ArgumentError, /axis/)
  end
end
3639
+
3640
# DataFrame#only_numerics: keeps the numeric vectors of a frame. The examples
# check both the `clone: false` form (same vector objects) and the default
# form (different vector objects), plus a frame whose vectors and index are
# MultiIndex objects.
context "#only_numerics" do
  before do
    @v1 = DaruLite::Vector.new((1..5).to_a)
    @v2 = DaruLite::Vector.new(%w[one two three four five])
    @v3 = DaruLite::Vector.new([11, 22, 33, 44, 55])
    @df = DaruLite::DataFrame.new({ a: @v1, b: @v2, c: @v3 }, clone: false)
  end

  it "returns a view of only the numeric vectors" do
    numeric_view = @df.only_numerics(clone: false)
    expected     = DaruLite::DataFrame.new({ a: @v1, c: @v3 }, clone: false)

    expect(numeric_view).to eq(expected)
    # Without cloning, the very same Vector object must be handed back.
    expect(numeric_view[:a].object_id).to eq(@v1.object_id)
  end

  it "returns a clone of numeric vectors" do
    numeric_copy = @df.only_numerics
    expected     = DaruLite::DataFrame.new({ a: @v1, c: @v3 }, clone: false)

    expect(numeric_copy).to eq(expected)
    # By default the returned vector is a different object from the source.
    expect(numeric_copy[:a].object_id).to_not eq(@v1.object_id)
  end

  context DaruLite::MultiIndex do
    before do
      # Every (d|e, one|two, large|small) combination, in nested order.
      agg_vectors = DaruLite::MultiIndex.from_tuples(
        %i[d e].product(%i[one two], %i[large small])
      )
      agg_index = DaruLite::MultiIndex.from_tuples([[:bar], [:foo]])

      # The second and last columns hold strings; the rest hold floats/nil.
      columns = [
        [4.112, 2.234],
        %w[a b],
        [6.342, nil],
        [7.2344, 3.23214],
        [8.234, 4.533],
        [10.342, 2.3432],
        [12.0, nil],
        %w[a b]
      ]
      @df = DaruLite::DataFrame.new(columns, order: agg_vectors, index: agg_index)
    end

    it "returns numeric vectors" do
      numeric_tuples = [
        %i[d one large],
        %i[d two large],
        %i[d two small],
        %i[e one large],
        %i[e one small],
        %i[e two large]
      ]
      numeric_columns = [
        [4.112, 2.234],
        [6.342, nil],
        [7.2344, 3.23214],
        [8.234, 4.533],
        [10.342, 2.3432],
        [12.0, nil]
      ]
      answer = DaruLite::DataFrame.new(
        numeric_columns,
        order: DaruLite::MultiIndex.from_tuples(numeric_tuples),
        index: DaruLite::MultiIndex.from_tuples([[:bar], [:foo]])
      )

      expect(@df.only_numerics).to eq(answer)
    end
  end
end
3734
+
3735
# DataFrame#reset_index: the named index (or each named MultiIndex level) is
# expected to come back as ordinary leading vectors of the frame.
context '#reset_index' do
  context 'when Index' do
    subject do
      named_index = DaruLite::Index.new(%w[a b c d e], name: 'indices')
      DaruLite::DataFrame.new({ 'vals' => [1, 2, 3, 4, 5] }, index: named_index).reset_index
    end

    it do
      is_expected.to eq DaruLite::DataFrame.new(
        'indices' => %w[a b c d e],
        'vals' => [1, 2, 3, 4, 5]
      )
    end
  end

  context 'when MultiIndex' do
    subject do
      multi_index = DaruLite::MultiIndex.from_tuples(
        [[0, 'a'], [0, 'b'], [1, 'a'], [1, 'b']]
      )
      multi_index.name = %w[nums alphas]

      DaruLite::DataFrame.new({ 'vals' => [1, 2, 3, 4] }, index: multi_index).reset_index
    end

    it do
      is_expected.to eq DaruLite::DataFrame.new(
        'nums' => [0, 0, 1, 1],
        'alphas' => %w[a b a b],
        'vals' => [1, 2, 3, 4]
      )
    end
  end
end
3769
+
3770
# DataFrame#set_index: promotes one vector (or several, giving a MultiIndex)
# to the frame's index. Covered here: dropping vs keeping the source column,
# the `categorical: true` option, the uniqueness check, and the array form.
context "#set_index" do
  before(:each) do
    @df = DaruLite::DataFrame.new({
      a: [1,2,3,4,5],
      b: ['a','b','c','d','e'],
      c: [11,22,33,44,55]
    })
  end

  it "sets a particular column as the index and deletes that column" do
    @df.set_index(:b)
    expect(@df).to eq(
      DaruLite::DataFrame.new({
        a: [1,2,3,4,5],
        c: [11,22,33,44,55]
      }, index: ['a','b','c','d','e'])
    )
  end

  it "sets a particular column as index but keeps that column" do
    expect(@df.set_index(:c, keep: true)).to eq(
      DaruLite::DataFrame.new({
        a: [1,2,3,4,5],
        b: ['a','b','c','d','e'],
        c: [11,22,33,44,55]
      }, index: [11,22,33,44,55]))
    # The old check compared @df[:c] with itself and could never fail;
    # assert the kept column's actual contents instead.
    expect(@df[:c].to_a).to eq([11,22,33,44,55])
  end

  it "sets categorical index if categorical is true" do
    data = {
      a: [1, 2, 3, 4, 5],
      b: [:a, 1, :a, 1, 'c'],
      c: %w[a b c d e]
    }
    df = DaruLite::DataFrame.new(data)
    df.set_index(:b, categorical: true)
    expected = DaruLite::DataFrame.new(
      data.slice(:a, :c),
      index: DaruLite::CategoricalIndex.new(data[:b])
    )
    expect(df).to eq(expected)
  end

  it "raises error if all elements in the column aren't unique" do
    jholu = DaruLite::DataFrame.new({
      a: ['a','b','a'],
      b: [1,2,4]
    })

    expect {
      jholu.set_index(:a)
    }.to raise_error(ArgumentError)
  end

  it "sets multiindex if array is given" do
    df = DaruLite::DataFrame.new({
      a: %w[a a b b],
      b: [1, 2, 1, 2],
      c: %w[a b c d]
    })
    df.set_index(%i[a b])
    # The block parameter no longer shadows the outer `df`, and the redundant
    # trailing value is gone (tap ignores its block's result).
    expected =
      DaruLite::DataFrame.new(
        { c: %w[a b c d] },
        index: DaruLite::MultiIndex.from_tuples(
          [['a', 1], ['a', 2], ['b', 1], ['b', 2]]
        )
      ).tap { |frame| frame.index.name = %i[a b] }
    expect(df).to eq(expected)
  end
end
3845
+
3846
# DataFrame#concat: appends the rows of another frame and returns a new frame.
# A vector present in only one of the two frames is padded with nils for the
# other frame's rows.
context "#concat" do
  before do
    @df1 = DaruLite::DataFrame.new({
      a: [1, 2, 3],
      b: [1, 2, 3]
    })

    @df2 = DaruLite::DataFrame.new({
      a: [4, 5, 6],
      c: [4, 5, 6]
    })
  end

  it 'does not modify the original dataframes' do
    df1_a = @df1[:a].to_a.dup
    df2_a = @df2[:a].to_a.dup

    # Only the effect on the receivers matters here, so the result is
    # discarded (it used to be bound to an unused local).
    @df1.concat @df2
    expect(@df1[:a].to_a).to eq df1_a
    expect(@df2[:a].to_a).to eq df2_a
  end

  it 'creates a new dataframe that is a concatenation of the two dataframe arguments' do
    df1_a = @df1[:a].to_a.dup
    df2_a = @df2[:a].to_a.dup

    df_concat = @df1.concat @df2
    expect(df_concat[:a].to_a).to eq df1_a + df2_a
  end

  it 'fills in missing vectors with nils' do
    df1_b = @df1[:b].to_a.dup
    df2_c = @df2[:c].to_a.dup

    df_concat = @df1.concat @df2
    expect(df_concat[:b].to_a).to eq df1_b + [nil] * @df2.size
    expect(df_concat[:c].to_a).to eq [nil] * @df1.size + df2_c
  end
end
3886
+
3887
# DataFrame#union: combines two frames into a new one. Unlike the #concat
# examples, the frames here carry explicit indices; @df3 shares index 5 with
# @df1 to exercise the overlapping-index case.
context "#union" do
  before do
    @df1 = DaruLite::DataFrame.new({ a: [1, 2, 3], b: [1, 2, 3] }, index: [1, 3, 5])
    @df2 = DaruLite::DataFrame.new({ a: [4, 5, 6], c: [4, 5, 6] }, index: [7, 9, 11])
    @df3 = DaruLite::DataFrame.new({ a: [4, 5, 6], c: [4, 5, 6] }, index: [5, 7, 9])
  end

  it 'does not modify the original dataframes' do
    left_a_before  = @df1[:a].to_a.dup
    right_a_before = @df2[:a].to_a.dup

    @df1.union(@df2)

    expect(@df1[:a].to_a).to eq left_a_before
    expect(@df2[:a].to_a).to eq right_a_before
  end

  it 'creates a new dataframe that is a concatenation of the two dataframe arguments' do
    left_a  = @df1[:a].to_a.dup
    right_a = @df2[:a].to_a.dup

    expect(@df1.union(@df2)[:a].to_a).to eq left_a + right_a
  end

  it 'fills in missing vectors with nils' do
    left_b  = @df1[:b].to_a.dup
    right_c = @df2[:c].to_a.dup

    merged = @df1.union(@df2)

    expect(merged[:b].to_a).to eq left_b + [nil] * @df2.size
    expect(merged[:c].to_a).to eq [nil] * @df1.size + right_c
  end

  it 'overwrites part of the first dataframe if there are double indices' do
    # Row 5 exists in both frames; the expected row carries @df3's values.
    row_five = DaruLite::Vector.new({a: 4, b: nil, c: 4})
    expect(@df1.union(@df3).row[5]).to eq row_five
  end

  it 'concats the indices' do
    left_index  = @df1.index.to_a
    right_index = @df2.index.to_a

    expect(@df1.union(@df2).index.to_a).to eq left_index + right_index
  end
end
3944
+
3945
# DataFrame#inspect: each nested context defines `df`, and the subject (its
# #inspect text) is compared against an expected literal passed through
# String#unindent (defined outside this chunk -- presumably a spec helper
# that strips the margin up to each `|`; confirm).
#
# All one-liners now use `is_expected.to eq`; several previously used the
# legacy implicit `should ==` form, inconsistently with their siblings.
#
# NOTE(review): the column padding inside the expected strings looks
# collapsed to single spaces in this copy of the file -- confirm the literals
# against the real #inspect output before relying on them.
context '#inspect' do
  subject { df.inspect }

  context 'empty' do
    let(:df) { DaruLite::DataFrame.new({}, order: %w[a b c])}
    it { is_expected.to eq %Q{
      |#<DaruLite::DataFrame(0x3)>
      | a b c
    }.unindent}
  end

  context 'simple' do
    let(:df) { DaruLite::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]}, name: 'test')}
    it { is_expected.to eq %Q{
      |#<DaruLite::DataFrame: test (3x3)>
      | a b c
      | 0 1 3 6
      | 1 2 4 7
      | 2 3 5 8
    }.unindent}
  end

  context 'if index name is set' do
    context 'single index with name' do
      let(:df) { DaruLite::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]},
        name: 'test')}
      before { df.index.name = 'index_name' }
      it { is_expected.to eq %Q{
        |#<DaruLite::DataFrame: test (3x3)>
        | index_name a b c
        | 0 1 3 6
        | 1 2 4 7
        | 2 3 5 8
      }.unindent}
    end

    context 'MultiIndex with name' do
      let(:mi) { DaruLite::MultiIndex.new(
        levels: [[:a,:b,:c], [:one, :two]],
        labels: [[0,0,1,1,2,2], [0,1,0,1,0,1]], name: ['s1', 's2']) }
      let(:df) { DaruLite::DataFrame.new({
        a: [11, 12, 13, 14, 15, 16], b: [21, 22, 23, 24, 25, 26]},
        name: 'test', index: mi)}
      it { is_expected.to eq %Q{
        |#<DaruLite::DataFrame: test (6x2)>
        | s1 s2 a b
        | a one 11 21
        | two 12 22
        | b one 13 23
        | two 14 24
        | c one 15 25
        | two 16 26
      }.unindent}
    end

  end

  context 'no name' do
    let(:df) { DaruLite::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]})}
    it { is_expected.to eq %Q{
      |#<DaruLite::DataFrame(3x3)>
      | a b c
      | 0 1 3 6
      | 1 2 4 7
      | 2 3 5 8
    }.unindent}
  end

  context 'with nils' do
    let(:df) { DaruLite::DataFrame.new({a: [1,nil,3], b: [3,4,5], c: [6,7,nil]}, name: 'test')}
    it { is_expected.to eq %Q{
      |#<DaruLite::DataFrame: test (3x3)>
      | a b c
      | 0 1 3 6
      | 1 nil 4 7
      | 2 3 5 nil
    }.unindent}
  end

  context 'very long' do
    # 60 rows in the frame; the expected text shows rows 0..29 followed by an
    # ellipsis row.
    let(:df) { DaruLite::DataFrame.new({a: [1,1,1]*20, b: [1,1,1]*20, c: [1,1,1]*20}, name: 'test')}
    it { is_expected.to eq %Q{
      |#<DaruLite::DataFrame: test (60x3)>
      | a b c
      | 0 1 1 1
      | 1 1 1 1
      | 2 1 1 1
      | 3 1 1 1
      | 4 1 1 1
      | 5 1 1 1
      | 6 1 1 1
      | 7 1 1 1
      | 8 1 1 1
      | 9 1 1 1
      | 10 1 1 1
      | 11 1 1 1
      | 12 1 1 1
      | 13 1 1 1
      | 14 1 1 1
      | 15 1 1 1
      | 16 1 1 1
      | 17 1 1 1
      | 18 1 1 1
      | 19 1 1 1
      | 20 1 1 1
      | 21 1 1 1
      | 22 1 1 1
      | 23 1 1 1
      | 24 1 1 1
      | 25 1 1 1
      | 26 1 1 1
      | 27 1 1 1
      | 28 1 1 1
      | 29 1 1 1
      | ... ... ... ...
    }.unindent}
  end

  context 'long data lines' do
    # The long string cell appears shortened ("this is ri") in the expected text.
    let(:df) { DaruLite::DataFrame.new({a: [1,2,3], b: [4,5,6], c: ['this is ridiculously long',nil,nil]}, name: 'test')}
    it { is_expected.to eq %Q{
      |#<DaruLite::DataFrame: test (3x3)>
      | a b c
      | 0 1 4 this is ri
      | 1 2 5 nil
      | 2 3 6 nil
    }.unindent}
  end

  context 'index is a MultiIndex' do
    let(:df) {
      DaruLite::DataFrame.new(
        {
          a: [1,2,3,4,5,6,7],
          b: %w[a b c d e f g]
        }, index: DaruLite::MultiIndex.from_tuples([
          %w[foo one],
          %w[foo two],
          %w[foo three],
          %w[bar one],
          %w[bar two],
          %w[bar three],
          %w[baz one],
        ]),
        name: 'test'
      )
    }

    it { is_expected.to eq %Q{
      |#<DaruLite::DataFrame: test (7x2)>
      | a b
      | foo one 1 a
      | two 2 b
      | three 3 c
      | bar one 4 d
      | two 5 e
      | three 6 f
      | baz one 7 g
    }.unindent}
  end

  # Placeholder: no examples have been written for this case yet.
  context 'vectors is a MultiIndex' do
  end

  # Placeholder: no examples have been written for this case yet.
  context 'spacing and threshold settings' do
  end
end
4112
+
4113
# DataFrame#to_s: a one-line summary with the class, the optional name and
# the dimensions. @data_frame is set up outside this context.
context '#to_s' do
  it 'produces a class, size description' do
    summary = @data_frame.to_s
    expect(summary).to eq '#<DaruLite::DataFrame(5x3)>'
  end

  it 'produces a class, name, size description' do
    @data_frame.name = 'Test'
    summary = @data_frame.to_s
    expect(summary).to eq '#<DaruLite::DataFrame: Test(5x3)>'
  end

  it 'produces a class, name, size description when the name is a symbol' do
    @data_frame.name = :Test
    summary = @data_frame.to_s
    expect(summary).to eq '#<DaruLite::DataFrame: Test(5x3)>'
  end
end
4128
+
4129
# DataFrame#to_json: the subject is the parsed JSON. Passing `false` yields a
# two-element array (row hashes, then index labels); passing `true` yields
# the row hashes only.
context '#to_json' do
  subject { JSON.parse(json) }

  let(:df) do
    DaruLite::DataFrame.new(
      {a: [1, 2, 3], b: [3, 4, 5], c: [6, 7, 8]},
      index: %i[one two three],
      name: 'test'
    )
  end

  context 'with index' do
    let(:json) { df.to_json(false) }

    # FIXME: is this the most reasonable thing we can do? -- zverok
    # To me, a more reasonable format would be something like
    #
    #   [
    #     {"index" => "one"  , "a"=>1, "b"=>3, "c"=>6},
    #     {"index" => "two"  , "a"=>2, "b"=>4, "c"=>7},
    #     {"index" => "three", "a"=>3, "b"=>5, "c"=>8}
    #   ]
    #
    # or maybe
    #
    #   [
    #     ["one"  , {"a"=>1, "b"=>3, "c"=>6}],
    #     ["two"  , {"a"=>2, "b"=>4, "c"=>7}],
    #     ["three", {"a"=>3, "b"=>5, "c"=>8}]
    #   ]
    #
    # or even
    #
    #   {
    #     "one"   => {"a"=>1, "b"=>3, "c"=>6},
    #     "two"   => {"a"=>2, "b"=>4, "c"=>7},
    #     "three" => {"a"=>3, "b"=>5, "c"=>8}
    #   }
    #
    it do
      row_hashes = [
        {"a"=>1, "b"=>3, "c"=>6},
        {"a"=>2, "b"=>4, "c"=>7},
        {"a"=>3, "b"=>5, "c"=>8}
      ]
      is_expected.to eq([row_hashes, ["one", "two", "three"]])
    end
  end

  context 'without index' do
    let(:json) { df.to_json(true) }

    it do
      is_expected.to eq(
        [
          {"a"=>1, "b"=>3, "c"=>6},
          {"a"=>2, "b"=>4, "c"=>7},
          {"a"=>3, "b"=>5, "c"=>8}
        ]
      )
    end
  end
end
4183
+
4184
# DataFrame#access_row_tuples_by_indexs: returns the rows at the given index
# labels as arrays of values. Exercised with a default index, a custom
# symbol index and a MultiIndex (partial and full keys).
context '#access_row_tuples_by_indexs' do
  let(:df) do
    DaruLite::DataFrame.new({col: %i[a b c d e], num: [52, 12, 7, 17, 1]})
  end
  let(:df_idx) do
    DaruLite::DataFrame.new({a: [52, 12, 7], b: [1, 2, 3]}, index: %i[one two three])
  end
  let(:mi_idx) do
    DaruLite::MultiIndex.from_tuples(
      [
        %i[a one bar],
        %i[a one baz],
        %i[b two bar],
        %i[a two baz]
      ]
    )
  end
  let(:df_mi) do
    DaruLite::DataFrame.new({ a: 1..4, b: 'a'..'d' }, index: mi_idx)
  end

  context 'when no index is given' do
    it 'returns empty Array' do
      expect(df.access_row_tuples_by_indexs).to eq([])
    end
  end

  context 'when index(s) are given' do
    it 'returns Array of row tuples' do
      expect(df.access_row_tuples_by_indexs(1)).to eq([[:b, 12]])
      expect(df.access_row_tuples_by_indexs(0, 3)).to eq([[:a, 52], [:d, 17]])
    end
  end

  context 'when custom index(s) are given' do
    it 'returns Array of row tuples' do
      tuples = df_idx.access_row_tuples_by_indexs(:one, :three)
      expect(tuples).to eq([[52, 1], [7, 3]])
    end
  end

  context 'when multi index is given' do
    it 'returns Array of row tuples' do
      # A partial key (:a) is expected to select every row under that level.
      under_a = df_mi.access_row_tuples_by_indexs(:a)
      expect(under_a).to eq([[1, "a"], [2, "b"], [4, "d"]])

      # A full key is expected to select exactly one row.
      exact = df_mi.access_row_tuples_by_indexs(:a, :one, :baz)
      expect(exact).to eq([[2, "b"]])
    end
  end
end
4233
+
4234
# DataFrame#aggregate: one example passes a lambda for a new column on a
# default-indexed frame, the other a symbol (:sum) on a frame with a
# CategoricalIndex, where rows sharing a category are expected to be combined.
context '#aggregate' do
  let(:cat_idx) { DaruLite::CategoricalIndex.new %i[a b a a c] }
  let(:df) do
    DaruLite::DataFrame.new(num: [52, 12, 7, 17, 1], cat_index: cat_idx)
  end
  let(:df_cat_idx) do
    DaruLite::DataFrame.new({num: [52, 12, 7, 17, 1]}, index: cat_idx)
  end

  it 'lambda function on particular column' do
    times_hundred = ->(frame) { (frame.num * 100).first }
    aggregated    = df.aggregate(num_100_times: times_hundred)

    expect(aggregated).to eq(
      DaruLite::DataFrame.new(num_100_times: [5200, 1200, 700, 1700, 100])
    )
  end

  it 'aggregate sum on particular column' do
    # Category :a covers 52 + 7 + 17 = 76.
    aggregated = df_cat_idx.aggregate(num: :sum)

    expect(aggregated).to eq(
      DaruLite::DataFrame.new({num: [76, 12, 1]}, index: %i[a b c])
    )
  end
end
4251
+
4252
# DataFrame#group_by_and_aggregate: must give the same result as calling
# #group_by and then #aggregate with the same arguments.
context '#group_by_and_aggregate' do
  let(:spending_df) do
    # Columns: year, category, spending, nb_spending.
    records = [
      [2010, 'dev', 50, 1],
      [2010, 'dev', 150, 1],
      [2010, 'dev', 200, 1],
      [2011, 'dev', 50, 1],
      [2012, 'dev', 150, 1],

      [2011, 'office', 300, 1],

      [2010, 'market', 50, 1],
      [2011, 'market', 500, 1],
      [2012, 'market', 500, 1],
      [2012, 'market', 300, 1],

      [2012, 'R&D', 10, 1]
    ]
    DaruLite::DataFrame.rows(records, order: %i[year category spending nb_spending])
  end

  it 'works as group_by + aggregate' do
    one_step = spending_df.group_by_and_aggregate(:year, spending: :sum)
    two_step = spending_df.group_by(:year).aggregate(spending: :sum)
    expect(one_step).to eq(two_step)

    keys = [:year, :category]
    one_step = spending_df.group_by_and_aggregate(keys, spending: :sum, nb_spending: :size)
    two_step = spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending: :size)
    expect(one_step).to eq(two_step)
  end
end
4279
+
4280
# DataFrame#create_sql: renders a CREATE TABLE statement for the given table
# name; the expected text maps the integer, string and date-like columns to
# INTEGER, VARCHAR (255) and DATE.
# NOTE(review): spacing inside the expected literal may have been collapsed
# in this copy of the file -- confirm against the real output.
context '#create_sql' do
  subject { df.create_sql('foo') }

  let(:df) do
    DaruLite::DataFrame.new(
      {
        a: [1, 2, 3],
        b: %w[test me please],
        c: %w[2015-06-01 2015-06-02 2015-06-03]
      },
      name: 'test'
    )
  end

  it do
    is_expected.to eq %Q{
      |CREATE TABLE foo (a INTEGER,
      | b VARCHAR (255),
      | c DATE) CHARACTER SET=UTF8;
    }.unindent
  end
end
4295
+
4296
# Looking up a vector name that the frame does not contain must raise an
# IndexError naming the missing vector.
context "#by_single_key" do
  let(:df) { DaruLite::DataFrame.new(a: [1, 2, 3], b: [4, 5, 6]) }

  it 'raise error when vector is missing from dataframe' do
    missing_lookup = -> { df[:c] }

    expect(&missing_lookup).to raise_error(IndexError, /Specified vector c does not exist/)
  end
end
4303
+
4304
# DataFrame#rotate_vectors(-1): expected to move the last vector to the front;
# a single-vector frame is expected to come back equal to itself.
context "#rotate_vectors" do
  subject { df.rotate_vectors(-1) }

  context "several vectors in the dataframe" do
    let(:df) do
      DaruLite::DataFrame.new({ a: [1, 2, 3], b: [4, 5, 6], total: [5, 7, 9] })
    end
    let(:new_order) { %i[total a b] }

    it "return the dataframe with the position of the last vector change to first" do
      rotated_names = subject.vectors.to_a
      expect(rotated_names).to eq(new_order)
    end
  end

  context "only one vector in the dataframe" do
    let(:df) do
      DaruLite::DataFrame.new({ a: [1, 2, 3] })
    end

    it "return the dataframe without any change" do
      expect(subject).to eq(df)
    end
  end
end
4330
+ end if mri?