daru_lite 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
@@ -0,0 +1,756 @@
1
+ describe DaruLite::Vector do
2
+ describe 'array' do
3
+ let(:dtype) { :array }
4
+
5
+ before do
6
+ @dv = DaruLite::Vector.new [323, 11, 555, 666, 234, 21, 666, 343, 1, 2], dtype: dtype
7
+ @dv_with_nils = DaruLite::Vector.new [323, 11, 555, nil, 666, 234, 21, 666, 343, nil, 1, 2]
8
+ end
9
+
10
+ context "#mean" do
11
+ it "calculates mean" do
12
+ expect(@dv.mean).to eq(282.2)
13
+ end
14
+ end
15
+
16
+ let(:dv) { DaruLite::Vector.new(["Tyrion", "Daenerys", nil, "Jon Starkgaryen"], index: DaruLite::Index.new([:t, :d, :n, :j])) } # string fixture (one nil entry) keyed :t, :d, :n, :j
17
+
18
+ context "#max" do
19
+ it "returns max value" do
20
+ expect(dv.max).to eq("Tyrion")
21
+ end
22
+ it "returns N max values" do
23
+ expect(dv.max(2)).to eq(["Tyrion","Jon Starkgaryen"])
24
+ end
25
+ it "returns max value, sorted by comparitive block input" do
26
+ expect(dv.max { |a,b| a.size <=> b.size }).to eq("Jon Starkgaryen")
27
+ end
28
+ it "returns N max values, sorted by comparitive block input" do
29
+ expect(dv.max(2) {|a,b| a.size <=> b.size}).to eq(["Jon Starkgaryen","Daenerys"])
30
+ end
31
+ end
32
+
33
+ context "#max_by" do
34
+ it "raises error without object block" do
35
+ expect { dv.max_by }.to raise_error(ArgumentError)
36
+ end
37
+ it "raises error without object block when N is given" do
38
+ expect { dv.max_by(2) }.to raise_error(ArgumentError)
39
+ end
40
+ it "returns max value, sorted by object block input" do
41
+ expect(dv.max_by { |x| x.size }).to eq("Jon Starkgaryen")
42
+ end
43
+ it "returns N max values, sorted by object block input" do
44
+ expect(dv.max_by(2) {|x| x.size }).to eq(["Jon Starkgaryen","Daenerys"])
45
+ end
46
+ end
47
+
48
+ context "#index_of_max" do
49
+ it "returns index_of_max value" do
50
+ expect(dv.index_of_max).to eq(:t)
51
+ end
52
+ it "returns N index_of_max values" do
53
+ expect(dv.index_of_max(2)).to eq([:t, :j])
54
+ end
55
+ it "returns index_of_max value, sorted by comparitive block input" do
56
+ expect(dv.index_of_max { |a,b| a.size <=> b.size }).to eq(:j)
57
+ end
58
+ it "returns N index_of_max values, sorted by comparitive block input" do
59
+ expect(dv.index_of_max(2) {|a,b| a.size <=> b.size}).to eq([:j, :d])
60
+ end
61
+ end
62
+
63
+ context "#index_of_max_by" do
64
+ it "raises error without object block" do
65
+ expect { dv.index_of_max_by }.to raise_error(ArgumentError)
66
+ end
67
+ it "raises error without object block when N is given" do
68
+ expect { dv.index_of_max_by(2) }.to raise_error(ArgumentError)
69
+ end
70
+ it "returns index_of_max value, sorted by object block input" do
71
+ expect(dv.index_of_max_by { |x| x.size }).to eq(:j)
72
+ end
73
+ it "returns N index_of_max values, sorted by object block input" do
74
+ expect(dv.index_of_max_by(2) {|x| x.size }).to eq([:j, :d])
75
+ end
76
+ end
77
+
78
+ context "#min" do
79
+ it "returns min value" do
80
+ expect(dv.min).to eq("Daenerys")
81
+ end
82
+ it "returns N min values" do
83
+ expect(dv.min(2)).to eq(["Daenerys","Jon Starkgaryen"])
84
+ end
85
+ it "returns min value, sorted by comparitive block input" do
86
+ expect(dv.min { |a,b| a.size <=> b.size }).to eq("Tyrion")
87
+ end
88
+ it "returns N min values, sorted by comparitive block input" do
89
+ expect(dv.min(2) {|a,b| a.size <=> b.size}).to eq(["Tyrion","Daenerys"])
90
+ end
91
+ end
92
+
93
+ context "#min_by" do
94
+ it "raises error without object block" do
95
+ expect { dv.min_by }.to raise_error(ArgumentError)
96
+ end
97
+ it "raises error without object block when N is given" do
98
+ expect { dv.min_by(2) }.to raise_error(ArgumentError)
99
+ end
100
+ it "returns min value, sorted by object block input" do
101
+ expect(dv.min_by { |x| x.size }).to eq("Tyrion")
102
+ end
103
+ it "returns N min values, sorted by object block input" do
104
+ expect(dv.min_by(2) {|x| x.size }).to eq(["Tyrion","Daenerys"])
105
+ end
106
+ end
107
+
108
+ context "#index_of_min" do
109
+ it "returns index of min value" do
110
+ expect(dv.index_of_min).to eq(:d)
111
+ end
112
+ it "returns N index of min values" do
113
+ expect(dv.index_of_min(2)).to eq([:d, :j])
114
+ end
115
+ it "returns index of min value, sorted by comparitive block input" do
116
+ expect(dv.index_of_min { |a,b| a.size <=> b.size }).to eq(:t)
117
+ end
118
+ it "returns N index of min values, sorted by comparitive block input" do
119
+ expect(dv.index_of_min(2) {|a,b| a.size <=> b.size}).to eq([:t, :d])
120
+ end
121
+ end
122
+
123
+ context "#index_of_min_by" do
124
+ it "raises error without object block" do
125
+ expect { dv.index_of_min_by }.to raise_error(ArgumentError)
126
+ end
127
+ it "raises error without object block when N is given" do
128
+ expect { dv.index_of_min_by(2) }.to raise_error(ArgumentError)
129
+ end
130
+ it "returns index of min value, sorted by object block input" do
131
+ expect(dv.index_of_min_by { |x| x.size }).to eq(:t)
132
+ end
133
+ it "returns N index of min values, sorted by object block input" do
134
+ expect(dv.index_of_min_by(2) {|x| x.size }).to eq([:t, :d])
135
+ end
136
+ end
137
+
138
+ context "#sum_of_squares" do
139
+ it "calcs sum of squares, omits nil values" do
140
+ v = DaruLite::Vector.new [1,2,3,4,5,6], dtype: dtype
141
+ expect(v.sum_of_squares).to eq(17.5)
142
+ end
143
+ end
144
+
145
+ context "#standard_deviation_sample" do
146
+ it "calcs standard deviation sample" do
147
+ @dv_with_nils.standard_deviation_sample
148
+ end
149
+ end
150
+
151
+ context "#variance_sample" do
152
+ it "calculates sample variance" do
153
+ expect(@dv.variance).to be_within(0.01).of(75118.84)
154
+ end
155
+ end
156
+
157
+ context "#standard_deviation_population" do
158
+ it "calculates standard deviation population" do
159
+ @dv.standard_deviation_population
160
+ end
161
+ end
162
+
163
+ context "#variance_population" do
164
+ it "calculates population variance" do
165
+ expect(@dv.variance_population).to be_within(0.001).of(67606.95999999999)
166
+ end
167
+ end
168
+
169
+ context "#covariance_sample" do
170
+ it "calculates sample covariance" do
171
+ @dv_1 = DaruLite::Vector.new [323, 11, 555, 666, 234, 21, 666, 343, 1, 2]
172
+ @dv_2 = DaruLite::Vector.new [123, 22, 444, 555, 324, 21, 666, 434, 5, 8]
173
+ expect(@dv_1.covariance @dv_2).to be_within(0.00001).of(65603.62222)
174
+ end
175
+ end
176
+
177
+ context "#covariance_population" do
178
+ it "calculates population covariance" do
179
+ @dv_1 = DaruLite::Vector.new [323, 11, 555, 666, 234, 21, 666, 343, 1, 2]
180
+ @dv_2 = DaruLite::Vector.new [123, 22, 444, 555, 324, 21, 666, 434, 5, 8]
181
+ expect(@dv_1.covariance_population @dv_2).to be_within(0.01).of(59043.26)
182
+ end
183
+ end
184
+
185
+ context "#sum_of_squared_deviation" do
186
+ it "calculates sum of squared deviation" do
187
+ expect(@dv.sum_of_squared_deviation).to eq(676069.6)
188
+ end
189
+ end
190
+
191
+ context "#skew" do
192
+ it "calculates skewness" do
193
+ @dv.skew
194
+ end
195
+ end
196
+
197
+ context "#max" do
198
+ it "returns the max value" do
199
+ expect(@dv.max).to eq(666)
200
+ end
201
+ end
202
+
203
+ context "#min" do
204
+ it "returns the min value" do
205
+ expect(@dv.min).to eq(1)
206
+ end
207
+ end
208
+
209
+ context "#sum" do
210
+ it "returns the sum" do
211
+ expect(@dv.sum).to eq(2822)
212
+ end
213
+ end
214
+
215
+ context "#product" do
216
+ it "returns the product" do
217
+ v = DaruLite::Vector.new [1, 2, 3, 4, 5], dtype: dtype
218
+ expect(v.product).to eq(120)
219
+ end
220
+ end
221
+
222
+ context "#median" do
223
+ it "returns the median" do
224
+ @dv.median
225
+ end
226
+ end
227
+
228
+ context "#mode" do
229
+ it "returns the single modal value as a numeric" do
230
+ mode_test_example = DaruLite::Vector.new [1,2,3,2,4,4,4,4], dtype: dtype
231
+ expect(mode_test_example.mode).to eq(4)
232
+ end
233
+
234
+ it "returns multiple modal values as a vector" do
235
+ mode_test_example = DaruLite::Vector.new [1,2,2,2,3,2,4,4,4,4], dtype: dtype
236
+ expect(mode_test_example.mode).to eq(DaruLite::Vector.new [2,4], dtype: dtype)
237
+ end
238
+ end
239
+
240
+ context "#describe" do
241
+ it "generates count, mean, std, min and max of vectors in one shot" do
242
+ expect(@dv.describe.round(2)).to eq(DaruLite::Vector.new([10.00, 282.20, 274.08, 1.00, 666.00],
243
+ index: [:count, :mean, :std, :min, :max],
244
+ name: :statistics
245
+ ))
246
+ end
247
+ end
248
+
249
+ context "#kurtosis" do
250
+ it "calculates kurtosis" do
251
+ @dv.kurtosis
252
+ end
253
+ end
254
+
255
+ context "#count" do
256
+ it "counts specified element" do
257
+ expect(@dv.count(323)).to eq(1)
258
+ end
259
+
260
+ it "counts total number of elements" do
261
+ expect(@dv.count).to eq(10)
262
+ end
263
+
264
+ it "counts by block provided" do
265
+ expect(@dv.count{|e| e.to_i.even? }).to eq(4)
266
+ end
267
+ end
268
+
269
+ context "#value_counts" do
270
+ it "counts number of unique values in the Vector" do
271
+ vector = DaruLite::Vector.new(
272
+ ["America","America","America","America","America",
273
+ "India","India", "China", "India", "China"])
274
+ expect(vector.value_counts).to eq(
275
+ DaruLite::Vector.new([5,3,2], index: ["America", "India", "China"]))
276
+ end
277
+ end
278
+
279
+ context "#coefficient_of_variation" do
280
+ it "calculates coefficient_of_variation" do
281
+ @dv.coefficient_of_variation
282
+ end
283
+ end
284
+
285
+ context "#percentile" do
286
+ it "calculates mid point percentile" do
287
+ expect(@dv.percentile(50)).to eq(278.5)
288
+ end
289
+
290
+ it "calculates linear percentile" do
291
+ # FIXME: Not enough testing?..
292
+ expect(@dv.percentile(50, :linear)).to eq(278.5)
293
+ end
294
+
295
+ it "fails on unknown strategy" do
296
+ expect { @dv.percentile(50, :killemall) }.to raise_error(ArgumentError, /strategy/)
297
+ end
298
+ end
299
+
300
+ context "#average_deviation_population" do
301
+ it "calculates average_deviation_population" do
302
+ a = DaruLite::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype: dtype)
303
+ expect(a.average_deviation_population).to eq(20.quo(9).to_f)
304
+ end
305
+ end
306
+
307
+ context "#proportion" do
308
+ it "calculates proportion" do
309
+ expect(@dv.proportion(1)).to eq(0.1)
310
+ end
311
+ end
312
+
313
+ context "#proportions" do
314
+ it "calculates proportions" do
315
+ actual_proportions = {
316
+ 323=>0.1,11=>0.1,555=>0.1,666=>0.2,234=>0.1,21=>0.1,343=>0.1,1=>0.1,2=>0.1
317
+ }
318
+ expect(@dv.proportions).to eq(actual_proportions)
319
+ end
320
+ end
321
+
322
+ context "#standard_error" do
323
+ it "calculates standard error" do
324
+ expect(@dv.standard_error).to be_within(1e-9).of(@dv.standard_deviation_sample / Math.sqrt(@dv.size)) # checked against the definition: sample sd / sqrt(n); @dv has no nils
325
+ end
326
+ end
327
+
328
+ context "#vector_standardized_compute" do
329
+ it "calculates vector_standardized_compute" do
330
+ @dv.vector_standardized_compute(@dv.mean, @dv.sd)
331
+ @dv_with_nils.vector_standardized_compute(@dv.mean, @dv.sd)
332
+ end
333
+ end
334
+
335
+ context "#vector_centered_compute" do
336
+ it "calculates vector_centered_compute" do
337
+ @dv.vector_centered_compute(@dv.mean)
338
+ @dv_with_nils.vector_centered_compute(@dv.mean)
339
+ end
340
+ end
341
+ end
342
+
343
+ # Only Array tests
344
+ context "#percentile" do
345
+ it "tests linear percentile strategy" do
346
+ values = DaruLite::Vector.new [102, 104, 105, 107, 108, 109, 110, 112, 115, 116].shuffle
347
+ expect(values.percentil(0, :linear)).to eq(102)
348
+ expect(values.percentil(25, :linear)).to eq(104.75)
349
+ expect(values.percentil(50, :linear)).to eq(108.5)
350
+ expect(values.percentil(75, :linear)).to eq(112.75)
351
+ expect(values.percentil(100, :linear)).to eq(116)
352
+
353
+ values = DaruLite::Vector.new [102, 104, 105, 107, 108, 109, 110, 112, 115, 116, 118].shuffle
354
+ expect(values.percentil(0, :linear)).to eq(102)
355
+ expect(values.percentil(25, :linear)).to eq(105)
356
+ expect(values.percentil(50, :linear)).to eq(109)
357
+ expect(values.percentil(75, :linear)).to eq(115)
358
+ expect(values.percentil(100, :linear)).to eq(118)
359
+ end
360
+ end
361
+
362
+ context "#frequencies" do
363
+ let(:vector) { DaruLite::Vector.new([5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99]) }
364
+ subject { vector.frequencies }
365
+ it { is_expected.to eq DaruLite::Vector.new(
366
+ [5,2,1,1,1,1,1,1,1,1,2],
367
+ index: [5,6,7,8,9,10,1,2,3,4,-99]
368
+ )}
369
+ end
370
+
371
+ context "#ranked" do
372
+ it "curates by rank" do
373
+ vector = DaruLite::Vector.new([nil, 0.8, 1.2, 1.2, 2.3, 18, nil])
374
+ expect(vector.ranked).to eq(DaruLite::Vector.new([nil,1,2.5,2.5,4,5,nil]))
375
+
376
+ v = DaruLite::Vector.new [0.8, 1.2, 1.2, 2.3, 18]
377
+ expect(v.ranked).to eq(DaruLite::Vector.new [1, 2.5, 2.5, 4, 5])
378
+ end
379
+
380
+ it "tests paired ties" do
381
+ a = DaruLite::Vector.new [0, 0, 0, 1, 1, 2, 3, 3, 4, 4, 4]
382
+ expected = DaruLite::Vector.new [2, 2, 2, 4.5, 4.5, 6, 7.5, 7.5, 10, 10, 10]
383
+ expect(a.ranked).to eq(expected)
384
+ end
385
+ end
386
+
387
+ context "#dichotomize" do
388
+ it "dichotomizes" do
389
+ a = DaruLite::Vector.new [0, 0, 0, 1, 2, 3, nil]
390
+ exp = DaruLite::Vector.new [0, 0, 0, 1, 1, 1, nil]
391
+ expect(a.dichotomize).to eq(exp)
392
+
393
+ a = DaruLite::Vector.new [1, 1, 1, 2, 2, 2, 3]
394
+ exp = DaruLite::Vector.new [0, 0, 0, 1, 1, 1, 1]
395
+ expect(a.dichotomize).to eq(exp)
396
+
397
+ a = DaruLite::Vector.new [0, 0, 0, 1, 2, 3, nil]
398
+ exp = DaruLite::Vector.new [0, 0, 0, 0, 1, 1, nil]
399
+ expect(a.dichotomize(1)).to eq(exp)
400
+
401
+ a = DaruLite::Vector.new %w(a a a b c d)
402
+ exp = DaruLite::Vector.new [0, 0, 0, 1, 1, 1]
403
+ expect(a.dichotomize).to eq(exp)
404
+ end
405
+ end
406
+
407
+ context "#median_absolute_deviation" do
408
+ it "calculates median_absolute_deviation" do
409
+ a = DaruLite::Vector.new [1, 1, 2, 2, 4, 6, 9]
410
+ expect(a.median_absolute_deviation).to eq(1)
411
+ end
412
+ end
413
+
414
+ context "#round" do
415
+ it "rounds non-nil values" do
416
+ vector = DaruLite::Vector.new([1.44,55.32,nil,4])
417
+ expect(vector.round(1)).to eq(DaruLite::Vector.new([1.4,55.3,nil,4]))
418
+ end
419
+ end
420
+
421
+ context "#center" do
422
+ it "centers" do
423
+ mean = rand
424
+ samples = 11
425
+ centered = DaruLite::Vector.new(samples.times.map { |i| i - ((samples / 2).floor).to_i })
426
+ not_centered = centered.recode { |v| v + mean }
427
+ obs = not_centered.center
428
+ centered.each_with_index do |v, i|
429
+ expect(v).to be_within(0.0001).of(obs[i])
430
+ end
431
+ end
432
+ end
433
+
434
+ context "#standardize" do
435
+ it "returns a standardized vector" do
436
+ vector = DaruLite::Vector.new([11,55,33,25,nil,22])
437
+ expect(vector.standardize.round(2)).to eq(
438
+ DaruLite::Vector.new([-1.11, 1.57, 0.23, -0.26,nil, -0.44])
439
+ )
440
+ end
441
+
442
+ it "tests for vector standardized with zero variance" do
443
+ v1 = DaruLite::Vector.new 100.times.map { |_i| 1 }
444
+ exp = DaruLite::Vector.new 100.times.map { nil }
445
+ expect(v1.standardize).to eq(exp)
446
+ end
447
+ end
448
+
449
+ context "#vector_percentile" do
450
+ it "replaces each non-nil value with its percentile value" do
451
+ vector = DaruLite::Vector.new([1,nil,nil,2,2,3,4,nil,nil,5,5,5,6,10])
452
+ expect(vector.vector_percentile).to eq(DaruLite::Vector.new(
453
+ [10,nil,nil,25,25,40,50,nil,nil,70,70,70,90,100])
454
+ )
455
+ end
456
+ end
457
+
458
+ context "#sample_with_replacement" do
459
+ it "calculates sample_with_replacement" do
460
+ vec = DaruLite::Vector.new(
461
+ [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99],
462
+ name: :common_all_dtypes)
463
+ srand(1)
464
+ expect(vec.sample_with_replacement(100).size).to eq(100)
465
+
466
+ srand(1)
467
+ expect(vec.sample_with_replacement(100).size).to eq(100)
468
+ end
469
+ end
470
+
471
+ context "#sample_without_replacement" do
472
+ it "calculates sample_without_replacement" do
473
+ vec = DaruLite::Vector.new(
474
+ [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99],
475
+ name: :common_all_dtypes)
476
+
477
+ srand(1)
478
+ expect(vec.sample_without_replacement(17).sort).to eq(
479
+ vec.reject_values(*DaruLite::MISSING_VALUES).to_a.sort)
480
+ expect {
481
+ vec.sample_without_replacement(20)
482
+ }.to raise_error(ArgumentError)
483
+
484
+ srand(1)
485
+ expect(vec.sample_without_replacement(17).sort).to eq(
486
+ vec.reject_values(*DaruLite::MISSING_VALUES).to_a.sort)
487
+ end
488
+ end
489
+
490
+ context "#jackknife" do
491
+ it "jack knife correctly with named method" do
492
+ a = DaruLite::Vector.new [1, 2, 3, 4]
493
+ df = a.jackknife(:mean)
494
+ expect(df[:mean].mean).to eq (a.mean)
495
+
496
+ df = a.jackknife([:mean, :sd])
497
+ expect(df[:mean].mean).to eq(a.mean)
498
+ expect(df[:mean].sd).to eq(a.sd)
499
+ end
500
+
501
+ it "jack knife correctly with custom method" do
502
+ a = DaruLite::Vector.new [17.23, 18.71, 13.93, 18.81, 15.78, 11.29, 14.91, 13.39, 18.21, 11.57, 14.28, 10.94, 18.83, 15.52, 13.45, 15.25]
503
+ ds = a.jackknife(log_s2: ->(v) { Math.log(v.variance) })
504
+ exp = DaruLite::Vector.new [1.605, 2.972, 1.151, 3.097, 0.998, 3.308, 0.942, 1.393, 2.416, 2.951, 1.043, 3.806, 3.122, 0.958, 1.362, 0.937]
505
+
506
+ expect_correct_vector_in_delta ds[:log_s2], exp, 0.001
507
+ # expect(ds[:log_s2]).to be_within(0.001).of(exp)
508
+ expect(ds[:log_s2].mean).to be_within(0.00001).of(2.00389)
509
+ expect(ds[:log_s2].variance).to be_within(0.001).of(1.091)
510
+ end
511
+
512
+ it "jack knife correctly with k > 1" do
513
+ rng = Distribution::Normal.rng(0,1)
514
+ a = DaruLite::Vector.new_with_size(6) { rng.call}
515
+
516
+ ds = a.jackknife(:mean, 2)
517
+ mean = a.mean
518
+ exp = DaruLite::Vector.new [3 * mean - 2 * (a[2] + a[3] + a[4] + a[5]) / 4, 3 * mean - 2 * (a[0] + a[1] + a[4] + a[5]) / 4, 3 * mean - 2 * (a[0] + a[1] + a[2] + a[3]) / 4]
519
+ expect_correct_vector_in_delta(exp, ds[:mean], 1e-13)
520
+ end
521
+ end
522
+
523
+ before do
524
+ # daily closes of iShares XIU on the TSX
525
+ @shares = DaruLite::Vector.new([17.28, 17.45, 17.84, 17.74, 17.82, 17.85, 17.36, 17.3, 17.56, 17.49, 17.46, 17.4, 17.03, 17.01,16.86, 16.86, 16.56, 16.36, 16.66, 16.77])
526
+ end
527
+
528
+ context "#acf" do
529
+ it "calculates autocorrelation co-efficients" do
530
+ acf = @shares.acf
531
+
532
+ expect(acf.length).to eq(14)
533
+
534
+ # test the first few autocorrelations
535
+ expect(acf[0]).to be_within(0.0001).of(1.0)
536
+ expect(acf[1]).to be_within(0.001) .of(0.852)
537
+ expect(acf[2]).to be_within(0.001) .of(0.669)
538
+ expect(acf[3]).to be_within(0.001) .of(0.486)
539
+ end
540
+ end
541
+
542
+ context "#percent_change" do
543
+ it "calculates percent change" do
544
+ vector = DaruLite::Vector.new([4,6,6,8,10],index: ['a','f','t','i','k'])
545
+ expect(vector.percent_change).to eq(
546
+ DaruLite::Vector.new([nil, 0.5, 0.0, 0.3333333333333333, 0.25], index: ['a','f','t','i','k']))
547
+ end
548
+
549
+ it "tests for numerical vectors with nils" do
550
+ vector2 = DaruLite::Vector.new([nil,6,nil,8,10],index: ['a','f','t','i','k'])
551
+ expect(vector2.percent_change).to eq(
552
+ DaruLite::Vector.new([nil, nil, nil, 0.3333333333333333, 0.25], index: ['a','f','t','i','k']))
553
+ end
554
+ end
555
+
556
+ context "#diff" do
557
+ it "performs the difference of the series" do
558
+ diff = @shares.diff
559
+
560
+ expect(diff.class).to eq(DaruLite::Vector)
561
+ expect(diff[@shares.size - 1]).to be_within(0.001).of( 0.11)
562
+ expect(diff[@shares.size - 2]).to be_within(0.001).of( 0.30)
563
+ expect(diff[@shares.size - 3]).to be_within(0.001).of(-0.20)
564
+ end
565
+ end
566
+
567
+ context "#rolling" do
568
+ it "calculates rolling mean" do
569
+ ma10 = @shares.rolling_mean
570
+
571
+ expect(ma10[-1]) .to be_within(0.001).of(16.897)
572
+ expect(ma10[-5]) .to be_within(0.001).of(17.233)
573
+ expect(ma10[-10]).to be_within(0.001).of(17.587)
574
+
575
+ # test with a different lookback period
576
+ ma5 = @shares.rolling :mean, 5
577
+
578
+ expect(ma5[-1]).to be_within(0.001).of(16.642)
579
+ expect(ma5[-10]).to be_within(0.001).of(17.434)
580
+ expect(ma5[-15]).to be_within(0.001).of(17.74)
581
+ end
582
+
583
+ it "calculates rolling median" do
584
+ me10 = @shares.rolling_median.round(2)
585
+ expect(me10).to eq(DaruLite::Vector.new([nil,nil,nil,nil,nil,nil,nil,nil,nil,17.525,17.525,17.525,17.475,17.430,17.380,17.330,17.165,17.020,16.94,16.860]).round(2))
586
+
587
+ me5 = @shares.rolling(:median, 5).round(2)
588
+ expect(me5).to eq(DaruLite::Vector.new([nil,nil,nil,nil,17.74,17.82,17.82,17.74,17.56,17.49,17.46,17.46,17.46,17.40,17.03,17.01,16.86,16.86,16.66,16.66]))
589
+ end
590
+
591
+ it "calculates rolling max" do
592
+ max10 = @shares.rolling_max.round(2)
593
+ expect(max10).to eq(DaruLite::Vector.new([nil,nil,nil,nil,nil,nil,nil,nil,nil,17.85,17.85,17.85,17.85,17.85,17.85,17.56,17.56,17.56,17.49,17.46]))
594
+
595
+ max5 = @shares.rolling(:max, 5).round(2)
596
+ expect(max5).to eq(DaruLite::Vector.new([nil, nil, nil, nil,17.84,17.85,17.85,17.85,17.85,17.85,17.56,17.56,17.56,17.49,17.46,17.40,17.03,17.01,16.86,16.86]))
597
+ end
598
+
599
+ it "calculates rolling min" do
600
+ min10 = @shares.rolling_min.round(2)
601
+ expect(min10).to eq(DaruLite::Vector.new([nil,nil,nil,nil,nil,nil,nil,nil,nil,17.28,17.30,17.30,17.03,17.01,16.86,16.86,16.56,16.36,16.36,16.36]))
602
+
603
+ min5 = @shares.rolling(:min, 5).round(2)
604
+ expect(min5).to eq(DaruLite::Vector.new([nil,nil,nil,nil,17.28,17.45,17.36,17.30,17.30,17.30,17.30,17.30,17.03,17.01,16.86,16.86,16.56,16.36,16.36,16.36]))
605
+ end
606
+
607
+ it "calculates rolling sum" do
608
+ sum10 = @shares.rolling_sum.round(2)
609
+ expect(sum10).to eq(DaruLite::Vector.new([nil,nil,nil,nil,nil,nil,nil,nil,nil,175.69,175.87,175.82,175.01,174.28,173.32,172.33,171.53,170.59,169.69,168.97]))
610
+
611
+ sum5 = @shares.rolling(:sum, 5).round(2)
612
+ expect(sum5).to eq(DaruLite::Vector.new([nil,nil,nil,nil,88.13,88.70,88.61,88.07,87.89,87.56,87.17,87.21,86.94,86.39,85.76,85.16,84.32,83.65,83.30,83.21]))
613
+ end
614
+
615
+ it "calculates rolling std" do
616
+ std10 = @shares.rolling_std.round(2)
617
+ expect(std10).to eq(DaruLite::Vector.new([nil,nil,nil,nil,nil,nil,nil,nil,nil,0.227227,0.208116,0.212331,0.253485,0.280666,0.295477,0.267127,0.335826,0.412834,0.388886,0.345995]).round(2))
618
+
619
+ std5 = @shares.rolling(:std, 5).round(2)
620
+ expect(std5).to eq(DaruLite::Vector.new([nil,nil,nil,nil,0.248556,0.167780,0.206930,0.263211,0.253811,0.215105,0.103827,0.098082,0.208255,0.237844,0.263002,0.220839,0.187963,0.263629,0.212132,0.193959]).round(2))
621
+ end
622
+
623
+ it "calculates rolling variance" do
624
+ var10 = @shares.rolling_variance.round(2)
625
+ expect(var10).to eq(DaruLite::Vector.new([nil,nil,nil,nil,nil,nil,nil,nil,nil,0.051632,0.043312,0.045084,0.064254,0.078773,0.087307,0.071357,0.112779,0.170432,0.151232,0.119712]).round(2))
626
+
627
+ var5 = @shares.rolling(:variance, 5).round(2)
628
+ expect(var5).to eq(DaruLite::Vector.new([nil,nil,nil,nil,0.06178,0.02815,0.04282,0.06928,0.06442,0.04627,0.01078,0.00962,0.04337,0.05657,0.06917,0.04877,0.03533,0.06950,0.04500,0.03762]).round(2))
629
+ end
630
+
631
+ it "calculates rolling non-nil count" do
632
+ @shares.rolling_count
633
+ end
634
+ end
635
+
636
+ context "#ema" do
637
+ it "calculates exponential moving average" do
638
+ # test default
639
+ ema10 = @shares.ema
640
+
641
+ expect(ema10[-1]) .to be_within(0.00001).of( 16.87187)
642
+ expect(ema10[-5]) .to be_within(0.00001).of( 17.19187)
643
+ expect(ema10[-10]).to be_within(0.00001).of( 17.54918)
644
+
645
+ # test with a different loopback period
646
+ ema5 = @shares.ema 5
647
+
648
+ expect(ema5[-1]) .to be_within( 0.00001).of(16.71299)
649
+ expect(ema5[-10]).to be_within( 0.00001).of(17.49079)
650
+ expect(ema5[-15]).to be_within( 0.00001).of(17.70067)
651
+
652
+ # test with a different smoother
653
+ ema_w = @shares.ema 10, true
654
+
655
+ expect(ema_w[-1]) .to be_within(0.00001).of(17.08044)
656
+ expect(ema_w[-5]) .to be_within(0.00001).of(17.33219)
657
+ expect(ema_w[-10]).to be_within(0.00001).of(17.55810)
658
+ end
659
+ end
660
+
661
+ context "#emv" do
662
+ it "calculates exponential moving variance" do
663
+ # test default
664
+ emv10 = @shares.emv
665
+
666
+ expect(emv10[-1]) .to be_within(0.00001).of(0.14441)
667
+ expect(emv10[-5]) .to be_within(0.00001).of(0.10797)
668
+ expect(emv10[-10]).to be_within(0.00001).of(0.03979)
669
+
670
+ # test with a different loopback period
671
+ emv5 = @shares.emv 5
672
+
673
+ expect(emv5[-1]) .to be_within(0.00001).of(0.05172)
674
+ expect(emv5[-10]).to be_within(0.00001).of(0.01736)
675
+ expect(emv5[-15]).to be_within(0.00001).of(0.04410)
676
+
677
+ # test with a different smoother
678
+ emv_w = @shares.emv 10, true
679
+
680
+ expect(emv_w[-1]) .to be_within(0.00001).of(0.20318)
681
+ expect(emv_w[-5]) .to be_within(0.00001).of(0.11319)
682
+ expect(emv_w[-10]).to be_within(0.00001).of(0.04289)
683
+ end
684
+ end
685
+
686
+ context "#emsd" do
687
+ it "calculates exponential moving standard deviation" do
688
+ # test default
689
+ emsd10 = @shares.emsd
690
+
691
+ expect(emsd10[-1]) .to be_within(0.00001).of(0.38002)
692
+ expect(emsd10[-5]) .to be_within(0.00001).of(0.32859)
693
+ expect(emsd10[-10]).to be_within(0.00001).of(0.19947)
694
+
695
+ # test with a different loopback period
696
+ emsd5 = @shares.emsd 5
697
+
698
+ expect(emsd5[-1]) .to be_within(0.00001).of(0.22742)
699
+ expect(emsd5[-10]).to be_within(0.00001).of(0.13174)
700
+ expect(emsd5[-15]).to be_within(0.00001).of(0.21000)
701
+
702
+ # test with a different smoother
703
+ emsd_w = @shares.emsd 10, true
704
+
705
+ expect(emsd_w[-1]) .to be_within(0.00001).of(0.45076)
706
+ expect(emsd_w[-5]) .to be_within(0.00001).of(0.33644)
707
+ expect(emsd_w[-10]).to be_within(0.00001).of(0.20710)
708
+ end
709
+ end
710
+
711
+ RSpec.shared_examples 'correct macd' do |*args|
712
+ let(:source) { DaruLite::DataFrame.from_csv('spec/fixtures/macd_data.csv') }
713
+
714
+ # skip initial records during compare as ema is sensitive to
715
+ # period used.
716
+ # http://ta-lib.org/d_api/ta_setunstableperiod.html
717
+ let(:stability_offset) { 90 }
718
+ let(:delta) { 0.001 }
719
+ let(:desc) { args.empty? ? '12_26_9' : args.join('_') }
720
+
721
+ subject { source['price'].macd(*args) }
722
+
723
+ %w[ macd macdsig macdhist ].each_with_index do |field, i|
724
+ it do
725
+ act = subject[i][stability_offset..-1]
726
+ exp = source["#{field}_#{desc}"][stability_offset..-1]
727
+ expect(act).to be_all_within(delta).of(exp)
728
+ end
729
+ end
730
+ end
731
+
732
+ describe '#macd' do
733
+ context 'by default' do
734
+ it_should_behave_like 'correct macd'
735
+ end
736
+
737
+ context 'custom values for fast, slow, signal' do
738
+ it_should_behave_like 'correct macd', 6, 13, 4
739
+ end
740
+
741
+ end
742
+
743
+ context "#cumsum" do
744
+ it "calculates cumulative sum" do
745
+ vector = DaruLite::Vector.new([1,2,3,4,5,6,7,8,9,10])
746
+ expect(vector.cumsum).to eq(
747
+ DaruLite::Vector.new([1,3,6,10,15,21,28,36,45,55]))
748
+ end
749
+
750
+ it "works with missing values" do
751
+ vector = DaruLite::Vector.new([1,2,nil,3,nil,4,5])
752
+ expect(vector.cumsum).to eq(
753
+ DaruLite::Vector.new([1,3,nil,6,nil,10,15]))
754
+ end
755
+ end
756
+ end