daru_lite 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (149) hide show
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
@@ -0,0 +1,1019 @@
1
+ module DaruLite
2
+ module Maths
3
+ # Encapsulates statistics methods for vectors. Most basic stuff like mean, etc.
4
+ # is done inside the wrapper, so that native methods can be used for most of
5
+ # the computationally intensive tasks.
6
+ module Statistics
7
+ module Vector # rubocop:disable Metrics/ModuleLength
8
+ extend Gem::Deprecate
9
+
10
# Mean of the vector, as computed by the underlying data wrapper.
#
# @return [Object] whatever +@data.mean+ returns
def mean
  wrapper = @data
  wrapper.mean
end
13
+
14
# Sum of the vector, as computed by the underlying data wrapper.
#
# @return [Object] whatever +@data.sum+ returns
def sum
  wrapper = @data
  wrapper.sum
end
17
+
18
# Product of the vector, as computed by the underlying data wrapper.
#
# @return [Object] whatever +@data.product+ returns
def product
  wrapper = @data
  wrapper.product
end
21
+
22
# Spread of the vector: the result of #max minus the result of #min.
#
# @return [Object] the difference between the largest and smallest value
def range
  highest = max
  lowest = min
  highest - lowest
end
25
+
26
# Median of the vector.
#
# Uses the data wrapper's own +median+ when it provides one, otherwise falls
# back to the 50th percentile.
#
# @return [Object] the median value
def median
  return @data.median if @data.respond_to?(:median)

  percentile(50)
end
29
+
30
# Most frequent value(s) of the vector.
#
# The frequency table and its maximum count are computed once up front; the
# previous implementation rebuilt both for every distinct value.
#
# @return [Object, DaruLite::Vector, nil] the single most frequent value, a
#   vector of values when several tie for first place, or nil when the
#   frequency table is empty
def mode
  counts = frequencies
  table = counts.to_h
  # Nothing was counted: there is no mode (and no maximum to look up).
  return nil if table.empty?

  highest = counts.max
  winners = table.select { |_, occurrences| occurrences == highest }.keys
  winners.size > 1 ? DaruLite::Vector.new(winners) : winners.first
end
34
+
35
# Summarise the vector with several aggregate statistics in one call.
#
# @param methods [Array<Symbol>, nil] names of the aggregation methods to
#   invoke on this vector, applied in the given order. When omitted (or
#   falsy) the default is [:count, :mean, :std, :min, :max].
# @return [DaruLite::Vector] the computed values, indexed by method name and
#   named :statistics
def describe(methods = nil)
  stats = methods || %i[count mean std min max]
  values = stats.map { |stat| send(stat) }
  DaruLite::Vector.new(values, index: stats, name: :statistics)
end
48
+
49
# Median of the absolute deviations of each value from the vector's median.
#
# @return [Object] the median of |value - median| over the recoded vector
def median_absolute_deviation
  centre = median
  deviations = recode { |val| (val - centre).abs }
  deviations.median
end

alias mad median_absolute_deviation
55
+
56
# Standard error of the mean: the sample standard deviation divided by the
# square root of the number of non-missing values.
#
# @return [Float]
def standard_error
  deviation = standard_deviation_sample
  valid_count = size - count_values(*DaruLite::MISSING_VALUES)
  deviation / Math.sqrt(valid_count)
end
59
+
60
# Sum of squared deviations, computed as sum(x^2) - (sum(x))^2 / N, where N
# is the number of non-missing values.
#
# Missing values are excluded from the sum of squares so that the numerator
# matches the non-missing count used in the denominator; previously a nil in
# the data raised NoMethodError when it was squared.
#
# @return [Float]
def sum_of_squared_deviation
  present = reject_values(*DaruLite::MISSING_VALUES)
  squares = present.data.inject(0) { |acc, x| acc + (x**2) }
  valid_count = size - count_values(*DaruLite::MISSING_VALUES)
  (squares - (sum**2).quo(valid_count).to_f).to_f
end
66
+
67
# Distinct non-missing values of the vector.
#
# @return [Object] the result of dropping missing values, de-duplicating and
#   then calling +reset_index!+ on what remains
def factors
  present = reject_values(*DaruLite::MISSING_VALUES)
  present.uniq.reset_index!
end
71
+
72
+ if RUBY_VERSION >= '2.2'
73
# Largest value(s) in the vector, ignoring missing values.
#
# @param size [Integer, nil] how many of the largest values to return.
#   Defaults to nil, which yields a single value rather than an array.
# @yield [a, b] optional comparator, passed straight to Array#max
# @return [Object, Array] the maximum, or an array of the +size+ largest
#
# @example
#
#   dv = DaruLite::Vector.new(["Tyrion", "Daenerys", "Jon Starkgaryen"],
#                             index: DaruLite::Index.new([:t, :d, :j]))
#   #=>
#   # #<DaruLite::Vector(3)>
#   #   t   Tyrion
#   #   d   Daenerys
#   #   j   Jon Starkgaryen
#
#   dv.max
#   #=> "Tyrion"
#
#   dv.max(2) { |a,b| a.size <=> b.size }
#   #=> ["Jon Starkgaryen","Daenerys"]
def max(size = nil, &block)
  candidates = reject_values(*DaruLite::MISSING_VALUES).to_a
  candidates.max(size, &block)
end
95
+
96
# Largest value(s) in the vector as ranked by a key block, ignoring missing
# values.
#
# @param size [Integer, nil] how many of the largest values to return.
#   Defaults to nil, which yields a single value rather than an array.
# @yield [value] compulsory block producing the key to rank by
# @return [Object, Array]
# @raise [ArgumentError] when no block is given
#
# @example
#
#   dv = DaruLite::Vector.new(["Tyrion", "Daenerys", "Jon Starkgaryen"],
#                             index: DaruLite::Index.new([:t, :d, :j]))
#   #=>
#   # #<DaruLite::Vector(3)>
#   #   t   Tyrion
#   #   d   Daenerys
#   #   j   Jon Starkgaryen
#
#   dv.max_by(2) { |i| i.size }
#   #=> ["Jon Starkgaryen","Daenerys"]
def max_by(size = nil, &block)
  raise ArgumentError, 'Expected compulsory object block in max_by method' unless block

  candidates = reject_values(*DaruLite::MISSING_VALUES).to_a
  candidates.max_by(size, &block)
end
117
+
118
# Smallest value(s) in the vector, ignoring missing values.
#
# @param size [Integer, nil] how many of the smallest values to return.
#   Defaults to nil, which yields a single value rather than an array.
# @yield [a, b] optional comparator, passed straight to Array#min
# @return [Object, Array] the minimum, or an array of the +size+ smallest
#
# @example
#
#   dv = DaruLite::Vector.new(["Tyrion", "Daenerys", "Jon Starkgaryen"],
#                             index: DaruLite::Index.new([:t, :d, :j]))
#   #=>
#   # #<DaruLite::Vector(3)>
#   #   t   Tyrion
#   #   d   Daenerys
#   #   j   Jon Starkgaryen
#
#   dv.min
#   #=> "Daenerys"
#
#   dv.min(2) { |a,b| a.size <=> b.size }
#   #=> ["Tyrion","Daenerys"]
def min(size = nil, &block)
  candidates = reject_values(*DaruLite::MISSING_VALUES).to_a
  candidates.min(size, &block)
end
140
+
141
# Smallest value(s) in the vector as ranked by a key block, ignoring missing
# values.
#
# @param size [Integer, nil] how many of the smallest values to return.
#   Defaults to nil, which yields a single value rather than an array.
# @yield [value] compulsory block producing the key to rank by
# @return [Object, Array]
# @raise [ArgumentError] when no block is given
#
# @example
#
#   dv = DaruLite::Vector.new(["Tyrion", "Daenerys", "Jon Starkgaryen"],
#                             index: DaruLite::Index.new([:t, :d, :j]))
#   #=>
#   # #<DaruLite::Vector(3)>
#   #   t   Tyrion
#   #   d   Daenerys
#   #   j   Jon Starkgaryen
#
#   dv.min_by(2) { |i| i.size }
#   #=> ["Tyrion","Daenerys"]
def min_by(size = nil, &block)
  raise ArgumentError, 'Expected compulsory object block in min_by method' unless block

  candidates = reject_values(*DaruLite::MISSING_VALUES).to_a
  candidates.min_by(size, &block)
end
162
+ else
163
# Largest value(s) in the vector, ignoring missing values. Implemented by
# sorting and reversing rather than via Array#max with a count.
#
# @param size [Integer, nil] how many of the largest values to return.
#   Defaults to nil, which yields a single value rather than an array.
# @yield [a, b] optional comparator, passed to Array#sort
# @return [Object, Array] the maximum, or an array of the +size+ largest
#
# @example
#
#   dv = DaruLite::Vector.new(["Tyrion", "Daenerys", "Jon Starkgaryen"],
#                             index: DaruLite::Index.new([:t, :d, :j]))
#   #=>
#   # #<DaruLite::Vector(3)>
#   #   t   Tyrion
#   #   d   Daenerys
#   #   j   Jon Starkgaryen
#
#   dv.max
#   #=> "Tyrion"
#
#   dv.max(2) { |a,b| a.size <=> b.size }
#   #=> ["Jon Starkgaryen","Daenerys"]
def max(size = nil, &block)
  # A bare index picks one element; a range picks the leading +size+ elements.
  selector = size.nil? ? 0 : (0..size - 1)
  descending = reject_values(*DaruLite::MISSING_VALUES).to_a.sort(&block).reverse
  descending[selector]
end
186
+
187
# Largest value(s) in the vector as ranked by a key block, ignoring missing
# values. Implemented by sorting on the key and reversing.
#
# @param size [Integer, nil] how many of the largest values to return.
#   Defaults to nil, which yields a single value rather than an array.
# @yield [value] compulsory block producing the key to rank by
# @return [Object, Array]
# @raise [ArgumentError] when no block is given
#
# @example
#
#   dv = DaruLite::Vector.new(["Tyrion", "Daenerys", "Jon Starkgaryen"],
#                             index: DaruLite::Index.new([:t, :d, :j]))
#   #=>
#   # #<DaruLite::Vector(3)>
#   #   t   Tyrion
#   #   d   Daenerys
#   #   j   Jon Starkgaryen
#
#   dv.max_by(2) { |i| i.size }
#   #=> ["Jon Starkgaryen","Daenerys"]
def max_by(size = nil, &block)
  raise ArgumentError, 'Expected compulsory object block in max_by method' unless block

  descending = reject_values(*DaruLite::MISSING_VALUES).to_a.sort_by(&block).reverse
  # A bare index picks one element; a range picks the leading +size+ elements.
  selector = size.nil? ? 0 : (0..size - 1)
  descending[selector]
end
208
+
209
# Smallest value(s) in the vector, ignoring missing values. Implemented by
# sorting rather than via Array#min with a count.
#
# @param size [Integer, nil] how many of the smallest values to return.
#   Defaults to nil, which yields a single value rather than an array.
# @yield [a, b] optional comparator, passed to Array#sort
# @return [Object, Array] the minimum, or an array of the +size+ smallest
#
# @example
#
#   dv = DaruLite::Vector.new(["Tyrion", "Daenerys", "Jon Starkgaryen"],
#                             index: DaruLite::Index.new([:t, :d, :j]))
#   #=>
#   # #<DaruLite::Vector(3)>
#   #   t   Tyrion
#   #   d   Daenerys
#   #   j   Jon Starkgaryen
#
#   dv.min
#   #=> "Daenerys"
#
#   dv.min(2) { |a,b| a.size <=> b.size }
#   #=> ["Tyrion","Daenerys"]
def min(size = nil, &block)
  # A bare index picks one element; a range picks the leading +size+ elements.
  selector = size.nil? ? 0 : (0..size - 1)
  ascending = reject_values(*DaruLite::MISSING_VALUES).to_a.sort(&block)
  ascending[selector]
end
232
+
233
# Smallest value(s) in the vector as ranked by a key block, ignoring missing
# values. Implemented by sorting on the key.
#
# @param size [Integer, nil] how many of the smallest values to return.
#   Defaults to nil, which yields a single value rather than an array.
# @yield [value] compulsory block producing the key to rank by
# @return [Object, Array]
# @raise [ArgumentError] when no block is given
#
# @example
#
#   dv = DaruLite::Vector.new(["Tyrion", "Daenerys", "Jon Starkgaryen"],
#                             index: DaruLite::Index.new([:t, :d, :j]))
#   #=>
#   # #<DaruLite::Vector(3)>
#   #   t   Tyrion
#   #   d   Daenerys
#   #   j   Jon Starkgaryen
#
#   dv.min_by(2) { |i| i.size }
#   #=> ["Tyrion","Daenerys"]
def min_by(size = nil, &block)
  raise ArgumentError, 'Expected compulsory object block in min_by method' unless block

  ascending = reject_values(*DaruLite::MISSING_VALUES).to_a.sort_by(&block)
  # A bare index picks one element; a range picks the leading +size+ elements.
  selector = size.nil? ? 0 : (0..size - 1)
  ascending[selector]
end
257
+ end
258
+
259
# Index label(s) of the largest value(s) in the vector.
#
# The values are found with #max and then looked up, one by one, in the
# vector with missing values removed.
#
# @param size [Integer, nil] how many maximum indices to return. Defaults to
#   nil, which yields a single index rather than an array.
# @yield [a, b] optional comparator, forwarded to #max
# @return [Object, Array] one index label, or an array of them
#
# @example
#
#   dv = DaruLite::Vector.new(["Tyrion", "Daenerys", "Jon Starkgaryen"],
#                             index: DaruLite::Index.new([:t, :d, :j]))
#   #=>
#   # #<DaruLite::Vector(3)>
#   #   t   Tyrion
#   #   d   Daenerys
#   #   j   Jon Starkgaryen
#
#   dv.index_of_max
#   #=> :t
#
#   dv.index_of_max(2) { |a,b| a.size <=> b.size }
#   #=> [:j, :d]
def index_of_max(size = nil, &block)
  found = max(size, &block)
  present = reject_values(*DaruLite::MISSING_VALUES)
  return present.index_of(found) unless found.is_a?(Array)

  found.map { |value| present.index_of(value) }
end
284
+
285
# Index label(s) of the largest value(s) in the vector as ranked by a key
# block.
#
# The values are found with #max_by and then looked up, one by one, in the
# vector with missing values removed.
#
# @param size [Integer, nil] how many maximum indices to return. Defaults to
#   nil, which yields a single index rather than an array.
# @yield [value] compulsory key block, forwarded to #max_by
# @return [Object, Array] one index label, or an array of them
#
# @example
#
#   dv = DaruLite::Vector.new(["Tyrion", "Daenerys", "Jon Starkgaryen"],
#                             index: DaruLite::Index.new([:t, :d, :j]))
#   #=>
#   # #<DaruLite::Vector(3)>
#   #   t   Tyrion
#   #   d   Daenerys
#   #   j   Jon Starkgaryen
#
#   dv.index_of_max_by(2) { |i| i.size }
#   #=> [:j, :d]
def index_of_max_by(size = nil, &block)
  found = max_by(size, &block)
  present = reject_values(*DaruLite::MISSING_VALUES)
  return present.index_of(found) unless found.is_a?(Array)

  found.map { |value| present.index_of(value) }
end
307
+
308
# Index label(s) of the smallest value(s) in the vector.
#
# The values are found with #min and then looked up, one by one, in the
# vector with missing values removed.
#
# @param size [Integer, nil] how many minimum indices to return. Defaults to
#   nil, which yields a single index rather than an array.
# @yield [a, b] optional comparator, forwarded to #min
# @return [Object, Array] one index label, or an array of them
#
# @example
#
#   dv = DaruLite::Vector.new(["Tyrion", "Daenerys", "Jon Starkgaryen"],
#                             index: DaruLite::Index.new([:t, :d, :j]))
#   #=>
#   # #<DaruLite::Vector(3)>
#   #   t   Tyrion
#   #   d   Daenerys
#   #   j   Jon Starkgaryen
#
#   dv.index_of_min
#   #=> :d
#
#   dv.index_of_min(2) { |a,b| a.size <=> b.size }
#   #=> [:t, :d]
def index_of_min(size = nil, &block)
  found = min(size, &block)
  present = reject_values(*DaruLite::MISSING_VALUES)
  return present.index_of(found) unless found.is_a?(Array)

  found.map { |value| present.index_of(value) }
end
333
+
334
# Index label(s) of the smallest value(s) in the vector as ranked by a key
# block.
#
# The values are found with #min_by and then looked up, one by one, in the
# vector with missing values removed.
#
# @param size [Integer, nil] how many minimum indices to return. Defaults to
#   nil, which yields a single index rather than an array.
# @yield [value] compulsory key block, forwarded to #min_by
# @return [Object, Array] one index label, or an array of them
#
# @example
#
#   dv = DaruLite::Vector.new(["Tyrion", "Daenerys", "Jon Starkgaryen"],
#                             index: DaruLite::Index.new([:t, :d, :j]))
#   #=>
#   # #<DaruLite::Vector(3)>
#   #   t   Tyrion
#   #   d   Daenerys
#   #   j   Jon Starkgaryen
#
#   dv.index_of_min_by(2) { |i| i.size }
#   #=> [:t, :d]
def index_of_min_by(size = nil, &block)
  found = min_by(size, &block)
  present = reject_values(*DaruLite::MISSING_VALUES)
  return present.index_of(found) unless found.is_a?(Array)

  found.map { |value| present.index_of(value) }
end
356
+
357
# Largest element of the vector, wrapped in a one-element Vector keyed by
# that element's index and carrying this vector's name and dtype.
#
# @return [DaruLite::Vector]
def max_index
  largest = @data.max
  position = index_of(largest)
  DaruLite::Vector.new({ position => largest }, name: @name, dtype: @dtype)
end
363
+
364
# Count how many times each non-nil element occurs.
#
# @return [DaruLite::Vector] built from a hash of element => occurrence count
def frequencies
  tally = Hash.new(0)
  @data.each do |element|
    tally[element] += 1 unless element.nil?
  end
  DaruLite::Vector.new(tally)
end

# +freqs+ is kept as a deprecated alias of #frequencies.
alias freqs frequencies
deprecate :freqs, :frequencies, 2016, 10
374
+
375
# Share of the non-missing values taken up by each distinct element.
#
# @return [Hash] element => occurrence count divided by the non-missing count
def proportions
  total = (size - count_values(*DaruLite::MISSING_VALUES)).to_f
  frequencies.to_h.transform_values { |occurrences| occurrences / total }
end
381
+
382
# Replace each value with its rank among the sorted distinct values. Equal
# values share the average of the rank positions they occupy.
#
# @return [Object] the result of recoding every element to its rank
def ranked
  seen = 0
  rank_of = {}
  frequencies.to_h.sort.each do |value, occurrences|
    # Positions (seen + 1)..(seen + occurrences) are averaged for ties.
    rank_of[value] = ((seen + 1) + (seen + occurrences)).quo(2)
    seen += occurrences
  end

  recode { |value| rank_of[value] }
end
391
+
392
# Coefficient of variation: sample standard deviation divided by the mean.
#
# @return [Object] the ratio of the two statistics
def coefficient_of_variation
  deviation = standard_deviation_sample
  deviation / mean
end
395
+
396
# Count elements of the vector.
#
# * With a block: the number of elements for which the block is truthy.
# * With a truthy +value+: the number of elements equal to that value.
# * Otherwise (no argument, or a falsy one): the number of non-missing
#   elements.
#
# @param value [Object] value whose occurrences should be counted
# @yield [element] optional predicate
# @return [Integer]
def count(value = false, &block)
  return @data.count(&block) if block
  return count { |val| val == value } if value

  size - indexes(*DaruLite::MISSING_VALUES).size
end
409
+
410
# Count the number of occurrences of every element in the vector, nil
# included.
#
# @return [DaruLite::Vector] built from a hash of element => occurrence count
def value_counts
  tally = Hash.new(0)
  @data.each { |item| tally[item] += 1 }

  DaruLite::Vector.new(tally)
end
418
+
419
# Fraction of the non-missing values that are equal to +value+.
#
# @param value [Object] the element to look up in #frequencies (default 1)
# @return [Float]
def proportion(value = 1)
  occurrences = frequencies[value]
  occurrences.quo(size - count_values(*DaruLite::MISSING_VALUES)).to_f
end
422
+
423
# Sample variance, i.e. with denominator (N - 1) where N is the number of
# non-missing values.
#
# @param m [Numeric, nil] mean to use; computed with #mean when omitted
# @return [Object] the wrapper's own result when it implements
#   +variance_sample+, otherwise the value computed here
def variance_sample(m = nil)
  m ||= mean
  return @data.variance_sample(m) if @data.respond_to?(:variance_sample)

  sum_of_squares(m).quo(size - count_values(*DaruLite::MISSING_VALUES) - 1)
end
432
+
433
# Population variance, i.e. with denominator N where N is the number of
# non-missing values.
#
# @param m [Numeric, nil] mean to use; computed with #mean when omitted
# @return [Object] the wrapper's own result when it implements
#   +variance_population+, otherwise the Float computed here
def variance_population(m = nil)
  m ||= mean
  return @data.variance_population(m) if @data.respond_to?(:variance_population)

  sum_of_squares(m).quo(size - count_values(*DaruLite::MISSING_VALUES)).to_f
end
442
+
443
# Sample covariance with +other+, i.e. with denominator (N - 1) where N is
# the number of non-missing values in this vector.
#
# @param other [#size] the vector to compare against
# @return [Object] +covariance_sum(other)+ divided by (N - 1)
# @raise [ArgumentError] when the two vectors differ in size
def covariance_sample(other)
  raise ArgumentError, 'size of both the vectors must be equal' unless size == other.size

  covariance_sum(other) / (size - count_values(*DaruLite::MISSING_VALUES) - 1)
end
448
+
449
# Population covariance with +other+, i.e. with denominator N where N is the
# number of non-missing values in this vector.
#
# @param other [#size] the vector to compare against
# @return [Object] +covariance_sum(other)+ divided by N
# @raise [ArgumentError] when the two vectors differ in size
def covariance_population(other)
  raise ArgumentError, 'size of both the vectors must be equal' unless size == other.size

  covariance_sum(other) / (size - count_values(*DaruLite::MISSING_VALUES))
end
454
+
455
# Sum of squared differences between each non-missing value and +m+.
#
# @param m [Numeric, nil] centre to measure from; #mean when omitted
# @return [Numeric]
def sum_of_squares(m = nil)
  m ||= mean
  present = reject_values(*DaruLite::MISSING_VALUES).data
  present.inject(0) { |total, val| total + ((val - m)**2) }
end
461
+
462
# Population standard deviation.
#
# @param m [Numeric, nil] mean to use; computed with #mean when omitted
# @return [Object] the wrapper's own result when it implements
#   +standard_deviation_population+, otherwise the square root of
#   #variance_population
def standard_deviation_population(m = nil)
  m ||= mean
  return @data.standard_deviation_population(m) if @data.respond_to?(:standard_deviation_population)

  Math.sqrt(variance_population(m))
end
470
+
471
# Sample standard deviation.
#
# @param m [Numeric, nil] mean to use; computed with #mean when omitted
# @return [Object] the wrapper's own result when it implements
#   +standard_deviation_sample+, otherwise the square root of
#   #variance_sample
def standard_deviation_sample(m = nil)
  m ||= mean
  return @data.standard_deviation_sample(m) if @data.respond_to?(:standard_deviation_sample)

  Math.sqrt(variance_sample(m))
end
479
+
480
# Calculate skewness using (sigma(xi - mean)^3)/((N)*std_dev_sample^3), where
# N is the number of non-missing values.
#
# Missing values are left out of the cubed-deviation sum so that it matches
# the non-missing count in the denominator; previously a nil in the data
# raised NoMethodError during the subtraction.
#
# @param m [Numeric, nil] mean to use in the fallback computation; #mean when
#   omitted. Ignored when the data wrapper implements +skew+ itself.
# @return [Numeric]
def skew(m = nil)
  return @data.skew if @data.respond_to?(:skew)

  m ||= mean
  present = reject_values(*DaruLite::MISSING_VALUES).data
  cubed = present.inject(0) { |memo, val| memo + ((val - m)**3) }
  cubed.quo((size - indexes(*DaruLite::MISSING_VALUES).size) * (standard_deviation_sample(m)**3))
end
490
+
491
# Excess kurtosis, computed as
# (sigma(xi - mean)^4)/((N)*std_dev_sample^4) - 3, where N is the number of
# non-missing values.
#
# Missing values are left out of the fourth-power sum so that it matches the
# non-missing count in the denominator; previously a nil in the data raised
# NoMethodError during the subtraction.
#
# @param m [Numeric, nil] mean to use in the fallback computation; #mean when
#   omitted. Ignored when the data wrapper implements +kurtosis+ itself.
# @return [Numeric]
def kurtosis(m = nil)
  return @data.kurtosis if @data.respond_to?(:kurtosis)

  m ||= mean
  present = reject_values(*DaruLite::MISSING_VALUES).data
  fourth = present.inject(0) { |acc, x| acc + ((x - m)**4) }
  fourth.quo((size - indexes(*DaruLite::MISSING_VALUES).size) * (standard_deviation_sample(m)**4)) - 3
end
500
+
501
# Mean absolute deviation of the non-missing values from +m+.
#
# Calls +must_be_numeric!+ before computing anything.
#
# @param m [Numeric, nil] centre to measure from; #mean when omitted
# @return [Numeric] sum of |value - m| divided by the non-missing count
def average_deviation_population(m = nil)
  must_be_numeric!
  m ||= mean
  present = reject_values(*DaruLite::MISSING_VALUES).data
  total = present.inject(0) { |memo, val| (val - m).abs + memo }
  total.quo(size - count_values(*DaruLite::MISSING_VALUES))
end
508
+
509
# Returns the value of the percentile q.
#
# The optional second argument selects how to interpolate when the requested
# percentile lies between two data points a and b:
#
# * :midpoint (default): (a + b) / 2
# * :linear: a + (b - a) * d, where d is the decimal part of the index
#   between a and b.
#
# == References
#
# This is the NIST recommended method (http://en.wikipedia.org/wiki/Percentile#NIST_method)
#
# @param q [Numeric] the percentile to compute
# @param strategy [Symbol] :midpoint or :linear
# @return [Object] the result of the chosen strategy's helper
# @raise [ArgumentError] for any other strategy
def percentile(q, strategy = :midpoint)
  case strategy
  when :midpoint then midpoint_percentile(q)
  when :linear then linear_percentile(q)
  else raise ArgumentError, "Unknown strategy #{strategy}"
  end
end
529
+
530
# Dichotomize the vector into 0s and 1s around a threshold.
#
# Values greater than +low+ become 1, all other non-nil values become 0, and
# nil stays nil.
#
# @param low [Object, nil] the threshold; defaults to the smallest of
#   #factors
# @return [Object] the recoded vector
def dichotomize(low = nil)
  low ||= factors.min

  recode do |x|
    next nil if x.nil?

    x > low ? 1 : 0
  end
end
546
+
547
# Center the data by subtracting the mean from the vector.
#
# @return [Object] the result of +self - mean+
def center
  offset = mean
  self - offset
end
551
+
552
# Standardize data.
#
# When the mean is nil or the standard deviation is zero, a vector of +size+
# nils is returned instead.
#
# @param use_population [Boolean] pass true to use the population standard
#   deviation (+sdp+) instead of the sample standard deviation (+sds+)
# @return [DaruLite::Vector]
def standardize(use_population = false)
  m = mean
  sd = use_population ? sdp : sds
  degenerate = m.nil? || sd.to_d == BigDecimal('0.0')
  return DaruLite::Vector.new([nil] * size) if degenerate

  vector_standardized_compute m, sd
end
565
+
566
# :nocov:
def box_cox_transformation(lambda) # :nodoc:
  must_be_numeric!

  # nil stays nil; lambda == 0 uses log(x), otherwise (x^lambda - 1) / lambda.
  recode do |x|
    next nil if x.nil?

    lambda.zero? ? Math.log(x) : ((x**lambda) - 1).quo(lambda)
  end
end
# :nocov:
581
+
582
# Replace each non-nil value in the vector with its percentile: its rank
# divided by the number of non-missing values, times 100.
#
# @return [Object] the ranked vector after recoding in place
def vector_percentile
  valid_count = size - indexes(*DaruLite::MISSING_VALUES).size
  ranked.recode! do |rank|
    rank.nil? ? nil : (rank.quo(valid_count) * 100).to_f
  end
end
587
+
588
# Standardize every non-nil element as (x - m) / sd.
#
# Delegates to the data wrapper when it implements the same method;
# otherwise builds a new vector that keeps this vector's index, name and
# dtype.
#
# @param m [Numeric] the mean to subtract
# @param sd [Numeric] the standard deviation to divide by
# @return [Object]
def vector_standardized_compute(m, sd)
  return @data.vector_standardized_compute(m, sd) if @data.respond_to?(:vector_standardized_compute)

  scaled = @data.collect { |x| x.nil? ? nil : (x.to_f - m).quo(sd) }
  DaruLite::Vector.new scaled, index: index, name: name, dtype: dtype
end
596
+
597
# Center every non-nil element as x - m.
#
# Delegates to the data wrapper when it implements the same method;
# otherwise builds a new vector that keeps this vector's index, name and
# dtype.
#
# @param m [Numeric] the value to subtract
# @return [Object]
def vector_centered_compute(m)
  return @data.vector_centered_compute(m) if @data.respond_to?(:vector_centered_compute)

  shifted = @data.collect { |x| x.nil? ? nil : x.to_f - m }
  DaruLite::Vector.new shifted, index: index, name: name, dtype: dtype
end
605
+
606
# Returns a random sample of size n, with replacement, drawn only from
# non-missing data.
#
# In every trial, each item has the same probability of being selected.
#
# @param sample [Integer] number of draws (default 1)
# @return [Object] the wrapper's own result when it implements
#   +sample_with_replacement+, otherwise an Array of drawn values
def sample_with_replacement(sample = 1)
  return @data.sample_with_replacement(sample) if @data.respond_to?(:sample_with_replacement)

  has_missing = !indexes(*DaruLite::MISSING_VALUES).empty?
  valid = has_missing ? reject_values(*DaruLite::MISSING_VALUES) : self
  vds = valid.size
  (0...sample).map { valid[rand(vds)] }
end
620
+
621
# Returns a random sample of size n, without replacement, drawn only from
# valid data.
#
# Every element can be selected at most once.
#
# A sample of the same size as the vector is the vector itself.
#
# @param sample [Integer] number of draws (default 1)
# @return [Object] the wrapper's own result when it implements
#   +sample_without_replacement+, otherwise the result of
#   +raw_sample_without_replacement+
def sample_without_replacement(sample = 1)
  return @data.sample_without_replacement(sample) if @data.respond_to?(:sample_without_replacement)

  raw_sample_without_replacement(sample)
end
634
+
635
# Computes the fractional change of each element relative to the most recent
# preceding non-nil element.
#
# An entry is nil when its position is below +periods+, or when either the
# current or the previous value is a missing value.
#
# @param [Integer] periods (1) number of leading positions that are forced
#   to nil.
# @return [DaruLite::Vector] the changes, on this vector's index and name
#
# @example
#
#   vector = DaruLite::Vector.new([4,6,6,8,10],index: ['a','f','t','i','k'])
#   vector.percent_change
#   #=>
#   #   <DaruLite::Vector:28713060 @name = nil size: 5 >
#   #              nil
#   #   a          nil
#   #   f          0.5
#   #   t          0.0
#   #   i          0.3333333333333333
#   #   k          0.25
def percent_change(periods = 1)
  must_be_numeric!

  prev = nil
  changes = @data.each_with_index.map do |cur, i|
    skip = i < periods ||
           include_with_nan?(DaruLite::MISSING_VALUES, cur) ||
           include_with_nan?(DaruLite::MISSING_VALUES, prev)
    change = skip ? nil : (cur - prev) / prev.to_f
    # Remember the latest truthy value for the next iteration.
    prev = cur if cur
    change
  end

  DaruLite::Vector.new(changes, index: @index, name: @name)
end
668
+
669
# Performs the difference of the series.
# Note: The first difference of a series is X(t) - X(t-1).
# The second difference is NOT X(t) - X(t-2); it is the first difference of
# the first difference:
#   => (X(t) - X(t-1)) - (X(t-1) - X(t-2))
#
# == Arguments
#
# * *max_lags*: integer, (default: 1), number of differences required.
#
# @example Using #diff
#
#   ts = DaruLite::Vector.new((1..10).map { rand })
#            # => [0.69, 0.23, 0.44, 0.71, ...]
#
#   ts.diff   # => [nil, -0.46, 0.21, 0.27, ...]
#
# @return [DaruLite::Vector] the differenced series (an empty Array when
#   +max_lags+ is 0, since no difference is ever taken)
def diff(max_lags = 1)
  series = self
  result = []
  max_lags.times do
    result = series - series.lag
    series = result
  end
  result
end
696
+
697
# Calculate a rolling function over a loopback window.
#
# @param [Symbol] function The rolling function to be applied. Can be
#   any function applicable to DaruLite::Vector (:mean, :median, :count,
#   :min, :max, etc.)
# @param [Integer] n (10) A non-negative value which serves as the loopback length.
# @return [DaruLite::Vector] Vector containing the rolling calculations,
#   preceded by n - 1 nils, on this vector's index.
# @example Using #rolling
#   ts = DaruLite::Vector.new((1..100).map { rand })
#            # => [0.69, 0.23, 0.44, 0.71, ...]
#   # first 9 observations are nil
#   ts.rolling(:mean)    # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
def rolling(function, n = 10)
  padding = [nil] * (n - 1)
  windows = (0..(size - n)).map do |start|
    DaruLite::Vector.new(@data[start...(start + n)]).send(function)
  end
  DaruLite::Vector.new(padding + windows, index: @index)
end
717
+
718
# Each generated method calls #rolling with the matching function name.
#
# @!method rolling_mean(n = 10)
#   Calculate rolling average
#   @param n [Integer] (10) Loopback length
# @!method rolling_median(n = 10)
#   Calculate rolling median
#   @param n [Integer] (10) Loopback length
# @!method rolling_count(n = 10)
#   Calculate rolling non-missing count
#   @param n [Integer] (10) Loopback length
# @!method rolling_max(n = 10)
#   Calculate rolling max value
#   @param n [Integer] (10) Loopback length
# @!method rolling_min(n = 10)
#   Calculate rolling min value
#   @param n [Integer] (10) Loopback length
# @!method rolling_sum(n = 10)
#   Calculate rolling sum
#   @param n [Integer] (10) Loopback length
# @!method rolling_std(n = 10)
#   Calculate rolling standard deviation
#   @param n [Integer] (10) Loopback length
# @!method rolling_variance(n = 10)
#   Calculate rolling variance
#   @param n [Integer] (10) Loopback length
%i[count mean median max min sum std variance].each do |stat|
  define_method(:"rolling_#{stat}") { |n = 10| rolling(stat, n) }
end
747
+
748
# Exponential Moving Average.
# Calculates an exponential moving average of the series using a
# specified parameter. If wilder is false (the default) the EMA uses a
# smoothing value of 2 / (n + 1); if it is true it uses the Welles Wilder
# smoother of 1 / n.
#
# Warning for EMA usage: EMAs are unstable for small series, as they
# use a lot more than n observations to calculate. The series is stable
# if the size of the series is >= 3.45 * (n + 1)
#
# @param [Integer] n (10) Loopback length.
# @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
#   used for smoothing; if false, uses 2/(n+1) value
#
# @example Using ema
#
#   ts = DaruLite::Vector.new((1..100).map { rand })
#            # => [0.577..., 0.123..., 0.173..., 0.233..., ...]
#
#   # first 9 observations are nil
#   ts.ema   # => [ ... nil, 0.455... , 0.395..., 0.323..., ... ]
#
# @return [DaruLite::Vector] Contains EMA
def ema(n = 10, wilder = false)
  smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
  # Everything is measured from the first non-nil observation.
  first_valid = @data.index { |value| !value.nil? }
  seed_end = first_valid + n
  # The entries before the seed are nil.
  averages = [nil] * (seed_end - 1)
  # The seed is a plain moving average over the first window (nils skipped).
  seed_total = @data[first_valid...seed_end].inject(0.0) { |acc, value| value.nil? ? acc : acc + value }
  averages << (seed_total / n)
  seed_end.upto(size - 1) do |i|
    averages << ((self[i] * smoother) + ((1 - smoother) * averages.last))
  end

  DaruLite::Vector.new(averages, index: @index, name: @name)
end
785
+
786
+ # Exponential Moving Variance.
787
+ # Calculates an exponential moving variance of the series using a
788
+ # specified parameter. If wilder is false (the default) then the EMV
789
+ # uses a smoothing value of 2 / (n + 1), if it is true then it uses the
790
+ # Welles Wilder smoother of 1 / n.
791
+ #
792
+ # @param [Integer] n (10) Loopback length.
793
+ # @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
794
+ # used for smoothing; if false, uses 2/(n+1) value
795
+ #
796
+ # @example Using emv
797
+ #
798
+ # ts = DaruLite::Vector.new((1..100).map { rand })
799
+ # # => [0.047..., 0.23..., 0.836..., 0.845..., ...]
800
+ #
801
+ # # first 9 observations are nil
802
+ # ts.emv # => [ ... nil, 0.073... , 0.082..., 0.080..., ...]
803
+ #
804
+ # @return [DaruLite::Vector] contains EMV
805
def emv(n = 10, wilder = false) # rubocop:disable Metrics/AbcSize
  smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
  # Everything is anchored at the first non-nil observation.
  start = @data.index { |i| !i.nil? }
  # start is nil when every observation is nil (or the series is empty), and
  # a series with fewer than n observations from the anchor onwards cannot
  # seed the statistics. Return an all-nil series of the right length
  # instead of failing on nil arithmetic or outgrowing the index.
  return DaruLite::Vector.new([nil] * size, index: @index, name: @name) if start.nil? || start + n > size

  window = @data[start...(start + n)]
  # Seed mean: plain average of the first window (nils add nothing).
  mean_prev = window.inject(0.0) { |s, a| a.nil? ? s : s + a } / n
  # The first n - 1 positions after the anchor have no value yet.
  var_base = [nil] * (start + n - 1)
  # The n-th observation is seeded with the population variance of the window.
  var_base << (window.inject(0.0) { |s, x| x.nil? ? s : s + ((x - mean_prev)**2) } / n)
  (start + n).upto(size - 1) do |i|
    # Read positionally from @data, like the seed window, so the recursion
    # does not depend on how index labels resolve integers.
    x = @data[i]
    mean_next = (x * smoother) + ((1 - smoother) * mean_prev)
    # Incremental update: uses the deviation from both the previous and the
    # freshly updated mean.
    var_base << (((1 - smoother) * var_base.last) + (smoother * (x - mean_prev) * (x - mean_next)))
    # Only the latest mean is ever needed, so no running list is kept.
    mean_prev = mean_next
  end
  DaruLite::Vector.new(var_base, index: @index, name: @name)
end
822
+
823
+ # Exponential Moving Standard Deviation.
824
+ # Calculates an exponential moving standard deviation of the series using a
825
+ # specified parameter. If wilder is false (the default) then the EMSD
826
+ # uses a smoothing value of 2 / (n + 1), if it is true then it uses the
827
+ # Welles Wilder smoother of 1 / n.
828
+ #
829
+ # @param [Integer] n (10) Loopback length.
830
+ # @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
831
+ # used for smoothing; if false, uses 2/(n+1) value
832
+ #
833
+ # @example Using emsd
834
+ #
835
+ # ts = DaruLite::Vector.new((1..100).map { rand })
836
+ # # => [0.400..., 0.727..., 0.862..., 0.013..., ...]
837
+ #
838
+ # # first 9 observations are nil
839
+ # ts.emsd # => [ ... nil, 0.285... , 0.258..., 0.243..., ...]
840
+ #
841
+ # @return [DaruLite::Vector] contains EMSD
842
def emsd(n = 10, wilder = false)
  # Square-root every defined exponential moving variance; positions where
  # the variance is nil stay nil.
  deviations = []
  emv(n, wilder).each do |variance|
    deviations.push(variance.nil? ? nil : Math.sqrt(variance))
  end
  DaruLite::Vector.new(deviations, index: @index, name: @name)
end
850
+
851
+ # Moving Average Convergence-Divergence.
852
+ # Calculates the MACD (moving average convergence-divergence) of the time
853
+ # series.
854
+ # @see https://en.wikipedia.org/wiki/MACD
855
+ #
856
+ # @param fast [Integer] fast period of MACD (default 12)
857
+ # @param slow [Integer] slow period of MACD (default 26)
858
+ # @param signal [Integer] signal period of MACD (default 9)
859
+ #
860
+ # @example Create a series and calculate MACD values
861
+ # ts = DaruLite::Vector.new((1..100).map { rand })
862
+ # # => [0.69, 0.23, 0.44, 0.71, ...]
863
+ # macdseries, macdsignal, macdhist = ts.macd
864
+ # macdseries, macdsignal, macdhist = ts.macd(13)
865
+ # macdseries, macdsignal, macdhist = ts.macd(12, 26, 5) # arguments are positional: fast, slow, signal
866
+ #
867
+ # @return [Array<DaruLite::Vector>] macdseries, macdsignal and macdhist are
868
+ # returned as an array of three DaruLite::Vectors
869
+ #
870
def macd(fast = 12, slow = 26, signal = 9)
  # MACD line: fast EMA minus slow EMA.
  fast_ema = ema(fast)
  slow_ema = ema(slow)
  series = fast_ema - slow_ema
  # Signal line: EMA of the MACD line itself.
  signal_line = series.ema(signal)
  # Histogram: distance between the MACD line and its signal line.
  histogram = series - signal_line
  [series, signal_line, histogram]
end
876
+
877
+ # Calculates the autocorrelation coefficients of the series.
878
+ #
879
+ # The first element is always 1, since that is the correlation
880
+ # of the series with itself.
881
+ #
882
+ # @example
883
+ # ts = DaruLite::Vector.new((1..100).map { rand })
884
+ #
885
+ # ts.acf # => array with first 21 autocorrelations
886
+ # ts.acf 3 # => array with 4 autocorrelations (lags 0 to 3)
887
def acf(max_lags = nil)
  # Default number of lags: 10 * log10(size), truncated.
  max_lags ||= (10 * Math.log10(size)).to_i
  # Lag 0 is the series against itself and needs no statistics at all.
  return (0..max_lags).map { 1.0 } if max_lags < 1

  # These do not depend on the lag, so compute them once instead of once
  # per lag.
  m = mean
  centered = self - m
  spread = variance_sample

  (0..max_lags).map do |i|
    next 1.0 if i.zero?

    # can't use Pearson coefficient since the mean for the lagged series should
    # be the same as the regular series
    (centered * (lag(i) - m)).sum / spread / (size - 1)
  end
end
901
+
902
+ # Provides autocovariance.
903
+ #
904
+ # == Arguments
905
+ #
906
+ # * *demean* = true; optional positional argument. Supply false if series is not to be demeaned
907
+ # * *unbiased* = true; optional positional argument. true/false for unbiased/biased form of autocovariance
908
+ #
909
+ # == Returns
910
+ #
911
+ # Array of autocovariance values, one per lag starting at lag 0
912
def acvf(demean = true, unbiased = true)
  m = mean
  demeaned_series = demean ? self - m : self

  # Default lag count is 10 * log10(size) + 1, capped at the series length:
  # for short series the uncapped count exceeds size, which left d[i] nil
  # for the trailing lags and made the division below raise.
  n = [(10 * Math.log10(size)).to_i + 1, size].min
  # Per-lag denominators.
  # NOTE(review): the constant `size` denominator is selected by
  # unbiased = true and the shrinking size - lag denominator by
  # unbiased = false, which looks inverted relative to the usual
  # biased/unbiased definitions. Left as is because changing it alters the
  # default output -- confirm intent before swapping.
  d = if unbiased
        Array.new(size, size)
      else
        (1..size).to_a.reverse[0..n]
      end

  0.upto(n - 1).map do |i|
    # Honour demean = false for the lagged factor too; it used to be
    # demeaned unconditionally.
    lagged = demean ? lag(i) - m : lag(i)
    (demeaned_series * lagged).sum / d[i]
  end
end
927
+
928
+ # Calculate cumulative sum of Vector
929
def cumsum
  running_total = 0
  sums = []
  @data.each do |value|
    # Missing values leave the running total untouched and produce nil.
    missing = include_with_nan?(DaruLite::MISSING_VALUES, value)
    running_total += value unless missing
    sums << (missing ? nil : running_total)
  end

  DaruLite::Vector.new(sums, index: @index)
end
943
+
944
# Standard deviation shorthands.
alias sdp standard_deviation_population
alias sds standard_deviation_sample
alias sd standard_deviation_sample
alias std standard_deviation_sample

# The unqualified variance/covariance names use the sample estimators.
alias variance variance_sample
alias covariance covariance_sample

# Other shorthands. Note that cov is the coefficient of variation, not the
# covariance.
alias cov coefficient_of_variation
alias adp average_deviation_population
alias ss sum_of_squares
alias se standard_error
alias percentil percentile

private
957
+
958
# Guard: raises TypeError unless numeric? is truthy; otherwise returns
# whatever numeric? returned.
def must_be_numeric!
  verdict = numeric?
  return verdict if verdict

  raise TypeError, 'Vector must be numeric'
end
961
+
962
# Sum of the products of paired deviations from each series' own mean.
# Pairs where either side is nil/false contribute zero.
def covariance_sum(other)
  own_mean = mean
  their_mean = other.mean
  total = 0
  @data.zip(other.data).each do |own, theirs|
    term = if own && theirs
             (own - own_mean) * (theirs - their_mean)
           else
             0
           end
    total += term
  end
  total
end
974
+
975
# Percentile by the midpoint rule: when the rank lands exactly between two
# sorted values, average those neighbours; otherwise take the value at the
# truncated rank.
def midpoint_percentile(q)
  sorted = reject_values(*DaruLite::MISSING_VALUES).to_a.sort
  # With no valid values there is nothing to pick; return nil, which is what
  # linear_percentile yields for the same input, instead of raising on nil
  # arithmetic.
  return nil if sorted.empty?

  v = ((size - count_values(*DaruLite::MISSING_VALUES)) * q).quo(100)
  return sorted[v.to_i] unless v.to_i == v

  # Clamp both neighbours to the last element: at q = 100 the upper
  # neighbour index was one past the end, so the sum hit nil and raised.
  last = sorted.size - 1
  lower = sorted[[(v - 0.5).to_i, last].min]
  upper = sorted[[(v + 0.5).to_i, last].min]
  (lower.to_f + upper).quo(2)
end
985
+
986
# Percentile by linear interpolation between the two sorted values that
# surround the fractional rank (q / 100) * (valid count + 1).
def linear_percentile(q)
  ordered = reject_values(*DaruLite::MISSING_VALUES).to_a.sort
  valid_count = size - count_values(*DaruLite::MISSING_VALUES)
  rank = (q / 100.0) * (valid_count + 1)

  whole = rank.truncate
  fraction = rank % 1

  # Ranks before the first / at or past the last value clamp to the ends.
  return ordered[0] if whole.zero?
  return ordered[-1] if whole >= ordered.size

  ordered[whole - 1] + (fraction * (ordered[whole] - ordered[whole - 1]))
end
1001
+
1002
# Draws `sample` distinct elements from the non-missing values.
#
# @param sample [Integer] number of elements to draw
# @return [Array] the drawn values
# @raise [ArgumentError] if more elements are requested than are available
def raw_sample_without_replacement(sample)
  valid = indexes(*DaruLite::MISSING_VALUES).empty? ? self : reject_values(*DaruLite::MISSING_VALUES)
  raise ArgumentError, "Sample size couldn't be greater than n" if
    sample > valid.size

  # The former rejection loop yielded an empty result for non-positive
  # sizes; keep that rather than letting Array#sample raise on negatives.
  return [] unless sample.positive?

  # Array#sample(n) picks n elements at distinct positions. This replaces a
  # hand-rolled rejection loop (quadratic membership checks, and ever more
  # retries as sample approaches the population size) and draws from the
  # plain value array, so it does not depend on how `[]` resolves integer
  # keys on a vector whose index has gaps after reject_values.
  valid.to_a.sample(sample)
end
1016
+ end
1017
+ end
1018
+ end
1019
+ end