daru_lite 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
@@ -0,0 +1,1019 @@
1
+ module DaruLite
2
+ module Maths
3
+ # Encapsulates statistics methods for vectors. Most basic stuff like mean, etc.
4
+ # is done inside the wrapper, so that native methods can be used for most of
5
+ # the computationally intensive tasks.
6
+ module Statistics
7
+ module Vector # rubocop:disable Metrics/ModuleLength
8
+ extend Gem::Deprecate
9
+
10
# Returns whatever the underlying data object reports as its +mean+.
def mean
  @data.mean
end
13
+
14
# Returns whatever the underlying data object reports as its +sum+.
def sum
  @data.sum
end
17
+
18
# Returns whatever the underlying data object reports as its +product+.
def product
  @data.product
end
21
+
22
# Difference between the vector's #max and #min.
def range
  highest = max
  lowest = min
  highest - lowest
end
25
+
26
# Median of the vector. Uses the data object's own +median+ when it provides
# one, and falls back to the 50th percentile otherwise.
def median
  return @data.median if @data.respond_to?(:median)

  percentile(50)
end
29
+
30
# Most frequent value(s) of the vector.
#
# The frequency table and its maximum are computed once up front; they used
# to be rebuilt for every entry while filtering.
#
# @return [Object, DaruLite::Vector, nil] the single most frequent value, a
#   vector of values when several tie for the highest count, or nil when no
#   value reaches the highest count (e.g. an empty frequency table).
def mode
  counts = frequencies
  highest = counts.max
  modes = counts.to_h.select { |_, occurrences| occurrences == highest }.keys
  modes.size > 1 ? DaruLite::Vector.new(modes) : modes.first
end
34
+
35
# Summarise the vector in one shot by applying several aggregation methods.
#
# == Arguments
#
# +methods+ - An array of aggregation method names (symbols) to call on
#             this vector. Defaults to [:count, :mean, :std, :min, :max].
#             The methods are applied in the order given.
#
# Returns a DaruLite::Vector named :statistics, indexed by the method names.
def describe(methods = nil)
  methods ||= %i[count mean std min max]
  results = methods.map { |statistic| send(statistic) }
  DaruLite::Vector.new(results, index: methods, name: :statistics)
end
48
+
49
# Median of the absolute deviations of each value from the vector's median.
def median_absolute_deviation
  centre = median
  absolute_deviations = recode { |val| (val - centre).abs }
  absolute_deviations.median
end

alias mad median_absolute_deviation
55
+
56
# Sample standard deviation divided by the square root of the number of
# non-missing values.
def standard_error
  deviation = standard_deviation_sample
  valid_count = size - count_values(*DaruLite::MISSING_VALUES)
  deviation / Math.sqrt(valid_count)
end
59
+
60
# Sum of squared deviations, computed as sum(x^2) - (sum(x))^2 / N, where N
# is the number of non-missing values.
#
# Missing values are rejected before squaring, matching #sum_of_squares and
# the denominator used here; squaring a nil entry would otherwise raise.
#
# @return [Float]
def sum_of_squared_deviation
  valid_count = size - count_values(*DaruLite::MISSING_VALUES)
  squares = reject_values(*DaruLite::MISSING_VALUES).data.inject(0) { |acc, x| acc + (x**2) }
  (squares - (sum**2).quo(valid_count).to_f).to_f
end
66
+
67
# Unique non-missing values of the vector, with the index reset.
def factors
  present = reject_values(*DaruLite::MISSING_VALUES)
  present.uniq.reset_index!
end
71
+
72
# Array#max / #min / #max_by / #min_by accept a count argument from Ruby 2.2
# onwards; older interpreters get a sort-based fallback.
#
# Versions are compared as Gem::Version objects: a plain String comparison
# is lexicographic and misorders multi-digit components
# (e.g. '10.0' >= '2.2' is false).
#
# All four methods ignore missing values (DaruLite::MISSING_VALUES).
#
# Examples below use:
#
#   dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
#     index: DaruLite::Index.new([:t, :d, :j])
if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.2')
  # Returns the maximum value(s) present in the vector, with an optional
  # comparator block.
  #
  # @param size [Integer] Number of maximum values to return. Defaults to nil.
  # @return a single value when +size+ is nil, otherwise an Array.
  #
  # @example
  #   dv.max
  #   #=> "Tyrion"
  #
  #   dv.max(2) { |a,b| a.size <=> b.size }
  #   #=> ["Jon Starkgaryen","Daenerys"]
  def max(size = nil, &block)
    reject_values(*DaruLite::MISSING_VALUES).to_a.max(size, &block)
  end

  # Returns the maximum value(s) present in the vector, ranked by the value
  # the compulsory block returns for each element.
  #
  # @param size [Integer] Number of maximum values to return. Defaults to nil.
  # @raise [ArgumentError] when no block is given.
  #
  # @example
  #   dv.max_by(2) { |i| i.size }
  #   #=> ["Jon Starkgaryen","Daenerys"]
  def max_by(size = nil, &block)
    raise ArgumentError, 'Expected compulsory object block in max_by method' unless block

    reject_values(*DaruLite::MISSING_VALUES).to_a.max_by(size, &block)
  end

  # Returns the minimum value(s) present in the vector, with an optional
  # comparator block.
  #
  # @param size [Integer] Number of minimum values to return. Defaults to nil.
  # @return a single value when +size+ is nil, otherwise an Array.
  #
  # @example
  #   dv.min
  #   #=> "Daenerys"
  #
  #   dv.min(2) { |a,b| a.size <=> b.size }
  #   #=> ["Tyrion","Daenerys"]
  def min(size = nil, &block)
    reject_values(*DaruLite::MISSING_VALUES).to_a.min(size, &block)
  end

  # Returns the minimum value(s) present in the vector, ranked by the value
  # the compulsory block returns for each element.
  #
  # @param size [Integer] Number of minimum values to return. Defaults to nil.
  # @raise [ArgumentError] when no block is given.
  #
  # @example
  #   dv.min_by(2) { |i| i.size }
  #   #=> ["Tyrion","Daenerys"]
  def min_by(size = nil, &block)
    raise ArgumentError, 'Expected compulsory object block in min_by method' unless block

    reject_values(*DaruLite::MISSING_VALUES).to_a.min_by(size, &block)
  end
else
  # Legacy implementation of #max: sort, reverse, then take the first
  # element (size nil) or the first +size+ elements.
  #
  # @param size [Integer] Number of maximum values to return. Defaults to nil.
  def max(size = nil, &block)
    selector = size.nil? ? 0 : (0..size - 1)
    reject_values(*DaruLite::MISSING_VALUES).to_a.sort(&block).reverse[selector]
  end

  # Legacy implementation of #max_by, built on sort_by. The block is
  # compulsory.
  #
  # @param size [Integer] Number of maximum values to return. Defaults to nil.
  # @raise [ArgumentError] when no block is given.
  def max_by(size = nil, &block)
    raise ArgumentError, 'Expected compulsory object block in max_by method' unless block

    selector = size.nil? ? 0 : (0..size - 1)
    reject_values(*DaruLite::MISSING_VALUES).to_a.sort_by(&block).reverse[selector]
  end

  # Legacy implementation of #min: sort, then take the first element
  # (size nil) or the first +size+ elements.
  #
  # @param size [Integer] Number of minimum values to return. Defaults to nil.
  def min(size = nil, &block)
    selector = size.nil? ? 0 : (0..size - 1)
    reject_values(*DaruLite::MISSING_VALUES).to_a.sort(&block)[selector]
  end

  # Legacy implementation of #min_by, built on sort_by. The block is
  # compulsory, so there is no block-less form.
  #
  # @param size [Integer] Number of minimum values to return. Defaults to nil.
  # @raise [ArgumentError] when no block is given.
  def min_by(size = nil, &block)
    raise ArgumentError, 'Expected compulsory object block in min_by method' unless block

    selector = size.nil? ? 0 : (0..size - 1)
    reject_values(*DaruLite::MISSING_VALUES).to_a.sort_by(&block)[selector]
  end
end
258
+
259
# Index label(s) of the maximum value(s), using an optional comparator block.
# Missing values are ignored.
#
# @param size [Integer] Number of maximum indices to return. Defaults to nil.
# @return the label of the maximum when #max yields a single value, or an
#   Array of labels when it yields an Array.
#
# @example
#   dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
#     index: DaruLite::Index.new([:t, :d, :j])
#
#   dv.index_of_max
#   #=> :t
#
#   dv.index_of_max(2) { |a,b| a.size <=> b.size }
#   #=> [:j, :d]
def index_of_max(size = nil, &block)
  largest = max(size, &block)
  present = reject_values(*DaruLite::MISSING_VALUES)
  return present.index_of(largest) unless largest.is_a?(Array)

  largest.map { |value| present.index_of(value) }
end
284
+
285
# Index label(s) of the maximum value(s) as ranked by #max_by, which
# requires a block. Missing values are ignored.
#
# @param size [Integer] Number of maximum indices to return. Defaults to nil.
# @return the label of the maximum when #max_by yields a single value, or an
#   Array of labels when it yields an Array.
#
# @example
#   dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
#     index: DaruLite::Index.new([:t, :d, :j])
#
#   dv.index_of_max_by(2) { |i| i.size }
#   #=> [:j, :d]
def index_of_max_by(size = nil, &block)
  largest = max_by(size, &block)
  present = reject_values(*DaruLite::MISSING_VALUES)
  return present.index_of(largest) unless largest.is_a?(Array)

  largest.map { |value| present.index_of(value) }
end
307
+
308
# Index label(s) of the minimum value(s), using an optional comparator block.
# Missing values are ignored.
#
# @param size [Integer] Number of minimum indices to return. Defaults to nil.
# @return the label of the minimum when #min yields a single value, or an
#   Array of labels when it yields an Array.
#
# @example
#   dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
#     index: DaruLite::Index.new([:t, :d, :j])
#
#   dv.index_of_min
#   #=> :d
#
#   dv.index_of_min(2) { |a,b| a.size <=> b.size }
#   #=> [:t, :d]
def index_of_min(size = nil, &block)
  smallest = min(size, &block)
  present = reject_values(*DaruLite::MISSING_VALUES)
  return present.index_of(smallest) unless smallest.is_a?(Array)

  smallest.map { |value| present.index_of(value) }
end
333
+
334
# Index label(s) of the minimum value(s) as ranked by #min_by, which
# requires a block. Missing values are ignored.
#
# @param size [Integer] Number of minimum indices to return. Defaults to nil.
# @return the label of the minimum when #min_by yields a single value, or an
#   Array of labels when it yields an Array.
#
# @example
#   dv = DaruLite::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]),
#     index: DaruLite::Index.new([:t, :d, :j])
#
#   dv.index_of_min_by(2) { |i| i.size }
#   #=> [:t, :d]
def index_of_min_by(size = nil, &block)
  smallest = min_by(size, &block)
  present = reject_values(*DaruLite::MISSING_VALUES)
  return present.index_of(smallest) unless smallest.is_a?(Array)

  smallest.map { |value| present.index_of(value) }
end
356
+
357
# Return the maximum element of the data as a one-element Vector keyed by
# the index label of that element. Name and dtype are carried over.
# @return [DaruLite::Vector]
def max_index
  largest = @data.max
  DaruLite::Vector.new({ index_of(largest) => largest }, name: @name, dtype: @dtype)
end
363
+
364
# Count how many times each non-nil element occurs.
#
# @return [DaruLite::Vector] built from a value => count hash.
def frequencies
  tally = Hash.new(0)
  @data.each { |element| tally[element] += 1 unless element.nil? }
  DaruLite::Vector.new(tally)
end

# +freqs+ is kept as a deprecated alias of #frequencies.
alias freqs frequencies
deprecate :freqs, :frequencies, 2016, 10
374
+
375
# Share of each value among the non-missing values.
#
# @return [Hash] value => count divided by the number of non-missing values.
def proportions
  valid_count = size - count_values(*DaruLite::MISSING_VALUES)
  frequencies.to_h.transform_values { |occurrences| occurrences / valid_count.to_f }
end
381
+
382
# Replace every value by its rank. Tied values share the average of the
# rank positions they occupy; values absent from the frequency table map
# to nil.
def ranked
  seen = 0
  rank_for = {}
  frequencies.to_h.sort.each do |value, occurrences|
    # Average of the first (seen + 1) and last (seen + occurrences) position.
    rank_for[value] = ((seen + 1) + (seen + occurrences)).quo(2)
    seen += occurrences
  end

  recode { |element| rank_for[element] }
end
391
+
392
# Sample standard deviation divided by the mean.
def coefficient_of_variation
  deviation = standard_deviation_sample
  deviation / mean
end
395
+
396
# Count elements of the vector.
#
# * With a block: number of elements for which the block returns true.
# * With a truthy +value+: number of elements equal to +value+.
# * Otherwise: number of non-missing elements.
#
# Note: a +value+ of nil or false is indistinguishable from "no value
# given" and therefore takes the last branch.
def count(value = false, &block)
  return @data.count(&block) if block
  return count { |val| val == value } if value

  size - indexes(*DaruLite::MISSING_VALUES).size
end
409
+
410
# Count the occurrences of every element of the data, nil included.
#
# @return [DaruLite::Vector] built from an element => count hash.
def value_counts
  occurrences = Hash.new(0)
  @data.each { |element| occurrences[element] += 1 }

  DaruLite::Vector.new(occurrences)
end
418
+
419
# Frequency of +value+ divided by the number of non-missing values.
#
# @param value the value to look up in #frequencies (default 1).
# @return [Float]
def proportion(value = 1)
  matching = frequencies[value]
  valid_count = size - count_values(*DaruLite::MISSING_VALUES)
  matching.quo(valid_count).to_f
end
422
+
423
# Sample variance, with denominator (N - 1) where N is the number of
# non-missing values. Delegates to the data object when it implements
# +variance_sample+ itself.
#
# @param m mean to use; computed via #mean when omitted.
def variance_sample(m = nil)
  m ||= mean
  return @data.variance_sample(m) if @data.respond_to?(:variance_sample)

  squares = sum_of_squares(m)
  squares.quo(size - count_values(*DaruLite::MISSING_VALUES) - 1)
end
432
+
433
# Population variance, with denominator N (the number of non-missing
# values). Delegates to the data object when it implements
# +variance_population+ itself.
#
# @param m mean to use; computed via #mean when omitted.
def variance_population(m = nil)
  m ||= mean
  return @data.variance_population(m) if @data.respond_to?(:variance_population)

  squares = sum_of_squares(m)
  squares.quo(size - count_values(*DaruLite::MISSING_VALUES)).to_f
end
442
+
443
# Sample covariance with +other+, using denominator (N - 1) where N is the
# number of non-missing values of this vector.
#
# @raise [ArgumentError] when the two vectors differ in size.
def covariance_sample(other)
  raise ArgumentError, 'size of both the vectors must be equal' unless size == other.size

  covariance_sum(other) / (size - count_values(*DaruLite::MISSING_VALUES) - 1)
end
448
+
449
# Population covariance with +other+, using denominator N (the number of
# non-missing values of this vector).
#
# @raise [ArgumentError] when the two vectors differ in size.
def covariance_population(other)
  raise ArgumentError, 'size of both the vectors must be equal' unless size == other.size

  covariance_sum(other) / (size - count_values(*DaruLite::MISSING_VALUES))
end
454
+
455
# Sum of squared differences between each non-missing value and +m+.
#
# @param m centre to measure from; computed via #mean when omitted.
def sum_of_squares(m = nil)
  m ||= mean
  valid = reject_values(*DaruLite::MISSING_VALUES).data
  valid.inject(0) { |total, val| total + ((val - m)**2) }
end
461
+
462
# Population standard deviation: the data object's own implementation when
# it has one, otherwise the square root of #variance_population.
#
# @param m mean to use; computed via #mean when omitted.
def standard_deviation_population(m = nil)
  m ||= mean
  return @data.standard_deviation_population(m) if @data.respond_to?(:standard_deviation_population)

  Math.sqrt(variance_population(m))
end
470
+
471
# Sample standard deviation: the data object's own implementation when it
# has one, otherwise the square root of #variance_sample.
#
# @param m mean to use; computed via #mean when omitted.
def standard_deviation_sample(m = nil)
  m ||= mean
  return @data.standard_deviation_sample(m) if @data.respond_to?(:standard_deviation_sample)

  Math.sqrt(variance_sample(m))
end
479
+
480
# Calculate skewness using (sigma(xi - mean)^3)/((N)*std_dev_sample^3),
# where N is the number of non-missing values.
#
# Delegates to the data object when it implements +skew+ itself. In the
# fallback path missing values are rejected before accumulating, matching
# the denominator and #sum_of_squares; subtracting the mean from a nil
# entry would otherwise raise.
#
# @param m mean to use in the fallback path; computed via #mean when omitted.
def skew(m = nil)
  if @data.respond_to? :skew
    @data.skew
  else
    m ||= mean
    valid = reject_values(*DaruLite::MISSING_VALUES).data
    third_moment_sum = valid.inject(0) { |memo, val| memo + ((val - m)**3) }
    third_moment_sum.quo((size - indexes(*DaruLite::MISSING_VALUES).size) * (standard_deviation_sample(m)**3))
  end
end
490
+
491
# Calculate excess kurtosis using
# (sigma(xi - mean)^4)/((N)*std_dev_sample^4) - 3, where N is the number of
# non-missing values.
#
# Delegates to the data object when it implements +kurtosis+ itself. In the
# fallback path missing values are rejected before accumulating, matching
# the denominator and #sum_of_squares; subtracting the mean from a nil
# entry would otherwise raise.
#
# @param m mean to use in the fallback path; computed via #mean when omitted.
def kurtosis(m = nil)
  if @data.respond_to? :kurtosis
    @data.kurtosis
  else
    m ||= mean
    valid = reject_values(*DaruLite::MISSING_VALUES).data
    fourth_moment_sum = valid.inject(0) { |memo, val| memo + ((val - m)**4) }
    fourth_moment_sum.quo((size - indexes(*DaruLite::MISSING_VALUES).size) * (standard_deviation_sample(m)**4)) - 3
  end
end
500
+
501
# Mean absolute deviation from +m+ over the non-missing values.
# Calls +must_be_numeric!+ first.
#
# @param m centre to measure from; computed via #mean when omitted.
def average_deviation_population(m = nil)
  must_be_numeric!
  m ||= mean
  valid = reject_values(*DaruLite::MISSING_VALUES).data
  total_deviation = valid.inject(0) { |total, val| (val - m).abs + total }
  total_deviation.quo(size - count_values(*DaruLite::MISSING_VALUES))
end
508
+
509
# Returns the value of the percentile q.
#
# The optional second argument selects how to interpolate when the
# requested percentile lies between two data points a and b:
# * :midpoint (Default): (a + b) / 2
# * :linear : a + (b - a) * d where d is the decimal part of the index between a and b.
#
# == References
#
# This is the NIST recommended method (http://en.wikipedia.org/wiki/Percentile#NIST_method)
#
# Raises ArgumentError for any other strategy.
def percentile(q, strategy = :midpoint)
  case strategy
  when :midpoint then midpoint_percentile(q)
  when :linear then linear_percentile(q)
  else raise ArgumentError, "Unknown strategy #{strategy}"
  end
end
529
+
530
# Dichotomize the vector into 0 and 1.
# Values greater than +low+ become 1, all other non-nil values become 0,
# and nil stays nil. When +low+ is omitted, the minimum of #factors is used.
def dichotomize(low = nil)
  low ||= factors.min

  recode do |x|
    next nil if x.nil?

    x > low ? 1 : 0
  end
end
546
+
547
# Center the data by subtracting the mean from the vector.
def center
  average = mean
  self - average
end
551
+
552
# Standardize data.
#
# == Arguments
#
# * use_population - Pass as *true* if you want to use population
#   standard deviation (+sdp+) instead of sample standard deviation (+sds+).
#
# Returns a vector of nils, one per element, when the mean is nil or the
# chosen standard deviation is zero; otherwise the result of
# #vector_standardized_compute.
def standardize(use_population = false)
  # Plain assignment: there is no earlier +m+ to fall back on here.
  m = mean
  sd = use_population ? sdp : sds
  return DaruLite::Vector.new([nil] * size) if m.nil? || sd.to_d == BigDecimal('0.0')

  vector_standardized_compute m, sd
end
565
+
566
# :nocov:
# Recodes every non-nil value x as log(x) when +lambda+ is zero, and as
# (x**lambda - 1) / lambda otherwise. nil values stay nil.
def box_cox_transformation(lambda) # :nodoc:
  must_be_numeric!

  recode do |x|
    next if x.nil?

    lambda.zero? ? Math.log(x) : ((x**lambda) - 1).quo(lambda)
  end
end
# :nocov:
581
+
582
# Replace each non-nil value in the vector with its percentile: its rank
# divided by the number of non-missing values, times 100.
def vector_percentile
  valid_count = size - indexes(*DaruLite::MISSING_VALUES).size
  ranked.recode! do |rank|
    (rank.quo(valid_count) * 100).to_f unless rank.nil?
  end
end
587
+
588
# Build the standardized vector (x - m) / sd, keeping nil entries as nil.
# Delegates to the data object when it implements the computation itself.
#
# @param m mean to subtract.
# @param sd standard deviation to divide by.
def vector_standardized_compute(m, sd)
  return @data.vector_standardized_compute(m, sd) if @data.respond_to?(:vector_standardized_compute)

  standardized = @data.collect { |x| (x.to_f - m).quo(sd) unless x.nil? }
  DaruLite::Vector.new(standardized, index: index, name: name, dtype: dtype)
end
596
+
597
# Build the centered vector x - m, keeping nil entries as nil.
# Delegates to the data object when it implements the computation itself.
#
# @param m value to subtract from every element.
def vector_centered_compute(m)
  return @data.vector_centered_compute(m) if @data.respond_to?(:vector_centered_compute)

  centered = @data.collect { |x| x.to_f - m unless x.nil? }
  DaruLite::Vector.new(centered, index: index, name: name, dtype: dtype)
end
605
+
606
# Returns a random sample of size n, with replacement,
# drawn only from non-missing data.
#
# In every trial, each item has the same probability
# of being selected.
def sample_with_replacement(sample = 1)
  return @data.sample_with_replacement(sample) if @data.respond_to?(:sample_with_replacement)

  has_missing = !indexes(*DaruLite::MISSING_VALUES).empty?
  valid = has_missing ? reject_values(*DaruLite::MISSING_VALUES) : self
  valid_size = valid.size
  (0...sample).map { valid[rand(valid_size)] }
end
620
+
621
# Returns a random sample of size n, without replacement,
# only with valid data.
#
# Every element can only be selected once.
#
# A sample of the same size as the vector is the vector itself.
def sample_without_replacement(sample = 1)
  return @data.sample_without_replacement(sample) if @data.respond_to?(:sample_without_replacement)

  raw_sample_without_replacement(sample)
end
634
+
635
# Computes the relative change of each value with respect to the most
# recent preceding non-nil value.
#
# @param [Integer] periods (1) number of leading positions forced to nil.
#
# @example
#
#   vector = DaruLite::Vector.new([4,6,6,8,10],index: ['a','f','t','i','k'])
#   vector.percent_change
#   #=>
#   #   <DaruLite::Vector:28713060 @name = nil size: 5 >
#   #       nil
#   #   a   nil
#   #   f   0.5
#   #   t   0.0
#   #   i   0.3333333333333333
#   #   k   0.25
def percent_change(periods = 1)
  must_be_numeric!

  previous = nil
  changes = @data.each_with_index.map do |current, position|
    skip = position < periods ||
           include_with_nan?(DaruLite::MISSING_VALUES, current) ||
           include_with_nan?(DaruLite::MISSING_VALUES, previous)
    change = skip ? nil : (current - previous) / previous.to_f
    # Only truthy values become the new reference point.
    previous = current if current
    change
  end

  DaruLite::Vector.new(changes, index: @index, name: @name)
end
668
+
669
# Performs the difference of the series.
# Note: The first difference of series is X(t) - X(t-1)
# But, second difference of series is NOT X(t) - X(t-2)
# It is the first difference of the first difference
# => (X(t) - X(t-1)) - (X(t-1) - X(t-2))
#
# == Arguments
#
# * *max_lags*: integer, (default: 1), number of differences required.
#
# @example Using #diff
#
#   ts = DaruLite::Vector.new((1..10).map { rand })
#            # => [0.69, 0.23, 0.44, 0.71, ...]
#
#   ts.diff   # => [nil, -0.46, 0.21, 0.27, ...]
#
# @return [DaruLite::Vector] the differenced series; an empty Array when
#   +max_lags+ results in no iterations.
def diff(max_lags = 1)
  result = []
  series = self
  max_lags.times do
    result = series - series.lag
    series = result
  end
  result
end
696
+
697
# Calculate the rolling function for a loopback value.
#
# @param [Symbol] function The rolling function to be applied. Can be
#   any function applicable to DaruLite::Vector (:mean, :median, :count,
#   :min, :max, etc.)
# @param [Integer] n (10) A non-negative value which serves as the loopback length.
# @return [DaruLite::Vector] Vector containing rolling calculations.
# @example Using #rolling
#   ts = DaruLite::Vector.new((1..100).map { rand })
#            # => [0.69, 0.23, 0.44, 0.71, ...]
#   # first 9 observations are nil
#   ts.rolling(:mean)    # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
def rolling(function, n = 10)
  padding = [nil] * (n - 1)
  windows = (0..(size - n)).map do |start|
    DaruLite::Vector.new(@data[start...(start + n)]).send(function)
  end
  DaruLite::Vector.new(padding + windows, index: @index)
end
717
+
718
# Each generated method takes the loopback length +n+ (default 10) and
# forwards to #rolling with the matching function.
#
# @!method rolling_mean
#   Calculate rolling average
#   @param [Integer] n (10) Loopback length
# @!method rolling_median
#   Calculate rolling median
#   @param [Integer] n (10) Loopback length
# @!method rolling_count
#   Calculate rolling non-missing count
#   @param [Integer] n (10) Loopback length
# @!method rolling_max
#   Calculate rolling max value
#   @param [Integer] n (10) Loopback length
# @!method rolling_min
#   Calculate rolling min value
#   @param [Integer] n (10) Loopback length
# @!method rolling_sum
#   Calculate rolling sum
#   @param [Integer] n (10) Loopback length
# @!method rolling_std
#   Calculate rolling standard deviation
#   @param [Integer] n (10) Loopback length
# @!method rolling_variance
#   Calculate rolling variance
#   @param [Integer] n (10) Loopback length
%i[count mean median max min sum std variance].each do |statistic|
  define_method(:"rolling_#{statistic}") { |n = 10| rolling(statistic, n) }
end
747
+
748
+ # Exponential Moving Average.
749
+ # Calculates an exponential moving average of the series using a
750
+ # specified parameter. If wilder is false (the default) then the EMA
751
+ # uses a smoothing value of 2 / (n + 1), if it is true then it uses the
752
+ # Welles Wilder smoother of 1 / n.
753
+ #
754
+ # Warning for EMA usage: EMAs are unstable for small series, as they
755
+ # use a lot more than n observations to calculate. The series is stable
756
+ # if the size of the series is >= 3.45 * (n + 1)
757
+ #
758
+ # @param [Integer] n (10) Loopback length.
759
+ # @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
760
+ # used for smoothing; if false, uses 2/(n+1) value
761
+ #
762
+ # @example Using ema
763
+ #
764
+ # ts = DaruLite::Vector.new((1..100).map { rand })
765
+ # # => [0.577..., 0.123..., 0.173..., 0.233..., ...]
766
+ #
767
+ # # first 9 observations are nil
768
+ # ts.ema # => [ ... nil, 0.455... , 0.395..., 0.323..., ... ]
769
+ #
770
+ # @return [DaruLite::Vector] Contains EMA
771
def ema(n = 10, wilder = false)
  smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
  # Everything is anchored on the first non-nil observation.
  start = @data.index { |i| !i.nil? }
  # With no non-nil observation, or fewer than n observations from `start`
  # on, the seed average cannot be formed; return an all-nil vector instead
  # of failing on `nil + n` or building more values than the vector holds.
  return DaruLite::Vector.new([nil] * size, index: @index, name: @name) if start.nil? || start + n > size

  # The first (start + n - 1) positions have no EMA.
  base = [nil] * (start + n - 1)
  # The nth observation is seeded with a plain moving average of the window.
  base << (@data[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n)
  (start + n).upto(size - 1) do |i|
    # Read positionally from @data, exactly like the seed window above, so
    # the result does not depend on how the vector's index resolves integer keys.
    base << ((@data[i] * smoother) + ((1 - smoother) * base.last))
  end

  DaruLite::Vector.new(base, index: @index, name: @name)
end
785
+
786
+ # Exponential Moving Variance.
787
+ # Calculates an exponential moving variance of the series using a
788
+ # specified parameter. If wilder is false (the default) then the EMV
789
+ # uses a smoothing value of 2 / (n + 1), if it is true then it uses the
790
+ # Welles Wilder smoother of 1 / n.
791
+ #
792
+ # @param [Integer] n (10) Loopback length.
793
+ # @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
794
+ # used for smoothing; if false, uses 2/(n+1) value
795
+ #
796
+ # @example Using emv
797
+ #
798
+ # ts = DaruLite::Vector.new((1..100).map { rand })
799
+ # # => [0.047..., 0.23..., 0.836..., 0.845..., ...]
800
+ #
801
+ # # first 9 observations are nil
802
+ # ts.emv # => [ ... nil, 0.073... , 0.082..., 0.080..., ...]
803
+ #
804
+ # @return [DaruLite::Vector] contains EMV
805
def emv(n = 10, wilder = false) # rubocop:disable Metrics/AbcSize
  smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
  # Everything is anchored on the first non-nil observation.
  start = @data.index { |i| !i.nil? }
  # With no non-nil observation, or fewer than n observations from `start`
  # on, the seed window cannot be formed; return an all-nil vector instead
  # of failing on `nil + n` or building more values than the vector holds.
  return DaruLite::Vector.new([nil] * size, index: @index, name: @name) if start.nil? || start + n > size

  # The first (start + n - 1) positions have neither a mean nor a variance.
  var_base = [nil] * (start + n - 1)
  mean_base = [nil] * (start + n - 1)
  window = @data[start...(start + n)]
  # Seed values: plain moving average and population variance of the window.
  mean_base << (window.inject(0.0) { |s, a| a.nil? ? s : s + a } / n)
  var_base << (window.inject(0.0) { |s, x| x.nil? ? s : s + ((x - mean_base.last)**2) } / n)
  (start + n).upto(size - 1) do |i|
    # Read positionally from @data, exactly like the seed window above, so
    # the result does not depend on how the vector's index resolves integer keys.
    x = @data[i]
    last = mean_base.last
    mean_base << ((x * smoother) + ((1 - smoother) * last))
    # Incremental update using the previous and the freshly updated mean.
    var_base << (((1 - smoother) * var_base.last) + (smoother * (x - last) * (x - mean_base.last)))
  end
  DaruLite::Vector.new(var_base, index: @index, name: @name)
end
822
+
823
+ # Exponential Moving Standard Deviation.
824
+ # Calculates an exponential moving standard deviation of the series using a
825
+ # specified parameter. If wilder is false (the default) then the EMSD
826
+ # uses a smoothing value of 2 / (n + 1), if it is true then it uses the
827
+ # Welles Wilder smoother of 1 / n.
828
+ #
829
+ # @param [Integer] n (10) Loopback length.
830
+ # @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
831
+ # used for smoothing; if false, uses 2/(n+1) value
832
+ #
833
+ # @example Using emsd
834
+ #
835
+ # ts = DaruLite::Vector.new((1..100).map { rand })
836
+ # # => [0.400..., 0.727..., 0.862..., 0.013..., ...]
837
+ #
838
+ # # first 9 observations are nil
839
+ # ts.emsd # => [ ... nil, 0.285... , 0.258..., 0.243..., ...]
840
+ #
841
+ # @return [DaruLite::Vector] contains EMSD
842
def emsd(n = 10, wilder = false)
  # Square root of every exponential moving variance; positions where the
  # variance is nil stay nil.
  roots = []
  emv(n, wilder).each do |var|
    roots << (Math.sqrt(var) unless var.nil?)
  end
  DaruLite::Vector.new(roots, index: @index, name: @name)
end
850
+
851
+ # Moving Average Convergence-Divergence.
852
+ # Calculates the MACD (moving average convergence-divergence) of the time
853
+ # series.
854
+ # @see https://en.wikipedia.org/wiki/MACD
855
+ #
856
+ # @param fast [Integer] fast period of MACD (default 12)
857
+ # @param slow [Integer] slow period of MACD (default 26)
858
+ # @param signal [Integer] signal period of MACD (default 9)
859
+ #
860
+ # @example Create a series and calculate MACD values
861
+ # ts = DaruLite::Vector.new((1..100).map { rand })
862
+ # # => [0.69, 0.23, 0.44, 0.71, ...]
863
+ # macdseries, macdsignal, macdhist = ts.macd
864
+ # macdseries, macdsignal, macdhist = ts.macd(13)
865
+ # macdseries, macdsignal, macdhist = ts.macd(12, 26, 5)
866
+ #
867
+ # @return [Array<DaruLite::Vector>] macdseries, macdsignal and macdhist are
868
+ # returned as an array of three DaruLite::Vectors
869
+ #
870
def macd(fast = 12, slow = 26, signal = 9)
  # MACD line: fast EMA minus slow EMA.
  fast_ema = ema(fast)
  slow_ema = ema(slow)
  series = fast_ema - slow_ema
  # Signal line: EMA of the MACD line; histogram: their difference.
  signal_line = series.ema(signal)
  [series, signal_line, series - signal_line]
end
876
+
877
+ # Calculates the autocorrelation coefficients of the series.
878
+ #
879
+ # The first element is always 1, since that is the correlation
880
+ # of the series with itself.
881
+ #
882
+ # @example
883
+ # ts = DaruLite::Vector.new((1..100).map { rand })
884
+ #
885
+ # ts.acf # => array with first 21 autocorrelations
886
+ # ts.acf 3 # => array with 4 autocorrelations (lags 0 through 3)
887
def acf(max_lags = nil)
  # Default number of lags: 10 * log10(size), truncated.
  max_lags ||= (10 * Math.log10(size)).to_i
  # Only lag 0 (or no lag at all) requested: nothing needs to be computed.
  return (0..max_lags).map { 1.0 } if max_lags < 1

  # These values do not depend on the lag, so compute them once instead of
  # once per lag.
  m = mean
  centered = self - m
  var = variance_sample
  denom = size - 1

  (0..max_lags).map do |i|
    # Lag 0 is the correlation of the series with itself.
    next 1.0 if i.zero?

    # can't use Pearson coefficient since the mean for the lagged series should
    # be the same as the regular series
    (centered * (lag(i) - m)).sum / var / denom
  end
end
901
+
902
+ # Provides autocovariance.
903
+ #
904
+ # == Options
905
+ #
906
+ # * *:demean* = true; optional. Supply false if series is not to be demeaned
907
+ # * *:unbiased* = true; optional. true/false for unbiased/biased form of autocovariance
908
+ #
909
+ # == Returns
910
+ #
911
+ # Array of autocovariance values, one per lag starting at lag 0
912
def acvf(demean = true, unbiased = true)
  m = mean
  demeaned_series = demean ? self - m : self

  # Number of lags evaluated: 0 .. (10 * log10(size)).to_i
  n = (10 * Math.log10(size)).to_i + 1
  # Divisor per lag: the full size for every lag when `unbiased` is true,
  # otherwise size, size - 1, size - 2, ... for lags 0, 1, 2, ...
  # NOTE(review): textbook naming usually calls the constant-size divisor the
  # biased estimator and the (size - lag) divisor the unbiased one -- confirm
  # the intended meaning of the flag before relying on it.
  d = unbiased ? Array.new(size, size) : (1..size).to_a.reverse[0..n]

  0.upto(n - 1).map do |i|
    # Apply `demean` to the lagged series as well; previously the lagged
    # series was always demeaned, even when demean was false.
    lagged = demean ? lag(i) - m : lag(i)
    (demeaned_series * lagged).sum / d[i]
  end
end
927
+
928
+ # Calculate cumulative sum of Vector
929
def cumsum
  # Walk the raw data keeping a running total; missing values produce nil
  # in the output and leave the total untouched.
  running_total = 0
  totals = []
  @data.each do |value|
    if include_with_nan?(DaruLite::MISSING_VALUES, value)
      totals.push(nil)
      next
    end

    running_total += value
    totals.push(running_total)
  end

  DaruLite::Vector.new(totals, index: @index)
end
943
+
944
# Short names for the statistics above. `std` is an alias of `sds`, so
# `sds` must be declared first.
alias_method :sdp, :standard_deviation_population
alias_method :sds, :standard_deviation_sample
alias_method :std, :sds
alias_method :adp, :average_deviation_population
alias_method :cov, :coefficient_of_variation
alias_method :variance, :variance_sample
alias_method :covariance, :covariance_sample
alias_method :sd, :standard_deviation_sample
alias_method :ss, :sum_of_squares
alias_method :percentil, :percentile
alias_method :se, :standard_error
955
+
956
+ private
957
+
958
# Guard: raises TypeError unless #numeric? is truthy; otherwise returns
# whatever #numeric? returned.
def must_be_numeric!
  verdict = numeric?
  return verdict if verdict

  raise TypeError, 'Vector must be numeric'
end
961
+
962
# Sum over paired observations of (x - mean(self)) * (y - mean(other)).
# Pairs in which either value is nil or false contribute 0.
def covariance_sum(other)
  mean_self = mean
  mean_other = other.mean
  total = 0
  @data.zip(other.data).each do |mine, theirs|
    total += if mine && theirs
               (mine - mean_self) * (theirs - mean_other)
             else
               0
             end
  end
  total
end
974
+
975
# Percentile by the midpoint rule over the non-missing values.
#
# When count * q / 100 is a whole number v, the result is the mean of the
# sorted values at positions v - 1 and v (0-based); otherwise it is the
# sorted value at position v truncated.
#
# Returns nil when there are no non-missing values.
def midpoint_percentile(q)
  sorted = reject_values(*DaruLite::MISSING_VALUES).to_a.sort
  # Nothing to rank; previously this ended in a TypeError (0.0 + nil).
  return nil if sorted.empty?

  v = ((size - count_values(*DaruLite::MISSING_VALUES)) * q).quo(100)
  if v.to_i == v
    # Clamp to the last element: for q = 100 the upper neighbour would be
    # one past the end (nil) and the addition would raise. The same clamp
    # is used by linear_percentile (k >= sorted.size => sorted[-1]).
    last = sorted.size - 1
    lower = [(v - 0.5).to_i, last].min
    upper = [(v + 0.5).to_i, last].min
    (sorted[lower].to_f + sorted[upper]).quo(2)
  else
    sorted[v.to_i]
  end
end
985
+
986
# Percentile by linear interpolation over the non-missing values.
# The rank is (q / 100) * (count + 1); its integer part selects the lower
# neighbour (1-based) and its fractional part the interpolation weight.
def linear_percentile(q)
  ordered = reject_values(*DaruLite::MISSING_VALUES).to_a.sort
  observed = size - count_values(*DaruLite::MISSING_VALUES)
  rank = (q / 100.0) * (observed + 1)

  whole = rank.truncate
  fraction = rank % 1

  # Ranks below the first element or past the last one are clamped.
  return ordered[0] if whole.zero?
  return ordered[-1] if whole >= ordered.size

  ordered[whole - 1] + (fraction * (ordered[whole] - ordered[whole - 1]))
end
1001
+
1002
# Draw `sample` distinct observations at random from the non-missing values.
#
# Returns an Array of the drawn values.
# Raises ArgumentError when `sample` exceeds the number of non-missing values.
def raw_sample_without_replacement(sample)
  # Skip the copy made by reject_values when nothing is missing.
  valid = indexes(*DaruLite::MISSING_VALUES).empty? ? self : reject_values(*DaruLite::MISSING_VALUES)
  raise ArgumentError, "Sample size couldn't be greater than n" if
    sample > valid.size

  # A non-positive request yields an empty sample (Array#sample would raise
  # on a negative count).
  return [] unless sample.positive?

  # Array#sample picks distinct positions in one pass. It replaces the former
  # rejection loop (repeated out.include? scans) and avoids valid[i], which
  # goes through the vector's index lookup rather than the element position.
  valid.to_a.sample(sample)
end
1016
+ end
1017
+ end
1018
+ end
1019
+ end