daru_lite 0.1.1 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +35 -33
- data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
- data/lib/daru_lite/data_frame/calculatable.rb +140 -0
- data/lib/daru_lite/data_frame/convertible.rb +107 -0
- data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
- data/lib/daru_lite/data_frame/fetchable.rb +301 -0
- data/lib/daru_lite/data_frame/filterable.rb +144 -0
- data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
- data/lib/daru_lite/data_frame/indexable.rb +168 -0
- data/lib/daru_lite/data_frame/iterable.rb +339 -0
- data/lib/daru_lite/data_frame/joinable.rb +152 -0
- data/lib/daru_lite/data_frame/missable.rb +75 -0
- data/lib/daru_lite/data_frame/pivotable.rb +108 -0
- data/lib/daru_lite/data_frame/queryable.rb +67 -0
- data/lib/daru_lite/data_frame/setable.rb +109 -0
- data/lib/daru_lite/data_frame/sortable.rb +241 -0
- data/lib/daru_lite/dataframe.rb +138 -2353
- data/lib/daru_lite/index/index.rb +14 -1
- data/lib/daru_lite/index/multi_index.rb +9 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1 -1
- data/lib/daru_lite/vector/aggregatable.rb +9 -0
- data/lib/daru_lite/vector/calculatable.rb +78 -0
- data/lib/daru_lite/vector/convertible.rb +77 -0
- data/lib/daru_lite/vector/duplicatable.rb +17 -0
- data/lib/daru_lite/vector/fetchable.rb +175 -0
- data/lib/daru_lite/vector/filterable.rb +128 -0
- data/lib/daru_lite/vector/indexable.rb +77 -0
- data/lib/daru_lite/vector/iterable.rb +95 -0
- data/lib/daru_lite/vector/joinable.rb +17 -0
- data/lib/daru_lite/vector/missable.rb +124 -0
- data/lib/daru_lite/vector/queryable.rb +45 -0
- data/lib/daru_lite/vector/setable.rb +47 -0
- data/lib/daru_lite/vector/sortable.rb +113 -0
- data/lib/daru_lite/vector.rb +36 -932
- data/lib/daru_lite/version.rb +1 -1
- data/spec/data_frame/aggregatable_example.rb +65 -0
- data/spec/data_frame/buildable_example.rb +109 -0
- data/spec/data_frame/calculatable_example.rb +135 -0
- data/spec/data_frame/convertible_example.rb +180 -0
- data/spec/data_frame/duplicatable_example.rb +111 -0
- data/spec/data_frame/fetchable_example.rb +476 -0
- data/spec/data_frame/filterable_example.rb +409 -0
- data/spec/data_frame/indexable_example.rb +221 -0
- data/spec/data_frame/iterable_example.rb +465 -0
- data/spec/data_frame/joinable_example.rb +106 -0
- data/spec/data_frame/missable_example.rb +47 -0
- data/spec/data_frame/pivotable_example.rb +297 -0
- data/spec/data_frame/queryable_example.rb +92 -0
- data/spec/data_frame/setable_example.rb +482 -0
- data/spec/data_frame/sortable_example.rb +350 -0
- data/spec/dataframe_spec.rb +181 -3289
- data/spec/index/categorical_index_spec.rb +27 -8
- data/spec/index/index_spec.rb +21 -0
- data/spec/index/multi_index_spec.rb +85 -76
- data/spec/vector/aggregatable_example.rb +27 -0
- data/spec/vector/calculatable_example.rb +82 -0
- data/spec/vector/convertible_example.rb +126 -0
- data/spec/vector/duplicatable_example.rb +48 -0
- data/spec/vector/fetchable_example.rb +463 -0
- data/spec/vector/filterable_example.rb +165 -0
- data/spec/vector/indexable_example.rb +201 -0
- data/spec/vector/iterable_example.rb +111 -0
- data/spec/vector/joinable_example.rb +25 -0
- data/spec/vector/missable_example.rb +88 -0
- data/spec/vector/queryable_example.rb +91 -0
- data/spec/vector/setable_example.rb +300 -0
- data/spec/vector/sortable_example.rb +242 -0
- data/spec/vector_spec.rb +111 -1805
- metadata +86 -2
data/lib/daru_lite/vector.rb
CHANGED
@@ -2,12 +2,38 @@ require 'daru_lite/maths/arithmetic/vector'
|
|
2
2
|
require 'daru_lite/maths/statistics/vector'
|
3
3
|
require 'daru_lite/accessors/array_wrapper'
|
4
4
|
require 'daru_lite/category'
|
5
|
+
require 'daru_lite/vector/aggregatable'
|
6
|
+
require 'daru_lite/vector/calculatable'
|
7
|
+
require 'daru_lite/vector/convertible'
|
8
|
+
require 'daru_lite/vector/duplicatable'
|
9
|
+
require 'daru_lite/vector/fetchable'
|
10
|
+
require 'daru_lite/vector/filterable'
|
11
|
+
require 'daru_lite/vector/indexable'
|
12
|
+
require 'daru_lite/vector/iterable'
|
13
|
+
require 'daru_lite/vector/joinable'
|
14
|
+
require 'daru_lite/vector/missable'
|
15
|
+
require 'daru_lite/vector/setable'
|
16
|
+
require 'daru_lite/vector/sortable'
|
17
|
+
require 'daru_lite/vector/queryable'
|
5
18
|
|
6
19
|
module DaruLite
|
7
20
|
class Vector # rubocop:disable Metrics/ClassLength
|
8
21
|
include Enumerable
|
9
22
|
include DaruLite::Maths::Arithmetic::Vector
|
10
23
|
include DaruLite::Maths::Statistics::Vector
|
24
|
+
include DaruLite::Vector::Aggregatable
|
25
|
+
include DaruLite::Vector::Calculatable
|
26
|
+
include DaruLite::Vector::Convertible
|
27
|
+
include DaruLite::Vector::Duplicatable
|
28
|
+
include DaruLite::Vector::Fetchable
|
29
|
+
include DaruLite::Vector::Filterable
|
30
|
+
include DaruLite::Vector::Indexable
|
31
|
+
include DaruLite::Vector::Iterable
|
32
|
+
include DaruLite::Vector::Joinable
|
33
|
+
include DaruLite::Vector::Missable
|
34
|
+
include DaruLite::Vector::Setable
|
35
|
+
include DaruLite::Vector::Sortable
|
36
|
+
include DaruLite::Vector::Queryable
|
11
37
|
extend Gem::Deprecate
|
12
38
|
|
13
39
|
class << self
|
@@ -90,46 +116,6 @@ module DaruLite
|
|
90
116
|
@data.size
|
91
117
|
end
|
92
118
|
|
93
|
-
def each(&block)
|
94
|
-
return to_enum(:each) unless block
|
95
|
-
|
96
|
-
@data.each(&block)
|
97
|
-
self
|
98
|
-
end
|
99
|
-
|
100
|
-
def each_index(&block)
|
101
|
-
return to_enum(:each_index) unless block
|
102
|
-
|
103
|
-
@index.each(&block)
|
104
|
-
self
|
105
|
-
end
|
106
|
-
|
107
|
-
def each_with_index(&block)
|
108
|
-
return to_enum(:each_with_index) unless block
|
109
|
-
|
110
|
-
@data.to_a.zip(@index.to_a).each(&block)
|
111
|
-
|
112
|
-
self
|
113
|
-
end
|
114
|
-
|
115
|
-
def map!(&block)
|
116
|
-
return to_enum(:map!) unless block
|
117
|
-
|
118
|
-
@data.map!(&block)
|
119
|
-
self
|
120
|
-
end
|
121
|
-
|
122
|
-
def apply_method(method, keys: nil, by_position: true)
|
123
|
-
vect = keys ? get_sub_vector(keys, by_position: by_position) : self
|
124
|
-
|
125
|
-
case method
|
126
|
-
when Symbol then vect.send(method)
|
127
|
-
when Proc then method.call(vect)
|
128
|
-
else raise
|
129
|
-
end
|
130
|
-
end
|
131
|
-
alias apply_method_on_sub_vector apply_method
|
132
|
-
|
133
119
|
# The name of the DaruLite::Vector. String.
|
134
120
|
attr_reader :name
|
135
121
|
# The row index. Can be either DaruLite::Index or DaruLite::MultiIndex.
|
@@ -185,98 +171,6 @@ module DaruLite
|
|
185
171
|
end
|
186
172
|
end
|
187
173
|
|
188
|
-
# Get one or more elements with specified index or a range.
|
189
|
-
#
|
190
|
-
# == Usage
|
191
|
-
# # For vectors employing single layer Index
|
192
|
-
#
|
193
|
-
# v[:one, :two] # => DaruLite::Vector with indexes :one and :two
|
194
|
-
# v[:one] # => Single element
|
195
|
-
# v[:one..:three] # => DaruLite::Vector with indexes :one, :two and :three
|
196
|
-
#
|
197
|
-
# # For vectors employing hierarchical multi index
|
198
|
-
#
|
199
|
-
def [](*input_indexes)
|
200
|
-
# Get array of positions indexes
|
201
|
-
positions = @index.pos(*input_indexes)
|
202
|
-
|
203
|
-
# If one object is asked return it
|
204
|
-
return @data[positions] if positions.is_a? Numeric
|
205
|
-
|
206
|
-
# Form a new Vector using positional indexes
|
207
|
-
DaruLite::Vector.new(
|
208
|
-
positions.map { |loc| @data[loc] },
|
209
|
-
name: @name,
|
210
|
-
index: @index.subset(*input_indexes), dtype: @dtype
|
211
|
-
)
|
212
|
-
end
|
213
|
-
|
214
|
-
# Returns vector of values given positional values
|
215
|
-
# @param positions [Array<object>] positional values
|
216
|
-
# @return [object] vector
|
217
|
-
# @example
|
218
|
-
# dv = DaruLite::Vector.new 'a'..'e'
|
219
|
-
# dv.at 0, 1, 2
|
220
|
-
# # => #<DaruLite::Vector(3)>
|
221
|
-
# # 0 a
|
222
|
-
# # 1 b
|
223
|
-
# # 2 c
|
224
|
-
def at(*positions)
|
225
|
-
# to be used to form index
|
226
|
-
original_positions = positions
|
227
|
-
positions = coerce_positions(*positions)
|
228
|
-
validate_positions(*positions)
|
229
|
-
|
230
|
-
if positions.is_a? Integer
|
231
|
-
@data[positions]
|
232
|
-
else
|
233
|
-
values = positions.map { |pos| @data[pos] }
|
234
|
-
DaruLite::Vector.new values, index: @index.at(*original_positions), dtype: dtype
|
235
|
-
end
|
236
|
-
end
|
237
|
-
|
238
|
-
# Change value at given positions
|
239
|
-
# @param positions [Array<object>] positional values
|
240
|
-
# @param [object] val value to assign
|
241
|
-
# @example
|
242
|
-
# dv = DaruLite::Vector.new 'a'..'e'
|
243
|
-
# dv.set_at [0, 1], 'x'
|
244
|
-
# dv
|
245
|
-
# # => #<DaruLite::Vector(5)>
|
246
|
-
# # 0 x
|
247
|
-
# # 1 x
|
248
|
-
# # 2 c
|
249
|
-
# # 3 d
|
250
|
-
# # 4 e
|
251
|
-
def set_at(positions, val)
|
252
|
-
validate_positions(*positions)
|
253
|
-
positions.map { |pos| @data[pos] = val }
|
254
|
-
update_position_cache
|
255
|
-
end
|
256
|
-
|
257
|
-
# Just like in Hashes, you can specify the index label of the DaruLite::Vector
|
258
|
-
# and assign an element at that place in the DaruLite::Vector.
|
259
|
-
#
|
260
|
-
# == Usage
|
261
|
-
#
|
262
|
-
# v = DaruLite::Vector.new([1,2,3], index: [:a, :b, :c])
|
263
|
-
# v[:a] = 999
|
264
|
-
# #=>
|
265
|
-
# ##<DaruLite::Vector:90257920 @name = nil @size = 3 >
|
266
|
-
# # nil
|
267
|
-
# # a 999
|
268
|
-
# # b 2
|
269
|
-
# # c 3
|
270
|
-
def []=(*indexes, val)
|
271
|
-
cast(dtype: :array) if val.nil? && dtype != :array
|
272
|
-
|
273
|
-
guard_type_check(val)
|
274
|
-
|
275
|
-
modify_vector(indexes, val)
|
276
|
-
|
277
|
-
update_position_cache
|
278
|
-
end
|
279
|
-
|
280
174
|
# Two vectors are equal if they have the exact same index values corresponding
|
281
175
|
# with the exact same elements. Name is ignored.
|
282
176
|
def ==(other)
|
@@ -367,85 +261,6 @@ module DaruLite
|
|
367
261
|
)
|
368
262
|
end
|
369
263
|
|
370
|
-
# Return a new vector based on the contents of a boolean array. Use with the
|
371
|
-
# comparator methods to obtain meaningful results. See this notebook for
|
372
|
-
# a good overview of using #where.
|
373
|
-
#
|
374
|
-
# @param bool_array [DaruLite::Core::Query::BoolArray, Array<TrueClass, FalseClass>] The
|
375
|
-
# collection containing the true or false values. Each element in the Vector
|
376
|
-
# corresponding to a `true` in the bool_array will be returned along with its
|
377
|
-
# index.
|
378
|
-
# @example Usage of #where.
|
379
|
-
# vector = DaruLite::Vector.new([2,4,5,51,5,16,2,5,3,2,1,5,2,5,2,1,56,234,6,21])
|
380
|
-
#
|
381
|
-
# # Simple logic statement passed to #where.
|
382
|
-
# vector.where(vector.eq(5).or(vector.eq(1)))
|
383
|
-
# # =>
|
384
|
-
# ##<DaruLite::Vector:77626210 @name = nil @size = 7 >
|
385
|
-
# # nil
|
386
|
-
# # 2 5
|
387
|
-
# # 4 5
|
388
|
-
# # 7 5
|
389
|
-
# # 10 1
|
390
|
-
# # 11 5
|
391
|
-
# # 13 5
|
392
|
-
# # 15 1
|
393
|
-
#
|
394
|
-
# # A somewhat more complex logic statement
|
395
|
-
# vector.where((vector.eq(5) | vector.lteq(1)) & vector.in([4,5,1]))
|
396
|
-
# #=>
|
397
|
-
# ##<DaruLite::Vector:81072310 @name = nil @size = 7 >
|
398
|
-
# # nil
|
399
|
-
# # 2 5
|
400
|
-
# # 4 5
|
401
|
-
# # 7 5
|
402
|
-
# # 10 1
|
403
|
-
# # 11 5
|
404
|
-
# # 13 5
|
405
|
-
# # 15 1
|
406
|
-
def where(bool_array)
|
407
|
-
DaruLite::Core::Query.vector_where self, bool_array
|
408
|
-
end
|
409
|
-
|
410
|
-
# Return a new vector based on the contents of a boolean array and &block.
|
411
|
-
#
|
412
|
-
# @param bool_array [DaruLite::Core::Query::BoolArray, Array<TrueClass, FalseClass>, &block] The
|
413
|
-
# collection containing the true or false values. Each element in the Vector
|
414
|
-
# corresponding to a `true` in the bool_array will be returned along with its
|
415
|
-
# index. The &block may contain manipulative functions for the Vector elements.
|
416
|
-
#
|
417
|
-
# @return [DaruLite::Vector]
|
418
|
-
#
|
419
|
-
# @example Usage of #apply_where.
|
420
|
-
# dv = DaruLite::Vector.new ['3 days', '5 weeks', '2 weeks']
|
421
|
-
# dv = dv.apply_where(dv.match /weeks/) { |x| "#{x.split.first.to_i * 7} days" }
|
422
|
-
# # =>
|
423
|
-
# ##<DaruLite::Vector(3)>
|
424
|
-
# # 0 3 days
|
425
|
-
# # 1 35 days
|
426
|
-
# # 2 14 days
|
427
|
-
def apply_where(bool_array, &block)
|
428
|
-
DaruLite::Core::Query.vector_apply_where self, bool_array, &block
|
429
|
-
end
|
430
|
-
|
431
|
-
def head(q = 10)
|
432
|
-
self[0..(q - 1)]
|
433
|
-
end
|
434
|
-
|
435
|
-
def tail(q = 10)
|
436
|
-
start = [size - q, 0].max
|
437
|
-
self[start..(size - 1)]
|
438
|
-
end
|
439
|
-
|
440
|
-
def last(q = 1)
|
441
|
-
# The Enumerable mixin does not provide the last method.
|
442
|
-
tail(q)
|
443
|
-
end
|
444
|
-
|
445
|
-
def empty?
|
446
|
-
@index.empty?
|
447
|
-
end
|
448
|
-
|
449
264
|
def numeric?
|
450
265
|
type == :numeric
|
451
266
|
end
|
@@ -454,26 +269,6 @@ module DaruLite
|
|
454
269
|
type == :object
|
455
270
|
end
|
456
271
|
|
457
|
-
# Reports whether missing data is present in the Vector.
|
458
|
-
def has_missing_data?
|
459
|
-
!indexes(*DaruLite::MISSING_VALUES).empty?
|
460
|
-
end
|
461
|
-
alias flawed? has_missing_data?
|
462
|
-
deprecate :has_missing_data?, :include_values?, 2016, 10
|
463
|
-
deprecate :flawed?, :include_values?, 2016, 10
|
464
|
-
|
465
|
-
# Check if any one of mentioned values occur in the vector
|
466
|
-
# @param values [Array] values to check for
|
467
|
-
# @return [true, false] returns true if any one of specified values
|
468
|
-
# occur in the vector
|
469
|
-
# @example
|
470
|
-
# dv = DaruLite::Vector.new [1, 2, 3, 4, nil]
|
471
|
-
# dv.include_values? nil, Float::NAN
|
472
|
-
# # => true
|
473
|
-
def include_values?(*values)
|
474
|
-
values.any? { |v| include_with_nan? @data, v }
|
475
|
-
end
|
476
|
-
|
477
272
|
# @note Do not use it to check for Float::NAN as
|
478
273
|
# Float::NAN == Float::NAN is false
|
479
274
|
# Return vector of booleans with value at ith position is either
|
@@ -494,18 +289,6 @@ module DaruLite
|
|
494
289
|
DaruLite::Vector.new values.map { |v| eq(v) }.inject(:|)
|
495
290
|
end
|
496
291
|
|
497
|
-
# Append an element to the vector by specifying the element and index
|
498
|
-
def concat(element, index)
|
499
|
-
raise IndexError, 'Expected new unique index' if @index.include? index
|
500
|
-
|
501
|
-
@index |= [index]
|
502
|
-
@data[@index[index]] = element
|
503
|
-
|
504
|
-
update_position_cache
|
505
|
-
end
|
506
|
-
alias push concat
|
507
|
-
alias << concat
|
508
|
-
|
509
292
|
# Cast a vector to a new data type.
|
510
293
|
#
|
511
294
|
# == Options
|
@@ -531,6 +314,14 @@ module DaruLite
|
|
531
314
|
update_position_cache
|
532
315
|
end
|
533
316
|
|
317
|
+
# Delete element by position
|
318
|
+
def delete_at_position(position)
|
319
|
+
@data.delete_at(position)
|
320
|
+
@index = @index.delete_at(position)
|
321
|
+
|
322
|
+
update_position_cache
|
323
|
+
end
|
324
|
+
|
534
325
|
# The type of data contained in the vector. Can be :object.
|
535
326
|
#
|
536
327
|
# Running through the data to figure out the kind of data is delayed to the
|
@@ -560,148 +351,6 @@ module DaruLite
|
|
560
351
|
type == :category
|
561
352
|
end
|
562
353
|
|
563
|
-
# Get index of element
|
564
|
-
def index_of(element)
|
565
|
-
case dtype
|
566
|
-
when :array then @index.key(@data.index { |x| x.eql? element })
|
567
|
-
else @index.key @data.index(element)
|
568
|
-
end
|
569
|
-
end
|
570
|
-
|
571
|
-
# Keep only unique elements of the vector along with their indexes.
|
572
|
-
def uniq
|
573
|
-
uniq_vector = @data.uniq
|
574
|
-
new_index = uniq_vector.map { |element| index_of(element) }
|
575
|
-
|
576
|
-
DaruLite::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
|
577
|
-
end
|
578
|
-
|
579
|
-
def any?(&block)
|
580
|
-
@data.data.any?(&block)
|
581
|
-
end
|
582
|
-
|
583
|
-
def all?(&block)
|
584
|
-
@data.data.all?(&block)
|
585
|
-
end
|
586
|
-
|
587
|
-
# Sorts a vector according to its values. If a block is specified, the contents
|
588
|
-
# will be evaluated and data will be swapped whenever the block evaluates
|
589
|
-
# to *true*. Defaults to ascending order sorting. Any missing values will be
|
590
|
-
# put at the end of the vector. Preserves indexing. Default sort algorithm is
|
591
|
-
# quick sort.
|
592
|
-
#
|
593
|
-
# == Options
|
594
|
-
#
|
595
|
-
# * +:ascending+ - if false, will sort in descending order. Defaults to true.
|
596
|
-
#
|
597
|
-
# * +:type+ - Specify the sorting algorithm. Only supports quick_sort for now.
|
598
|
-
# == Usage
|
599
|
-
#
|
600
|
-
# v = DaruLite::Vector.new ["My first guitar", "jazz", "guitar"]
|
601
|
-
# # Say you want to sort these strings by length.
|
602
|
-
# v.sort(ascending: false) { |a,b| a.length <=> b.length }
|
603
|
-
def sort(opts = {}, &block)
|
604
|
-
opts = { ascending: true }.merge(opts)
|
605
|
-
|
606
|
-
vector_index = resort_index(@data.each_with_index, opts, &block)
|
607
|
-
vector, index = vector_index.transpose
|
608
|
-
|
609
|
-
index = @index.reorder index
|
610
|
-
|
611
|
-
DaruLite::Vector.new(vector, index: index, name: @name, dtype: @dtype)
|
612
|
-
end
|
613
|
-
|
614
|
-
# Sorts the vector according to its `Index` values. Defaults to ascending
|
615
|
-
# order sorting.
|
616
|
-
#
|
617
|
-
# @param [Hash] opts the options for sort_by_index method.
|
618
|
-
# @option opts [Boolean] :ascending false, will sort `index` in
|
619
|
-
# descending order.
|
620
|
-
#
|
621
|
-
# @return [Vector] new sorted `Vector` according to the index values.
|
622
|
-
#
|
623
|
-
# @example
|
624
|
-
#
|
625
|
-
# dv = DaruLite::Vector.new [11, 13, 12], index: [23, 21, 22]
|
626
|
-
# # Say you want to sort index in ascending order
|
627
|
-
# dv.sort_by_index(ascending: true)
|
628
|
-
# #=> DaruLite::Vector.new [13, 12, 11], index: [21, 22, 23]
|
629
|
-
# # Say you want to sort index in descending order
|
630
|
-
# dv.sort_by_index(ascending: false)
|
631
|
-
# #=> DaruLite::Vector.new [11, 12, 13], index: [23, 22, 21]
|
632
|
-
def sort_by_index(opts = {})
|
633
|
-
opts = { ascending: true }.merge(opts)
|
634
|
-
_, new_order = resort_index(@index.each_with_index, opts).transpose
|
635
|
-
|
636
|
-
reorder new_order
|
637
|
-
end
|
638
|
-
|
639
|
-
DEFAULT_SORTER = lambda { |(lv, li), (rv, ri)|
|
640
|
-
if lv.nil? && rv.nil?
|
641
|
-
li <=> ri
|
642
|
-
elsif lv.nil?
|
643
|
-
-1
|
644
|
-
elsif rv.nil?
|
645
|
-
1
|
646
|
-
else
|
647
|
-
lv <=> rv
|
648
|
-
end
|
649
|
-
}
|
650
|
-
|
651
|
-
# Just sort the data and get an Array in return using Enumerable#sort.
|
652
|
-
# Non-destructive.
|
653
|
-
# :nocov:
|
654
|
-
def sorted_data(&block)
|
655
|
-
@data.to_a.sort(&block)
|
656
|
-
end
|
657
|
-
# :nocov:
|
658
|
-
|
659
|
-
# Like map, but returns a DaruLite::Vector with the returned values.
|
660
|
-
def recode(dt = nil, &block)
|
661
|
-
return to_enum(:recode, dt) unless block
|
662
|
-
|
663
|
-
dup.recode! dt, &block
|
664
|
-
end
|
665
|
-
|
666
|
-
# Destructive version of #recode
|
667
|
-
def recode!(dt = nil, &block)
|
668
|
-
return to_enum(:recode!, dt) unless block
|
669
|
-
|
670
|
-
@data.map!(&block).data
|
671
|
-
@data = cast_vector_to(dt || @dtype)
|
672
|
-
self
|
673
|
-
end
|
674
|
-
|
675
|
-
# Delete an element if block returns true. Destructive.
|
676
|
-
def delete_if
|
677
|
-
return to_enum(:delete_if) unless block_given?
|
678
|
-
|
679
|
-
keep_e, keep_i = each_with_index.reject { |n, _i| yield(n) }.transpose
|
680
|
-
|
681
|
-
@data = cast_vector_to @dtype, keep_e
|
682
|
-
@index = DaruLite::Index.new(keep_i)
|
683
|
-
|
684
|
-
update_position_cache
|
685
|
-
|
686
|
-
self
|
687
|
-
end
|
688
|
-
|
689
|
-
# Keep an element if block returns true. Destructive.
|
690
|
-
def keep_if
|
691
|
-
return to_enum(:keep_if) unless block_given?
|
692
|
-
|
693
|
-
delete_if { |val| !yield(val) }
|
694
|
-
end
|
695
|
-
|
696
|
-
# Reports all values that don't comply with a condition.
|
697
|
-
# Returns a hash with the index of data and the invalid data.
|
698
|
-
def verify
|
699
|
-
(0...size)
|
700
|
-
.map { |i| [i, @data[i]] }
|
701
|
-
.reject { |_i, val| yield(val) }
|
702
|
-
.to_h
|
703
|
-
end
|
704
|
-
|
705
354
|
# Return an Array with the data split by a separator.
|
706
355
|
# a=DaruLite::Vector.new(["a,b","c,d","a,b","d"])
|
707
356
|
# a.splitted
|
@@ -719,93 +368,6 @@ module DaruLite
|
|
719
368
|
end
|
720
369
|
end
|
721
370
|
|
722
|
-
# Returns a hash of Vectors, defined by the different values
|
723
|
-
# defined on the fields
|
724
|
-
# Example:
|
725
|
-
#
|
726
|
-
# a=DaruLite::Vector.new(["a,b","c,d","a,b"])
|
727
|
-
# a.split_by_separator
|
728
|
-
# => {"a"=>#<DaruLite::Vector:0x7f2dbcc09d88
|
729
|
-
# @data=[1, 0, 1]>,
|
730
|
-
# "b"=>#<DaruLite::Vector:0x7f2dbcc09c48
|
731
|
-
# @data=[1, 1, 0]>,
|
732
|
-
# "c"=>#<DaruLite::Vector:0x7f2dbcc09b08
|
733
|
-
# @data=[0, 1, 1]>}
|
734
|
-
#
|
735
|
-
def split_by_separator(sep = ',')
|
736
|
-
split_data = splitted sep
|
737
|
-
split_data
|
738
|
-
.flatten.uniq.compact.to_h do |key|
|
739
|
-
[
|
740
|
-
key,
|
741
|
-
DaruLite::Vector.new(split_data.map { |v| split_value(key, v) })
|
742
|
-
]
|
743
|
-
end
|
744
|
-
end
|
745
|
-
|
746
|
-
def split_by_separator_freq(sep = ',')
|
747
|
-
split_by_separator(sep).transform_values do |v|
|
748
|
-
v.sum(&:to_i)
|
749
|
-
end
|
750
|
-
end
|
751
|
-
|
752
|
-
def reset_index!
|
753
|
-
@index = DaruLite::Index.new(Array.new(size) { |i| i })
|
754
|
-
self
|
755
|
-
end
|
756
|
-
|
757
|
-
# Replace all nils in the vector with the value passed as an argument. Destructive.
|
758
|
-
# See #replace_nils for non-destructive version
|
759
|
-
#
|
760
|
-
# == Arguments
|
761
|
-
#
|
762
|
-
# * +replacement+ - The value which should replace all nils
|
763
|
-
def replace_nils!(replacement)
|
764
|
-
indexes(*DaruLite::MISSING_VALUES).each do |idx|
|
765
|
-
self[idx] = replacement
|
766
|
-
end
|
767
|
-
|
768
|
-
self
|
769
|
-
end
|
770
|
-
|
771
|
-
# Rolling fillna
|
772
|
-
# replace all Float::NAN and NIL values with the preceding or following value
|
773
|
-
#
|
774
|
-
# @param direction [Symbol] (:forward, :backward) whether replacement value is preceding or following
|
775
|
-
#
|
776
|
-
# @example
|
777
|
-
# dv = DaruLite::Vector.new([1, 2, 1, 4, nil, Float::NAN, 3, nil, Float::NAN])
|
778
|
-
#
|
779
|
-
# 2.3.3 :068 > dv.rolling_fillna(:forward)
|
780
|
-
# => #<DaruLite::Vector(9)>
|
781
|
-
# 0 1
|
782
|
-
# 1 2
|
783
|
-
# 2 1
|
784
|
-
# 3 4
|
785
|
-
# 4 4
|
786
|
-
# 5 4
|
787
|
-
# 6 3
|
788
|
-
# 7 3
|
789
|
-
# 8 3
|
790
|
-
#
|
791
|
-
def rolling_fillna!(direction = :forward)
|
792
|
-
enum = direction == :forward ? index : index.reverse_each
|
793
|
-
last_valid_value = 0
|
794
|
-
enum.each do |idx|
|
795
|
-
if valid_value?(self[idx])
|
796
|
-
last_valid_value = self[idx]
|
797
|
-
else
|
798
|
-
self[idx] = last_valid_value
|
799
|
-
end
|
800
|
-
end
|
801
|
-
self
|
802
|
-
end
|
803
|
-
|
804
|
-
# Non-destructive version of rolling_fillna!
|
805
|
-
def rolling_fillna(direction = :forward)
|
806
|
-
dup.rolling_fillna!(direction)
|
807
|
-
end
|
808
|
-
|
809
371
|
# Lags the series by `k` periods.
|
810
372
|
#
|
811
373
|
# Lags the series by `k` periods, "shifting" data and inserting `nil`s
|
@@ -845,187 +407,6 @@ module DaruLite
|
|
845
407
|
end
|
846
408
|
end
|
847
409
|
|
848
|
-
def detach_index
|
849
|
-
DaruLite::DataFrame.new(
|
850
|
-
index: @index.to_a,
|
851
|
-
values: @data.to_a
|
852
|
-
)
|
853
|
-
end
|
854
|
-
|
855
|
-
# Non-destructive version of #replace_nils!
|
856
|
-
def replace_nils(replacement)
|
857
|
-
dup.replace_nils!(replacement)
|
858
|
-
end
|
859
|
-
|
860
|
-
# number of non-missing elements
|
861
|
-
def n_valid
|
862
|
-
size - indexes(*DaruLite::MISSING_VALUES).size
|
863
|
-
end
|
864
|
-
deprecate :n_valid, :count_values, 2016, 10
|
865
|
-
|
866
|
-
# Count the number of values specified
|
867
|
-
# @param values [Array] values to count for
|
868
|
-
# @return [Integer] the number of times the values mentioned occurs
|
869
|
-
# @example
|
870
|
-
# dv = DaruLite::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
|
871
|
-
# dv.count_values nil
|
872
|
-
# # => 2
|
873
|
-
def count_values(*values)
|
874
|
-
positions(*values).size
|
875
|
-
end
|
876
|
-
|
877
|
-
# Returns *true* if an index exists
|
878
|
-
def has_index?(index)
|
879
|
-
@index.include? index
|
880
|
-
end
|
881
|
-
|
882
|
-
# @param keys [Array] can be positions (if by_position is true) or indexes (if by_position is false)
|
883
|
-
# @return [DaruLite::Vector]
|
884
|
-
def get_sub_vector(keys, by_position: true)
|
885
|
-
return DaruLite::Vector.new([]) if keys == []
|
886
|
-
|
887
|
-
keys = @index.pos(*keys) unless by_position
|
888
|
-
|
889
|
-
sub_vect = at(*keys)
|
890
|
-
sub_vect = DaruLite::Vector.new([sub_vect]) unless sub_vect.is_a?(DaruLite::Vector)
|
891
|
-
|
892
|
-
sub_vect
|
893
|
-
end
|
894
|
-
|
895
|
-
# @return [DaruLite::DataFrame] the vector as a single-vector dataframe
|
896
|
-
def to_df
|
897
|
-
DaruLite::DataFrame.new({ @name => @data }, name: @name, index: @index)
|
898
|
-
end
|
899
|
-
|
900
|
-
# Convert Vector to a horizontal or vertical Ruby Matrix.
|
901
|
-
#
|
902
|
-
# == Arguments
|
903
|
-
#
|
904
|
-
# * +axis+ - Specify whether you want a *:horizontal* or a *:vertical* matrix.
|
905
|
-
def to_matrix(axis = :horizontal)
|
906
|
-
case axis
|
907
|
-
when :horizontal
|
908
|
-
Matrix[to_a]
|
909
|
-
when :vertical
|
910
|
-
Matrix.columns([to_a])
|
911
|
-
else
|
912
|
-
raise ArgumentError, "axis should be either :horizontal or :vertical, not #{axis}"
|
913
|
-
end
|
914
|
-
end
|
915
|
-
|
916
|
-
# Convert to hash (explicit). Hash keys are indexes and values are the corresponding elements
|
917
|
-
def to_h
|
918
|
-
@index.to_h { |index| [index, self[index]] }
|
919
|
-
end
|
920
|
-
|
921
|
-
# Return an array
|
922
|
-
def to_a
|
923
|
-
@data.to_a
|
924
|
-
end
|
925
|
-
|
926
|
-
# Convert the hash from to_h to json
|
927
|
-
def to_json(*)
|
928
|
-
to_h.to_json
|
929
|
-
end
|
930
|
-
|
931
|
-
# Convert to html for iruby
|
932
|
-
def to_html(threshold = 30)
|
933
|
-
table_thead = to_html_thead
|
934
|
-
table_tbody = to_html_tbody(threshold)
|
935
|
-
path = if index.is_a?(MultiIndex)
|
936
|
-
File.expand_path('iruby/templates/vector_mi.html.erb', __dir__)
|
937
|
-
else
|
938
|
-
File.expand_path('iruby/templates/vector.html.erb', __dir__)
|
939
|
-
end
|
940
|
-
ERB.new(File.read(path).strip).result(binding)
|
941
|
-
end
|
942
|
-
|
943
|
-
def to_html_thead
|
944
|
-
table_thead_path =
|
945
|
-
if index.is_a?(MultiIndex)
|
946
|
-
File.expand_path('iruby/templates/vector_mi_thead.html.erb', __dir__)
|
947
|
-
else
|
948
|
-
File.expand_path('iruby/templates/vector_thead.html.erb', __dir__)
|
949
|
-
end
|
950
|
-
ERB.new(File.read(table_thead_path).strip).result(binding)
|
951
|
-
end
|
952
|
-
|
953
|
-
def to_html_tbody(threshold = 30)
|
954
|
-
table_tbody_path =
|
955
|
-
if index.is_a?(MultiIndex)
|
956
|
-
File.expand_path('iruby/templates/vector_mi_tbody.html.erb', __dir__)
|
957
|
-
else
|
958
|
-
File.expand_path('iruby/templates/vector_tbody.html.erb', __dir__)
|
959
|
-
end
|
960
|
-
ERB.new(File.read(table_tbody_path).strip).result(binding)
|
961
|
-
end
|
962
|
-
|
963
|
-
def to_s
|
964
|
-
"#<#{self.class}#{": #{@name}" if @name}(#{size})#{':category' if category?}>"
|
965
|
-
end
|
966
|
-
|
967
|
-
# Create a summary of the Vector
|
968
|
-
# @param indent_level [Integer] indent level
|
969
|
-
# @return [String] String containing the summary of the Vector
|
970
|
-
# @example
|
971
|
-
# dv = DaruLite::Vector.new [1, 2, 3]
|
972
|
-
# puts dv.summary
|
973
|
-
#
|
974
|
-
# # =
|
975
|
-
# # n :3
|
976
|
-
# # non-missing:3
|
977
|
-
# # median: 2
|
978
|
-
# # mean: 2.0000
|
979
|
-
# # std.dev.: 1.0000
|
980
|
-
# # std.err.: 0.5774
|
981
|
-
# # skew: 0.0000
|
982
|
-
# # kurtosis: -2.3333
|
983
|
-
def summary(indent_level = 0)
|
984
|
-
non_missing = size - count_values(*DaruLite::MISSING_VALUES)
|
985
|
-
summary = (' =' * indent_level) + "= #{name}" \
|
986
|
-
"\n n :#{size}" \
|
987
|
-
"\n non-missing:#{non_missing}"
|
988
|
-
case type
|
989
|
-
when :object
|
990
|
-
summary << object_summary
|
991
|
-
when :numeric
|
992
|
-
summary << numeric_summary
|
993
|
-
end
|
994
|
-
summary.split("\n").join("\n#{' ' * indent_level}")
|
995
|
-
end
|
996
|
-
|
997
|
-
# Displays summary for an object type Vector
|
998
|
-
# @return [String] String containing object vector summary
|
999
|
-
def object_summary
|
1000
|
-
nval = count_values(*DaruLite::MISSING_VALUES)
|
1001
|
-
summary = "\n factors: #{factors.to_a.join(',')}" \
|
1002
|
-
"\n mode: #{mode.to_a.join(',')}" \
|
1003
|
-
"\n Distribution\n"
|
1004
|
-
|
1005
|
-
data = frequencies.sort.each_with_index.map do |v, k|
|
1006
|
-
[k, v, format('%0.2f%%', ((nval.zero? ? 1 : v.quo(nval)) * 100))]
|
1007
|
-
end
|
1008
|
-
|
1009
|
-
summary + Formatters::Table.format(data)
|
1010
|
-
end
|
1011
|
-
|
1012
|
-
# Displays summary for a numeric type Vector
|
1013
|
-
# @return [String] String containing numeric vector summary
|
1014
|
-
def numeric_summary
|
1015
|
-
summary = "\n median: #{median}" +
|
1016
|
-
format("\n mean: %0.4f", mean)
|
1017
|
-
if sd
|
1018
|
-
summary << (format("\n std.dev.: %0.4f", sd) +
|
1019
|
-
format("\n std.err.: %0.4f", se))
|
1020
|
-
end
|
1021
|
-
|
1022
|
-
if count_values(*DaruLite::MISSING_VALUES).zero?
|
1023
|
-
summary << (format("\n skew: %0.4f", skew) +
|
1024
|
-
format("\n kurtosis: %0.4f", kurtosis))
|
1025
|
-
end
|
1026
|
-
summary
|
1027
|
-
end
|
1028
|
-
|
1029
410
|
# Overrides original inspect for pretty printing in irb
|
1030
411
|
def inspect(spacing = 20, threshold = 15)
|
1031
412
|
row_headers = index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a
|
@@ -1040,68 +421,6 @@ module DaruLite
|
|
1040
421
|
)
|
1041
422
|
end
|
1042
423
|
|
1043
|
-
# Sets new index for vector. Preserves index->value correspondence.
|
1044
|
-
# Sets nil for new index keys absent from original index.
|
1045
|
-
# @note Unlike #reorder! which takes positions as input it takes
|
1046
|
-
# index as an input to reorder the vector
|
1047
|
-
# @param [DaruLite::Index, DaruLite::MultiIndex] new_index new index to order with
|
1048
|
-
# @return [DaruLite::Vector] vector reindexed with new index
|
1049
|
-
def reindex!(new_index)
|
1050
|
-
values = []
|
1051
|
-
each_with_index do |val, i|
|
1052
|
-
values[new_index[i]] = val if new_index.include?(i)
|
1053
|
-
end
|
1054
|
-
values.fill(nil, values.size, new_index.size - values.size)
|
1055
|
-
|
1056
|
-
@data = cast_vector_to @dtype, values
|
1057
|
-
@index = new_index
|
1058
|
-
|
1059
|
-
update_position_cache
|
1060
|
-
|
1061
|
-
self
|
1062
|
-
end
|
1063
|
-
|
1064
|
-
# Reorder the vector with given positions
|
1065
|
-
# @note Unlike #reindex! which takes index as input, it takes
|
1066
|
-
# positions as an input to reorder the vector
|
1067
|
-
# @param [Array] order the order to reorder the vector with
|
1068
|
-
# @return reordered vector
|
1069
|
-
# @example
|
1070
|
-
# dv = DaruLite::Vector.new [3, 2, 1], index: ['c', 'b', 'a']
|
1071
|
-
# dv.reorder! [2, 1, 0]
|
1072
|
-
# # => #<DaruLite::Vector(3)>
|
1073
|
-
# # a 1
|
1074
|
-
# # b 2
|
1075
|
-
# # c 3
|
1076
|
-
def reorder!(order)
|
1077
|
-
@index = @index.reorder order
|
1078
|
-
data_array = order.map { |i| @data[i] }
|
1079
|
-
@data = cast_vector_to @dtype, data_array, @nm_dtype
|
1080
|
-
update_position_cache
|
1081
|
-
self
|
1082
|
-
end
|
1083
|
-
|
1084
|
-
# Non-destructive version of #reorder!
|
1085
|
-
def reorder(order)
|
1086
|
-
dup.reorder! order
|
1087
|
-
end
|
1088
|
-
|
1089
|
-
# Create a new vector with a different index, and preserve the indexing of
|
1090
|
-
# current elements.
|
1091
|
-
def reindex(new_index)
|
1092
|
-
dup.reindex!(new_index)
|
1093
|
-
end
|
1094
|
-
|
1095
|
-
def index=(idx)
|
1096
|
-
idx = Index.coerce(idx)
|
1097
|
-
|
1098
|
-
raise ArgumentError, "Size of supplied index #{idx.size} does not match size of Vector" if idx.size != size
|
1099
|
-
raise ArgumentError, 'Can only assign type Index and its subclasses.' unless idx.is_a?(DaruLite::Index)
|
1100
|
-
|
1101
|
-
@index = idx
|
1102
|
-
self
|
1103
|
-
end
|
1104
|
-
|
1105
424
|
# Give the vector a new name
|
1106
425
|
#
|
1107
426
|
# @param new_name [Symbol] The new name.
|
@@ -1112,12 +431,6 @@ module DaruLite
|
|
1112
431
|
|
1113
432
|
alias name= rename
|
1114
433
|
|
1115
|
-
# Duplicated a vector
|
1116
|
-
# @return [DaruLite::Vector] duplicated vector
|
1117
|
-
def dup
|
1118
|
-
DaruLite::Vector.new @data.dup, name: @name, index: @index.dup
|
1119
|
-
end
|
1120
|
-
|
1121
434
|
# == Bootstrap
|
1122
435
|
# Generate +nr+ resamples (with replacement) of size +s+
|
1123
436
|
# from vector, computing each estimate from +estimators+
|
@@ -1195,130 +508,6 @@ module DaruLite
|
|
1195
508
|
DaruLite::DataFrame.new ps
|
1196
509
|
end
|
1197
510
|
|
1198
|
-
# Returns an array of either none or integer values, indicating the
|
1199
|
-
# +regexp+ matching with the given array.
|
1200
|
-
#
|
1201
|
-
# @param regexp [Regexp] A regular matching expression. For example, +/weeks/+.
|
1202
|
-
#
|
1203
|
-
# @return [Array] Containing either +nil+ or integer values, according to the match with the given +regexp+
|
1204
|
-
#
|
1205
|
-
# @example
|
1206
|
-
# dv = DaruLite::Vector.new(['3 days', '5 weeks', '2 weeks'])
|
1207
|
-
# dv.match(/weeks/)
|
1208
|
-
#
|
1209
|
-
# # => [false, true, true]
|
1210
|
-
def match(regexp)
|
1211
|
-
@data.map { |value| !!(value =~ regexp) }
|
1212
|
-
end
|
1213
|
-
|
1214
|
-
# Creates a new vector consisting only of non-nil data
|
1215
|
-
#
|
1216
|
-
# == Arguments
|
1217
|
-
#
|
1218
|
-
# @param as_a [Symbol] Passing :array will return only the elements
|
1219
|
-
# as an Array. Otherwise will return a DaruLite::Vector.
|
1220
|
-
#
|
1221
|
-
# @param _duplicate [Symbol] In case no missing data is found in the
|
1222
|
-
# vector, setting this to false will return the same vector.
|
1223
|
-
# Otherwise, a duplicate will be returned irrespective of
|
1224
|
-
# presence of missing data.
|
1225
|
-
|
1226
|
-
def only_valid(as_a = :vector, _duplicate = true)
|
1227
|
-
# FIXME: Now duplicate is just ignored.
|
1228
|
-
# There are no spec that fail on this case, so I'll leave it
|
1229
|
-
# this way for now - zverok, 2016-05-07
|
1230
|
-
|
1231
|
-
new_index = @index.to_a - indexes(*DaruLite::MISSING_VALUES)
|
1232
|
-
new_vector = new_index.map { |idx| self[idx] }
|
1233
|
-
|
1234
|
-
if as_a == :vector
|
1235
|
-
DaruLite::Vector.new new_vector, index: new_index, name: @name, dtype: dtype
|
1236
|
-
else
|
1237
|
-
new_vector
|
1238
|
-
end
|
1239
|
-
end
|
1240
|
-
deprecate :only_valid, :reject_values, 2016, 10
|
1241
|
-
|
1242
|
-
# Return a vector with specified values removed
|
1243
|
-
# @param values [Array] values to reject from resultant vector
|
1244
|
-
# @return [DaruLite::Vector] vector with specified values removed
|
1245
|
-
# @example
|
1246
|
-
# dv = DaruLite::Vector.new [1, 2, nil, Float::NAN]
|
1247
|
-
# dv.reject_values nil, Float::NAN
|
1248
|
-
# # => #<DaruLite::Vector(2)>
|
1249
|
-
# # 0 1
|
1250
|
-
# # 1 2
|
1251
|
-
def reject_values(*values)
|
1252
|
-
resultant_pos = size.times.to_a - positions(*values)
|
1253
|
-
dv = at(*resultant_pos)
|
1254
|
-
# Handle the case when number of positions is 1
|
1255
|
-
# and hence #at doesn't return a vector
|
1256
|
-
if dv.is_a?(DaruLite::Vector)
|
1257
|
-
dv
|
1258
|
-
else
|
1259
|
-
pos = resultant_pos.first
|
1260
|
-
at(pos..pos)
|
1261
|
-
end
|
1262
|
-
end
|
1263
|
-
|
1264
|
-
# Return indexes of values specified
|
1265
|
-
# @param values [Array] values to find indexes for
|
1266
|
-
# @return [Array] array of indexes of values specified
|
1267
|
-
# @example
|
1268
|
-
# dv = DaruLite::Vector.new [1, 2, nil, Float::NAN], index: 11..14
|
1269
|
-
# dv.indexes nil, Float::NAN
|
1270
|
-
# # => [13, 14]
|
1271
|
-
def indexes(*values)
|
1272
|
-
index.to_a.values_at(*positions(*values))
|
1273
|
-
end
|
1274
|
-
|
1275
|
-
# Replaces specified values with a new value
|
1276
|
-
# @param [Array] old_values array of values to replace
|
1277
|
-
# @param [object] new_value new value to replace with
|
1278
|
-
# @note It performs the replace in place.
|
1279
|
-
# @return [DaruLite::Vector] Same vector itself with values
|
1280
|
-
# replaced with new value
|
1281
|
-
# @example
|
1282
|
-
# dv = DaruLite::Vector.new [1, 2, :a, :b]
|
1283
|
-
# dv.replace_values [:a, :b], nil
|
1284
|
-
# dv
|
1285
|
-
# # =>
|
1286
|
-
# # #<DaruLite::Vector:19903200 @name = nil @metadata = {} @size = 4 >
|
1287
|
-
# # nil
|
1288
|
-
# # 0 1
|
1289
|
-
# # 1 2
|
1290
|
-
# # 2 nil
|
1291
|
-
# # 3 nil
|
1292
|
-
def replace_values(old_values, new_value)
|
1293
|
-
old_values = [old_values] unless old_values.is_a? Array
|
1294
|
-
size.times do |pos|
|
1295
|
-
set_at([pos], new_value) if include_with_nan? old_values, at(pos)
|
1296
|
-
end
|
1297
|
-
self
|
1298
|
-
end
|
1299
|
-
|
1300
|
-
# Returns a Vector containing only missing data (preserves indexes).
|
1301
|
-
def only_missing(as_a = :vector)
|
1302
|
-
case as_a
|
1303
|
-
when :vector
|
1304
|
-
self[*indexes(*DaruLite::MISSING_VALUES)]
|
1305
|
-
when :array
|
1306
|
-
self[*indexes(*DaruLite::MISSING_VALUES)].to_a
|
1307
|
-
end
|
1308
|
-
end
|
1309
|
-
deprecate :only_missing, nil, 2016, 10
|
1310
|
-
|
1311
|
-
# Returns a Vector with only numerical data. Missing data is included
|
1312
|
-
# but non-Numeric objects are excluded. Preserves index.
|
1313
|
-
def only_numerics
|
1314
|
-
numeric_indexes =
|
1315
|
-
each_with_index
|
1316
|
-
.select { |v, _i| v.is_a?(Numeric) || v.nil? }
|
1317
|
-
.map(&:last)
|
1318
|
-
|
1319
|
-
self[*numeric_indexes]
|
1320
|
-
end
|
1321
|
-
|
1322
511
|
DATE_REGEXP = /^(\d{2}-\d{2}-\d{4}|\d{4}-\d{2}-\d{2})$/.freeze
|
1323
512
|
|
1324
513
|
# Returns the database type for the vector, according to its content
|
@@ -1335,12 +524,6 @@ module DaruLite
|
|
1335
524
|
end
|
1336
525
|
end
|
1337
526
|
|
1338
|
-
# Copies the structure of the vector (i.e the index, size, etc.) and fills all
|
1339
|
-
# all values with nils.
|
1340
|
-
def clone_structure
|
1341
|
-
DaruLite::Vector.new(([nil] * size), name: @name, index: @index.dup)
|
1342
|
-
end
|
1343
|
-
|
1344
527
|
# Save the vector to a file
|
1345
528
|
#
|
1346
529
|
# == Arguments
|
@@ -1396,61 +579,6 @@ module DaruLite
|
|
1396
579
|
name.to_s.end_with?('=') || has_index?(name) || super
|
1397
580
|
end
|
1398
581
|
|
1399
|
-
# Partition a numeric variable into categories.
|
1400
|
-
# @param [Array<Numeric>] partitions an array whose consecutive elements
|
1401
|
-
# provide intervals for categories
|
1402
|
-
# @param [Hash] opts options to cut the partition
|
1403
|
-
# @option opts [:left, :right] :close_at specifies whether the interval closes at
|
1404
|
-
# the right side of left side
|
1405
|
-
# @option opts [Array] :labels names of the categories
|
1406
|
-
# @return [DaruLite::Vector] numeric variable converted to categorical variable
|
1407
|
-
# @example
|
1408
|
-
# heights = DaruLite::Vector.new [30, 35, 32, 50, 42, 51]
|
1409
|
-
# height_cat = heights.cut [30, 40, 50, 60], labels=['low', 'medium', 'high']
|
1410
|
-
# # => #<DaruLite::Vector(6)>
|
1411
|
-
# # 0 low
|
1412
|
-
# # 1 low
|
1413
|
-
# # 2 low
|
1414
|
-
# # 3 high
|
1415
|
-
# # 4 medium
|
1416
|
-
# # 5 high
|
1417
|
-
def cut(partitions, opts = {})
|
1418
|
-
close_at = opts[:close_at] || :right
|
1419
|
-
labels = opts[:labels]
|
1420
|
-
partitions = partitions.to_a
|
1421
|
-
values = to_a.map { |val| cut_find_category partitions, val, close_at }
|
1422
|
-
cats = cut_categories(partitions, close_at)
|
1423
|
-
|
1424
|
-
dv = DaruLite::Vector.new values,
|
1425
|
-
index: @index,
|
1426
|
-
type: :category,
|
1427
|
-
categories: cats
|
1428
|
-
|
1429
|
-
# Rename categories if new labels provided
|
1430
|
-
if labels
|
1431
|
-
dv.rename_categories cats.zip(labels).to_h
|
1432
|
-
else
|
1433
|
-
dv
|
1434
|
-
end
|
1435
|
-
end
|
1436
|
-
|
1437
|
-
def positions(*values)
|
1438
|
-
case values
|
1439
|
-
when [nil]
|
1440
|
-
nil_positions
|
1441
|
-
when [Float::NAN]
|
1442
|
-
nan_positions
|
1443
|
-
when [nil, Float::NAN], [Float::NAN, nil]
|
1444
|
-
nil_positions + nan_positions
|
1445
|
-
else
|
1446
|
-
size.times.select { |i| include_with_nan? values, @data[i] }
|
1447
|
-
end
|
1448
|
-
end
|
1449
|
-
|
1450
|
-
def group_by(*args)
|
1451
|
-
to_df.group_by(*args)
|
1452
|
-
end
|
1453
|
-
|
1454
582
|
private
|
1455
583
|
|
1456
584
|
def copy(values)
|
@@ -1471,11 +599,6 @@ module DaruLite
|
|
1471
599
|
end
|
1472
600
|
end
|
1473
601
|
|
1474
|
-
# Helper method returning validity of arbitrary value
|
1475
|
-
def valid_value?(v)
|
1476
|
-
!((v.respond_to?(:nan?) && v.nan?) || v.nil?)
|
1477
|
-
end
|
1478
|
-
|
1479
602
|
def initialize_vector(source, opts)
|
1480
603
|
index, source = parse_source(source, opts)
|
1481
604
|
set_name opts[:name]
|
@@ -1506,18 +629,8 @@ module DaruLite
|
|
1506
629
|
end
|
1507
630
|
|
1508
631
|
def guard_type_check(value)
|
1509
|
-
|
1510
|
-
|
1511
|
-
(numeric? && !value.is_a?(Numeric) && !value.nil?)
|
1512
|
-
end
|
1513
|
-
|
1514
|
-
def split_value(key, v)
|
1515
|
-
if v.nil?
|
1516
|
-
nil
|
1517
|
-
elsif v.include?(key)
|
1518
|
-
1
|
1519
|
-
else
|
1520
|
-
0
|
632
|
+
if (object? && (value.nil? || value.is_a?(Numeric))) || (numeric? && !value.is_a?(Numeric) && !value.nil?)
|
633
|
+
@possibly_changed_type = true
|
1521
634
|
end
|
1522
635
|
end
|
1523
636
|
|
@@ -1665,14 +778,5 @@ module DaruLite
|
|
1665
778
|
@nil_positions = nil
|
1666
779
|
@nan_positions = nil
|
1667
780
|
end
|
1668
|
-
|
1669
|
-
def resort_index(vector_index, opts)
|
1670
|
-
if block_given?
|
1671
|
-
vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) }
|
1672
|
-
else
|
1673
|
-
vector_index.sort(&DEFAULT_SORTER)
|
1674
|
-
end
|
1675
|
-
.tap { |res| res.reverse! unless opts[:ascending] }
|
1676
|
-
end
|
1677
781
|
end
|
1678
782
|
end
|