daru_lite 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +35 -33
- data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
- data/lib/daru_lite/data_frame/calculatable.rb +140 -0
- data/lib/daru_lite/data_frame/convertible.rb +107 -0
- data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
- data/lib/daru_lite/data_frame/fetchable.rb +301 -0
- data/lib/daru_lite/data_frame/filterable.rb +144 -0
- data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
- data/lib/daru_lite/data_frame/indexable.rb +168 -0
- data/lib/daru_lite/data_frame/iterable.rb +339 -0
- data/lib/daru_lite/data_frame/joinable.rb +152 -0
- data/lib/daru_lite/data_frame/missable.rb +75 -0
- data/lib/daru_lite/data_frame/pivotable.rb +108 -0
- data/lib/daru_lite/data_frame/queryable.rb +67 -0
- data/lib/daru_lite/data_frame/setable.rb +109 -0
- data/lib/daru_lite/data_frame/sortable.rb +241 -0
- data/lib/daru_lite/dataframe.rb +138 -2353
- data/lib/daru_lite/index/index.rb +13 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1 -1
- data/lib/daru_lite/vector/aggregatable.rb +9 -0
- data/lib/daru_lite/vector/calculatable.rb +78 -0
- data/lib/daru_lite/vector/convertible.rb +77 -0
- data/lib/daru_lite/vector/duplicatable.rb +17 -0
- data/lib/daru_lite/vector/fetchable.rb +175 -0
- data/lib/daru_lite/vector/filterable.rb +128 -0
- data/lib/daru_lite/vector/indexable.rb +77 -0
- data/lib/daru_lite/vector/iterable.rb +95 -0
- data/lib/daru_lite/vector/joinable.rb +17 -0
- data/lib/daru_lite/vector/missable.rb +124 -0
- data/lib/daru_lite/vector/queryable.rb +45 -0
- data/lib/daru_lite/vector/setable.rb +47 -0
- data/lib/daru_lite/vector/sortable.rb +113 -0
- data/lib/daru_lite/vector.rb +36 -932
- data/lib/daru_lite/version.rb +1 -1
- data/spec/data_frame/aggregatable_example.rb +65 -0
- data/spec/data_frame/buildable_example.rb +109 -0
- data/spec/data_frame/calculatable_example.rb +135 -0
- data/spec/data_frame/convertible_example.rb +180 -0
- data/spec/data_frame/duplicatable_example.rb +111 -0
- data/spec/data_frame/fetchable_example.rb +476 -0
- data/spec/data_frame/filterable_example.rb +250 -0
- data/spec/data_frame/indexable_example.rb +221 -0
- data/spec/data_frame/iterable_example.rb +465 -0
- data/spec/data_frame/joinable_example.rb +106 -0
- data/spec/data_frame/missable_example.rb +47 -0
- data/spec/data_frame/pivotable_example.rb +297 -0
- data/spec/data_frame/queryable_example.rb +92 -0
- data/spec/data_frame/setable_example.rb +482 -0
- data/spec/data_frame/sortable_example.rb +350 -0
- data/spec/dataframe_spec.rb +181 -3289
- data/spec/index/index_spec.rb +8 -0
- data/spec/vector/aggregatable_example.rb +27 -0
- data/spec/vector/calculatable_example.rb +82 -0
- data/spec/vector/convertible_example.rb +126 -0
- data/spec/vector/duplicatable_example.rb +48 -0
- data/spec/vector/fetchable_example.rb +463 -0
- data/spec/vector/filterable_example.rb +165 -0
- data/spec/vector/indexable_example.rb +201 -0
- data/spec/vector/iterable_example.rb +111 -0
- data/spec/vector/joinable_example.rb +25 -0
- data/spec/vector/missable_example.rb +88 -0
- data/spec/vector/queryable_example.rb +91 -0
- data/spec/vector/setable_example.rb +300 -0
- data/spec/vector/sortable_example.rb +242 -0
- data/spec/vector_spec.rb +111 -1805
- metadata +86 -2
data/lib/daru_lite/vector.rb
CHANGED
@@ -2,12 +2,38 @@ require 'daru_lite/maths/arithmetic/vector'
|
|
2
2
|
require 'daru_lite/maths/statistics/vector'
|
3
3
|
require 'daru_lite/accessors/array_wrapper'
|
4
4
|
require 'daru_lite/category'
|
5
|
+
require 'daru_lite/vector/aggregatable'
|
6
|
+
require 'daru_lite/vector/calculatable'
|
7
|
+
require 'daru_lite/vector/convertible'
|
8
|
+
require 'daru_lite/vector/duplicatable'
|
9
|
+
require 'daru_lite/vector/fetchable'
|
10
|
+
require 'daru_lite/vector/filterable'
|
11
|
+
require 'daru_lite/vector/indexable'
|
12
|
+
require 'daru_lite/vector/iterable'
|
13
|
+
require 'daru_lite/vector/joinable'
|
14
|
+
require 'daru_lite/vector/missable'
|
15
|
+
require 'daru_lite/vector/setable'
|
16
|
+
require 'daru_lite/vector/sortable'
|
17
|
+
require 'daru_lite/vector/queryable'
|
5
18
|
|
6
19
|
module DaruLite
|
7
20
|
class Vector # rubocop:disable Metrics/ClassLength
|
8
21
|
include Enumerable
|
9
22
|
include DaruLite::Maths::Arithmetic::Vector
|
10
23
|
include DaruLite::Maths::Statistics::Vector
|
24
|
+
include DaruLite::Vector::Aggregatable
|
25
|
+
include DaruLite::Vector::Calculatable
|
26
|
+
include DaruLite::Vector::Convertible
|
27
|
+
include DaruLite::Vector::Duplicatable
|
28
|
+
include DaruLite::Vector::Fetchable
|
29
|
+
include DaruLite::Vector::Filterable
|
30
|
+
include DaruLite::Vector::Indexable
|
31
|
+
include DaruLite::Vector::Iterable
|
32
|
+
include DaruLite::Vector::Joinable
|
33
|
+
include DaruLite::Vector::Missable
|
34
|
+
include DaruLite::Vector::Setable
|
35
|
+
include DaruLite::Vector::Sortable
|
36
|
+
include DaruLite::Vector::Queryable
|
11
37
|
extend Gem::Deprecate
|
12
38
|
|
13
39
|
class << self
|
@@ -90,46 +116,6 @@ module DaruLite
|
|
90
116
|
@data.size
|
91
117
|
end
|
92
118
|
|
93
|
-
def each(&block)
|
94
|
-
return to_enum(:each) unless block
|
95
|
-
|
96
|
-
@data.each(&block)
|
97
|
-
self
|
98
|
-
end
|
99
|
-
|
100
|
-
def each_index(&block)
|
101
|
-
return to_enum(:each_index) unless block
|
102
|
-
|
103
|
-
@index.each(&block)
|
104
|
-
self
|
105
|
-
end
|
106
|
-
|
107
|
-
def each_with_index(&block)
|
108
|
-
return to_enum(:each_with_index) unless block
|
109
|
-
|
110
|
-
@data.to_a.zip(@index.to_a).each(&block)
|
111
|
-
|
112
|
-
self
|
113
|
-
end
|
114
|
-
|
115
|
-
def map!(&block)
|
116
|
-
return to_enum(:map!) unless block
|
117
|
-
|
118
|
-
@data.map!(&block)
|
119
|
-
self
|
120
|
-
end
|
121
|
-
|
122
|
-
def apply_method(method, keys: nil, by_position: true)
|
123
|
-
vect = keys ? get_sub_vector(keys, by_position: by_position) : self
|
124
|
-
|
125
|
-
case method
|
126
|
-
when Symbol then vect.send(method)
|
127
|
-
when Proc then method.call(vect)
|
128
|
-
else raise
|
129
|
-
end
|
130
|
-
end
|
131
|
-
alias apply_method_on_sub_vector apply_method
|
132
|
-
|
133
119
|
# The name of the DaruLite::Vector. String.
|
134
120
|
attr_reader :name
|
135
121
|
# The row index. Can be either DaruLite::Index or DaruLite::MultiIndex.
|
@@ -185,98 +171,6 @@ module DaruLite
|
|
185
171
|
end
|
186
172
|
end
|
187
173
|
|
188
|
-
# Get one or more elements with specified index or a range.
|
189
|
-
#
|
190
|
-
# == Usage
|
191
|
-
# # For vectors employing single layer Index
|
192
|
-
#
|
193
|
-
# v[:one, :two] # => DaruLite::Vector with indexes :one and :two
|
194
|
-
# v[:one] # => Single element
|
195
|
-
# v[:one..:three] # => DaruLite::Vector with indexes :one, :two and :three
|
196
|
-
#
|
197
|
-
# # For vectors employing hierarchical multi index
|
198
|
-
#
|
199
|
-
def [](*input_indexes)
|
200
|
-
# Get array of positions indexes
|
201
|
-
positions = @index.pos(*input_indexes)
|
202
|
-
|
203
|
-
# If one object is asked return it
|
204
|
-
return @data[positions] if positions.is_a? Numeric
|
205
|
-
|
206
|
-
# Form a new Vector using positional indexes
|
207
|
-
DaruLite::Vector.new(
|
208
|
-
positions.map { |loc| @data[loc] },
|
209
|
-
name: @name,
|
210
|
-
index: @index.subset(*input_indexes), dtype: @dtype
|
211
|
-
)
|
212
|
-
end
|
213
|
-
|
214
|
-
# Returns vector of values given positional values
|
215
|
-
# @param positions [Array<object>] positional values
|
216
|
-
# @return [object] vector
|
217
|
-
# @example
|
218
|
-
# dv = DaruLite::Vector.new 'a'..'e'
|
219
|
-
# dv.at 0, 1, 2
|
220
|
-
# # => #<DaruLite::Vector(3)>
|
221
|
-
# # 0 a
|
222
|
-
# # 1 b
|
223
|
-
# # 2 c
|
224
|
-
def at(*positions)
|
225
|
-
# to be used to form index
|
226
|
-
original_positions = positions
|
227
|
-
positions = coerce_positions(*positions)
|
228
|
-
validate_positions(*positions)
|
229
|
-
|
230
|
-
if positions.is_a? Integer
|
231
|
-
@data[positions]
|
232
|
-
else
|
233
|
-
values = positions.map { |pos| @data[pos] }
|
234
|
-
DaruLite::Vector.new values, index: @index.at(*original_positions), dtype: dtype
|
235
|
-
end
|
236
|
-
end
|
237
|
-
|
238
|
-
# Change value at given positions
|
239
|
-
# @param positions [Array<object>] positional values
|
240
|
-
# @param [object] val value to assign
|
241
|
-
# @example
|
242
|
-
# dv = DaruLite::Vector.new 'a'..'e'
|
243
|
-
# dv.set_at [0, 1], 'x'
|
244
|
-
# dv
|
245
|
-
# # => #<DaruLite::Vector(5)>
|
246
|
-
# # 0 x
|
247
|
-
# # 1 x
|
248
|
-
# # 2 c
|
249
|
-
# # 3 d
|
250
|
-
# # 4 e
|
251
|
-
def set_at(positions, val)
|
252
|
-
validate_positions(*positions)
|
253
|
-
positions.map { |pos| @data[pos] = val }
|
254
|
-
update_position_cache
|
255
|
-
end
|
256
|
-
|
257
|
-
# Just like in Hashes, you can specify the index label of the DaruLite::Vector
|
258
|
-
# and assign an element at that place in the DaruLite::Vector.
|
259
|
-
#
|
260
|
-
# == Usage
|
261
|
-
#
|
262
|
-
# v = DaruLite::Vector.new([1,2,3], index: [:a, :b, :c])
|
263
|
-
# v[:a] = 999
|
264
|
-
# #=>
|
265
|
-
# ##<DaruLite::Vector:90257920 @name = nil @size = 3 >
|
266
|
-
# # nil
|
267
|
-
# # a 999
|
268
|
-
# # b 2
|
269
|
-
# # c 3
|
270
|
-
def []=(*indexes, val)
|
271
|
-
cast(dtype: :array) if val.nil? && dtype != :array
|
272
|
-
|
273
|
-
guard_type_check(val)
|
274
|
-
|
275
|
-
modify_vector(indexes, val)
|
276
|
-
|
277
|
-
update_position_cache
|
278
|
-
end
|
279
|
-
|
280
174
|
# Two vectors are equal if they have the exact same index values corresponding
|
281
175
|
# with the exact same elements. Name is ignored.
|
282
176
|
def ==(other)
|
@@ -367,85 +261,6 @@ module DaruLite
|
|
367
261
|
)
|
368
262
|
end
|
369
263
|
|
370
|
-
# Return a new vector based on the contents of a boolean array. Use with the
|
371
|
-
# comparator methods to obtain meaningful results. See this notebook for
|
372
|
-
# a good overview of using #where.
|
373
|
-
#
|
374
|
-
# @param bool_array [DaruLite::Core::Query::BoolArray, Array<TrueClass, FalseClass>] The
|
375
|
-
# collection containing the true or false values. Each element in the Vector
|
376
|
-
# corresponding to a `true` in the bool_array will be returned along with its
|
377
|
-
# index.
|
378
|
-
# @example Usage of #where.
|
379
|
-
# vector = DaruLite::Vector.new([2,4,5,51,5,16,2,5,3,2,1,5,2,5,2,1,56,234,6,21])
|
380
|
-
#
|
381
|
-
# # Simple logic statement passed to #where.
|
382
|
-
# vector.where(vector.eq(5).or(vector.eq(1)))
|
383
|
-
# # =>
|
384
|
-
# ##<DaruLite::Vector:77626210 @name = nil @size = 7 >
|
385
|
-
# # nil
|
386
|
-
# # 2 5
|
387
|
-
# # 4 5
|
388
|
-
# # 7 5
|
389
|
-
# # 10 1
|
390
|
-
# # 11 5
|
391
|
-
# # 13 5
|
392
|
-
# # 15 1
|
393
|
-
#
|
394
|
-
# # A somewhat more complex logic statement
|
395
|
-
# vector.where((vector.eq(5) | vector.lteq(1)) & vector.in([4,5,1]))
|
396
|
-
# #=>
|
397
|
-
# ##<DaruLite::Vector:81072310 @name = nil @size = 7 >
|
398
|
-
# # nil
|
399
|
-
# # 2 5
|
400
|
-
# # 4 5
|
401
|
-
# # 7 5
|
402
|
-
# # 10 1
|
403
|
-
# # 11 5
|
404
|
-
# # 13 5
|
405
|
-
# # 15 1
|
406
|
-
def where(bool_array)
|
407
|
-
DaruLite::Core::Query.vector_where self, bool_array
|
408
|
-
end
|
409
|
-
|
410
|
-
# Return a new vector based on the contents of a boolean array and &block.
|
411
|
-
#
|
412
|
-
# @param bool_array [DaruLite::Core::Query::BoolArray, Array<TrueClass, FalseClass>, &block] The
|
413
|
-
# collection containing the true or false values. Each element in the Vector
|
414
|
-
# corresponding to a `true` in the bool_array will be returned along with its
|
415
|
-
# index. The &block may contain manipulative functions for the Vector elements.
|
416
|
-
#
|
417
|
-
# @return [DaruLite::Vector]
|
418
|
-
#
|
419
|
-
# @example Usage of #apply_where.
|
420
|
-
# dv = DaruLite::Vector.new ['3 days', '5 weeks', '2 weeks']
|
421
|
-
# dv = dv.apply_where(dv.match /weeks/) { |x| "#{x.split.first.to_i * 7} days" }
|
422
|
-
# # =>
|
423
|
-
# ##<DaruLite::Vector(3)>
|
424
|
-
# # 0 3 days
|
425
|
-
# # 1 35 days
|
426
|
-
# # 2 14 days
|
427
|
-
def apply_where(bool_array, &block)
|
428
|
-
DaruLite::Core::Query.vector_apply_where self, bool_array, &block
|
429
|
-
end
|
430
|
-
|
431
|
-
def head(q = 10)
|
432
|
-
self[0..(q - 1)]
|
433
|
-
end
|
434
|
-
|
435
|
-
def tail(q = 10)
|
436
|
-
start = [size - q, 0].max
|
437
|
-
self[start..(size - 1)]
|
438
|
-
end
|
439
|
-
|
440
|
-
def last(q = 1)
|
441
|
-
# The Enumerable mixin does not provide the last method.
|
442
|
-
tail(q)
|
443
|
-
end
|
444
|
-
|
445
|
-
def empty?
|
446
|
-
@index.empty?
|
447
|
-
end
|
448
|
-
|
449
264
|
def numeric?
|
450
265
|
type == :numeric
|
451
266
|
end
|
@@ -454,26 +269,6 @@ module DaruLite
|
|
454
269
|
type == :object
|
455
270
|
end
|
456
271
|
|
457
|
-
# Reports whether missing data is present in the Vector.
|
458
|
-
def has_missing_data?
|
459
|
-
!indexes(*DaruLite::MISSING_VALUES).empty?
|
460
|
-
end
|
461
|
-
alias flawed? has_missing_data?
|
462
|
-
deprecate :has_missing_data?, :include_values?, 2016, 10
|
463
|
-
deprecate :flawed?, :include_values?, 2016, 10
|
464
|
-
|
465
|
-
# Check if any one of mentioned values occur in the vector
|
466
|
-
# @param values [Array] values to check for
|
467
|
-
# @return [true, false] returns true if any one of specified values
|
468
|
-
# occur in the vector
|
469
|
-
# @example
|
470
|
-
# dv = DaruLite::Vector.new [1, 2, 3, 4, nil]
|
471
|
-
# dv.include_values? nil, Float::NAN
|
472
|
-
# # => true
|
473
|
-
def include_values?(*values)
|
474
|
-
values.any? { |v| include_with_nan? @data, v }
|
475
|
-
end
|
476
|
-
|
477
272
|
# @note Do not use it to check for Float::NAN as
|
478
273
|
# Float::NAN == Float::NAN is false
|
479
274
|
# Return vector of booleans with value at ith position is either
|
@@ -494,18 +289,6 @@ module DaruLite
|
|
494
289
|
DaruLite::Vector.new values.map { |v| eq(v) }.inject(:|)
|
495
290
|
end
|
496
291
|
|
497
|
-
# Append an element to the vector by specifying the element and index
|
498
|
-
def concat(element, index)
|
499
|
-
raise IndexError, 'Expected new unique index' if @index.include? index
|
500
|
-
|
501
|
-
@index |= [index]
|
502
|
-
@data[@index[index]] = element
|
503
|
-
|
504
|
-
update_position_cache
|
505
|
-
end
|
506
|
-
alias push concat
|
507
|
-
alias << concat
|
508
|
-
|
509
292
|
# Cast a vector to a new data type.
|
510
293
|
#
|
511
294
|
# == Options
|
@@ -531,6 +314,14 @@ module DaruLite
|
|
531
314
|
update_position_cache
|
532
315
|
end
|
533
316
|
|
317
|
+
# Delete element by position
|
318
|
+
def delete_at_position(position)
|
319
|
+
@data.delete_at(position)
|
320
|
+
@index = @index.delete_at(position)
|
321
|
+
|
322
|
+
update_position_cache
|
323
|
+
end
|
324
|
+
|
534
325
|
# The type of data contained in the vector. Can be :object.
|
535
326
|
#
|
536
327
|
# Running through the data to figure out the kind of data is delayed to the
|
@@ -560,148 +351,6 @@ module DaruLite
|
|
560
351
|
type == :category
|
561
352
|
end
|
562
353
|
|
563
|
-
# Get index of element
|
564
|
-
def index_of(element)
|
565
|
-
case dtype
|
566
|
-
when :array then @index.key(@data.index { |x| x.eql? element })
|
567
|
-
else @index.key @data.index(element)
|
568
|
-
end
|
569
|
-
end
|
570
|
-
|
571
|
-
# Keep only unique elements of the vector along with their indexes.
|
572
|
-
def uniq
|
573
|
-
uniq_vector = @data.uniq
|
574
|
-
new_index = uniq_vector.map { |element| index_of(element) }
|
575
|
-
|
576
|
-
DaruLite::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
|
577
|
-
end
|
578
|
-
|
579
|
-
def any?(&block)
|
580
|
-
@data.data.any?(&block)
|
581
|
-
end
|
582
|
-
|
583
|
-
def all?(&block)
|
584
|
-
@data.data.all?(&block)
|
585
|
-
end
|
586
|
-
|
587
|
-
# Sorts a vector according to its values. If a block is specified, the contents
|
588
|
-
# will be evaluated and data will be swapped whenever the block evaluates
|
589
|
-
# to *true*. Defaults to ascending order sorting. Any missing values will be
|
590
|
-
# put at the end of the vector. Preserves indexing. Default sort algorithm is
|
591
|
-
# quick sort.
|
592
|
-
#
|
593
|
-
# == Options
|
594
|
-
#
|
595
|
-
# * +:ascending+ - if false, will sort in descending order. Defaults to true.
|
596
|
-
#
|
597
|
-
# * +:type+ - Specify the sorting algorithm. Only supports quick_sort for now.
|
598
|
-
# == Usage
|
599
|
-
#
|
600
|
-
# v = DaruLite::Vector.new ["My first guitar", "jazz", "guitar"]
|
601
|
-
# # Say you want to sort these strings by length.
|
602
|
-
# v.sort(ascending: false) { |a,b| a.length <=> b.length }
|
603
|
-
def sort(opts = {}, &block)
|
604
|
-
opts = { ascending: true }.merge(opts)
|
605
|
-
|
606
|
-
vector_index = resort_index(@data.each_with_index, opts, &block)
|
607
|
-
vector, index = vector_index.transpose
|
608
|
-
|
609
|
-
index = @index.reorder index
|
610
|
-
|
611
|
-
DaruLite::Vector.new(vector, index: index, name: @name, dtype: @dtype)
|
612
|
-
end
|
613
|
-
|
614
|
-
# Sorts the vector according to its `Index` values. Defaults to ascending
|
615
|
-
# order sorting.
|
616
|
-
#
|
617
|
-
# @param [Hash] opts the options for sort_by_index method.
|
618
|
-
# @option opts [Boolean] :ascending false, will sort `index` in
|
619
|
-
# descending order.
|
620
|
-
#
|
621
|
-
# @return [Vector] new sorted `Vector` according to the index values.
|
622
|
-
#
|
623
|
-
# @example
|
624
|
-
#
|
625
|
-
# dv = DaruLite::Vector.new [11, 13, 12], index: [23, 21, 22]
|
626
|
-
# # Say you want to sort index in ascending order
|
627
|
-
# dv.sort_by_index(ascending: true)
|
628
|
-
# #=> DaruLite::Vector.new [13, 12, 11], index: [21, 22, 23]
|
629
|
-
# # Say you want to sort index in descending order
|
630
|
-
# dv.sort_by_index(ascending: false)
|
631
|
-
# #=> DaruLite::Vector.new [11, 12, 13], index: [23, 22, 21]
|
632
|
-
def sort_by_index(opts = {})
|
633
|
-
opts = { ascending: true }.merge(opts)
|
634
|
-
_, new_order = resort_index(@index.each_with_index, opts).transpose
|
635
|
-
|
636
|
-
reorder new_order
|
637
|
-
end
|
638
|
-
|
639
|
-
DEFAULT_SORTER = lambda { |(lv, li), (rv, ri)|
|
640
|
-
if lv.nil? && rv.nil?
|
641
|
-
li <=> ri
|
642
|
-
elsif lv.nil?
|
643
|
-
-1
|
644
|
-
elsif rv.nil?
|
645
|
-
1
|
646
|
-
else
|
647
|
-
lv <=> rv
|
648
|
-
end
|
649
|
-
}
|
650
|
-
|
651
|
-
# Just sort the data and get an Array in return using Enumerable#sort.
|
652
|
-
# Non-destructive.
|
653
|
-
# :nocov:
|
654
|
-
def sorted_data(&block)
|
655
|
-
@data.to_a.sort(&block)
|
656
|
-
end
|
657
|
-
# :nocov:
|
658
|
-
|
659
|
-
# Like map, but returns a DaruLite::Vector with the returned values.
|
660
|
-
def recode(dt = nil, &block)
|
661
|
-
return to_enum(:recode, dt) unless block
|
662
|
-
|
663
|
-
dup.recode! dt, &block
|
664
|
-
end
|
665
|
-
|
666
|
-
# Destructive version of #recode.
|
667
|
-
def recode!(dt = nil, &block)
|
668
|
-
return to_enum(:recode!, dt) unless block
|
669
|
-
|
670
|
-
@data.map!(&block).data
|
671
|
-
@data = cast_vector_to(dt || @dtype)
|
672
|
-
self
|
673
|
-
end
|
674
|
-
|
675
|
-
# Delete an element if block returns true. Destructive.
|
676
|
-
def delete_if
|
677
|
-
return to_enum(:delete_if) unless block_given?
|
678
|
-
|
679
|
-
keep_e, keep_i = each_with_index.reject { |n, _i| yield(n) }.transpose
|
680
|
-
|
681
|
-
@data = cast_vector_to @dtype, keep_e
|
682
|
-
@index = DaruLite::Index.new(keep_i)
|
683
|
-
|
684
|
-
update_position_cache
|
685
|
-
|
686
|
-
self
|
687
|
-
end
|
688
|
-
|
689
|
-
# Keep an element if block returns true. Destructive.
|
690
|
-
def keep_if
|
691
|
-
return to_enum(:keep_if) unless block_given?
|
692
|
-
|
693
|
-
delete_if { |val| !yield(val) }
|
694
|
-
end
|
695
|
-
|
696
|
-
# Reports all values that don't comply with a condition.
|
697
|
-
# Returns a hash with the index of data and the invalid data.
|
698
|
-
def verify
|
699
|
-
(0...size)
|
700
|
-
.map { |i| [i, @data[i]] }
|
701
|
-
.reject { |_i, val| yield(val) }
|
702
|
-
.to_h
|
703
|
-
end
|
704
|
-
|
705
354
|
# Return an Array with the data splitted by a separator.
|
706
355
|
# a=DaruLite::Vector.new(["a,b","c,d","a,b","d"])
|
707
356
|
# a.splitted
|
@@ -719,93 +368,6 @@ module DaruLite
|
|
719
368
|
end
|
720
369
|
end
|
721
370
|
|
722
|
-
# Returns a hash of Vectors, defined by the different values
|
723
|
-
# defined on the fields
|
724
|
-
# Example:
|
725
|
-
#
|
726
|
-
# a=DaruLite::Vector.new(["a,b","c,d","a,b"])
|
727
|
-
# a.split_by_separator
|
728
|
-
# => {"a"=>#<DaruLite::Vector:0x7f2dbcc09d88
|
729
|
-
# @data=[1, 0, 1]>,
|
730
|
-
# "b"=>#<DaruLite::Vector:0x7f2dbcc09c48
|
731
|
-
# @data=[1, 1, 0]>,
|
732
|
-
# "c"=>#<DaruLite::Vector:0x7f2dbcc09b08
|
733
|
-
# @data=[0, 1, 1]>}
|
734
|
-
#
|
735
|
-
def split_by_separator(sep = ',')
|
736
|
-
split_data = splitted sep
|
737
|
-
split_data
|
738
|
-
.flatten.uniq.compact.to_h do |key|
|
739
|
-
[
|
740
|
-
key,
|
741
|
-
DaruLite::Vector.new(split_data.map { |v| split_value(key, v) })
|
742
|
-
]
|
743
|
-
end
|
744
|
-
end
|
745
|
-
|
746
|
-
def split_by_separator_freq(sep = ',')
|
747
|
-
split_by_separator(sep).transform_values do |v|
|
748
|
-
v.sum(&:to_i)
|
749
|
-
end
|
750
|
-
end
|
751
|
-
|
752
|
-
def reset_index!
|
753
|
-
@index = DaruLite::Index.new(Array.new(size) { |i| i })
|
754
|
-
self
|
755
|
-
end
|
756
|
-
|
757
|
-
# Replace all nils in the vector with the value passed as an argument. Destructive.
|
758
|
-
# See #replace_nils for non-destructive version
|
759
|
-
#
|
760
|
-
# == Arguments
|
761
|
-
#
|
762
|
-
# * +replacement+ - The value which should replace all nils
|
763
|
-
def replace_nils!(replacement)
|
764
|
-
indexes(*DaruLite::MISSING_VALUES).each do |idx|
|
765
|
-
self[idx] = replacement
|
766
|
-
end
|
767
|
-
|
768
|
-
self
|
769
|
-
end
|
770
|
-
|
771
|
-
# Rolling fillna
|
772
|
-
# Replace all Float::NAN and nil values with the preceding or following value
|
773
|
-
#
|
774
|
-
# @param direction [Symbol] (:forward, :backward) whether replacement value is preceding or following
|
775
|
-
#
|
776
|
-
# @example
|
777
|
-
# dv = DaruLite::Vector.new([1, 2, 1, 4, nil, Float::NAN, 3, nil, Float::NAN])
|
778
|
-
#
|
779
|
-
# 2.3.3 :068 > dv.rolling_fillna(:forward)
|
780
|
-
# => #<DaruLite::Vector(9)>
|
781
|
-
# 0 1
|
782
|
-
# 1 2
|
783
|
-
# 2 1
|
784
|
-
# 3 4
|
785
|
-
# 4 4
|
786
|
-
# 5 4
|
787
|
-
# 6 3
|
788
|
-
# 7 3
|
789
|
-
# 8 3
|
790
|
-
#
|
791
|
-
def rolling_fillna!(direction = :forward)
|
792
|
-
enum = direction == :forward ? index : index.reverse_each
|
793
|
-
last_valid_value = 0
|
794
|
-
enum.each do |idx|
|
795
|
-
if valid_value?(self[idx])
|
796
|
-
last_valid_value = self[idx]
|
797
|
-
else
|
798
|
-
self[idx] = last_valid_value
|
799
|
-
end
|
800
|
-
end
|
801
|
-
self
|
802
|
-
end
|
803
|
-
|
804
|
-
# Non-destructive version of rolling_fillna!
|
805
|
-
def rolling_fillna(direction = :forward)
|
806
|
-
dup.rolling_fillna!(direction)
|
807
|
-
end
|
808
|
-
|
809
371
|
# Lags the series by `k` periods.
|
810
372
|
#
|
811
373
|
# Lags the series by `k` periods, "shifting" data and inserting `nil`s
|
@@ -845,187 +407,6 @@ module DaruLite
|
|
845
407
|
end
|
846
408
|
end
|
847
409
|
|
848
|
-
def detach_index
|
849
|
-
DaruLite::DataFrame.new(
|
850
|
-
index: @index.to_a,
|
851
|
-
values: @data.to_a
|
852
|
-
)
|
853
|
-
end
|
854
|
-
|
855
|
-
# Non-destructive version of #replace_nils!
|
856
|
-
def replace_nils(replacement)
|
857
|
-
dup.replace_nils!(replacement)
|
858
|
-
end
|
859
|
-
|
860
|
-
# number of non-missing elements
|
861
|
-
def n_valid
|
862
|
-
size - indexes(*DaruLite::MISSING_VALUES).size
|
863
|
-
end
|
864
|
-
deprecate :n_valid, :count_values, 2016, 10
|
865
|
-
|
866
|
-
# Count the number of values specified
|
867
|
-
# @param values [Array] values to count for
|
868
|
-
# @return [Integer] the number of times the values mentioned occurs
|
869
|
-
# @example
|
870
|
-
# dv = DaruLite::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
|
871
|
-
# dv.count_values nil
|
872
|
-
# # => 2
|
873
|
-
def count_values(*values)
|
874
|
-
positions(*values).size
|
875
|
-
end
|
876
|
-
|
877
|
-
# Returns *true* if an index exists
|
878
|
-
def has_index?(index)
|
879
|
-
@index.include? index
|
880
|
-
end
|
881
|
-
|
882
|
-
# @param keys [Array] can be positions (if by_position is true) or indexes (if by_position if false)
|
883
|
-
# @return [DaruLite::Vector]
|
884
|
-
def get_sub_vector(keys, by_position: true)
|
885
|
-
return DaruLite::Vector.new([]) if keys == []
|
886
|
-
|
887
|
-
keys = @index.pos(*keys) unless by_position
|
888
|
-
|
889
|
-
sub_vect = at(*keys)
|
890
|
-
sub_vect = DaruLite::Vector.new([sub_vect]) unless sub_vect.is_a?(DaruLite::Vector)
|
891
|
-
|
892
|
-
sub_vect
|
893
|
-
end
|
894
|
-
|
895
|
-
# @return [DaruLite::DataFrame] the vector as a single-vector dataframe
|
896
|
-
def to_df
|
897
|
-
DaruLite::DataFrame.new({ @name => @data }, name: @name, index: @index)
|
898
|
-
end
|
899
|
-
|
900
|
-
# Convert Vector to a horizontal or vertical Ruby Matrix.
|
901
|
-
#
|
902
|
-
# == Arguments
|
903
|
-
#
|
904
|
-
# * +axis+ - Specify whether you want a *:horizontal* or a *:vertical* matrix.
|
905
|
-
def to_matrix(axis = :horizontal)
|
906
|
-
case axis
|
907
|
-
when :horizontal
|
908
|
-
Matrix[to_a]
|
909
|
-
when :vertical
|
910
|
-
Matrix.columns([to_a])
|
911
|
-
else
|
912
|
-
raise ArgumentError, "axis should be either :horizontal or :vertical, not #{axis}"
|
913
|
-
end
|
914
|
-
end
|
915
|
-
|
916
|
-
# Convert to hash (explicit). Hash keys are indexes and values are the corresponding elements
|
917
|
-
def to_h
|
918
|
-
@index.to_h { |index| [index, self[index]] }
|
919
|
-
end
|
920
|
-
|
921
|
-
# Return an array
|
922
|
-
def to_a
|
923
|
-
@data.to_a
|
924
|
-
end
|
925
|
-
|
926
|
-
# Convert the hash from to_h to json
|
927
|
-
def to_json(*)
|
928
|
-
to_h.to_json
|
929
|
-
end
|
930
|
-
|
931
|
-
# Convert to html for iruby
|
932
|
-
def to_html(threshold = 30)
|
933
|
-
table_thead = to_html_thead
|
934
|
-
table_tbody = to_html_tbody(threshold)
|
935
|
-
path = if index.is_a?(MultiIndex)
|
936
|
-
File.expand_path('iruby/templates/vector_mi.html.erb', __dir__)
|
937
|
-
else
|
938
|
-
File.expand_path('iruby/templates/vector.html.erb', __dir__)
|
939
|
-
end
|
940
|
-
ERB.new(File.read(path).strip).result(binding)
|
941
|
-
end
|
942
|
-
|
943
|
-
def to_html_thead
|
944
|
-
table_thead_path =
|
945
|
-
if index.is_a?(MultiIndex)
|
946
|
-
File.expand_path('iruby/templates/vector_mi_thead.html.erb', __dir__)
|
947
|
-
else
|
948
|
-
File.expand_path('iruby/templates/vector_thead.html.erb', __dir__)
|
949
|
-
end
|
950
|
-
ERB.new(File.read(table_thead_path).strip).result(binding)
|
951
|
-
end
|
952
|
-
|
953
|
-
def to_html_tbody(threshold = 30)
|
954
|
-
table_tbody_path =
|
955
|
-
if index.is_a?(MultiIndex)
|
956
|
-
File.expand_path('iruby/templates/vector_mi_tbody.html.erb', __dir__)
|
957
|
-
else
|
958
|
-
File.expand_path('iruby/templates/vector_tbody.html.erb', __dir__)
|
959
|
-
end
|
960
|
-
ERB.new(File.read(table_tbody_path).strip).result(binding)
|
961
|
-
end
|
962
|
-
|
963
|
-
def to_s
|
964
|
-
"#<#{self.class}#{": #{@name}" if @name}(#{size})#{':category' if category?}>"
|
965
|
-
end
|
966
|
-
|
967
|
-
# Create a summary of the Vector
|
968
|
-
# @param indent_level [Fixnum] indent level
|
969
|
-
# @return [String] String containing the summary of the Vector
|
970
|
-
# @example
|
971
|
-
# dv = DaruLite::Vector.new [1, 2, 3]
|
972
|
-
# puts dv.summary
|
973
|
-
#
|
974
|
-
# # =
|
975
|
-
# # n :3
|
976
|
-
# # non-missing:3
|
977
|
-
# # median: 2
|
978
|
-
# # mean: 2.0000
|
979
|
-
# # std.dev.: 1.0000
|
980
|
-
# # std.err.: 0.5774
|
981
|
-
# # skew: 0.0000
|
982
|
-
# # kurtosis: -2.3333
|
983
|
-
def summary(indent_level = 0)
|
984
|
-
non_missing = size - count_values(*DaruLite::MISSING_VALUES)
|
985
|
-
summary = (' =' * indent_level) + "= #{name}" \
|
986
|
-
"\n n :#{size}" \
|
987
|
-
"\n non-missing:#{non_missing}"
|
988
|
-
case type
|
989
|
-
when :object
|
990
|
-
summary << object_summary
|
991
|
-
when :numeric
|
992
|
-
summary << numeric_summary
|
993
|
-
end
|
994
|
-
summary.split("\n").join("\n#{' ' * indent_level}")
|
995
|
-
end
|
996
|
-
|
997
|
-
# Displays summary for an object type Vector
|
998
|
-
# @return [String] String containing object vector summary
|
999
|
-
def object_summary
|
1000
|
-
nval = count_values(*DaruLite::MISSING_VALUES)
|
1001
|
-
summary = "\n factors: #{factors.to_a.join(',')}" \
|
1002
|
-
"\n mode: #{mode.to_a.join(',')}" \
|
1003
|
-
"\n Distribution\n"
|
1004
|
-
|
1005
|
-
data = frequencies.sort.each_with_index.map do |v, k|
|
1006
|
-
[k, v, format('%0.2f%%', ((nval.zero? ? 1 : v.quo(nval)) * 100))]
|
1007
|
-
end
|
1008
|
-
|
1009
|
-
summary + Formatters::Table.format(data)
|
1010
|
-
end
|
1011
|
-
|
1012
|
-
# Displays summary for a numeric type Vector
|
1013
|
-
# @return [String] String containing numeric vector summary
|
1014
|
-
def numeric_summary
|
1015
|
-
summary = "\n median: #{median}" +
|
1016
|
-
format("\n mean: %0.4f", mean)
|
1017
|
-
if sd
|
1018
|
-
summary << (format("\n std.dev.: %0.4f", sd) +
|
1019
|
-
format("\n std.err.: %0.4f", se))
|
1020
|
-
end
|
1021
|
-
|
1022
|
-
if count_values(*DaruLite::MISSING_VALUES).zero?
|
1023
|
-
summary << (format("\n skew: %0.4f", skew) +
|
1024
|
-
format("\n kurtosis: %0.4f", kurtosis))
|
1025
|
-
end
|
1026
|
-
summary
|
1027
|
-
end
|
1028
|
-
|
1029
410
|
# Overrides the original inspect for pretty printing in irb
|
1030
411
|
def inspect(spacing = 20, threshold = 15)
|
1031
412
|
row_headers = index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a
|
@@ -1040,68 +421,6 @@ module DaruLite
|
|
1040
421
|
)
|
1041
422
|
end
|
1042
423
|
|
1043
|
-
# Sets new index for vector. Preserves index->value correspondence.
|
1044
|
-
# Sets nil for new index keys absent from original index.
|
1045
|
-
# @note Unlike #reorder! which takes positions as input it takes
|
1046
|
-
# index as an input to reorder the vector
|
1047
|
-
# @param [DaruLite::Index, DaruLite::MultiIndex] new_index new index to order with
|
1048
|
-
# @return [DaruLite::Vector] vector reindexed with new index
|
1049
|
-
def reindex!(new_index)
|
1050
|
-
values = []
|
1051
|
-
each_with_index do |val, i|
|
1052
|
-
values[new_index[i]] = val if new_index.include?(i)
|
1053
|
-
end
|
1054
|
-
values.fill(nil, values.size, new_index.size - values.size)
|
1055
|
-
|
1056
|
-
@data = cast_vector_to @dtype, values
|
1057
|
-
@index = new_index
|
1058
|
-
|
1059
|
-
update_position_cache
|
1060
|
-
|
1061
|
-
self
|
1062
|
-
end
|
1063
|
-
|
1064
|
-
# Reorder the vector with given positions
|
1065
|
-
# @note Unlike #reindex! which takes index as input, it takes
|
1066
|
-
# positions as an input to reorder the vector
|
1067
|
-
# @param [Array] order the order to reorder the vector with
|
1068
|
-
# @return reordered vector
|
1069
|
-
# @example
|
1070
|
-
# dv = DaruLite::Vector.new [3, 2, 1], index: ['c', 'b', 'a']
|
1071
|
-
# dv.reorder! [2, 1, 0]
|
1072
|
-
# # => #<DaruLite::Vector(3)>
|
1073
|
-
# # a 1
|
1074
|
-
# # b 2
|
1075
|
-
# # c 3
|
1076
|
-
def reorder!(order)
|
1077
|
-
@index = @index.reorder order
|
1078
|
-
data_array = order.map { |i| @data[i] }
|
1079
|
-
@data = cast_vector_to @dtype, data_array, @nm_dtype
|
1080
|
-
update_position_cache
|
1081
|
-
self
|
1082
|
-
end
|
1083
|
-
|
1084
|
-
# Non-destructive version of #reorder!
|
1085
|
-
def reorder(order)
|
1086
|
-
dup.reorder! order
|
1087
|
-
end
|
1088
|
-
|
1089
|
-
# Create a new vector with a different index, and preserve the indexing of
|
1090
|
-
# current elements.
|
1091
|
-
def reindex(new_index)
|
1092
|
-
dup.reindex!(new_index)
|
1093
|
-
end
|
1094
|
-
|
1095
|
-
def index=(idx)
|
1096
|
-
idx = Index.coerce(idx)
|
1097
|
-
|
1098
|
-
raise ArgumentError, "Size of supplied index #{idx.size} does not match size of Vector" if idx.size != size
|
1099
|
-
raise ArgumentError, 'Can only assign type Index and its subclasses.' unless idx.is_a?(DaruLite::Index)
|
1100
|
-
|
1101
|
-
@index = idx
|
1102
|
-
self
|
1103
|
-
end
|
1104
|
-
|
1105
424
|
# Give the vector a new name
|
1106
425
|
#
|
1107
426
|
# @param new_name [Symbol] The new name.
|
@@ -1112,12 +431,6 @@ module DaruLite
|
|
1112
431
|
|
1113
432
|
alias name= rename
|
1114
433
|
|
1115
|
-
# Duplicates a vector
|
1116
|
-
# @return [DaruLite::Vector] duplicated vector
|
1117
|
-
def dup
|
1118
|
-
DaruLite::Vector.new @data.dup, name: @name, index: @index.dup
|
1119
|
-
end
|
1120
|
-
|
1121
434
|
# == Bootstrap
|
1122
435
|
# Generate +nr+ resamples (with replacement) of size +s+
|
1123
436
|
# from vector, computing each estimate from +estimators+
|
@@ -1195,130 +508,6 @@ module DaruLite
|
|
1195
508
|
DaruLite::DataFrame.new ps
|
1196
509
|
end
|
1197
510
|
|
1198
|
-
# Returns an array of boolean values, indicating the
|
1199
|
-
# +regexp+ matching with the given array.
|
1200
|
-
#
|
1201
|
-
# @param regexp [Regexp] A regular matching expression. For example, +/weeks/+.
|
1202
|
-
#
|
1203
|
-
# @return [Array] Containing +true+ or +false+ for each element, according to the match with the given +regexp+
|
1204
|
-
#
|
1205
|
-
# @example
|
1206
|
-
# dv = DaruLite::Vector.new(['3 days', '5 weeks', '2 weeks'])
|
1207
|
-
# dv.match(/weeks/)
|
1208
|
-
#
|
1209
|
-
# # => [false, true, true]
|
1210
|
-
def match(regexp)
|
1211
|
-
@data.map { |value| !!(value =~ regexp) }
|
1212
|
-
end
|
1213
|
-
|
1214
|
-
# Creates a new vector consisting only of non-nil data
|
1215
|
-
#
|
1216
|
-
# == Arguments
|
1217
|
-
#
|
1218
|
-
# @param as_a [Symbol] Passing :array will return only the elements
|
1219
|
-
# as an Array. Otherwise will return a DaruLite::Vector.
|
1220
|
-
#
|
1221
|
-
# @param _duplicate [Boolean] In case no missing data is found in the
|
1222
|
-
# vector, setting this to false will return the same vector.
|
1223
|
-
# Otherwise, a duplicate will be returned irrespective of
|
1224
|
-
# presence of missing data.
|
1225
|
-
|
1226
|
-
def only_valid(as_a = :vector, _duplicate = true)
|
1227
|
-
# FIXME: Now duplicate is just ignored.
|
1228
|
-
# There are no spec that fail on this case, so I'll leave it
|
1229
|
-
# this way for now - zverok, 2016-05-07
|
1230
|
-
|
1231
|
-
new_index = @index.to_a - indexes(*DaruLite::MISSING_VALUES)
|
1232
|
-
new_vector = new_index.map { |idx| self[idx] }
|
1233
|
-
|
1234
|
-
if as_a == :vector
|
1235
|
-
DaruLite::Vector.new new_vector, index: new_index, name: @name, dtype: dtype
|
1236
|
-
else
|
1237
|
-
new_vector
|
1238
|
-
end
|
1239
|
-
end
|
1240
|
-
deprecate :only_valid, :reject_values, 2016, 10
|
1241
|
-
|
1242
|
-
# Return a vector with specified values removed
|
1243
|
-
# @param values [Array] values to reject from resultant vector
|
1244
|
-
# @return [DaruLite::Vector] vector with specified values removed
|
1245
|
-
# @example
|
1246
|
-
# dv = DaruLite::Vector.new [1, 2, nil, Float::NAN]
|
1247
|
-
# dv.reject_values nil, Float::NAN
|
1248
|
-
# # => #<DaruLite::Vector(2)>
|
1249
|
-
# # 0 1
|
1250
|
-
# # 1 2
|
1251
|
-
def reject_values(*values)
|
1252
|
-
resultant_pos = size.times.to_a - positions(*values)
|
1253
|
-
dv = at(*resultant_pos)
|
1254
|
-
# Handle the case when number of positions is 1
|
1255
|
-
# and hence #at doesn't return a vector
|
1256
|
-
if dv.is_a?(DaruLite::Vector)
|
1257
|
-
dv
|
1258
|
-
else
|
1259
|
-
pos = resultant_pos.first
|
1260
|
-
at(pos..pos)
|
1261
|
-
end
|
1262
|
-
end
|
1263
|
-
|
1264
|
-
# Return indexes of values specified
|
1265
|
-
# @param values [Array] values to find indexes for
|
1266
|
-
# @return [Array] array of indexes of values specified
|
1267
|
-
# @example
|
1268
|
-
# dv = DaruLite::Vector.new [1, 2, nil, Float::NAN], index: 11..14
|
1269
|
-
# dv.indexes nil, Float::NAN
|
1270
|
-
# # => [13, 14]
|
1271
|
-
def indexes(*values)
|
1272
|
-
index.to_a.values_at(*positions(*values))
|
1273
|
-
end
|
1274
|
-
|
1275
|
-
# Replaces specified values with a new value
|
1276
|
-
# @param [Array] old_values array of values to replace
|
1277
|
-
# @param [object] new_value new value to replace with
|
1278
|
-
# @note It performs the replace in place.
|
1279
|
-
# @return [DaruLite::Vector] Same vector itself with values
|
1280
|
-
# replaced with new value
|
1281
|
-
# @example
|
1282
|
-
# dv = DaruLite::Vector.new [1, 2, :a, :b]
|
1283
|
-
# dv.replace_values [:a, :b], nil
|
1284
|
-
# dv
|
1285
|
-
# # =>
|
1286
|
-
# # #<DaruLite::Vector:19903200 @name = nil @metadata = {} @size = 4 >
|
1287
|
-
# # nil
|
1288
|
-
# # 0 1
|
1289
|
-
# # 1 2
|
1290
|
-
# # 2 nil
|
1291
|
-
# # 3 nil
|
1292
|
-
def replace_values(old_values, new_value)
|
1293
|
-
old_values = [old_values] unless old_values.is_a? Array
|
1294
|
-
size.times do |pos|
|
1295
|
-
set_at([pos], new_value) if include_with_nan? old_values, at(pos)
|
1296
|
-
end
|
1297
|
-
self
|
1298
|
-
end
|
1299
|
-
|
1300
|
-
# Returns a Vector containing only missing data (preserves indexes).
|
1301
|
-
def only_missing(as_a = :vector)
|
1302
|
-
case as_a
|
1303
|
-
when :vector
|
1304
|
-
self[*indexes(*DaruLite::MISSING_VALUES)]
|
1305
|
-
when :array
|
1306
|
-
self[*indexes(*DaruLite::MISSING_VALUES)].to_a
|
1307
|
-
end
|
1308
|
-
end
|
1309
|
-
deprecate :only_missing, nil, 2016, 10
|
1310
|
-
|
1311
|
-
# Returns a Vector with only numerical data. Missing data is included
|
1312
|
-
# but non-Numeric objects are excluded. Preserves index.
|
1313
|
-
def only_numerics
|
1314
|
-
numeric_indexes =
|
1315
|
-
each_with_index
|
1316
|
-
.select { |v, _i| v.is_a?(Numeric) || v.nil? }
|
1317
|
-
.map(&:last)
|
1318
|
-
|
1319
|
-
self[*numeric_indexes]
|
1320
|
-
end
|
1321
|
-
|
1322
511
|
DATE_REGEXP = /^(\d{2}-\d{2}-\d{4}|\d{4}-\d{2}-\d{2})$/.freeze
|
1323
512
|
|
1324
513
|
# Returns the database type for the vector, according to its content
|
@@ -1335,12 +524,6 @@ module DaruLite
|
|
1335
524
|
end
|
1336
525
|
end
|
1337
526
|
|
1338
|
-
# Copies the structure of the vector (i.e. the index, size, etc.) and fills all
|
1339
|
-
# values with nils.
|
1340
|
-
def clone_structure
|
1341
|
-
DaruLite::Vector.new(([nil] * size), name: @name, index: @index.dup)
|
1342
|
-
end
|
1343
|
-
|
1344
527
|
# Save the vector to a file
|
1345
528
|
#
|
1346
529
|
# == Arguments
|
@@ -1396,61 +579,6 @@ module DaruLite
|
|
1396
579
|
name.to_s.end_with?('=') || has_index?(name) || super
|
1397
580
|
end
|
1398
581
|
|
1399
|
-
# Partition a numeric variable into categories.
|
1400
|
-
# @param [Array<Numeric>] partitions an array whose consecutive elements
|
1401
|
-
# provide intervals for categories
|
1402
|
-
# @param [Hash] opts options to cut the partition
|
1403
|
-
# @option opts [:left, :right] :close_at specifies whether the interval closes at
|
1404
|
-
# the right side or left side
|
1405
|
-
# @option opts [Array] :labels names of the categories
|
1406
|
-
# @return [DaruLite::Vector] numeric variable converted to categorical variable
|
1407
|
-
# @example
|
1408
|
-
# heights = DaruLite::Vector.new [30, 35, 32, 50, 42, 51]
|
1409
|
-
# height_cat = heights.cut [30, 40, 50, 60], labels: ['low', 'medium', 'high']
|
1410
|
-
# # => #<DaruLite::Vector(6)>
|
1411
|
-
# # 0 low
|
1412
|
-
# # 1 low
|
1413
|
-
# # 2 low
|
1414
|
-
# # 3 high
|
1415
|
-
# # 4 medium
|
1416
|
-
# # 5 high
|
1417
|
-
def cut(partitions, opts = {})
|
1418
|
-
close_at = opts[:close_at] || :right
|
1419
|
-
labels = opts[:labels]
|
1420
|
-
partitions = partitions.to_a
|
1421
|
-
values = to_a.map { |val| cut_find_category partitions, val, close_at }
|
1422
|
-
cats = cut_categories(partitions, close_at)
|
1423
|
-
|
1424
|
-
dv = DaruLite::Vector.new values,
|
1425
|
-
index: @index,
|
1426
|
-
type: :category,
|
1427
|
-
categories: cats
|
1428
|
-
|
1429
|
-
# Rename categories if new labels provided
|
1430
|
-
if labels
|
1431
|
-
dv.rename_categories cats.zip(labels).to_h
|
1432
|
-
else
|
1433
|
-
dv
|
1434
|
-
end
|
1435
|
-
end
|
1436
|
-
|
1437
|
-
def positions(*values)
|
1438
|
-
case values
|
1439
|
-
when [nil]
|
1440
|
-
nil_positions
|
1441
|
-
when [Float::NAN]
|
1442
|
-
nan_positions
|
1443
|
-
when [nil, Float::NAN], [Float::NAN, nil]
|
1444
|
-
nil_positions + nan_positions
|
1445
|
-
else
|
1446
|
-
size.times.select { |i| include_with_nan? values, @data[i] }
|
1447
|
-
end
|
1448
|
-
end
|
1449
|
-
|
1450
|
-
def group_by(*args)
|
1451
|
-
to_df.group_by(*args)
|
1452
|
-
end
|
1453
|
-
|
1454
582
|
private
|
1455
583
|
|
1456
584
|
def copy(values)
|
@@ -1471,11 +599,6 @@ module DaruLite
|
|
1471
599
|
end
|
1472
600
|
end
|
1473
601
|
|
1474
|
-
# Helper method returning validity of arbitrary value
|
1475
|
-
def valid_value?(v)
|
1476
|
-
!((v.respond_to?(:nan?) && v.nan?) || v.nil?)
|
1477
|
-
end
|
1478
|
-
|
1479
602
|
def initialize_vector(source, opts)
|
1480
603
|
index, source = parse_source(source, opts)
|
1481
604
|
set_name opts[:name]
|
@@ -1506,18 +629,8 @@ module DaruLite
|
|
1506
629
|
end
|
1507
630
|
|
1508
631
|
def guard_type_check(value)
|
1509
|
-
|
1510
|
-
|
1511
|
-
(numeric? && !value.is_a?(Numeric) && !value.nil?)
|
1512
|
-
end
|
1513
|
-
|
1514
|
-
def split_value(key, v)
|
1515
|
-
if v.nil?
|
1516
|
-
nil
|
1517
|
-
elsif v.include?(key)
|
1518
|
-
1
|
1519
|
-
else
|
1520
|
-
0
|
632
|
+
if (object? && (value.nil? || value.is_a?(Numeric))) || (numeric? && !value.is_a?(Numeric) && !value.nil?)
|
633
|
+
@possibly_changed_type = true
|
1521
634
|
end
|
1522
635
|
end
|
1523
636
|
|
@@ -1665,14 +778,5 @@ module DaruLite
|
|
1665
778
|
@nil_positions = nil
|
1666
779
|
@nan_positions = nil
|
1667
780
|
end
|
1668
|
-
|
1669
|
-
def resort_index(vector_index, opts)
|
1670
|
-
if block_given?
|
1671
|
-
vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) }
|
1672
|
-
else
|
1673
|
-
vector_index.sort(&DEFAULT_SORTER)
|
1674
|
-
end
|
1675
|
-
.tap { |res| res.reverse! unless opts[:ascending] }
|
1676
|
-
end
|
1677
781
|
end
|
1678
782
|
end
|