daru_lite 0.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
  3. data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  4. data/.github/workflows/ci.yml +20 -0
  5. data/.rubocop_todo.yml +35 -33
  6. data/README.md +19 -115
  7. data/daru_lite.gemspec +1 -0
  8. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  9. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  10. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  11. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  12. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  13. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  14. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  15. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  16. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  17. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  18. data/lib/daru_lite/data_frame/missable.rb +75 -0
  19. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  20. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  21. data/lib/daru_lite/data_frame/setable.rb +109 -0
  22. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  23. data/lib/daru_lite/dataframe.rb +142 -2355
  24. data/lib/daru_lite/index/index.rb +13 -0
  25. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  26. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  27. data/lib/daru_lite/vector/calculatable.rb +78 -0
  28. data/lib/daru_lite/vector/convertible.rb +77 -0
  29. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  30. data/lib/daru_lite/vector/fetchable.rb +175 -0
  31. data/lib/daru_lite/vector/filterable.rb +128 -0
  32. data/lib/daru_lite/vector/indexable.rb +77 -0
  33. data/lib/daru_lite/vector/iterable.rb +95 -0
  34. data/lib/daru_lite/vector/joinable.rb +17 -0
  35. data/lib/daru_lite/vector/missable.rb +124 -0
  36. data/lib/daru_lite/vector/queryable.rb +45 -0
  37. data/lib/daru_lite/vector/setable.rb +47 -0
  38. data/lib/daru_lite/vector/sortable.rb +113 -0
  39. data/lib/daru_lite/vector.rb +36 -932
  40. data/lib/daru_lite/version.rb +1 -1
  41. data/spec/data_frame/aggregatable_example.rb +65 -0
  42. data/spec/data_frame/buildable_example.rb +109 -0
  43. data/spec/data_frame/calculatable_example.rb +135 -0
  44. data/spec/data_frame/convertible_example.rb +180 -0
  45. data/spec/data_frame/duplicatable_example.rb +111 -0
  46. data/spec/data_frame/fetchable_example.rb +476 -0
  47. data/spec/data_frame/filterable_example.rb +250 -0
  48. data/spec/data_frame/indexable_example.rb +221 -0
  49. data/spec/data_frame/iterable_example.rb +465 -0
  50. data/spec/data_frame/joinable_example.rb +106 -0
  51. data/spec/data_frame/missable_example.rb +47 -0
  52. data/spec/data_frame/pivotable_example.rb +297 -0
  53. data/spec/data_frame/queryable_example.rb +92 -0
  54. data/spec/data_frame/setable_example.rb +482 -0
  55. data/spec/data_frame/sortable_example.rb +350 -0
  56. data/spec/dataframe_spec.rb +181 -3243
  57. data/spec/index/index_spec.rb +8 -0
  58. data/spec/vector/aggregatable_example.rb +27 -0
  59. data/spec/vector/calculatable_example.rb +82 -0
  60. data/spec/vector/convertible_example.rb +126 -0
  61. data/spec/vector/duplicatable_example.rb +48 -0
  62. data/spec/vector/fetchable_example.rb +463 -0
  63. data/spec/vector/filterable_example.rb +165 -0
  64. data/spec/vector/indexable_example.rb +201 -0
  65. data/spec/vector/iterable_example.rb +111 -0
  66. data/spec/vector/joinable_example.rb +25 -0
  67. data/spec/vector/missable_example.rb +88 -0
  68. data/spec/vector/queryable_example.rb +91 -0
  69. data/spec/vector/setable_example.rb +300 -0
  70. data/spec/vector/sortable_example.rb +242 -0
  71. data/spec/vector_spec.rb +111 -1805
  72. metadata +102 -3
  73. data/.github/ISSUE_TEMPLATE.md +0 -18
@@ -2,12 +2,38 @@ require 'daru_lite/maths/arithmetic/vector'
2
2
  require 'daru_lite/maths/statistics/vector'
3
3
  require 'daru_lite/accessors/array_wrapper'
4
4
  require 'daru_lite/category'
5
+ require 'daru_lite/vector/aggregatable'
6
+ require 'daru_lite/vector/calculatable'
7
+ require 'daru_lite/vector/convertible'
8
+ require 'daru_lite/vector/duplicatable'
9
+ require 'daru_lite/vector/fetchable'
10
+ require 'daru_lite/vector/filterable'
11
+ require 'daru_lite/vector/indexable'
12
+ require 'daru_lite/vector/iterable'
13
+ require 'daru_lite/vector/joinable'
14
+ require 'daru_lite/vector/missable'
15
+ require 'daru_lite/vector/setable'
16
+ require 'daru_lite/vector/sortable'
17
+ require 'daru_lite/vector/queryable'
5
18
 
6
19
  module DaruLite
7
20
  class Vector # rubocop:disable Metrics/ClassLength
8
21
  include Enumerable
9
22
  include DaruLite::Maths::Arithmetic::Vector
10
23
  include DaruLite::Maths::Statistics::Vector
24
+ include DaruLite::Vector::Aggregatable
25
+ include DaruLite::Vector::Calculatable
26
+ include DaruLite::Vector::Convertible
27
+ include DaruLite::Vector::Duplicatable
28
+ include DaruLite::Vector::Fetchable
29
+ include DaruLite::Vector::Filterable
30
+ include DaruLite::Vector::Indexable
31
+ include DaruLite::Vector::Iterable
32
+ include DaruLite::Vector::Joinable
33
+ include DaruLite::Vector::Missable
34
+ include DaruLite::Vector::Setable
35
+ include DaruLite::Vector::Sortable
36
+ include DaruLite::Vector::Queryable
11
37
  extend Gem::Deprecate
12
38
 
13
39
  class << self
@@ -90,46 +116,6 @@ module DaruLite
90
116
  @data.size
91
117
  end
92
118
 
93
- def each(&block)
94
- return to_enum(:each) unless block
95
-
96
- @data.each(&block)
97
- self
98
- end
99
-
100
- def each_index(&block)
101
- return to_enum(:each_index) unless block
102
-
103
- @index.each(&block)
104
- self
105
- end
106
-
107
- def each_with_index(&block)
108
- return to_enum(:each_with_index) unless block
109
-
110
- @data.to_a.zip(@index.to_a).each(&block)
111
-
112
- self
113
- end
114
-
115
- def map!(&block)
116
- return to_enum(:map!) unless block
117
-
118
- @data.map!(&block)
119
- self
120
- end
121
-
122
- def apply_method(method, keys: nil, by_position: true)
123
- vect = keys ? get_sub_vector(keys, by_position: by_position) : self
124
-
125
- case method
126
- when Symbol then vect.send(method)
127
- when Proc then method.call(vect)
128
- else raise
129
- end
130
- end
131
- alias apply_method_on_sub_vector apply_method
132
-
133
119
  # The name of the DaruLite::Vector. String.
134
120
  attr_reader :name
135
121
  # The row index. Can be either DaruLite::Index or DaruLite::MultiIndex.
@@ -185,98 +171,6 @@ module DaruLite
185
171
  end
186
172
  end
187
173
 
188
- # Get one or more elements with specified index or a range.
189
- #
190
- # == Usage
191
- # # For vectors employing single layer Index
192
- #
193
- # v[:one, :two] # => DaruLite::Vector with indexes :one and :two
194
- # v[:one] # => Single element
195
- # v[:one..:three] # => DaruLite::Vector with indexes :one, :two and :three
196
- #
197
- # # For vectors employing hierarchial multi index
198
- #
199
- def [](*input_indexes)
200
- # Get array of positions indexes
201
- positions = @index.pos(*input_indexes)
202
-
203
- # If one object is asked return it
204
- return @data[positions] if positions.is_a? Numeric
205
-
206
- # Form a new Vector using positional indexes
207
- DaruLite::Vector.new(
208
- positions.map { |loc| @data[loc] },
209
- name: @name,
210
- index: @index.subset(*input_indexes), dtype: @dtype
211
- )
212
- end
213
-
214
- # Returns vector of values given positional values
215
- # @param positions [Array<object>] positional values
216
- # @return [object] vector
217
- # @example
218
- # dv = DaruLite::Vector.new 'a'..'e'
219
- # dv.at 0, 1, 2
220
- # # => #<DaruLite::Vector(3)>
221
- # # 0 a
222
- # # 1 b
223
- # # 2 c
224
- def at(*positions)
225
- # to be used to form index
226
- original_positions = positions
227
- positions = coerce_positions(*positions)
228
- validate_positions(*positions)
229
-
230
- if positions.is_a? Integer
231
- @data[positions]
232
- else
233
- values = positions.map { |pos| @data[pos] }
234
- DaruLite::Vector.new values, index: @index.at(*original_positions), dtype: dtype
235
- end
236
- end
237
-
238
- # Change value at given positions
239
- # @param positions [Array<object>] positional values
240
- # @param [object] val value to assign
241
- # @example
242
- # dv = DaruLite::Vector.new 'a'..'e'
243
- # dv.set_at [0, 1], 'x'
244
- # dv
245
- # # => #<DaruLite::Vector(5)>
246
- # # 0 x
247
- # # 1 x
248
- # # 2 c
249
- # # 3 d
250
- # # 4 e
251
- def set_at(positions, val)
252
- validate_positions(*positions)
253
- positions.map { |pos| @data[pos] = val }
254
- update_position_cache
255
- end
256
-
257
- # Just like in Hashes, you can specify the index label of the DaruLite::Vector
258
- # and assign an element an that place in the DaruLite::Vector.
259
- #
260
- # == Usage
261
- #
262
- # v = DaruLite::Vector.new([1,2,3], index: [:a, :b, :c])
263
- # v[:a] = 999
264
- # #=>
265
- # ##<DaruLite::Vector:90257920 @name = nil @size = 3 >
266
- # # nil
267
- # # a 999
268
- # # b 2
269
- # # c 3
270
- def []=(*indexes, val)
271
- cast(dtype: :array) if val.nil? && dtype != :array
272
-
273
- guard_type_check(val)
274
-
275
- modify_vector(indexes, val)
276
-
277
- update_position_cache
278
- end
279
-
280
174
  # Two vectors are equal if they have the exact same index values corresponding
281
175
  # with the exact same elements. Name is ignored.
282
176
  def ==(other)
@@ -367,85 +261,6 @@ module DaruLite
367
261
  )
368
262
  end
369
263
 
370
- # Return a new vector based on the contents of a boolean array. Use with the
371
- # comparator methods to obtain meaningful results. See this notebook for
372
- # a good overview of using #where.
373
- #
374
- # @param bool_array [DaruLite::Core::Query::BoolArray, Array<TrueClass, FalseClass>] The
375
- # collection containing the true of false values. Each element in the Vector
376
- # corresponding to a `true` in the bool_arry will be returned alongwith it's
377
- # index.
378
- # @example Usage of #where.
379
- # vector = DaruLite::Vector.new([2,4,5,51,5,16,2,5,3,2,1,5,2,5,2,1,56,234,6,21])
380
- #
381
- # # Simple logic statement passed to #where.
382
- # vector.where(vector.eq(5).or(vector.eq(1)))
383
- # # =>
384
- # ##<DaruLite::Vector:77626210 @name = nil @size = 7 >
385
- # # nil
386
- # # 2 5
387
- # # 4 5
388
- # # 7 5
389
- # # 10 1
390
- # # 11 5
391
- # # 13 5
392
- # # 15 1
393
- #
394
- # # A somewhat more complex logic statement
395
- # vector.where((vector.eq(5) | vector.lteq(1)) & vector.in([4,5,1]))
396
- # #=>
397
- # ##<DaruLite::Vector:81072310 @name = nil @size = 7 >
398
- # # nil
399
- # # 2 5
400
- # # 4 5
401
- # # 7 5
402
- # # 10 1
403
- # # 11 5
404
- # # 13 5
405
- # # 15 1
406
- def where(bool_array)
407
- DaruLite::Core::Query.vector_where self, bool_array
408
- end
409
-
410
- # Return a new vector based on the contents of a boolean array and &block.
411
- #
412
- # @param bool_array [DaruLite::Core::Query::BoolArray, Array<TrueClass, FalseClass>, &block] The
413
- # collection containing the true of false values. Each element in the Vector
414
- # corresponding to a `true` in the bool_array will be returned along with it's
415
- # index. The &block may contain manipulative functions for the Vector elements.
416
- #
417
- # @return [DaruLite::Vector]
418
- #
419
- # @example Usage of #apply_where.
420
- # dv = DaruLite::Vector.new ['3 days', '5 weeks', '2 weeks']
421
- # dv = dv.apply_where(dv.match /weeks/) { |x| "#{x.split.first.to_i * 7} days" }
422
- # # =>
423
- # ##<DaruLite::Vector(3)>
424
- # # 0 3 days
425
- # # 1 35 days
426
- # # 2 14 days
427
- def apply_where(bool_array, &block)
428
- DaruLite::Core::Query.vector_apply_where self, bool_array, &block
429
- end
430
-
431
- def head(q = 10)
432
- self[0..(q - 1)]
433
- end
434
-
435
- def tail(q = 10)
436
- start = [size - q, 0].max
437
- self[start..(size - 1)]
438
- end
439
-
440
- def last(q = 1)
441
- # The Enumerable mixin dose not provide the last method.
442
- tail(q)
443
- end
444
-
445
- def empty?
446
- @index.empty?
447
- end
448
-
449
264
  def numeric?
450
265
  type == :numeric
451
266
  end
@@ -454,26 +269,6 @@ module DaruLite
454
269
  type == :object
455
270
  end
456
271
 
457
- # Reports whether missing data is present in the Vector.
458
- def has_missing_data?
459
- !indexes(*DaruLite::MISSING_VALUES).empty?
460
- end
461
- alias flawed? has_missing_data?
462
- deprecate :has_missing_data?, :include_values?, 2016, 10
463
- deprecate :flawed?, :include_values?, 2016, 10
464
-
465
- # Check if any one of mentioned values occur in the vector
466
- # @param values [Array] values to check for
467
- # @return [true, false] returns true if any one of specified values
468
- # occur in the vector
469
- # @example
470
- # dv = DaruLite::Vector.new [1, 2, 3, 4, nil]
471
- # dv.include_values? nil, Float::NAN
472
- # # => true
473
- def include_values?(*values)
474
- values.any? { |v| include_with_nan? @data, v }
475
- end
476
-
477
272
  # @note Do not use it to check for Float::NAN as
478
273
  # Float::NAN == Float::NAN is false
479
274
  # Return vector of booleans with value at ith position is either
@@ -494,18 +289,6 @@ module DaruLite
494
289
  DaruLite::Vector.new values.map { |v| eq(v) }.inject(:|)
495
290
  end
496
291
 
497
- # Append an element to the vector by specifying the element and index
498
- def concat(element, index)
499
- raise IndexError, 'Expected new unique index' if @index.include? index
500
-
501
- @index |= [index]
502
- @data[@index[index]] = element
503
-
504
- update_position_cache
505
- end
506
- alias push concat
507
- alias << concat
508
-
509
292
  # Cast a vector to a new data type.
510
293
  #
511
294
  # == Options
@@ -531,6 +314,14 @@ module DaruLite
531
314
  update_position_cache
532
315
  end
533
316
 
317
+ # Delete element by position
318
+ def delete_at_position(position)
319
+ @data.delete_at(position)
320
+ @index = @index.delete_at(position)
321
+
322
+ update_position_cache
323
+ end
324
+
534
325
  # The type of data contained in the vector. Can be :object.
535
326
  #
536
327
  # Running through the data to figure out the kind of data is delayed to the
@@ -560,148 +351,6 @@ module DaruLite
560
351
  type == :category
561
352
  end
562
353
 
563
- # Get index of element
564
- def index_of(element)
565
- case dtype
566
- when :array then @index.key(@data.index { |x| x.eql? element })
567
- else @index.key @data.index(element)
568
- end
569
- end
570
-
571
- # Keep only unique elements of the vector alongwith their indexes.
572
- def uniq
573
- uniq_vector = @data.uniq
574
- new_index = uniq_vector.map { |element| index_of(element) }
575
-
576
- DaruLite::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
577
- end
578
-
579
- def any?(&block)
580
- @data.data.any?(&block)
581
- end
582
-
583
- def all?(&block)
584
- @data.data.all?(&block)
585
- end
586
-
587
- # Sorts a vector according to its values. If a block is specified, the contents
588
- # will be evaluated and data will be swapped whenever the block evaluates
589
- # to *true*. Defaults to ascending order sorting. Any missing values will be
590
- # put at the end of the vector. Preserves indexing. Default sort algorithm is
591
- # quick sort.
592
- #
593
- # == Options
594
- #
595
- # * +:ascending+ - if false, will sort in descending order. Defaults to true.
596
- #
597
- # * +:type+ - Specify the sorting algorithm. Only supports quick_sort for now.
598
- # == Usage
599
- #
600
- # v = DaruLite::Vector.new ["My first guitar", "jazz", "guitar"]
601
- # # Say you want to sort these strings by length.
602
- # v.sort(ascending: false) { |a,b| a.length <=> b.length }
603
- def sort(opts = {}, &block)
604
- opts = { ascending: true }.merge(opts)
605
-
606
- vector_index = resort_index(@data.each_with_index, opts, &block)
607
- vector, index = vector_index.transpose
608
-
609
- index = @index.reorder index
610
-
611
- DaruLite::Vector.new(vector, index: index, name: @name, dtype: @dtype)
612
- end
613
-
614
- # Sorts the vector according to it's`Index` values. Defaults to ascending
615
- # order sorting.
616
- #
617
- # @param [Hash] opts the options for sort_by_index method.
618
- # @option opts [Boolean] :ascending false, will sort `index` in
619
- # descending order.
620
- #
621
- # @return [Vector] new sorted `Vector` according to the index values.
622
- #
623
- # @example
624
- #
625
- # dv = DaruLite::Vector.new [11, 13, 12], index: [23, 21, 22]
626
- # # Say you want to sort index in ascending order
627
- # dv.sort_by_index(ascending: true)
628
- # #=> DaruLite::Vector.new [13, 12, 11], index: [21, 22, 23]
629
- # # Say you want to sort index in descending order
630
- # dv.sort_by_index(ascending: false)
631
- # #=> DaruLite::Vector.new [11, 12, 13], index: [23, 22, 21]
632
- def sort_by_index(opts = {})
633
- opts = { ascending: true }.merge(opts)
634
- _, new_order = resort_index(@index.each_with_index, opts).transpose
635
-
636
- reorder new_order
637
- end
638
-
639
- DEFAULT_SORTER = lambda { |(lv, li), (rv, ri)|
640
- if lv.nil? && rv.nil?
641
- li <=> ri
642
- elsif lv.nil?
643
- -1
644
- elsif rv.nil?
645
- 1
646
- else
647
- lv <=> rv
648
- end
649
- }
650
-
651
- # Just sort the data and get an Array in return using Enumerable#sort.
652
- # Non-destructive.
653
- # :nocov:
654
- def sorted_data(&block)
655
- @data.to_a.sort(&block)
656
- end
657
- # :nocov:
658
-
659
- # Like map, but returns a DaruLite::Vector with the returned values.
660
- def recode(dt = nil, &block)
661
- return to_enum(:recode, dt) unless block
662
-
663
- dup.recode! dt, &block
664
- end
665
-
666
- # Destructive version of recode!
667
- def recode!(dt = nil, &block)
668
- return to_enum(:recode!, dt) unless block
669
-
670
- @data.map!(&block).data
671
- @data = cast_vector_to(dt || @dtype)
672
- self
673
- end
674
-
675
- # Delete an element if block returns true. Destructive.
676
- def delete_if
677
- return to_enum(:delete_if) unless block_given?
678
-
679
- keep_e, keep_i = each_with_index.reject { |n, _i| yield(n) }.transpose
680
-
681
- @data = cast_vector_to @dtype, keep_e
682
- @index = DaruLite::Index.new(keep_i)
683
-
684
- update_position_cache
685
-
686
- self
687
- end
688
-
689
- # Keep an element if block returns true. Destructive.
690
- def keep_if
691
- return to_enum(:keep_if) unless block_given?
692
-
693
- delete_if { |val| !yield(val) }
694
- end
695
-
696
- # Reports all values that doesn't comply with a condition.
697
- # Returns a hash with the index of data and the invalid data.
698
- def verify
699
- (0...size)
700
- .map { |i| [i, @data[i]] }
701
- .reject { |_i, val| yield(val) }
702
- .to_h
703
- end
704
-
705
354
  # Return an Array with the data splitted by a separator.
706
355
  # a=DaruLite::Vector.new(["a,b","c,d","a,b","d"])
707
356
  # a.splitted
@@ -719,93 +368,6 @@ module DaruLite
719
368
  end
720
369
  end
721
370
 
722
- # Returns a hash of Vectors, defined by the different values
723
- # defined on the fields
724
- # Example:
725
- #
726
- # a=DaruLite::Vector.new(["a,b","c,d","a,b"])
727
- # a.split_by_separator
728
- # => {"a"=>#<DaruLite::Vector:0x7f2dbcc09d88
729
- # @data=[1, 0, 1]>,
730
- # "b"=>#<DaruLite::Vector:0x7f2dbcc09c48
731
- # @data=[1, 1, 0]>,
732
- # "c"=>#<DaruLite::Vector:0x7f2dbcc09b08
733
- # @data=[0, 1, 1]>}
734
- #
735
- def split_by_separator(sep = ',')
736
- split_data = splitted sep
737
- split_data
738
- .flatten.uniq.compact.to_h do |key|
739
- [
740
- key,
741
- DaruLite::Vector.new(split_data.map { |v| split_value(key, v) })
742
- ]
743
- end
744
- end
745
-
746
- def split_by_separator_freq(sep = ',')
747
- split_by_separator(sep).transform_values do |v|
748
- v.sum(&:to_i)
749
- end
750
- end
751
-
752
- def reset_index!
753
- @index = DaruLite::Index.new(Array.new(size) { |i| i })
754
- self
755
- end
756
-
757
- # Replace all nils in the vector with the value passed as an argument. Destructive.
758
- # See #replace_nils for non-destructive version
759
- #
760
- # == Arguments
761
- #
762
- # * +replacement+ - The value which should replace all nils
763
- def replace_nils!(replacement)
764
- indexes(*DaruLite::MISSING_VALUES).each do |idx|
765
- self[idx] = replacement
766
- end
767
-
768
- self
769
- end
770
-
771
- # Rolling fillna
772
- # replace all Float::NAN and NIL values with the preceeding or following value
773
- #
774
- # @param direction [Symbol] (:forward, :backward) whether replacement value is preceeding or following
775
- #
776
- # @example
777
- # dv = DaruLite::Vector.new([1, 2, 1, 4, nil, Float::NAN, 3, nil, Float::NAN])
778
- #
779
- # 2.3.3 :068 > dv.rolling_fillna(:forward)
780
- # => #<DaruLite::Vector(9)>
781
- # 0 1
782
- # 1 2
783
- # 2 1
784
- # 3 4
785
- # 4 4
786
- # 5 4
787
- # 6 3
788
- # 7 3
789
- # 8 3
790
- #
791
- def rolling_fillna!(direction = :forward)
792
- enum = direction == :forward ? index : index.reverse_each
793
- last_valid_value = 0
794
- enum.each do |idx|
795
- if valid_value?(self[idx])
796
- last_valid_value = self[idx]
797
- else
798
- self[idx] = last_valid_value
799
- end
800
- end
801
- self
802
- end
803
-
804
- # Non-destructive version of rolling_fillna!
805
- def rolling_fillna(direction = :forward)
806
- dup.rolling_fillna!(direction)
807
- end
808
-
809
371
  # Lags the series by `k` periods.
810
372
  #
811
373
  # Lags the series by `k` periods, "shifting" data and inserting `nil`s
@@ -845,187 +407,6 @@ module DaruLite
845
407
  end
846
408
  end
847
409
 
848
- def detach_index
849
- DaruLite::DataFrame.new(
850
- index: @index.to_a,
851
- values: @data.to_a
852
- )
853
- end
854
-
855
- # Non-destructive version of #replace_nils!
856
- def replace_nils(replacement)
857
- dup.replace_nils!(replacement)
858
- end
859
-
860
- # number of non-missing elements
861
- def n_valid
862
- size - indexes(*DaruLite::MISSING_VALUES).size
863
- end
864
- deprecate :n_valid, :count_values, 2016, 10
865
-
866
- # Count the number of values specified
867
- # @param values [Array] values to count for
868
- # @return [Integer] the number of times the values mentioned occurs
869
- # @example
870
- # dv = DaruLite::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
871
- # dv.count_values nil
872
- # # => 2
873
- def count_values(*values)
874
- positions(*values).size
875
- end
876
-
877
- # Returns *true* if an index exists
878
- def has_index?(index)
879
- @index.include? index
880
- end
881
-
882
- # @param keys [Array] can be positions (if by_position is true) or indexes (if by_position if false)
883
- # @return [DaruLite::Vector]
884
- def get_sub_vector(keys, by_position: true)
885
- return DaruLite::Vector.new([]) if keys == []
886
-
887
- keys = @index.pos(*keys) unless by_position
888
-
889
- sub_vect = at(*keys)
890
- sub_vect = DaruLite::Vector.new([sub_vect]) unless sub_vect.is_a?(DaruLite::Vector)
891
-
892
- sub_vect
893
- end
894
-
895
- # @return [DaruLite::DataFrame] the vector as a single-vector dataframe
896
- def to_df
897
- DaruLite::DataFrame.new({ @name => @data }, name: @name, index: @index)
898
- end
899
-
900
- # Convert Vector to a horizontal or vertical Ruby Matrix.
901
- #
902
- # == Arguments
903
- #
904
- # * +axis+ - Specify whether you want a *:horizontal* or a *:vertical* matrix.
905
- def to_matrix(axis = :horizontal)
906
- case axis
907
- when :horizontal
908
- Matrix[to_a]
909
- when :vertical
910
- Matrix.columns([to_a])
911
- else
912
- raise ArgumentError, "axis should be either :horizontal or :vertical, not #{axis}"
913
- end
914
- end
915
-
916
- # Convert to hash (explicit). Hash keys are indexes and values are the correspoding elements
917
- def to_h
918
- @index.to_h { |index| [index, self[index]] }
919
- end
920
-
921
- # Return an array
922
- def to_a
923
- @data.to_a
924
- end
925
-
926
- # Convert the hash from to_h to json
927
- def to_json(*)
928
- to_h.to_json
929
- end
930
-
931
- # Convert to html for iruby
932
- def to_html(threshold = 30)
933
- table_thead = to_html_thead
934
- table_tbody = to_html_tbody(threshold)
935
- path = if index.is_a?(MultiIndex)
936
- File.expand_path('iruby/templates/vector_mi.html.erb', __dir__)
937
- else
938
- File.expand_path('iruby/templates/vector.html.erb', __dir__)
939
- end
940
- ERB.new(File.read(path).strip).result(binding)
941
- end
942
-
943
- def to_html_thead
944
- table_thead_path =
945
- if index.is_a?(MultiIndex)
946
- File.expand_path('iruby/templates/vector_mi_thead.html.erb', __dir__)
947
- else
948
- File.expand_path('iruby/templates/vector_thead.html.erb', __dir__)
949
- end
950
- ERB.new(File.read(table_thead_path).strip).result(binding)
951
- end
952
-
953
- def to_html_tbody(threshold = 30)
954
- table_tbody_path =
955
- if index.is_a?(MultiIndex)
956
- File.expand_path('iruby/templates/vector_mi_tbody.html.erb', __dir__)
957
- else
958
- File.expand_path('iruby/templates/vector_tbody.html.erb', __dir__)
959
- end
960
- ERB.new(File.read(table_tbody_path).strip).result(binding)
961
- end
962
-
963
- def to_s
964
- "#<#{self.class}#{": #{@name}" if @name}(#{size})#{':category' if category?}>"
965
- end
966
-
967
- # Create a summary of the Vector
968
- # @param indent_level [Fixnum] indent level
969
- # @return [String] String containing the summary of the Vector
970
- # @example
971
- # dv = DaruLite::Vector.new [1, 2, 3]
972
- # puts dv.summary
973
- #
974
- # # =
975
- # # n :3
976
- # # non-missing:3
977
- # # median: 2
978
- # # mean: 2.0000
979
- # # std.dev.: 1.0000
980
- # # std.err.: 0.5774
981
- # # skew: 0.0000
982
- # # kurtosis: -2.3333
983
- def summary(indent_level = 0)
984
- non_missing = size - count_values(*DaruLite::MISSING_VALUES)
985
- summary = (' =' * indent_level) + "= #{name}" \
986
- "\n n :#{size}" \
987
- "\n non-missing:#{non_missing}"
988
- case type
989
- when :object
990
- summary << object_summary
991
- when :numeric
992
- summary << numeric_summary
993
- end
994
- summary.split("\n").join("\n#{' ' * indent_level}")
995
- end
996
-
997
- # Displays summary for an object type Vector
998
- # @return [String] String containing object vector summary
999
- def object_summary
1000
- nval = count_values(*DaruLite::MISSING_VALUES)
1001
- summary = "\n factors: #{factors.to_a.join(',')}" \
1002
- "\n mode: #{mode.to_a.join(',')}" \
1003
- "\n Distribution\n"
1004
-
1005
- data = frequencies.sort.each_with_index.map do |v, k|
1006
- [k, v, format('%0.2f%%', ((nval.zero? ? 1 : v.quo(nval)) * 100))]
1007
- end
1008
-
1009
- summary + Formatters::Table.format(data)
1010
- end
1011
-
1012
- # Displays summary for an numeric type Vector
1013
- # @return [String] String containing numeric vector summary
1014
- def numeric_summary
1015
- summary = "\n median: #{median}" +
1016
- format("\n mean: %0.4f", mean)
1017
- if sd
1018
- summary << (format("\n std.dev.: %0.4f", sd) +
1019
- format("\n std.err.: %0.4f", se))
1020
- end
1021
-
1022
- if count_values(*DaruLite::MISSING_VALUES).zero?
1023
- summary << (format("\n skew: %0.4f", skew) +
1024
- format("\n kurtosis: %0.4f", kurtosis))
1025
- end
1026
- summary
1027
- end
1028
-
1029
410
  # Over rides original inspect for pretty printing in irb
1030
411
  def inspect(spacing = 20, threshold = 15)
1031
412
  row_headers = index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a
@@ -1040,68 +421,6 @@ module DaruLite
1040
421
  )
1041
422
  end
1042
423
 
1043
- # Sets new index for vector. Preserves index->value correspondence.
1044
- # Sets nil for new index keys absent from original index.
1045
- # @note Unlike #reorder! which takes positions as input it takes
1046
- # index as an input to reorder the vector
1047
- # @param [DaruLite::Index, DaruLite::MultiIndex] new_index new index to order with
1048
- # @return [DaruLite::Vector] vector reindexed with new index
1049
- def reindex!(new_index)
1050
- values = []
1051
- each_with_index do |val, i|
1052
- values[new_index[i]] = val if new_index.include?(i)
1053
- end
1054
- values.fill(nil, values.size, new_index.size - values.size)
1055
-
1056
- @data = cast_vector_to @dtype, values
1057
- @index = new_index
1058
-
1059
- update_position_cache
1060
-
1061
- self
1062
- end
1063
-
1064
- # Reorder the vector with given positions
1065
- # @note Unlike #reindex! which takes index as input, it takes
1066
- # positions as an input to reorder the vector
1067
- # @param [Array] order the order to reorder the vector with
1068
- # @return reordered vector
1069
- # @example
1070
- # dv = DaruLite::Vector.new [3, 2, 1], index: ['c', 'b', 'a']
1071
- # dv.reorder! [2, 1, 0]
1072
- # # => #<DaruLite::Vector(3)>
1073
- # # a 1
1074
- # # b 2
1075
- # # c 3
1076
- def reorder!(order)
1077
- @index = @index.reorder order
1078
- data_array = order.map { |i| @data[i] }
1079
- @data = cast_vector_to @dtype, data_array, @nm_dtype
1080
- update_position_cache
1081
- self
1082
- end
1083
-
1084
- # Non-destructive version of #reorder!
1085
- def reorder(order)
1086
- dup.reorder! order
1087
- end
1088
-
1089
- # Create a new vector with a different index, and preserve the indexing of
1090
- # current elements.
1091
- def reindex(new_index)
1092
- dup.reindex!(new_index)
1093
- end
1094
-
1095
- def index=(idx)
1096
- idx = Index.coerce(idx)
1097
-
1098
- raise ArgumentError, "Size of supplied index #{idx.size} does not match size of Vector" if idx.size != size
1099
- raise ArgumentError, 'Can only assign type Index and its subclasses.' unless idx.is_a?(DaruLite::Index)
1100
-
1101
- @index = idx
1102
- self
1103
- end
1104
-
1105
424
  # Give the vector a new name
1106
425
  #
1107
426
  # @param new_name [Symbol] The new name.
@@ -1112,12 +431,6 @@ module DaruLite
1112
431
 
1113
432
  alias name= rename
1114
433
 
1115
- # Duplicated a vector
1116
- # @return [DaruLite::Vector] duplicated vector
1117
- def dup
1118
- DaruLite::Vector.new @data.dup, name: @name, index: @index.dup
1119
- end
1120
-
1121
434
  # == Bootstrap
1122
435
  # Generate +nr+ resamples (with replacement) of size +s+
1123
436
  # from vector, computing each estimate from +estimators+
@@ -1195,130 +508,6 @@ module DaruLite
1195
508
  DaruLite::DataFrame.new ps
1196
509
  end
1197
510
 
1198
- # Returns an array of either none or integer values, indicating the
1199
- # +regexp+ matching with the given array.
1200
- #
1201
- # @param regexp [Regexp] A regular matching expression. For example, +/weeks/+.
1202
- #
1203
- # @return [Array] Containing either +nil+ or integer values, according to the match with the given +regexp+
1204
- #
1205
- # @example
1206
- # dv = DaruLite::Vector.new(['3 days', '5 weeks', '2 weeks'])
1207
- # dv.match(/weeks/)
1208
- #
1209
- # # => [false, true, true]
1210
- def match(regexp)
1211
- @data.map { |value| !!(value =~ regexp) }
1212
- end
1213
-
1214
- # Creates a new vector consisting only of non-nil data
1215
- #
1216
- # == Arguments
1217
- #
1218
- # @param as_a [Symbol] Passing :array will return only the elements
1219
- # as an Array. Otherwise will return a DaruLite::Vector.
1220
- #
1221
- # @param _duplicate [Symbol] In case no missing data is found in the
1222
- # vector, setting this to false will return the same vector.
1223
- # Otherwise, a duplicate will be returned irrespective of
1224
- # presence of missing data.
1225
-
1226
- def only_valid(as_a = :vector, _duplicate = true)
1227
- # FIXME: Now duplicate is just ignored.
1228
- # There are no spec that fail on this case, so I'll leave it
1229
- # this way for now - zverok, 2016-05-07
1230
-
1231
- new_index = @index.to_a - indexes(*DaruLite::MISSING_VALUES)
1232
- new_vector = new_index.map { |idx| self[idx] }
1233
-
1234
- if as_a == :vector
1235
- DaruLite::Vector.new new_vector, index: new_index, name: @name, dtype: dtype
1236
- else
1237
- new_vector
1238
- end
1239
- end
1240
- deprecate :only_valid, :reject_values, 2016, 10
1241
-
1242
- # Return a vector with specified values removed
1243
- # @param values [Array] values to reject from resultant vector
1244
- # @return [DaruLite::Vector] vector with specified values removed
1245
- # @example
1246
- # dv = DaruLite::Vector.new [1, 2, nil, Float::NAN]
1247
- # dv.reject_values nil, Float::NAN
1248
- # # => #<DaruLite::Vector(2)>
1249
- # # 0 1
1250
- # # 1 2
1251
- def reject_values(*values)
1252
- resultant_pos = size.times.to_a - positions(*values)
1253
- dv = at(*resultant_pos)
1254
- # Handle the case when number of positions is 1
1255
- # and hence #at doesn't return a vector
1256
- if dv.is_a?(DaruLite::Vector)
1257
- dv
1258
- else
1259
- pos = resultant_pos.first
1260
- at(pos..pos)
1261
- end
1262
- end
1263
-
1264
- # Return indexes of values specified
1265
- # @param values [Array] values to find indexes for
1266
- # @return [Array] array of indexes of values specified
1267
- # @example
1268
- # dv = DaruLite::Vector.new [1, 2, nil, Float::NAN], index: 11..14
1269
- # dv.indexes nil, Float::NAN
1270
- # # => [13, 14]
1271
- def indexes(*values)
1272
- index.to_a.values_at(*positions(*values))
1273
- end
1274
-
1275
- # Replaces specified values with a new value
1276
- # @param [Array] old_values array of values to replace
1277
- # @param [object] new_value new value to replace with
1278
- # @note It performs the replace in place.
1279
- # @return [DaruLite::Vector] Same vector itself with values
1280
- # replaced with new value
1281
- # @example
1282
- # dv = DaruLite::Vector.new [1, 2, :a, :b]
1283
- # dv.replace_values [:a, :b], nil
1284
- # dv
1285
- # # =>
1286
- # # #<DaruLite::Vector:19903200 @name = nil @metadata = {} @size = 4 >
1287
- # # nil
1288
- # # 0 1
1289
- # # 1 2
1290
- # # 2 nil
1291
- # # 3 nil
1292
- def replace_values(old_values, new_value)
1293
- old_values = [old_values] unless old_values.is_a? Array
1294
- size.times do |pos|
1295
- set_at([pos], new_value) if include_with_nan? old_values, at(pos)
1296
- end
1297
- self
1298
- end
1299
-
1300
- # Returns a Vector containing only missing data (preserves indexes).
1301
- def only_missing(as_a = :vector)
1302
- case as_a
1303
- when :vector
1304
- self[*indexes(*DaruLite::MISSING_VALUES)]
1305
- when :array
1306
- self[*indexes(*DaruLite::MISSING_VALUES)].to_a
1307
- end
1308
- end
1309
- deprecate :only_missing, nil, 2016, 10
1310
-
1311
- # Returns a Vector with only numerical data. Missing data is included
1312
- # but non-Numeric objects are excluded. Preserves index.
1313
- def only_numerics
1314
- numeric_indexes =
1315
- each_with_index
1316
- .select { |v, _i| v.is_a?(Numeric) || v.nil? }
1317
- .map(&:last)
1318
-
1319
- self[*numeric_indexes]
1320
- end
1321
-
1322
511
  DATE_REGEXP = /^(\d{2}-\d{2}-\d{4}|\d{4}-\d{2}-\d{2})$/.freeze
1323
512
 
1324
513
  # Returns the database type for the vector, according to its content
@@ -1335,12 +524,6 @@ module DaruLite
1335
524
  end
1336
525
  end
1337
526
 
1338
- # Copies the structure of the vector (i.e the index, size, etc.) and fills all
1339
- # all values with nils.
1340
- def clone_structure
1341
- DaruLite::Vector.new(([nil] * size), name: @name, index: @index.dup)
1342
- end
1343
-
1344
527
  # Save the vector to a file
1345
528
  #
1346
529
  # == Arguments
@@ -1396,61 +579,6 @@ module DaruLite
1396
579
  name.to_s.end_with?('=') || has_index?(name) || super
1397
580
  end
1398
581
 
1399
- # Partition a numeric variable into categories.
1400
- # @param [Array<Numeric>] partitions an array whose consecutive elements
1401
- # provide intervals for categories
1402
- # @param [Hash] opts options to cut the partition
1403
- # @option opts [:left, :right] :close_at specifies whether the interval closes at
1404
- # the right side of left side
1405
- # @option opts [Array] :labels names of the categories
1406
- # @return [DaruLite::Vector] numeric variable converted to categorical variable
1407
- # @example
1408
- # heights = DaruLite::Vector.new [30, 35, 32, 50, 42, 51]
1409
- # height_cat = heights.cut [30, 40, 50, 60], labels=['low', 'medium', 'high']
1410
- # # => #<DaruLite::Vector(6)>
1411
- # # 0 low
1412
- # # 1 low
1413
- # # 2 low
1414
- # # 3 high
1415
- # # 4 medium
1416
- # # 5 high
1417
- def cut(partitions, opts = {})
1418
- close_at = opts[:close_at] || :right
1419
- labels = opts[:labels]
1420
- partitions = partitions.to_a
1421
- values = to_a.map { |val| cut_find_category partitions, val, close_at }
1422
- cats = cut_categories(partitions, close_at)
1423
-
1424
- dv = DaruLite::Vector.new values,
1425
- index: @index,
1426
- type: :category,
1427
- categories: cats
1428
-
1429
- # Rename categories if new labels provided
1430
- if labels
1431
- dv.rename_categories cats.zip(labels).to_h
1432
- else
1433
- dv
1434
- end
1435
- end
1436
-
1437
- def positions(*values)
1438
- case values
1439
- when [nil]
1440
- nil_positions
1441
- when [Float::NAN]
1442
- nan_positions
1443
- when [nil, Float::NAN], [Float::NAN, nil]
1444
- nil_positions + nan_positions
1445
- else
1446
- size.times.select { |i| include_with_nan? values, @data[i] }
1447
- end
1448
- end
1449
-
1450
- def group_by(*args)
1451
- to_df.group_by(*args)
1452
- end
1453
-
1454
582
  private
1455
583
 
1456
584
  def copy(values)
@@ -1471,11 +599,6 @@ module DaruLite
1471
599
  end
1472
600
  end
1473
601
 
1474
- # Helper method returning validity of arbitrary value
1475
- def valid_value?(v)
1476
- !((v.respond_to?(:nan?) && v.nan?) || v.nil?)
1477
- end
1478
-
1479
602
  def initialize_vector(source, opts)
1480
603
  index, source = parse_source(source, opts)
1481
604
  set_name opts[:name]
@@ -1506,18 +629,8 @@ module DaruLite
1506
629
  end
1507
630
 
1508
631
  def guard_type_check(value)
1509
- @possibly_changed_type = true \
1510
- if (object? && (value.nil? || value.is_a?(Numeric))) ||
1511
- (numeric? && !value.is_a?(Numeric) && !value.nil?)
1512
- end
1513
-
1514
- def split_value(key, v)
1515
- if v.nil?
1516
- nil
1517
- elsif v.include?(key)
1518
- 1
1519
- else
1520
- 0
632
+ if (object? && (value.nil? || value.is_a?(Numeric))) || (numeric? && !value.is_a?(Numeric) && !value.nil?)
633
+ @possibly_changed_type = true
1521
634
  end
1522
635
  end
1523
636
 
@@ -1665,14 +778,5 @@ module DaruLite
1665
778
  @nil_positions = nil
1666
779
  @nan_positions = nil
1667
780
  end
1668
-
1669
- def resort_index(vector_index, opts)
1670
- if block_given?
1671
- vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) }
1672
- else
1673
- vector_index.sort(&DEFAULT_SORTER)
1674
- end
1675
- .tap { |res| res.reverse! unless opts[:ascending] }
1676
- end
1677
781
  end
1678
782
  end