daru_lite 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +35 -33
  3. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  4. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  5. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  6. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  7. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  8. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  9. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  10. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  11. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  12. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  13. data/lib/daru_lite/data_frame/missable.rb +75 -0
  14. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  15. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  16. data/lib/daru_lite/data_frame/setable.rb +109 -0
  17. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  18. data/lib/daru_lite/dataframe.rb +138 -2353
  19. data/lib/daru_lite/index/index.rb +13 -0
  20. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  21. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  22. data/lib/daru_lite/vector/calculatable.rb +78 -0
  23. data/lib/daru_lite/vector/convertible.rb +77 -0
  24. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  25. data/lib/daru_lite/vector/fetchable.rb +175 -0
  26. data/lib/daru_lite/vector/filterable.rb +128 -0
  27. data/lib/daru_lite/vector/indexable.rb +77 -0
  28. data/lib/daru_lite/vector/iterable.rb +95 -0
  29. data/lib/daru_lite/vector/joinable.rb +17 -0
  30. data/lib/daru_lite/vector/missable.rb +124 -0
  31. data/lib/daru_lite/vector/queryable.rb +45 -0
  32. data/lib/daru_lite/vector/setable.rb +47 -0
  33. data/lib/daru_lite/vector/sortable.rb +113 -0
  34. data/lib/daru_lite/vector.rb +36 -932
  35. data/lib/daru_lite/version.rb +1 -1
  36. data/spec/data_frame/aggregatable_example.rb +65 -0
  37. data/spec/data_frame/buildable_example.rb +109 -0
  38. data/spec/data_frame/calculatable_example.rb +135 -0
  39. data/spec/data_frame/convertible_example.rb +180 -0
  40. data/spec/data_frame/duplicatable_example.rb +111 -0
  41. data/spec/data_frame/fetchable_example.rb +476 -0
  42. data/spec/data_frame/filterable_example.rb +250 -0
  43. data/spec/data_frame/indexable_example.rb +221 -0
  44. data/spec/data_frame/iterable_example.rb +465 -0
  45. data/spec/data_frame/joinable_example.rb +106 -0
  46. data/spec/data_frame/missable_example.rb +47 -0
  47. data/spec/data_frame/pivotable_example.rb +297 -0
  48. data/spec/data_frame/queryable_example.rb +92 -0
  49. data/spec/data_frame/setable_example.rb +482 -0
  50. data/spec/data_frame/sortable_example.rb +350 -0
  51. data/spec/dataframe_spec.rb +181 -3289
  52. data/spec/index/index_spec.rb +8 -0
  53. data/spec/vector/aggregatable_example.rb +27 -0
  54. data/spec/vector/calculatable_example.rb +82 -0
  55. data/spec/vector/convertible_example.rb +126 -0
  56. data/spec/vector/duplicatable_example.rb +48 -0
  57. data/spec/vector/fetchable_example.rb +463 -0
  58. data/spec/vector/filterable_example.rb +165 -0
  59. data/spec/vector/indexable_example.rb +201 -0
  60. data/spec/vector/iterable_example.rb +111 -0
  61. data/spec/vector/joinable_example.rb +25 -0
  62. data/spec/vector/missable_example.rb +88 -0
  63. data/spec/vector/queryable_example.rb +91 -0
  64. data/spec/vector/setable_example.rb +300 -0
  65. data/spec/vector/sortable_example.rb +242 -0
  66. data/spec/vector_spec.rb +111 -1805
  67. metadata +86 -2
@@ -2,12 +2,38 @@ require 'daru_lite/maths/arithmetic/vector'
2
2
  require 'daru_lite/maths/statistics/vector'
3
3
  require 'daru_lite/accessors/array_wrapper'
4
4
  require 'daru_lite/category'
5
+ require 'daru_lite/vector/aggregatable'
6
+ require 'daru_lite/vector/calculatable'
7
+ require 'daru_lite/vector/convertible'
8
+ require 'daru_lite/vector/duplicatable'
9
+ require 'daru_lite/vector/fetchable'
10
+ require 'daru_lite/vector/filterable'
11
+ require 'daru_lite/vector/indexable'
12
+ require 'daru_lite/vector/iterable'
13
+ require 'daru_lite/vector/joinable'
14
+ require 'daru_lite/vector/missable'
15
+ require 'daru_lite/vector/setable'
16
+ require 'daru_lite/vector/sortable'
17
+ require 'daru_lite/vector/queryable'
5
18
 
6
19
  module DaruLite
7
20
  class Vector # rubocop:disable Metrics/ClassLength
8
21
  include Enumerable
9
22
  include DaruLite::Maths::Arithmetic::Vector
10
23
  include DaruLite::Maths::Statistics::Vector
24
+ include DaruLite::Vector::Aggregatable
25
+ include DaruLite::Vector::Calculatable
26
+ include DaruLite::Vector::Convertible
27
+ include DaruLite::Vector::Duplicatable
28
+ include DaruLite::Vector::Fetchable
29
+ include DaruLite::Vector::Filterable
30
+ include DaruLite::Vector::Indexable
31
+ include DaruLite::Vector::Iterable
32
+ include DaruLite::Vector::Joinable
33
+ include DaruLite::Vector::Missable
34
+ include DaruLite::Vector::Setable
35
+ include DaruLite::Vector::Sortable
36
+ include DaruLite::Vector::Queryable
11
37
  extend Gem::Deprecate
12
38
 
13
39
  class << self
@@ -90,46 +116,6 @@ module DaruLite
90
116
  @data.size
91
117
  end
92
118
 
93
- def each(&block)
94
- return to_enum(:each) unless block
95
-
96
- @data.each(&block)
97
- self
98
- end
99
-
100
- def each_index(&block)
101
- return to_enum(:each_index) unless block
102
-
103
- @index.each(&block)
104
- self
105
- end
106
-
107
- def each_with_index(&block)
108
- return to_enum(:each_with_index) unless block
109
-
110
- @data.to_a.zip(@index.to_a).each(&block)
111
-
112
- self
113
- end
114
-
115
- def map!(&block)
116
- return to_enum(:map!) unless block
117
-
118
- @data.map!(&block)
119
- self
120
- end
121
-
122
- def apply_method(method, keys: nil, by_position: true)
123
- vect = keys ? get_sub_vector(keys, by_position: by_position) : self
124
-
125
- case method
126
- when Symbol then vect.send(method)
127
- when Proc then method.call(vect)
128
- else raise
129
- end
130
- end
131
- alias apply_method_on_sub_vector apply_method
132
-
133
119
  # The name of the DaruLite::Vector. String.
134
120
  attr_reader :name
135
121
  # The row index. Can be either DaruLite::Index or DaruLite::MultiIndex.
@@ -185,98 +171,6 @@ module DaruLite
185
171
  end
186
172
  end
187
173
 
188
- # Get one or more elements with specified index or a range.
189
- #
190
- # == Usage
191
- # # For vectors employing single layer Index
192
- #
193
- # v[:one, :two] # => DaruLite::Vector with indexes :one and :two
194
- # v[:one] # => Single element
195
- # v[:one..:three] # => DaruLite::Vector with indexes :one, :two and :three
196
- #
197
- # # For vectors employing hierarchial multi index
198
- #
199
- def [](*input_indexes)
200
- # Get array of positions indexes
201
- positions = @index.pos(*input_indexes)
202
-
203
- # If one object is asked return it
204
- return @data[positions] if positions.is_a? Numeric
205
-
206
- # Form a new Vector using positional indexes
207
- DaruLite::Vector.new(
208
- positions.map { |loc| @data[loc] },
209
- name: @name,
210
- index: @index.subset(*input_indexes), dtype: @dtype
211
- )
212
- end
213
-
214
- # Returns vector of values given positional values
215
- # @param positions [Array<object>] positional values
216
- # @return [object] vector
217
- # @example
218
- # dv = DaruLite::Vector.new 'a'..'e'
219
- # dv.at 0, 1, 2
220
- # # => #<DaruLite::Vector(3)>
221
- # # 0 a
222
- # # 1 b
223
- # # 2 c
224
- def at(*positions)
225
- # to be used to form index
226
- original_positions = positions
227
- positions = coerce_positions(*positions)
228
- validate_positions(*positions)
229
-
230
- if positions.is_a? Integer
231
- @data[positions]
232
- else
233
- values = positions.map { |pos| @data[pos] }
234
- DaruLite::Vector.new values, index: @index.at(*original_positions), dtype: dtype
235
- end
236
- end
237
-
238
- # Change value at given positions
239
- # @param positions [Array<object>] positional values
240
- # @param [object] val value to assign
241
- # @example
242
- # dv = DaruLite::Vector.new 'a'..'e'
243
- # dv.set_at [0, 1], 'x'
244
- # dv
245
- # # => #<DaruLite::Vector(5)>
246
- # # 0 x
247
- # # 1 x
248
- # # 2 c
249
- # # 3 d
250
- # # 4 e
251
- def set_at(positions, val)
252
- validate_positions(*positions)
253
- positions.map { |pos| @data[pos] = val }
254
- update_position_cache
255
- end
256
-
257
- # Just like in Hashes, you can specify the index label of the DaruLite::Vector
258
- # and assign an element an that place in the DaruLite::Vector.
259
- #
260
- # == Usage
261
- #
262
- # v = DaruLite::Vector.new([1,2,3], index: [:a, :b, :c])
263
- # v[:a] = 999
264
- # #=>
265
- # ##<DaruLite::Vector:90257920 @name = nil @size = 3 >
266
- # # nil
267
- # # a 999
268
- # # b 2
269
- # # c 3
270
- def []=(*indexes, val)
271
- cast(dtype: :array) if val.nil? && dtype != :array
272
-
273
- guard_type_check(val)
274
-
275
- modify_vector(indexes, val)
276
-
277
- update_position_cache
278
- end
279
-
280
174
  # Two vectors are equal if they have the exact same index values corresponding
281
175
  # with the exact same elements. Name is ignored.
282
176
  def ==(other)
@@ -367,85 +261,6 @@ module DaruLite
367
261
  )
368
262
  end
369
263
 
370
- # Return a new vector based on the contents of a boolean array. Use with the
371
- # comparator methods to obtain meaningful results. See this notebook for
372
- # a good overview of using #where.
373
- #
374
- # @param bool_array [DaruLite::Core::Query::BoolArray, Array<TrueClass, FalseClass>] The
375
- # collection containing the true of false values. Each element in the Vector
376
- # corresponding to a `true` in the bool_arry will be returned alongwith it's
377
- # index.
378
- # @example Usage of #where.
379
- # vector = DaruLite::Vector.new([2,4,5,51,5,16,2,5,3,2,1,5,2,5,2,1,56,234,6,21])
380
- #
381
- # # Simple logic statement passed to #where.
382
- # vector.where(vector.eq(5).or(vector.eq(1)))
383
- # # =>
384
- # ##<DaruLite::Vector:77626210 @name = nil @size = 7 >
385
- # # nil
386
- # # 2 5
387
- # # 4 5
388
- # # 7 5
389
- # # 10 1
390
- # # 11 5
391
- # # 13 5
392
- # # 15 1
393
- #
394
- # # A somewhat more complex logic statement
395
- # vector.where((vector.eq(5) | vector.lteq(1)) & vector.in([4,5,1]))
396
- # #=>
397
- # ##<DaruLite::Vector:81072310 @name = nil @size = 7 >
398
- # # nil
399
- # # 2 5
400
- # # 4 5
401
- # # 7 5
402
- # # 10 1
403
- # # 11 5
404
- # # 13 5
405
- # # 15 1
406
- def where(bool_array)
407
- DaruLite::Core::Query.vector_where self, bool_array
408
- end
409
-
410
- # Return a new vector based on the contents of a boolean array and &block.
411
- #
412
- # @param bool_array [DaruLite::Core::Query::BoolArray, Array<TrueClass, FalseClass>, &block] The
413
- # collection containing the true of false values. Each element in the Vector
414
- # corresponding to a `true` in the bool_array will be returned along with it's
415
- # index. The &block may contain manipulative functions for the Vector elements.
416
- #
417
- # @return [DaruLite::Vector]
418
- #
419
- # @example Usage of #apply_where.
420
- # dv = DaruLite::Vector.new ['3 days', '5 weeks', '2 weeks']
421
- # dv = dv.apply_where(dv.match /weeks/) { |x| "#{x.split.first.to_i * 7} days" }
422
- # # =>
423
- # ##<DaruLite::Vector(3)>
424
- # # 0 3 days
425
- # # 1 35 days
426
- # # 2 14 days
427
- def apply_where(bool_array, &block)
428
- DaruLite::Core::Query.vector_apply_where self, bool_array, &block
429
- end
430
-
431
- def head(q = 10)
432
- self[0..(q - 1)]
433
- end
434
-
435
- def tail(q = 10)
436
- start = [size - q, 0].max
437
- self[start..(size - 1)]
438
- end
439
-
440
- def last(q = 1)
441
- # The Enumerable mixin dose not provide the last method.
442
- tail(q)
443
- end
444
-
445
- def empty?
446
- @index.empty?
447
- end
448
-
449
264
  def numeric?
450
265
  type == :numeric
451
266
  end
@@ -454,26 +269,6 @@ module DaruLite
454
269
  type == :object
455
270
  end
456
271
 
457
- # Reports whether missing data is present in the Vector.
458
- def has_missing_data?
459
- !indexes(*DaruLite::MISSING_VALUES).empty?
460
- end
461
- alias flawed? has_missing_data?
462
- deprecate :has_missing_data?, :include_values?, 2016, 10
463
- deprecate :flawed?, :include_values?, 2016, 10
464
-
465
- # Check if any one of mentioned values occur in the vector
466
- # @param values [Array] values to check for
467
- # @return [true, false] returns true if any one of specified values
468
- # occur in the vector
469
- # @example
470
- # dv = DaruLite::Vector.new [1, 2, 3, 4, nil]
471
- # dv.include_values? nil, Float::NAN
472
- # # => true
473
- def include_values?(*values)
474
- values.any? { |v| include_with_nan? @data, v }
475
- end
476
-
477
272
  # @note Do not use it to check for Float::NAN as
478
273
  # Float::NAN == Float::NAN is false
479
274
  # Return vector of booleans with value at ith position is either
@@ -494,18 +289,6 @@ module DaruLite
494
289
  DaruLite::Vector.new values.map { |v| eq(v) }.inject(:|)
495
290
  end
496
291
 
497
- # Append an element to the vector by specifying the element and index
498
- def concat(element, index)
499
- raise IndexError, 'Expected new unique index' if @index.include? index
500
-
501
- @index |= [index]
502
- @data[@index[index]] = element
503
-
504
- update_position_cache
505
- end
506
- alias push concat
507
- alias << concat
508
-
509
292
  # Cast a vector to a new data type.
510
293
  #
511
294
  # == Options
@@ -531,6 +314,14 @@ module DaruLite
531
314
  update_position_cache
532
315
  end
533
316
 
317
+ # Delete element by position
318
+ def delete_at_position(position)
319
+ @data.delete_at(position)
320
+ @index = @index.delete_at(position)
321
+
322
+ update_position_cache
323
+ end
324
+
534
325
  # The type of data contained in the vector. Can be :object.
535
326
  #
536
327
  # Running through the data to figure out the kind of data is delayed to the
@@ -560,148 +351,6 @@ module DaruLite
560
351
  type == :category
561
352
  end
562
353
 
563
- # Get index of element
564
- def index_of(element)
565
- case dtype
566
- when :array then @index.key(@data.index { |x| x.eql? element })
567
- else @index.key @data.index(element)
568
- end
569
- end
570
-
571
- # Keep only unique elements of the vector alongwith their indexes.
572
- def uniq
573
- uniq_vector = @data.uniq
574
- new_index = uniq_vector.map { |element| index_of(element) }
575
-
576
- DaruLite::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
577
- end
578
-
579
- def any?(&block)
580
- @data.data.any?(&block)
581
- end
582
-
583
- def all?(&block)
584
- @data.data.all?(&block)
585
- end
586
-
587
- # Sorts a vector according to its values. If a block is specified, the contents
588
- # will be evaluated and data will be swapped whenever the block evaluates
589
- # to *true*. Defaults to ascending order sorting. Any missing values will be
590
- # put at the end of the vector. Preserves indexing. Default sort algorithm is
591
- # quick sort.
592
- #
593
- # == Options
594
- #
595
- # * +:ascending+ - if false, will sort in descending order. Defaults to true.
596
- #
597
- # * +:type+ - Specify the sorting algorithm. Only supports quick_sort for now.
598
- # == Usage
599
- #
600
- # v = DaruLite::Vector.new ["My first guitar", "jazz", "guitar"]
601
- # # Say you want to sort these strings by length.
602
- # v.sort(ascending: false) { |a,b| a.length <=> b.length }
603
- def sort(opts = {}, &block)
604
- opts = { ascending: true }.merge(opts)
605
-
606
- vector_index = resort_index(@data.each_with_index, opts, &block)
607
- vector, index = vector_index.transpose
608
-
609
- index = @index.reorder index
610
-
611
- DaruLite::Vector.new(vector, index: index, name: @name, dtype: @dtype)
612
- end
613
-
614
- # Sorts the vector according to it's`Index` values. Defaults to ascending
615
- # order sorting.
616
- #
617
- # @param [Hash] opts the options for sort_by_index method.
618
- # @option opts [Boolean] :ascending false, will sort `index` in
619
- # descending order.
620
- #
621
- # @return [Vector] new sorted `Vector` according to the index values.
622
- #
623
- # @example
624
- #
625
- # dv = DaruLite::Vector.new [11, 13, 12], index: [23, 21, 22]
626
- # # Say you want to sort index in ascending order
627
- # dv.sort_by_index(ascending: true)
628
- # #=> DaruLite::Vector.new [13, 12, 11], index: [21, 22, 23]
629
- # # Say you want to sort index in descending order
630
- # dv.sort_by_index(ascending: false)
631
- # #=> DaruLite::Vector.new [11, 12, 13], index: [23, 22, 21]
632
- def sort_by_index(opts = {})
633
- opts = { ascending: true }.merge(opts)
634
- _, new_order = resort_index(@index.each_with_index, opts).transpose
635
-
636
- reorder new_order
637
- end
638
-
639
- DEFAULT_SORTER = lambda { |(lv, li), (rv, ri)|
640
- if lv.nil? && rv.nil?
641
- li <=> ri
642
- elsif lv.nil?
643
- -1
644
- elsif rv.nil?
645
- 1
646
- else
647
- lv <=> rv
648
- end
649
- }
650
-
651
- # Just sort the data and get an Array in return using Enumerable#sort.
652
- # Non-destructive.
653
- # :nocov:
654
- def sorted_data(&block)
655
- @data.to_a.sort(&block)
656
- end
657
- # :nocov:
658
-
659
- # Like map, but returns a DaruLite::Vector with the returned values.
660
- def recode(dt = nil, &block)
661
- return to_enum(:recode, dt) unless block
662
-
663
- dup.recode! dt, &block
664
- end
665
-
666
- # Destructive version of recode!
667
- def recode!(dt = nil, &block)
668
- return to_enum(:recode!, dt) unless block
669
-
670
- @data.map!(&block).data
671
- @data = cast_vector_to(dt || @dtype)
672
- self
673
- end
674
-
675
- # Delete an element if block returns true. Destructive.
676
- def delete_if
677
- return to_enum(:delete_if) unless block_given?
678
-
679
- keep_e, keep_i = each_with_index.reject { |n, _i| yield(n) }.transpose
680
-
681
- @data = cast_vector_to @dtype, keep_e
682
- @index = DaruLite::Index.new(keep_i)
683
-
684
- update_position_cache
685
-
686
- self
687
- end
688
-
689
- # Keep an element if block returns true. Destructive.
690
- def keep_if
691
- return to_enum(:keep_if) unless block_given?
692
-
693
- delete_if { |val| !yield(val) }
694
- end
695
-
696
- # Reports all values that doesn't comply with a condition.
697
- # Returns a hash with the index of data and the invalid data.
698
- def verify
699
- (0...size)
700
- .map { |i| [i, @data[i]] }
701
- .reject { |_i, val| yield(val) }
702
- .to_h
703
- end
704
-
705
354
  # Return an Array with the data splitted by a separator.
706
355
  # a=DaruLite::Vector.new(["a,b","c,d","a,b","d"])
707
356
  # a.splitted
@@ -719,93 +368,6 @@ module DaruLite
719
368
  end
720
369
  end
721
370
 
722
- # Returns a hash of Vectors, defined by the different values
723
- # defined on the fields
724
- # Example:
725
- #
726
- # a=DaruLite::Vector.new(["a,b","c,d","a,b"])
727
- # a.split_by_separator
728
- # => {"a"=>#<DaruLite::Vector:0x7f2dbcc09d88
729
- # @data=[1, 0, 1]>,
730
- # "b"=>#<DaruLite::Vector:0x7f2dbcc09c48
731
- # @data=[1, 1, 0]>,
732
- # "c"=>#<DaruLite::Vector:0x7f2dbcc09b08
733
- # @data=[0, 1, 1]>}
734
- #
735
- def split_by_separator(sep = ',')
736
- split_data = splitted sep
737
- split_data
738
- .flatten.uniq.compact.to_h do |key|
739
- [
740
- key,
741
- DaruLite::Vector.new(split_data.map { |v| split_value(key, v) })
742
- ]
743
- end
744
- end
745
-
746
- def split_by_separator_freq(sep = ',')
747
- split_by_separator(sep).transform_values do |v|
748
- v.sum(&:to_i)
749
- end
750
- end
751
-
752
- def reset_index!
753
- @index = DaruLite::Index.new(Array.new(size) { |i| i })
754
- self
755
- end
756
-
757
- # Replace all nils in the vector with the value passed as an argument. Destructive.
758
- # See #replace_nils for non-destructive version
759
- #
760
- # == Arguments
761
- #
762
- # * +replacement+ - The value which should replace all nils
763
- def replace_nils!(replacement)
764
- indexes(*DaruLite::MISSING_VALUES).each do |idx|
765
- self[idx] = replacement
766
- end
767
-
768
- self
769
- end
770
-
771
- # Rolling fillna
772
- # replace all Float::NAN and NIL values with the preceeding or following value
773
- #
774
- # @param direction [Symbol] (:forward, :backward) whether replacement value is preceeding or following
775
- #
776
- # @example
777
- # dv = DaruLite::Vector.new([1, 2, 1, 4, nil, Float::NAN, 3, nil, Float::NAN])
778
- #
779
- # 2.3.3 :068 > dv.rolling_fillna(:forward)
780
- # => #<DaruLite::Vector(9)>
781
- # 0 1
782
- # 1 2
783
- # 2 1
784
- # 3 4
785
- # 4 4
786
- # 5 4
787
- # 6 3
788
- # 7 3
789
- # 8 3
790
- #
791
- def rolling_fillna!(direction = :forward)
792
- enum = direction == :forward ? index : index.reverse_each
793
- last_valid_value = 0
794
- enum.each do |idx|
795
- if valid_value?(self[idx])
796
- last_valid_value = self[idx]
797
- else
798
- self[idx] = last_valid_value
799
- end
800
- end
801
- self
802
- end
803
-
804
- # Non-destructive version of rolling_fillna!
805
- def rolling_fillna(direction = :forward)
806
- dup.rolling_fillna!(direction)
807
- end
808
-
809
371
  # Lags the series by `k` periods.
810
372
  #
811
373
  # Lags the series by `k` periods, "shifting" data and inserting `nil`s
@@ -845,187 +407,6 @@ module DaruLite
845
407
  end
846
408
  end
847
409
 
848
- def detach_index
849
- DaruLite::DataFrame.new(
850
- index: @index.to_a,
851
- values: @data.to_a
852
- )
853
- end
854
-
855
- # Non-destructive version of #replace_nils!
856
- def replace_nils(replacement)
857
- dup.replace_nils!(replacement)
858
- end
859
-
860
- # number of non-missing elements
861
- def n_valid
862
- size - indexes(*DaruLite::MISSING_VALUES).size
863
- end
864
- deprecate :n_valid, :count_values, 2016, 10
865
-
866
- # Count the number of values specified
867
- # @param values [Array] values to count for
868
- # @return [Integer] the number of times the values mentioned occurs
869
- # @example
870
- # dv = DaruLite::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
871
- # dv.count_values nil
872
- # # => 2
873
- def count_values(*values)
874
- positions(*values).size
875
- end
876
-
877
- # Returns *true* if an index exists
878
- def has_index?(index)
879
- @index.include? index
880
- end
881
-
882
- # @param keys [Array] can be positions (if by_position is true) or indexes (if by_position if false)
883
- # @return [DaruLite::Vector]
884
- def get_sub_vector(keys, by_position: true)
885
- return DaruLite::Vector.new([]) if keys == []
886
-
887
- keys = @index.pos(*keys) unless by_position
888
-
889
- sub_vect = at(*keys)
890
- sub_vect = DaruLite::Vector.new([sub_vect]) unless sub_vect.is_a?(DaruLite::Vector)
891
-
892
- sub_vect
893
- end
894
-
895
- # @return [DaruLite::DataFrame] the vector as a single-vector dataframe
896
- def to_df
897
- DaruLite::DataFrame.new({ @name => @data }, name: @name, index: @index)
898
- end
899
-
900
- # Convert Vector to a horizontal or vertical Ruby Matrix.
901
- #
902
- # == Arguments
903
- #
904
- # * +axis+ - Specify whether you want a *:horizontal* or a *:vertical* matrix.
905
- def to_matrix(axis = :horizontal)
906
- case axis
907
- when :horizontal
908
- Matrix[to_a]
909
- when :vertical
910
- Matrix.columns([to_a])
911
- else
912
- raise ArgumentError, "axis should be either :horizontal or :vertical, not #{axis}"
913
- end
914
- end
915
-
916
- # Convert to hash (explicit). Hash keys are indexes and values are the correspoding elements
917
- def to_h
918
- @index.to_h { |index| [index, self[index]] }
919
- end
920
-
921
- # Return an array
922
- def to_a
923
- @data.to_a
924
- end
925
-
926
- # Convert the hash from to_h to json
927
- def to_json(*)
928
- to_h.to_json
929
- end
930
-
931
- # Convert to html for iruby
932
- def to_html(threshold = 30)
933
- table_thead = to_html_thead
934
- table_tbody = to_html_tbody(threshold)
935
- path = if index.is_a?(MultiIndex)
936
- File.expand_path('iruby/templates/vector_mi.html.erb', __dir__)
937
- else
938
- File.expand_path('iruby/templates/vector.html.erb', __dir__)
939
- end
940
- ERB.new(File.read(path).strip).result(binding)
941
- end
942
-
943
- def to_html_thead
944
- table_thead_path =
945
- if index.is_a?(MultiIndex)
946
- File.expand_path('iruby/templates/vector_mi_thead.html.erb', __dir__)
947
- else
948
- File.expand_path('iruby/templates/vector_thead.html.erb', __dir__)
949
- end
950
- ERB.new(File.read(table_thead_path).strip).result(binding)
951
- end
952
-
953
- def to_html_tbody(threshold = 30)
954
- table_tbody_path =
955
- if index.is_a?(MultiIndex)
956
- File.expand_path('iruby/templates/vector_mi_tbody.html.erb', __dir__)
957
- else
958
- File.expand_path('iruby/templates/vector_tbody.html.erb', __dir__)
959
- end
960
- ERB.new(File.read(table_tbody_path).strip).result(binding)
961
- end
962
-
963
- def to_s
964
- "#<#{self.class}#{": #{@name}" if @name}(#{size})#{':category' if category?}>"
965
- end
966
-
967
- # Create a summary of the Vector
968
- # @param indent_level [Fixnum] indent level
969
- # @return [String] String containing the summary of the Vector
970
- # @example
971
- # dv = DaruLite::Vector.new [1, 2, 3]
972
- # puts dv.summary
973
- #
974
- # # =
975
- # # n :3
976
- # # non-missing:3
977
- # # median: 2
978
- # # mean: 2.0000
979
- # # std.dev.: 1.0000
980
- # # std.err.: 0.5774
981
- # # skew: 0.0000
982
- # # kurtosis: -2.3333
983
- def summary(indent_level = 0)
984
- non_missing = size - count_values(*DaruLite::MISSING_VALUES)
985
- summary = (' =' * indent_level) + "= #{name}" \
986
- "\n n :#{size}" \
987
- "\n non-missing:#{non_missing}"
988
- case type
989
- when :object
990
- summary << object_summary
991
- when :numeric
992
- summary << numeric_summary
993
- end
994
- summary.split("\n").join("\n#{' ' * indent_level}")
995
- end
996
-
997
- # Displays summary for an object type Vector
998
- # @return [String] String containing object vector summary
999
- def object_summary
1000
- nval = count_values(*DaruLite::MISSING_VALUES)
1001
- summary = "\n factors: #{factors.to_a.join(',')}" \
1002
- "\n mode: #{mode.to_a.join(',')}" \
1003
- "\n Distribution\n"
1004
-
1005
- data = frequencies.sort.each_with_index.map do |v, k|
1006
- [k, v, format('%0.2f%%', ((nval.zero? ? 1 : v.quo(nval)) * 100))]
1007
- end
1008
-
1009
- summary + Formatters::Table.format(data)
1010
- end
1011
-
1012
- # Displays summary for an numeric type Vector
1013
- # @return [String] String containing numeric vector summary
1014
- def numeric_summary
1015
- summary = "\n median: #{median}" +
1016
- format("\n mean: %0.4f", mean)
1017
- if sd
1018
- summary << (format("\n std.dev.: %0.4f", sd) +
1019
- format("\n std.err.: %0.4f", se))
1020
- end
1021
-
1022
- if count_values(*DaruLite::MISSING_VALUES).zero?
1023
- summary << (format("\n skew: %0.4f", skew) +
1024
- format("\n kurtosis: %0.4f", kurtosis))
1025
- end
1026
- summary
1027
- end
1028
-
1029
410
  # Over rides original inspect for pretty printing in irb
1030
411
  def inspect(spacing = 20, threshold = 15)
1031
412
  row_headers = index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a
@@ -1040,68 +421,6 @@ module DaruLite
1040
421
  )
1041
422
  end
1042
423
 
1043
- # Sets new index for vector. Preserves index->value correspondence.
1044
- # Sets nil for new index keys absent from original index.
1045
- # @note Unlike #reorder! which takes positions as input it takes
1046
- # index as an input to reorder the vector
1047
- # @param [DaruLite::Index, DaruLite::MultiIndex] new_index new index to order with
1048
- # @return [DaruLite::Vector] vector reindexed with new index
1049
- def reindex!(new_index)
1050
- values = []
1051
- each_with_index do |val, i|
1052
- values[new_index[i]] = val if new_index.include?(i)
1053
- end
1054
- values.fill(nil, values.size, new_index.size - values.size)
1055
-
1056
- @data = cast_vector_to @dtype, values
1057
- @index = new_index
1058
-
1059
- update_position_cache
1060
-
1061
- self
1062
- end
1063
-
1064
- # Reorder the vector with given positions
1065
- # @note Unlike #reindex! which takes index as input, it takes
1066
- # positions as an input to reorder the vector
1067
- # @param [Array] order the order to reorder the vector with
1068
- # @return reordered vector
1069
- # @example
1070
- # dv = DaruLite::Vector.new [3, 2, 1], index: ['c', 'b', 'a']
1071
- # dv.reorder! [2, 1, 0]
1072
- # # => #<DaruLite::Vector(3)>
1073
- # # a 1
1074
- # # b 2
1075
- # # c 3
1076
- def reorder!(order)
1077
- @index = @index.reorder order
1078
- data_array = order.map { |i| @data[i] }
1079
- @data = cast_vector_to @dtype, data_array, @nm_dtype
1080
- update_position_cache
1081
- self
1082
- end
1083
-
1084
- # Non-destructive version of #reorder!
1085
- def reorder(order)
1086
- dup.reorder! order
1087
- end
1088
-
1089
- # Create a new vector with a different index, and preserve the indexing of
1090
- # current elements.
1091
- def reindex(new_index)
1092
- dup.reindex!(new_index)
1093
- end
1094
-
1095
- def index=(idx)
1096
- idx = Index.coerce(idx)
1097
-
1098
- raise ArgumentError, "Size of supplied index #{idx.size} does not match size of Vector" if idx.size != size
1099
- raise ArgumentError, 'Can only assign type Index and its subclasses.' unless idx.is_a?(DaruLite::Index)
1100
-
1101
- @index = idx
1102
- self
1103
- end
1104
-
1105
424
  # Give the vector a new name
1106
425
  #
1107
426
  # @param new_name [Symbol] The new name.
@@ -1112,12 +431,6 @@ module DaruLite
1112
431
 
1113
432
  alias name= rename
1114
433
 
1115
- # Duplicated a vector
1116
- # @return [DaruLite::Vector] duplicated vector
1117
- def dup
1118
- DaruLite::Vector.new @data.dup, name: @name, index: @index.dup
1119
- end
1120
-
1121
434
  # == Bootstrap
1122
435
  # Generate +nr+ resamples (with replacement) of size +s+
1123
436
  # from vector, computing each estimate from +estimators+
@@ -1195,130 +508,6 @@ module DaruLite
1195
508
  DaruLite::DataFrame.new ps
1196
509
  end
1197
510
 
1198
- # Returns an array of boolean values, indicating whether each element
1199
- # of the vector matches the given +regexp+.
1200
- #
1201
- # @param regexp [Regexp] A regular matching expression. For example, +/weeks/+.
1202
- #
1203
- # @return [Array] Containing +true+ or +false+ for each element, according to the match with the given +regexp+
1204
- #
1205
- # @example
1206
- # dv = DaruLite::Vector.new(['3 days', '5 weeks', '2 weeks'])
1207
- # dv.match(/weeks/)
1208
- #
1209
- # # => [false, true, true]
1210
- def match(regexp)
1211
- @data.map { |value| !!(value =~ regexp) }
1212
- end
1213
-
1214
- # Creates a new vector consisting only of non-nil data
1215
- #
1216
- # == Arguments
1217
- #
1218
- # @param as_a [Symbol] Passing :array will return only the elements
1219
- # as an Array. Otherwise will return a DaruLite::Vector.
1220
- #
1221
- # @param _duplicate [Boolean] In case no missing data is found in the
1222
- # vector, setting this to false will return the same vector.
1223
- # Otherwise, a duplicate will be returned irrespective of
1224
- # presence of missing data.
1225
-
1226
- def only_valid(as_a = :vector, _duplicate = true)
1227
- # FIXME: Now duplicate is just ignored.
1228
- # There are no spec that fail on this case, so I'll leave it
1229
- # this way for now - zverok, 2016-05-07
1230
-
1231
- new_index = @index.to_a - indexes(*DaruLite::MISSING_VALUES)
1232
- new_vector = new_index.map { |idx| self[idx] }
1233
-
1234
- if as_a == :vector
1235
- DaruLite::Vector.new new_vector, index: new_index, name: @name, dtype: dtype
1236
- else
1237
- new_vector
1238
- end
1239
- end
1240
- deprecate :only_valid, :reject_values, 2016, 10
1241
-
1242
- # Return a vector with specified values removed
1243
- # @param values [Array] values to reject from resultant vector
1244
- # @return [DaruLite::Vector] vector with specified values removed
1245
- # @example
1246
- # dv = DaruLite::Vector.new [1, 2, nil, Float::NAN]
1247
- # dv.reject_values nil, Float::NAN
1248
- # # => #<DaruLite::Vector(2)>
1249
- # # 0 1
1250
- # # 1 2
1251
- def reject_values(*values)
1252
- resultant_pos = size.times.to_a - positions(*values)
1253
- dv = at(*resultant_pos)
1254
- # Handle the case when number of positions is 1
1255
- # and hence #at doesn't return a vector
1256
- if dv.is_a?(DaruLite::Vector)
1257
- dv
1258
- else
1259
- pos = resultant_pos.first
1260
- at(pos..pos)
1261
- end
1262
- end
1263
-
1264
- # Return indexes of values specified
1265
- # @param values [Array] values to find indexes for
1266
- # @return [Array] array of indexes of values specified
1267
- # @example
1268
- # dv = DaruLite::Vector.new [1, 2, nil, Float::NAN], index: 11..14
1269
- # dv.indexes nil, Float::NAN
1270
- # # => [13, 14]
1271
- def indexes(*values)
1272
- index.to_a.values_at(*positions(*values))
1273
- end
1274
-
1275
- # Replaces specified values with a new value
1276
- # @param [Array] old_values array of values to replace
1277
- # @param [object] new_value new value to replace with
1278
- # @note It performs the replace in place.
1279
- # @return [DaruLite::Vector] Same vector itself with values
1280
- # replaced with new value
1281
- # @example
1282
- # dv = DaruLite::Vector.new [1, 2, :a, :b]
1283
- # dv.replace_values [:a, :b], nil
1284
- # dv
1285
- # # =>
1286
- # # #<DaruLite::Vector:19903200 @name = nil @metadata = {} @size = 4 >
1287
- # # nil
1288
- # # 0 1
1289
- # # 1 2
1290
- # # 2 nil
1291
- # # 3 nil
1292
- def replace_values(old_values, new_value)
1293
- old_values = [old_values] unless old_values.is_a? Array
1294
- size.times do |pos|
1295
- set_at([pos], new_value) if include_with_nan? old_values, at(pos)
1296
- end
1297
- self
1298
- end
1299
-
1300
- # Returns a Vector containing only missing data (preserves indexes).
1301
- def only_missing(as_a = :vector)
1302
- case as_a
1303
- when :vector
1304
- self[*indexes(*DaruLite::MISSING_VALUES)]
1305
- when :array
1306
- self[*indexes(*DaruLite::MISSING_VALUES)].to_a
1307
- end
1308
- end
1309
- deprecate :only_missing, nil, 2016, 10
1310
-
1311
- # Returns a Vector with only numerical data. Missing data is included
1312
- # but non-Numeric objects are excluded. Preserves index.
1313
- def only_numerics
1314
- numeric_indexes =
1315
- each_with_index
1316
- .select { |v, _i| v.is_a?(Numeric) || v.nil? }
1317
- .map(&:last)
1318
-
1319
- self[*numeric_indexes]
1320
- end
1321
-
1322
511
  DATE_REGEXP = /^(\d{2}-\d{2}-\d{4}|\d{4}-\d{2}-\d{2})$/.freeze
1323
512
 
1324
513
  # Returns the database type for the vector, according to its content
@@ -1335,12 +524,6 @@ module DaruLite
1335
524
  end
1336
525
  end
1337
526
 
1338
- # Copies the structure of the vector (i.e. the index, size, etc.) and fills all
1339
- # values with nils.
1340
- def clone_structure
1341
- DaruLite::Vector.new(([nil] * size), name: @name, index: @index.dup)
1342
- end
1343
-
1344
527
  # Save the vector to a file
1345
528
  #
1346
529
  # == Arguments
@@ -1396,61 +579,6 @@ module DaruLite
1396
579
  name.to_s.end_with?('=') || has_index?(name) || super
1397
580
  end
1398
581
 
1399
- # Partition a numeric variable into categories.
1400
- # @param [Array<Numeric>] partitions an array whose consecutive elements
1401
- # provide intervals for categories
1402
- # @param [Hash] opts options to cut the partition
1403
- # @option opts [:left, :right] :close_at specifies whether the interval closes at
1404
- # the right side of left side
1405
- # @option opts [Array] :labels names of the categories
1406
- # @return [DaruLite::Vector] numeric variable converted to categorical variable
1407
- # @example
1408
- # heights = DaruLite::Vector.new [30, 35, 32, 50, 42, 51]
1409
- # height_cat = heights.cut [30, 40, 50, 60], labels: ['low', 'medium', 'high']
1410
- # # => #<DaruLite::Vector(6)>
1411
- # # 0 low
1412
- # # 1 low
1413
- # # 2 low
1414
- # # 3 high
1415
- # # 4 medium
1416
- # # 5 high
1417
- def cut(partitions, opts = {})
1418
- close_at = opts[:close_at] || :right
1419
- labels = opts[:labels]
1420
- partitions = partitions.to_a
1421
- values = to_a.map { |val| cut_find_category partitions, val, close_at }
1422
- cats = cut_categories(partitions, close_at)
1423
-
1424
- dv = DaruLite::Vector.new values,
1425
- index: @index,
1426
- type: :category,
1427
- categories: cats
1428
-
1429
- # Rename categories if new labels provided
1430
- if labels
1431
- dv.rename_categories cats.zip(labels).to_h
1432
- else
1433
- dv
1434
- end
1435
- end
1436
-
1437
- def positions(*values)
1438
- case values
1439
- when [nil]
1440
- nil_positions
1441
- when [Float::NAN]
1442
- nan_positions
1443
- when [nil, Float::NAN], [Float::NAN, nil]
1444
- nil_positions + nan_positions
1445
- else
1446
- size.times.select { |i| include_with_nan? values, @data[i] }
1447
- end
1448
- end
1449
-
1450
- def group_by(*args)
1451
- to_df.group_by(*args)
1452
- end
1453
-
1454
582
  private
1455
583
 
1456
584
  def copy(values)
@@ -1471,11 +599,6 @@ module DaruLite
1471
599
  end
1472
600
  end
1473
601
 
1474
- # Helper method returning validity of arbitrary value
1475
- def valid_value?(v)
1476
- !((v.respond_to?(:nan?) && v.nan?) || v.nil?)
1477
- end
1478
-
1479
602
  def initialize_vector(source, opts)
1480
603
  index, source = parse_source(source, opts)
1481
604
  set_name opts[:name]
@@ -1506,18 +629,8 @@ module DaruLite
1506
629
  end
1507
630
 
1508
631
  def guard_type_check(value)
1509
- @possibly_changed_type = true \
1510
- if (object? && (value.nil? || value.is_a?(Numeric))) ||
1511
- (numeric? && !value.is_a?(Numeric) && !value.nil?)
1512
- end
1513
-
1514
- def split_value(key, v)
1515
- if v.nil?
1516
- nil
1517
- elsif v.include?(key)
1518
- 1
1519
- else
1520
- 0
632
+ if (object? && (value.nil? || value.is_a?(Numeric))) || (numeric? && !value.is_a?(Numeric) && !value.nil?)
633
+ @possibly_changed_type = true
1521
634
  end
1522
635
  end
1523
636
 
@@ -1665,14 +778,5 @@ module DaruLite
1665
778
  @nil_positions = nil
1666
779
  @nan_positions = nil
1667
780
  end
1668
-
1669
- def resort_index(vector_index, opts)
1670
- if block_given?
1671
- vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) }
1672
- else
1673
- vector_index.sort(&DEFAULT_SORTER)
1674
- end
1675
- .tap { |res| res.reverse! unless opts[:ascending] }
1676
- end
1677
781
  end
1678
782
  end