red_amber 0.1.5 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +33 -5
  3. data/.rubocop_todo.yml +2 -15
  4. data/.yardopts +1 -0
  5. data/CHANGELOG.md +164 -18
  6. data/Gemfile +6 -1
  7. data/README.md +247 -33
  8. data/Rakefile +1 -0
  9. data/benchmark/csv_load_penguins.yml +1 -1
  10. data/doc/DataFrame.md +383 -219
  11. data/doc/Vector.md +247 -37
  12. data/doc/examples_of_red_amber.ipynb +5454 -0
  13. data/doc/image/dataframe/assign.png +0 -0
  14. data/doc/image/dataframe/drop.png +0 -0
  15. data/doc/image/dataframe/pick.png +0 -0
  16. data/doc/image/dataframe/remove.png +0 -0
  17. data/doc/image/dataframe/rename.png +0 -0
  18. data/doc/image/dataframe/slice.png +0 -0
  19. data/doc/image/dataframe_model.png +0 -0
  20. data/doc/image/vector/binary_element_wise.png +0 -0
  21. data/doc/image/vector/unary_aggregation.png +0 -0
  22. data/doc/image/vector/unary_aggregation_w_option.png +0 -0
  23. data/doc/image/vector/unary_element_wise.png +0 -0
  24. data/lib/red-amber.rb +3 -0
  25. data/lib/red_amber/data_frame.rb +62 -10
  26. data/lib/red_amber/data_frame_displayable.rb +86 -9
  27. data/lib/red_amber/data_frame_selectable.rb +151 -32
  28. data/lib/red_amber/data_frame_variable_operation.rb +4 -0
  29. data/lib/red_amber/group.rb +59 -0
  30. data/lib/red_amber/helper.rb +61 -0
  31. data/lib/red_amber/vector.rb +59 -15
  32. data/lib/red_amber/vector_functions.rb +47 -38
  33. data/lib/red_amber/vector_selectable.rb +126 -0
  34. data/lib/red_amber/vector_updatable.rb +125 -0
  35. data/lib/red_amber/version.rb +1 -1
  36. data/lib/red_amber.rb +6 -3
  37. data/red_amber.gemspec +0 -2
  38. metadata +9 -33
  39. data/lib/red_amber/data_frame_helper.rb +0 -64
  40. data/lib/red_amber/data_frame_observation_operation.rb +0 -83
  41. data/lib/red_amber/vector_compensable.rb +0 -68
@@ -1,64 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module RedAmber
4
- # mix-in for the class DataFrame
5
- module DataFrameHelper
6
- private
7
-
8
- def expand_range(args)
9
- ary = args.each_with_object([]) do |e, a|
10
- e.is_a?(Range) ? a.concat(normalized_array(e)) : a.append(e)
11
- end
12
- ary.map do |e|
13
- if e.is_a?(Integer) && e.negative?
14
- e + size
15
- else
16
- e
17
- end
18
- end
19
- end
20
-
21
- def normalized_array(range)
22
- both_end = [range.begin, range.end]
23
- both_end[1] -= 1 if range.exclude_end? && range.end.is_a?(Integer)
24
-
25
- if both_end.any?(Integer) || both_end.all?(&:nil?)
26
- if both_end.any? { |e| e&.>=(size) || e&.<(-size) }
27
- raise DataFrameArgumentError, "Index out of range: #{range} for 0..#{size - 1}"
28
- end
29
-
30
- (0...size).to_a[range]
31
- else
32
- range.to_a
33
- end
34
- end
35
-
36
- def out_of_range?(indeces)
37
- indeces.max >= size || indeces.min < -size
38
- end
39
-
40
- def integers?(enum)
41
- enum.all?(Integer)
42
- end
43
-
44
- def sym_or_str?(enum)
45
- enum.all? { |e| e.is_a?(Symbol) || e.is_a?(String) }
46
- end
47
-
48
- def booleans?(enum)
49
- enum.all? { |e| e.is_a?(TrueClass) || e.is_a?(FalseClass) || e.is_a?(NilClass) }
50
- end
51
-
52
- def create_dataframe_from_vector(key, vector)
53
- DataFrame.new(key => vector.data)
54
- end
55
-
56
- def select_obs_by_boolean(array)
57
- DataFrame.new(@table.filter(array))
58
- end
59
-
60
- def keys_by_booleans(booleans)
61
- keys.select.with_index { |_, i| booleans[i] }
62
- end
63
- end
64
- end
@@ -1,83 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module RedAmber
4
- # mix-ins for the class DataFrame
5
- module DataFrameObservationOperation
6
- # slice and select some observations to create sub DataFrame
7
- def slice(*args, &block)
8
- slicer = args
9
- if block
10
- raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
11
-
12
- slicer = instance_eval(&block)
13
- end
14
- slicer = [slicer].flatten
15
- return remove_all_values if slicer.empty? || slicer[0].nil?
16
-
17
- # filter with same length
18
- booleans = nil
19
- if slicer[0].is_a?(Vector) || slicer[0].is_a?(Arrow::BooleanArray)
20
- booleans = slicer[0].to_a
21
- elsif slicer.size == size && booleans?(slicer)
22
- booleans = slicer
23
- end
24
- return select_obs_by_boolean(booleans) if booleans
25
-
26
- # filter with indexes
27
- slicer = expand_range(slicer)
28
- return map_indices(*slicer) if integers?(slicer)
29
-
30
- raise DataFrameArgumentError, "Invalid argument #{args}"
31
- end
32
-
33
- # remove selected observations to create sub DataFrame
34
- def remove(*args, &block)
35
- remover = args
36
- if block
37
- raise DataFrameArgumentError, 'Must not specify both arguments and block.' unless args.empty?
38
-
39
- remover = instance_eval(&block)
40
- end
41
- remover = [remover].flatten
42
-
43
- return self if remover.empty?
44
-
45
- # filter with same length
46
- booleans = nil
47
- if remover[0].is_a?(Vector) || remover[0].is_a?(Arrow::BooleanArray)
48
- booleans = remover[0].to_a
49
- elsif remover.size == size && booleans?(remover)
50
- booleans = remover
51
- end
52
- if booleans
53
- inverted = booleans.map(&:!)
54
- return select_obs_by_boolean(inverted)
55
- end
56
-
57
- # filter with indexes
58
- slicer = indexes.to_a - expand_range(remover)
59
- return remove_all_values if slicer.empty?
60
- return map_indices(*slicer) if integers?(slicer)
61
-
62
- raise DataFrameArgumentError, "Invalid argument #{args}"
63
- end
64
-
65
- def remove_nil
66
- func = Arrow::Function.find(:drop_null)
67
- DataFrame.new(func.execute([table]).value)
68
- end
69
- alias_method :drop_nil, :remove_nil
70
-
71
- def group(aggregating_keys, func, target_keys)
72
- t = table.group(*aggregating_keys)
73
- RedAmber::DataFrame.new(t.send(func, *target_keys))
74
- end
75
-
76
- private
77
-
78
- # return a DataFrame with same keys as self without values
79
- def remove_all_values
80
- DataFrame.new(keys.each_with_object({}) { |key, h| h[key] = [] })
81
- end
82
- end
83
- end
@@ -1,68 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # Available functions in Arrow are shown by `Arrow::Function.all.map(&:name)`
4
- # reference: https://arrow.apache.org/docs/cpp/compute.html
5
-
6
- module RedAmber
7
- # mix-ins for class Vector
8
- # Functions to make up some data (especially missing) for new data.
9
- module VectorCompensable
10
- # [Ternary]: replace_with(booleans, replacements) => vector
11
- # Replace items selected with a boolean mask
12
- #
13
- # (from Arrow C++ inline doc.)
14
- # Given an array and a boolean mask (either scalar or of equal length),
15
- # along with replacement values (either scalar or array),
16
- # each element of the array for which the corresponding mask element is
17
- # true will be replaced by the next value from the replacements,
18
- # or with null if the mask is null.
19
- # Hence, for replacement arrays, len(replacements) == sum(mask == true).
20
-
21
- def replace_with(booleans, replacements = nil)
22
- specifier =
23
- if booleans.is_a?(Arrow::BooleanArray)
24
- booleans
25
- elsif booleans.is_a?(Vector) && booleans.boolean?
26
- booleans.data
27
- elsif booleans.is_a?(Array) && booleans?(booleans)
28
- Arrow::BooleanArray.new(booleans)
29
- else
30
- raise VectorTypeError, 'Not a valid type'
31
- end
32
- raise VectorArgumentError, 'Booleans size unmatch' if specifier.length != size
33
- raise VectorArgumentError, 'Booleans not have any `true`' unless specifier.any?
34
-
35
- r = Array(replacements) # scalar to [scalar]
36
- r = [nil] if r.empty?
37
-
38
- replacer =
39
- if r.size == 1
40
- case replacements
41
- when Arrow::Array then replacements
42
- when Vector then replacements.data
43
- else
44
- Arrow::Array.new(r * specifier.to_a.count(true)) # broadcast
45
- end
46
- else
47
- Arrow::Array.new(r)
48
- end
49
- replacer = data.class.new(replacer) if replacer.uniq == [nil]
50
-
51
- raise VectorArgumentError, 'Replacements size unmatch' if Array(specifier).count(true) != replacer.length
52
-
53
- values = replacer.class.new(data)
54
-
55
- datum = find('replace_with_mask').execute([values, specifier, replacer])
56
- take_out_element_wise(datum)
57
- end
58
-
59
- # (related functions)
60
- # fill_null_backward, fill_null_forward
61
-
62
- private
63
-
64
- def booleans?(enum)
65
- enum.all? { |e| e.is_a?(TrueClass) || e.is_a?(FalseClass) || e.is_a?(NilClass) }
66
- end
67
- end
68
- end