daru_lite 0.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
  3. data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  4. data/.github/workflows/ci.yml +20 -0
  5. data/.rubocop_todo.yml +35 -33
  6. data/README.md +19 -115
  7. data/daru_lite.gemspec +1 -0
  8. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  9. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  10. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  11. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  12. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  13. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  14. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  15. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  16. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  17. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  18. data/lib/daru_lite/data_frame/missable.rb +75 -0
  19. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  20. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  21. data/lib/daru_lite/data_frame/setable.rb +109 -0
  22. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  23. data/lib/daru_lite/dataframe.rb +142 -2355
  24. data/lib/daru_lite/index/index.rb +13 -0
  25. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  26. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  27. data/lib/daru_lite/vector/calculatable.rb +78 -0
  28. data/lib/daru_lite/vector/convertible.rb +77 -0
  29. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  30. data/lib/daru_lite/vector/fetchable.rb +175 -0
  31. data/lib/daru_lite/vector/filterable.rb +128 -0
  32. data/lib/daru_lite/vector/indexable.rb +77 -0
  33. data/lib/daru_lite/vector/iterable.rb +95 -0
  34. data/lib/daru_lite/vector/joinable.rb +17 -0
  35. data/lib/daru_lite/vector/missable.rb +124 -0
  36. data/lib/daru_lite/vector/queryable.rb +45 -0
  37. data/lib/daru_lite/vector/setable.rb +47 -0
  38. data/lib/daru_lite/vector/sortable.rb +113 -0
  39. data/lib/daru_lite/vector.rb +36 -932
  40. data/lib/daru_lite/version.rb +1 -1
  41. data/spec/data_frame/aggregatable_example.rb +65 -0
  42. data/spec/data_frame/buildable_example.rb +109 -0
  43. data/spec/data_frame/calculatable_example.rb +135 -0
  44. data/spec/data_frame/convertible_example.rb +180 -0
  45. data/spec/data_frame/duplicatable_example.rb +111 -0
  46. data/spec/data_frame/fetchable_example.rb +476 -0
  47. data/spec/data_frame/filterable_example.rb +250 -0
  48. data/spec/data_frame/indexable_example.rb +221 -0
  49. data/spec/data_frame/iterable_example.rb +465 -0
  50. data/spec/data_frame/joinable_example.rb +106 -0
  51. data/spec/data_frame/missable_example.rb +47 -0
  52. data/spec/data_frame/pivotable_example.rb +297 -0
  53. data/spec/data_frame/queryable_example.rb +92 -0
  54. data/spec/data_frame/setable_example.rb +482 -0
  55. data/spec/data_frame/sortable_example.rb +350 -0
  56. data/spec/dataframe_spec.rb +181 -3243
  57. data/spec/index/index_spec.rb +8 -0
  58. data/spec/vector/aggregatable_example.rb +27 -0
  59. data/spec/vector/calculatable_example.rb +82 -0
  60. data/spec/vector/convertible_example.rb +126 -0
  61. data/spec/vector/duplicatable_example.rb +48 -0
  62. data/spec/vector/fetchable_example.rb +463 -0
  63. data/spec/vector/filterable_example.rb +165 -0
  64. data/spec/vector/indexable_example.rb +201 -0
  65. data/spec/vector/iterable_example.rb +111 -0
  66. data/spec/vector/joinable_example.rb +25 -0
  67. data/spec/vector/missable_example.rb +88 -0
  68. data/spec/vector/queryable_example.rb +91 -0
  69. data/spec/vector/setable_example.rb +300 -0
  70. data/spec/vector/sortable_example.rb +242 -0
  71. data/spec/vector_spec.rb +111 -1805
  72. metadata +102 -3
  73. data/.github/ISSUE_TEMPLATE.md +0 -18
@@ -0,0 +1,140 @@
module DaruLite
  class DataFrame
    # Calculations that combine the vectors of a DataFrame: row sums and
    # means, evaluated expressions, per-row character counts and a textual
    # summary.
    module Calculatable
      # Sum all numeric/specified vectors in the DataFrame.
      #
      # Returns a new vector containing the sum of all numeric or specified
      # vectors of the DataFrame. By default, if a vector contains a nil,
      # the sum is nil.
      # With the :skipnil option set to true, nil values are assumed to be
      # 0 (zero) and the sum vector is returned.
      #
      # @param args [Array] List of vectors to sum. Default is nil in which case
      #   all numeric vectors are summed. A trailing Hash is treated as options;
      #   a second positional argument is treated as the skipnil flag.
      #
      # @option opts [Array] :vecs Vectors to sum (same as the first argument).
      # @option opts [Boolean] :skipnil Consider nils as 0. Default is false.
      #
      # @return Vector with the sum of all vectors specified in the argument.
      #   If no vectors are specified, all numeric vectors are summed.
      #
      # @example
      #   df = DaruLite::DataFrame.new({
      #     a: [1, 2, nil],
      #     b: [2, 1, 3],
      #     c: [1, 1, 1]
      #   })
      #   => #<DaruLite::DataFrame(3x3)>
      #          a   b   c
      #      0   1   2   1
      #      1   2   1   1
      #      2 nil   3   1
      #   df.vector_sum [:a, :c]
      #   => #<DaruLite::Vector(3)>
      #      0   2
      #      1   3
      #      2 nil
      #   df.vector_sum
      #   => #<DaruLite::Vector(3)>
      #      0   4
      #      1   4
      #      2 nil
      #   df.vector_sum skipnil: true
      #   => #<DaruLite::Vector(3)>
      #          c
      #      0   4
      #      1   4
      #      2   4
      #
      def vector_sum(*args)
        options = { vecs: nil, skipnil: false }
        options = options.merge(args.pop) if args.last.is_a?(::Hash)

        # Positional arguments take precedence over the options hash.
        vecs    = args[0] || options[:vecs] || numeric_vectors
        skipnil = args[1] || options[:skipnil]

        # Start from a zero vector and fold every requested vector into it.
        zeros = DaruLite::Vector.new [0] * @size, index: @index, name: @name, dtype: @dtype
        vecs.inject(zeros) { |memo, n| self[n].add(memo, skipnil: skipnil) }
      end

      # Calculate mean of the rows of the dataframe.
      #
      # == Arguments
      #
      # * +max_missing+ - The maximum number of elements in the row that can be
      #   missing for the mean calculation to happen. Rows with more missing
      #   values than this get nil. Default to 0.
      def vector_mean(max_missing = 0)
        # FIXME: in vector_sum we preserve created vector dtype, but
        # here we are not. Is this by design or ...? - zverok, 2016-05-18
        mean_vec = DaruLite::Vector.new [0] * @size, index: @index, name: "mean_#{@name}"

        each_row_with_index.with_object(mean_vec) do |(row, i), memo|
          missing_count = row.indexes(*DaruLite::MISSING_VALUES).size
          memo[i] = missing_count > max_missing ? nil : row.mean
        end
      end

      # Returns a vector, based on a string with a calculation based
      # on vector.
      #
      # The calculation will be eval'ed in the context of the DataFrame, so
      # you can put any variable or expression valid in Ruby. When a block is
      # given it is evaluated instead and +text+ is ignored.
      #
      # SECURITY: the string is passed to +instance_eval+ and therefore runs
      # as arbitrary Ruby code. Never pass untrusted input as +text+.
      #
      # For example:
      #   a = DaruLite::Vector.new [1,2]
      #   b = DaruLite::Vector.new [3,4]
      #   ds = DaruLite::DataFrame.new({:a => a,:b => b})
      #   ds.compute("a+b")
      #   => Vector [4,6]
      #
      # @param text [String, nil] expression to evaluate; optional when a
      #   block is given
      # @raise [ArgumentError] if neither +text+ nor a block is supplied
      def compute(text = nil, &block)
        return instance_eval(&block) if block
        raise ArgumentError, 'compute requires an expression String or a block' if text.nil?

        instance_eval(text)
      end

      # DSL for yielding each row and returning a DaruLite::Vector based on the
      # value each run of the block returns.
      #
      # == Usage
      #
      #   a1 = DaruLite::Vector.new([1, 2, 3, 4, 5, 6, 7])
      #   a2 = DaruLite::Vector.new([10, 20, 30, 40, 50, 60, 70])
      #   a3 = DaruLite::Vector.new([100, 200, 300, 400, 500, 600, 700])
      #   ds = DaruLite::DataFrame.new({ :a => a1, :b => a2, :c => a3 })
      #   total = ds.vector_by_calculation { a + b + c }
      #   # <DaruLite::Vector:82314050 @name = nil @size = 7 >
      #   #   nil
      #   # 0 111
      #   # 1 222
      #   # 2 333
      #   # 3 444
      #   # 4 555
      #   # 5 666
      #   # 6 777
      def vector_by_calculation(&block)
        # The block is evaluated with each row as +self+.
        values = each_row.map { |r| r.instance_eval(&block) }

        DaruLite::Vector.new values, index: @index
      end

      # For every row, count the characters of the string representation of
      # the values in the given vectors.
      #
      # @param vecs [Array, nil] names of the vectors to count; defaults to
      #   all vectors of the DataFrame
      # @return whatever +collect_rows+ builds from the per-row totals
      def vector_count_characters(vecs = nil)
        vecs ||= @vectors.to_a

        collect_rows do |row|
          vecs.sum { |v| row[v].to_s.size }
        end
      end

      # Generate a summary of this DataFrame based on individual vectors in the DataFrame
      # @return [String] String containing the summary of the DataFrame
      def summary
        text = "= #{name}"
        text << "\n Number of rows: #{nrows}"
        @vectors.each do |v|
          text << "\n Element:[#{v}]\n"
          text << self[v].summary(1)
        end
        text
      end
    end
  end
end
@@ -0,0 +1,107 @@
module DaruLite
  class DataFrame
    # Conversions of a DataFrame into other representations: SQL DDL,
    # Matrix, Array, Hash, JSON, HTML and String.
    module Convertible
      # Create a SQL statement, based on a given Dataset
      #
      # == Arguments
      #
      # * table - String specifying name of the table that will be created in SQL.
      # * charset - Character set. Default is "UTF8".
      #
      # NOTE: +table+, +charset+ and the vector names are interpolated into
      # the statement as-is, without any quoting or escaping.
      #
      # @example
      #
      #   ds = DaruLite::DataFrame.new({
      #     :id => DaruLite::Vector.new([1,2,3,4,5]),
      #     :name => DaruLite::Vector.new(%w{Alex Peter Susan Mary John})
      #   })
      #   ds.create_sql('names')
      #   #=>"CREATE TABLE names (id INTEGER,\n name VARCHAR (255)) CHARACTER SET=UTF8;"
      #
      def create_sql(table, charset = 'UTF8')
        columns = vectors.to_a.map { |f| "#{f} #{self[f].db_type}" }

        "CREATE TABLE #{table} (" + columns.join(",\n ") + ") CHARACTER SET=#{charset};"
      end

      # Returns the dataframe. This can be convenient when the user does not
      # know whether the object is a vector or a dataframe.
      # @return [self] the dataframe
      def to_df
        self
      end

      # Convert all vectors of type *:numeric* into a Matrix.
      def to_matrix
        Matrix.columns each_vector.select(&:numeric?).map(&:to_a)
      end

      # Converts the DataFrame into an array of hashes where key is vector name
      # and value is the corresponding element. The 0th index of the array contains
      # the array of hashes while the 1st index contains the indexes of each row
      # of the dataframe. Each element in the index array corresponds to its row
      # in the array of hashes, which has the same index.
      def to_a
        [each_row.map(&:to_h), @index.to_a]
      end

      # Convert to json. If no_index is true (the default) then the index will
      # NOT be included in the JSON thus created; only the array of row hashes
      # is serialised. Pass false to serialise the full #to_a result (rows
      # and index).
      def to_json(no_index = true)
        payload = no_index ? to_a[0] : to_a
        payload.to_json
      end

      # Converts DataFrame to a hash (explicit) with keys as vector names and values as
      # the corresponding vectors.
      def to_h
        @vectors
          .each_with_index
          .map { |vec_name, idx| [vec_name, @data[idx]] }.to_h
      end

      # Convert to html for IRuby.
      #
      # NOTE: the template is rendered with this method's +binding+, so the
      # local variables below are visible to it; their names are kept as they
      # are on purpose.
      def to_html(threshold = DaruLite.max_rows)
        table_thead = to_html_thead
        table_tbody = to_html_tbody(threshold)
        path = if index.is_a?(MultiIndex)
                 File.expand_path('../iruby/templates/dataframe_mi.html.erb', __dir__)
               else
                 File.expand_path('../iruby/templates/dataframe.html.erb', __dir__)
               end
        ERB.new(File.read(path).strip).result(binding)
      end

      # Render the table header template; a dedicated template is used when
      # the index is a MultiIndex.
      def to_html_thead
        table_thead_path =
          if index.is_a?(MultiIndex)
            File.expand_path('../iruby/templates/dataframe_mi_thead.html.erb', __dir__)
          else
            File.expand_path('../iruby/templates/dataframe_thead.html.erb', __dir__)
          end
        ERB.new(File.read(table_thead_path).strip).result(binding)
      end

      # Render the table body template; a dedicated template is used when the
      # index is a MultiIndex. A nil +threshold+ falls back to @size.
      #
      # NOTE: +threshold+ is visible to the template through +binding+.
      def to_html_tbody(threshold = DaruLite.max_rows)
        threshold ||= @size
        table_tbody_path =
          if index.is_a?(MultiIndex)
            File.expand_path('../iruby/templates/dataframe_mi_tbody.html.erb', __dir__)
          else
            File.expand_path('../iruby/templates/dataframe_tbody.html.erb', __dir__)
          end
        ERB.new(File.read(table_tbody_path).strip).result(binding)
      end

      # Short description: class, optional name and dimensions (rows x columns).
      def to_s
        "#<#{self.class}#{": #{@name}" if @name}(#{nrows}x#{ncols})>"
      end
    end
  end
end
@@ -0,0 +1,64 @@
module DaruLite
  class DataFrame
    # Copying helpers for a DataFrame: deep duplicates, structural clones and
    # views that share the underlying vectors.
    module Duplicatable
      extend Gem::Deprecate

      # Duplicate the DataFrame entirely.
      #
      # == Arguments
      #
      # * +vectors_to_dup+ - An Array specifying the names of Vectors to
      #   be duplicated. Will duplicate the entire DataFrame if not specified.
      def dup(vectors_to_dup = nil)
        vectors_to_dup ||= @vectors.to_a

        copies = vectors_to_dup.map { |vec| @data[@vectors.pos(vec)].dup }
        new_order = DaruLite::Index.new(vectors_to_dup)

        DaruLite::DataFrame.new copies, order: new_order, index: @index.dup, name: @name, clone: true
      end

      # Only clone the structure of the DataFrame: same vector order, index
      # and name, but no data.
      def clone_structure
        DaruLite::DataFrame.new([], order: @vectors.dup, index: @index.dup, name: @name)
      end

      # Returns a 'view' of the DataFrame, i.e. the object IDs of vectors are
      # preserved.
      #
      # == Arguments
      #
      # +vectors_to_clone+ - Names of vectors to clone. Optional. Will return
      # a view of the whole data frame otherwise.
      def clone(*vectors_to_clone)
        # Accept both clone(:a, :b) and clone([:a, :b]).
        vectors_to_clone.flatten! if ArrayHelper.array_of?(vectors_to_clone, Array)
        vectors_to_clone = @vectors.to_a if vectors_to_clone.empty?

        shared = vectors_to_clone.map { |vec| [vec, self[vec]] }.to_h
        DaruLite::DataFrame.new(shared, clone: false, order: vectors_to_clone, name: @name)
      end

      # Returns a 'shallow' copy of DataFrame if missing data is not present,
      # or a full copy of only valid data if missing data is present.
      def clone_only_valid
        return clone unless include_values?(*DaruLite::MISSING_VALUES)

        reject_values(*DaruLite::MISSING_VALUES)
      end

      # Creates a new duplicate dataframe containing only rows
      # without a single missing value.
      #
      # @param vecs [Array, nil] names of the vectors to keep in the result;
      #   all vectors when nil
      # @deprecated Use #reject_values instead.
      def dup_only_valid(vecs = nil)
        # flat_map (rather than inject(&:concat)) also copes with a frame that
        # has no vectors and never mutates the lists returned by #indexes.
        rows_with_nil = @data.flat_map { |vec| vec.indexes(*DaruLite::MISSING_VALUES) }.uniq

        frame = vecs.nil? ? self : dup(vecs)
        frame.row[*(@index.to_a - rows_with_nil)]
      end
      deprecate :dup_only_valid, :reject_values, 2016, 10
    end
  end
end
@@ -0,0 +1,301 @@
module DaruLite
  class DataFrame
    # Read access to the rows and vectors of a DataFrame, by name or by
    # position.
    module Fetchable
      # Access row or vector. Specify name of row/vector followed by axis(:row, :vector).
      # Defaults to *:vector*. Use of this method is not recommended for accessing
      # rows. Use df.row[:a] for accessing row with index ':a'.
      def [](*names)
        axis = extract_axis(names, :vector)
        dispatch_to_axis axis, :access, *names
      end

      # Retrieve rows by positions
      # @param [Array<Integer>] positions of rows to retrieve
      # @return [DaruLite::Vector, DaruLite::DataFrame] vector for single position and dataframe for multiple positions
      # @example
      #   df = DaruLite::DataFrame.new({
      #     a: [1, 2, 3],
      #     b: ['a', 'b', 'c']
      #   })
      #   df.row_at 1, 2
      #   # => #<DaruLite::DataFrame(2x2)>
      #   #       a   b
      #   #   1   2   b
      #   #   2   3   c
      def row_at(*positions)
        # The un-coerced positions are kept for the lookups below.
        original_positions = positions
        positions = coerce_positions(*positions, nrows)
        validate_positions(*positions, nrows)

        if positions.is_a? Integer
          row = get_rows_for([positions])
          DaruLite::Vector.new(row, index: @vectors, name: @index.at(positions))
        else
          new_rows = get_rows_for(original_positions)
          DaruLite::DataFrame.new(
            new_rows,
            index: @index.at(*original_positions),
            order: @vectors,
            name: @name
          )
        end
      end

      # Retrieve vectors by positions
      # @param [Array<Integer>] positions of vectors to retrieve. A trailing
      #   axis (one of AXES) may be given; with :row the call is forwarded to
      #   #row_at.
      # @return [DaruLite::Vector, DaruLite::DataFrame] vector for single position and dataframe for multiple positions
      # @example
      #   df = DaruLite::DataFrame.new({
      #     a: [1, 2, 3],
      #     b: ['a', 'b', 'c']
      #   })
      #   df.at 0
      #   # => #<DaruLite::Vector(3)>
      #   #       a
      #   #   0   1
      #   #   1   2
      #   #   2   3
      def at(*positions)
        if AXES.include? positions.last
          axis = positions.pop
          return row_at(*positions) if axis == :row
        end

        original_positions = positions
        positions = coerce_positions(*positions, ncols)
        validate_positions(*positions, ncols)

        if positions.is_a? Integer
          @data[positions].dup
        else
          DaruLite::DataFrame.new positions.map { |pos| @data[pos].dup },
                                  index: @index,
                                  order: @vectors.at(*original_positions),
                                  name: @name
        end
      end

      # The first ten elements of the DataFrame
      #
      # @param [Integer] quantity (10) The number of elements to display from the top.
      def head(quantity = 10)
        # An exclusive range keeps head(0) empty; the inclusive 0..(quantity - 1)
        # would become 0..-1 and select every row.
        row.at 0...quantity
      end
      alias first head

      # The last ten elements of the DataFrame
      #
      # @param [Integer] quantity (10) The number of elements to display from the bottom.
      def tail(quantity = 10)
        # -0 is 0, so the start..-1 range below would select every row.
        return row.at(0...0) if quantity.zero?

        # Never start before the first row when quantity exceeds the size.
        start = [-quantity, -size].max
        row.at start..-1
      end
      alias last tail

      # Extract a dataframe given row indexes or positions
      # @param keys [Array] can be positions (if by_position is true) or indexes (if by_position is false)
      # @return [DaruLite::DataFrame]
      def get_sub_dataframe(keys, by_position: true)
        return DaruLite::DataFrame.new({}) if keys == []

        keys = @index.pos(*keys) unless by_position

        sub_df = row_at(*keys)
        # A single row comes back as a Vector; turn it into a one-row DataFrame.
        sub_df = sub_df.to_df.transpose if sub_df.is_a?(DaruLite::Vector)

        sub_df
      end

      # Values of vector +v+ as an Array, or an Array of +size+ nils when the
      # DataFrame has no such vector.
      def get_vector_anyways(v)
        @vectors.include?(v) ? self[v].to_a : Array.new(size)
      end

      # @param indexes [Array] index(s) at which row tuples are retrieved
      # @return [Array] returns array of row tuples at given index(s)
      # @example Using DaruLite::Index
      #   df = DaruLite::DataFrame.new({
      #     a: [1, 2, 3],
      #     b: ['a', 'a', 'b']
      #   })
      #
      #   df.access_row_tuples_by_indexs(1,2)
      #   # => [[2, "a"], [3, "b"]]
      #
      #   df.index = DaruLite::Index.new([:one,:two,:three])
      #   df.access_row_tuples_by_indexs(:one,:three)
      #   # => [[1, "a"], [3, "b"]]
      #
      # @example Using DaruLite::MultiIndex
      #   mi_idx = DaruLite::MultiIndex.from_tuples [
      #     [:a,:one,:bar],
      #     [:a,:one,:baz],
      #     [:b,:two,:bar],
      #     [:a,:two,:baz],
      #   ]
      #   df_mi = DaruLite::DataFrame.new({
      #     a: 1..4,
      #     b: 'a'..'d'
      #   }, index: mi_idx )
      #
      #   df_mi.access_row_tuples_by_indexs(:b, :two, :bar)
      #   # => [[3, "c"]]
      #   df_mi.access_row_tuples_by_indexs(:a)
      #   # => [[1, "a"], [2, "b"], [4, "d"]]
      def access_row_tuples_by_indexs(*indexes)
        return get_sub_dataframe(indexes, by_position: false).map_rows(&:to_a) if
          @index.is_a?(DaruLite::MultiIndex)

        positions = @index.pos(*indexes)
        if positions.is_a? Numeric
          row = get_rows_for([positions])
          row.first.is_a?(Array) ? row : [row]
        else
          new_rows = get_rows_for(indexes, by_position: false)
          indexes.map { |index| new_rows.map { |r| r[index] } }
        end
      end

      # Split the dataframe into many dataframes based on category vector
      # @param [object] cat_name name of category vector to split the dataframe
      # @return [Array] array of dataframes split by category with category vector
      #   used to split not included
      # @raise [ArgumentError] if the vector is not a category vector
      # @example
      #   df = DaruLite::DataFrame.new({
      #     a: [1, 2, 3],
      #     b: ['a', 'a', 'b']
      #   })
      #   df.to_category :b
      #   df.split_by_category :b
      #   # => [#<DaruLite::DataFrame: a (2x1)>
      #   #       a
      #   #   0   1
      #   #   1   2,
      #   # #<DaruLite::DataFrame: b (1x1)>
      #   #       a
      #   #   2   3]
      def split_by_category(cat_name)
        cat_dv = self[cat_name]
        raise ArgumentError, "#{cat_name} is not a category vector" unless
          cat_dv.category?

        cat_dv.categories.map do |cat|
          where(cat_dv.eq cat)
            .rename(cat)
            .delete_vector cat_name
        end
      end

      # Return the indexes of all the numeric vectors. Will include vectors with nils
      # along with numbers.
      def numeric_vectors
        # FIXME: Why _with_index ?..
        each_vector_with_index
          .select { |vec, _i| vec.numeric? }
          .map(&:last)
      end

      # Names of the vectors whose +numeric?+ is true.
      def numeric_vector_names
        @vectors.select { |v| self[v].numeric? }
      end

      # Return a DataFrame of only the numerical Vectors. If clone: false
      # is specified as option, only a *view* of the Vectors will be
      # returned. Defaults to clone: true.
      def only_numerics(opts = {})
        cln = opts[:clone] != false
        # Computed once and reused for both the data and the vector order.
        names = numeric_vectors
        arry = names.map { |v| self[v] }

        DaruLite::DataFrame.new(arry, clone: cln, order: Index.new(names), index: @index)
      end

      private

      # Vector access: a Range selects a sub-frame copy, otherwise the lookup
      # depends on whether the vectors are held in a MultiIndex.
      def access_vector(*names)
        if names.first.is_a?(Range)
          dup(@vectors.subset(names.first))
        elsif @vectors.is_a?(MultiIndex)
          access_vector_multi_index(*names)
        else
          access_vector_single_index(*names)
        end
      end

      def access_vector_multi_index(*names)
        pos = @vectors[names]

        return @data[pos] if pos.is_a?(Integer)

        new_vectors = pos.map { |tuple| @data[@vectors[tuple]] }

        # A partial key leaves the remaining levels as the new vector index.
        pos = pos.drop_left_level(names.size) if names.size < @vectors.width

        DaruLite::DataFrame.new(new_vectors, index: @index, order: pos)
      end

      def access_vector_single_index(*names)
        if names.count < 2
          begin
            pos = @vectors.is_a?(DaruLite::DateTimeIndex) ? @vectors[names.first] : @vectors.pos(names.first)
          rescue IndexError
            raise IndexError, "Specified vector #{names.first} does not exist"
          end
          return @data[pos] if pos.is_a?(Numeric)

          # A non-numeric result is treated as the collection of names to fetch.
          names = pos
        end

        new_vectors = names.map { |name| [name, @data[@vectors.pos(name)]] }.to_h

        order = names.is_a?(Array) ? DaruLite::Index.new(names) : names
        DaruLite::DataFrame.new(new_vectors, order: order, index: @index, name: @name)
      end

      # Row access by index: a Vector for a single row, a DataFrame otherwise.
      def access_row(*indexes)
        positions = @index.pos(*indexes)

        if positions.is_a? Numeric
          row = get_rows_for([positions])
          DaruLite::Vector.new row, index: @vectors, name: indexes.first
        else
          new_rows = get_rows_for(indexes, by_position: false)
          DaruLite::DataFrame.new new_rows, index: @index.subset(*indexes), order: @vectors
        end
      end

      # @param keys [Array] can be an array of positions (if by_position is true) or indexes (if by_position is false)
      # because of coercion by DaruLite::Vector#at and DaruLite::Vector#[], can return either an Array of
      # values (representing a row) or an array of Vectors (that can be seen as rows)
      # @raise [RuntimeError] if keys is not an Array
      def get_rows_for(keys, by_position: true)
        raise "keys must be an Array (got #{keys.class})" unless keys.is_a?(Array)

        if by_position
          @data.map { |vector| vector.at(*keys) }
        else
          # TODO: for now (2018-07-27), it is different than using
          #   get_rows_for(@index.pos(*keys))
          #   because DaruLite::Vector#at and DaruLite::Vector#[] don't handle DaruLite::MultiIndex the same way
          @data.map { |vec| vec[*keys] }
        end
      end

      # coerce ranges, integers and array in appropriate ways:
      # a single Integer is returned as is, a single Range is expanded to the
      # positions it covers within +size+, several positions are returned
      # unchanged.
      def coerce_positions(*positions, size)
        return positions unless positions.size == 1

        case positions.first
        when Integer
          positions.first
        when Range
          size.times.to_a[positions.first]
        else
          raise ArgumentError, 'Unknown position type.'
        end
      end
    end
  end
end