daru_lite 0.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +38 -0
  3. data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  4. data/.github/workflows/ci.yml +20 -0
  5. data/.rubocop_todo.yml +35 -33
  6. data/README.md +19 -115
  7. data/daru_lite.gemspec +1 -0
  8. data/lib/daru_lite/data_frame/aggregatable.rb +165 -0
  9. data/lib/daru_lite/data_frame/calculatable.rb +140 -0
  10. data/lib/daru_lite/data_frame/convertible.rb +107 -0
  11. data/lib/daru_lite/data_frame/duplicatable.rb +64 -0
  12. data/lib/daru_lite/data_frame/fetchable.rb +301 -0
  13. data/lib/daru_lite/data_frame/filterable.rb +144 -0
  14. data/lib/daru_lite/data_frame/i_o_able.rb +179 -0
  15. data/lib/daru_lite/data_frame/indexable.rb +168 -0
  16. data/lib/daru_lite/data_frame/iterable.rb +339 -0
  17. data/lib/daru_lite/data_frame/joinable.rb +152 -0
  18. data/lib/daru_lite/data_frame/missable.rb +75 -0
  19. data/lib/daru_lite/data_frame/pivotable.rb +108 -0
  20. data/lib/daru_lite/data_frame/queryable.rb +67 -0
  21. data/lib/daru_lite/data_frame/setable.rb +109 -0
  22. data/lib/daru_lite/data_frame/sortable.rb +241 -0
  23. data/lib/daru_lite/dataframe.rb +142 -2355
  24. data/lib/daru_lite/index/index.rb +13 -0
  25. data/lib/daru_lite/maths/statistics/vector.rb +1 -1
  26. data/lib/daru_lite/vector/aggregatable.rb +9 -0
  27. data/lib/daru_lite/vector/calculatable.rb +78 -0
  28. data/lib/daru_lite/vector/convertible.rb +77 -0
  29. data/lib/daru_lite/vector/duplicatable.rb +17 -0
  30. data/lib/daru_lite/vector/fetchable.rb +175 -0
  31. data/lib/daru_lite/vector/filterable.rb +128 -0
  32. data/lib/daru_lite/vector/indexable.rb +77 -0
  33. data/lib/daru_lite/vector/iterable.rb +95 -0
  34. data/lib/daru_lite/vector/joinable.rb +17 -0
  35. data/lib/daru_lite/vector/missable.rb +124 -0
  36. data/lib/daru_lite/vector/queryable.rb +45 -0
  37. data/lib/daru_lite/vector/setable.rb +47 -0
  38. data/lib/daru_lite/vector/sortable.rb +113 -0
  39. data/lib/daru_lite/vector.rb +36 -932
  40. data/lib/daru_lite/version.rb +1 -1
  41. data/spec/data_frame/aggregatable_example.rb +65 -0
  42. data/spec/data_frame/buildable_example.rb +109 -0
  43. data/spec/data_frame/calculatable_example.rb +135 -0
  44. data/spec/data_frame/convertible_example.rb +180 -0
  45. data/spec/data_frame/duplicatable_example.rb +111 -0
  46. data/spec/data_frame/fetchable_example.rb +476 -0
  47. data/spec/data_frame/filterable_example.rb +250 -0
  48. data/spec/data_frame/indexable_example.rb +221 -0
  49. data/spec/data_frame/iterable_example.rb +465 -0
  50. data/spec/data_frame/joinable_example.rb +106 -0
  51. data/spec/data_frame/missable_example.rb +47 -0
  52. data/spec/data_frame/pivotable_example.rb +297 -0
  53. data/spec/data_frame/queryable_example.rb +92 -0
  54. data/spec/data_frame/setable_example.rb +482 -0
  55. data/spec/data_frame/sortable_example.rb +350 -0
  56. data/spec/dataframe_spec.rb +181 -3243
  57. data/spec/index/index_spec.rb +8 -0
  58. data/spec/vector/aggregatable_example.rb +27 -0
  59. data/spec/vector/calculatable_example.rb +82 -0
  60. data/spec/vector/convertible_example.rb +126 -0
  61. data/spec/vector/duplicatable_example.rb +48 -0
  62. data/spec/vector/fetchable_example.rb +463 -0
  63. data/spec/vector/filterable_example.rb +165 -0
  64. data/spec/vector/indexable_example.rb +201 -0
  65. data/spec/vector/iterable_example.rb +111 -0
  66. data/spec/vector/joinable_example.rb +25 -0
  67. data/spec/vector/missable_example.rb +88 -0
  68. data/spec/vector/queryable_example.rb +91 -0
  69. data/spec/vector/setable_example.rb +300 -0
  70. data/spec/vector/sortable_example.rb +242 -0
  71. data/spec/vector_spec.rb +111 -1805
  72. metadata +102 -3
  73. data/.github/ISSUE_TEMPLATE.md +0 -18
@@ -0,0 +1,140 @@
1
+ module DaruLite
2
+ class DataFrame
3
+ module Calculatable
4
+ # Sum all numeric/specified vectors in the DataFrame.
5
+ #
6
+ # Returns a new vector containing the sum of all numeric
7
+ # or specified vectors of the DataFrame. By default, if the vector
8
+ # contains a nil, the sum is nil.
9
+ # With :skipnil argument set to true, nil values are assumed to be
10
+ # 0 (zero) and the sum vector is returned.
11
+ #
12
+ # @param args [Array] List of vectors to sum. Default is nil in which case
13
+ # all numeric vectors are summed.
14
+ #
15
+ # @option opts [Boolean] :skipnil Consider nils as 0. Default is false.
16
+ #
17
+ # @return [DaruLite::Vector] Sum of all vectors specified in the argument.
18
+ # If no vectors are specified, all numeric vectors are summed.
19
+ #
20
+ # @example
21
+ # df = DaruLite::DataFrame.new({
22
+ # a: [1, 2, nil],
23
+ # b: [2, 1, 3],
24
+ # c: [1, 1, 1]
25
+ # })
26
+ # => #<DaruLite::DataFrame(3x3)>
27
+ # a b c
28
+ # 0 1 2 1
29
+ # 1 2 1 1
30
+ # 2 nil 3 1
31
+ # df.vector_sum [:a, :c]
32
+ # => #<DaruLite::Vector(3)>
33
+ # 0 2
34
+ # 1 3
35
+ # 2 nil
36
+ # df.vector_sum
37
+ # => #<DaruLite::Vector(3)>
38
+ # 0 4
39
+ # 1 4
40
+ # 2 nil
41
+ # df.vector_sum skipnil: true
42
+ # => #<DaruLite::Vector(3)>
43
+ # c
44
+ # 0 4
45
+ # 1 4
46
+ # 2 4
47
+ #
48
+ def vector_sum(*args)
49
+ defaults = { vecs: nil, skipnil: false }
50
+ options = args.last.is_a?(::Hash) ? args.pop : {}
51
+ options = defaults.merge(options)
52
+ vecs = args[0] || options[:vecs]
53
+ skipnil = args[1] || options[:skipnil]
54
+
55
+ vecs ||= numeric_vectors
56
+ sum = DaruLite::Vector.new [0] * @size, index: @index, name: @name, dtype: @dtype
57
+ vecs.inject(sum) { |memo, n| self[n].add(memo, skipnil: skipnil) }
58
+ end
59
+
60
+ # Calculate mean of the rows of the dataframe.
61
+ #
62
+ # == Arguments
63
+ #
64
+ # * +max_missing+ - The maximum number of elements in the row that can be
65
+ # missing for the mean calculation to happen; rows exceeding it yield nil. Defaults to 0.
66
+ def vector_mean(max_missing = 0)
67
+ # FIXME: in vector_sum we preserve created vector dtype, but
68
+ # here we are not. Is this by design or ...? - zverok, 2016-05-18
69
+ mean_vec = DaruLite::Vector.new [0] * @size, index: @index, name: "mean_#{@name}"
70
+
71
+ each_row_with_index.with_object(mean_vec) do |(row, i), memo|
72
+ memo[i] = row.indexes(*DaruLite::MISSING_VALUES).size > max_missing ? nil : row.mean
73
+ end
74
+ end
75
+
76
+ # Returns a vector, based on a string with a calculation based
77
+ # on vector.
78
+ #
79
+ # The calculation will be eval'ed, so you can put any variable
80
+ # or expression valid on ruby.
81
+ #
82
+ # For example:
83
+ # a = DaruLite::Vector.new [1,2]
84
+ # b = DaruLite::Vector.new [3,4]
85
+ # ds = DaruLite::DataFrame.new({:a => a,:b => b})
86
+ # ds.compute("a+b")
87
+ # => Vector [4,6]
88
+ def compute(text, &block)
89
+ return instance_eval(&block) if block
90
+
91
+ instance_eval(text)
92
+ end
93
+
94
+ # DSL for yielding each row and returning a DaruLite::Vector based on the
95
+ # value each run of the block returns.
96
+ #
97
+ # == Usage
98
+ #
99
+ # a1 = DaruLite::Vector.new([1, 2, 3, 4, 5, 6, 7])
100
+ # a2 = DaruLite::Vector.new([10, 20, 30, 40, 50, 60, 70])
101
+ # a3 = DaruLite::Vector.new([100, 200, 300, 400, 500, 600, 700])
102
+ # ds = DaruLite::DataFrame.new({ :a => a1, :b => a2, :c => a3 })
103
+ # total = ds.vector_by_calculation { a + b + c }
104
+ # # <DaruLite::Vector:82314050 @name = nil @size = 7 >
105
+ # # nil
106
+ # # 0 111
107
+ # # 1 222
108
+ # # 2 333
109
+ # # 3 444
110
+ # # 4 555
111
+ # # 5 666
112
+ # # 6 777
113
+ def vector_by_calculation(&block)
114
+ a = each_row.map { |r| r.instance_eval(&block) }
115
+
116
+ DaruLite::Vector.new a, index: @index
117
+ end
118
+
119
+ def vector_count_characters(vecs = nil)
120
+ vecs ||= @vectors.to_a
121
+
122
+ collect_rows do |row|
123
+ vecs.sum { |v| row[v].to_s.size }
124
+ end
125
+ end
126
+
127
+ # Generate a summary of this DataFrame based on individual vectors in the DataFrame
128
+ # @return [String] String containing the summary of the DataFrame
129
+ def summary
130
+ summary = "= #{name}"
131
+ summary << "\n Number of rows: #{nrows}"
132
+ @vectors.each do |v|
133
+ summary << "\n Element:[#{v}]\n"
134
+ summary << self[v].summary(1)
135
+ end
136
+ summary
137
+ end
138
+ end
139
+ end
140
+ end
@@ -0,0 +1,107 @@
1
+ module DaruLite
2
+ class DataFrame
3
+ module Convertible
4
+ # Create a SQL CREATE TABLE statement based on this DataFrame
5
+ #
6
+ # == Arguments
7
+ #
8
+ # * table - String specifying name of the table that will be created in SQL.
9
+ # * charset - Character set. Default is "UTF8".
10
+ #
11
+ # @example
12
+ #
13
+ # ds = DaruLite::DataFrame.new({
14
+ # :id => DaruLite::Vector.new([1,2,3,4,5]),
15
+ # :name => DaruLite::Vector.new(%w{Alex Peter Susan Mary John})
16
+ # })
17
+ # ds.create_sql('names')
18
+ # #=>"CREATE TABLE names (id INTEGER,\n name VARCHAR (255)) CHARACTER SET=UTF8;"
19
+ #
20
+ def create_sql(table, charset = 'UTF8')
21
+ sql = "CREATE TABLE #{table} ("
22
+ fields = vectors.to_a.collect do |f|
23
+ v = self[f]
24
+ "#{f} #{v.db_type}"
25
+ end
26
+
27
+ sql + fields.join(",\n ") + ") CHARACTER SET=#{charset};"
28
+ end
29
+
30
+ # Returns the dataframe. This can be convenient when the user does not
31
+ # know whether the object is a vector or a dataframe.
32
+ # @return [self] the dataframe
33
+ def to_df
34
+ self
35
+ end
36
+
37
+ # Convert all vectors of type *:numeric* into a Matrix.
38
+ def to_matrix
39
+ Matrix.columns each_vector.select(&:numeric?).map(&:to_a)
40
+ end
41
+
42
+ # Converts the DataFrame into an array of hashes where key is vector name
43
+ # and value is the corresponding element. The 0th index of the array contains
44
+ # the array of hashes while the 1st index contains the indexes of each row
45
+ # of the dataframe. Each element in the index array corresponds to its row
46
+ # in the array of hashes, which has the same index.
47
+ def to_a
48
+ [each_row.map(&:to_h), @index.to_a]
49
+ end
50
+
51
+ # Convert to JSON. If no_index is true (the default) then the index will NOT be
52
+ # included in the JSON thus created; pass false to include it.
53
+ def to_json(no_index = true)
54
+ if no_index
55
+ to_a[0].to_json
56
+ else
57
+ to_a.to_json
58
+ end
59
+ end
60
+
61
+ # Converts DataFrame to a hash (explicit) with keys as vector names and values as
62
+ # the corresponding vectors.
63
+ def to_h
64
+ @vectors
65
+ .each_with_index
66
+ .map { |vec_name, idx| [vec_name, @data[idx]] }.to_h
67
+ end
68
+
69
+ # Convert to html for IRuby.
70
+ def to_html(threshold = DaruLite.max_rows)
71
+ table_thead = to_html_thead
72
+ table_tbody = to_html_tbody(threshold)
73
+ path = if index.is_a?(MultiIndex)
74
+ File.expand_path('../iruby/templates/dataframe_mi.html.erb', __dir__)
75
+ else
76
+ File.expand_path('../iruby/templates/dataframe.html.erb', __dir__)
77
+ end
78
+ ERB.new(File.read(path).strip).result(binding)
79
+ end
80
+
81
+ def to_html_thead
82
+ table_thead_path =
83
+ if index.is_a?(MultiIndex)
84
+ File.expand_path('../iruby/templates/dataframe_mi_thead.html.erb', __dir__)
85
+ else
86
+ File.expand_path('../iruby/templates/dataframe_thead.html.erb', __dir__)
87
+ end
88
+ ERB.new(File.read(table_thead_path).strip).result(binding)
89
+ end
90
+
91
+ def to_html_tbody(threshold = DaruLite.max_rows)
92
+ threshold ||= @size
93
+ table_tbody_path =
94
+ if index.is_a?(MultiIndex)
95
+ File.expand_path('../iruby/templates/dataframe_mi_tbody.html.erb', __dir__)
96
+ else
97
+ File.expand_path('../iruby/templates/dataframe_tbody.html.erb', __dir__)
98
+ end
99
+ ERB.new(File.read(table_tbody_path).strip).result(binding)
100
+ end
101
+
102
+ def to_s
103
+ "#<#{self.class}#{": #{@name}" if @name}(#{nrows}x#{ncols})>"
104
+ end
105
+ end
106
+ end
107
+ end
@@ -0,0 +1,64 @@
1
+ module DaruLite
2
+ class DataFrame
3
+ module Duplicatable
4
+ extend Gem::Deprecate
5
+
6
+ # Duplicate the DataFrame entirely.
7
+ #
8
+ # == Arguments
9
+ #
10
+ # * +vectors_to_dup+ - An Array specifying the names of Vectors to
11
+ # be duplicated. Will duplicate the entire DataFrame if not specified.
12
+ def dup(vectors_to_dup = nil)
13
+ vectors_to_dup ||= @vectors.to_a
14
+
15
+ src = vectors_to_dup.map { |vec| @data[@vectors.pos(vec)].dup }
16
+ new_order = DaruLite::Index.new(vectors_to_dup)
17
+
18
+ DaruLite::DataFrame.new src, order: new_order, index: @index.dup, name: @name, clone: true
19
+ end
20
+
21
+ # Only clone the structure of the DataFrame.
22
+ def clone_structure
23
+ DaruLite::DataFrame.new([], order: @vectors.dup, index: @index.dup, name: @name)
24
+ end
25
+
26
+ # Returns a 'view' of the DataFrame, i.e. the object IDs of vectors are
27
+ # preserved.
28
+ #
29
+ # == Arguments
30
+ #
31
+ # +vectors_to_clone+ - Names of vectors to clone. Optional. Will return
32
+ # a view of the whole data frame otherwise.
33
+ def clone(*vectors_to_clone)
34
+ vectors_to_clone.flatten! if ArrayHelper.array_of?(vectors_to_clone, Array)
35
+ vectors_to_clone = @vectors.to_a if vectors_to_clone.empty?
36
+
37
+ h = vectors_to_clone.map { |vec| [vec, self[vec]] }.to_h
38
+ DaruLite::DataFrame.new(h, clone: false, order: vectors_to_clone, name: @name)
39
+ end
40
+
41
+ # Returns a 'shallow' copy of DataFrame if missing data is not present,
42
+ # or a full copy of only valid data if missing data is present.
43
+ def clone_only_valid
44
+ if include_values?(*DaruLite::MISSING_VALUES)
45
+ reject_values(*DaruLite::MISSING_VALUES)
46
+ else
47
+ clone
48
+ end
49
+ end
50
+
51
+ # Creates a new duplicate dataframe containing only rows
52
+ # without a single missing value.
53
+ def dup_only_valid(vecs = nil)
54
+ rows_with_nil = @data.map { |vec| vec.indexes(*DaruLite::MISSING_VALUES) }
55
+ .inject(&:concat)
56
+ .uniq
57
+
58
+ row_indexes = @index.to_a
59
+ (vecs.nil? ? self : dup(vecs)).row[*(row_indexes - rows_with_nil)]
60
+ end
61
+ deprecate :dup_only_valid, :reject_values, 2016, 10
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,301 @@
1
+ module DaruLite
2
+ class DataFrame
3
+ module Fetchable
4
+ # Access row or vector. Specify name of row/vector followed by axis(:row, :vector).
5
+ # Defaults to *:vector*. Use of this method is not recommended for accessing
6
+ # rows. Use df.row[:a] for accessing row with index ':a'.
7
+ def [](*names)
8
+ axis = extract_axis(names, :vector)
9
+ dispatch_to_axis axis, :access, *names
10
+ end
11
+
12
+ # Retrieve rows by positions
13
+ # @param [Array<Integer>] positions of rows to retrieve
14
+ # @return [DaruLite::Vector, DaruLite::DataFrame] vector for single position and dataframe for multiple positions
15
+ # @example
16
+ # df = DaruLite::DataFrame.new({
17
+ # a: [1, 2, 3],
18
+ # b: ['a', 'b', 'c']
19
+ # })
20
+ # df.row_at 1, 2
21
+ # # => #<DaruLite::DataFrame(2x2)>
22
+ # # a b
23
+ # # 1 2 b
24
+ # # 2 3 c
25
+ def row_at(*positions)
26
+ original_positions = positions
27
+ positions = coerce_positions(*positions, nrows)
28
+ validate_positions(*positions, nrows)
29
+
30
+ if positions.is_a? Integer
31
+ row = get_rows_for([positions])
32
+ DaruLite::Vector.new(row, index: @vectors, name: @index.at(positions))
33
+ else
34
+ new_rows = get_rows_for(original_positions)
35
+ DaruLite::DataFrame.new(
36
+ new_rows,
37
+ index: @index.at(*original_positions),
38
+ order: @vectors,
39
+ name: @name
40
+ )
41
+ end
42
+ end
43
+
44
+ # Retrieve vectors by positions
45
+ # @param [Array<Integer>] positions of vectors to retrieve
46
+ # @return [DaruLite::Vector, DaruLite::DataFrame] vector for single position and dataframe for multiple positions
47
+ # @example
48
+ # df = DaruLite::DataFrame.new({
49
+ # a: [1, 2, 3],
50
+ # b: ['a', 'b', 'c']
51
+ # })
52
+ # df.at 0
53
+ # # => #<DaruLite::Vector(3)>
54
+ # # a
55
+ # # 0 1
56
+ # # 1 2
57
+ # # 2 3
58
+ def at(*positions)
59
+ if AXES.include? positions.last
60
+ axis = positions.pop
61
+ return row_at(*positions) if axis == :row
62
+ end
63
+
64
+ original_positions = positions
65
+ positions = coerce_positions(*positions, ncols)
66
+ validate_positions(*positions, ncols)
67
+
68
+ if positions.is_a? Integer
69
+ @data[positions].dup
70
+ else
71
+ DaruLite::DataFrame.new positions.map { |pos| @data[pos].dup },
72
+ index: @index,
73
+ order: @vectors.at(*original_positions),
74
+ name: @name
75
+ end
76
+ end
77
+
78
+ # The first ten elements of the DataFrame
79
+ #
80
+ # @param [Integer] quantity (10) The number of elements to display from the top.
81
+ def head(quantity = 10)
82
+ row.at 0..(quantity - 1)
83
+ end
84
+ alias first head
85
+
86
+ # The last ten elements of the DataFrame
87
+ #
88
+ # @param [Integer] quantity (10) The number of elements to display from the bottom.
89
+ def tail(quantity = 10)
90
+ start = [-quantity, -size].max
91
+ row.at start..-1
92
+ end
93
+ alias last tail
94
+
95
+ # Extract a dataframe given row indexes or positions
96
+ # @param keys [Array] can be positions (if by_position is true) or indexes (if by_position is false)
97
+ # @return [DaruLite::DataFrame]
98
+ def get_sub_dataframe(keys, by_position: true)
99
+ return DaruLite::DataFrame.new({}) if keys == []
100
+
101
+ keys = @index.pos(*keys) unless by_position
102
+
103
+ sub_df = row_at(*keys)
104
+ sub_df = sub_df.to_df.transpose if sub_df.is_a?(DaruLite::Vector)
105
+
106
+ sub_df
107
+ end
108
+
109
+ def get_vector_anyways(v)
110
+ @vectors.include?(v) ? self[v].to_a : Array.new(size)
111
+ end
112
+
113
+ # @param indexes [Array] index(s) at which row tuples are retrieved
114
+ # @return [Array] returns array of row tuples at given index(s)
115
+ # @example Using DaruLite::Index
116
+ # df = DaruLite::DataFrame.new({
117
+ # a: [1, 2, 3],
118
+ # b: ['a', 'a', 'b']
119
+ # })
120
+ #
121
+ # df.access_row_tuples_by_indexs(1,2)
122
+ # # => [[2, "a"], [3, "b"]]
123
+ #
124
+ # df.index = DaruLite::Index.new([:one,:two,:three])
125
+ # df.access_row_tuples_by_indexs(:one,:three)
126
+ # # => [[1, "a"], [3, "b"]]
127
+ #
128
+ # @example Using DaruLite::MultiIndex
129
+ # mi_idx = DaruLite::MultiIndex.from_tuples [
130
+ # [:a,:one,:bar],
131
+ # [:a,:one,:baz],
132
+ # [:b,:two,:bar],
133
+ # [:a,:two,:baz],
134
+ # ]
135
+ # df_mi = DaruLite::DataFrame.new({
136
+ # a: 1..4,
137
+ # b: 'a'..'d'
138
+ # }, index: mi_idx )
139
+ #
140
+ # df_mi.access_row_tuples_by_indexs(:b, :two, :bar)
141
+ # # => [[3, "c"]]
142
+ # df_mi.access_row_tuples_by_indexs(:a)
143
+ # # => [[1, "a"], [2, "b"], [4, "d"]]
144
+ def access_row_tuples_by_indexs(*indexes)
145
+ return get_sub_dataframe(indexes, by_position: false).map_rows(&:to_a) if
146
+ @index.is_a?(DaruLite::MultiIndex)
147
+
148
+ positions = @index.pos(*indexes)
149
+ if positions.is_a? Numeric
150
+ row = get_rows_for([positions])
151
+ row.first.is_a?(Array) ? row : [row]
152
+ else
153
+ new_rows = get_rows_for(indexes, by_position: false)
154
+ indexes.map { |index| new_rows.map { |r| r[index] } }
155
+ end
156
+ end
157
+
158
+ # Split the dataframe into many dataframes based on category vector
159
+ # @param [object] cat_name name of category vector to split the dataframe
160
+ # @return [Array] array of dataframes split by category with category vector
161
+ # used to split not included
162
+ # @example
163
+ # df = DaruLite::DataFrame.new({
164
+ # a: [1, 2, 3],
165
+ # b: ['a', 'a', 'b']
166
+ # })
167
+ # df.to_category :b
168
+ # df.split_by_category :b
169
+ # # => [#<DaruLite::DataFrame: a (2x1)>
170
+ # # a
171
+ # # 0 1
172
+ # # 1 2,
173
+ # # #<DaruLite::DataFrame: b (1x1)>
174
+ # # a
175
+ # # 2 3]
176
+ def split_by_category(cat_name)
177
+ cat_dv = self[cat_name]
178
+ raise ArgumentError, "#{cat_name} is not a category vector" unless
179
+ cat_dv.category?
180
+
181
+ cat_dv.categories.map do |cat|
182
+ where(cat_dv.eq cat)
183
+ .rename(cat)
184
+ .delete_vector cat_name
185
+ end
186
+ end
187
+
188
+ # Return the indexes of all the numeric vectors. Will include vectors with nils
189
+ # along with numbers.
190
+ def numeric_vectors
191
+ # FIXME: Why _with_index ?..
192
+ each_vector_with_index
193
+ .select { |vec, _i| vec.numeric? }
194
+ .map(&:last)
195
+ end
196
+
197
+ def numeric_vector_names
198
+ @vectors.select { |v| self[v].numeric? }
199
+ end
200
+
201
+ # Return a DataFrame of only the numerical Vectors. If clone: false
202
+ # is specified as option, only a *view* of the Vectors will be
203
+ # returned. Defaults to clone: true.
204
+ def only_numerics(opts = {})
205
+ cln = opts[:clone] != false
206
+ arry = numeric_vectors.map { |v| self[v] }
207
+
208
+ order = Index.new(numeric_vectors)
209
+ DaruLite::DataFrame.new(arry, clone: cln, order: order, index: @index)
210
+ end
211
+
212
+ private
213
+
214
+ def access_vector(*names)
215
+ if names.first.is_a?(Range)
216
+ dup(@vectors.subset(names.first))
217
+ elsif @vectors.is_a?(MultiIndex)
218
+ access_vector_multi_index(*names)
219
+ else
220
+ access_vector_single_index(*names)
221
+ end
222
+ end
223
+
224
+ def access_vector_multi_index(*names)
225
+ pos = @vectors[names]
226
+
227
+ return @data[pos] if pos.is_a?(Integer)
228
+
229
+ new_vectors = pos.map { |tuple| @data[@vectors[tuple]] }
230
+
231
+ pos = pos.drop_left_level(names.size) if names.size < @vectors.width
232
+
233
+ DaruLite::DataFrame.new(new_vectors, index: @index, order: pos)
234
+ end
235
+
236
+ def access_vector_single_index(*names)
237
+ if names.count < 2
238
+ begin
239
+ pos = @vectors.is_a?(DaruLite::DateTimeIndex) ? @vectors[names.first] : @vectors.pos(names.first)
240
+ rescue IndexError
241
+ raise IndexError, "Specified vector #{names.first} does not exist"
242
+ end
243
+ return @data[pos] if pos.is_a?(Numeric)
244
+
245
+ names = pos
246
+ end
247
+
248
+ new_vectors = names.map { |name| [name, @data[@vectors.pos(name)]] }.to_h
249
+
250
+ order = names.is_a?(Array) ? DaruLite::Index.new(names) : names
251
+ DaruLite::DataFrame.new(new_vectors, order: order, index: @index, name: @name)
252
+ end
253
+
254
+ def access_row(*indexes)
255
+ positions = @index.pos(*indexes)
256
+
257
+ if positions.is_a? Numeric
258
+ row = get_rows_for([positions])
259
+ DaruLite::Vector.new row, index: @vectors, name: indexes.first
260
+ else
261
+ new_rows = get_rows_for(indexes, by_position: false)
262
+ DaruLite::DataFrame.new new_rows, index: @index.subset(*indexes), order: @vectors
263
+ end
264
+ end
265
+
266
+ # @param keys [Array] can be an array of positions (if by_position is true) or indexes (if by_position if false)
267
+ # because of coercion by DaruLite::Vector#at and DaruLite::Vector#[], can return either an Array of
268
+ # values (representing a row) or an array of Vectors (that can be seen as rows)
269
+ def get_rows_for(keys, by_position: true)
270
+ raise unless keys.is_a?(Array)
271
+
272
+ if by_position
273
+ pos = keys
274
+ @data.map { |vector| vector.at(*pos) }
275
+ else
276
+ # TODO: for now (2018-07-27), it is different than using
277
+ # get_rows_for(@index.pos(*keys))
278
+ # because DaruLite::Vector#at and DaruLite::Vector#[] don't handle DaruLite::MultiIndex the same way
279
+ indexes = keys
280
+ @data.map { |vec| vec[*indexes] }
281
+ end
282
+ end
283
+
284
+ # coerce ranges, integers and array in appropriate ways
285
+ def coerce_positions(*positions, size)
286
+ if positions.size == 1
287
+ case positions.first
288
+ when Integer
289
+ positions.first
290
+ when Range
291
+ size.times.to_a[positions.first]
292
+ else
293
+ raise ArgumentError, 'Unknown position type.'
294
+ end
295
+ else
296
+ positions
297
+ end
298
+ end
299
+ end
300
+ end
301
+ end