daru 0.1.3.1 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rspec +2 -1
- data/.rspec_formatter.rb +33 -0
- data/.rubocop.yml +26 -2
- data/History.md +38 -0
- data/README.md +22 -13
- data/Rakefile +50 -2
- data/benchmarks/csv_reading.rb +22 -0
- data/daru.gemspec +9 -2
- data/lib/daru.rb +36 -4
- data/lib/daru/accessors/array_wrapper.rb +6 -1
- data/lib/daru/accessors/dataframe_by_row.rb +10 -2
- data/lib/daru/accessors/gsl_wrapper.rb +1 -3
- data/lib/daru/accessors/nmatrix_wrapper.rb +9 -0
- data/lib/daru/category.rb +935 -0
- data/lib/daru/core/group_by.rb +29 -38
- data/lib/daru/core/merge.rb +186 -145
- data/lib/daru/core/query.rb +22 -11
- data/lib/daru/dataframe.rb +976 -885
- data/lib/daru/date_time/index.rb +166 -166
- data/lib/daru/date_time/offsets.rb +66 -77
- data/lib/daru/formatters/table.rb +54 -0
- data/lib/daru/helpers/array.rb +40 -0
- data/lib/daru/index.rb +476 -73
- data/lib/daru/io/io.rb +66 -45
- data/lib/daru/io/sql_data_source.rb +33 -62
- data/lib/daru/iruby/helpers.rb +38 -0
- data/lib/daru/iruby/templates/dataframe.html.erb +52 -0
- data/lib/daru/iruby/templates/dataframe_mi.html.erb +58 -0
- data/lib/daru/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru/iruby/templates/vector.html.erb +27 -0
- data/lib/daru/iruby/templates/vector_mi.html.erb +36 -0
- data/lib/daru/maths/arithmetic/dataframe.rb +16 -18
- data/lib/daru/maths/arithmetic/vector.rb +4 -6
- data/lib/daru/maths/statistics/dataframe.rb +8 -15
- data/lib/daru/maths/statistics/vector.rb +120 -98
- data/lib/daru/monkeys.rb +12 -40
- data/lib/daru/plotting/gruff.rb +3 -0
- data/lib/daru/plotting/gruff/category.rb +49 -0
- data/lib/daru/plotting/gruff/dataframe.rb +91 -0
- data/lib/daru/plotting/gruff/vector.rb +57 -0
- data/lib/daru/plotting/nyaplot.rb +3 -0
- data/lib/daru/plotting/nyaplot/category.rb +34 -0
- data/lib/daru/plotting/nyaplot/dataframe.rb +187 -0
- data/lib/daru/plotting/nyaplot/vector.rb +46 -0
- data/lib/daru/vector.rb +694 -421
- data/lib/daru/version.rb +1 -1
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/spec/accessors/wrappers_spec.rb +2 -4
- data/spec/categorical_spec.rb +1734 -0
- data/spec/core/group_by_spec.rb +52 -2
- data/spec/core/merge_spec.rb +63 -2
- data/spec/core/query_spec.rb +236 -80
- data/spec/dataframe_spec.rb +1373 -79
- data/spec/date_time/data_spec.rb +3 -5
- data/spec/date_time/index_spec.rb +154 -17
- data/spec/date_time/offsets_spec.rb +3 -4
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/formatters/table_formatter_spec.rb +99 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +168 -0
- data/spec/index/index_spec.rb +283 -0
- data/spec/index/multi_index_spec.rb +570 -0
- data/spec/io/io_spec.rb +31 -4
- data/spec/io/sql_data_source_spec.rb +0 -1
- data/spec/iruby/dataframe_spec.rb +172 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +107 -0
- data/spec/math/arithmetic/dataframe_spec.rb +71 -13
- data/spec/math/arithmetic/vector_spec.rb +8 -10
- data/spec/math/statistics/dataframe_spec.rb +3 -5
- data/spec/math/statistics/vector_spec.rb +45 -55
- data/spec/monkeys_spec.rb +32 -9
- data/spec/plotting/dataframe_spec.rb +386 -0
- data/spec/plotting/vector_spec.rb +230 -0
- data/spec/shared/vector_display_spec.rb +215 -0
- data/spec/spec_helper.rb +23 -0
- data/spec/vector_spec.rb +905 -138
- metadata +143 -11
- data/.rubocop_todo.yml +0 -44
- data/lib/daru/plotting/dataframe.rb +0 -104
- data/lib/daru/plotting/vector.rb +0 -38
- data/spec/daru_spec.rb +0 -58
- data/spec/index_spec.rb +0 -375
@@ -54,6 +54,11 @@ module Daru
|
|
54
54
|
set_size
|
55
55
|
end
|
56
56
|
|
57
|
+
def fill(*arg)
|
58
|
+
@data.fill(*arg)
|
59
|
+
set_size
|
60
|
+
end
|
61
|
+
|
57
62
|
def uniq
|
58
63
|
@data.uniq
|
59
64
|
end
|
@@ -67,7 +72,7 @@ module Daru
|
|
67
72
|
end
|
68
73
|
|
69
74
|
def compact
|
70
|
-
@data -
|
75
|
+
@data - Daru::MISSING_VALUES
|
71
76
|
end
|
72
77
|
|
73
78
|
def mean
|
@@ -9,8 +9,16 @@ module Daru
|
|
9
9
|
@data_frame[*names, :row]
|
10
10
|
end
|
11
11
|
|
12
|
-
def []=(
|
13
|
-
@data_frame[
|
12
|
+
def []=(*names, vector)
|
13
|
+
@data_frame[*names, :row] = vector
|
14
|
+
end
|
15
|
+
|
16
|
+
def at *positions
|
17
|
+
@data_frame.row_at(*positions)
|
18
|
+
end
|
19
|
+
|
20
|
+
def set_at positions, vector
|
21
|
+
@data_frame.set_row_at(positions, vector)
|
14
22
|
end
|
15
23
|
end
|
16
24
|
end
|
@@ -61,9 +61,7 @@ module Daru
|
|
61
61
|
attr_reader :data
|
62
62
|
|
63
63
|
def compact
|
64
|
-
|
65
|
-
missing = @context.missing_values rescue []
|
66
|
-
::GSL::Vector.alloc(@data.to_a - missing.map(&:to_f))
|
64
|
+
::GSL::Vector.alloc(@data.to_a - [Float::NAN])
|
67
65
|
end
|
68
66
|
|
69
67
|
[:mean, :min, :max, :prod, :sum].each do |method|
|
@@ -14,9 +14,12 @@ module Daru
|
|
14
14
|
self
|
15
15
|
end
|
16
16
|
|
17
|
+
# :nocov:
|
18
|
+
# FIXME: not sure why this kind of wrapper has such poor coverage
|
17
19
|
def inject(*args, &block)
|
18
20
|
@data[0...@size].inject(*args, &block)
|
19
21
|
end
|
22
|
+
# :nocov:
|
20
23
|
|
21
24
|
attr_reader :size, :data, :nm_dtype
|
22
25
|
|
@@ -43,9 +46,11 @@ module Daru
|
|
43
46
|
@data[index] = value
|
44
47
|
end
|
45
48
|
|
49
|
+
# :nocov:
|
46
50
|
def == other
|
47
51
|
@data[0...@size] == other[0...@size] and @size == other.size
|
48
52
|
end
|
53
|
+
# :nocov:
|
49
54
|
|
50
55
|
def delete_at index
|
51
56
|
arry = @data.to_a
|
@@ -58,10 +63,12 @@ module Daru
|
|
58
63
|
@data.to_a.index key
|
59
64
|
end
|
60
65
|
|
66
|
+
# :nocov:
|
61
67
|
def << element
|
62
68
|
resize if @size >= @data.size
|
63
69
|
self[@size] = element
|
64
70
|
end
|
71
|
+
# :nocov:
|
65
72
|
|
66
73
|
def to_a
|
67
74
|
@data[0...@size].to_a
|
@@ -77,6 +84,7 @@ module Daru
|
|
77
84
|
@data = NMatrix.new [size], @data.to_a, dtype: @nm_dtype
|
78
85
|
end
|
79
86
|
|
87
|
+
# :nocov:
|
80
88
|
def mean
|
81
89
|
@data[0...@size].mean.first
|
82
90
|
end
|
@@ -96,6 +104,7 @@ module Daru
|
|
96
104
|
def min
|
97
105
|
@data[0...@size].min
|
98
106
|
end
|
107
|
+
# :nocov:
|
99
108
|
end
|
100
109
|
end
|
101
110
|
end if Daru.has_nmatrix?
|
@@ -0,0 +1,935 @@
|
|
1
|
+
module Daru
|
2
|
+
module Category # rubocop:disable Metrics/ModuleLength
|
3
|
+
attr_accessor :base_category
|
4
|
+
attr_reader :index, :coding_scheme, :name
|
5
|
+
|
6
|
+
# For debugging. To be removed.
|
7
|
+
attr_reader :array, :cat_hash, :map_int_cat
|
8
|
+
|
9
|
+
# Initializes a vector to store categorical data.
|
10
|
+
# @note Base category is set to the first category encountered in the vector.
|
11
|
+
# @param [Array] data the categorical data
|
12
|
+
# @param [Hash] opts the options
|
13
|
+
# @option opts [Boolean] :ordered true if data is ordered, false otherwise
|
14
|
+
# @option opts [Array] :categories categories to associate with the vector.
|
15
|
+
# It adds extra categories if specified and also defines the order of categories.
|
16
|
+
# @option opts [object] :index gives index to vector. By default its from 0 to size-1
|
17
|
+
# @return the categorical data created
|
18
|
+
# @example
|
19
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c],
|
20
|
+
# type: :category,
|
21
|
+
# ordered: true,
|
22
|
+
# categories: [:a, :b, :c, 1]
|
23
|
+
# # => #<Daru::Vector(5)>
|
24
|
+
# # 0 a
|
25
|
+
# # 1 1
|
26
|
+
# # 2 a
|
27
|
+
# # 3 1
|
28
|
+
# # 4 c
|
29
|
+
def initialize_category data, opts={}
|
30
|
+
@type = :category
|
31
|
+
initialize_core_attributes data
|
32
|
+
|
33
|
+
if opts[:categories]
|
34
|
+
validate_categories(opts[:categories])
|
35
|
+
add_extra_categories(opts[:categories] - categories)
|
36
|
+
order_with opts[:categories]
|
37
|
+
end
|
38
|
+
|
39
|
+
# Specify if the categories are ordered or not.
|
40
|
+
# By default it is unordered
|
41
|
+
@ordered = opts[:ordered] || false
|
42
|
+
|
43
|
+
# The coding scheme to code with. Default is dummy coding.
|
44
|
+
@coding_scheme = :dummy
|
45
|
+
|
46
|
+
# Base category which won't be present in the coding
|
47
|
+
@base_category = @cat_hash.keys.first
|
48
|
+
|
49
|
+
# Stores the name of the vector
|
50
|
+
@name = opts[:name]
|
51
|
+
|
52
|
+
# Index of the vector
|
53
|
+
@index = coerce_index opts[:index]
|
54
|
+
|
55
|
+
self
|
56
|
+
end
|
57
|
+
|
58
|
+
def name= new_name
|
59
|
+
@name = new_name
|
60
|
+
self
|
61
|
+
end
|
62
|
+
|
63
|
+
def plotting_library= lib
|
64
|
+
case lib
|
65
|
+
when :gruff, :nyaplot
|
66
|
+
@plotting_library = lib
|
67
|
+
extend Module.const_get(
|
68
|
+
"Daru::Plotting::Category::#{lib.to_s.capitalize}Library"
|
69
|
+
) if Daru.send("has_#{lib}?".to_sym)
|
70
|
+
else
|
71
|
+
raise ArguementError, "Plotting library #{lib} not supported. "\
|
72
|
+
'Supported libraries are :nyaplot and :gruff'
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
alias_method :rename, :name=
|
77
|
+
|
78
|
+
# Returns an enumerator that enumerates on categorical data
|
79
|
+
# @return [Enumerator] an enumerator that enumerates over data stored in vector
|
80
|
+
def each
|
81
|
+
return enum_for(:each) unless block_given?
|
82
|
+
@array.each { |pos| yield cat_from_int pos }
|
83
|
+
self
|
84
|
+
end
|
85
|
+
|
86
|
+
# Returns all categorical data
|
87
|
+
# @return [Array] array of all categorical data which vector is storing
|
88
|
+
# @example
|
89
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
|
90
|
+
# dv.to_a
|
91
|
+
# # => [:a, 1, :a, 1, :c]
|
92
|
+
def to_a
|
93
|
+
each.to_a
|
94
|
+
end
|
95
|
+
|
96
|
+
# Duplicates a vector
|
97
|
+
# @return [Daru::Vector] duplicated vector
|
98
|
+
# @example
|
99
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
|
100
|
+
# dv.dup
|
101
|
+
# # => #<Daru::Vector(5)>
|
102
|
+
# # 0 a
|
103
|
+
# # 1 1
|
104
|
+
# # 2 a
|
105
|
+
# # 3 1
|
106
|
+
# # 4 c
|
107
|
+
def dup
|
108
|
+
Daru::Vector.new to_a.dup,
|
109
|
+
name: @name,
|
110
|
+
index: @index.dup,
|
111
|
+
type: :category,
|
112
|
+
categories: categories,
|
113
|
+
ordered: ordered?
|
114
|
+
end
|
115
|
+
|
116
|
+
# Associates a category to the vector.
|
117
|
+
# @param [Array] *new_categories new categories to be associated
|
118
|
+
# @example
|
119
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
|
120
|
+
# dv.add_category :b
|
121
|
+
# dv.categories
|
122
|
+
# # => [:a, :b, :c, 1]
|
123
|
+
def add_category(*new_categories)
|
124
|
+
new_categories -= categories
|
125
|
+
add_extra_categories new_categories
|
126
|
+
end
|
127
|
+
|
128
|
+
# Returns frequency of given category
|
129
|
+
# @param [object] category given category whose count is to be found
|
130
|
+
# @return count/frequency of given category
|
131
|
+
# @example
|
132
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
|
133
|
+
# dv.count :a
|
134
|
+
# # => 2
|
135
|
+
def count category
|
136
|
+
raise ArgumentError, "Invalid category #{category}" unless
|
137
|
+
categories.include?(category)
|
138
|
+
|
139
|
+
@cat_hash[category].size
|
140
|
+
end
|
141
|
+
|
142
|
+
# Returns a vector storing count/frequency of each category
|
143
|
+
# @return [Daru::Vector] Return a vector whose indexes are categories
|
144
|
+
# and corresponding values are its count
|
145
|
+
# @example
|
146
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
|
147
|
+
# dv.frequencies
|
148
|
+
# # => #<Daru::Vector(4)>
|
149
|
+
# # a 2
|
150
|
+
# # b 0
|
151
|
+
# # c 1
|
152
|
+
# # 1 2
|
153
|
+
def frequencies type=:count
|
154
|
+
counts = @cat_hash.values.map(&:size)
|
155
|
+
values =
|
156
|
+
case type
|
157
|
+
when :count
|
158
|
+
counts
|
159
|
+
when :fraction
|
160
|
+
counts.map { |c| c / size.to_f }
|
161
|
+
when :percentage
|
162
|
+
counts.map { |c| c / size.to_f * 100 }
|
163
|
+
end
|
164
|
+
Daru::Vector.new values, index: categories, name: name
|
165
|
+
end
|
166
|
+
|
167
|
+
# Returns vector for indexes/positions specified
|
168
|
+
# @param [Array] *indexes indexes/positions for which values have to be retrieved
|
169
|
+
# @note It accepts both indexes and positions. In case of collision,
|
170
|
+
# argument will be treated as index
|
171
|
+
# @return vector containing values specified at specified indexes/positions
|
172
|
+
# @example
|
173
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c],
|
174
|
+
# type: :category,
|
175
|
+
# index: 'a'..'e'
|
176
|
+
# dv[:a, 1]
|
177
|
+
# # => #<Daru::Vector(2)>
|
178
|
+
# # a a
|
179
|
+
# # b 1
|
180
|
+
# dv[0, 1]
|
181
|
+
# # => #<Daru::Vector(2)>
|
182
|
+
# # a a
|
183
|
+
# # b 1
|
184
|
+
def [] *indexes
|
185
|
+
positions = @index.pos(*indexes)
|
186
|
+
return category_from_position(positions) if positions.is_a? Integer
|
187
|
+
|
188
|
+
Daru::Vector.new positions.map { |pos| category_from_position pos },
|
189
|
+
index: @index.subset(*indexes),
|
190
|
+
name: @name,
|
191
|
+
type: :category,
|
192
|
+
ordered: @ordered,
|
193
|
+
categories: categories
|
194
|
+
end
|
195
|
+
|
196
|
+
# Returns vector for positions specified.
|
197
|
+
# @param [Array] *positions positions at which values are to be retrieved.
|
198
|
+
# @return vector containing values specified at specified positions
|
199
|
+
# @example
|
200
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
|
201
|
+
# dv.at 0..-2
|
202
|
+
# # => #<Daru::Vector(4)>
|
203
|
+
# # 0 a
|
204
|
+
# # 1 1
|
205
|
+
# # 2 a
|
206
|
+
# # 3 1
|
207
|
+
def at *positions
|
208
|
+
original_positions = positions
|
209
|
+
positions = coerce_positions(*positions)
|
210
|
+
validate_positions(*positions)
|
211
|
+
|
212
|
+
return category_from_position(positions) if positions.is_a? Integer
|
213
|
+
|
214
|
+
Daru::Vector.new positions.map { |pos| category_from_position(pos) },
|
215
|
+
index: @index.at(*original_positions),
|
216
|
+
name: @name,
|
217
|
+
type: :category,
|
218
|
+
ordered: @ordered,
|
219
|
+
categories: categories
|
220
|
+
end
|
221
|
+
|
222
|
+
# Modifies values at specified indexes/positions.
|
223
|
+
# @note In order to add a new category you need to associate it via #add_category
|
224
|
+
# @param [Array] *indexes indexes/positions at which to modify value
|
225
|
+
# @param [object] val value to assign at specific indexes/positions
|
226
|
+
# @return modified vector
|
227
|
+
# @example
|
228
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
|
229
|
+
# dv.add_category :b
|
230
|
+
# dv[0] = :b
|
231
|
+
# dv
|
232
|
+
# # => #<Daru::Vector(5)>
|
233
|
+
# # 0 b
|
234
|
+
# # 1 1
|
235
|
+
# # 2 a
|
236
|
+
# # 3 1
|
237
|
+
# # 4 c
|
238
|
+
def []= *indexes, val
|
239
|
+
positions = @index.pos(*indexes)
|
240
|
+
|
241
|
+
if positions.is_a? Numeric
|
242
|
+
modify_category_at positions, val
|
243
|
+
else
|
244
|
+
positions.each { |pos| modify_category_at pos, val }
|
245
|
+
end
|
246
|
+
self
|
247
|
+
end
|
248
|
+
|
249
|
+
# Modifies values at specified positions.
|
250
|
+
# @param [Array] positions positions at which to modify value
|
251
|
+
# @param [object] val value to assign at specific positions
|
252
|
+
# @return modified vector
|
253
|
+
# @example
|
254
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
|
255
|
+
# dv.add_category :b
|
256
|
+
# dv.set_at [0, 1], :b
|
257
|
+
# # => #<Daru::Vector(5)>
|
258
|
+
# # 0 b
|
259
|
+
# # 1 b
|
260
|
+
# # 2 a
|
261
|
+
# # 3 1
|
262
|
+
# # 4 c
|
263
|
+
def set_at positions, val
|
264
|
+
validate_positions(*positions)
|
265
|
+
positions.map { |pos| modify_category_at pos, val }
|
266
|
+
self
|
267
|
+
end
|
268
|
+
|
269
|
+
# Size of categorical data.
|
270
|
+
# @return total number of values in the vector
|
271
|
+
# @example
|
272
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
|
273
|
+
# dv.size
|
274
|
+
# # => 5
|
275
|
+
def size
|
276
|
+
@array.size
|
277
|
+
end
|
278
|
+
|
279
|
+
# Tells whether vector is ordered or not.
|
280
|
+
# @return [Boolean] true if vector is ordered, false otherwise
|
281
|
+
# @example
|
282
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
|
283
|
+
# dv.ordered?
|
284
|
+
# # => false
|
285
|
+
def ordered?
|
286
|
+
@ordered
|
287
|
+
end
|
288
|
+
|
289
|
+
# Make categorical data ordered or unordered.
|
290
|
+
# @param [Boolean] bool true if categorical data is to be ordered, false otherwise
|
291
|
+
# @example
|
292
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
|
293
|
+
# dv.ordered = true
|
294
|
+
# dv.ordered?
|
295
|
+
# # => true
|
296
|
+
def ordered= bool
|
297
|
+
@ordered = bool
|
298
|
+
end
|
299
|
+
|
300
|
+
# Returns all the categories with the inherent order
|
301
|
+
# @return [Array] categories of the vector with the order
|
302
|
+
# @example
|
303
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c],
|
304
|
+
# type: :category,
|
305
|
+
# categories: [:a, :b, :c, 1]
|
306
|
+
# dv.categories
|
307
|
+
# # => [:a, :b, :c, 1]
|
308
|
+
def categories
|
309
|
+
@cat_hash.keys
|
310
|
+
end
|
311
|
+
|
312
|
+
alias_method :order, :categories
|
313
|
+
|
314
|
+
# Sets order of the categories.
|
315
|
+
# @note If extra categories are specified, they get added too.
|
316
|
+
# @param [Array] cat_with_order categories specifying their order
|
317
|
+
# @example
|
318
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
|
319
|
+
# dv.categories = [:a, :b, :c, 1]
|
320
|
+
# dv.categories
|
321
|
+
# # => [:a, :b, :c, 1]
|
322
|
+
def categories= cat_with_order
|
323
|
+
validate_categories(cat_with_order)
|
324
|
+
add_extra_categories(cat_with_order - categories)
|
325
|
+
order_with cat_with_order
|
326
|
+
end
|
327
|
+
|
328
|
+
# Rename categories.
|
329
|
+
# @note The order of categories after renaming is preserved but new categories
|
330
|
+
# are added at the end in the order. Also the base-category is reassigned
|
331
|
+
# to new value if it is renamed
|
332
|
+
# @param [Hash] old_to_new a hash mapping categories whose name to be changed
|
333
|
+
# to their new names
|
334
|
+
# @example
|
335
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
|
336
|
+
# dv.rename_categories :a => :b
|
337
|
+
# dv
|
338
|
+
# # => #<Daru::Vector(5)>
|
339
|
+
# # 0 b
|
340
|
+
# # 1 1
|
341
|
+
# # 2 b
|
342
|
+
# # 3 1
|
343
|
+
# # 4 c
|
344
|
+
def rename_categories old_to_new
|
345
|
+
old_categories = categories
|
346
|
+
data = to_a.map do |cat|
|
347
|
+
old_to_new.include?(cat) ? old_to_new[cat] : cat
|
348
|
+
end
|
349
|
+
|
350
|
+
initialize_core_attributes data
|
351
|
+
self.categories = (old_categories - old_to_new.keys) | old_to_new.values
|
352
|
+
self.base_category = old_to_new[base_category] if
|
353
|
+
old_to_new.include? base_category
|
354
|
+
self
|
355
|
+
end
|
356
|
+
|
357
|
+
# Removes the unused categories
|
358
|
+
# @note If base category is removed, then the first occurring category in the
|
359
|
+
# data is taken as base category. Order of the undeleted categories
|
360
|
+
# remains preserved.
|
361
|
+
# @return [Daru::Vector] Makes changes in the vector itself i.e. deletes
|
362
|
+
# the unused categories and returns itself
|
363
|
+
# @example
|
364
|
+
# dv = Daru::Vector.new [:one, :two, :one], type: :category,
|
365
|
+
# categories: [:three, :two, :one]
|
366
|
+
# dv.remove_unused_categories
|
367
|
+
# dv.categories
|
368
|
+
# # => [:two, :one]
|
369
|
+
def remove_unused_categories
|
370
|
+
old_categories = categories
|
371
|
+
|
372
|
+
initialize_core_attributes to_a
|
373
|
+
self.categories = old_categories & categories
|
374
|
+
self.base_category = @cat_hash.keys.first unless
|
375
|
+
categories.include? base_category
|
376
|
+
self
|
377
|
+
end
|
378
|
+
|
379
|
+
# Returns the minimum category according to the order specified.
|
380
|
+
# @note This operation will only work if vector is ordered.
|
381
|
+
# To set the vector ordered do `vector.ordered = true`
|
382
|
+
# @return [object] the minimum category according to the order
|
383
|
+
# @example
|
384
|
+
# dv = Daru::Vector.new ['second', 'second', 'third', 'first'],
|
385
|
+
# categories: ['first', 'second', 'third']
|
386
|
+
# dv.min
|
387
|
+
# # => 'first'
|
388
|
+
def min
|
389
|
+
assert_ordered :min
|
390
|
+
categories.first
|
391
|
+
end
|
392
|
+
|
393
|
+
# Returns the maximum category according to the order specified.
|
394
|
+
# @note This operation will only work if vector is ordered.
|
395
|
+
# To set the vector ordered do `vector.ordered = true`
|
396
|
+
# @return [object] the maximum category according to the order
|
397
|
+
# @example
|
398
|
+
# dv = Daru::Vector.new ['second', 'second', 'third', 'first'],
|
399
|
+
# categories: ['first', 'second', 'third']
|
400
|
+
# dv.max
|
401
|
+
# # => 'third'
|
402
|
+
def max
|
403
|
+
assert_ordered :max
|
404
|
+
categories.last
|
405
|
+
end
|
406
|
+
|
407
|
+
# Sorts the vector in the order specified.
|
408
|
+
# @note This operation will only work if vector is ordered.
|
409
|
+
# To set the vector ordered, do `vector.ordered = true`
|
410
|
+
# @return [Daru::Vector] sorted vector
|
411
|
+
# @example
|
412
|
+
# dv = Daru::Vector.new ['second', 'second', 'third', 'first'],
|
413
|
+
# categories: ['first', 'second', 'third'],
|
414
|
+
# type: :category,
|
415
|
+
# ordered: true
|
416
|
+
# dv.sort!
|
417
|
+
# # => #<Daru::Vector(4)>
|
418
|
+
# # 3 first
|
419
|
+
# # 0 second
|
420
|
+
# # 1 second
|
421
|
+
# # 2 third
|
422
|
+
def sort! # rubocop:disable Metrics/AbcSize
|
423
|
+
# TODO: Simplify the code
|
424
|
+
assert_ordered :sort
|
425
|
+
|
426
|
+
# Build sorted index
|
427
|
+
old_index = @index.to_a
|
428
|
+
new_index = @cat_hash.values.map do |positions|
|
429
|
+
old_index.values_at(*positions)
|
430
|
+
end.flatten
|
431
|
+
@index = @index.class.new new_index
|
432
|
+
|
433
|
+
# Build sorted data
|
434
|
+
@cat_hash = categories.inject([{}, 0]) do |acc, cat|
|
435
|
+
hash, count = acc
|
436
|
+
cat_count = @cat_hash[cat].size
|
437
|
+
cat_count.times { |i| @array[count+i] = int_from_cat(cat) }
|
438
|
+
hash[cat] = (count...(cat_count+count)).to_a
|
439
|
+
[hash, count + cat_count]
|
440
|
+
end.first
|
441
|
+
|
442
|
+
self
|
443
|
+
end
|
444
|
+
|
445
|
+
def sort
|
446
|
+
dup.sort!
|
447
|
+
end
|
448
|
+
|
449
|
+
# Set coding scheme
|
450
|
+
# @param [Symbol] scheme to set
|
451
|
+
# @example
|
452
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
|
453
|
+
# dv.coding_scheme = :deviation
|
454
|
+
# dv.coding_scheme
|
455
|
+
# # => :deviation
|
456
|
+
def coding_scheme= scheme
|
457
|
+
raise ArgumentError, "Unknown or unsupported coding scheme #{scheme}." unless
|
458
|
+
CODING_SCHEMES.include? scheme
|
459
|
+
@coding_scheme = scheme
|
460
|
+
end
|
461
|
+
|
462
|
+
CODING_SCHEMES = [:dummy, :deviation, :helmert, :simple].freeze
|
463
|
+
|
464
|
+
# Contrast code the vector according to the coding scheme set.
|
465
|
+
# @note To set the coding scheme use #coding_scheme=
|
466
|
+
# @param [true, false] full true if you want k variables for k categories,
|
467
|
+
# false if you want k-1 variables for k categories
|
468
|
+
# @return [Daru::DataFrame] dataframe containing all coded variables
|
469
|
+
# @example
|
470
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
|
471
|
+
# dv.contrast_code
|
472
|
+
# # => #<Daru::DataFrame(5x2)>
|
473
|
+
# # daru_1 daru_c
|
474
|
+
# # 0 0 0
|
475
|
+
# # 1 1 0
|
476
|
+
# # 2 0 0
|
477
|
+
# # 3 1 0
|
478
|
+
# # 4 0 1
|
479
|
+
def contrast_code opts={}
|
480
|
+
if opts[:user_defined]
|
481
|
+
user_defined_coding(opts[:user_defined])
|
482
|
+
else
|
483
|
+
# TODO: Make various coding schemes code DRY
|
484
|
+
send("#{coding_scheme}_coding".to_sym, opts[:full] || false)
|
485
|
+
end
|
486
|
+
end
|
487
|
+
|
488
|
+
# Two categorical vectors are equal if their index and corresponding values are same
|
489
|
+
# @return [true, false] true if the two vectors are equal
|
490
|
+
# @example
|
491
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
|
492
|
+
# other = Daru::Vector.new [:a, 1, :a, 1, :c],
|
493
|
+
# type: :category,
|
494
|
+
# index: 1..5
|
495
|
+
# dv == other
|
496
|
+
# # => false
|
497
|
+
def == other
|
498
|
+
size == other.size &&
|
499
|
+
to_a == other.to_a &&
|
500
|
+
index == other.index
|
501
|
+
end
|
502
|
+
|
503
|
+
# Returns integer coding for categorical data in the order starting from 0.
|
504
|
+
# For example if order is [:a, :b, :c], then :a, will be coded as 0, :b as 1 and :c as 2
|
505
|
+
# @return [Array] integer coding of all values of vector
|
506
|
+
# @example
|
507
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c],
|
508
|
+
# type: :category,
|
509
|
+
# categories: [:a, :b, :c, 1]
|
510
|
+
# dv.to_ints
|
511
|
+
# # => [0, 1, 0, 1, 2]
|
512
|
+
def to_ints
|
513
|
+
@array
|
514
|
+
end
|
515
|
+
|
516
|
+
# Reorder the vector with given positions
|
517
|
+
# @note Unlike #reindex! which takes index as input, it takes
|
518
|
+
# positions as an input to reorder the vector
|
519
|
+
# @param [Array] order the order to reorder the vector with
|
520
|
+
# @return reordered vector
|
521
|
+
# @example
|
522
|
+
# dv = Daru::Vector.new [3, 2, 1], index: ['c', 'b', 'a'], type: :category
|
523
|
+
# dv.reorder! [2, 1, 0]
|
524
|
+
# # => #<Daru::Vector(3)>
|
525
|
+
# # a 1
|
526
|
+
# # b 2
|
527
|
+
# # c 3
|
528
|
+
def reorder! order
|
529
|
+
raise ArgumentError, 'Invalid order specified' unless
|
530
|
+
order.sort == size.times.to_a
|
531
|
+
# TODO: Room for optimization
|
532
|
+
old_data = to_a
|
533
|
+
new_data = order.map { |i| old_data[i] }
|
534
|
+
initialize_core_attributes new_data
|
535
|
+
self
|
536
|
+
end
|
537
|
+
|
538
|
+
# Sets new index for vector. Preserves index->value correspondence.
|
539
|
+
# @note Unlike #reorder! which takes positions as input it takes
|
540
|
+
# index as an input to reorder the vector
|
541
|
+
# @param [Daru::Index, Daru::MultiIndex, Array] idx new index to order with
|
542
|
+
# @return [Daru::Vector] vector reindexed with new index
|
543
|
+
# @example
|
544
|
+
# dv = Daru::Vector.new [3, 2, 1], index: ['c', 'b', 'a'], type: :category
|
545
|
+
# dv.reindex! ['a', 'b', 'c']
|
546
|
+
# # => #<Daru::Vector(3)>
|
547
|
+
# # a 1
|
548
|
+
# # b 2
|
549
|
+
# # c 3
|
550
|
+
def reindex! idx
|
551
|
+
idx = Daru::Index.new idx unless idx.is_a? Daru::Index
|
552
|
+
raise ArgumentError, 'Invalid index specified' unless
|
553
|
+
idx.to_a.sort == index.to_a.sort
|
554
|
+
|
555
|
+
old_categories = categories
|
556
|
+
data = idx.map { |i| self[i] }
|
557
|
+
initialize_core_attributes data
|
558
|
+
self.categories = old_categories
|
559
|
+
self.index = idx
|
560
|
+
self
|
561
|
+
end
|
562
|
+
|
563
|
+
{
|
564
|
+
eq: :==,
|
565
|
+
not_eq: :!=,
|
566
|
+
lt: :<,
|
567
|
+
lteq: :<=,
|
568
|
+
mt: :>,
|
569
|
+
mteq: :>=
|
570
|
+
}.each do |method, operator|
|
571
|
+
define_method(method) do |other|
|
572
|
+
mod = Daru::Core::Query
|
573
|
+
if other.is_a?(Daru::Vector)
|
574
|
+
mod.apply_vector_operator operator, to_ints, other.to_ints
|
575
|
+
else
|
576
|
+
mod.apply_scalar_operator operator, @array, int_from_cat(other)
|
577
|
+
end
|
578
|
+
end
|
579
|
+
end
|
580
|
+
alias :gt :mt
|
581
|
+
alias :gteq :mteq
|
582
|
+
|
583
|
+
# For querying the data
|
584
|
+
# @param [object] arel like query syntax
|
585
|
+
# @return [Daru::Vector] Vector which makes the conditions true
|
586
|
+
# @example
|
587
|
+
# dv = Daru::Vector.new ['I', 'II', 'I', 'III', 'I', 'II'],
|
588
|
+
# type: :category,
|
589
|
+
# ordered: true,
|
590
|
+
# categories: ['I', 'II', 'III']
|
591
|
+
# dv.where(dv.mt('I') & dv.lt('III'))
|
592
|
+
# # => #<Daru::Vector(2)>
|
593
|
+
# # 1 II
|
594
|
+
# # 5 II
|
595
|
+
def where bool_array
|
596
|
+
Daru::Core::Query.vector_where self, bool_array
|
597
|
+
end
|
598
|
+
|
599
|
+
# Gives the summary of data using following parameters
|
600
|
+
# - size: size of the data
|
601
|
+
# - categories: total number of categories
|
602
|
+
# - max_freq: Max no of times a category occurs
|
603
|
+
# - max_category: The category which occurs max no of times
|
604
|
+
# - min_freq: Min no of times a category occurs
|
605
|
+
# - min_category: The category which occurs min no of times
|
606
|
+
# @return [Daru::Vector] Vector with index as following parameters
|
607
|
+
# and values as values to these parameters
|
608
|
+
# @example
|
609
|
+
# dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
|
610
|
+
# dv.describe
|
611
|
+
# # => #<Daru::Vector(6)>
|
612
|
+
# # size 5
|
613
|
+
# # categories 3
|
614
|
+
# # max_freq 2
|
615
|
+
# # max_category a
|
616
|
+
# # min_freq 1
|
617
|
+
# # min_category c
|
618
|
+
def describe
|
619
|
+
Daru::Vector.new(
|
620
|
+
size: size,
|
621
|
+
categories: categories.size,
|
622
|
+
max_freq: @cat_hash.values.map(&:size).max,
|
623
|
+
max_category: @cat_hash.keys.max_by { |cat| @cat_hash[cat].size },
|
624
|
+
min_freq: @cat_hash.values.map(&:size).min,
|
625
|
+
min_category: @cat_hash.keys.min_by { |cat| @cat_hash[cat].size }
|
626
|
+
)
|
627
|
+
end
|
628
|
+
|
629
|
+
# Does nothing since it is already of type category.
|
630
|
+
# @return [Daru::Vector] categorical vector
|
631
|
+
def to_category
|
632
|
+
self
|
633
|
+
end
|
634
|
+
|
635
|
+
# Converts a category type vector to non category type vector
|
636
|
+
# @return [Daru::Vector] non category type vector
|
637
|
+
def to_non_category
|
638
|
+
Daru::Vector.new to_a, name: name, index: index
|
639
|
+
end
|
640
|
+
|
641
|
+
# Sets index of the vector
|
642
|
+
# @param [Daru::Index, Daru::MultiIndex, Daru::CategoricalIndex, Array, Range]
|
643
|
+
# idx new index to assign to vector
|
644
|
+
# @return [Daru::Index, Daru::CategoricalIndex, Daru::MultiIndex] the index assigned
|
645
|
+
# @example
|
646
|
+
# dv = Daru::Vector.new [1, 2, 3], type: :category
|
647
|
+
# dv.index = 'a'..'c'
|
648
|
+
# dv
|
649
|
+
# # => #<Daru::Vector(3)>
|
650
|
+
# # a 1
|
651
|
+
# # b 2
|
652
|
+
# # c 3
|
653
|
+
def index= idx
|
654
|
+
@index = coerce_index idx
|
655
|
+
end
|
656
|
+
|
657
|
+
# Check if any one of mentioned values occur in the vector
|
658
|
+
# @param [Array] *values values to check for
|
659
|
+
# @return [true, false] returns true if any one of specified values
|
660
|
+
# occur in the vector
|
661
|
+
# @example
|
662
|
+
# dv = Daru::Vector.new [1, 2, 3, 4, nil]
|
663
|
+
# dv.include_values? nil, Float::NAN
|
664
|
+
# # => true
|
665
|
+
def include_values?(*values)
|
666
|
+
values.any? { |v| @cat_hash.include?(v) && !@cat_hash[v].empty? }
|
667
|
+
end
|
668
|
+
|
669
|
+
# Return a vector with specified values removed
|
670
|
+
# @param [Array] *values values to reject from resultant vector
|
671
|
+
# @return [Daru::Vector] vector with specified values removed
|
672
|
+
# @example
|
673
|
+
# dv = Daru::Vector.new [1, 2, nil, Float::NAN], type: :category
|
674
|
+
# dv.reject_values nil, Float::NAN
|
675
|
+
# # => #<Daru::Vector(2)>
|
676
|
+
# # 0 1
|
677
|
+
# # 1 2
|
678
|
+
def reject_values(*values)
|
679
|
+
resultant_pos = size.times.to_a - values.flat_map { |v| @cat_hash[v] }
|
680
|
+
dv = at(*resultant_pos)
|
681
|
+
unless dv.is_a? Daru::Vector
|
682
|
+
pos = resultant_pos.first
|
683
|
+
dv = at(pos..pos)
|
684
|
+
end
|
685
|
+
dv.remove_unused_categories
|
686
|
+
end
|
687
|
+
|
688
|
+
# Count the number of values specified
|
689
|
+
# @param [Array] *values values to count for
|
690
|
+
# @return [Integer] the number of times the values mentioned occurs
|
691
|
+
# @example
|
692
|
+
# dv = Daru::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
|
693
|
+
# dv.count_values nil
|
694
|
+
# # => 2
|
695
|
+
def count_values(*values)
  # Values that are not categories contribute nothing to the total.
  values.inject(0) do |total, value|
    @cat_hash.key?(value) ? total + @cat_hash[value].size : total
  end
end
|
700
|
+
|
701
|
+
# Return indexes of values specified
|
702
|
+
# @param [Array] *values values to find indexes for
|
703
|
+
# @return [Array] array of indexes of values specified
|
704
|
+
# @example
|
705
|
+
# dv = Daru::Vector.new [1, 2, nil, Float::NAN], index: 11..14
|
706
|
+
# dv.indexes nil, Float::NAN
|
707
|
+
# # => [13, 14]
|
708
|
+
def indexes(*values)
  # Drop values that are not categories (the intersection also removes
  # duplicates), then translate their sorted positions into index labels.
  known_values = values & categories
  hit_positions = known_values.flat_map { |value| @cat_hash[value] }.sort
  index.to_a.values_at(*hit_positions)
end
|
712
|
+
|
713
|
+
# Replaces specified values with a new value
|
714
|
+
# @param [Array] old_values array of values to replace
|
715
|
+
# @param [object] new_value new value to replace with
|
716
|
+
# @note It performs the replace in place.
|
717
|
+
# @return [Daru::Vector] Same vector itself with values
|
718
|
+
# replaced with new value
|
719
|
+
# @example
|
720
|
+
# dv = Daru::Vector.new [1, 2, :a, :b]
|
721
|
+
# dv.replace_values [:a, :b], nil
|
722
|
+
# dv
|
723
|
+
# # =>
|
724
|
+
# # #<Daru::Vector:19903200 @name = nil @metadata = {} @size = 4 >
|
725
|
+
# # nil
|
726
|
+
# # 0 1
|
727
|
+
# # 1 2
|
728
|
+
# # 2 nil
|
729
|
+
# # 3 nil
|
730
|
+
def replace_values(old_values, new_value)
  # A single value is accepted as shorthand for a one-element array.
  targets = old_values.is_a?(Array) ? old_values : [old_values]

  # Replacement is expressed as renaming every old category to new_value.
  renames = targets.each_with_object({}) do |old_value, mapping|
    mapping[old_value] = new_value
  end
  rename_categories(renames)
end
|
735
|
+
|
736
|
+
# Return the positions at which the specified values occur
# @param [Array] *values values to find positions for
# @return [Array] sorted positional indexes of the specified values;
#   values that are not categories are ignored
def positions(*values)
  known_values = values & categories
  known_values.flat_map { |value| @cat_hash[value] }.sort
end
|
740
|
+
|
741
|
+
private
|
742
|
+
|
743
|
+
# Ensure every category currently in the vector is listed in
# input_categories.
# @param [Array] input_categories categories supplied by the caller
# @raise [ArgumentError] if some existing category is missing from
#   input_categories
def validate_categories(input_categories)
  unlisted = categories - input_categories
  return if unlisted.empty?

  raise ArgumentError, 'Input categories and speculated categories mismatch'
end
|
747
|
+
|
748
|
+
# Register each given category with an empty position list.
# Note that a category already present has its position list reset.
# @param [Array] extra_categories categories to register
# @return [Array] the extra_categories argument
def add_extra_categories(extra_categories)
  extra_categories.each do |category|
    @cat_hash[category] = []
  end
end
|
751
|
+
|
752
|
+
# Build the internal representation of the categorical data.
#
# Sets @cat_hash (category => positions where it occurs, categories kept
# in order of first appearance) and @array (one integer code per element,
# the code being the category's rank in @cat_hash), then sets the plotting
# library from Daru.plotting_library.
# @param [Array] data the raw values of the vector
def initialize_core_attributes(data)
  # Create a hash to map each category to positional indexes
  grouped = data.each_with_index.group_by(&:first)
  @cat_hash = grouped.map { |cat, pairs| [cat, pairs.map(&:last)] }.to_h

  # Map each category to a unique integer for effective storage in @array
  codes = grouped.keys.each_with_index.to_h

  # To link every instance to its category, store one integer code per
  # instance. Look the codes up element by element instead of splatting
  # the whole data array into a single call, which can exhaust the VM
  # stack for large vectors.
  @array = data.map { |value| codes[value] }

  # Include plotting functionality
  self.plotting_library = Daru.plotting_library
end
|
767
|
+
|
768
|
+
# Look up the category stored at a position.
# @param [Integer] position positional index into the vector
# @return [object] the category whose integer code is stored at position
def category_from_position(position)
  code = @array[position]
  cat_from_int(code)
end
|
771
|
+
|
772
|
+
# Guard for operations that only make sense on ordered categorical data.
# @param [object] operation name of the operation, used in the error message
# @raise [ArgumentError] if the vector is not ordered
def assert_ordered(operation)
  return if ordered?

  # TODO: Change ArgumentError to something more expressive
  raise ArgumentError, "Can not apply #{operation} when vector is unordered. "\
    'To make the categorical data ordered, use #ordered = true'
end
|
778
|
+
|
779
|
+
# Build a data frame with one dummy-coded column per category.
# @param [true, false] full when falsy, the base category is left out
# @return [Daru::DataFrame] coded columns, indexed like this vector and
#   named through create_names
def dummy_coding(full)
  levels = @cat_hash.keys
  levels.delete(base_category) unless full

  columns = @cat_hash.values_at(*levels).map do |level_positions|
    dummy_code(level_positions)
  end

  Daru::DataFrame.new(columns, index: @index, order: create_names(levels))
end
|
791
|
+
|
792
|
+
# Produce a 0/1 indicator column for one category.
# @param [Array] positions positions at which the category occurs
# @return [Array] array of vector size holding 1 at the given positions
#   and 0 everywhere else
def dummy_code(positions)
  Array.new(size, 0).tap do |indicator|
    positions.each { |pos| indicator[pos] = 1 }
  end
end
|
797
|
+
|
798
|
+
# Build a data frame with one simple-coded column per category.
# @param [true, false] full when falsy, the base category is left out
# @return [Daru::DataFrame] coded columns, indexed like this vector and
#   named through create_names
def simple_coding(full)
  levels = @cat_hash.keys
  levels.delete(base_category) unless full

  columns = @cat_hash.values_at(*levels).map do |level_positions|
    simple_code(level_positions)
  end

  Daru::DataFrame.new(columns, index: @index, order: create_names(levels))
end
|
810
|
+
|
811
|
+
# Produce the simple-coding column for one category.
# With n categories, the given positions receive (n-1)/n and every other
# position receives -1/n.
# @param [Array] positions positions at which the category occurs
# @return [Array] array of vector size holding the coded values
def simple_code(positions)
  level_count = @cat_hash.size.to_f
  elsewhere = -1 / level_count
  at_level = (level_count - 1) / level_count

  coded = Array.new(size, elsewhere)
  positions.each { |pos| coded[pos] = at_level }
  coded
end
|
817
|
+
|
818
|
+
# Build a data frame of Helmert-coded columns, one for every category
# except the last. Any arguments are accepted and ignored.
# @return [Daru::DataFrame] coded columns, indexed like this vector and
#   named through create_names
def helmert_coding(*)
  levels = @cat_hash.keys[0..-2]
  columns = Array.new(levels.size) { |level_index| helmert_code(level_index) }

  Daru::DataFrame.new(columns, index: @index, order: create_names(levels))
end
|
829
|
+
|
830
|
+
# Produce the Helmert-coding column for the category with the given code.
# With n = (number of categories - index): elements of that category get
# (n-1)/n, elements of later categories get -1/n, earlier ones get 0.
# @param [Integer] index integer code of the category being coded
# @return [Array] one coded value per element of the vector
def helmert_code(index)
  remaining = (categories.size - index).to_f
  own_level = (remaining - 1) / remaining
  later_level = -1 / remaining

  @array.map do |code|
    if code == index
      own_level
    elsif code > index
      later_level
    else
      0
    end
  end
end
|
843
|
+
|
844
|
+
# Build a data frame of deviation-coded columns, one for every category
# except the last. Any arguments are accepted and ignored.
# @return [Daru::DataFrame] coded columns, indexed like this vector and
#   named through create_names
def deviation_coding(*)
  levels = @cat_hash.keys[0..-2]
  columns = Array.new(levels.size) { |level_index| deviation_code(level_index) }

  Daru::DataFrame.new(columns, index: @index, order: create_names(levels))
end
|
855
|
+
|
856
|
+
# Produce the deviation-coding column for the category with the given code.
# Elements of that category get 1, elements of the last category get -1,
# and all others get 0.
# @param [Integer] index integer code of the category being coded
# @return [Array] one coded value per element of the vector
def deviation_code(index)
  reference = categories.size - 1

  @array.map do |code|
    if index == code
      1
    elsif reference == code
      -1
    else
      0
    end
  end
end
|
866
|
+
|
867
|
+
# Code the vector using a caller-supplied coding table.
# For every position, the row of df keyed by the element at that position
# becomes the corresponding row of the result.
# @param [Daru::DataFrame] df coding table; its vectors name the result's
#   columns
# @return [Daru::DataFrame] coded rows, indexed like this vector
def user_defined_coding(df)
  coded_rows = Array.new(size) do |pos|
    df.row[at(pos)].to_a
  end

  Daru::DataFrame.rows(coded_rows, index: @index, order: df.vectors.to_a)
end
|
872
|
+
|
873
|
+
# Derive column names of the form "<vector name>_<category>".
# @param [Array] categories categories to build names for
# @return [Array] one name per category; Symbols when the vector's name
#   is a Symbol, Strings otherwise
def create_names(categories)
  categories.map do |cat|
    label = "#{name}_#{cat}"
    name.is_a?(Symbol) ? label.to_sym : label
  end
end
|
878
|
+
|
879
|
+
# Convert the given object into an index and check it against the vector.
# @param [Daru::Index, Daru::CategoricalIndex, Daru::MultiIndex, Range, Array, nil]
#   index object to coerce; nil yields a default index built from the
#   vector's size
# @return [Daru::Index, Daru::CategoricalIndex, Daru::MultiIndex] the
#   coerced index, after it has passed validate_index
# @raise [ArgumentError] if the object is of an unsupported type
def coerce_index(index)
  index =
    case index
    when Daru::MultiIndex, Daru::CategoricalIndex, Daru::Index
      # Already an index object: use it as is.
      index
    when nil
      Daru::Index.new(size)
    when Range
      Daru::Index.new(index.to_a)
    when Array
      Daru::Index.new(index)
    else
      raise ArgumentError, "Unrecognized index type #{index.class}"
    end

  validate_index(index)
  index
end
|
896
|
+
|
897
|
+
# Check that an index has exactly as many entries as the vector.
# @param [#size] index the index to check
# @raise [ArgumentError] if the index size differs from the vector size
def validate_index(index)
  return if size == index.size

  # TODO: Change to SizeError
  raise ArgumentError, "Size of index (#{index.size}) does not match "\
    "size of vector (#{size})"
end
|
902
|
+
|
903
|
+
# Change the category stored at one position, keeping @array and
# @cat_hash in step.
# @param [Integer] pos position to modify
# @param [object] category an existing category to store at pos
# @raise [ArgumentError] if category is not one of the vector's categories
def modify_category_at(pos, category)
  unless categories.include?(category)
    raise ArgumentError, "Invalid category #{category}, "\
      'to add a new category use #add_category'
  end

  # Read the old category before @array is overwritten.
  previous_category = category_from_position(pos)
  @array[pos] = int_from_cat(category)

  @cat_hash[previous_category].delete(pos)
  @cat_hash[category] << pos
end
|
912
|
+
|
913
|
+
# Reorder the categories and renumber the integer codes to match.
# @param [Array] new the categories in their new order; must contain
#   exactly the current categories
# @raise [ArgumentError] if new and the current categories differ as sets
def order_with(new)
  unless new.to_set == categories.to_set
    raise ArgumentError, 'The contents of new and old order must be the same.'
  end

  # Rebuild the hash so that its key order is the new category order.
  @cat_hash = new.each_with_object({}) do |cat, reordered|
    reordered[cat] = @cat_hash[cat]
  end

  # A category's integer code is its rank in the reordered hash.
  @array = Array.new(size)
  @cat_hash.each_with_index.map do |(_cat, positions), code|
    positions.each { |pos| @array[pos] = code }
  end
end
|
926
|
+
|
927
|
+
# Translate an integer code into its category.
# @param [Integer] int integer code, i.e. the category's rank among the
#   keys of @cat_hash
# @return [object] the category at that rank
def cat_from_int(int)
  ordered_categories = @cat_hash.keys
  ordered_categories[int]
end
|
930
|
+
|
931
|
+
# Translate a category into its integer code.
# @param [object] cat the category to look up
# @return [Integer, nil] the category's rank among the keys of @cat_hash,
#   or nil when it is not a category
def int_from_cat(cat)
  ordered_categories = @cat_hash.keys
  ordered_categories.index(cat)
end
|
934
|
+
end
|
935
|
+
end
|