daru 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +0 -0
  3. data/Gemfile +0 -1
  4. data/History.txt +35 -0
  5. data/README.md +178 -198
  6. data/daru.gemspec +5 -7
  7. data/lib/daru.rb +10 -2
  8. data/lib/daru/accessors/array_wrapper.rb +36 -198
  9. data/lib/daru/accessors/nmatrix_wrapper.rb +60 -209
  10. data/lib/daru/core/group_by.rb +183 -0
  11. data/lib/daru/dataframe.rb +615 -167
  12. data/lib/daru/index.rb +17 -16
  13. data/lib/daru/io/io.rb +5 -12
  14. data/lib/daru/maths/arithmetic/dataframe.rb +72 -8
  15. data/lib/daru/maths/arithmetic/vector.rb +19 -6
  16. data/lib/daru/maths/statistics/dataframe.rb +103 -2
  17. data/lib/daru/maths/statistics/vector.rb +102 -61
  18. data/lib/daru/monkeys.rb +8 -0
  19. data/lib/daru/multi_index.rb +199 -0
  20. data/lib/daru/plotting/dataframe.rb +24 -24
  21. data/lib/daru/plotting/vector.rb +14 -15
  22. data/lib/daru/vector.rb +402 -98
  23. data/lib/version.rb +1 -1
  24. data/notebooks/grouping_splitting_pivots.ipynb +529 -0
  25. data/notebooks/intro_with_music_data_.ipynb +104 -119
  26. data/spec/accessors/wrappers_spec.rb +36 -0
  27. data/spec/core/group_by_spec.rb +331 -0
  28. data/spec/dataframe_spec.rb +1237 -475
  29. data/spec/fixtures/sales-funnel.csv +18 -0
  30. data/spec/index_spec.rb +10 -21
  31. data/spec/io/io_spec.rb +4 -14
  32. data/spec/math/arithmetic/dataframe_spec.rb +66 -0
  33. data/spec/math/arithmetic/vector_spec.rb +45 -4
  34. data/spec/math/statistics/dataframe_spec.rb +91 -1
  35. data/spec/math/statistics/vector_spec.rb +32 -6
  36. data/spec/monkeys_spec.rb +10 -1
  37. data/spec/multi_index_spec.rb +216 -0
  38. data/spec/spec_helper.rb +1 -0
  39. data/spec/vector_spec.rb +505 -57
  40. metadata +21 -15
@@ -50,4 +50,12 @@ class Numeric
50
50
  def square
51
51
  self * self
52
52
  end
53
+ end
54
+
55
class Matrix
  # Divides every element of this matrix by the element found at the same
  # running position in +other+.
  #
  # == Arguments
  #
  # * +other+ - Any object whose #to_a result can be flattened into a list of
  #   divisors (for example another Matrix or an Array of Arrays). Divisors
  #   are looked up by the index that Matrix#map.with_index hands to the block.
  #
  # Returns the object produced by Matrix#map for this receiver.
  def elementwise_division other
    # Flatten the divisors once up front; doing it inside the block rebuilt
    # the whole list for every single element.
    divisors = other.to_a.flatten

    map.with_index do |e, index|
      e / divisors[index]
    end
  end
end
@@ -0,0 +1,199 @@
1
module Daru
  # Class for holding multi index on Vector and DataFrame.
  #
  # Tuples are stored in +relation_hash+, a nested Hash with one nesting level
  # per tuple position. The innermost value of every path is the identifier
  # number assigned to that tuple.
  class MultiIndex
    include Enumerable

    attr_reader :relation_hash
    attr_reader :size
    attr_reader :values

    # Initialize a MultiIndex by passing a tuple of indexes. The order assigned
    # to the multi index corresponds to the position of the tuple in the array
    # of tuples.
    #
    # Although you can create your own hierarchically indexed Vectors and DataFrames,
    # this class currently contains minimal error checking and is mainly used
    # internally for summarizing, splitting and grouping of data.
    #
    # == Arguments
    #
    # * +source+ - The array of arrays from which the multi index is to be created.
    # * +values+ - Optional array of identifier numbers, one per tuple. Defaults
    #   to 0, 1, 2, ... in the order of +source+.
    #
    # == Usage
    #
    #   tuples = [:a,:a,:b,:b].zip([:one,:two,:one,:two])
    #   #=> [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
    #   Daru::MultiIndex.new(tuples)
    def initialize source, values=nil
      @relation_hash = {}
      @size = source.size
      values = Array.new(source.size) { |i| i } if values.nil?
      create_relation_hash source, values
      @relation_hash.freeze
      @values = values
    end

    # Passes every tuple (see #to_a) to the block and returns self.
    def each(&block)
      to_a.each(&block)
      self
    end

    # Maps over the tuples (see #to_a) and returns a plain Array.
    def map(&block)
      to_a.map(&block)
    end

    # Look up a tuple, a partial tuple, a first-level position or a range of
    # first-level positions.
    #
    # * Symbol(s) - returns the identifier number for a complete tuple, a new
    #   MultiIndex of the remaining levels for a partial tuple, or nil when the
    #   tuple does not exist.
    # * Integer - resolves the first-level key at that position and looks it up.
    # * Range - returns a MultiIndex made of the first-level keys at those
    #   positions (both ends of the range are treated as inclusive).
    #
    # Any other kind of first argument yields nil.
    def [] *indexes
      indexes.flatten!
      location = indexes[0]

      if location.is_a?(Symbol)
        result = read_relation_hash @relation_hash, indexes, 0
        return nil if result.nil?
        result.is_a?(Integer) ? result : Daru::MultiIndex.new(*make_tuples(result))
      else
        case location
        when Integer
          self[@relation_hash.keys[location]]
        when Range
          # The key list does not change while iterating, so build it once.
          keys = @relation_hash.keys
          hsh  = {}
          location.first.upto(location.last) do |position|
            key = keys[position]
            hsh[key] = @relation_hash[key]
          end
          Daru::MultiIndex.new(*make_tuples(hsh))
        end
      end
    end

    # Compare two MultiIndex objects for equality based on the contents of their
    # relation hashes. Does not take object_id into account. Objects that do not
    # expose +size+ and +relation_hash+ are never equal to a MultiIndex.
    def == other
      return false unless other.respond_to?(:size) && other.respond_to?(:relation_hash)
      return false if size != other.size

      # Hash#== already compares nested hashes key by key.
      @relation_hash == other.relation_hash
    end

    # Convert a MultiIndex back to tuples (array of arrays). Tuples are emitted
    # in the order of their identifier numbers inside +values+.
    def to_a
      make_tuples(@relation_hash)[0]
    end

    # Duplicate a MultiIndex by rebuilding it from its tuples. Only the tuples
    # are carried over: the copy is created without explicit identifier numbers
    # and is therefore numbered sequentially from 0.
    def dup
      Daru::MultiIndex.new to_a
    end

    # Check whether a tuple exists in the multi index.
    # The argument *tuple* can be either a complete or incomplete tuple, or a
    # single top-level key. The argument itself is never modified.
    def include? tuple
      lookup = tuple.is_a?(Array) ? tuple.flatten : [tuple]
      !!read_relation_hash(@relation_hash, lookup, 0)
    end

    # Obtain the tuple that corresponds with the identifier number.
    #
    # == Arguments
    #
    # * +key+ - A number for which the tuple is to be obtained.
    #
    # == Usage
    #
    #   mi.key(3) #=> [:a,:two,:baz]
    #
    # Returns nil when no tuple carries that number.
    def key key
      tuple = find_tuple_for(@relation_hash, key)
      tuple.empty? ? nil : tuple
    end

   private

    # Create tuples out of the relation hash based on the order of the identifier
    # numbers. Returns an array of arrays containing the tuples and another
    # containing their corresponding index numbers. Numbers from +values+ that
    # are not present in *relation_hash* are left out of both arrays.
    def make_tuples relation_hash
      tuples   = []
      new_vals = []
      values.each do |number|
        tuple = find_tuple_for(relation_hash, number)
        next if tuple.empty?

        tuples   << tuple
        new_vals << number
      end
      [tuples, new_vals]
    end

    # Finds and returns a single tuple for a particular identifier number.
    # Returns an empty Array when the number is not found.
    def find_tuple_for relation_hash, number
      tuple = []
      search_for_number number, relation_hash, tuple
      # Keys are collected innermost-first while the recursion unwinds.
      tuple.reverse
    end

    # Search for a number and store its corresponding tuple in *tuple*. Returns
    # true if the number is successfully found.
    def search_for_number number, relation_hash, tuple
      found = false
      relation_hash.each do |key, value|
        matched =
          if value.is_a?(Hash)
            search_for_number(number, value, tuple)
          else
            value == number
          end
        next unless matched

        tuple << key
        found = true
      end

      found
    end

    # Read the relation hash and return a sub-relation hash or the number to which
    # indexes belong. Returns nil when a level named in *indexes* does not exist
    # or when *indexes* is longer than the hierarchy is deep.
    def read_relation_hash relation_hash, indexes, index
      value = relation_hash[indexes[index]]
      return value if indexes[index+1].nil?

      # More identifiers remain but there is nothing left to descend into;
      # indexing into nil or an Integer here would be an error.
      return nil unless value.is_a?(Hash)

      read_relation_hash(value, indexes, index+1)
    end

    # Create the relation hash from supplied tuples.
    def create_relation_hash source, values
      source.each_with_index do |tuple, idx|
        populate @relation_hash, tuple, 0, values[idx]
      end
    end

    # Insert one tuple into *relation_hash*, creating intermediate hashes on the
    # way down and storing *number* at the last identifier of the tuple.
    def populate relation_hash, tuple, index, number
      identifier = tuple[index]

      if identifier
        if tuple[index+1]
          relation_hash[identifier] ||= {}
        else
          relation_hash[identifier] = number
          return
        end
        populate relation_hash[identifier], tuple, index+1, number
      end
    end
  end
end
@@ -7,40 +7,40 @@ end
7
7
  module Daru
8
8
  module Plotting
9
9
  module DataFrame
10
- # Plots a DataFrame with Nyaplot on IRuby using the given options.
11
- # == Arguments
12
- # +x+ - Vector name to be used for x-axis
13
- # +y+ - Vector name to be used for y-axis
10
+ # Plots a DataFrame with Nyaplot on IRuby using the given options. Yields
11
+ # the corresponding Nyaplot::Plot object and the Nyaplot::Diagram object
12
+ # to the block, if it is specified. See the nyaplot docs for info on how to
13
+ # further use these objects.
14
+ #
14
15
  # == Options
15
- # type - Type of plot (scatter, bar, histogram)
16
- # title - Title of plot
17
- # x_label - X - label
18
- # y_label - Y - label
19
- # tooltip_contents - Contents of the tooltip. Array of vector names
20
- # fill_by - Vector name by which each plotted element is colored
21
- # shape_by- Vector name by which dots in a scatter plot are shaped
16
+ # +:type+ - Type of plot (scatter, bar, histogram)
17
+ # +:legends+ - The names of the vectors that are to be used as X and Y axes.
18
+ # The vectors names must be specified as symbols inside an Array. They
19
+ # also should be specified in the right order. For example, passing [:a, :b]
20
+ # will keep vector :a as the X axis and :b as the Y axis. Passing [:a]
21
+ # keep :a as the X axis and plot the frequency with which :a appears
22
+ # on the Y axis.
23
+ # +:frame+ - Pass this as *true* to disable plotting the graph directly
24
+ # and instead manually create Nyaplot::Frame object inside the block using
25
+ # the Nyaplot::Plot object for plotting one or many graphs in a frame.
26
+ #
22
27
  # == Usage
23
28
  # df = Daru::DataFrame.new({a:[0,1,2,3,4], b:[10,20,30,40,50]})
24
- # df.plot :a, :b, type: :bar, title: "Awesome plot"
25
- def plot x, y, opts={}
29
+ # df.plot legends: [:a, :b], type: :bar
30
+ def plot opts={}
26
31
  options = {
27
32
  type: :scatter,
28
- title: "#{@name}",
33
+ frame: false,
34
+ legends: []
29
35
  }.merge(opts)
30
36
 
31
37
  plot = Nyaplot::Plot.new
32
- p = plot.add_with_df(Nyaplot::DataFrame.new(self.to_a[0]), options[:type], x, y)
33
- plot.x_label options[:x_label] if options[:x_label]
34
- plot.y_label options[:y_label] if options[:y_label]
35
- p.tooltip_contents options[:tooltip_contents] if options[:tooltip_contents]
38
+ diagram = plot.add_with_df(Nyaplot::DataFrame.new(self.to_a[0]),
39
+ options[:type], *options[:legends])
36
40
 
37
- if options[:fill_by] or options[:shape_by]
38
- p.color Nyaplot::Colors.qual
39
- p.fill_by options[:fill_by] if options[:fill_by]
40
- p.shape_by options[:shape_by] if options[:shape_by]
41
- end
41
+ yield(plot, diagram) if block_given?
42
42
 
43
- plot.show
43
+ plot.show unless options[:frame]
44
44
  end
45
45
  end
46
46
  end
@@ -8,32 +8,31 @@ module Daru
8
8
  module Plotting
9
9
  module Vector
10
10
 
11
- # Plots a Vector with Nyaplot on IRuby using the given options.
11
+ # Plots a Vector with Nyaplot on IRuby using the given options. Yields the
12
+ # plot object (Nyaplot::Plot) and the diagram object (Nyaplot::Diagram)
13
+ # to the block, which can be used for setting various options as per the
14
+ # Nyaplot API.
15
+ #
12
16
  # == Options
13
17
  # type (:scatter, :bar, :histogram), title, x_label, y_label, color(true/false)
14
18
  #
15
19
  # == Usage
16
20
  # vector = Daru::Vector.new [10,20,30,40], [:one, :two, :three, :four]
17
- # vector.plot type: :bar, title: "My first plot", color: true
18
- def plot opts={}
21
+ # vector.plot(type: :bar) do |plot|
22
+ # plot.title "My first plot"
23
+ # plot.width 1200
24
+ # end
25
+ def plot opts={}, &block
19
26
  options = {
20
- type: :scatter,
21
- title: "#{@name}",
22
- x_label: '',
23
- y_label: '',
24
- color: false
27
+ type: :scatter
25
28
  }.merge(opts)
26
29
 
27
30
  x_axis = options[:type] == :scatter ? Array.new(@size) { |i| i } : @index.to_a
28
31
  plot = Nyaplot::Plot.new
29
- plot.width(options[:width]) if options[:width]
30
- plot.height(options[:height]) if options[:height]
31
-
32
- p = plot.add( options[:type], x_axis, @vector.to_a )
33
- plot.x_label( options[:x_label] ) if options[:x_label]
34
- plot.y_label( options[:y_label] ) if options[:y_label]
35
- p.color( Nyaplot::Colors.qual ) if options[:color]
32
+ diagram = plot.add( options[:type], x_axis, @data.to_a )
36
33
 
34
+ yield plot, diagram if block_given?
35
+
37
36
  plot.show
38
37
  end
39
38
  end
@@ -14,17 +14,23 @@ module Daru
14
14
  include Daru::Plotting::Vector
15
15
 
16
16
  def each(&block)
17
- @vector.each(&block)
17
+ return to_enum(:each) unless block_given?
18
+
19
+ @data.each(&block)
20
+ self
18
21
  end
19
22
 
20
23
  def map!(&block)
21
- @vector.map!(&block)
24
+ return to_enum(:map!) unless block_given?
22
25
 
26
+ @data.map!(&block)
23
27
  self
24
28
  end
25
29
 
26
30
  def map(&block)
27
- Daru::Vector.new @vector.map(&block), name: @name, index: @index, dtype: @dtype
31
+ return to_enum(:map) unless block_given?
32
+
33
+ Daru::Vector.new @data.map(&block), name: @name, index: @index, dtype: @dtype
28
34
  end
29
35
 
30
36
  alias_method :recode, :map
@@ -33,22 +39,29 @@ module Daru
33
39
  attr_reader :index
34
40
  attr_reader :size
35
41
  attr_reader :dtype
42
+ attr_reader :nm_dtype
43
+ attr_reader :nil_positions
36
44
 
37
45
  # Create a Vector object.
38
46
  # == Arguments
39
47
  #
40
48
  # @param source[Array,Hash] - Supply elements in the form of an Array or a Hash. If Array, a
41
- # numeric index will be created if not supplied in the options. Specifying more
42
- # index elements than actual values in *source* will insert *nil* into the
43
- # surplus index elements. When a Hash is specified, the keys of the Hash are
44
- # taken as the index elements and the corresponding values as the values that
45
- # populate the vector.
49
+ # numeric index will be created if not supplied in the options. Specifying more
50
+ # index elements than actual values in *source* will insert *nil* into the
51
+ # surplus index elements. When a Hash is specified, the keys of the Hash are
52
+ # taken as the index elements and the corresponding values as the values that
53
+ # populate the vector.
46
54
  #
47
55
  # == Options
48
56
  #
49
- # * +:name+ - Name of the vector
57
+ # * +:name+ - Name of the vector
58
+ #
59
+ # * +:index+ - Index of the vector
50
60
  #
51
- # * +:index+ - Index of the vector
61
+ # * +:dtype+ - The underlying data type. Can be :array or :nmatrix. Default :array.
62
+ #
63
+ # * +:nm_dtype+ - For NMatrix, the data type of the numbers. See the NMatrix docs for
64
+ # further information on supported data type.
52
65
  #
53
66
  # == Usage
54
67
  #
@@ -64,76 +77,112 @@ module Daru
64
77
  source = source || []
65
78
  end
66
79
  name = opts[:name]
67
- @dtype = opts[:dtype] || Array
68
-
69
80
  set_name name
70
81
 
71
- @vector =
72
- case
73
- when @dtype == Array
74
- Daru::Accessors::ArrayWrapper.new source.dup, self
75
- when @dtype == NMatrix
76
- Daru::Accessors::NMatrixWrapper.new source.dup, self
77
- when @dtype == MDArray
78
- Daru::Accessors::MDArrayWrapper.new source.dup
79
- when @dtype == Range, Matrix
80
- Daru::Accessors::ArrayWrapper.new source.to_a.dup, self
81
- end
82
-
83
- if index.nil?
84
- @index = Daru::Index.new @vector.size
85
- else
86
- @index = index.to_index
87
- end
88
- # TODO: Will need work for NMatrix/MDArray
89
- if @index.size > @vector.size
90
- self.coerce Array # NM with nils seg faults
91
- (@index.size - @vector.size).times { @vector << nil }
92
- elsif @index.size < @vector.size
93
- raise IndexError, "Expected index size >= vector size"
82
+ @data = cast_vector_to(opts[:dtype], source, opts[:nm_dtype])
83
+ @index = create_index(index || @data.size)
84
+
85
+ if @index.size > @data.size
86
+ cast(dtype: :array) # NM with nils seg faults
87
+ (@index.size - @data.size).times { @data << nil }
88
+ elsif @index.size < @data.size
89
+ raise IndexError, "Expected index size >= vector size. Index size : #{@index.size}, vector size : #{@data.size}"
94
90
  end
95
91
 
92
+ @possibly_changed_type = true
93
+ set_nil_positions
96
94
  set_size
97
95
  end
98
96
 
99
- # Get one or more elements with specified index.
97
+ # Get one or more elements with specified index or a range.
100
98
  #
101
99
  # == Usage
100
+ # # For vectors employing single layer Index
101
+ #
102
102
  # v[:one, :two] # => Daru::Vector with indexes :one and :two
103
103
  # v[:one] # => Single element
104
- def [](index, *indexes)
105
- if indexes.empty?
106
- case index
107
- when Range
108
- # range into vector
109
- #
104
+ # v[:one..:three] # => Daru::Vector with indexes :one, :two and :three
105
+ #
106
+ # # For vectors employing hierarchical multi index
107
+ #
108
+ def [](*indexes)
109
+ location = indexes[0]
110
+ if @index.is_a?(MultiIndex)
111
+ result =
112
+ if location.is_a?(Integer)
113
+ element_from_numeric_index(location)
114
+ elsif location.is_a?(Range)
115
+ arry = location.inject([]) do |memo, num|
116
+ memo << element_from_numeric_index(num)
117
+ memo
118
+ end
119
+
120
+ new_index = Daru::MultiIndex.new(@index.to_a[location])
121
+ Daru::Vector.new(arry, index: new_index, name: @name, dtype: dtype)
110
122
  else
111
- if @index.include? index
112
- @vector[@index[index]]
113
- elsif index.is_a?(Numeric)
114
- @vector[index]
123
+ sub_index = @index[indexes]
124
+
125
+ if sub_index.is_a?(Integer)
126
+ element_from_numeric_index(sub_index)
115
127
  else
116
- return nil
128
+ elements = sub_index.map do |tuple|
129
+ @data[@index[(indexes + tuple)]]
130
+ end
131
+ Daru::Vector.new(elements, index: Daru::MultiIndex.new(sub_index.to_a),
132
+ name: @name, dtype: @dtype)
117
133
  end
118
134
  end
135
+
136
+ return result
119
137
  else
120
- indexes.unshift index
138
+ unless indexes[1]
139
+ case location
140
+ when Range
141
+ range =
142
+ if location.first.is_a?(Numeric)
143
+ location
144
+ else
145
+ first = location.first
146
+ last = location.last
147
+
148
+ (first..last)
149
+ end
150
+ indexes = @index[range]
151
+ else
152
+ return element_from_numeric_index(location)
153
+ end
154
+ end
121
155
 
122
- Daru::Vector.new indexes.map { |index| @vector[@index[index]] },name: @name,
123
- index: indexes
156
+ Daru::Vector.new indexes.map { |loc| @data[index_for(loc)] }, name: @name,
157
+ index: indexes.map { |e| named_index_for(e) }, dtype: @dtype
124
158
  end
125
159
  end
126
160
 
127
- def []=(index, value)
128
- @vector = @vector.coerce(Array) if value.nil?
161
+ def []=(*location, value)
162
+ cast(dtype: :array) if value.nil? and dtype != :array
129
163
 
130
- if @index.include? index
131
- @vector[@index[index]] = value
164
+ @possibly_changed_type = true if @type == :object and (value.nil? or
165
+ value.is_a?(Numeric))
166
+ @possibly_changed_type = true if @type == :numeric and (!value.is_a?(Numeric) and
167
+ !value.nil?)
168
+
169
+ pos =
170
+ if @index.is_a?(MultiIndex) and !location[0].is_a?(Integer)
171
+ index_for location
172
+ else
173
+ index_for location[0]
174
+ end
175
+
176
+ if pos.is_a?(MultiIndex)
177
+ pos.each do |sub_tuple|
178
+ self[*(location + sub_tuple)] = value
179
+ end
132
180
  else
133
- @vector[index] = value
181
+ @data[pos] = value
134
182
  end
135
183
 
136
184
  set_size
185
+ set_nil_positions
137
186
  end
138
187
 
139
188
  # Two vectors are equal if they have the exact same index values corresponding
@@ -158,8 +207,12 @@ module Daru
158
207
  concat element
159
208
  end
160
209
 
161
- def re_index new_index
162
-
210
# Returns the first +q+ elements (10 by default) by slicing self with a
# numeric range through #[].
#
# NOTE(review): +q+ is presumed to be a positive Integer; confirm how #[]
# treats the range 0..-1 before relying on q == 0.
def head q=10
  # The range is inclusive, so it has to stop at q-1 to yield q elements.
  self[0..(q - 1)]
end
213
+
214
+ def tail q=10
215
+ self[-q..-1]
163
216
  end
164
217
 
165
218
  # Append an element to the vector by specifying the element and index
@@ -167,30 +220,31 @@ module Daru
167
220
  raise IndexError, "Expected new unique index" if @index.include? index
168
221
 
169
222
  if index.nil? and @index.index_class == Integer
170
- @index = Daru::Index.new @size+1
223
+ @index = create_index(@size + 1)
171
224
  index = @size
172
225
  else
173
226
  begin
174
- @index = @index.re_index(@index + index)
175
- rescue Exception => e
227
+ @index = create_index(@index + index)
228
+ rescue StandardError => e
176
229
  raise e, "Expected valid index."
177
230
  end
178
231
  end
179
-
180
- @vector[@index[index]] = element
181
-
232
+ @data[@index[index]] = element
182
233
  set_size
234
+ set_nil_positions
183
235
  end
184
236
 
185
- def coerce dtype
186
- begin
187
- @vector = @vector.coerce @dtype
188
- @dtype = dtype
189
- rescue StandardError => e
190
- puts "Cannot convert to #{dtype} because of data type mismatch. #{e}"
191
- end
237
+ # Cast a vector to a new data type.
238
+ #
239
+ # == Options
240
+ #
241
+ # * +:dtype+ - :array for Ruby Array. :nmatrix for NMatrix.
242
+ def cast opts={}
243
+ dtype = opts[:dtype]
244
+ raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless
245
+ dtype == :array or dtype == :nmatrix
192
246
 
193
- self
247
+ @data = cast_vector_to dtype
194
248
  end
195
249
 
196
250
  # Delete an element by value
@@ -201,25 +255,51 @@ module Daru
201
255
  # Delete element by index
202
256
  def delete_at index
203
257
  idx = named_index_for index
204
- @vector.delete_at @index[idx]
258
+ @data.delete_at @index[idx]
205
259
 
206
260
  if @index.index_class == Integer
207
261
  @index = Daru::Index.new @size-1
208
262
  else
209
- @index = (@index.to_a - [idx]).to_index
263
+ @index = Daru::Index.new (@index.to_a - [idx])
210
264
  end
211
265
 
212
266
  set_size
267
+ set_nil_positions
268
+ end
269
+
270
# Reports the kind of data held by the vector: :numeric or :object. When the
# dtype is :nmatrix, the nm_dtype of the underlying data object is returned
# instead.
#
# The scan over the elements is deferred: it only runs when no type has been
# computed yet or when @possibly_changed_type is set, and the answer is then
# cached in @type.
def type
  return @data.nm_dtype if dtype == :nmatrix
  return @type unless @type.nil? or @possibly_changed_type

  @type = :numeric
  each do |element|
    # nils are ignored; the first non-numeric value settles the answer.
    next if element.nil? or element.is_a?(Numeric)

    @type = :object
    break
  end
  @possibly_changed_type = false

  @type
end
214
294
 
215
295
  # Get index of element
216
296
  def index_of element
217
- @index.key @vector.index(element)
297
+ @index.key @data.index(element)
218
298
  end
219
299
 
220
300
  # Keep only unique elements of the vector along with their indexes.
221
301
  def uniq
222
- uniq_vector = @vector.uniq
302
+ uniq_vector = @data.uniq
223
303
  new_index = uniq_vector.inject([]) do |acc, element|
224
304
  acc << index_of(element)
225
305
  acc
@@ -228,21 +308,123 @@ module Daru
228
308
  Daru::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
229
309
  end
230
310
 
231
- # def sort ascending=true
232
- # if ascending
233
-
234
- # end
235
- # end
311
+ # Sorts a vector according to its values. If a block is specified, the contents
312
+ # will be evaluated and data will be swapped whenever the block evaluates
313
+ # to *true*. Defaults to ascending order sorting. Any missing values will be
314
+ # put at the end of the vector. Preserves indexing. Default sort algorithm is
315
+ # quick sort.
316
+ #
317
+ # == Options
318
+ #
319
+ # * +:ascending+ - if false, will sort in descending order. Defaults to true.
320
+ #
321
+ # * +:type+ - Specify the sorting algorithm. Only supports quick_sort for now.
322
+ # == Usage
323
+ #
324
+ # v = Daru::Vector.new ["My first guitar", "jazz", "guitar"]
325
+ # # Say you want to sort these strings by length.
326
+ # v.sort { |a,b| a.length <=> b.length }
327
+ def sort opts={}, &block
328
+ opts = {
329
+ ascending: true,
330
+ type: :quick_sort
331
+ }.merge(opts)
332
+
333
+ block = lambda { |a,b| a <=> b } unless block
334
+
335
+ order = opts[:ascending] ? :ascending : :descending
336
+ vector, index = send(opts[:type], @data.to_a.dup, @index.to_a, order, &block)
337
+ index = @index.is_a?(MultiIndex) ? Daru::MultiIndex.new(index) : index
338
+
339
+ Daru::Vector.new(vector, index: create_index(index), name: @name, dtype: @dtype)
340
+ end
341
+
342
+ # Just sort the data and get an Array in return using Enumerable#sort. Non-destructive.
343
+ def sorted_data &block
344
+ @data.to_a.sort(&block)
345
+ end
236
346
 
237
- def is_valid? value
347
+ # Returns *true* if the value passed actually exists in the vector.
348
+ def exists? value
238
349
  !self[index_of(value)].nil?
239
350
  end
240
351
 
352
+ # Returns a vector which has *true* in the position where the element in self
353
+ # is nil, and false otherwise.
354
+ #
355
+ # == Usage
356
+ #
357
+ # v = Daru::Vector.new([1,2,4,nil])
358
+ # v.is_nil?
359
+ # # =>
360
+ # #<Daru::Vector:89421000 @name = nil @size = 4 >
361
+ # # nil
362
+ # # 0 false
363
+ # # 1 false
364
+ # # 2 false
365
+ # # 3 true
366
def is_nil?
  # Start from the structure returned by clone_structure and record, for
  # every entry of @index, whether the element stored here is nil.
  flags = clone_structure
  @index.each { |idx| flags[idx] = self[idx].nil? }
  flags
end
374
+
375
+ # Opposite of #is_nil?
376
def not_nil?
  # Start from the structure returned by clone_structure and record, for
  # every entry of @index, whether the element stored here is anything but nil.
  flags = clone_structure
  @index.each { |idx| flags[idx] = !self[idx].nil? }
  flags
end
384
+
385
+ # Replace all nils in the vector with the value passed as an argument. Destructive.
386
+ # See #replace_nils for non-destructive version
387
+ #
388
+ # == Arguments
389
+ #
390
+ # * +replacement+ - The value which should replace all nils
391
+ def replace_nils! replacement
392
+ nil_positions.each do |idx|
393
+ self[idx] = replacement
394
+ end
395
+
396
+ self
397
+ end
398
+
399
+ # Non-destructive version of #replace_nils!
400
+ def replace_nils replacement
401
+ self.dup.replace_nils!(replacement)
402
+ end
403
+
404
+ def n_valid
405
+ @size
406
+ end
407
+
241
408
  # Returns *true* if an index exists
242
409
  def has_index? index
243
410
  @index.include? index
244
411
  end
245
412
 
413
+ # Convert Vector to a horizontal or vertical Ruby Matrix.
414
+ #
415
+ # == Arguments
416
+ #
417
+ # * +axis+ - Specify whether you want a *:horizontal* or a *:vertical* matrix.
418
def to_matrix axis=:horizontal
  case axis
  # A single row holding every element.
  when :horizontal then Matrix[to_a]
  # A single column holding every element.
  when :vertical   then Matrix.columns([to_a])
  else
    raise ArgumentError, "axis should be either :horizontal or :vertical, not #{axis}"
  end
end
427
+
246
428
  # Convert to hash. Hash keys are indexes and values are the corresponding elements
247
429
  def to_hash
248
430
  @index.inject({}) do |hsh, index|
@@ -253,7 +435,7 @@ module Daru
253
435
 
254
436
  # Return an array
255
437
  def to_a
256
- @vector.to_a
438
+ @data.to_a
257
439
  end
258
440
 
259
441
  # Convert the hash from to_hash to json
@@ -283,10 +465,10 @@ module Daru
283
465
  end
284
466
 
285
467
  # Overrides original inspect for pretty printing in irb
286
- def inspect spacing=10, threshold=15
468
+ def inspect spacing=20, threshold=15
287
469
  longest = [@name.to_s.size,
288
470
  @index.to_a.map(&:to_s).map(&:size).max,
289
- @vector .map(&:to_s).map(&:size).max,
471
+ @data .map(&:to_s).map(&:size).max,
290
472
  'nil'.size].max
291
473
 
292
474
  content = ""
@@ -297,7 +479,7 @@ module Daru
297
479
 
298
480
  content += sprintf formatter, "", name
299
481
  @index.each_with_index do |index, num|
300
- content += sprintf formatter, index.to_s, (self[index] || 'nil').to_s
482
+ content += sprintf formatter, index.to_s, (self[*index] || 'nil').to_s
301
483
  if num > threshold
302
484
  content += sprintf formatter, '...', '...'
303
485
  break
@@ -308,27 +490,38 @@ module Daru
308
490
  content
309
491
  end
310
492
 
311
- # def compact!
312
- # TODO: Compact and also take care of indexes
313
- # @vector.compact!
314
- # set_size
315
- # end
493
+ # Create a new vector with a different index.
494
+ #
495
+ # @param new_index [Symbol, Array, Daru::Index] The new index. Passing *:seq*
496
+ # will reindex with sequential numbers from 0 to (n-1).
497
+ def reindex new_index
498
+ index = create_index(new_index == :seq ? @size : new_index)
499
+ Daru::Vector.new @data.to_a, index: index, name: name, dtype: @dtype
500
+ end
316
501
 
317
502
  # Give the vector a new name
503
+ #
504
+ # @param new_name [Symbol] The new name.
318
505
  def rename new_name
319
506
  @name = new_name.to_sym
320
507
  end
321
508
 
322
509
  # Duplicate elements and indexes
323
510
  def dup
324
- Daru::Vector.new @vector.dup, name: @name, index: @index.dup
511
+ Daru::Vector.new @data.dup, name: @name, index: @index.dup
512
+ end
513
+
514
+ # Copies the structure of the vector (i.e the index, size, etc.) and fills all
515
+ # values with nils.
516
+ def clone_structure
517
+ Daru::Vector.new(([nil]*@size), name: @name, index: @index.dup)
325
518
  end
326
519
 
327
520
  def daru_vector *name
328
521
  self
329
522
  end
330
523
 
331
- alias_method :dv, :daru_vector
524
+ alias :dv :daru_vector
332
525
 
333
526
  def method_missing(name, *args, &block)
334
527
  if name.match(/(.+)\=/)
@@ -342,6 +535,88 @@ module Daru
342
535
 
343
536
  private
344
537
 
538
+ def quick_sort vector, index, order, &block
539
+ recursive_quick_sort vector, index, order, 0, @size-1, &block
540
+ [vector, index]
541
+ end
542
+
543
+ def recursive_quick_sort vector, index, order, left_lower, right_upper, &block
544
+ if left_lower < right_upper
545
+ left_upper, right_lower = partition(vector, index, order, left_lower, right_upper, &block)
546
+ if left_upper - left_lower < right_upper - right_lower
547
+ recursive_quick_sort(vector, index, order, left_lower, left_upper, &block)
548
+ recursive_quick_sort(vector, index, order, right_lower, right_upper, &block)
549
+ else
550
+ recursive_quick_sort(vector, index, order, right_lower, right_upper, &block)
551
+ recursive_quick_sort(vector, index, order, left_lower, left_upper, &block)
552
+ end
553
+ end
554
+ end
555
+
556
+ def partition vector, index, order, left_lower, right_upper, &block
557
+ mindex = (left_lower + right_upper) / 2
558
+ mvalue = vector[mindex]
559
+ i = left_lower
560
+ j = right_upper
561
+ opposite_order = order == :ascending ? :descending : :ascending
562
+
563
+ i += 1 while(keep?(vector[i], mvalue, order, &block))
564
+ j -= 1 while(keep?(vector[j], mvalue, opposite_order, &block))
565
+
566
+ while i < j - 1
567
+ vector[i], vector[j] = vector[j], vector[i]
568
+ index[i], index[j] = index[j], index[i]
569
+ i += 1
570
+ j -= 1
571
+
572
+ i += 1 while(keep?(vector[i], mvalue, order, &block))
573
+ j -= 1 while(keep?(vector[j], mvalue, opposite_order, &block))
574
+ end
575
+
576
+ if i <= j
577
+ if i < j
578
+ vector[i], vector[j] = vector[j], vector[i]
579
+ index[i], index[j] = index[j], index[i]
580
+ end
581
+ i += 1
582
+ j -= 1
583
+ end
584
+
585
+ [j,i]
586
+ end
587
+
588
# Decides whether element +a+ is already on the correct side of pivot +b+
# for the requested +order+, using the caller-supplied comparator block.
#
# * +a+, +b+ - The two values to compare. If either is nil the answer is false.
# * +order+  - :ascending or :descending. Any other value yields false.
# * +block+  - Comparator called as block.call(a, b). Only the sign of its
#   numeric result is used, so comparators such as
#   { |a, b| a.length - b.length } are handled the same way as <=>.
#
# Returns true when the comparator result is negative (ascending) or positive
# (descending); false otherwise, including when the comparator does not return
# a number (for example nil for incomparable values).
def keep? a, b, order, &block
  return false if a.nil? or b.nil?

  comparison = block.call(a, b)
  return false unless comparison.is_a?(Numeric)

  case order
  when :ascending  then comparison < 0
  when :descending then comparison > 0
  else false
  end
end
600
+
601
+ # Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
602
+ # @dtype variable is set and the underlying data type of vector changed.
603
+ def cast_vector_to dtype, source=nil, nm_dtype=nil
604
+ source = @data if source.nil?
605
+ return @data if @dtype and @dtype == dtype
606
+
607
+ new_vector =
608
+ case dtype
609
+ when :array then Daru::Accessors::ArrayWrapper.new(source.to_a.dup, self)
610
+ when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source.to_a.dup,
611
+ self, nm_dtype)
612
+ when :mdarray then raise NotImplementedError, "MDArray not yet supported."
613
+ else Daru::Accessors::ArrayWrapper.new(source.dup, self)
614
+ end
615
+
616
+ @dtype = dtype || :array
617
+ new_vector
618
+ end
619
+
345
620
  def named_index_for index
346
621
  if @index.include? index
347
622
  index
@@ -352,18 +627,47 @@ module Daru
352
627
  end
353
628
  end
354
629
 
630
# Translates +index+ into whatever @index maps it to. A value unknown to
# @index is handed back unchanged when it is Numeric; anything else gives nil.
def index_for index
  return @index[index] if @index.include?(index)

  index if index.is_a?(Numeric)
end
637
+
355
638
  def set_size
356
- @size = @vector.size
639
+ @size = @data.size
357
640
  end
358
641
 
359
642
  def set_name name
360
- if name.is_a?(Numeric)
361
- @name = name
362
- elsif name # anything but Numeric or nil
363
- @name = name.to_sym
643
+ @name =
644
+ if name.is_a?(Numeric) then name
645
+ elsif name.is_a?(Array) then name.join.to_sym # in case of MultiIndex tuple
646
+ elsif name then name.to_sym # anything but Numeric or nil
364
647
  else
365
- @name = nil
648
+ nil
366
649
  end
367
650
  end
651
+
652
+ def set_nil_positions
653
+ @nil_positions = []
654
+ @index.each do |e|
655
+ @nil_positions << e if(self[e].nil?)
656
+ end
657
+ @nil_positions.uniq!
658
+ end
659
+
660
# Returns +potential_index+ untouched when it already is a Daru::MultiIndex
# or Daru::Index; otherwise passes it to Daru::Index.new.
def create_index potential_index
  case potential_index
  when Daru::MultiIndex, Daru::Index then potential_index
  else Daru::Index.new(potential_index)
  end
end
667
+
668
# Resolves +location+ through index_for and reads that slot of @data.
# Gives nil when index_for cannot resolve the location.
def element_from_numeric_index location
  pos = index_for location
  return nil unless pos

  @data[pos]
end
368
672
  end
369
673
  end