daru 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +0 -0
  3. data/Gemfile +0 -1
  4. data/History.txt +35 -0
  5. data/README.md +178 -198
  6. data/daru.gemspec +5 -7
  7. data/lib/daru.rb +10 -2
  8. data/lib/daru/accessors/array_wrapper.rb +36 -198
  9. data/lib/daru/accessors/nmatrix_wrapper.rb +60 -209
  10. data/lib/daru/core/group_by.rb +183 -0
  11. data/lib/daru/dataframe.rb +615 -167
  12. data/lib/daru/index.rb +17 -16
  13. data/lib/daru/io/io.rb +5 -12
  14. data/lib/daru/maths/arithmetic/dataframe.rb +72 -8
  15. data/lib/daru/maths/arithmetic/vector.rb +19 -6
  16. data/lib/daru/maths/statistics/dataframe.rb +103 -2
  17. data/lib/daru/maths/statistics/vector.rb +102 -61
  18. data/lib/daru/monkeys.rb +8 -0
  19. data/lib/daru/multi_index.rb +199 -0
  20. data/lib/daru/plotting/dataframe.rb +24 -24
  21. data/lib/daru/plotting/vector.rb +14 -15
  22. data/lib/daru/vector.rb +402 -98
  23. data/lib/version.rb +1 -1
  24. data/notebooks/grouping_splitting_pivots.ipynb +529 -0
  25. data/notebooks/intro_with_music_data_.ipynb +104 -119
  26. data/spec/accessors/wrappers_spec.rb +36 -0
  27. data/spec/core/group_by_spec.rb +331 -0
  28. data/spec/dataframe_spec.rb +1237 -475
  29. data/spec/fixtures/sales-funnel.csv +18 -0
  30. data/spec/index_spec.rb +10 -21
  31. data/spec/io/io_spec.rb +4 -14
  32. data/spec/math/arithmetic/dataframe_spec.rb +66 -0
  33. data/spec/math/arithmetic/vector_spec.rb +45 -4
  34. data/spec/math/statistics/dataframe_spec.rb +91 -1
  35. data/spec/math/statistics/vector_spec.rb +32 -6
  36. data/spec/monkeys_spec.rb +10 -1
  37. data/spec/multi_index_spec.rb +216 -0
  38. data/spec/spec_helper.rb +1 -0
  39. data/spec/vector_spec.rb +505 -57
  40. metadata +21 -15
@@ -50,4 +50,12 @@ class Numeric
50
50
  def square
51
51
  self * self
52
52
  end
53
+ end
54
+
55
class Matrix
  # Divide every element of this matrix by the element found at the same
  # position (counting row by row) in +other+.
  #
  # == Arguments
  #
  # * +other+ - Any object responding to #to_a whose flattened contents line
  #   up with the elements of this matrix, e.g. another Matrix of the same
  #   dimensions.
  #
  # == Usage
  #
  #   Matrix[[2,4],[6,8]].elementwise_division Matrix[[1,2],[3,4]]
  #   #=> Matrix[[2, 2], [2, 2]]
  def elementwise_division other
    # Flatten the divisor once up front instead of once per element.
    divisors = other.to_a.flatten

    map.with_index do |e, index|
      e / divisors[index]
    end
  end
end
@@ -0,0 +1,199 @@
1
module Daru
  # Class for holding multi index on Vector and DataFrame.
  #
  # Internally the tuples are stored in a nested "relation hash": every level
  # of the hash is keyed by one element of a tuple, and the innermost value is
  # the identifier number assigned to that tuple.
  class MultiIndex
    include Enumerable

    # Iterate over the tuples of the index in the order of their identifier
    # numbers. Returns an Enumerator if no block is given, *self* otherwise.
    def each(&block)
      return to_enum(:each) unless block_given?

      to_a.each(&block)
      self
    end

    # Map over the tuples of the index. Returns a plain Array.
    def map(&block)
      to_a.map(&block)
    end

    attr_reader :relation_hash
    attr_reader :size
    attr_reader :values

    # Initialize a MultiIndex by passing a tuple of indexes. The order assigned
    # to the multi index corresponds to the position of the tuple in the array
    # of tuples.
    #
    # Although you can create your own hierarchically indexed Vectors and DataFrames,
    # this class currently contains minimal error checking and is mainly used
    # internally for summarizing, splitting and grouping of data.
    #
    # == Arguments
    #
    # * +source+ - The array of arrays from which the multi index is to be created.
    # * +values+ - (optional) The identifier number for each tuple in +source+.
    #   Defaults to 0, 1, 2, ... in the order of +source+.
    #
    # == Usage
    #
    #   tuples = [:a,:a,:b,:b].zip([:one,:two,:one,:two])
    #   #=> [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
    #   Daru::MultiIndex.new(tuples)
    def initialize source, values=nil
      @relation_hash = {}
      @size = source.size
      values = Array.new(source.size) { |i| i } if values.nil?
      create_relation_hash source, values
      @relation_hash.freeze
      @values = values
    end

    # Look up an identifier number or a sub-index.
    #
    # * Symbols are treated as a (possibly incomplete) tuple. A complete tuple
    #   returns its identifier number, an incomplete one returns a MultiIndex
    #   of the remaining levels. Returns nil if the tuple is not present.
    # * An Integer selects the n-th first-level key and behaves as if that key
    #   had been passed.
    # * A Range of Integers selects the first-level keys at those positions and
    #   returns a MultiIndex containing them.
    def [] *indexes
      indexes  = indexes.flatten
      location = indexes[0]

      case location
      when Symbol
        result = read_relation_hash @relation_hash, indexes, 0
        return nil if result.nil?
        result.is_a?(Integer) ? result : Daru::MultiIndex.new(*make_tuples(result))
      when Integer
        self[@relation_hash.keys[location]]
      when Range
        keys = @relation_hash.keys
        hsh  = {}
        location.first.upto(location.last) do |index|
          key      = keys[index]
          hsh[key] = read_relation_hash(@relation_hash, [key], 0)
        end
        Daru::MultiIndex.new(*make_tuples(hsh))
      end
    end

    # Compare two MultiIndex objects for equality based on the contents of their
    # relation hashes. Does not take object_id into account. Anything that does
    # not expose a relation hash is never equal to a MultiIndex.
    def == other
      return false unless other.respond_to?(:relation_hash)
      return false if size != other.size

      # Hash#== already compares nested hashes recursively.
      @relation_hash == other.relation_hash
    end

    # Convert a MultiIndex back to tuples (array of arrays). Will retain the
    # order of creation.
    def to_a
      make_tuples(@relation_hash)[0]
    end

    # Duplicate a MultiIndex object and its contents. The copy is rebuilt from
    # #to_a, so its identifier numbers are assigned afresh (0, 1, 2, ...).
    def dup
      Daru::MultiIndex.new to_a
    end

    # Check whether a tuple or identifier number exists in the multi index.
    # The argument *tuple* can be either a complete or incomplete tuple, or a
    # number. The argument itself is never modified.
    def include? tuple
      tuple = tuple.is_a?(Array) ? tuple.flatten : [tuple]
      !!read_relation_hash(@relation_hash, tuple, 0)
    end

    # Obtain the tuple that corresponds with the identifier number.
    #
    # == Arguments
    #
    # * +key+ - A number for which the tuple is to be obtained.
    #
    # == Usage
    #
    #   mi.key(3) #=> [:a,:two,:baz]
    def key key
      tuple = find_tuple_for(@relation_hash, key)
      tuple.empty? ? nil : tuple
    end

   private

    # Create tuples out of the relation hash based on the order of the identifier
    # numbers. Returns an array of arrays containing the tuples and another
    # containing their corresponding index numbers.
    def make_tuples relation_hash
      tuples   = []
      new_vals = []
      values.each do |number|
        tuple = find_tuple_for(relation_hash, number)
        next if tuple.empty?

        tuples   << tuple
        new_vals << number
      end
      [tuples,new_vals]
    end

    # Finds and returns a single tuple for a particular identifier number.
    # Returns an empty Array if the number is not present.
    def find_tuple_for relation_hash, number
      tuple = []
      search_for_number number, relation_hash, tuple
      # Keys are collected innermost-first while the recursion unwinds.
      tuple.reverse
    end

    # Search for a number and store its corresponding tuple in *tuple*. Returns
    # true if the number is successfully found. Stops at the first match.
    def search_for_number number, relation_hash, tuple
      relation_hash.each do |key, value|
        found =
          if value.is_a?(Hash)
            search_for_number(number, value, tuple)
          else
            value == number
          end
        next unless found

        tuple << key
        return true
      end

      false
    end

    # Read the relation hash and return a sub-relation hash or the number to which
    # indexes belongs to. Returns nil when the path described by *indexes* does
    # not exist, including when it is longer than the depth of the hash.
    def read_relation_hash relation_hash, indexes, index
      value = relation_hash[indexes[index]]
      return value if indexes[index+1].nil?
      # More identifiers remain but there is nothing left to descend into.
      return nil unless value.is_a?(Hash)

      read_relation_hash(value, indexes, index+1)
    end

    # Create the relation hash from supplied tuples.
    def create_relation_hash source, values
      source.each_with_index do |tuple, idx|
        populate @relation_hash, tuple, 0, values[idx]
      end
    end

    # Insert one tuple into the relation hash, creating intermediate levels as
    # required and storing *number* at the innermost level.
    def populate relation_hash, tuple, index, number
      identifier = tuple[index]
      return unless identifier

      if tuple[index+1]
        relation_hash[identifier] ||= {}
        populate relation_hash[identifier], tuple, index+1, number
      else
        relation_hash[identifier] = number
      end
    end
  end
end
@@ -7,40 +7,40 @@ end
7
7
  module Daru
8
8
  module Plotting
9
9
  module DataFrame
10
- # Plots a DataFrame with Nyaplot on IRuby using the given options.
11
- # == Arguments
12
- # +x+ - Vector name to be used for x-axis
13
- # +y+ - Vector name to be used for y-axis
10
+ # Plots a DataFrame with Nyaplot on IRuby using the given options. Yields
11
+ # the corresponding Nyaplot::Plot object and the Nyaplot::Diagram object
12
+ # to the block, if it is specified. See the nyaplot docs for info on how to
13
+ # further use these objects.
14
+ #
14
15
  # == Options
15
- # type - Type of plot (scatter, bar, histogram)
16
- # title - Title of plot
17
- # x_label - X - label
18
- # y_label - Y - label
19
- # tooltip_contents - Contents of the tooltip. Array of vector names
20
- # fill_by - Vector name by which each plotted element is colored
21
- # shape_by- Vector name by which dots in a scatter plot are shaped
16
+ # +:type+ - Type of plot (scatter, bar, histogram)
17
+ # +:legends+ - The names of the vectors that are to be used as X and Y axes.
18
+ # The vectors names must be specified as symbols inside an Array. They
19
+ # also should be specified in the right order. For example, passing [:a, :b]
20
+ # will keep vector :a as the X axis and :b as the Y axis. Passing [:a]
21
+ # keep :a as the X axis and plot the frequency with which :a appears
22
+ # on the Y axis.
23
+ # +:frame+ - Pass this as *true* to disable plotting the graph directly
24
+ # and instead manually create Nyaplot::Frame object inside the block using
25
+ # the Nyaplot::Plot object for plotting one or many graphs in a frame.
26
+ #
22
27
  # == Usage
23
28
  # df = Daru::DataFrame.new({a:[0,1,2,3,4], b:[10,20,30,40,50]})
24
- # df.plot :a, :b, type: :bar, title: "Awesome plot"
25
- def plot x, y, opts={}
29
+ # df.plot legends: [:a, :b], type: :bar
30
+ def plot opts={}
26
31
  options = {
27
32
  type: :scatter,
28
- title: "#{@name}",
33
+ frame: false,
34
+ legends: []
29
35
  }.merge(opts)
30
36
 
31
37
  plot = Nyaplot::Plot.new
32
- p = plot.add_with_df(Nyaplot::DataFrame.new(self.to_a[0]), options[:type], x, y)
33
- plot.x_label options[:x_label] if options[:x_label]
34
- plot.y_label options[:y_label] if options[:y_label]
35
- p.tooltip_contents options[:tooltip_contents] if options[:tooltip_contents]
38
+ diagram = plot.add_with_df(Nyaplot::DataFrame.new(self.to_a[0]),
39
+ options[:type], *options[:legends])
36
40
 
37
- if options[:fill_by] or options[:shape_by]
38
- p.color Nyaplot::Colors.qual
39
- p.fill_by options[:fill_by] if options[:fill_by]
40
- p.shape_by options[:shape_by] if options[:shape_by]
41
- end
41
+ yield(plot, diagram) if block_given?
42
42
 
43
- plot.show
43
+ plot.show unless options[:frame]
44
44
  end
45
45
  end
46
46
  end
@@ -8,32 +8,31 @@ module Daru
8
8
  module Plotting
9
9
  module Vector
10
10
 
11
- # Plots a Vector with Nyaplot on IRuby using the given options.
11
+ # Plots a Vector with Nyaplot on IRuby using the given options. Yields the
12
+ # plot object (Nyaplot::Plot) and the diagram object (Nyaplot::Diagram)
13
+ # to the block, which can be used for setting various options as per the
14
+ # Nyaplot API.
15
+ #
12
16
  # == Options
13
17
  # type (:scatter, :bar, :histogram), title, x_label, y_label, color(true/false)
14
18
  #
15
19
  # == Usage
16
20
  # vector = Daru::Vector.new [10,20,30,40], [:one, :two, :three, :four]
17
- # vector.plot type: :bar, title: "My first plot", color: true
18
- def plot opts={}
21
+ # vector.plot(type: :bar) do |plot|
22
+ # plot.title "My first plot"
23
+ # plot.width 1200
24
+ # end
25
+ def plot opts={}, &block
19
26
  options = {
20
- type: :scatter,
21
- title: "#{@name}",
22
- x_label: '',
23
- y_label: '',
24
- color: false
27
+ type: :scatter
25
28
  }.merge(opts)
26
29
 
27
30
  x_axis = options[:type] == :scatter ? Array.new(@size) { |i| i } : @index.to_a
28
31
  plot = Nyaplot::Plot.new
29
- plot.width(options[:width]) if options[:width]
30
- plot.height(options[:height]) if options[:height]
31
-
32
- p = plot.add( options[:type], x_axis, @vector.to_a )
33
- plot.x_label( options[:x_label] ) if options[:x_label]
34
- plot.y_label( options[:y_label] ) if options[:y_label]
35
- p.color( Nyaplot::Colors.qual ) if options[:color]
32
+ diagram = plot.add( options[:type], x_axis, @data.to_a )
36
33
 
34
+ yield plot, diagram if block_given?
35
+
37
36
  plot.show
38
37
  end
39
38
  end
@@ -14,17 +14,23 @@ module Daru
14
14
  include Daru::Plotting::Vector
15
15
 
16
16
  def each(&block)
17
- @vector.each(&block)
17
+ return to_enum(:each) unless block_given?
18
+
19
+ @data.each(&block)
20
+ self
18
21
  end
19
22
 
20
23
  def map!(&block)
21
- @vector.map!(&block)
24
+ return to_enum(:map!) unless block_given?
22
25
 
26
+ @data.map!(&block)
23
27
  self
24
28
  end
25
29
 
26
30
  def map(&block)
27
- Daru::Vector.new @vector.map(&block), name: @name, index: @index, dtype: @dtype
31
+ return to_enum(:map) unless block_given?
32
+
33
+ Daru::Vector.new @data.map(&block), name: @name, index: @index, dtype: @dtype
28
34
  end
29
35
 
30
36
  alias_method :recode, :map
@@ -33,22 +39,29 @@ module Daru
33
39
  attr_reader :index
34
40
  attr_reader :size
35
41
  attr_reader :dtype
42
+ attr_reader :nm_dtype
43
+ attr_reader :nil_positions
36
44
 
37
45
  # Create a Vector object.
38
46
  # == Arguments
39
47
  #
40
48
  # @param source[Array,Hash] - Supply elements in the form of an Array or a Hash. If Array, a
41
- # numeric index will be created if not supplied in the options. Specifying more
42
- # index elements than actual values in *source* will insert *nil* into the
43
- # surplus index elements. When a Hash is specified, the keys of the Hash are
44
- # taken as the index elements and the corresponding values as the values that
45
- # populate the vector.
49
+ # numeric index will be created if not supplied in the options. Specifying more
50
+ # index elements than actual values in *source* will insert *nil* into the
51
+ # surplus index elements. When a Hash is specified, the keys of the Hash are
52
+ # taken as the index elements and the corresponding values as the values that
53
+ # populate the vector.
46
54
  #
47
55
  # == Options
48
56
  #
49
- # * +:name+ - Name of the vector
57
+ # * +:name+ - Name of the vector
58
+ #
59
+ # * +:index+ - Index of the vector
50
60
  #
51
- # * +:index+ - Index of the vector
61
+ # * +:dtype+ - The underlying data type. Can be :array or :nmatrix. Default :array.
62
+ #
63
+ # * +:nm_dtype+ - For NMatrix, the data type of the numbers. See the NMatrix docs for
64
+ # further information on supported data type.
52
65
  #
53
66
  # == Usage
54
67
  #
@@ -64,76 +77,112 @@ module Daru
64
77
  source = source || []
65
78
  end
66
79
  name = opts[:name]
67
- @dtype = opts[:dtype] || Array
68
-
69
80
  set_name name
70
81
 
71
- @vector =
72
- case
73
- when @dtype == Array
74
- Daru::Accessors::ArrayWrapper.new source.dup, self
75
- when @dtype == NMatrix
76
- Daru::Accessors::NMatrixWrapper.new source.dup, self
77
- when @dtype == MDArray
78
- Daru::Accessors::MDArrayWrapper.new source.dup
79
- when @dtype == Range, Matrix
80
- Daru::Accessors::ArrayWrapper.new source.to_a.dup, self
81
- end
82
-
83
- if index.nil?
84
- @index = Daru::Index.new @vector.size
85
- else
86
- @index = index.to_index
87
- end
88
- # TODO: Will need work for NMatrix/MDArray
89
- if @index.size > @vector.size
90
- self.coerce Array # NM with nils seg faults
91
- (@index.size - @vector.size).times { @vector << nil }
92
- elsif @index.size < @vector.size
93
- raise IndexError, "Expected index size >= vector size"
82
+ @data = cast_vector_to(opts[:dtype], source, opts[:nm_dtype])
83
+ @index = create_index(index || @data.size)
84
+
85
+ if @index.size > @data.size
86
+ cast(dtype: :array) # NM with nils seg faults
87
+ (@index.size - @data.size).times { @data << nil }
88
+ elsif @index.size < @data.size
89
+ raise IndexError, "Expected index size >= vector size. Index size : #{@index.size}, vector size : #{@data.size}"
94
90
  end
95
91
 
92
+ @possibly_changed_type = true
93
+ set_nil_positions
96
94
  set_size
97
95
  end
98
96
 
99
- # Get one or more elements with specified index.
97
+ # Get one or more elements with specified index or a range.
100
98
  #
101
99
  # == Usage
100
+ # # For vectors employing single layer Index
101
+ #
102
102
  # v[:one, :two] # => Daru::Vector with indexes :one and :two
103
103
  # v[:one] # => Single element
104
- def [](index, *indexes)
105
- if indexes.empty?
106
- case index
107
- when Range
108
- # range into vector
109
- #
104
+ # v[:one..:three] # => Daru::Vector with indexes :one, :two and :three
105
+ #
106
+ # # For vectors employing hierarchial multi index
107
+ #
108
+ def [](*indexes)
109
+ location = indexes[0]
110
+ if @index.is_a?(MultiIndex)
111
+ result =
112
+ if location.is_a?(Integer)
113
+ element_from_numeric_index(location)
114
+ elsif location.is_a?(Range)
115
+ arry = location.inject([]) do |memo, num|
116
+ memo << element_from_numeric_index(num)
117
+ memo
118
+ end
119
+
120
+ new_index = Daru::MultiIndex.new(@index.to_a[location])
121
+ Daru::Vector.new(arry, index: new_index, name: @name, dtype: dtype)
110
122
  else
111
- if @index.include? index
112
- @vector[@index[index]]
113
- elsif index.is_a?(Numeric)
114
- @vector[index]
123
+ sub_index = @index[indexes]
124
+
125
+ if sub_index.is_a?(Integer)
126
+ element_from_numeric_index(sub_index)
115
127
  else
116
- return nil
128
+ elements = sub_index.map do |tuple|
129
+ @data[@index[(indexes + tuple)]]
130
+ end
131
+ Daru::Vector.new(elements, index: Daru::MultiIndex.new(sub_index.to_a),
132
+ name: @name, dtype: @dtype)
117
133
  end
118
134
  end
135
+
136
+ return result
119
137
  else
120
- indexes.unshift index
138
+ unless indexes[1]
139
+ case location
140
+ when Range
141
+ range =
142
+ if location.first.is_a?(Numeric)
143
+ location
144
+ else
145
+ first = location.first
146
+ last = location.last
147
+
148
+ (first..last)
149
+ end
150
+ indexes = @index[range]
151
+ else
152
+ return element_from_numeric_index(location)
153
+ end
154
+ end
121
155
 
122
- Daru::Vector.new indexes.map { |index| @vector[@index[index]] },name: @name,
123
- index: indexes
156
+ Daru::Vector.new indexes.map { |loc| @data[index_for(loc)] }, name: @name,
157
+ index: indexes.map { |e| named_index_for(e) }, dtype: @dtype
124
158
  end
125
159
  end
126
160
 
127
- def []=(index, value)
128
- @vector = @vector.coerce(Array) if value.nil?
161
+ def []=(*location, value)
162
+ cast(dtype: :array) if value.nil? and dtype != :array
129
163
 
130
- if @index.include? index
131
- @vector[@index[index]] = value
164
+ @possibly_changed_type = true if @type == :object and (value.nil? or
165
+ value.is_a?(Numeric))
166
+ @possibly_changed_type = true if @type == :numeric and (!value.is_a?(Numeric) and
167
+ !value.nil?)
168
+
169
+ pos =
170
+ if @index.is_a?(MultiIndex) and !location[0].is_a?(Integer)
171
+ index_for location
172
+ else
173
+ index_for location[0]
174
+ end
175
+
176
+ if pos.is_a?(MultiIndex)
177
+ pos.each do |sub_tuple|
178
+ self[*(location + sub_tuple)] = value
179
+ end
132
180
  else
133
- @vector[index] = value
181
+ @data[pos] = value
134
182
  end
135
183
 
136
184
  set_size
185
+ set_nil_positions
137
186
  end
138
187
 
139
188
  # Two vectors are equal if they have the exact same index values corresponding
@@ -158,8 +207,12 @@ module Daru
158
207
  concat element
159
208
  end
160
209
 
161
- def re_index new_index
162
-
210
# Returns the first +q+ elements of the vector (10 by default).
#
# == Arguments
#
# * +q+ - Number of elements to return from the start. Expected to be a
#   positive Integer.
def head q=10
  # The range is inclusive, so stop at q-1 to get exactly q elements.
  self[0..(q-1)]
end
213
+
214
+ def tail q=10
215
+ self[-q..-1]
163
216
  end
164
217
 
165
218
  # Append an element to the vector by specifying the element and index
@@ -167,30 +220,31 @@ module Daru
167
220
  raise IndexError, "Expected new unique index" if @index.include? index
168
221
 
169
222
  if index.nil? and @index.index_class == Integer
170
- @index = Daru::Index.new @size+1
223
+ @index = create_index(@size + 1)
171
224
  index = @size
172
225
  else
173
226
  begin
174
- @index = @index.re_index(@index + index)
175
- rescue Exception => e
227
+ @index = create_index(@index + index)
228
+ rescue StandardError => e
176
229
  raise e, "Expected valid index."
177
230
  end
178
231
  end
179
-
180
- @vector[@index[index]] = element
181
-
232
+ @data[@index[index]] = element
182
233
  set_size
234
+ set_nil_positions
183
235
  end
184
236
 
185
- def coerce dtype
186
- begin
187
- @vector = @vector.coerce @dtype
188
- @dtype = dtype
189
- rescue StandardError => e
190
- puts "Cannot convert to #{dtype} because of data type mismatch. #{e}"
191
- end
237
+ # Cast a vector to a new data type.
238
+ #
239
+ # == Options
240
+ #
241
+ # * +:dtype+ - :array for Ruby Array. :nmatrix for NMatrix.
242
+ def cast opts={}
243
+ dtype = opts[:dtype]
244
+ raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless
245
+ dtype == :array or dtype == :nmatrix
192
246
 
193
- self
247
+ @data = cast_vector_to dtype
194
248
  end
195
249
 
196
250
  # Delete an element by value
@@ -201,25 +255,51 @@ module Daru
201
255
  # Delete element by index
202
256
  def delete_at index
203
257
  idx = named_index_for index
204
- @vector.delete_at @index[idx]
258
+ @data.delete_at @index[idx]
205
259
 
206
260
  if @index.index_class == Integer
207
261
  @index = Daru::Index.new @size-1
208
262
  else
209
- @index = (@index.to_a - [idx]).to_index
263
+ @index = Daru::Index.new (@index.to_a - [idx])
210
264
  end
211
265
 
212
266
  set_size
267
+ set_nil_positions
268
+ end
269
+
270
+ # The type of data contained in the vector. Can be :object or :numeric. If
271
+ # the underlying dtype is an NMatrix, this method will return the data type
272
+ # of the NMatrix object.
273
+ #
274
+ # Running through the data to figure out the kind of data is delayed to the
275
+ # last possible moment.
276
+ def type
277
+ return @data.nm_dtype if dtype == :nmatrix
278
+
279
+ if @type.nil? or @possibly_changed_type
280
+ @type = :numeric
281
+ self.each do |e|
282
+ unless e.nil?
283
+ unless e.is_a?(Numeric)
284
+ @type = :object
285
+ break
286
+ end
287
+ end
288
+ end
289
+ @possibly_changed_type = false
290
+ end
291
+
292
+ @type
213
293
  end
214
294
 
215
295
  # Get index of element
216
296
  def index_of element
217
- @index.key @vector.index(element)
297
+ @index.key @data.index(element)
218
298
  end
219
299
 
220
300
  # Keep only unique elements of the vector along with their indexes.
221
301
  def uniq
222
- uniq_vector = @vector.uniq
302
+ uniq_vector = @data.uniq
223
303
  new_index = uniq_vector.inject([]) do |acc, element|
224
304
  acc << index_of(element)
225
305
  acc
@@ -228,21 +308,123 @@ module Daru
228
308
  Daru::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
229
309
  end
230
310
 
231
- # def sort ascending=true
232
- # if ascending
233
-
234
- # end
235
- # end
311
+ # Sorts a vector according to its values. If a block is specified, the contents
312
+ # will be evaluated and data will be swapped whenever the block evaluates
313
+ # to *true*. Defaults to ascending order sorting. Any missing values will be
314
+ # put at the end of the vector. Preserves indexing. Default sort algorithm is
315
+ # quick sort.
316
+ #
317
+ # == Options
318
+ #
319
+ # * +:ascending+ - if false, will sort in descending order. Defaults to true.
320
+ #
321
+ # * +:type+ - Specify the sorting algorithm. Only supports quick_sort for now.
322
+ # == Usage
323
+ #
324
+ # v = Daru::Vector.new ["My first guitar", "jazz", "guitar"]
325
+ # # Say you want to sort these strings by length.
326
+ # v.sort { |a,b| a.length <=> b.length }
327
+ def sort opts={}, &block
328
+ opts = {
329
+ ascending: true,
330
+ type: :quick_sort
331
+ }.merge(opts)
332
+
333
+ block = lambda { |a,b| a <=> b } unless block
334
+
335
+ order = opts[:ascending] ? :ascending : :descending
336
+ vector, index = send(opts[:type], @data.to_a.dup, @index.to_a, order, &block)
337
+ index = @index.is_a?(MultiIndex) ? Daru::MultiIndex.new(index) : index
338
+
339
+ Daru::Vector.new(vector, index: create_index(index), name: @name, dtype: @dtype)
340
+ end
341
+
342
+ # Just sort the data and get an Array in return using Enumerable#sort. Non-destructive.
343
+ def sorted_data &block
344
+ @data.to_a.sort(&block)
345
+ end
236
346
 
237
- def is_valid? value
347
+ # Returns *true* if the value passed actually exists in the vector.
348
+ def exists? value
238
349
  !self[index_of(value)].nil?
239
350
  end
240
351
 
352
+ # Returns a vector which has *true* in the position where the element in self
353
+ # is nil, and false otherwise.
354
+ #
355
+ # == Usage
356
+ #
357
+ # v = Daru::Vector.new([1,2,4,nil])
358
+ # v.is_nil?
359
+ # # =>
360
+ # #<Daru::Vector:89421000 @name = nil @size = 4 >
361
+ # # nil
362
+ # # 0 false
363
+ # # 1 false
364
+ # # 2 false
365
+ # # 3 true
366
+ def is_nil?
367
+ nil_truth_vector = clone_structure
368
+ @index.each do |idx|
369
+ nil_truth_vector[idx] = self[idx].nil? ? true : false
370
+ end
371
+
372
+ nil_truth_vector
373
+ end
374
+
375
+ # Opposite of #is_nil?
376
+ def not_nil?
377
+ nil_truth_vector = clone_structure
378
+ @index.each do |idx|
379
+ nil_truth_vector[idx] = self[idx].nil? ? false : true
380
+ end
381
+
382
+ nil_truth_vector
383
+ end
384
+
385
+ # Replace all nils in the vector with the value passed as an argument. Destructive.
386
+ # See #replace_nils for non-destructive version
387
+ #
388
+ # == Arguments
389
+ #
390
+ # * +replacement+ - The value which should replace all nils
391
+ def replace_nils! replacement
392
+ nil_positions.each do |idx|
393
+ self[idx] = replacement
394
+ end
395
+
396
+ self
397
+ end
398
+
399
+ # Non-destructive version of #replace_nils!
400
+ def replace_nils replacement
401
+ self.dup.replace_nils!(replacement)
402
+ end
403
+
404
# Number of valid (i.e. non-nil) elements in the vector: the total size
# minus the number of positions recorded as holding nil.
def n_valid
  @size - @nil_positions.size
end
407
+
241
408
  # Returns *true* if an index exists
242
409
  def has_index? index
243
410
  @index.include? index
244
411
  end
245
412
 
413
+ # Convert Vector to a horizontal or vertical Ruby Matrix.
414
+ #
415
+ # == Arguments
416
+ #
417
+ # * +axis+ - Specify whether you want a *:horizontal* or a *:vertical* matrix.
418
+ def to_matrix axis=:horizontal
419
+ if axis == :horizontal
420
+ Matrix[to_a]
421
+ elsif axis == :vertical
422
+ Matrix.columns([to_a])
423
+ else
424
+ raise ArgumentError, "axis should be either :horizontal or :vertical, not #{axis}"
425
+ end
426
+ end
427
+
246
428
  # Convert to hash. Hash keys are indexes and values are the corresponding elements
247
429
  def to_hash
248
430
  @index.inject({}) do |hsh, index|
@@ -253,7 +435,7 @@ module Daru
253
435
 
254
436
  # Return an array
255
437
  def to_a
256
- @vector.to_a
438
+ @data.to_a
257
439
  end
258
440
 
259
441
  # Convert the hash from to_hash to json
@@ -283,10 +465,10 @@ module Daru
283
465
  end
284
466
 
285
467
  # Over rides original inspect for pretty printing in irb
286
- def inspect spacing=10, threshold=15
468
+ def inspect spacing=20, threshold=15
287
469
  longest = [@name.to_s.size,
288
470
  @index.to_a.map(&:to_s).map(&:size).max,
289
- @vector .map(&:to_s).map(&:size).max,
471
+ @data .map(&:to_s).map(&:size).max,
290
472
  'nil'.size].max
291
473
 
292
474
  content = ""
@@ -297,7 +479,7 @@ module Daru
297
479
 
298
480
  content += sprintf formatter, "", name
299
481
  @index.each_with_index do |index, num|
300
- content += sprintf formatter, index.to_s, (self[index] || 'nil').to_s
482
+ content += sprintf formatter, index.to_s, (self[*index] || 'nil').to_s
301
483
  if num > threshold
302
484
  content += sprintf formatter, '...', '...'
303
485
  break
@@ -308,27 +490,38 @@ module Daru
308
490
  content
309
491
  end
310
492
 
311
- # def compact!
312
- # TODO: Compact and also take care of indexes
313
- # @vector.compact!
314
- # set_size
315
- # end
493
+ # Create a new vector with a different index.
494
+ #
495
+ # @param new_index [Symbol, Array, Daru::Index] The new index. Passing *:seq*
496
+ # will reindex with sequential numbers from 0 to (n-1).
497
+ def reindex new_index
498
+ index = create_index(new_index == :seq ? @size : new_index)
499
+ Daru::Vector.new @data.to_a, index: index, name: name, dtype: @dtype
500
+ end
316
501
 
317
502
  # Give the vector a new name
503
+ #
504
+ # @param new_name [Symbol] The new name.
318
505
  def rename new_name
319
506
  @name = new_name.to_sym
320
507
  end
321
508
 
322
509
  # Duplicate elements and indexes
323
510
  def dup
324
- Daru::Vector.new @vector.dup, name: @name, index: @index.dup
511
+ Daru::Vector.new @data.dup, name: @name, index: @index.dup
512
+ end
513
+
514
+ # Copies the structure of the vector (i.e the index, size, etc.) and fills all
515
+ # all values with nils.
516
+ def clone_structure
517
+ Daru::Vector.new(([nil]*@size), name: @name, index: @index.dup)
325
518
  end
326
519
 
327
520
  def daru_vector *name
328
521
  self
329
522
  end
330
523
 
331
- alias_method :dv, :daru_vector
524
+ alias :dv :daru_vector
332
525
 
333
526
  def method_missing(name, *args, &block)
334
527
  if name.match(/(.+)\=/)
@@ -342,6 +535,88 @@ module Daru
342
535
 
343
536
  private
344
537
 
538
+ def quick_sort vector, index, order, &block
539
+ recursive_quick_sort vector, index, order, 0, @size-1, &block
540
+ [vector, index]
541
+ end
542
+
543
+ def recursive_quick_sort vector, index, order, left_lower, right_upper, &block
544
+ if left_lower < right_upper
545
+ left_upper, right_lower = partition(vector, index, order, left_lower, right_upper, &block)
546
+ if left_upper - left_lower < right_upper - right_lower
547
+ recursive_quick_sort(vector, index, order, left_lower, left_upper, &block)
548
+ recursive_quick_sort(vector, index, order, right_lower, right_upper, &block)
549
+ else
550
+ recursive_quick_sort(vector, index, order, right_lower, right_upper, &block)
551
+ recursive_quick_sort(vector, index, order, left_lower, left_upper, &block)
552
+ end
553
+ end
554
+ end
555
+
556
+ def partition vector, index, order, left_lower, right_upper, &block
557
+ mindex = (left_lower + right_upper) / 2
558
+ mvalue = vector[mindex]
559
+ i = left_lower
560
+ j = right_upper
561
+ opposite_order = order == :ascending ? :descending : :ascending
562
+
563
+ i += 1 while(keep?(vector[i], mvalue, order, &block))
564
+ j -= 1 while(keep?(vector[j], mvalue, opposite_order, &block))
565
+
566
+ while i < j - 1
567
+ vector[i], vector[j] = vector[j], vector[i]
568
+ index[i], index[j] = index[j], index[i]
569
+ i += 1
570
+ j -= 1
571
+
572
+ i += 1 while(keep?(vector[i], mvalue, order, &block))
573
+ j -= 1 while(keep?(vector[j], mvalue, opposite_order, &block))
574
+ end
575
+
576
+ if i <= j
577
+ if i < j
578
+ vector[i], vector[j] = vector[j], vector[i]
579
+ index[i], index[j] = index[j], index[i]
580
+ end
581
+ i += 1
582
+ j -= 1
583
+ end
584
+
585
+ [j,i]
586
+ end
587
+
588
# Decide whether element +a+ is already on the correct side of the pivot +b+
# for the requested sort order, i.e. whether the partition scan may move past
# it.
#
# == Arguments
#
# * +a+     - The element under inspection.
# * +b+     - The pivot value.
# * +order+ - :ascending or :descending.
# * +block+ - Comparator called as block.call(a, b). Like the block given to
#   Array#sort it may return any negative number, zero or any positive number.
#
# Returns *true* only when the comparator places +a+ strictly before +b+ in
# ascending order or strictly after it in descending order. Returns *false* if
# either value is nil, if the comparator does not return a number (e.g. nil
# for incomparable values), or if +order+ is not recognised.
def keep? a, b, order, &block
  return false if a.nil? or b.nil?

  comparison = block.call(a, b)
  return false unless comparison.is_a?(Numeric)

  case order
  when :ascending  then comparison < 0
  when :descending then comparison > 0
  else false
  end
end
+ end
600
+
601
+ # Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
602
+ # @dtype variable is set and the underlying data type of vector changed.
603
+ def cast_vector_to dtype, source=nil, nm_dtype=nil
604
+ source = @data if source.nil?
605
+ return @data if @dtype and @dtype == dtype
606
+
607
+ new_vector =
608
+ case dtype
609
+ when :array then Daru::Accessors::ArrayWrapper.new(source.to_a.dup, self)
610
+ when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source.to_a.dup,
611
+ self, nm_dtype)
612
+ when :mdarray then raise NotImplementedError, "MDArray not yet supported."
613
+ else Daru::Accessors::ArrayWrapper.new(source.dup, self)
614
+ end
615
+
616
+ @dtype = dtype || :array
617
+ new_vector
618
+ end
619
+
345
620
  def named_index_for index
346
621
  if @index.include? index
347
622
  index
@@ -352,18 +627,47 @@ module Daru
352
627
  end
353
628
  end
354
629
 
630
+ def index_for index
631
+ if @index.include?(index)
632
+ @index[index]
633
+ elsif index.is_a?(Numeric)
634
+ index
635
+ end
636
+ end
637
+
355
638
  def set_size
356
- @size = @vector.size
639
+ @size = @data.size
357
640
  end
358
641
 
359
642
  def set_name name
360
- if name.is_a?(Numeric)
361
- @name = name
362
- elsif name # anything but Numeric or nil
363
- @name = name.to_sym
643
+ @name =
644
+ if name.is_a?(Numeric) then name
645
+ elsif name.is_a?(Array) then name.join.to_sym # in case of MultiIndex tuple
646
+ elsif name then name.to_sym # anything but Numeric or nil
364
647
  else
365
- @name = nil
648
+ nil
366
649
  end
367
650
  end
651
+
652
+ def set_nil_positions
653
+ @nil_positions = []
654
+ @index.each do |e|
655
+ @nil_positions << e if(self[e].nil?)
656
+ end
657
+ @nil_positions.uniq!
658
+ end
659
+
660
+ def create_index potential_index
661
+ if potential_index.is_a?(Daru::MultiIndex) or potential_index.is_a?(Daru::Index)
662
+ potential_index
663
+ else
664
+ Daru::Index.new(potential_index)
665
+ end
666
+ end
667
+
668
+ def element_from_numeric_index location
669
+ pos = index_for location
670
+ pos ? @data[pos] : nil
671
+ end
368
672
  end
369
673
  end