daru 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +0 -0
- data/Gemfile +0 -1
- data/History.txt +35 -0
- data/README.md +178 -198
- data/daru.gemspec +5 -7
- data/lib/daru.rb +10 -2
- data/lib/daru/accessors/array_wrapper.rb +36 -198
- data/lib/daru/accessors/nmatrix_wrapper.rb +60 -209
- data/lib/daru/core/group_by.rb +183 -0
- data/lib/daru/dataframe.rb +615 -167
- data/lib/daru/index.rb +17 -16
- data/lib/daru/io/io.rb +5 -12
- data/lib/daru/maths/arithmetic/dataframe.rb +72 -8
- data/lib/daru/maths/arithmetic/vector.rb +19 -6
- data/lib/daru/maths/statistics/dataframe.rb +103 -2
- data/lib/daru/maths/statistics/vector.rb +102 -61
- data/lib/daru/monkeys.rb +8 -0
- data/lib/daru/multi_index.rb +199 -0
- data/lib/daru/plotting/dataframe.rb +24 -24
- data/lib/daru/plotting/vector.rb +14 -15
- data/lib/daru/vector.rb +402 -98
- data/lib/version.rb +1 -1
- data/notebooks/grouping_splitting_pivots.ipynb +529 -0
- data/notebooks/intro_with_music_data_.ipynb +104 -119
- data/spec/accessors/wrappers_spec.rb +36 -0
- data/spec/core/group_by_spec.rb +331 -0
- data/spec/dataframe_spec.rb +1237 -475
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/index_spec.rb +10 -21
- data/spec/io/io_spec.rb +4 -14
- data/spec/math/arithmetic/dataframe_spec.rb +66 -0
- data/spec/math/arithmetic/vector_spec.rb +45 -4
- data/spec/math/statistics/dataframe_spec.rb +91 -1
- data/spec/math/statistics/vector_spec.rb +32 -6
- data/spec/monkeys_spec.rb +10 -1
- data/spec/multi_index_spec.rb +216 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/vector_spec.rb +505 -57
- metadata +21 -15
data/lib/daru/monkeys.rb
CHANGED
@@ -0,0 +1,199 @@
|
|
1
|
+
module Daru
  # Class for holding multi index on Vector and DataFrame.
  #
  # Tuples are stored in a nested Hash (the "relation hash"): each level of the
  # hash corresponds to one level of the tuple, and the innermost value is the
  # position number assigned to that tuple.
  class MultiIndex
    include Enumerable

    attr_reader :relation_hash
    attr_reader :size
    attr_reader :values

    # Iterate over the tuples in order of their position numbers. Returns an
    # Enumerator when no block is given, otherwise returns self.
    def each(&block)
      return to_enum(:each) unless block_given?

      to_a.each(&block)
      self
    end

    # Map over the tuples. Returns a plain Array (or an Enumerator when no
    # block is given).
    def map(&block)
      to_a.map(&block)
    end

    # Initialize a MultiIndex by passing a tuple of indexes. The order assigned
    # to the multi index corresponds to the position of the tuple in the array
    # of tuples.
    #
    # Although you can create your own hierarchically indexed Vectors and DataFrames,
    # this class currently contains minimal error checking and is mainly used
    # internally for summarizing, splitting and grouping of data.
    #
    # == Arguments
    #
    # * +source+ - The array of arrays from which the multi index is to be created.
    # * +values+ - Optional array of position numbers, one per tuple. Defaults
    #   to 0..(source.size - 1).
    #
    # == Usage
    #
    #   tuples = [:a,:a,:b,:b].zip([:one,:two,:one,:two])
    #   #=> [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
    #   Daru::MultiIndex.new(tuples)
    def initialize source, values=nil
      @relation_hash = {}
      @size = source.size
      values = Array.new(source.size) { |i| i } if values.nil?
      create_relation_hash source, values
      @relation_hash.freeze # only the top level is frozen
      @values = values
    end

    # Look up a position number or a sub-index.
    #
    # * Symbols (a complete or partial tuple) return the position number for a
    #   complete tuple, a new MultiIndex for a partial tuple, or nil when the
    #   tuple is not present.
    # * An Integer selects the top-level key at that position and returns
    #   whatever is stored under it (nil when out of range).
    # * A Range selects the top-level keys at those positions (with the usual
    #   Array range semantics) and returns a MultiIndex of full tuples.
    def [] *indexes
      indexes  = indexes.flatten # never mutate arrays owned by the caller
      location = indexes[0]

      case location
      when Symbol
        wrap_lookup read_relation_hash(@relation_hash, indexes, 0)
      when Integer
        key = @relation_hash.keys[location]
        key.nil? ? nil : wrap_lookup(@relation_hash[key])
      when Range
        selected = @relation_hash.keys[location] || []
        sub_hash = selected.each_with_object({}) do |key, hsh|
          hsh[key] = @relation_hash[key]
        end
        Daru::MultiIndex.new(*make_tuples(sub_hash))
      end
    end

    # Compare two MultiIndex objects for equality based on the contents of their
    # relation hashes. Does not take object_id into account. Anything that does
    # not expose a relation hash is never equal.
    def == other
      return false unless other.respond_to?(:relation_hash)
      return false if size != other.size

      @relation_hash == other.relation_hash
    end

    # Convert a MultiIndex back to tuples (array of arrays). Will retain the
    # order of creation.
    def to_a
      make_tuples(@relation_hash)[0]
    end

    # Build a new MultiIndex from the tuples of this one. Position numbers of
    # the copy are assigned afresh, starting from 0.
    def dup
      Daru::MultiIndex.new to_a
    end

    # Check whether a tuple exists in the multi index. The argument *tuple* can
    # be either a complete or an incomplete tuple; a bare (non-Array) argument
    # is treated as a one-element tuple. Returns false for tuples that are not
    # present, including tuples deeper than the index.
    def include? tuple
      tuple = tuple.is_a?(Array) ? tuple.flatten : [tuple]
      !!read_relation_hash(@relation_hash, tuple, 0)
    end

    # Obtain the tuple that corresponds with the identifier number.
    #
    # == Arguments
    #
    # * +key+ - A number for which the tuple is to be obtained.
    #
    # == Usage
    #
    #   mi.key(3) #=> [:a,:two,:baz]
    def key key
      tuple = find_tuple_for(@relation_hash, key)
      tuple.empty? ? nil : tuple
    end

    private

    # Turn the raw result of a relation hash lookup into the public return
    # value: sub-hashes become a MultiIndex, everything else (position number
    # or nil) is passed through untouched.
    def wrap_lookup result
      result.is_a?(Hash) ? Daru::MultiIndex.new(*make_tuples(result)) : result
    end

    # Create tuples out of the relation hash based on the order of the identifier
    # numbers. Returns an array of arrays containing the tuples and another
    # containing their corresponding index numbers. Numbers in #values that do
    # not occur in *relation_hash* are skipped.
    def make_tuples relation_hash
      tuples   = []
      new_vals = []
      values.each do |number|
        tuple = find_tuple_for(relation_hash, number)
        next if tuple.empty?

        tuples   << tuple
        new_vals << number
      end
      [tuples, new_vals]
    end

    # Finds and returns a single tuple for a particular identifier number.
    # Returns an empty Array when the number is not present. The search stops
    # at the first match.
    def find_tuple_for relation_hash, number
      relation_hash.each do |key, value|
        if value.is_a?(Hash)
          rest = find_tuple_for(value, number)
          return [key] + rest unless rest.empty?
        elsif value == number
          return [key]
        end
      end

      []
    end

    # Read the relation hash and return a sub-relation hash or the number to which
    # indexes belongs. Returns nil when the identifiers do not lead anywhere.
    def read_relation_hash relation_hash, indexes, index
      value = relation_hash[indexes[index]]
      return value if indexes[index + 1].nil?
      # More identifiers remain, so the current level must be a Hash; a missing
      # key (nil) or a leaf number means the tuple is not part of this index.
      return nil unless value.is_a?(Hash)

      read_relation_hash(value, indexes, index + 1)
    end

    # Create the relation hash from supplied tuples.
    def create_relation_hash source, values
      source.each_with_index do |tuple, idx|
        populate @relation_hash, tuple, 0, values[idx]
      end
    end

    # Insert *tuple* into *relation_hash* starting at level *index*, storing
    # *number* at the innermost level. A nil/false identifier ends the tuple.
    def populate relation_hash, tuple, index, number
      identifier = tuple[index]
      return unless identifier

      unless tuple[index + 1]
        relation_hash[identifier] = number
        return
      end

      relation_hash[identifier] ||= {}
      populate relation_hash[identifier], tuple, index + 1, number
    end
  end
end
|
@@ -7,40 +7,40 @@ end
|
|
7
7
|
module Daru
|
8
8
|
module Plotting
|
9
9
|
module DataFrame
|
10
|
-
# Plots a DataFrame with Nyaplot on IRuby using the given options.
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
10
|
+
# Plots a DataFrame with Nyaplot on IRuby using the given options. Yields
|
11
|
+
# the corresponding Nyaplot::Plot object and the Nyaplot::Diagram object
|
12
|
+
# to the block, if it is specified. See the nyaplot docs for info on how to
|
13
|
+
# further use these objects.
|
14
|
+
#
|
14
15
|
# == Options
|
15
|
-
# type
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
16
|
+
# +:type+ - Type of plot (scatter, bar, histogram)
|
17
|
+
# +:legends+ - The names of the vectors that are to be used as X and Y axes.
|
18
|
+
# The vectors names must be specified as symbols inside an Array. They
|
19
|
+
# also should be specified in the right order. For example, passing [:a, :b]
|
20
|
+
# will keep vector :a as the X axis and :b as the Y axis. Passing [:a]
|
21
|
+
# will keep :a as the X axis and plot the frequency with which :a appears
|
22
|
+
# on the Y axis.
|
23
|
+
# +:frame+ - Pass this as *true* to disable plotting the graph directly
|
24
|
+
# and instead manually create Nyaplot::Frame object inside the block using
|
25
|
+
# the Nyaplot::Plot object for plotting one or many graphs in a frame.
|
26
|
+
#
|
22
27
|
# == Usage
|
23
28
|
# df = Daru::DataFrame.new({a:[0,1,2,3,4], b:[10,20,30,40,50]})
|
24
|
-
# df.plot :a, :b, type: :bar
|
25
|
-
def plot
|
29
|
+
# df.plot legends: [:a, :b], type: :bar
|
30
|
+
def plot opts={}
|
26
31
|
options = {
|
27
32
|
type: :scatter,
|
28
|
-
|
33
|
+
frame: false,
|
34
|
+
legends: []
|
29
35
|
}.merge(opts)
|
30
36
|
|
31
37
|
plot = Nyaplot::Plot.new
|
32
|
-
|
33
|
-
|
34
|
-
plot.y_label options[:y_label] if options[:y_label]
|
35
|
-
p.tooltip_contents options[:tooltip_contents] if options[:tooltip_contents]
|
38
|
+
diagram = plot.add_with_df(Nyaplot::DataFrame.new(self.to_a[0]),
|
39
|
+
options[:type], *options[:legends])
|
36
40
|
|
37
|
-
|
38
|
-
p.color Nyaplot::Colors.qual
|
39
|
-
p.fill_by options[:fill_by] if options[:fill_by]
|
40
|
-
p.shape_by options[:shape_by] if options[:shape_by]
|
41
|
-
end
|
41
|
+
yield(plot, diagram) if block_given?
|
42
42
|
|
43
|
-
plot.show
|
43
|
+
plot.show unless options[:frame]
|
44
44
|
end
|
45
45
|
end
|
46
46
|
end
|
data/lib/daru/plotting/vector.rb
CHANGED
@@ -8,32 +8,31 @@ module Daru
|
|
8
8
|
module Plotting
|
9
9
|
module Vector
|
10
10
|
|
11
|
-
# Plots a Vector with Nyaplot on IRuby using the given options.
|
11
|
+
# Plots a Vector with Nyaplot on IRuby using the given options. Yields the
|
12
|
+
# plot object (Nyaplot::Plot) and the diagram object (Nyaplot::Diagram)
|
13
|
+
# to the block, which can be used for setting various options as per the
|
14
|
+
# Nyaplot API.
|
15
|
+
#
|
12
16
|
# == Options
|
13
17
|
# type (:scatter, :bar, :histogram), title, x_label, y_label, color(true/false)
|
14
18
|
#
|
15
19
|
# == Usage
|
16
20
|
# vector = Daru::Vector.new [10,20,30,40], [:one, :two, :three, :four]
|
17
|
-
# vector.plot
|
18
|
-
|
21
|
+
# vector.plot(type: :bar) do |plot|
|
22
|
+
# plot.title "My first plot"
|
23
|
+
# plot.width 1200
|
24
|
+
# end
|
25
|
+
def plot opts={}, &block
|
19
26
|
options = {
|
20
|
-
type: :scatter
|
21
|
-
title: "#{@name}",
|
22
|
-
x_label: '',
|
23
|
-
y_label: '',
|
24
|
-
color: false
|
27
|
+
type: :scatter
|
25
28
|
}.merge(opts)
|
26
29
|
|
27
30
|
x_axis = options[:type] == :scatter ? Array.new(@size) { |i| i } : @index.to_a
|
28
31
|
plot = Nyaplot::Plot.new
|
29
|
-
plot.
|
30
|
-
plot.height(options[:height]) if options[:height]
|
31
|
-
|
32
|
-
p = plot.add( options[:type], x_axis, @vector.to_a )
|
33
|
-
plot.x_label( options[:x_label] ) if options[:x_label]
|
34
|
-
plot.y_label( options[:y_label] ) if options[:y_label]
|
35
|
-
p.color( Nyaplot::Colors.qual ) if options[:color]
|
32
|
+
diagram = plot.add( options[:type], x_axis, @data.to_a )
|
36
33
|
|
34
|
+
yield plot, diagram if block_given?
|
35
|
+
|
37
36
|
plot.show
|
38
37
|
end
|
39
38
|
end
|
data/lib/daru/vector.rb
CHANGED
@@ -14,17 +14,23 @@ module Daru
|
|
14
14
|
include Daru::Plotting::Vector
|
15
15
|
|
16
16
|
def each(&block)
|
17
|
-
|
17
|
+
return to_enum(:each) unless block_given?
|
18
|
+
|
19
|
+
@data.each(&block)
|
20
|
+
self
|
18
21
|
end
|
19
22
|
|
20
23
|
def map!(&block)
|
21
|
-
|
24
|
+
return to_enum(:map!) unless block_given?
|
22
25
|
|
26
|
+
@data.map!(&block)
|
23
27
|
self
|
24
28
|
end
|
25
29
|
|
26
30
|
def map(&block)
|
27
|
-
|
31
|
+
return to_enum(:map) unless block_given?
|
32
|
+
|
33
|
+
Daru::Vector.new @data.map(&block), name: @name, index: @index, dtype: @dtype
|
28
34
|
end
|
29
35
|
|
30
36
|
alias_method :recode, :map
|
@@ -33,22 +39,29 @@ module Daru
|
|
33
39
|
attr_reader :index
|
34
40
|
attr_reader :size
|
35
41
|
attr_reader :dtype
|
42
|
+
attr_reader :nm_dtype
|
43
|
+
attr_reader :nil_positions
|
36
44
|
|
37
45
|
# Create a Vector object.
|
38
46
|
# == Arguments
|
39
47
|
#
|
40
48
|
# @param source[Array,Hash] - Supply elements in the form of an Array or a Hash. If Array, a
|
41
|
-
#
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
45
|
-
#
|
49
|
+
# numeric index will be created if not supplied in the options. Specifying more
|
50
|
+
# index elements than actual values in *source* will insert *nil* into the
|
51
|
+
# surplus index elements. When a Hash is specified, the keys of the Hash are
|
52
|
+
# taken as the index elements and the corresponding values as the values that
|
53
|
+
# populate the vector.
|
46
54
|
#
|
47
55
|
# == Options
|
48
56
|
#
|
49
|
-
# * +:name+
|
57
|
+
# * +:name+ - Name of the vector
|
58
|
+
#
|
59
|
+
# * +:index+ - Index of the vector
|
50
60
|
#
|
51
|
-
# * +:
|
61
|
+
# * +:dtype+ - The underlying data type. Can be :array or :nmatrix. Default :array.
|
62
|
+
#
|
63
|
+
# * +:nm_dtype+ - For NMatrix, the data type of the numbers. See the NMatrix docs for
|
64
|
+
# further information on supported data type.
|
52
65
|
#
|
53
66
|
# == Usage
|
54
67
|
#
|
@@ -64,76 +77,112 @@ module Daru
|
|
64
77
|
source = source || []
|
65
78
|
end
|
66
79
|
name = opts[:name]
|
67
|
-
@dtype = opts[:dtype] || Array
|
68
|
-
|
69
80
|
set_name name
|
70
81
|
|
71
|
-
@
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
when @dtype == Range, Matrix
|
80
|
-
Daru::Accessors::ArrayWrapper.new source.to_a.dup, self
|
81
|
-
end
|
82
|
-
|
83
|
-
if index.nil?
|
84
|
-
@index = Daru::Index.new @vector.size
|
85
|
-
else
|
86
|
-
@index = index.to_index
|
87
|
-
end
|
88
|
-
# TODO: Will need work for NMatrix/MDArray
|
89
|
-
if @index.size > @vector.size
|
90
|
-
self.coerce Array # NM with nils seg faults
|
91
|
-
(@index.size - @vector.size).times { @vector << nil }
|
92
|
-
elsif @index.size < @vector.size
|
93
|
-
raise IndexError, "Expected index size >= vector size"
|
82
|
+
@data = cast_vector_to(opts[:dtype], source, opts[:nm_dtype])
|
83
|
+
@index = create_index(index || @data.size)
|
84
|
+
|
85
|
+
if @index.size > @data.size
|
86
|
+
cast(dtype: :array) # NM with nils seg faults
|
87
|
+
(@index.size - @data.size).times { @data << nil }
|
88
|
+
elsif @index.size < @data.size
|
89
|
+
raise IndexError, "Expected index size >= vector size. Index size : #{@index.size}, vector size : #{@data.size}"
|
94
90
|
end
|
95
91
|
|
92
|
+
@possibly_changed_type = true
|
93
|
+
set_nil_positions
|
96
94
|
set_size
|
97
95
|
end
|
98
96
|
|
99
|
-
# Get one or more elements with specified index.
|
97
|
+
# Get one or more elements with specified index or a range.
|
100
98
|
#
|
101
99
|
# == Usage
|
100
|
+
# # For vectors employing single layer Index
|
101
|
+
#
|
102
102
|
# v[:one, :two] # => Daru::Vector with indexes :one and :two
|
103
103
|
# v[:one] # => Single element
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
104
|
+
# v[:one..:three] # => Daru::Vector with indexes :one, :two and :three
|
105
|
+
#
|
106
|
+
# # For vectors employing hierarchical multi index
|
107
|
+
#
|
108
|
+
def [](*indexes)
|
109
|
+
location = indexes[0]
|
110
|
+
if @index.is_a?(MultiIndex)
|
111
|
+
result =
|
112
|
+
if location.is_a?(Integer)
|
113
|
+
element_from_numeric_index(location)
|
114
|
+
elsif location.is_a?(Range)
|
115
|
+
arry = location.inject([]) do |memo, num|
|
116
|
+
memo << element_from_numeric_index(num)
|
117
|
+
memo
|
118
|
+
end
|
119
|
+
|
120
|
+
new_index = Daru::MultiIndex.new(@index.to_a[location])
|
121
|
+
Daru::Vector.new(arry, index: new_index, name: @name, dtype: dtype)
|
110
122
|
else
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
123
|
+
sub_index = @index[indexes]
|
124
|
+
|
125
|
+
if sub_index.is_a?(Integer)
|
126
|
+
element_from_numeric_index(sub_index)
|
115
127
|
else
|
116
|
-
|
128
|
+
elements = sub_index.map do |tuple|
|
129
|
+
@data[@index[(indexes + tuple)]]
|
130
|
+
end
|
131
|
+
Daru::Vector.new(elements, index: Daru::MultiIndex.new(sub_index.to_a),
|
132
|
+
name: @name, dtype: @dtype)
|
117
133
|
end
|
118
134
|
end
|
135
|
+
|
136
|
+
return result
|
119
137
|
else
|
120
|
-
indexes
|
138
|
+
unless indexes[1]
|
139
|
+
case location
|
140
|
+
when Range
|
141
|
+
range =
|
142
|
+
if location.first.is_a?(Numeric)
|
143
|
+
location
|
144
|
+
else
|
145
|
+
first = location.first
|
146
|
+
last = location.last
|
147
|
+
|
148
|
+
(first..last)
|
149
|
+
end
|
150
|
+
indexes = @index[range]
|
151
|
+
else
|
152
|
+
return element_from_numeric_index(location)
|
153
|
+
end
|
154
|
+
end
|
121
155
|
|
122
|
-
Daru::Vector.new indexes.map { |
|
123
|
-
index: indexes
|
156
|
+
Daru::Vector.new indexes.map { |loc| @data[index_for(loc)] }, name: @name,
|
157
|
+
index: indexes.map { |e| named_index_for(e) }, dtype: @dtype
|
124
158
|
end
|
125
159
|
end
|
126
160
|
|
127
|
-
def []=(
|
128
|
-
|
161
|
+
def []=(*location, value)
|
162
|
+
cast(dtype: :array) if value.nil? and dtype != :array
|
129
163
|
|
130
|
-
if @
|
131
|
-
|
164
|
+
@possibly_changed_type = true if @type == :object and (value.nil? or
|
165
|
+
value.is_a?(Numeric))
|
166
|
+
@possibly_changed_type = true if @type == :numeric and (!value.is_a?(Numeric) and
|
167
|
+
!value.nil?)
|
168
|
+
|
169
|
+
pos =
|
170
|
+
if @index.is_a?(MultiIndex) and !location[0].is_a?(Integer)
|
171
|
+
index_for location
|
172
|
+
else
|
173
|
+
index_for location[0]
|
174
|
+
end
|
175
|
+
|
176
|
+
if pos.is_a?(MultiIndex)
|
177
|
+
pos.each do |sub_tuple|
|
178
|
+
self[*(location + sub_tuple)] = value
|
179
|
+
end
|
132
180
|
else
|
133
|
-
@
|
181
|
+
@data[pos] = value
|
134
182
|
end
|
135
183
|
|
136
184
|
set_size
|
185
|
+
set_nil_positions
|
137
186
|
end
|
138
187
|
|
139
188
|
# Two vectors are equal if they have the exact same index values corresponding
|
@@ -158,8 +207,12 @@ module Daru
|
|
158
207
|
concat element
|
159
208
|
end
|
160
209
|
|
161
|
-
def
|
162
|
-
|
210
|
+
# Returns the first *q* elements of the vector (10 by default).
#
# The inclusive range must end at q - 1: 0..q would select q + 1 elements.
def head q=10
  self[0..(q - 1)]
end
|
213
|
+
|
214
|
+
def tail q=10
|
215
|
+
self[-q..-1]
|
163
216
|
end
|
164
217
|
|
165
218
|
# Append an element to the vector by specifying the element and index
|
@@ -167,30 +220,31 @@ module Daru
|
|
167
220
|
raise IndexError, "Expected new unique index" if @index.include? index
|
168
221
|
|
169
222
|
if index.nil? and @index.index_class == Integer
|
170
|
-
@index =
|
223
|
+
@index = create_index(@size + 1)
|
171
224
|
index = @size
|
172
225
|
else
|
173
226
|
begin
|
174
|
-
@index =
|
175
|
-
rescue
|
227
|
+
@index = create_index(@index + index)
|
228
|
+
rescue StandardError => e
|
176
229
|
raise e, "Expected valid index."
|
177
230
|
end
|
178
231
|
end
|
179
|
-
|
180
|
-
@vector[@index[index]] = element
|
181
|
-
|
232
|
+
@data[@index[index]] = element
|
182
233
|
set_size
|
234
|
+
set_nil_positions
|
183
235
|
end
|
184
236
|
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
237
|
+
# Cast a vector to a new data type.
|
238
|
+
#
|
239
|
+
# == Options
|
240
|
+
#
|
241
|
+
# * +:dtype+ - :array for Ruby Array. :nmatrix for NMatrix.
|
242
|
+
def cast opts={}
|
243
|
+
dtype = opts[:dtype]
|
244
|
+
raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless
|
245
|
+
dtype == :array or dtype == :nmatrix
|
192
246
|
|
193
|
-
|
247
|
+
@data = cast_vector_to dtype
|
194
248
|
end
|
195
249
|
|
196
250
|
# Delete an element by value
|
@@ -201,25 +255,51 @@ module Daru
|
|
201
255
|
# Delete element by index
|
202
256
|
def delete_at index
|
203
257
|
idx = named_index_for index
|
204
|
-
@
|
258
|
+
@data.delete_at @index[idx]
|
205
259
|
|
206
260
|
if @index.index_class == Integer
|
207
261
|
@index = Daru::Index.new @size-1
|
208
262
|
else
|
209
|
-
@index = (@index.to_a - [idx])
|
263
|
+
@index = Daru::Index.new (@index.to_a - [idx])
|
210
264
|
end
|
211
265
|
|
212
266
|
set_size
|
267
|
+
set_nil_positions
|
268
|
+
end
|
269
|
+
|
270
|
+
# The type of data contained in the vector. Can be :object or :numeric. If
|
271
|
+
# the underlying dtype is an NMatrix, this method will return the data type
|
272
|
+
# of the NMatrix object.
|
273
|
+
#
|
274
|
+
# Running through the data to figure out the kind of data is delayed to the
|
275
|
+
# last possible moment.
|
276
|
+
def type
|
277
|
+
return @data.nm_dtype if dtype == :nmatrix
|
278
|
+
|
279
|
+
if @type.nil? or @possibly_changed_type
|
280
|
+
@type = :numeric
|
281
|
+
self.each do |e|
|
282
|
+
unless e.nil?
|
283
|
+
unless e.is_a?(Numeric)
|
284
|
+
@type = :object
|
285
|
+
break
|
286
|
+
end
|
287
|
+
end
|
288
|
+
end
|
289
|
+
@possibly_changed_type = false
|
290
|
+
end
|
291
|
+
|
292
|
+
@type
|
213
293
|
end
|
214
294
|
|
215
295
|
# Get index of element
|
216
296
|
def index_of element
|
217
|
-
@index.key @
|
297
|
+
@index.key @data.index(element)
|
218
298
|
end
|
219
299
|
|
220
300
|
# Keep only unique elements of the vector along with their indexes.
|
221
301
|
def uniq
|
222
|
-
uniq_vector = @
|
302
|
+
uniq_vector = @data.uniq
|
223
303
|
new_index = uniq_vector.inject([]) do |acc, element|
|
224
304
|
acc << index_of(element)
|
225
305
|
acc
|
@@ -228,21 +308,123 @@ module Daru
|
|
228
308
|
Daru::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
|
229
309
|
end
|
230
310
|
|
231
|
-
#
|
232
|
-
#
|
233
|
-
|
234
|
-
# end
|
235
|
-
#
|
311
|
+
# Sorts a vector according to its values. If a block is specified, the contents
|
312
|
+
# will be evaluated and data will be swapped whenever the block evaluates
|
313
|
+
# to *true*. Defaults to ascending order sorting. Any missing values will be
|
314
|
+
# put at the end of the vector. Preserves indexing. Default sort algorithm is
|
315
|
+
# quick sort.
|
316
|
+
#
|
317
|
+
# == Options
|
318
|
+
#
|
319
|
+
# * +:ascending+ - if false, will sort in descending order. Defaults to true.
|
320
|
+
#
|
321
|
+
# * +:type+ - Specify the sorting algorithm. Only supports quick_sort for now.
|
322
|
+
# == Usage
|
323
|
+
#
|
324
|
+
# v = Daru::Vector.new ["My first guitar", "jazz", "guitar"]
|
325
|
+
# # Say you want to sort these strings by length.
|
326
|
+
# v.sort { |a,b| a.length <=> b.length }
|
327
|
+
def sort opts={}, &block
|
328
|
+
opts = {
|
329
|
+
ascending: true,
|
330
|
+
type: :quick_sort
|
331
|
+
}.merge(opts)
|
332
|
+
|
333
|
+
block = lambda { |a,b| a <=> b } unless block
|
334
|
+
|
335
|
+
order = opts[:ascending] ? :ascending : :descending
|
336
|
+
vector, index = send(opts[:type], @data.to_a.dup, @index.to_a, order, &block)
|
337
|
+
index = @index.is_a?(MultiIndex) ? Daru::MultiIndex.new(index) : index
|
338
|
+
|
339
|
+
Daru::Vector.new(vector, index: create_index(index), name: @name, dtype: @dtype)
|
340
|
+
end
|
341
|
+
|
342
|
+
# Just sort the data and get an Array in return using Enumerable#sort. Non-destructive.
|
343
|
+
def sorted_data &block
|
344
|
+
@data.to_a.sort(&block)
|
345
|
+
end
|
236
346
|
|
237
|
-
|
347
|
+
# Returns *true* if the value passed actually exists in the vector.
|
348
|
+
def exists? value
|
238
349
|
!self[index_of(value)].nil?
|
239
350
|
end
|
240
351
|
|
352
|
+
# Returns a vector which has *true* in the position where the element in self
|
353
|
+
# is nil, and false otherwise.
|
354
|
+
#
|
355
|
+
# == Usage
|
356
|
+
#
|
357
|
+
# v = Daru::Vector.new([1,2,4,nil])
|
358
|
+
# v.is_nil?
|
359
|
+
# # =>
|
360
|
+
# #<Daru::Vector:89421000 @name = nil @size = 4 >
|
361
|
+
# # nil
|
362
|
+
# # 0 false
|
363
|
+
# # 1 false
|
364
|
+
# # 2 false
|
365
|
+
# # 3 true
|
366
|
+
def is_nil?
|
367
|
+
nil_truth_vector = clone_structure
|
368
|
+
@index.each do |idx|
|
369
|
+
nil_truth_vector[idx] = self[idx].nil? ? true : false
|
370
|
+
end
|
371
|
+
|
372
|
+
nil_truth_vector
|
373
|
+
end
|
374
|
+
|
375
|
+
# Opposite of #is_nil?
|
376
|
+
def not_nil?
|
377
|
+
nil_truth_vector = clone_structure
|
378
|
+
@index.each do |idx|
|
379
|
+
nil_truth_vector[idx] = self[idx].nil? ? false : true
|
380
|
+
end
|
381
|
+
|
382
|
+
nil_truth_vector
|
383
|
+
end
|
384
|
+
|
385
|
+
# Replace all nils in the vector with the value passed as an argument. Destructive.
|
386
|
+
# See #replace_nils for non-destructive version
|
387
|
+
#
|
388
|
+
# == Arguments
|
389
|
+
#
|
390
|
+
# * +replacement+ - The value which should replace all nils
|
391
|
+
def replace_nils! replacement
|
392
|
+
nil_positions.each do |idx|
|
393
|
+
self[idx] = replacement
|
394
|
+
end
|
395
|
+
|
396
|
+
self
|
397
|
+
end
|
398
|
+
|
399
|
+
# Non-destructive version of #replace_nils!
|
400
|
+
def replace_nils replacement
|
401
|
+
self.dup.replace_nils!(replacement)
|
402
|
+
end
|
403
|
+
|
404
|
+
def n_valid
|
405
|
+
@size
|
406
|
+
end
|
407
|
+
|
241
408
|
# Returns *true* if an index exists
|
242
409
|
def has_index? index
|
243
410
|
@index.include? index
|
244
411
|
end
|
245
412
|
|
413
|
+
# Convert Vector to a horizontal or vertical Ruby Matrix.
|
414
|
+
#
|
415
|
+
# == Arguments
|
416
|
+
#
|
417
|
+
# * +axis+ - Specify whether you want a *:horizontal* or a *:vertical* matrix.
|
418
|
+
def to_matrix axis=:horizontal
|
419
|
+
if axis == :horizontal
|
420
|
+
Matrix[to_a]
|
421
|
+
elsif axis == :vertical
|
422
|
+
Matrix.columns([to_a])
|
423
|
+
else
|
424
|
+
raise ArgumentError, "axis should be either :horizontal or :vertical, not #{axis}"
|
425
|
+
end
|
426
|
+
end
|
427
|
+
|
246
428
|
# Convert to hash. Hash keys are indexes and values are the corresponding elements
|
247
429
|
def to_hash
|
248
430
|
@index.inject({}) do |hsh, index|
|
@@ -253,7 +435,7 @@ module Daru
|
|
253
435
|
|
254
436
|
# Return an array
|
255
437
|
def to_a
|
256
|
-
@
|
438
|
+
@data.to_a
|
257
439
|
end
|
258
440
|
|
259
441
|
# Convert the hash from to_hash to json
|
@@ -283,10 +465,10 @@ module Daru
|
|
283
465
|
end
|
284
466
|
|
285
467
|
# Overrides original inspect for pretty printing in irb
|
286
|
-
def inspect spacing=
|
468
|
+
def inspect spacing=20, threshold=15
|
287
469
|
longest = [@name.to_s.size,
|
288
470
|
@index.to_a.map(&:to_s).map(&:size).max,
|
289
|
-
@
|
471
|
+
@data .map(&:to_s).map(&:size).max,
|
290
472
|
'nil'.size].max
|
291
473
|
|
292
474
|
content = ""
|
@@ -297,7 +479,7 @@ module Daru
|
|
297
479
|
|
298
480
|
content += sprintf formatter, "", name
|
299
481
|
@index.each_with_index do |index, num|
|
300
|
-
content += sprintf formatter, index.to_s, (self[index] || 'nil').to_s
|
482
|
+
content += sprintf formatter, index.to_s, (self[*index] || 'nil').to_s
|
301
483
|
if num > threshold
|
302
484
|
content += sprintf formatter, '...', '...'
|
303
485
|
break
|
@@ -308,27 +490,38 @@ module Daru
|
|
308
490
|
content
|
309
491
|
end
|
310
492
|
|
311
|
-
#
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
493
|
+
# Create a new vector with a different index.
|
494
|
+
#
|
495
|
+
# @param new_index [Symbol, Array, Daru::Index] The new index. Passing *:seq*
|
496
|
+
# will reindex with sequential numbers from 0 to (n-1).
|
497
|
+
def reindex new_index
|
498
|
+
index = create_index(new_index == :seq ? @size : new_index)
|
499
|
+
Daru::Vector.new @data.to_a, index: index, name: name, dtype: @dtype
|
500
|
+
end
|
316
501
|
|
317
502
|
# Give the vector a new name
|
503
|
+
#
|
504
|
+
# @param new_name [Symbol] The new name.
|
318
505
|
def rename new_name
|
319
506
|
@name = new_name.to_sym
|
320
507
|
end
|
321
508
|
|
322
509
|
# Duplicate elements and indexes
|
323
510
|
def dup
|
324
|
-
Daru::Vector.new @
|
511
|
+
Daru::Vector.new @data.dup, name: @name, index: @index.dup
|
512
|
+
end
|
513
|
+
|
514
|
+
# Copies the structure of the vector (i.e the index, size, etc.) and fills all
|
515
|
+
# values with nils.
|
516
|
+
def clone_structure
|
517
|
+
Daru::Vector.new(([nil]*@size), name: @name, index: @index.dup)
|
325
518
|
end
|
326
519
|
|
327
520
|
def daru_vector *name
|
328
521
|
self
|
329
522
|
end
|
330
523
|
|
331
|
-
|
524
|
+
alias :dv :daru_vector
|
332
525
|
|
333
526
|
def method_missing(name, *args, &block)
|
334
527
|
if name.match(/(.+)\=/)
|
@@ -342,6 +535,88 @@ module Daru
|
|
342
535
|
|
343
536
|
private
|
344
537
|
|
538
|
+
def quick_sort vector, index, order, &block
|
539
|
+
recursive_quick_sort vector, index, order, 0, @size-1, &block
|
540
|
+
[vector, index]
|
541
|
+
end
|
542
|
+
|
543
|
+
def recursive_quick_sort vector, index, order, left_lower, right_upper, &block
|
544
|
+
if left_lower < right_upper
|
545
|
+
left_upper, right_lower = partition(vector, index, order, left_lower, right_upper, &block)
|
546
|
+
if left_upper - left_lower < right_upper - right_lower
|
547
|
+
recursive_quick_sort(vector, index, order, left_lower, left_upper, &block)
|
548
|
+
recursive_quick_sort(vector, index, order, right_lower, right_upper, &block)
|
549
|
+
else
|
550
|
+
recursive_quick_sort(vector, index, order, right_lower, right_upper, &block)
|
551
|
+
recursive_quick_sort(vector, index, order, left_lower, left_upper, &block)
|
552
|
+
end
|
553
|
+
end
|
554
|
+
end
|
555
|
+
|
556
|
+
def partition vector, index, order, left_lower, right_upper, &block
|
557
|
+
mindex = (left_lower + right_upper) / 2
|
558
|
+
mvalue = vector[mindex]
|
559
|
+
i = left_lower
|
560
|
+
j = right_upper
|
561
|
+
opposite_order = order == :ascending ? :descending : :ascending
|
562
|
+
|
563
|
+
i += 1 while(keep?(vector[i], mvalue, order, &block))
|
564
|
+
j -= 1 while(keep?(vector[j], mvalue, opposite_order, &block))
|
565
|
+
|
566
|
+
while i < j - 1
|
567
|
+
vector[i], vector[j] = vector[j], vector[i]
|
568
|
+
index[i], index[j] = index[j], index[i]
|
569
|
+
i += 1
|
570
|
+
j -= 1
|
571
|
+
|
572
|
+
i += 1 while(keep?(vector[i], mvalue, order, &block))
|
573
|
+
j -= 1 while(keep?(vector[j], mvalue, opposite_order, &block))
|
574
|
+
end
|
575
|
+
|
576
|
+
if i <= j
|
577
|
+
if i < j
|
578
|
+
vector[i], vector[j] = vector[j], vector[i]
|
579
|
+
index[i], index[j] = index[j], index[i]
|
580
|
+
end
|
581
|
+
i += 1
|
582
|
+
j -= 1
|
583
|
+
end
|
584
|
+
|
585
|
+
[j,i]
|
586
|
+
end
|
587
|
+
|
588
|
+
# Decides whether *a* may stay where it is relative to the pivot value *b*
# while partitioning for the given sort *order*.
#
# The block is called as block.call(a, b) and is expected to follow the usual
# comparator contract (negative, zero or positive number). Any negative result
# counts as "a before b" and any positive result as "a after b", not only the
# exact values -1 and 1.
#
# Returns false when either value is nil, when the comparison is zero or not
# numeric, or when *order* is neither :ascending nor :descending.
def keep? a, b, order, &block
  return false if a.nil? || b.nil?

  comparison = block.call(a, b)
  return false unless comparison.is_a?(Numeric)

  case order
  when :ascending  then comparison < 0
  when :descending then comparison > 0
  else false
  end
end
|
600
|
+
|
601
|
+
# Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
|
602
|
+
# @dtype variable is set and the underlying data type of vector changed.
|
603
|
+
def cast_vector_to dtype, source=nil, nm_dtype=nil
|
604
|
+
source = @data if source.nil?
|
605
|
+
return @data if @dtype and @dtype == dtype
|
606
|
+
|
607
|
+
new_vector =
|
608
|
+
case dtype
|
609
|
+
when :array then Daru::Accessors::ArrayWrapper.new(source.to_a.dup, self)
|
610
|
+
when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source.to_a.dup,
|
611
|
+
self, nm_dtype)
|
612
|
+
when :mdarray then raise NotImplementedError, "MDArray not yet supported."
|
613
|
+
else Daru::Accessors::ArrayWrapper.new(source.dup, self)
|
614
|
+
end
|
615
|
+
|
616
|
+
@dtype = dtype || :array
|
617
|
+
new_vector
|
618
|
+
end
|
619
|
+
|
345
620
|
def named_index_for index
|
346
621
|
if @index.include? index
|
347
622
|
index
|
@@ -352,18 +627,47 @@ module Daru
|
|
352
627
|
end
|
353
628
|
end
|
354
629
|
|
630
|
+
def index_for index
|
631
|
+
if @index.include?(index)
|
632
|
+
@index[index]
|
633
|
+
elsif index.is_a?(Numeric)
|
634
|
+
index
|
635
|
+
end
|
636
|
+
end
|
637
|
+
|
355
638
|
def set_size
|
356
|
-
@size = @
|
639
|
+
@size = @data.size
|
357
640
|
end
|
358
641
|
|
359
642
|
def set_name name
|
360
|
-
|
361
|
-
|
362
|
-
elsif name #
|
363
|
-
|
643
|
+
@name =
|
644
|
+
if name.is_a?(Numeric) then name
|
645
|
+
elsif name.is_a?(Array) then name.join.to_sym # in case of MultiIndex tuple
|
646
|
+
elsif name then name.to_sym # anything but Numeric or nil
|
364
647
|
else
|
365
|
-
|
648
|
+
nil
|
366
649
|
end
|
367
650
|
end
|
651
|
+
|
652
|
+
def set_nil_positions
|
653
|
+
@nil_positions = []
|
654
|
+
@index.each do |e|
|
655
|
+
@nil_positions << e if(self[e].nil?)
|
656
|
+
end
|
657
|
+
@nil_positions.uniq!
|
658
|
+
end
|
659
|
+
|
660
|
+
def create_index potential_index
|
661
|
+
if potential_index.is_a?(Daru::MultiIndex) or potential_index.is_a?(Daru::Index)
|
662
|
+
potential_index
|
663
|
+
else
|
664
|
+
Daru::Index.new(potential_index)
|
665
|
+
end
|
666
|
+
end
|
667
|
+
|
668
|
+
def element_from_numeric_index location
|
669
|
+
pos = index_for location
|
670
|
+
pos ? @data[pos] : nil
|
671
|
+
end
|
368
672
|
end
|
369
673
|
end
|