daru 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +0 -0
- data/Gemfile +0 -1
- data/History.txt +35 -0
- data/README.md +178 -198
- data/daru.gemspec +5 -7
- data/lib/daru.rb +10 -2
- data/lib/daru/accessors/array_wrapper.rb +36 -198
- data/lib/daru/accessors/nmatrix_wrapper.rb +60 -209
- data/lib/daru/core/group_by.rb +183 -0
- data/lib/daru/dataframe.rb +615 -167
- data/lib/daru/index.rb +17 -16
- data/lib/daru/io/io.rb +5 -12
- data/lib/daru/maths/arithmetic/dataframe.rb +72 -8
- data/lib/daru/maths/arithmetic/vector.rb +19 -6
- data/lib/daru/maths/statistics/dataframe.rb +103 -2
- data/lib/daru/maths/statistics/vector.rb +102 -61
- data/lib/daru/monkeys.rb +8 -0
- data/lib/daru/multi_index.rb +199 -0
- data/lib/daru/plotting/dataframe.rb +24 -24
- data/lib/daru/plotting/vector.rb +14 -15
- data/lib/daru/vector.rb +402 -98
- data/lib/version.rb +1 -1
- data/notebooks/grouping_splitting_pivots.ipynb +529 -0
- data/notebooks/intro_with_music_data_.ipynb +104 -119
- data/spec/accessors/wrappers_spec.rb +36 -0
- data/spec/core/group_by_spec.rb +331 -0
- data/spec/dataframe_spec.rb +1237 -475
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/index_spec.rb +10 -21
- data/spec/io/io_spec.rb +4 -14
- data/spec/math/arithmetic/dataframe_spec.rb +66 -0
- data/spec/math/arithmetic/vector_spec.rb +45 -4
- data/spec/math/statistics/dataframe_spec.rb +91 -1
- data/spec/math/statistics/vector_spec.rb +32 -6
- data/spec/monkeys_spec.rb +10 -1
- data/spec/multi_index_spec.rb +216 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/vector_spec.rb +505 -57
- metadata +21 -15
data/lib/daru/multi_index.rb
CHANGED
@@ -0,0 +1,199 @@
|
|
1
|
+
module Daru
  # Class for holding multi index on Vector and DataFrame.
  #
  # Tuples are stored in a nested Hash (the "relation hash"): every level of a
  # tuple is a key, and the innermost key maps to the identifier number of
  # that tuple.
  class MultiIndex
    include Enumerable

    attr_reader :relation_hash
    attr_reader :size
    attr_reader :values

    # Iterate over the tuples of the index in creation order. Returns an
    # Enumerator when no block is given, and *self* otherwise.
    def each(&block)
      return to_enum(:each) unless block_given?

      to_a.each(&block)
      self
    end

    # Map over the tuples of the index. Returns a plain Array.
    def map(&block)
      to_a.map(&block)
    end

    # Initialize a MultiIndex by passing a tuple of indexes. The order assigned
    # to the multi index corresponds to the position of the tuple in the array
    # of tuples.
    #
    # Although you can create your own hierarchically indexed Vectors and DataFrames,
    # this class currently contains minimal error checking and is mainly used
    # internally for summarizing, splitting and grouping of data.
    #
    # == Arguments
    #
    # * +source+ - The array of arrays from which the multi index is to be created.
    # * +values+ - Optional identifier numbers, one per tuple. Defaults to
    #   0..(source.size - 1).
    #
    # == Usage
    #
    #   tuples = [:a,:a,:b,:b].zip([:one,:two,:one,:two])
    #   #=> [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
    #   Daru::MultiIndex.new(tuples)
    def initialize source, values=nil
      @relation_hash = {}
      @size = source.size
      values = Array.new(source.size) { |i| i } if values.nil?
      create_relation_hash source, values
      @relation_hash.freeze
      @values = values
    end

    # Look up the index.
    #
    # * Symbols are read as a (possibly partial) tuple. A complete tuple
    #   returns its identifier number, a partial tuple returns a MultiIndex of
    #   the remaining levels, and an unknown tuple returns nil.
    # * An Integer selects the top level key at that position.
    # * A Range of Integers selects the top level keys at those positions and
    #   returns them as a new MultiIndex. Exclusive ranges leave out their end.
    def [] *indexes
      indexes  = indexes.flatten
      location = indexes[0]

      case location
      when Symbol
        result = read_relation_hash @relation_hash, indexes, 0
        return nil if result.nil?
        result.is_a?(Integer) ? result : Daru::MultiIndex.new(*make_tuples(result))
      when Integer
        self[@relation_hash.keys[location]]
      when Range
        first = location.first
        last  = location.last
        last -= 1 if location.exclude_end?

        keys = @relation_hash.keys
        hsh  = {}
        first.upto(last) do |position|
          key = keys[position]
          hsh[key] = read_relation_hash(@relation_hash, [key], 0)
        end
        Daru::MultiIndex.new(*make_tuples(hsh))
      end
    end

    # Compare two MultiIndex objects for equality based on the contents of their
    # relation hashes. Does not take object_id into account. Objects that do
    # not expose a size and a relation hash are never equal to a MultiIndex.
    def == other
      return false unless other.respond_to?(:size) && other.respond_to?(:relation_hash)
      return false if size != other.size
      deep_compare @relation_hash, other.relation_hash
    end

    # Convert a MultiIndex back to tuples (array of arrays). Will retain the
    # order of creation.
    def to_a
      make_tuples(@relation_hash)[0]
    end

    # Duplicate a MultiIndex object by rebuilding it from its tuples.
    def dup
      Daru::MultiIndex.new to_a
    end

    # Check whether a tuple exists in the multi index. The argument *tuple* can
    # be either a complete or an incomplete (leading) tuple. Anything that is
    # not an Array is treated as a one element tuple. The argument is never
    # modified, and tuples that do not exist simply return false.
    def include? tuple
      path = tuple.is_a?(Array) ? tuple.flatten : [tuple]
      !!read_relation_hash(@relation_hash, path, 0)
    end

    # Obtain the tuple that corresponds with the identifier number.
    #
    # == Arguments
    #
    # * +key+ - A number for which the tuple is to be obtained.
    #
    # == Usage
    #
    #   mi.key(3) #=> [:a,:two,:baz]
    def key key
      tuple = find_tuple_for(@relation_hash, key)
      tuple.empty? ? nil : tuple
    end

    private

    # Deep compare two relation hashes. Hash#== already compares nested
    # hashes and their values, so no manual traversal is needed.
    def deep_compare this, other
      this == other
    end

    # Create tuples out of the relation hash based on the order of the identifier
    # numbers. Returns an array of arrays containing the tuples and another
    # containing their corresponding index numbers.
    def make_tuples relation_hash
      tuples   = []
      new_vals = []
      values.each do |number|
        tuple = find_tuple_for(relation_hash, number)
        next if tuple.empty?

        tuples   << tuple
        new_vals << number
      end
      [tuples,new_vals]
    end

    # Finds and returns a single tuple for a particular identifier number.
    # Returns an empty Array if the number is not present.
    def find_tuple_for relation_hash, number
      tuple = []
      search_for_number number, relation_hash, tuple
      tuple.reverse
    end

    # Search for a number and store its corresponding tuple in *tuple*, leaf
    # key first (the caller reverses it). Returns true if the number is
    # successfully found; the search stops at the first match.
    def search_for_number number, relation_hash, tuple
      relation_hash.each do |key, value|
        matched =
          if value.is_a?(Hash)
            search_for_number(number, value, tuple)
          else
            value == number
          end

        if matched
          tuple << key
          return true
        end
      end

      false
    end

    # Read the relation hash and return a sub-relation hash or the number to which
    # indexes belongs. Returns nil when the path cannot be followed, i.e.
    # when a key is missing or the path is longer than the hierarchy.
    def read_relation_hash relation_hash, indexes, index
      return nil unless relation_hash.is_a?(Hash)

      value = relation_hash[indexes[index]]
      indexes[index+1].nil? ? value : read_relation_hash(value, indexes, index+1)
    end

    # Create the relation hash from supplied tuples.
    def create_relation_hash source, values
      source.each_with_index do |tuple, idx|
        populate @relation_hash, tuple, 0, values[idx]
      end
    end

    # Insert one tuple into the relation hash, creating intermediate hashes
    # for every level except the last, which stores the identifier number.
    def populate relation_hash, tuple, index, number
      identifier = tuple[index]
      return unless identifier

      unless tuple[index+1]
        relation_hash[identifier] = number
        return
      end

      relation_hash[identifier] ||= {}
      populate relation_hash[identifier], tuple, index+1, number
    end
  end
end
|
@@ -7,40 +7,40 @@ end
|
|
7
7
|
module Daru
|
8
8
|
module Plotting
|
9
9
|
module DataFrame
|
10
|
-
# Plots a DataFrame with Nyaplot on IRuby using the given options.
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
10
|
+
# Plots a DataFrame with Nyaplot on IRuby using the given options. Yields
|
11
|
+
# the corresponding Nyaplot::Plot object and the Nyaplot::Diagram object
|
12
|
+
# to the block, if it is specified. See the nyaplot docs for info on how to
|
13
|
+
# further use these objects.
|
14
|
+
#
|
14
15
|
# == Options
|
15
|
-
# type
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
16
|
+
# +:type+ - Type of plot (scatter, bar, histogram)
|
17
|
+
# +:legends+ - The names of the vectors that are to be used as X and Y axes.
|
18
|
+
# The vector names must be specified as symbols inside an Array. They
|
19
|
+
# also should be specified in the right order. For example, passing [:a, :b]
|
20
|
+
# will keep vector :a as the X axis and :b as the Y axis. Passing [:a]
|
21
|
+
# will keep :a as the X axis and plot the frequency with which :a appears
|
22
|
+
# on the Y axis.
|
23
|
+
# +:frame+ - Pass this as *true* to disable plotting the graph directly
|
24
|
+
# and instead manually create Nyaplot::Frame object inside the block using
|
25
|
+
# the Nyaplot::Plot object for plotting one or many graphs in a frame.
|
26
|
+
#
|
22
27
|
# == Usage
|
23
28
|
# df = Daru::DataFrame.new({a:[0,1,2,3,4], b:[10,20,30,40,50]})
|
24
|
-
# df.plot :a, :b, type: :bar
|
25
|
-
def plot
|
29
|
+
# df.plot legends: [:a, :b], type: :bar
|
30
|
+
def plot opts={}
|
26
31
|
options = {
|
27
32
|
type: :scatter,
|
28
|
-
|
33
|
+
frame: false,
|
34
|
+
legends: []
|
29
35
|
}.merge(opts)
|
30
36
|
|
31
37
|
plot = Nyaplot::Plot.new
|
32
|
-
|
33
|
-
|
34
|
-
plot.y_label options[:y_label] if options[:y_label]
|
35
|
-
p.tooltip_contents options[:tooltip_contents] if options[:tooltip_contents]
|
38
|
+
diagram = plot.add_with_df(Nyaplot::DataFrame.new(self.to_a[0]),
|
39
|
+
options[:type], *options[:legends])
|
36
40
|
|
37
|
-
|
38
|
-
p.color Nyaplot::Colors.qual
|
39
|
-
p.fill_by options[:fill_by] if options[:fill_by]
|
40
|
-
p.shape_by options[:shape_by] if options[:shape_by]
|
41
|
-
end
|
41
|
+
yield(plot, diagram) if block_given?
|
42
42
|
|
43
|
-
plot.show
|
43
|
+
plot.show unless options[:frame]
|
44
44
|
end
|
45
45
|
end
|
46
46
|
end
|
data/lib/daru/plotting/vector.rb
CHANGED
@@ -8,32 +8,31 @@ module Daru
|
|
8
8
|
module Plotting
|
9
9
|
module Vector
|
10
10
|
|
11
|
-
# Plots a Vector with Nyaplot on IRuby using the given options.
|
11
|
+
# Plots a Vector with Nyaplot on IRuby using the given options. Yields the
|
12
|
+
# plot object (Nyaplot::Plot) and the diagram object (Nyaplot::Diagram)
|
13
|
+
# to the block, which can be used for setting various options as per the
|
14
|
+
# Nyaplot API.
|
15
|
+
#
|
12
16
|
# == Options
|
13
17
|
# type (:scatter, :bar, :histogram), title, x_label, y_label, color(true/false)
|
14
18
|
#
|
15
19
|
# == Usage
|
16
20
|
# vector = Daru::Vector.new [10,20,30,40], [:one, :two, :three, :four]
|
17
|
-
# vector.plot
|
18
|
-
|
21
|
+
# vector.plot(type: :bar) do |plot|
|
22
|
+
# plot.title "My first plot"
|
23
|
+
# plot.width 1200
|
24
|
+
# end
|
25
|
+
def plot opts={}, &block
|
19
26
|
options = {
|
20
|
-
type: :scatter
|
21
|
-
title: "#{@name}",
|
22
|
-
x_label: '',
|
23
|
-
y_label: '',
|
24
|
-
color: false
|
27
|
+
type: :scatter
|
25
28
|
}.merge(opts)
|
26
29
|
|
27
30
|
x_axis = options[:type] == :scatter ? Array.new(@size) { |i| i } : @index.to_a
|
28
31
|
plot = Nyaplot::Plot.new
|
29
|
-
plot.
|
30
|
-
plot.height(options[:height]) if options[:height]
|
31
|
-
|
32
|
-
p = plot.add( options[:type], x_axis, @vector.to_a )
|
33
|
-
plot.x_label( options[:x_label] ) if options[:x_label]
|
34
|
-
plot.y_label( options[:y_label] ) if options[:y_label]
|
35
|
-
p.color( Nyaplot::Colors.qual ) if options[:color]
|
32
|
+
diagram = plot.add( options[:type], x_axis, @data.to_a )
|
36
33
|
|
34
|
+
yield plot, diagram if block_given?
|
35
|
+
|
37
36
|
plot.show
|
38
37
|
end
|
39
38
|
end
|
data/lib/daru/vector.rb
CHANGED
@@ -14,17 +14,23 @@ module Daru
|
|
14
14
|
include Daru::Plotting::Vector
|
15
15
|
|
16
16
|
def each(&block)
|
17
|
-
|
17
|
+
return to_enum(:each) unless block_given?
|
18
|
+
|
19
|
+
@data.each(&block)
|
20
|
+
self
|
18
21
|
end
|
19
22
|
|
20
23
|
def map!(&block)
|
21
|
-
|
24
|
+
return to_enum(:map!) unless block_given?
|
22
25
|
|
26
|
+
@data.map!(&block)
|
23
27
|
self
|
24
28
|
end
|
25
29
|
|
26
30
|
def map(&block)
|
27
|
-
|
31
|
+
return to_enum(:map) unless block_given?
|
32
|
+
|
33
|
+
Daru::Vector.new @data.map(&block), name: @name, index: @index, dtype: @dtype
|
28
34
|
end
|
29
35
|
|
30
36
|
alias_method :recode, :map
|
@@ -33,22 +39,29 @@ module Daru
|
|
33
39
|
attr_reader :index
|
34
40
|
attr_reader :size
|
35
41
|
attr_reader :dtype
|
42
|
+
attr_reader :nm_dtype
|
43
|
+
attr_reader :nil_positions
|
36
44
|
|
37
45
|
# Create a Vector object.
|
38
46
|
# == Arguments
|
39
47
|
#
|
40
48
|
# @param source[Array,Hash] - Supply elements in the form of an Array or a Hash. If Array, a
|
41
|
-
#
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
45
|
-
#
|
49
|
+
# numeric index will be created if not supplied in the options. Specifying more
|
50
|
+
# index elements than actual values in *source* will insert *nil* into the
|
51
|
+
# surplus index elements. When a Hash is specified, the keys of the Hash are
|
52
|
+
# taken as the index elements and the corresponding values as the values that
|
53
|
+
# populate the vector.
|
46
54
|
#
|
47
55
|
# == Options
|
48
56
|
#
|
49
|
-
# * +:name+
|
57
|
+
# * +:name+ - Name of the vector
|
58
|
+
#
|
59
|
+
# * +:index+ - Index of the vector
|
50
60
|
#
|
51
|
-
# * +:
|
61
|
+
# * +:dtype+ - The underlying data type. Can be :array or :nmatrix. Default :array.
|
62
|
+
#
|
63
|
+
# * +:nm_dtype+ - For NMatrix, the data type of the numbers. See the NMatrix docs for
|
64
|
+
# further information on supported data type.
|
52
65
|
#
|
53
66
|
# == Usage
|
54
67
|
#
|
@@ -64,76 +77,112 @@ module Daru
|
|
64
77
|
source = source || []
|
65
78
|
end
|
66
79
|
name = opts[:name]
|
67
|
-
@dtype = opts[:dtype] || Array
|
68
|
-
|
69
80
|
set_name name
|
70
81
|
|
71
|
-
@
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
when @dtype == Range, Matrix
|
80
|
-
Daru::Accessors::ArrayWrapper.new source.to_a.dup, self
|
81
|
-
end
|
82
|
-
|
83
|
-
if index.nil?
|
84
|
-
@index = Daru::Index.new @vector.size
|
85
|
-
else
|
86
|
-
@index = index.to_index
|
87
|
-
end
|
88
|
-
# TODO: Will need work for NMatrix/MDArray
|
89
|
-
if @index.size > @vector.size
|
90
|
-
self.coerce Array # NM with nils seg faults
|
91
|
-
(@index.size - @vector.size).times { @vector << nil }
|
92
|
-
elsif @index.size < @vector.size
|
93
|
-
raise IndexError, "Expected index size >= vector size"
|
82
|
+
@data = cast_vector_to(opts[:dtype], source, opts[:nm_dtype])
|
83
|
+
@index = create_index(index || @data.size)
|
84
|
+
|
85
|
+
if @index.size > @data.size
|
86
|
+
cast(dtype: :array) # NM with nils seg faults
|
87
|
+
(@index.size - @data.size).times { @data << nil }
|
88
|
+
elsif @index.size < @data.size
|
89
|
+
raise IndexError, "Expected index size >= vector size. Index size : #{@index.size}, vector size : #{@data.size}"
|
94
90
|
end
|
95
91
|
|
92
|
+
@possibly_changed_type = true
|
93
|
+
set_nil_positions
|
96
94
|
set_size
|
97
95
|
end
|
98
96
|
|
99
|
-
# Get one or more elements with specified index.
|
97
|
+
# Get one or more elements with specified index or a range.
|
100
98
|
#
|
101
99
|
# == Usage
|
100
|
+
# # For vectors employing single layer Index
|
101
|
+
#
|
102
102
|
# v[:one, :two] # => Daru::Vector with indexes :one and :two
|
103
103
|
# v[:one] # => Single element
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
104
|
+
# v[:one..:three] # => Daru::Vector with indexes :one, :two and :three
|
105
|
+
#
|
106
|
+
# # For vectors employing hierarchical multi index
|
107
|
+
#
|
108
|
+
def [](*indexes)
|
109
|
+
location = indexes[0]
|
110
|
+
if @index.is_a?(MultiIndex)
|
111
|
+
result =
|
112
|
+
if location.is_a?(Integer)
|
113
|
+
element_from_numeric_index(location)
|
114
|
+
elsif location.is_a?(Range)
|
115
|
+
arry = location.inject([]) do |memo, num|
|
116
|
+
memo << element_from_numeric_index(num)
|
117
|
+
memo
|
118
|
+
end
|
119
|
+
|
120
|
+
new_index = Daru::MultiIndex.new(@index.to_a[location])
|
121
|
+
Daru::Vector.new(arry, index: new_index, name: @name, dtype: dtype)
|
110
122
|
else
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
123
|
+
sub_index = @index[indexes]
|
124
|
+
|
125
|
+
if sub_index.is_a?(Integer)
|
126
|
+
element_from_numeric_index(sub_index)
|
115
127
|
else
|
116
|
-
|
128
|
+
elements = sub_index.map do |tuple|
|
129
|
+
@data[@index[(indexes + tuple)]]
|
130
|
+
end
|
131
|
+
Daru::Vector.new(elements, index: Daru::MultiIndex.new(sub_index.to_a),
|
132
|
+
name: @name, dtype: @dtype)
|
117
133
|
end
|
118
134
|
end
|
135
|
+
|
136
|
+
return result
|
119
137
|
else
|
120
|
-
indexes
|
138
|
+
unless indexes[1]
|
139
|
+
case location
|
140
|
+
when Range
|
141
|
+
range =
|
142
|
+
if location.first.is_a?(Numeric)
|
143
|
+
location
|
144
|
+
else
|
145
|
+
first = location.first
|
146
|
+
last = location.last
|
147
|
+
|
148
|
+
(first..last)
|
149
|
+
end
|
150
|
+
indexes = @index[range]
|
151
|
+
else
|
152
|
+
return element_from_numeric_index(location)
|
153
|
+
end
|
154
|
+
end
|
121
155
|
|
122
|
-
Daru::Vector.new indexes.map { |
|
123
|
-
index: indexes
|
156
|
+
Daru::Vector.new indexes.map { |loc| @data[index_for(loc)] }, name: @name,
|
157
|
+
index: indexes.map { |e| named_index_for(e) }, dtype: @dtype
|
124
158
|
end
|
125
159
|
end
|
126
160
|
|
127
|
-
def []=(
|
128
|
-
|
161
|
+
def []=(*location, value)
|
162
|
+
cast(dtype: :array) if value.nil? and dtype != :array
|
129
163
|
|
130
|
-
if @
|
131
|
-
|
164
|
+
@possibly_changed_type = true if @type == :object and (value.nil? or
|
165
|
+
value.is_a?(Numeric))
|
166
|
+
@possibly_changed_type = true if @type == :numeric and (!value.is_a?(Numeric) and
|
167
|
+
!value.nil?)
|
168
|
+
|
169
|
+
pos =
|
170
|
+
if @index.is_a?(MultiIndex) and !location[0].is_a?(Integer)
|
171
|
+
index_for location
|
172
|
+
else
|
173
|
+
index_for location[0]
|
174
|
+
end
|
175
|
+
|
176
|
+
if pos.is_a?(MultiIndex)
|
177
|
+
pos.each do |sub_tuple|
|
178
|
+
self[*(location + sub_tuple)] = value
|
179
|
+
end
|
132
180
|
else
|
133
|
-
@
|
181
|
+
@data[pos] = value
|
134
182
|
end
|
135
183
|
|
136
184
|
set_size
|
185
|
+
set_nil_positions
|
137
186
|
end
|
138
187
|
|
139
188
|
# Two vectors are equal if they have the exact same index values corresponding
|
@@ -158,8 +207,12 @@ module Daru
|
|
158
207
|
concat element
|
159
208
|
end
|
160
209
|
|
161
|
-
def
|
162
|
-
|
210
|
+
# Returns the first *q* elements of the vector (10 by default).
#
# The range handed to #[] is inclusive, so its upper bound has to be q-1;
# using q itself would return q+1 elements.
# NOTE(review): q is expected to be a positive Integer. q = 0 produces the
# range 0..-1, whose meaning depends on how #[] treats it - confirm if
# zero-length heads are needed.
def head q=10
  self[0..(q-1)]
end
|
213
|
+
|
214
|
+
def tail q=10
|
215
|
+
self[-q..-1]
|
163
216
|
end
|
164
217
|
|
165
218
|
# Append an element to the vector by specifying the element and index
|
@@ -167,30 +220,31 @@ module Daru
|
|
167
220
|
raise IndexError, "Expected new unique index" if @index.include? index
|
168
221
|
|
169
222
|
if index.nil? and @index.index_class == Integer
|
170
|
-
@index =
|
223
|
+
@index = create_index(@size + 1)
|
171
224
|
index = @size
|
172
225
|
else
|
173
226
|
begin
|
174
|
-
@index =
|
175
|
-
rescue
|
227
|
+
@index = create_index(@index + index)
|
228
|
+
rescue StandardError => e
|
176
229
|
raise e, "Expected valid index."
|
177
230
|
end
|
178
231
|
end
|
179
|
-
|
180
|
-
@vector[@index[index]] = element
|
181
|
-
|
232
|
+
@data[@index[index]] = element
|
182
233
|
set_size
|
234
|
+
set_nil_positions
|
183
235
|
end
|
184
236
|
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
237
|
+
# Cast a vector to a new data type.
|
238
|
+
#
|
239
|
+
# == Options
|
240
|
+
#
|
241
|
+
# * +:dtype+ - :array for Ruby Array. :nmatrix for NMatrix.
|
242
|
+
def cast opts={}
|
243
|
+
dtype = opts[:dtype]
|
244
|
+
raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless
|
245
|
+
dtype == :array or dtype == :nmatrix
|
192
246
|
|
193
|
-
|
247
|
+
@data = cast_vector_to dtype
|
194
248
|
end
|
195
249
|
|
196
250
|
# Delete an element by value
|
@@ -201,25 +255,51 @@ module Daru
|
|
201
255
|
# Delete element by index
|
202
256
|
def delete_at index
|
203
257
|
idx = named_index_for index
|
204
|
-
@
|
258
|
+
@data.delete_at @index[idx]
|
205
259
|
|
206
260
|
if @index.index_class == Integer
|
207
261
|
@index = Daru::Index.new @size-1
|
208
262
|
else
|
209
|
-
@index = (@index.to_a - [idx])
|
263
|
+
@index = Daru::Index.new (@index.to_a - [idx])
|
210
264
|
end
|
211
265
|
|
212
266
|
set_size
|
267
|
+
set_nil_positions
|
268
|
+
end
|
269
|
+
|
270
|
+
# The type of data contained in the vector. Can be :object or :numeric. If
|
271
|
+
# the underlying dtype is an NMatrix, this method will return the data type
|
272
|
+
# of the NMatrix object.
|
273
|
+
#
|
274
|
+
# Running through the data to figure out the kind of data is delayed to the
|
275
|
+
# last possible moment.
|
276
|
+
def type
|
277
|
+
return @data.nm_dtype if dtype == :nmatrix
|
278
|
+
|
279
|
+
if @type.nil? or @possibly_changed_type
|
280
|
+
@type = :numeric
|
281
|
+
self.each do |e|
|
282
|
+
unless e.nil?
|
283
|
+
unless e.is_a?(Numeric)
|
284
|
+
@type = :object
|
285
|
+
break
|
286
|
+
end
|
287
|
+
end
|
288
|
+
end
|
289
|
+
@possibly_changed_type = false
|
290
|
+
end
|
291
|
+
|
292
|
+
@type
|
213
293
|
end
|
214
294
|
|
215
295
|
# Get index of element
|
216
296
|
def index_of element
|
217
|
-
@index.key @
|
297
|
+
@index.key @data.index(element)
|
218
298
|
end
|
219
299
|
|
220
300
|
# Keep only unique elements of the vector along with their indexes.
|
221
301
|
def uniq
|
222
|
-
uniq_vector = @
|
302
|
+
uniq_vector = @data.uniq
|
223
303
|
new_index = uniq_vector.inject([]) do |acc, element|
|
224
304
|
acc << index_of(element)
|
225
305
|
acc
|
@@ -228,21 +308,123 @@ module Daru
|
|
228
308
|
Daru::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
|
229
309
|
end
|
230
310
|
|
231
|
-
#
|
232
|
-
#
|
233
|
-
|
234
|
-
# end
|
235
|
-
#
|
311
|
+
# Sorts a vector according to its values. If a block is specified, the contents
|
312
|
+
# will be evaluated and data will be swapped whenever the block evaluates
|
313
|
+
# to *true*. Defaults to ascending order sorting. Any missing values will be
|
314
|
+
# put at the end of the vector. Preserves indexing. Default sort algorithm is
|
315
|
+
# quick sort.
|
316
|
+
#
|
317
|
+
# == Options
|
318
|
+
#
|
319
|
+
# * +:ascending+ - if false, will sort in descending order. Defaults to true.
|
320
|
+
#
|
321
|
+
# * +:type+ - Specify the sorting algorithm. Only supports quick_sort for now.
|
322
|
+
# == Usage
|
323
|
+
#
|
324
|
+
# v = Daru::Vector.new ["My first guitar", "jazz", "guitar"]
|
325
|
+
# # Say you want to sort these strings by length.
|
326
|
+
# v.sort { |a,b| a.length <=> b.length }
|
327
|
+
def sort opts={}, &block
|
328
|
+
opts = {
|
329
|
+
ascending: true,
|
330
|
+
type: :quick_sort
|
331
|
+
}.merge(opts)
|
332
|
+
|
333
|
+
block = lambda { |a,b| a <=> b } unless block
|
334
|
+
|
335
|
+
order = opts[:ascending] ? :ascending : :descending
|
336
|
+
vector, index = send(opts[:type], @data.to_a.dup, @index.to_a, order, &block)
|
337
|
+
index = @index.is_a?(MultiIndex) ? Daru::MultiIndex.new(index) : index
|
338
|
+
|
339
|
+
Daru::Vector.new(vector, index: create_index(index), name: @name, dtype: @dtype)
|
340
|
+
end
|
341
|
+
|
342
|
+
# Just sort the data and get an Array in return using Enumerable#sort. Non-destructive.
|
343
|
+
def sorted_data &block
|
344
|
+
@data.to_a.sort(&block)
|
345
|
+
end
|
236
346
|
|
237
|
-
|
347
|
+
# Returns *true* if the value passed actually exists in the vector.
|
348
|
+
def exists? value
|
238
349
|
!self[index_of(value)].nil?
|
239
350
|
end
|
240
351
|
|
352
|
+
# Returns a vector which has *true* in the position where the element in self
|
353
|
+
# is nil, and false otherwise.
|
354
|
+
#
|
355
|
+
# == Usage
|
356
|
+
#
|
357
|
+
# v = Daru::Vector.new([1,2,4,nil])
|
358
|
+
# v.is_nil?
|
359
|
+
# # =>
|
360
|
+
# #<Daru::Vector:89421000 @name = nil @size = 4 >
|
361
|
+
# # nil
|
362
|
+
# # 0 false
|
363
|
+
# # 1 false
|
364
|
+
# # 2 false
|
365
|
+
# # 3 true
|
366
|
+
def is_nil?
|
367
|
+
nil_truth_vector = clone_structure
|
368
|
+
@index.each do |idx|
|
369
|
+
nil_truth_vector[idx] = self[idx].nil? ? true : false
|
370
|
+
end
|
371
|
+
|
372
|
+
nil_truth_vector
|
373
|
+
end
|
374
|
+
|
375
|
+
# Opposite of #is_nil?
|
376
|
+
def not_nil?
|
377
|
+
nil_truth_vector = clone_structure
|
378
|
+
@index.each do |idx|
|
379
|
+
nil_truth_vector[idx] = self[idx].nil? ? false : true
|
380
|
+
end
|
381
|
+
|
382
|
+
nil_truth_vector
|
383
|
+
end
|
384
|
+
|
385
|
+
# Replace all nils in the vector with the value passed as an argument. Destructive.
|
386
|
+
# See #replace_nils for non-destructive version
|
387
|
+
#
|
388
|
+
# == Arguments
|
389
|
+
#
|
390
|
+
# * +replacement+ - The value which should replace all nils
|
391
|
+
def replace_nils! replacement
|
392
|
+
nil_positions.each do |idx|
|
393
|
+
self[idx] = replacement
|
394
|
+
end
|
395
|
+
|
396
|
+
self
|
397
|
+
end
|
398
|
+
|
399
|
+
# Non-destructive version of #replace_nils!
|
400
|
+
def replace_nils replacement
|
401
|
+
self.dup.replace_nils!(replacement)
|
402
|
+
end
|
403
|
+
|
404
|
+
def n_valid
|
405
|
+
@size
|
406
|
+
end
|
407
|
+
|
241
408
|
# Returns *true* if an index exists
|
242
409
|
def has_index? index
|
243
410
|
@index.include? index
|
244
411
|
end
|
245
412
|
|
413
|
+
# Convert Vector to a horizontal or vertical Ruby Matrix.
|
414
|
+
#
|
415
|
+
# == Arguments
|
416
|
+
#
|
417
|
+
# * +axis+ - Specify whether you want a *:horizontal* or a *:vertical* matrix.
|
418
|
+
def to_matrix axis=:horizontal
|
419
|
+
if axis == :horizontal
|
420
|
+
Matrix[to_a]
|
421
|
+
elsif axis == :vertical
|
422
|
+
Matrix.columns([to_a])
|
423
|
+
else
|
424
|
+
raise ArgumentError, "axis should be either :horizontal or :vertical, not #{axis}"
|
425
|
+
end
|
426
|
+
end
|
427
|
+
|
246
428
|
# Convert to hash. Hash keys are indexes and values are the corresponding elements
|
247
429
|
def to_hash
|
248
430
|
@index.inject({}) do |hsh, index|
|
@@ -253,7 +435,7 @@ module Daru
|
|
253
435
|
|
254
436
|
# Return an array
|
255
437
|
def to_a
|
256
|
-
@
|
438
|
+
@data.to_a
|
257
439
|
end
|
258
440
|
|
259
441
|
# Convert the hash from to_hash to json
|
@@ -283,10 +465,10 @@ module Daru
|
|
283
465
|
end
|
284
466
|
|
285
467
|
# Overrides the original inspect for pretty printing in irb
|
286
|
-
def inspect spacing=
|
468
|
+
def inspect spacing=20, threshold=15
|
287
469
|
longest = [@name.to_s.size,
|
288
470
|
@index.to_a.map(&:to_s).map(&:size).max,
|
289
|
-
@
|
471
|
+
@data .map(&:to_s).map(&:size).max,
|
290
472
|
'nil'.size].max
|
291
473
|
|
292
474
|
content = ""
|
@@ -297,7 +479,7 @@ module Daru
|
|
297
479
|
|
298
480
|
content += sprintf formatter, "", name
|
299
481
|
@index.each_with_index do |index, num|
|
300
|
-
content += sprintf formatter, index.to_s, (self[index] || 'nil').to_s
|
482
|
+
content += sprintf formatter, index.to_s, (self[*index] || 'nil').to_s
|
301
483
|
if num > threshold
|
302
484
|
content += sprintf formatter, '...', '...'
|
303
485
|
break
|
@@ -308,27 +490,38 @@ module Daru
|
|
308
490
|
content
|
309
491
|
end
|
310
492
|
|
311
|
-
#
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
493
|
+
# Create a new vector with a different index.
|
494
|
+
#
|
495
|
+
# @param new_index [Symbol, Array, Daru::Index] The new index. Passing *:seq*
|
496
|
+
# will reindex with sequential numbers from 0 to (n-1).
|
497
|
+
def reindex new_index
|
498
|
+
index = create_index(new_index == :seq ? @size : new_index)
|
499
|
+
Daru::Vector.new @data.to_a, index: index, name: name, dtype: @dtype
|
500
|
+
end
|
316
501
|
|
317
502
|
# Give the vector a new name
|
503
|
+
#
|
504
|
+
# @param new_name [Symbol] The new name.
|
318
505
|
def rename new_name
|
319
506
|
@name = new_name.to_sym
|
320
507
|
end
|
321
508
|
|
322
509
|
# Duplicate elements and indexes
|
323
510
|
def dup
|
324
|
-
Daru::Vector.new @
|
511
|
+
Daru::Vector.new @data.dup, name: @name, index: @index.dup
|
512
|
+
end
|
513
|
+
|
514
|
+
# Copies the structure of the vector (i.e the index, size, etc.) and fills all
|
515
|
+
# values with nils.
|
516
|
+
def clone_structure
|
517
|
+
Daru::Vector.new(([nil]*@size), name: @name, index: @index.dup)
|
325
518
|
end
|
326
519
|
|
327
520
|
def daru_vector *name
|
328
521
|
self
|
329
522
|
end
|
330
523
|
|
331
|
-
|
524
|
+
alias :dv :daru_vector
|
332
525
|
|
333
526
|
def method_missing(name, *args, &block)
|
334
527
|
if name.match(/(.+)\=/)
|
@@ -342,6 +535,88 @@ module Daru
|
|
342
535
|
|
343
536
|
private
|
344
537
|
|
538
|
+
def quick_sort vector, index, order, &block
|
539
|
+
recursive_quick_sort vector, index, order, 0, @size-1, &block
|
540
|
+
[vector, index]
|
541
|
+
end
|
542
|
+
|
543
|
+
def recursive_quick_sort vector, index, order, left_lower, right_upper, &block
|
544
|
+
if left_lower < right_upper
|
545
|
+
left_upper, right_lower = partition(vector, index, order, left_lower, right_upper, &block)
|
546
|
+
if left_upper - left_lower < right_upper - right_lower
|
547
|
+
recursive_quick_sort(vector, index, order, left_lower, left_upper, &block)
|
548
|
+
recursive_quick_sort(vector, index, order, right_lower, right_upper, &block)
|
549
|
+
else
|
550
|
+
recursive_quick_sort(vector, index, order, right_lower, right_upper, &block)
|
551
|
+
recursive_quick_sort(vector, index, order, left_lower, left_upper, &block)
|
552
|
+
end
|
553
|
+
end
|
554
|
+
end
|
555
|
+
|
556
|
+
def partition vector, index, order, left_lower, right_upper, &block
|
557
|
+
mindex = (left_lower + right_upper) / 2
|
558
|
+
mvalue = vector[mindex]
|
559
|
+
i = left_lower
|
560
|
+
j = right_upper
|
561
|
+
opposite_order = order == :ascending ? :descending : :ascending
|
562
|
+
|
563
|
+
i += 1 while(keep?(vector[i], mvalue, order, &block))
|
564
|
+
j -= 1 while(keep?(vector[j], mvalue, opposite_order, &block))
|
565
|
+
|
566
|
+
while i < j - 1
|
567
|
+
vector[i], vector[j] = vector[j], vector[i]
|
568
|
+
index[i], index[j] = index[j], index[i]
|
569
|
+
i += 1
|
570
|
+
j -= 1
|
571
|
+
|
572
|
+
i += 1 while(keep?(vector[i], mvalue, order, &block))
|
573
|
+
j -= 1 while(keep?(vector[j], mvalue, opposite_order, &block))
|
574
|
+
end
|
575
|
+
|
576
|
+
if i <= j
|
577
|
+
if i < j
|
578
|
+
vector[i], vector[j] = vector[j], vector[i]
|
579
|
+
index[i], index[j] = index[j], index[i]
|
580
|
+
end
|
581
|
+
i += 1
|
582
|
+
j -= 1
|
583
|
+
end
|
584
|
+
|
585
|
+
[j,i]
|
586
|
+
end
|
587
|
+
|
588
|
+
# Decides whether element *a* is already on the correct side of the pivot
# *b* for the requested *order*, using the comparator *block*.
#
# == Arguments
#
# * +a+     - The element under inspection.
# * +b+     - The pivot value it is compared against.
# * +order+ - :ascending or :descending.
# * +block+ - Comparator called as block.call(a, b). As with Array#sort, a
#   negative result means a sorts before b, a positive one means it sorts after.
#
# Returns true when a sorts strictly before b (ascending) or strictly after
# b (descending). Returns false for equal elements, nil operands, comparator
# results that are not numbers and unknown orders.
def keep? a, b, order, &block
  return false if a.nil? || b.nil?

  comparison = block.call(a, b)
  # A comparator may return nil for incomparable values; never keep those.
  return false unless comparison.is_a?(Numeric)

  case order
  when :ascending  then comparison < 0
  when :descending then comparison > 0
  else false
  end
end
|
600
|
+
|
601
|
+
# Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
|
602
|
+
# @dtype variable is set and the underlying data type of vector changed.
|
603
|
+
def cast_vector_to dtype, source=nil, nm_dtype=nil
|
604
|
+
source = @data if source.nil?
|
605
|
+
return @data if @dtype and @dtype == dtype
|
606
|
+
|
607
|
+
new_vector =
|
608
|
+
case dtype
|
609
|
+
when :array then Daru::Accessors::ArrayWrapper.new(source.to_a.dup, self)
|
610
|
+
when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source.to_a.dup,
|
611
|
+
self, nm_dtype)
|
612
|
+
when :mdarray then raise NotImplementedError, "MDArray not yet supported."
|
613
|
+
else Daru::Accessors::ArrayWrapper.new(source.dup, self)
|
614
|
+
end
|
615
|
+
|
616
|
+
@dtype = dtype || :array
|
617
|
+
new_vector
|
618
|
+
end
|
619
|
+
|
345
620
|
def named_index_for index
|
346
621
|
if @index.include? index
|
347
622
|
index
|
@@ -352,18 +627,47 @@ module Daru
|
|
352
627
|
end
|
353
628
|
end
|
354
629
|
|
630
|
+
def index_for index
|
631
|
+
if @index.include?(index)
|
632
|
+
@index[index]
|
633
|
+
elsif index.is_a?(Numeric)
|
634
|
+
index
|
635
|
+
end
|
636
|
+
end
|
637
|
+
|
355
638
|
def set_size
|
356
|
-
@size = @
|
639
|
+
@size = @data.size
|
357
640
|
end
|
358
641
|
|
359
642
|
def set_name name
|
360
|
-
|
361
|
-
|
362
|
-
elsif name #
|
363
|
-
|
643
|
+
@name =
|
644
|
+
if name.is_a?(Numeric) then name
|
645
|
+
elsif name.is_a?(Array) then name.join.to_sym # in case of MultiIndex tuple
|
646
|
+
elsif name then name.to_sym # anything but Numeric or nil
|
364
647
|
else
|
365
|
-
|
648
|
+
nil
|
366
649
|
end
|
367
650
|
end
|
651
|
+
|
652
|
+
def set_nil_positions
|
653
|
+
@nil_positions = []
|
654
|
+
@index.each do |e|
|
655
|
+
@nil_positions << e if(self[e].nil?)
|
656
|
+
end
|
657
|
+
@nil_positions.uniq!
|
658
|
+
end
|
659
|
+
|
660
|
+
def create_index potential_index
|
661
|
+
if potential_index.is_a?(Daru::MultiIndex) or potential_index.is_a?(Daru::Index)
|
662
|
+
potential_index
|
663
|
+
else
|
664
|
+
Daru::Index.new(potential_index)
|
665
|
+
end
|
666
|
+
end
|
667
|
+
|
668
|
+
def element_from_numeric_index location
|
669
|
+
pos = index_for location
|
670
|
+
pos ? @data[pos] : nil
|
671
|
+
end
|
368
672
|
end
|
369
673
|
end
|