daru 0.1.4.1 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/.travis.yml +3 -0
- data/CONTRIBUTING.md +27 -3
- data/Guardfile +7 -0
- data/History.md +39 -1
- data/README.md +1 -1
- data/daru.gemspec +9 -2
- data/lib/daru.rb +4 -1
- data/lib/daru/accessors/gsl_wrapper.rb +93 -91
- data/lib/daru/accessors/nmatrix_wrapper.rb +109 -107
- data/lib/daru/category.rb +22 -15
- data/lib/daru/core/group_by.rb +13 -2
- data/lib/daru/core/merge.rb +37 -31
- data/lib/daru/core/query.rb +10 -2
- data/lib/daru/dataframe.rb +95 -34
- data/lib/daru/date_time/index.rb +15 -16
- data/lib/daru/date_time/offsets.rb +14 -11
- data/lib/daru/formatters/table.rb +2 -2
- data/lib/daru/index/categorical_index.rb +201 -0
- data/lib/daru/index/index.rb +289 -0
- data/lib/daru/index/multi_index.rb +266 -0
- data/lib/daru/maths/statistics/vector.rb +13 -9
- data/lib/daru/monkeys.rb +0 -7
- data/lib/daru/plotting/gruff/category.rb +1 -0
- data/lib/daru/plotting/gruff/dataframe.rb +3 -3
- data/lib/daru/plotting/nyaplot/dataframe.rb +1 -1
- data/lib/daru/vector.rb +36 -21
- data/lib/daru/version.rb +1 -1
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/accessors/{wrappers_spec.rb → gsl_wrapper_spec.rb} +0 -35
- data/spec/accessors/nmatrix_wrapper_spec.rb +32 -0
- data/spec/{categorical_spec.rb → category_spec.rb} +3 -0
- data/spec/core/group_by_spec.rb +17 -1
- data/spec/core/merge_spec.rb +38 -1
- data/spec/core/query_spec.rb +5 -0
- data/spec/dataframe_spec.rb +230 -57
- data/spec/date_time/offsets_spec.rb +84 -3
- data/spec/formatters/table_formatter_spec.rb +9 -0
- data/spec/index/categorical_index_spec.rb +2 -0
- data/spec/index/index_spec.rb +17 -2
- data/spec/{math → maths}/arithmetic/dataframe_spec.rb +0 -0
- data/spec/{math → maths}/arithmetic/vector_spec.rb +0 -0
- data/spec/{math → maths}/statistics/dataframe_spec.rb +1 -1
- data/spec/{math → maths}/statistics/vector_spec.rb +7 -12
- data/spec/plotting/gruff/category_spec.rb +44 -0
- data/spec/plotting/gruff/dataframe_spec.rb +84 -0
- data/spec/plotting/gruff/vector_spec.rb +70 -0
- data/spec/plotting/nyaplot/category_spec.rb +51 -0
- data/spec/plotting/{dataframe_spec.rb → nyaplot/dataframe_spec.rb} +0 -83
- data/spec/plotting/nyaplot/vector_spec.rb +66 -0
- data/spec/spec_helper.rb +3 -2
- data/spec/vector_spec.rb +68 -1
- metadata +53 -24
- data/lib/daru/index.rb +0 -761
- data/spec/plotting/vector_spec.rb +0 -230
@@ -0,0 +1,266 @@
|
|
1
|
+
module Daru
|
2
|
+
class MultiIndex < Index
|
3
|
+
def each(&block)
|
4
|
+
to_a.each(&block)
|
5
|
+
end
|
6
|
+
|
7
|
+
def map(&block)
|
8
|
+
to_a.map(&block)
|
9
|
+
end
|
10
|
+
|
11
|
+
attr_reader :labels
|
12
|
+
|
13
|
+
def levels
|
14
|
+
@levels.map(&:keys)
|
15
|
+
end
|
16
|
+
|
17
|
+
def initialize opts={}
|
18
|
+
labels = opts[:labels]
|
19
|
+
levels = opts[:levels]
|
20
|
+
|
21
|
+
raise ArgumentError, 'Must specify both labels and levels' unless labels && levels
|
22
|
+
raise ArgumentError, 'Labels and levels should be same size' if labels.size != levels.size
|
23
|
+
raise ArgumentError, 'Incorrect labels and levels' if incorrect_fields?(labels, levels)
|
24
|
+
|
25
|
+
@labels = labels
|
26
|
+
@levels = levels.map { |e| e.map.with_index.to_h }
|
27
|
+
end
|
28
|
+
|
29
|
+
def incorrect_fields?(_labels, levels)
|
30
|
+
levels[0].size # FIXME: without this exact call some specs are failing
|
31
|
+
|
32
|
+
levels.any? { |e| e.uniq.size != e.size }
|
33
|
+
end
|
34
|
+
|
35
|
+
private :incorrect_fields?
|
36
|
+
|
37
|
+
def self.from_arrays arrays
|
38
|
+
levels = arrays.map { |e| e.uniq.sort_by(&:to_s) }
|
39
|
+
|
40
|
+
labels = arrays.each_with_index.map do |arry, level_index|
|
41
|
+
level = levels[level_index]
|
42
|
+
arry.map { |lvl| level.index(lvl) }
|
43
|
+
end
|
44
|
+
|
45
|
+
MultiIndex.new labels: labels, levels: levels
|
46
|
+
end
|
47
|
+
|
48
|
+
def self.from_tuples tuples
|
49
|
+
from_arrays tuples.transpose
|
50
|
+
end
|
51
|
+
|
52
|
+
def self.try_from_tuples tuples
|
53
|
+
if tuples.respond_to?(:first) && tuples.first.is_a?(Array)
|
54
|
+
from_tuples(tuples)
|
55
|
+
else
|
56
|
+
nil
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def [] *key
|
61
|
+
key.flatten!
|
62
|
+
case
|
63
|
+
when key[0].is_a?(Range)
|
64
|
+
retrieve_from_range(key[0])
|
65
|
+
when key[0].is_a?(Integer) && key.size == 1
|
66
|
+
try_retrieve_from_integer(key[0])
|
67
|
+
else
|
68
|
+
begin
|
69
|
+
retrieve_from_tuples key
|
70
|
+
rescue NoMethodError
|
71
|
+
raise IndexError, "Specified index #{key.inspect} do not exist"
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def valid? *indexes
|
77
|
+
# FIXME: This is perhaps not a good method
|
78
|
+
pos(*indexes)
|
79
|
+
return true
|
80
|
+
rescue IndexError
|
81
|
+
return false
|
82
|
+
end
|
83
|
+
|
84
|
+
# Returns positions given indexes or positions
|
85
|
+
# @note If the argument is both a valid index and a valid position,
|
86
|
+
# it will be treated as a valid index
|
87
|
+
# @param [Array<object>] *indexes indexes or positions
|
88
|
+
# @example
|
89
|
+
# idx = Daru::MultiIndex.from_tuples [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
|
90
|
+
# idx.pos :a
|
91
|
+
# # => [0, 1]
|
92
|
+
def pos *indexes
|
93
|
+
if indexes.first.is_a? Integer
|
94
|
+
return indexes.first if indexes.size == 1
|
95
|
+
return indexes
|
96
|
+
end
|
97
|
+
res = self[indexes]
|
98
|
+
return res if res.is_a? Integer
|
99
|
+
res.map { |i| self[i] }
|
100
|
+
end
|
101
|
+
|
102
|
+
def subset *indexes
|
103
|
+
if indexes.first.is_a? Integer
|
104
|
+
MultiIndex.from_tuples(indexes.map { |index| key(index) })
|
105
|
+
else
|
106
|
+
self[indexes].conform indexes
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
# Takes positional values and returns a subset of self
|
111
|
+
# containing the indexes at the given positions
|
112
|
+
# @param [Array<Integer>] *positions positional values
|
113
|
+
# @return [object] index object
|
114
|
+
# @example
|
115
|
+
# idx = Daru::MultiIndex.from_tuples [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
|
116
|
+
# idx.at 0, 1
|
117
|
+
# # => #<Daru::MultiIndex(2x2)>
|
118
|
+
# # a one
|
119
|
+
# # two
|
120
|
+
def at *positions
|
121
|
+
positions = preprocess_positions(*positions)
|
122
|
+
validate_positions(*positions)
|
123
|
+
if positions.is_a? Integer
|
124
|
+
key(positions)
|
125
|
+
else
|
126
|
+
Daru::MultiIndex.from_tuples positions.map(&method(:key))
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
def add *indexes
|
131
|
+
Daru::MultiIndex.from_tuples to_a << indexes
|
132
|
+
end
|
133
|
+
|
134
|
+
def reorder(new_order)
|
135
|
+
from = to_a
|
136
|
+
self.class.from_tuples(new_order.map { |i| from[i] })
|
137
|
+
end
|
138
|
+
|
139
|
+
def try_retrieve_from_integer int
|
140
|
+
@levels[0].key?(int) ? retrieve_from_tuples([int]) : int
|
141
|
+
end
|
142
|
+
|
143
|
+
def retrieve_from_range range
|
144
|
+
MultiIndex.from_tuples(range.map { |index| key(index) })
|
145
|
+
end
|
146
|
+
|
147
|
+
def retrieve_from_tuples key
|
148
|
+
chosen = []
|
149
|
+
|
150
|
+
key.each_with_index do |k, depth|
|
151
|
+
level_index = @levels[depth][k]
|
152
|
+
raise IndexError, "Specified index #{key.inspect} do not exist" if level_index.nil?
|
153
|
+
label = @labels[depth]
|
154
|
+
chosen = find_all_indexes label, level_index, chosen
|
155
|
+
end
|
156
|
+
|
157
|
+
return chosen[0] if chosen.size == 1 && key.size == @levels.size
|
158
|
+
multi_index_from_multiple_selections(chosen)
|
159
|
+
end
|
160
|
+
|
161
|
+
def multi_index_from_multiple_selections chosen
|
162
|
+
MultiIndex.from_tuples(chosen.map { |e| key(e) })
|
163
|
+
end
|
164
|
+
|
165
|
+
def find_all_indexes label, level_index, chosen
|
166
|
+
if chosen.empty?
|
167
|
+
label.each_with_index
|
168
|
+
.select { |lbl, _| lbl == level_index }.map(&:last)
|
169
|
+
else
|
170
|
+
chosen.keep_if { |c| label[c] == level_index }
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
private :find_all_indexes, :multi_index_from_multiple_selections,
|
175
|
+
:retrieve_from_range, :retrieve_from_tuples
|
176
|
+
|
177
|
+
def key index
|
178
|
+
raise ArgumentError, "Key #{index} is too large" if index >= @labels[0].size
|
179
|
+
|
180
|
+
@labels
|
181
|
+
.each_with_index
|
182
|
+
.map { |label, i| @levels[i].keys[label[index]] }
|
183
|
+
end
|
184
|
+
|
185
|
+
def dup
|
186
|
+
MultiIndex.new levels: levels.dup, labels: labels
|
187
|
+
end
|
188
|
+
|
189
|
+
def drop_left_level by=1
|
190
|
+
MultiIndex.from_arrays to_a.transpose[by..-1]
|
191
|
+
end
|
192
|
+
|
193
|
+
def | other
|
194
|
+
MultiIndex.from_tuples(to_a | other.to_a)
|
195
|
+
end
|
196
|
+
|
197
|
+
def & other
|
198
|
+
MultiIndex.from_tuples(to_a & other.to_a)
|
199
|
+
end
|
200
|
+
|
201
|
+
def empty?
|
202
|
+
@labels.flatten.empty? && @levels.all?(&:empty?)
|
203
|
+
end
|
204
|
+
|
205
|
+
def include? tuple
|
206
|
+
return false unless tuple.is_a? Enumerable
|
207
|
+
tuple.flatten.each_with_index
|
208
|
+
.all? { |tup, i| @levels[i][tup] }
|
209
|
+
end
|
210
|
+
|
211
|
+
def size
|
212
|
+
@labels[0].size
|
213
|
+
end
|
214
|
+
|
215
|
+
def width
|
216
|
+
@levels.size
|
217
|
+
end
|
218
|
+
|
219
|
+
def == other
|
220
|
+
self.class == other.class &&
|
221
|
+
labels == other.labels &&
|
222
|
+
levels == other.levels
|
223
|
+
end
|
224
|
+
|
225
|
+
def to_a
|
226
|
+
(0...size).map { |e| key(e) }
|
227
|
+
end
|
228
|
+
|
229
|
+
def values
|
230
|
+
Array.new(size) { |i| i }
|
231
|
+
end
|
232
|
+
|
233
|
+
def inspect threshold=20
|
234
|
+
"#<Daru::MultiIndex(#{size}x#{width})>\n" +
|
235
|
+
Formatters::Table.format([], row_headers: sparse_tuples, threshold: threshold)
|
236
|
+
end
|
237
|
+
|
238
|
+
def to_html
|
239
|
+
path = File.expand_path('../../iruby/templates/multi_index.html.erb', __FILE__)
|
240
|
+
ERB.new(File.read(path).strip).result(binding)
|
241
|
+
end
|
242
|
+
|
243
|
+
# Provide a MultiIndex for the sub-vector produced
|
244
|
+
#
|
245
|
+
# @param input_indexes [Array] the input by user to index the vector
|
246
|
+
# @return [Object] the MultiIndex object for sub vector produced
|
247
|
+
def conform input_indexes
|
248
|
+
return self if input_indexes[0].is_a? Range
|
249
|
+
drop_left_level input_indexes.size
|
250
|
+
end
|
251
|
+
|
252
|
+
# Return tuples with nils in place of repeating values, like this:
|
253
|
+
#
|
254
|
+
# [:a , :bar, :one]
|
255
|
+
# [nil, nil , :two]
|
256
|
+
# [nil, :foo, :one]
|
257
|
+
#
|
258
|
+
def sparse_tuples
|
259
|
+
tuples = to_a
|
260
|
+
[tuples.first] + each_cons(2).map { |prev, cur|
|
261
|
+
left = cur.zip(prev).drop_while { |c, p| c == p }
|
262
|
+
[nil] * (cur.size - left.size) + left.map(&:first)
|
263
|
+
}
|
264
|
+
end
|
265
|
+
end
|
266
|
+
end
|
@@ -5,6 +5,8 @@ module Daru
|
|
5
5
|
# the computationally intensive tasks.
|
6
6
|
module Statistics
|
7
7
|
module Vector # rubocop:disable Metrics/ModuleLength
|
8
|
+
extend Gem::Deprecate
|
9
|
+
|
8
10
|
def mean
|
9
11
|
@data.mean
|
10
12
|
end
|
@@ -30,7 +32,8 @@ module Daru
|
|
30
32
|
end
|
31
33
|
|
32
34
|
def mode
|
33
|
-
frequencies.
|
35
|
+
mode = frequencies.to_h.select { |_,v| v == frequencies.max }.keys
|
36
|
+
mode.size > 1 ? Daru::Vector.new(mode) : mode.first
|
34
37
|
end
|
35
38
|
|
36
39
|
# Create a summary of count, mean, standard deviation, min and max of
|
@@ -88,25 +91,26 @@ module Daru
|
|
88
91
|
end
|
89
92
|
|
90
93
|
def frequencies
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
+
Daru::Vector.new(
|
95
|
+
@data.each_with_object(Hash.new(0)) do |element, hash|
|
96
|
+
hash[element] += 1 unless element.nil?
|
97
|
+
end
|
98
|
+
)
|
94
99
|
end
|
95
100
|
|
96
|
-
|
97
|
-
|
98
|
-
end
|
101
|
+
alias_method :freqs, :frequencies
|
102
|
+
deprecate :freqs, :frequencies, 2016, 10
|
99
103
|
|
100
104
|
def proportions
|
101
105
|
len = size - count_values(*Daru::MISSING_VALUES)
|
102
|
-
frequencies.each_with_object({}) do |(el, count), hash|
|
106
|
+
frequencies.to_h.each_with_object({}) do |(el, count), hash|
|
103
107
|
hash[el] = count / len
|
104
108
|
end
|
105
109
|
end
|
106
110
|
|
107
111
|
def ranked
|
108
112
|
sum = 0
|
109
|
-
r = frequencies.sort.each_with_object({}) do |(el, count), memo|
|
113
|
+
r = frequencies.to_h.sort.each_with_object({}) do |(el, count), memo|
|
110
114
|
memo[el] = ((sum + 1) + (sum + count)).quo(2)
|
111
115
|
sum += count
|
112
116
|
end
|
data/lib/daru/monkeys.rb
CHANGED
@@ -7,9 +7,9 @@ module Daru
|
|
7
7
|
size = opts[:size] || 500
|
8
8
|
x = extract_x_vector opts[:x]
|
9
9
|
y = extract_y_vectors opts[:y]
|
10
|
-
|
11
|
-
size, type, x, y, opts[:categorized]
|
12
|
-
|
10
|
+
if opts[:categorized]
|
11
|
+
return plot_with_category(size, type, x, y, opts[:categorized])
|
12
|
+
end
|
13
13
|
case type
|
14
14
|
when :line, :bar, :scatter
|
15
15
|
plot = send("#{type}_plot", size, x, y)
|
data/lib/daru/vector.rb
CHANGED
@@ -190,9 +190,11 @@ module Daru
|
|
190
190
|
case lib
|
191
191
|
when :gruff, :nyaplot
|
192
192
|
@plotting_library = lib
|
193
|
-
|
194
|
-
|
195
|
-
|
193
|
+
if Daru.send("has_#{lib}?".to_sym)
|
194
|
+
extend Module.const_get(
|
195
|
+
"Daru::Plotting::Vector::#{lib.to_s.capitalize}Library"
|
196
|
+
)
|
197
|
+
end
|
196
198
|
else
|
197
199
|
raise ArguementError, "Plotting library #{lib} not supported. "\
|
198
200
|
'Supported libraries are :nyaplot and :gruff'
|
@@ -291,7 +293,7 @@ module Daru
|
|
291
293
|
update_position_cache
|
292
294
|
end
|
293
295
|
|
294
|
-
# Two vectors are equal if
|
296
|
+
# Two vectors are equal if they have the exact same index values corresponding
|
295
297
|
# with the exact same elements. Name is ignored.
|
296
298
|
def == other
|
297
299
|
case other
|
@@ -353,6 +355,7 @@ module Daru
|
|
353
355
|
mod.apply_scalar_operator operator, @data,other
|
354
356
|
end
|
355
357
|
end
|
358
|
+
alias_method operator, method if operator != :== && operator != :!=
|
356
359
|
end
|
357
360
|
alias :gt :mt
|
358
361
|
alias :gteq :mteq
|
@@ -593,15 +596,6 @@ module Daru
|
|
593
596
|
end
|
594
597
|
}
|
595
598
|
|
596
|
-
def resort_index vector_index, opts
|
597
|
-
if block_given?
|
598
|
-
vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) }
|
599
|
-
else
|
600
|
-
vector_index.sort(&DEFAULT_SORTER)
|
601
|
-
end
|
602
|
-
.tap { |res| res.reverse! unless opts[:ascending] }
|
603
|
-
end
|
604
|
-
|
605
599
|
# Just sort the data and get an Array in return using Enumerable#sort.
|
606
600
|
# Non-destructive.
|
607
601
|
# :nocov:
|
@@ -834,8 +828,10 @@ module Daru
|
|
834
828
|
# # [
|
835
829
|
# # [1, 2, 3] ]
|
836
830
|
def to_nmatrix axis=:horizontal
|
837
|
-
|
838
|
-
|
831
|
+
unless numeric? && !include?(nil)
|
832
|
+
raise ArgumentError, 'Can not convert to nmatrix'\
|
833
|
+
'because the vector is numeric'
|
834
|
+
end
|
839
835
|
|
840
836
|
case axis
|
841
837
|
when :horizontal
|
@@ -973,7 +969,8 @@ module Daru
|
|
973
969
|
# # c 3
|
974
970
|
def reorder! order
|
975
971
|
@index = @index.reorder order
|
976
|
-
|
972
|
+
data_array = order.map { |i| @data[i] }
|
973
|
+
@data = cast_vector_to @dtype, data_array, @nm_dtype
|
977
974
|
update_position_cache
|
978
975
|
self
|
979
976
|
end
|
@@ -990,11 +987,16 @@ module Daru
|
|
990
987
|
end
|
991
988
|
|
992
989
|
def index= idx
|
993
|
-
|
994
|
-
|
995
|
-
|
996
|
-
|
997
|
-
|
990
|
+
idx = Index.coerce idx
|
991
|
+
|
992
|
+
if idx.size != size
|
993
|
+
raise ArgumentError,
|
994
|
+
"Size of supplied index #{idx.size} does not match size of Vector"
|
995
|
+
end
|
996
|
+
|
997
|
+
unless idx.is_a?(Daru::Index)
|
998
|
+
raise ArgumentError, 'Can only assign type Index and its subclasses.'
|
999
|
+
end
|
998
1000
|
|
999
1001
|
@index = idx
|
1000
1002
|
self
|
@@ -1328,6 +1330,10 @@ module Daru
|
|
1328
1330
|
end
|
1329
1331
|
end
|
1330
1332
|
|
1333
|
+
def group_by(*args)
|
1334
|
+
to_df.group_by(*args)
|
1335
|
+
end
|
1336
|
+
|
1331
1337
|
private
|
1332
1338
|
|
1333
1339
|
def nil_positions
|
@@ -1538,5 +1544,14 @@ module Daru
|
|
1538
1544
|
@nil_positions = nil
|
1539
1545
|
@nan_positions = nil
|
1540
1546
|
end
|
1547
|
+
|
1548
|
+
def resort_index vector_index, opts
|
1549
|
+
if block_given?
|
1550
|
+
vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) }
|
1551
|
+
else
|
1552
|
+
vector_index.sort(&DEFAULT_SORTER)
|
1553
|
+
end
|
1554
|
+
.tap { |res| res.reverse! unless opts[:ascending] }
|
1555
|
+
end
|
1541
1556
|
end
|
1542
1557
|
end
|