daru 0.1.4.1 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/.travis.yml +3 -0
- data/CONTRIBUTING.md +27 -3
- data/Guardfile +7 -0
- data/History.md +39 -1
- data/README.md +1 -1
- data/daru.gemspec +9 -2
- data/lib/daru.rb +4 -1
- data/lib/daru/accessors/gsl_wrapper.rb +93 -91
- data/lib/daru/accessors/nmatrix_wrapper.rb +109 -107
- data/lib/daru/category.rb +22 -15
- data/lib/daru/core/group_by.rb +13 -2
- data/lib/daru/core/merge.rb +37 -31
- data/lib/daru/core/query.rb +10 -2
- data/lib/daru/dataframe.rb +95 -34
- data/lib/daru/date_time/index.rb +15 -16
- data/lib/daru/date_time/offsets.rb +14 -11
- data/lib/daru/formatters/table.rb +2 -2
- data/lib/daru/index/categorical_index.rb +201 -0
- data/lib/daru/index/index.rb +289 -0
- data/lib/daru/index/multi_index.rb +266 -0
- data/lib/daru/maths/statistics/vector.rb +13 -9
- data/lib/daru/monkeys.rb +0 -7
- data/lib/daru/plotting/gruff/category.rb +1 -0
- data/lib/daru/plotting/gruff/dataframe.rb +3 -3
- data/lib/daru/plotting/nyaplot/dataframe.rb +1 -1
- data/lib/daru/vector.rb +36 -21
- data/lib/daru/version.rb +1 -1
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/accessors/{wrappers_spec.rb → gsl_wrapper_spec.rb} +0 -35
- data/spec/accessors/nmatrix_wrapper_spec.rb +32 -0
- data/spec/{categorical_spec.rb → category_spec.rb} +3 -0
- data/spec/core/group_by_spec.rb +17 -1
- data/spec/core/merge_spec.rb +38 -1
- data/spec/core/query_spec.rb +5 -0
- data/spec/dataframe_spec.rb +230 -57
- data/spec/date_time/offsets_spec.rb +84 -3
- data/spec/formatters/table_formatter_spec.rb +9 -0
- data/spec/index/categorical_index_spec.rb +2 -0
- data/spec/index/index_spec.rb +17 -2
- data/spec/{math → maths}/arithmetic/dataframe_spec.rb +0 -0
- data/spec/{math → maths}/arithmetic/vector_spec.rb +0 -0
- data/spec/{math → maths}/statistics/dataframe_spec.rb +1 -1
- data/spec/{math → maths}/statistics/vector_spec.rb +7 -12
- data/spec/plotting/gruff/category_spec.rb +44 -0
- data/spec/plotting/gruff/dataframe_spec.rb +84 -0
- data/spec/plotting/gruff/vector_spec.rb +70 -0
- data/spec/plotting/nyaplot/category_spec.rb +51 -0
- data/spec/plotting/{dataframe_spec.rb → nyaplot/dataframe_spec.rb} +0 -83
- data/spec/plotting/nyaplot/vector_spec.rb +66 -0
- data/spec/spec_helper.rb +3 -2
- data/spec/vector_spec.rb +68 -1
- metadata +53 -24
- data/lib/daru/index.rb +0 -761
- data/spec/plotting/vector_spec.rb +0 -230
@@ -0,0 +1,266 @@
|
|
1
|
+
module Daru
  # Hierarchical index whose entries are tuples holding one value per level.
  #
  # Internally `@levels` holds, for every level, a Hash mapping each value of
  # that level to its position inside the level, and `@labels` holds, for every
  # level, one such position per entry of the index.
  class MultiIndex < Index
    # Iterates over the tuples of the index (see #to_a).
    def each(&block)
      to_a.each(&block)
    end

    # Maps over the tuples of the index (see #to_a).
    def map(&block)
      to_a.map(&block)
    end

    attr_reader :labels

    # @return [Array<Array>] the values of every level, in level order
    def levels
      @levels.map(&:keys)
    end

    # @param opts [Hash] must contain :labels and :levels, both of the same size
    # @raise [ArgumentError] if either option is missing, if their sizes differ
    #   or if a level contains duplicate values
    def initialize opts={}
      labels = opts[:labels]
      levels = opts[:levels]

      raise ArgumentError, 'Must specify both labels and levels' unless labels && levels
      raise ArgumentError, 'Labels and levels should be same size' if labels.size != levels.size
      raise ArgumentError, 'Incorrect labels and levels' if incorrect_fields?(labels, levels)

      @labels = labels
      @levels = levels.map { |e| e.map.with_index.to_h }
    end

    # True when any level lists the same value more than once.
    def incorrect_fields?(_labels, levels)
      # FIXME: kept on purpose. When there are no levels at all `levels[0]` is
      # nil and this call raises NoMethodError, so an index with zero levels
      # (for instance the result of MultiIndex.from_tuples([])) still cannot
      # be constructed.
      levels[0].size

      levels.any? { |e| e.uniq.size != e.size }
    end

    private :incorrect_fields?

    # Builds an index from one array of values per level.
    #
    # The values of a level are its unique values ordered by their #to_s.
    def self.from_arrays arrays
      levels = arrays.map { |e| e.uniq.sort_by(&:to_s) }

      labels = arrays.each_with_index.map do |arry, level_index|
        # Hash lookup follows the same equality (#eql?) that #uniq used to
        # build the level, so 1 and 1.0 are not mixed up, and it avoids a
        # linear scan of the level for every entry.
        positions = levels[level_index].each_with_index.to_h
        arry.map { |lvl| positions[lvl] }
      end

      MultiIndex.new labels: labels, levels: levels
    end

    # Builds an index from an array of tuples (one tuple per entry).
    def self.from_tuples tuples
      from_arrays tuples.transpose
    end

    # Like .from_tuples, but returns nil unless the first element of +tuples+
    # is an Array.
    def self.try_from_tuples tuples
      return unless tuples.respond_to?(:first) && tuples.first.is_a?(Array)

      from_tuples(tuples)
    end

    # Looks up entries by Range of positions, by a single Integer or by a
    # (possibly partial) tuple.
    #
    # @return [Integer, Daru::MultiIndex] a position when a complete tuple
    #   selects exactly one entry, otherwise the index of the selected entries
    # @raise [IndexError] if the requested tuple does not exist
    def [] *key
      key.flatten!
      first = key[0]

      if first.is_a?(Range)
        retrieve_from_range(first)
      elsif first.is_a?(Integer) && key.size == 1
        try_retrieve_from_integer(first)
      else
        retrieve_from_tuples key
      end
    end

    # @return [Boolean] false when #pos raises IndexError for +indexes+
    def valid? *indexes
      # FIXME: This is perhaps not a good method
      pos(*indexes)
      true
    rescue IndexError
      false
    end

    # Returns positions given indexes or positions
    # @note If the argument is both a valid index and a valid position,
    #   it will be treated as valid index
    # @param [Array<object>] *indexes indexes or positions
    # @example
    #   idx = Daru::MultiIndex.from_tuples [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
    #   idx.pos :a
    #   # => [0, 1]
    def pos *indexes
      if indexes.first.is_a? Integer
        return indexes.first if indexes.size == 1
        return indexes
      end
      res = self[indexes]
      return res if res.is_a? Integer
      res.map { |i| self[i] }
    end

    # Returns the index of the requested entries. Integers are taken as
    # positions; anything else is looked up with #[] and the result is
    # passed through #conform.
    def subset *indexes
      if indexes.first.is_a? Integer
        MultiIndex.from_tuples(indexes.map { |index| key(index) })
      else
        self[indexes].conform indexes
      end
    end

    # Takes positional values and returns subset of the self
    #   capturing the indexes at mentioned positions
    # @param [Array<Integer>] positional values
    # @return [object] index object
    # @example
    #   idx = Daru::MultiIndex.from_tuples [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
    #   idx.at 0, 1
    #   # => #<Daru::MultiIndex(2x2)>
    #   #   a one
    #   #     two
    def at *positions
      # preprocess_positions and validate_positions are not defined in this
      # class; presumably they are inherited from Index.
      positions = preprocess_positions(*positions)
      validate_positions(*positions)
      if positions.is_a? Integer
        key(positions)
      else
        Daru::MultiIndex.from_tuples positions.map(&method(:key))
      end
    end

    # @return [Daru::MultiIndex] a new index with the tuple +indexes+ appended
    def add *indexes
      Daru::MultiIndex.from_tuples to_a << indexes
    end

    # @param new_order [Array<Integer>] positions of the current entries
    # @return [Daru::MultiIndex] a new index with entries in the given order
    def reorder(new_order)
      from = to_a
      self.class.from_tuples(new_order.map { |i| from[i] })
    end

    # Treats +int+ as a value of the first level when it is one, otherwise
    # returns it unchanged (i.e. as a position).
    def try_retrieve_from_integer int
      @levels[0].key?(int) ? retrieve_from_tuples([int]) : int
    end

    # Index of the entries found at the positions of +range+.
    def retrieve_from_range range
      MultiIndex.from_tuples(range.map { |index| key(index) })
    end

    # Resolves a (possibly partial) tuple.
    #
    # @return [Integer, Daru::MultiIndex] see #[]
    # @raise [IndexError] if the key is empty, has more values than there are
    #   levels, names a value missing from a level, or matches no entry
    def retrieve_from_tuples key
      missing_index!(key) if key.empty? || key.size > @levels.size

      # nil means "nothing filtered yet"; an empty Array means "no entry
      # matched", and must stay empty for the remaining levels.
      chosen = nil

      key.each_with_index do |k, depth|
        level_index = @levels[depth][k]
        missing_index!(key) if level_index.nil?
        chosen = find_all_indexes @labels[depth], level_index, chosen
      end

      missing_index!(key) if chosen.empty?

      return chosen[0] if chosen.size == 1 && key.size == @levels.size
      multi_index_from_multiple_selections(chosen)
    end

    # Index made of the entries at the positions in +chosen+.
    def multi_index_from_multiple_selections chosen
      MultiIndex.from_tuples(chosen.map { |e| key(e) })
    end

    # Positions whose label equals +level_index+, restricted to +chosen+
    # unless +chosen+ is nil (no restriction yet).
    def find_all_indexes label, level_index, chosen
      if chosen.nil?
        label.each_with_index
             .select { |lbl, _| lbl == level_index }.map(&:last)
      else
        chosen.select { |c| label[c] == level_index }
      end
    end

    def missing_index! key
      raise IndexError, "Specified index #{key.inspect} do not exist"
    end

    private :find_all_indexes, :multi_index_from_multiple_selections,
      :retrieve_from_range, :retrieve_from_tuples, :missing_index!

    # @param index [Integer] position of an entry
    # @return [Array] the tuple stored at that position
    # @raise [ArgumentError] if +index+ is not smaller than the index size
    def key index
      raise ArgumentError, "Key #{index} is too large" if index >= @labels[0].size

      @labels
        .each_with_index
        .map { |label, i| @levels[i].keys[label[index]] }
    end

    # @return [Daru::MultiIndex] a copy that shares no level or label array
    #   with the receiver
    def dup
      MultiIndex.new levels: levels.dup, labels: labels.map(&:dup)
    end

    # @return [Daru::MultiIndex] index without its +by+ leftmost levels
    def drop_left_level by=1
      MultiIndex.from_arrays to_a.transpose[by..-1]
    end

    # Union of the tuples of both indexes.
    def | other
      MultiIndex.from_tuples(to_a | other.to_a)
    end

    # Intersection of the tuples of both indexes.
    def & other
      MultiIndex.from_tuples(to_a & other.to_a)
    end

    def empty?
      @labels.flatten.empty? && @levels.all?(&:empty?)
    end

    # Checks that every value of +tuple+ is a value of the level at the same
    # depth. The combination itself is not looked up.
    #
    # @return [Boolean] false for non-Enumerable input and for tuples with
    #   more values than the index has levels
    def include? tuple
      return false unless tuple.is_a? Enumerable

      values = tuple.flatten
      return false if values.size > width

      values.each_with_index
            .all? { |tup, i| @levels[i].key?(tup) }
    end

    # @return [Integer] number of entries
    def size
      @labels[0].size
    end

    # @return [Integer] number of levels
    def width
      @levels.size
    end

    def == other
      self.class == other.class &&
        labels == other.labels &&
        levels == other.levels
    end

    # @return [Array<Array>] every tuple of the index, in position order
    def to_a
      # Build the per-level value arrays once instead of once per entry.
      level_values = levels
      (0...size).map do |position|
        @labels.each_with_index.map { |label, i| level_values[i][label[position]] }
      end
    end

    # @return [Array<Integer>] the positions 0...size
    def values
      Array.new(size) { |i| i }
    end

    def inspect threshold=20
      "#<Daru::MultiIndex(#{size}x#{width})>\n" +
        Formatters::Table.format([], row_headers: sparse_tuples, threshold: threshold)
    end

    # Renders the multi_index.html.erb template with this object's binding.
    def to_html
      path = File.expand_path('../../iruby/templates/multi_index.html.erb', __FILE__)
      ERB.new(File.read(path).strip).result(binding)
    end

    # Provide a MultiIndex for sub vector produced
    #
    # @param input_indexes [Array] the input by user to index the vector
    # @return [Object] the MultiIndex object for sub vector produced
    def conform input_indexes
      return self if input_indexes[0].is_a? Range
      drop_left_level input_indexes.size
    end

    # Return tuples with nils in place of repeating values, like this:
    #
    #   [:a , :bar, :one]
    #   [nil, nil , :two]
    #   [nil, :foo, :one]
    #
    def sparse_tuples
      tuples = to_a
      [tuples.first] + tuples.each_cons(2).map { |prev, cur|
        left = cur.zip(prev).drop_while { |current, previous| current == previous }
        [nil] * (cur.size - left.size) + left.map(&:first)
      }
    end
  end
end
|
@@ -5,6 +5,8 @@ module Daru
|
|
5
5
|
# the computationally intensive tasks.
|
6
6
|
module Statistics
|
7
7
|
module Vector # rubocop:disable Metrics/ModuleLength
|
8
|
+
extend Gem::Deprecate
|
9
|
+
|
8
10
|
def mean
|
9
11
|
@data.mean
|
10
12
|
end
|
@@ -30,7 +32,8 @@ module Daru
|
|
30
32
|
end
|
31
33
|
|
32
34
|
# Most frequent value(s) of the vector.
#
# @return [Object, Daru::Vector] the single most frequent value, or a
#   Daru::Vector of all values sharing the highest count when there are several
def mode
  # Count once: calling #frequencies inside the block would recount the whole
  # vector for every distinct value.
  counts  = frequencies
  highest = counts.max
  modes   = counts.to_h.select { |_, count| count == highest }.keys
  modes.size > 1 ? Daru::Vector.new(modes) : modes.first
end
|
35
38
|
|
36
39
|
# Create a summary of count, mean, standard deviation, min and max of
|
@@ -88,25 +91,26 @@ module Daru
|
|
88
91
|
end
|
89
92
|
|
90
93
|
# Counts how often every non-nil element occurs in @data.
#
# @return [Daru::Vector] vector built from the element => count Hash
def frequencies
  tally = Hash.new(0)
  @data.each do |item|
    next if item.nil?
    tally[item] += 1
  end
  Daru::Vector.new(tally)
end
|
95
100
|
|
96
|
-
|
97
|
-
|
98
|
-
end
|
101
|
+
alias_method :freqs, :frequencies
|
102
|
+
deprecate :freqs, :frequencies, 2016, 10
|
99
103
|
|
100
104
|
def proportions
|
101
105
|
len = size - count_values(*Daru::MISSING_VALUES)
|
102
|
-
frequencies.each_with_object({}) do |(el, count), hash|
|
106
|
+
frequencies.to_h.each_with_object({}) do |(el, count), hash|
|
103
107
|
hash[el] = count / len
|
104
108
|
end
|
105
109
|
end
|
106
110
|
|
107
111
|
def ranked
|
108
112
|
sum = 0
|
109
|
-
r = frequencies.sort.each_with_object({}) do |(el, count), memo|
|
113
|
+
r = frequencies.to_h.sort.each_with_object({}) do |(el, count), memo|
|
110
114
|
memo[el] = ((sum + 1) + (sum + count)).quo(2)
|
111
115
|
sum += count
|
112
116
|
end
|
data/lib/daru/monkeys.rb
CHANGED
@@ -7,9 +7,9 @@ module Daru
|
|
7
7
|
size = opts[:size] || 500
|
8
8
|
x = extract_x_vector opts[:x]
|
9
9
|
y = extract_y_vectors opts[:y]
|
10
|
-
|
11
|
-
size, type, x, y, opts[:categorized]
|
12
|
-
|
10
|
+
if opts[:categorized]
|
11
|
+
return plot_with_category(size, type, x, y, opts[:categorized])
|
12
|
+
end
|
13
13
|
case type
|
14
14
|
when :line, :bar, :scatter
|
15
15
|
plot = send("#{type}_plot", size, x, y)
|
data/lib/daru/vector.rb
CHANGED
@@ -190,9 +190,11 @@ module Daru
|
|
190
190
|
case lib
|
191
191
|
when :gruff, :nyaplot
|
192
192
|
@plotting_library = lib
|
193
|
-
|
194
|
-
|
195
|
-
|
193
|
+
if Daru.send("has_#{lib}?".to_sym)
|
194
|
+
extend Module.const_get(
|
195
|
+
"Daru::Plotting::Vector::#{lib.to_s.capitalize}Library"
|
196
|
+
)
|
197
|
+
end
|
196
198
|
else
|
197
199
|
raise ArguementError, "Plotting library #{lib} not supported. "\
|
198
200
|
'Supported libraries are :nyaplot and :gruff'
|
@@ -291,7 +293,7 @@ module Daru
|
|
291
293
|
update_position_cache
|
292
294
|
end
|
293
295
|
|
294
|
-
# Two vectors are equal if
|
296
|
+
# Two vectors are equal if they have the exact same index values corresponding
|
295
297
|
# with the exact same elements. Name is ignored.
|
296
298
|
def == other
|
297
299
|
case other
|
@@ -353,6 +355,7 @@ module Daru
|
|
353
355
|
mod.apply_scalar_operator operator, @data,other
|
354
356
|
end
|
355
357
|
end
|
358
|
+
alias_method operator, method if operator != :== && operator != :!=
|
356
359
|
end
|
357
360
|
alias :gt :mt
|
358
361
|
alias :gteq :mteq
|
@@ -593,15 +596,6 @@ module Daru
|
|
593
596
|
end
|
594
597
|
}
|
595
598
|
|
596
|
-
def resort_index vector_index, opts
|
597
|
-
if block_given?
|
598
|
-
vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) }
|
599
|
-
else
|
600
|
-
vector_index.sort(&DEFAULT_SORTER)
|
601
|
-
end
|
602
|
-
.tap { |res| res.reverse! unless opts[:ascending] }
|
603
|
-
end
|
604
|
-
|
605
599
|
# Just sort the data and get an Array in return using Enumerable#sort.
|
606
600
|
# Non-destructive.
|
607
601
|
# :nocov:
|
@@ -834,8 +828,10 @@ module Daru
|
|
834
828
|
# # [
|
835
829
|
# # [1, 2, 3] ]
|
836
830
|
def to_nmatrix axis=:horizontal
|
837
|
-
|
838
|
-
|
831
|
+
unless numeric? && !include?(nil)
|
832
|
+
raise ArgumentError, 'Can not convert to nmatrix'\
|
833
|
+
'because the vector is numeric'
|
834
|
+
end
|
839
835
|
|
840
836
|
case axis
|
841
837
|
when :horizontal
|
@@ -973,7 +969,8 @@ module Daru
|
|
973
969
|
# # c 3
|
974
970
|
# Rearranges index and data in place according to +order+.
#
# @param order [Array] positions of the current elements, in their new order
# @return [self]
def reorder! order
  @index = @index.reorder(order)
  rearranged = order.map { |position| @data[position] }
  @data = cast_vector_to(@dtype, rearranged, @nm_dtype)
  update_position_cache
  self
end
|
@@ -990,11 +987,16 @@ module Daru
|
|
990
987
|
end
|
991
988
|
|
992
989
|
def index= idx
|
993
|
-
|
994
|
-
|
995
|
-
|
996
|
-
|
997
|
-
|
990
|
+
idx = Index.coerce idx
|
991
|
+
|
992
|
+
if idx.size != size
|
993
|
+
raise ArgumentError,
|
994
|
+
"Size of supplied index #{idx.size} does not match size of Vector"
|
995
|
+
end
|
996
|
+
|
997
|
+
unless idx.is_a?(Daru::Index)
|
998
|
+
raise ArgumentError, 'Can only assign type Index and its subclasses.'
|
999
|
+
end
|
998
1000
|
|
999
1001
|
@index = idx
|
1000
1002
|
self
|
@@ -1328,6 +1330,10 @@ module Daru
|
|
1328
1330
|
end
|
1329
1331
|
end
|
1330
1332
|
|
1333
|
+
# Converts the receiver with #to_df and forwards all arguments to that
# object's #group_by.
def group_by(*args)
  frame = to_df
  frame.group_by(*args)
end
|
1336
|
+
|
1331
1337
|
private
|
1332
1338
|
|
1333
1339
|
def nil_positions
|
@@ -1538,5 +1544,14 @@ module Daru
|
|
1538
1544
|
@nil_positions = nil
|
1539
1545
|
@nan_positions = nil
|
1540
1546
|
end
|
1547
|
+
|
1548
|
+
# Sorts +vector_index+, a collection of two-element pairs.
#
# With a block the pairs are ordered by yielding the first element of each
# pair; without one DEFAULT_SORTER is used. The result is reversed unless
# opts[:ascending] is truthy.
#
# @return [Array] the sorted pairs
def resort_index vector_index, opts
  sorted =
    if block_given?
      vector_index.sort { |(left, _), (right, _)| yield(left, right) }
    else
      vector_index.sort(&DEFAULT_SORTER)
    end

  sorted.reverse! unless opts[:ascending]
  sorted
end
|
1541
1556
|
end
|
1542
1557
|
end
|