daru 0.1.5 → 0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +21 -7
- data/.travis.yml +10 -5
- data/CONTRIBUTING.md +15 -10
- data/History.md +124 -2
- data/README.md +37 -9
- data/ReleasePolicy.md +20 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/statistics.rb +6 -6
- data/benchmarks/where_clause.rb +1 -1
- data/benchmarks/where_vs_filter.rb +1 -1
- data/daru.gemspec +17 -41
- data/lib/daru.rb +10 -13
- data/lib/daru/accessors/gsl_wrapper.rb +1 -1
- data/lib/daru/accessors/nmatrix_wrapper.rb +2 -0
- data/lib/daru/category.rb +29 -15
- data/lib/daru/configuration.rb +34 -0
- data/lib/daru/core/group_by.rb +158 -77
- data/lib/daru/core/merge.rb +12 -3
- data/lib/daru/core/query.rb +20 -4
- data/lib/daru/dataframe.rb +692 -118
- data/lib/daru/date_time/index.rb +14 -11
- data/lib/daru/date_time/offsets.rb +9 -1
- data/lib/daru/extensions/which_dsl.rb +55 -0
- data/lib/daru/formatters/table.rb +3 -5
- data/lib/daru/index/categorical_index.rb +4 -4
- data/lib/daru/index/index.rb +131 -42
- data/lib/daru/index/multi_index.rb +118 -10
- data/lib/daru/io/csv/converters.rb +21 -0
- data/lib/daru/io/io.rb +105 -33
- data/lib/daru/io/sql_data_source.rb +10 -0
- data/lib/daru/iruby/templates/dataframe.html.erb +4 -51
- data/lib/daru/iruby/templates/dataframe_mi.html.erb +3 -56
- data/lib/daru/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru/iruby/templates/vector.html.erb +3 -25
- data/lib/daru/iruby/templates/vector_mi.html.erb +3 -34
- data/lib/daru/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru/maths/arithmetic/vector.rb +38 -2
- data/lib/daru/maths/statistics/dataframe.rb +28 -30
- data/lib/daru/maths/statistics/vector.rb +295 -41
- data/lib/daru/plotting/gruff/dataframe.rb +13 -15
- data/lib/daru/plotting/nyaplot/category.rb +1 -1
- data/lib/daru/plotting/nyaplot/dataframe.rb +15 -4
- data/lib/daru/plotting/nyaplot/vector.rb +1 -2
- data/lib/daru/vector.rb +308 -96
- data/lib/daru/version.rb +1 -1
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/gsl_wrapper_spec.rb +38 -35
- data/spec/accessors/nmatrix_wrapper_spec.rb +25 -22
- data/spec/category_spec.rb +24 -20
- data/spec/core/group_by_spec.rb +238 -4
- data/spec/core/merge_spec.rb +1 -1
- data/spec/core/query_spec.rb +65 -50
- data/spec/daru_spec.rb +22 -0
- data/spec/dataframe_spec.rb +473 -16
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +34 -16
- data/spec/date_time/offsets_spec.rb +14 -0
- data/spec/extensions/rserve_spec.rb +1 -1
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +55 -55
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +29 -0
- data/spec/index/categorical_index_spec.rb +33 -33
- data/spec/index/index_spec.rb +160 -41
- data/spec/index/multi_index_spec.rb +143 -33
- data/spec/io/io_spec.rb +246 -2
- data/spec/io/sql_data_source_spec.rb +31 -41
- data/spec/iruby/dataframe_spec.rb +17 -19
- data/spec/iruby/vector_spec.rb +26 -28
- data/spec/maths/arithmetic/dataframe_spec.rb +1 -1
- data/spec/maths/arithmetic/vector_spec.rb +18 -0
- data/spec/maths/statistics/vector_spec.rb +153 -15
- data/spec/plotting/gruff/category_spec.rb +3 -3
- data/spec/plotting/gruff/dataframe_spec.rb +14 -4
- data/spec/plotting/gruff/vector_spec.rb +9 -9
- data/spec/plotting/nyaplot/category_spec.rb +5 -9
- data/spec/plotting/nyaplot/dataframe_spec.rb +95 -47
- data/spec/plotting/nyaplot/vector_spec.rb +5 -11
- data/spec/shared/vector_display_spec.rb +12 -14
- data/spec/spec_helper.rb +30 -7
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +306 -72
- metadata +96 -55
- data/spec/fixtures/stock_data.csv +0 -500
@@ -7,12 +7,12 @@ module Daru
|
|
7
7
|
size = opts[:size] || 500
|
8
8
|
x = extract_x_vector opts[:x]
|
9
9
|
y = extract_y_vectors opts[:y]
|
10
|
-
|
11
|
-
return plot_with_category(size, type, x, y, opts[:categorized])
|
12
|
-
end
|
10
|
+
type = process_type type, opts[:categorized]
|
13
11
|
case type
|
14
12
|
when :line, :bar, :scatter
|
15
13
|
plot = send("#{type}_plot", size, x, y)
|
14
|
+
when :scatter_categorized
|
15
|
+
plot = scatter_with_category(size, x, y, opts[:categorized])
|
16
16
|
# TODO: hist, box
|
17
17
|
# It turns out hist and box are not supported in Gruff yet
|
18
18
|
else
|
@@ -24,6 +24,10 @@ module Daru
|
|
24
24
|
|
25
25
|
private
|
26
26
|
|
27
|
+
def process_type type, categorized
|
28
|
+
type == :scatter && categorized ? :scatter_categorized : type
|
29
|
+
end
|
30
|
+
|
27
31
|
def line_plot size, x, y
|
28
32
|
plot = Gruff::Line.new size
|
29
33
|
plot.labels = size.times.to_a.zip(x).to_h
|
@@ -50,21 +54,15 @@ module Daru
|
|
50
54
|
plot
|
51
55
|
end
|
52
56
|
|
53
|
-
def
|
57
|
+
def scatter_with_category size, x, y, opts
|
54
58
|
x = Daru::Vector.new x
|
55
59
|
y = y.first
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
bools = cat_dv.eq cat
|
62
|
-
plot.data cat, x.where(bools).to_a, y.where(bools).to_a
|
63
|
-
end
|
64
|
-
else
|
65
|
-
raise ArgumentError, "Type #{type} is not supported."
|
60
|
+
plot = Gruff::Scatter.new size
|
61
|
+
cat_dv = self[opts[:by]]
|
62
|
+
cat_dv.categories.each do |cat|
|
63
|
+
bools = cat_dv.eq cat
|
64
|
+
plot.data cat, x.where(bools).to_a, y.where(bools).to_a
|
66
65
|
end
|
67
|
-
yield plot if block_given?
|
68
66
|
plot
|
69
67
|
end
|
70
68
|
|
@@ -21,6 +21,8 @@ module Daru
|
|
21
21
|
# df = Daru::DataFrame.new({a:['A', 'B', 'C', 'D', 'E'], b:[10,20,30,40,50]})
|
22
22
|
# df.plot type: :bar, x: :a, y: :b
|
23
23
|
def plot opts={}, &block
|
24
|
+
index_as_default_x_axis(opts) unless x_axis_defined?(opts)
|
25
|
+
|
24
26
|
if opts[:categorized]
|
25
27
|
plot_with_category(opts, &block)
|
26
28
|
else
|
@@ -30,6 +32,15 @@ module Daru
|
|
30
32
|
|
31
33
|
private
|
32
34
|
|
35
|
+
def x_axis_defined?(opts)
|
36
|
+
opts[:x] || opts.keys.any? { |k| k.to_s.match(/x\d+/) }
|
37
|
+
end
|
38
|
+
|
39
|
+
def index_as_default_x_axis(opts)
|
40
|
+
opts[:x] = :_index
|
41
|
+
self[:_index] = @index.to_a
|
42
|
+
end
|
43
|
+
|
33
44
|
def plot_without_category opts
|
34
45
|
options = {type: :scatter}.merge(opts)
|
35
46
|
|
@@ -38,7 +49,7 @@ module Daru
|
|
38
49
|
|
39
50
|
diagram =
|
40
51
|
case
|
41
|
-
when !([
|
52
|
+
when !(%i[scatter bar line histogram] & types).empty?
|
42
53
|
plot_regular_diagrams plot, opts
|
43
54
|
when types.include?(:box)
|
44
55
|
plot_box_diagram plot
|
@@ -48,7 +59,7 @@ module Daru
|
|
48
59
|
|
49
60
|
yield(plot, diagram) if block_given?
|
50
61
|
|
51
|
-
plot
|
62
|
+
plot
|
52
63
|
end
|
53
64
|
|
54
65
|
def plot_with_category opts
|
@@ -66,8 +77,8 @@ module Daru
|
|
66
77
|
|
67
78
|
plot.legend true
|
68
79
|
yield plot, *diagrams if block_given?
|
69
|
-
plot.show
|
70
80
|
|
81
|
+
plot
|
71
82
|
else
|
72
83
|
raise ArgumentError, "Unsupported type #{type}"
|
73
84
|
end
|
@@ -102,7 +113,7 @@ module Daru
|
|
102
113
|
end
|
103
114
|
end
|
104
115
|
|
105
|
-
SHAPES = %w
|
116
|
+
SHAPES = %w[circle triangle-up diamond square triangle-down cross].freeze
|
106
117
|
def get_shape type
|
107
118
|
validate_type type, :scatter
|
108
119
|
SHAPES.cycle
|
data/lib/daru/vector.rb
CHANGED
@@ -122,6 +122,17 @@ module Daru
|
|
122
122
|
self
|
123
123
|
end
|
124
124
|
|
125
|
+
def apply_method(method, keys: nil, by_position: true)
|
126
|
+
vect = keys ? get_sub_vector(keys, by_position: by_position) : self
|
127
|
+
|
128
|
+
case method
|
129
|
+
when Symbol then vect.send(method)
|
130
|
+
when Proc then method.call(vect)
|
131
|
+
else raise
|
132
|
+
end
|
133
|
+
end
|
134
|
+
alias :apply_method_on_sub_vector :apply_method
|
135
|
+
|
125
136
|
# The name of the Daru::Vector. String.
|
126
137
|
attr_reader :name
|
127
138
|
# The row index. Can be either Daru::Index or Daru::MultiIndex.
|
@@ -140,8 +151,6 @@ module Daru
|
|
140
151
|
attr_accessor :labels
|
141
152
|
# Store vector data in an array
|
142
153
|
attr_reader :data
|
143
|
-
# Ploting library being used for this vector
|
144
|
-
attr_reader :plotting_library
|
145
154
|
# TODO: Make private.
|
146
155
|
attr_reader :nil_positions, :nan_positions
|
147
156
|
|
@@ -186,6 +195,13 @@ module Daru
|
|
186
195
|
end
|
187
196
|
end
|
188
197
|
|
198
|
+
# attr_reader for :plotting_library
|
199
|
+
def plotting_library
|
200
|
+
init_plotting_library
|
201
|
+
|
202
|
+
@plotting_library
|
203
|
+
end
|
204
|
+
|
189
205
|
def plotting_library= lib
|
190
206
|
case lib
|
191
207
|
when :gruff, :nyaplot
|
@@ -196,11 +212,18 @@ module Daru
|
|
196
212
|
)
|
197
213
|
end
|
198
214
|
else
|
199
|
-
raise
|
215
|
+
raise ArgumentError, "Plotting library #{lib} not supported. "\
|
200
216
|
'Supported libraries are :nyaplot and :gruff'
|
201
217
|
end
|
202
218
|
end
|
203
219
|
|
220
|
+
# this method is overwritten: see Daru::Vector#plotting_library=
|
221
|
+
def plot(*args, **options, &b)
|
222
|
+
init_plotting_library
|
223
|
+
|
224
|
+
plot(*args, **options, &b)
|
225
|
+
end
|
226
|
+
|
204
227
|
# Get one or more elements with specified index or a range.
|
205
228
|
#
|
206
229
|
# == Usage
|
@@ -228,7 +251,7 @@ module Daru
|
|
228
251
|
end
|
229
252
|
|
230
253
|
# Returns vector of values given positional values
|
231
|
-
# @param [Array<object>]
|
254
|
+
# @param positions [Array<object>] positional values
|
232
255
|
# @return [object] vector
|
233
256
|
# @example
|
234
257
|
# dv = Daru::Vector.new 'a'..'e'
|
@@ -252,7 +275,7 @@ module Daru
|
|
252
275
|
end
|
253
276
|
|
254
277
|
# Change value at given positions
|
255
|
-
# @param [Array<object>]
|
278
|
+
# @param positions [Array<object>] positional values
|
256
279
|
# @param [object] val value to assign
|
257
280
|
# @example
|
258
281
|
# dv = Daru::Vector.new 'a'..'e'
|
@@ -352,7 +375,7 @@ module Daru
|
|
352
375
|
if other.is_a?(Daru::Vector)
|
353
376
|
mod.apply_vector_operator operator, self, other
|
354
377
|
else
|
355
|
-
mod.apply_scalar_operator operator, @data,other
|
378
|
+
mod.apply_scalar_operator operator, @data, other
|
356
379
|
end
|
357
380
|
end
|
358
381
|
alias_method operator, method if operator != :== && operator != :!=
|
@@ -385,11 +408,11 @@ module Daru
|
|
385
408
|
# comparator methods to obtain meaningful results. See this notebook for
|
386
409
|
# a good overview of using #where.
|
387
410
|
#
|
388
|
-
# @param [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>]
|
411
|
+
# @param bool_array [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>] The
|
389
412
|
# collection containing the true or false values. Each element in the Vector
|
390
413
|
# corresponding to a `true` in the bool_array will be returned along with its
|
391
414
|
# index.
|
392
|
-
# @
|
415
|
+
# @example Usage of #where.
|
393
416
|
# vector = Daru::Vector.new([2,4,5,51,5,16,2,5,3,2,1,5,2,5,2,1,56,234,6,21])
|
394
417
|
#
|
395
418
|
# # Simple logic statement passed to #where.
|
@@ -421,6 +444,27 @@ module Daru
|
|
421
444
|
Daru::Core::Query.vector_where self, bool_array
|
422
445
|
end
|
423
446
|
|
447
|
+
# Return a new vector based on the contents of a boolean array and &block.
|
448
|
+
#
|
449
|
+
# @param bool_array [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>, &block] The
|
450
|
+
# collection containing the true or false values. Each element in the Vector
|
451
|
+
# corresponding to a `true` in the bool_array will be returned along with its
|
452
|
+
# index. The &block may contain manipulative functions for the Vector elements.
|
453
|
+
#
|
454
|
+
# @return [Daru::Vector]
|
455
|
+
#
|
456
|
+
# @example Usage of #apply_where.
|
457
|
+
# dv = Daru::Vector.new ['3 days', '5 weeks', '2 weeks']
|
458
|
+
# dv = dv.apply_where(dv.match /weeks/) { |x| "#{x.split.first.to_i * 7} days" }
|
459
|
+
# # =>
|
460
|
+
# ##<Daru::Vector(3)>
|
461
|
+
# # 0 3 days
|
462
|
+
# # 1 35 days
|
463
|
+
# # 2 14 days
|
464
|
+
def apply_where bool_array, &block
|
465
|
+
Daru::Core::Query.vector_apply_where self, bool_array, &block
|
466
|
+
end
|
467
|
+
|
424
468
|
def head q=10
|
425
469
|
self[0..(q-1)]
|
426
470
|
end
|
@@ -430,6 +474,11 @@ module Daru
|
|
430
474
|
self[start..(size-1)]
|
431
475
|
end
|
432
476
|
|
477
|
+
def last q=1
|
478
|
+
# The Enumerable mixin does not provide the last method.
|
479
|
+
tail(q)
|
480
|
+
end
|
481
|
+
|
433
482
|
def empty?
|
434
483
|
@index.empty?
|
435
484
|
end
|
@@ -451,7 +500,7 @@ module Daru
|
|
451
500
|
deprecate :flawed?, :include_values?, 2016, 10
|
452
501
|
|
453
502
|
# Check if any one of mentioned values occur in the vector
|
454
|
-
# @param [Array]
|
503
|
+
# @param values [Array] values to check for
|
455
504
|
# @return [true, false] returns true if any one of specified values
|
456
505
|
# occur in the vector
|
457
506
|
# @example
|
@@ -462,6 +511,26 @@ module Daru
|
|
462
511
|
values.any? { |v| include_with_nan? @data, v }
|
463
512
|
end
|
464
513
|
|
514
|
+
# @note Do not use it to check for Float::NAN as
|
515
|
+
# Float::NAN == Float::NAN is false
|
516
|
+
# Return vector of booleans with value at ith position is either
|
517
|
+
# true or false depending upon whether value at position i is equal to
|
518
|
+
# any of the values passed in the argument or not
|
519
|
+
# @param values [Array] values to equate with
|
520
|
+
# @return [Daru::Vector] vector of boolean values
|
521
|
+
# @example
|
522
|
+
# dv = Daru::Vector.new [1, 2, 3, 2, 1]
|
523
|
+
# dv.is_values 1, 2
|
524
|
+
# # => #<Daru::Vector(5)>
|
525
|
+
# # 0 true
|
526
|
+
# # 1 true
|
527
|
+
# # 2 false
|
528
|
+
# # 3 true
|
529
|
+
# # 4 true
|
530
|
+
def is_values(*values)
|
531
|
+
Daru::Vector.new values.map { |v| eq(v) }.inject(:|)
|
532
|
+
end
|
533
|
+
|
465
534
|
# Append an element to the vector by specifying the element and index
|
466
535
|
def concat element, index
|
467
536
|
raise IndexError, 'Expected new unique index' if @index.include? index
|
@@ -481,8 +550,7 @@ module Daru
|
|
481
550
|
# * +:dtype+ - :array for Ruby Array. :nmatrix for NMatrix.
|
482
551
|
def cast opts={}
|
483
552
|
dt = opts[:dtype]
|
484
|
-
raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless
|
485
|
-
dt == :array || dt == :nmatrix || dt == :gsl
|
553
|
+
raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless %i[array nmatrix gsl].include?(dt)
|
486
554
|
|
487
555
|
@data = cast_vector_to dt unless @dtype == dt
|
488
556
|
end
|
@@ -535,7 +603,7 @@ module Daru
|
|
535
603
|
# Get index of element
|
536
604
|
def index_of element
|
537
605
|
case dtype
|
538
|
-
when :array then @index.key
|
606
|
+
when :array then @index.key(@data.index { |x| x.eql? element })
|
539
607
|
else @index.key @data.index(element)
|
540
608
|
end
|
541
609
|
end
|
@@ -583,6 +651,31 @@ module Daru
|
|
583
651
|
Daru::Vector.new(vector, index: index, name: @name, dtype: @dtype)
|
584
652
|
end
|
585
653
|
|
654
|
+
# Sorts the vector according to its `Index` values. Defaults to ascending
|
655
|
+
# order sorting.
|
656
|
+
#
|
657
|
+
# @param [Hash] opts the options for sort_by_index method.
|
658
|
+
# @option opts [Boolean] :ascending false, will sort `index` in
|
659
|
+
# descending order.
|
660
|
+
#
|
661
|
+
# @return [Vector] new sorted `Vector` according to the index values.
|
662
|
+
#
|
663
|
+
# @example
|
664
|
+
#
|
665
|
+
# dv = Daru::Vector.new [11, 13, 12], index: [23, 21, 22]
|
666
|
+
# # Say you want to sort index in ascending order
|
667
|
+
# dv.sort_by_index(ascending: true)
|
668
|
+
# #=> Daru::Vector.new [13, 12, 11], index: [21, 22, 23]
|
669
|
+
# # Say you want to sort index in descending order
|
670
|
+
# dv.sort_by_index(ascending: false)
|
671
|
+
# #=> Daru::Vector.new [11, 12, 13], index: [23, 22, 21]
|
672
|
+
def sort_by_index opts={}
|
673
|
+
opts = {ascending: true}.merge(opts)
|
674
|
+
_, new_order = resort_index(@index.each_with_index, opts).transpose
|
675
|
+
|
676
|
+
reorder new_order
|
677
|
+
end
|
678
|
+
|
586
679
|
DEFAULT_SORTER = lambda { |(lv, li), (rv, ri)|
|
587
680
|
case
|
588
681
|
when lv.nil? && rv.nil?
|
@@ -624,7 +717,7 @@ module Daru
|
|
624
717
|
def delete_if
|
625
718
|
return to_enum(:delete_if) unless block_given?
|
626
719
|
|
627
|
-
keep_e, keep_i = each_with_index.
|
720
|
+
keep_e, keep_i = each_with_index.reject { |n, _i| yield(n) }.transpose
|
628
721
|
|
629
722
|
@data = cast_vector_to @dtype, keep_e
|
630
723
|
@index = Daru::Index.new(keep_i)
|
@@ -702,31 +795,6 @@ module Daru
|
|
702
795
|
self
|
703
796
|
end
|
704
797
|
|
705
|
-
# Returns a vector which has *true* in the position where the element in self
|
706
|
-
# is nil, and false otherwise.
|
707
|
-
#
|
708
|
-
# == Usage
|
709
|
-
#
|
710
|
-
# v = Daru::Vector.new([1,2,4,nil])
|
711
|
-
# v.is_nil?
|
712
|
-
# # =>
|
713
|
-
# #<Daru::Vector:89421000 @name = nil @size = 4 >
|
714
|
-
# # nil
|
715
|
-
# # 0 false
|
716
|
-
# # 1 false
|
717
|
-
# # 2 false
|
718
|
-
# # 3 true
|
719
|
-
#
|
720
|
-
def is_nil?
|
721
|
-
# FIXME: EXTREMELY bad name for method not returning boolean - zverok, 2016-05-18
|
722
|
-
recode(&:nil?)
|
723
|
-
end
|
724
|
-
|
725
|
-
# Opposite of #is_nil?
|
726
|
-
def not_nil?
|
727
|
-
recode { |v| !v.nil? }
|
728
|
-
end
|
729
|
-
|
730
798
|
# Replace all nils in the vector with the value passed as an argument. Destructive.
|
731
799
|
# See #replace_nils for non-destructive version
|
732
800
|
#
|
@@ -741,27 +809,81 @@ module Daru
|
|
741
809
|
self
|
742
810
|
end
|
743
811
|
|
744
|
-
#
|
812
|
+
# Rolling fillna
|
813
|
+
# replace all Float::NAN and nil values with the preceding or following value
|
745
814
|
#
|
746
|
-
#
|
747
|
-
# in the series) to nil so that the size of the lagged series is the
|
748
|
-
# same as the original.
|
815
|
+
# @param direction [Symbol] (:forward, :backward) whether replacement value is preceding or following
|
749
816
|
#
|
750
|
-
#
|
817
|
+
# @example
|
818
|
+
# dv = Daru::Vector.new([1, 2, 1, 4, nil, Float::NAN, 3, nil, Float::NAN])
|
751
819
|
#
|
752
|
-
#
|
753
|
-
#
|
820
|
+
# 2.3.3 :068 > dv.rolling_fillna(:forward)
|
821
|
+
# => #<Daru::Vector(9)>
|
822
|
+
# 0 1
|
823
|
+
# 1 2
|
824
|
+
# 2 1
|
825
|
+
# 3 4
|
826
|
+
# 4 4
|
827
|
+
# 5 4
|
828
|
+
# 6 3
|
829
|
+
# 7 3
|
830
|
+
# 8 3
|
754
831
|
#
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
832
|
+
def rolling_fillna!(direction=:forward)
|
833
|
+
enum = direction == :forward ? index : index.reverse_each
|
834
|
+
last_valid_value = 0
|
835
|
+
enum.each do |idx|
|
836
|
+
if valid_value?(self[idx])
|
837
|
+
last_valid_value = self[idx]
|
838
|
+
else
|
839
|
+
self[idx] = last_valid_value
|
840
|
+
end
|
841
|
+
end
|
842
|
+
self
|
843
|
+
end
|
759
844
|
|
760
|
-
|
761
|
-
|
762
|
-
(
|
845
|
+
# Non-destructive version of rolling_fillna!
|
846
|
+
def rolling_fillna(direction=:forward)
|
847
|
+
dup.rolling_fillna!(direction)
|
848
|
+
end
|
763
849
|
|
764
|
-
|
850
|
+
# Lags the series by `k` periods.
|
851
|
+
#
|
852
|
+
# Lags the series by `k` periods, "shifting" data and inserting `nil`s
|
853
|
+
# from beginning or end of a vector, while preserving original vector's
|
854
|
+
# size.
|
855
|
+
#
|
856
|
+
# `k` can be positive or negative integer. If `k` is positive, `nil`s
|
857
|
+
# are inserted at the beginning of the vector, otherwise they are
|
858
|
+
# inserted at the end.
|
859
|
+
#
|
860
|
+
# @param [Integer] k "shift" the series by `k` periods. `k` can be
|
861
|
+
# positive or negative. (default = 1)
|
862
|
+
#
|
863
|
+
# @return [Daru::Vector] a new vector with "shifted" initial values
|
864
|
+
# and `nil` values inserted. The return vector is the same length
|
865
|
+
# as the original vector.
|
866
|
+
#
|
867
|
+
# @example Lag a vector with different periods `k`
|
868
|
+
#
|
869
|
+
# ts = Daru::Vector.new(1..5)
|
870
|
+
# # => [1, 2, 3, 4, 5]
|
871
|
+
#
|
872
|
+
# ts.lag # => [nil, 1, 2, 3, 4]
|
873
|
+
# ts.lag(1) # => [nil, 1, 2, 3, 4]
|
874
|
+
# ts.lag(2) # => [nil, nil, 1, 2, 3]
|
875
|
+
# ts.lag(-1) # => [2, 3, 4, 5, nil]
|
876
|
+
#
|
877
|
+
def lag k=1
|
878
|
+
case k
|
879
|
+
when 0 then dup
|
880
|
+
when 1...size
|
881
|
+
copy([nil] * k + data.to_a)
|
882
|
+
when -size..-1
|
883
|
+
copy(data.to_a[k.abs...size])
|
884
|
+
else
|
885
|
+
copy([])
|
886
|
+
end
|
765
887
|
end
|
766
888
|
|
767
889
|
def detach_index
|
@@ -783,7 +905,7 @@ module Daru
|
|
783
905
|
deprecate :n_valid, :count_values, 2016, 10
|
784
906
|
|
785
907
|
# Count the number of values specified
|
786
|
-
# @param [Array]
|
908
|
+
# @param values [Array] values to count for
|
787
909
|
# @return [Integer] the number of times the values mentioned occurs
|
788
910
|
# @example
|
789
911
|
# dv = Daru::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
|
@@ -798,6 +920,19 @@ module Daru
|
|
798
920
|
@index.include? index
|
799
921
|
end
|
800
922
|
|
923
|
+
# @param keys [Array] can be positions (if by_position is true) or indexes (if by_position is false)
|
924
|
+
# @return [Daru::Vector]
|
925
|
+
def get_sub_vector(keys, by_position: true)
|
926
|
+
return Daru::Vector.new([]) if keys == []
|
927
|
+
|
928
|
+
keys = @index.pos(*keys) unless by_position
|
929
|
+
|
930
|
+
sub_vect = at(*keys)
|
931
|
+
sub_vect = Daru::Vector.new([sub_vect]) unless sub_vect.is_a?(Daru::Vector)
|
932
|
+
|
933
|
+
sub_vect
|
934
|
+
end
|
935
|
+
|
801
936
|
# @return [Daru::DataFrame] the vector as a single-vector dataframe
|
802
937
|
def to_df
|
803
938
|
Daru::DataFrame.new({@name => @data}, name: @name, index: @index)
|
@@ -871,7 +1006,9 @@ module Daru
|
|
871
1006
|
end
|
872
1007
|
|
873
1008
|
# Convert to html for iruby
|
874
|
-
def to_html
|
1009
|
+
def to_html(threshold=30)
|
1010
|
+
table_thead = to_html_thead
|
1011
|
+
table_tbody = to_html_tbody(threshold)
|
875
1012
|
path = if index.is_a?(MultiIndex)
|
876
1013
|
File.expand_path('../iruby/templates/vector_mi.html.erb', __FILE__)
|
877
1014
|
else
|
@@ -880,51 +1017,97 @@ module Daru
|
|
880
1017
|
ERB.new(File.read(path).strip).result(binding)
|
881
1018
|
end
|
882
1019
|
|
1020
|
+
def to_html_thead
|
1021
|
+
table_thead_path =
|
1022
|
+
if index.is_a?(MultiIndex)
|
1023
|
+
File.expand_path('../iruby/templates/vector_mi_thead.html.erb', __FILE__)
|
1024
|
+
else
|
1025
|
+
File.expand_path('../iruby/templates/vector_thead.html.erb', __FILE__)
|
1026
|
+
end
|
1027
|
+
ERB.new(File.read(table_thead_path).strip).result(binding)
|
1028
|
+
end
|
1029
|
+
|
1030
|
+
def to_html_tbody(threshold=30)
|
1031
|
+
table_tbody_path =
|
1032
|
+
if index.is_a?(MultiIndex)
|
1033
|
+
File.expand_path('../iruby/templates/vector_mi_tbody.html.erb', __FILE__)
|
1034
|
+
else
|
1035
|
+
File.expand_path('../iruby/templates/vector_tbody.html.erb', __FILE__)
|
1036
|
+
end
|
1037
|
+
ERB.new(File.read(table_tbody_path).strip).result(binding)
|
1038
|
+
end
|
1039
|
+
|
883
1040
|
def to_s
|
884
|
-
|
1041
|
+
"#<#{self.class}#{': ' + @name.to_s if @name}(#{size})#{':category' if category?}>"
|
885
1042
|
end
|
886
1043
|
|
887
|
-
# Create a summary of the Vector
|
888
|
-
|
889
|
-
|
1044
|
+
# Create a summary of the Vector
|
1045
|
+
# @param indent_level [Fixnum] indent level
|
1046
|
+
# @return [String] String containing the summary of the Vector
|
1047
|
+
# @example
|
1048
|
+
# dv = Daru::Vector.new [1, 2, 3]
|
1049
|
+
# puts dv.summary
|
1050
|
+
#
|
1051
|
+
# # =
|
1052
|
+
# # n :3
|
1053
|
+
# # non-missing:3
|
1054
|
+
# # median: 2
|
1055
|
+
# # mean: 2.0000
|
1056
|
+
# # std.dev.: 1.0000
|
1057
|
+
# # std.err.: 0.5774
|
1058
|
+
# # skew: 0.0000
|
1059
|
+
# # kurtosis: -2.3333
|
1060
|
+
def summary(indent_level=0)
|
1061
|
+
non_missing = size - count_values(*Daru::MISSING_VALUES)
|
1062
|
+
summary = ' =' * indent_level + "= #{name}" \
|
1063
|
+
"\n n :#{size}" \
|
1064
|
+
"\n non-missing:#{non_missing}"
|
1065
|
+
case type
|
1066
|
+
when :object
|
1067
|
+
summary << object_summary
|
1068
|
+
when :numeric
|
1069
|
+
summary << numeric_summary
|
1070
|
+
end
|
1071
|
+
summary.split("\n").join("\n" + ' ' * indent_level)
|
890
1072
|
end
|
891
1073
|
|
892
|
-
#
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
s.text "mode: #{mode}"
|
900
|
-
|
901
|
-
s.table(name: 'Distribution') do |t|
|
902
|
-
frequencies.sort_by(&:to_s).each do |k,v|
|
903
|
-
key = @index.include?(k) ? @index[k] : k
|
904
|
-
t.row [key, v, ('%0.2f%%' % (v.quo(count_values(*Daru::MISSING_VALUES))*100))]
|
905
|
-
end
|
906
|
-
end
|
907
|
-
end
|
1074
|
+
# Displays summary for an object type Vector
|
1075
|
+
# @return [String] String containing object vector summary
|
1076
|
+
def object_summary
|
1077
|
+
nval = count_values(*Daru::MISSING_VALUES)
|
1078
|
+
summary = "\n factors: #{factors.to_a.join(',')}" \
|
1079
|
+
"\n mode: #{mode.to_a.join(',')}" \
|
1080
|
+
"\n Distribution\n"
|
908
1081
|
|
909
|
-
|
910
|
-
|
911
|
-
s.text 'mean: %0.4f' % mean
|
912
|
-
if sd
|
913
|
-
s.text 'std.dev.: %0.4f' % sd
|
914
|
-
s.text 'std.err.: %0.4f' % se
|
915
|
-
s.text 'skew: %0.4f' % skew
|
916
|
-
s.text 'kurtosis: %0.4f' % kurtosis
|
917
|
-
end
|
918
|
-
end
|
1082
|
+
data = frequencies.sort.each_with_index.map do |v, k|
|
1083
|
+
[k, v, '%0.2f%%' % ((nval.zero? ? 1 : v.quo(nval))*100)]
|
919
1084
|
end
|
1085
|
+
|
1086
|
+
summary + Formatters::Table.format(data)
|
1087
|
+
end
|
1088
|
+
|
1089
|
+
# Displays summary for a numeric type Vector
|
1090
|
+
# @return [String] String containing numeric vector summary
|
1091
|
+
def numeric_summary
|
1092
|
+
summary = "\n median: #{median}" +
|
1093
|
+
"\n mean: %0.4f" % mean
|
1094
|
+
if sd
|
1095
|
+
summary << "\n std.dev.: %0.4f" % sd +
|
1096
|
+
"\n std.err.: %0.4f" % se
|
1097
|
+
end
|
1098
|
+
|
1099
|
+
if count_values(*Daru::MISSING_VALUES).zero?
|
1100
|
+
summary << "\n skew: %0.4f" % skew +
|
1101
|
+
"\n kurtosis: %0.4f" % kurtosis
|
1102
|
+
end
|
1103
|
+
summary
|
920
1104
|
end
|
921
|
-
# :nocov:
|
922
1105
|
|
923
1106
|
# Overrides the original inspect for pretty printing in irb
|
924
1107
|
def inspect spacing=20, threshold=15
|
925
1108
|
row_headers = index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a
|
926
1109
|
|
927
|
-
"#<#{self.class}(#{size})#{':
|
1110
|
+
"#<#{self.class}(#{size})#{':category' if category?}>\n" +
|
928
1111
|
Formatters::Table.format(
|
929
1112
|
to_a.lazy.map { |v| [v] },
|
930
1113
|
headers: @name && [@name],
|
@@ -1095,14 +1278,30 @@ module Daru
|
|
1095
1278
|
Daru::DataFrame.new ps
|
1096
1279
|
end
|
1097
1280
|
|
1281
|
+
# Returns an array of booleans indicating whether each element of the
|
1282
|
+
# vector matches the given +regexp+.
|
1283
|
+
#
|
1284
|
+
# @param regexp [Regexp] A regular matching expression. For example, +/weeks/+.
|
1285
|
+
#
|
1286
|
+
# @return [Array] Containing +true+ or +false+ for each element, according to the match with the given +regexp+
|
1287
|
+
#
|
1288
|
+
# @example
|
1289
|
+
# dv = Daru::Vector.new(['3 days', '5 weeks', '2 weeks'])
|
1290
|
+
# dv.match(/weeks/)
|
1291
|
+
#
|
1292
|
+
# # => [false, true, true]
|
1293
|
+
def match(regexp)
|
1294
|
+
@data.map { |value| !!(value =~ regexp) }
|
1295
|
+
end
|
1296
|
+
|
1098
1297
|
# Creates a new vector consisting only of non-nil data
|
1099
1298
|
#
|
1100
1299
|
# == Arguments
|
1101
1300
|
#
|
1102
|
-
# @as_a [Symbol] Passing :array will return only the elements
|
1301
|
+
# @param as_a [Symbol] Passing :array will return only the elements
|
1103
1302
|
# as an Array. Otherwise will return a Daru::Vector.
|
1104
1303
|
#
|
1105
|
-
# @
|
1304
|
+
# @param _duplicate [Symbol] In case no missing data is found in the
|
1106
1305
|
# vector, setting this to false will return the same vector.
|
1107
1306
|
# Otherwise, a duplicate will be returned irrespective of
|
1108
1307
|
# presence of missing data.
|
@@ -1124,7 +1323,7 @@ module Daru
|
|
1124
1323
|
deprecate :only_valid, :reject_values, 2016, 10
|
1125
1324
|
|
1126
1325
|
# Return a vector with specified values removed
|
1127
|
-
# @param [Array]
|
1326
|
+
# @param values [Array] values to reject from resultant vector
|
1128
1327
|
# @return [Daru::Vector] vector with specified values removed
|
1129
1328
|
# @example
|
1130
1329
|
# dv = Daru::Vector.new [1, 2, nil, Float::NAN]
|
@@ -1146,7 +1345,7 @@ module Daru
|
|
1146
1345
|
end
|
1147
1346
|
|
1148
1347
|
# Return indexes of values specified
|
1149
|
-
# @param [Array]
|
1348
|
+
# @param values [Array] values to find indexes for
|
1150
1349
|
# @return [Array] array of indexes of values specified
|
1151
1350
|
# @example
|
1152
1351
|
# dv = Daru::Vector.new [1, 2, nil, Float::NAN], index: 11..14
|
@@ -1336,6 +1535,17 @@ module Daru
|
|
1336
1535
|
|
1337
1536
|
private
|
1338
1537
|
|
1538
|
+
# Will lazily load the plotting library being used for this vector
|
1539
|
+
def init_plotting_library
|
1540
|
+
self.plotting_library = Daru.plotting_library
|
1541
|
+
end
|
1542
|
+
|
1543
|
+
def copy(values)
|
1544
|
+
# Make sure values is right-justified to the size of the vector
|
1545
|
+
values.concat([nil] * (size-values.size)) if values.size < size
|
1546
|
+
Daru::Vector.new(values[0...size], index: @index, name: @name)
|
1547
|
+
end
|
1548
|
+
|
1339
1549
|
def nil_positions
|
1340
1550
|
@nil_positions ||
|
1341
1551
|
@nil_positions = size.times.select { |i| @data[i].nil? }
|
@@ -1348,6 +1558,11 @@ module Daru
|
|
1348
1558
|
end
|
1349
1559
|
end
|
1350
1560
|
|
1561
|
+
# Helper method returning validity of arbitrary value
|
1562
|
+
def valid_value?(v)
|
1563
|
+
v.respond_to?(:nan?) && v.nan? || v.nil? ? false : true
|
1564
|
+
end
|
1565
|
+
|
1351
1566
|
def initialize_vector source, opts
|
1352
1567
|
index, source = parse_source(source, opts)
|
1353
1568
|
set_name opts[:name]
|
@@ -1358,8 +1573,6 @@ module Daru
|
|
1358
1573
|
guard_sizes!
|
1359
1574
|
|
1360
1575
|
@possibly_changed_type = true
|
1361
|
-
# Include plotting functionality
|
1362
|
-
self.plotting_library = Daru.plotting_library
|
1363
1576
|
end
|
1364
1577
|
|
1365
1578
|
def parse_source source, opts
|
@@ -1413,7 +1626,7 @@ module Daru
|
|
1413
1626
|
end
|
1414
1627
|
|
1415
1628
|
# Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
|
1416
|
-
# @dtype variable is set and the underlying data type of vector changed.
|
1629
|
+
# @param dtype [db_type] variable is set and the underlying data type of vector changed.
|
1417
1630
|
def cast_vector_to dtype, source=nil, nm_dtype=nil
|
1418
1631
|
source = @data.to_a if source.nil?
|
1419
1632
|
|
@@ -1426,7 +1639,7 @@ module Daru
|
|
1426
1639
|
else raise ArgumentError, "Unknown dtype #{dtype}"
|
1427
1640
|
end
|
1428
1641
|
|
1429
|
-
@dtype = dtype
|
1642
|
+
@dtype = dtype
|
1430
1643
|
new_vector
|
1431
1644
|
end
|
1432
1645
|
|
@@ -1442,7 +1655,6 @@ module Daru
|
|
1442
1655
|
|
1443
1656
|
# Raises IndexError when one of the positions is an invalid position
|
1444
1657
|
def validate_positions *positions
|
1445
|
-
positions = [positions] if positions.is_a? Integer
|
1446
1658
|
positions.each do |pos|
|
1447
1659
|
raise IndexError, "#{pos} is not a valid position." if pos >= size
|
1448
1660
|
end
|