daru 0.1.5 → 0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +21 -7
- data/.travis.yml +10 -5
- data/CONTRIBUTING.md +15 -10
- data/History.md +124 -2
- data/README.md +37 -9
- data/ReleasePolicy.md +20 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/statistics.rb +6 -6
- data/benchmarks/where_clause.rb +1 -1
- data/benchmarks/where_vs_filter.rb +1 -1
- data/daru.gemspec +17 -41
- data/lib/daru.rb +10 -13
- data/lib/daru/accessors/gsl_wrapper.rb +1 -1
- data/lib/daru/accessors/nmatrix_wrapper.rb +2 -0
- data/lib/daru/category.rb +29 -15
- data/lib/daru/configuration.rb +34 -0
- data/lib/daru/core/group_by.rb +158 -77
- data/lib/daru/core/merge.rb +12 -3
- data/lib/daru/core/query.rb +20 -4
- data/lib/daru/dataframe.rb +692 -118
- data/lib/daru/date_time/index.rb +14 -11
- data/lib/daru/date_time/offsets.rb +9 -1
- data/lib/daru/extensions/which_dsl.rb +55 -0
- data/lib/daru/formatters/table.rb +3 -5
- data/lib/daru/index/categorical_index.rb +4 -4
- data/lib/daru/index/index.rb +131 -42
- data/lib/daru/index/multi_index.rb +118 -10
- data/lib/daru/io/csv/converters.rb +21 -0
- data/lib/daru/io/io.rb +105 -33
- data/lib/daru/io/sql_data_source.rb +10 -0
- data/lib/daru/iruby/templates/dataframe.html.erb +4 -51
- data/lib/daru/iruby/templates/dataframe_mi.html.erb +3 -56
- data/lib/daru/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru/iruby/templates/vector.html.erb +3 -25
- data/lib/daru/iruby/templates/vector_mi.html.erb +3 -34
- data/lib/daru/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru/maths/arithmetic/vector.rb +38 -2
- data/lib/daru/maths/statistics/dataframe.rb +28 -30
- data/lib/daru/maths/statistics/vector.rb +295 -41
- data/lib/daru/plotting/gruff/dataframe.rb +13 -15
- data/lib/daru/plotting/nyaplot/category.rb +1 -1
- data/lib/daru/plotting/nyaplot/dataframe.rb +15 -4
- data/lib/daru/plotting/nyaplot/vector.rb +1 -2
- data/lib/daru/vector.rb +308 -96
- data/lib/daru/version.rb +1 -1
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/gsl_wrapper_spec.rb +38 -35
- data/spec/accessors/nmatrix_wrapper_spec.rb +25 -22
- data/spec/category_spec.rb +24 -20
- data/spec/core/group_by_spec.rb +238 -4
- data/spec/core/merge_spec.rb +1 -1
- data/spec/core/query_spec.rb +65 -50
- data/spec/daru_spec.rb +22 -0
- data/spec/dataframe_spec.rb +473 -16
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +34 -16
- data/spec/date_time/offsets_spec.rb +14 -0
- data/spec/extensions/rserve_spec.rb +1 -1
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +55 -55
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +29 -0
- data/spec/index/categorical_index_spec.rb +33 -33
- data/spec/index/index_spec.rb +160 -41
- data/spec/index/multi_index_spec.rb +143 -33
- data/spec/io/io_spec.rb +246 -2
- data/spec/io/sql_data_source_spec.rb +31 -41
- data/spec/iruby/dataframe_spec.rb +17 -19
- data/spec/iruby/vector_spec.rb +26 -28
- data/spec/maths/arithmetic/dataframe_spec.rb +1 -1
- data/spec/maths/arithmetic/vector_spec.rb +18 -0
- data/spec/maths/statistics/vector_spec.rb +153 -15
- data/spec/plotting/gruff/category_spec.rb +3 -3
- data/spec/plotting/gruff/dataframe_spec.rb +14 -4
- data/spec/plotting/gruff/vector_spec.rb +9 -9
- data/spec/plotting/nyaplot/category_spec.rb +5 -9
- data/spec/plotting/nyaplot/dataframe_spec.rb +95 -47
- data/spec/plotting/nyaplot/vector_spec.rb +5 -11
- data/spec/shared/vector_display_spec.rb +12 -14
- data/spec/spec_helper.rb +30 -7
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +306 -72
- metadata +96 -55
- data/spec/fixtures/stock_data.csv +0 -500
data/lib/daru/core/merge.rb
CHANGED
@@ -17,17 +17,17 @@ module Daru
|
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
|
-
def initialize left_df, right_df, opts={}
|
20
|
+
def initialize left_df, right_df, opts={} # rubocop:disable Metrics/AbcSize -- quick-fix for issue #171
|
21
21
|
init_opts(opts)
|
22
22
|
validate_on!(left_df, right_df)
|
23
23
|
key_sanitizer = ->(h) { sanitize_merge_keys(h.values_at(*on)) }
|
24
24
|
|
25
25
|
@left = df_to_a(left_df)
|
26
|
-
@left.
|
26
|
+
@left.sort! { |a, b| safe_compare(a.values_at(*on), b.values_at(*on)) }
|
27
27
|
@left_key_values = @left.map(&key_sanitizer)
|
28
28
|
|
29
29
|
@right = df_to_a(right_df)
|
30
|
-
@right.
|
30
|
+
@right.sort! { |a, b| safe_compare(a.values_at(*on), b.values_at(*on)) }
|
31
31
|
@right_key_values = @right.map(&key_sanitizer)
|
32
32
|
|
33
33
|
@left_keys, @right_keys = merge_keys(left_df, right_df, on)
|
@@ -246,6 +246,15 @@ module Daru
|
|
246
246
|
raise ArgumentError, "Both dataframes expected to have #{on.inspect} field"
|
247
247
|
end
|
248
248
|
end
|
249
|
+
|
250
|
+
def safe_compare(left_array, right_array)
|
251
|
+
left_array.zip(right_array).map { |l, r|
|
252
|
+
next 0 if l.nil? && r.nil?
|
253
|
+
next 1 if r.nil?
|
254
|
+
next -1 if l.nil?
|
255
|
+
l <=> r
|
256
|
+
}.reject(&:zero?).first || 0
|
257
|
+
end
|
249
258
|
end
|
250
259
|
|
251
260
|
module Merge
|
data/lib/daru/core/query.rb
CHANGED
@@ -9,13 +9,13 @@ module Daru
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def & other
|
12
|
-
BoolArray.new
|
12
|
+
BoolArray.new(@barry.zip(other.barry).map { |b, o| b && o })
|
13
13
|
end
|
14
14
|
|
15
15
|
alias :and :&
|
16
16
|
|
17
17
|
def | other
|
18
|
-
BoolArray.new
|
18
|
+
BoolArray.new(@barry.zip(other.barry).map { |b, o| b || o })
|
19
19
|
end
|
20
20
|
|
21
21
|
alias :or :|
|
@@ -39,11 +39,11 @@ module Daru
|
|
39
39
|
|
40
40
|
class << self
|
41
41
|
def apply_scalar_operator operator, data, other
|
42
|
-
BoolArray.new
|
42
|
+
BoolArray.new(data.map { |d| !!d.send(operator, other) if d.respond_to?(operator) })
|
43
43
|
end
|
44
44
|
|
45
45
|
def apply_vector_operator operator, vector, other
|
46
|
-
BoolArray.new
|
46
|
+
BoolArray.new(vector.zip(other).map { |d, o| !!d.send(operator, o) })
|
47
47
|
end
|
48
48
|
|
49
49
|
def df_where data_frame, bool_array
|
@@ -70,6 +70,22 @@ module Daru
|
|
70
70
|
resultant_dv
|
71
71
|
end
|
72
72
|
|
73
|
+
def vector_apply_where dv, bool_array
|
74
|
+
_data, new_index = fetch_new_data_and_index dv, bool_array
|
75
|
+
all_index = dv.index
|
76
|
+
all_data = all_index.map { |idx| new_index.include?(idx) ? yield(dv[idx]) : dv[idx] }
|
77
|
+
|
78
|
+
resultant_dv = Daru::Vector.new all_data,
|
79
|
+
index: dv.index.class.new(all_index),
|
80
|
+
dtype: dv.dtype,
|
81
|
+
type: dv.type,
|
82
|
+
name: dv.name
|
83
|
+
|
84
|
+
# Preserve categories order for category vector
|
85
|
+
resultant_dv.categories = dv.categories if dv.category?
|
86
|
+
resultant_dv
|
87
|
+
end
|
88
|
+
|
73
89
|
private
|
74
90
|
|
75
91
|
def fetch_new_data_and_index dv, bool_array
|
data/lib/daru/dataframe.rb
CHANGED
@@ -10,7 +10,10 @@ module Daru
|
|
10
10
|
include Daru::Maths::Arithmetic::DataFrame
|
11
11
|
include Daru::Maths::Statistics::DataFrame
|
12
12
|
# TODO: Remove this line, but it's causing errors due to an unknown reason
|
13
|
-
|
13
|
+
Daru.has_nyaplot?
|
14
|
+
|
15
|
+
attr_accessor(*Configuration::INSPECT_OPTIONS_KEYS)
|
16
|
+
|
14
17
|
extend Gem::Deprecate
|
15
18
|
|
16
19
|
class << self
|
@@ -20,7 +23,7 @@ module Daru
|
|
20
23
|
#
|
21
24
|
# == Arguments
|
22
25
|
#
|
23
|
-
# * path -
|
26
|
+
# * path - Local path / Remote URL of the file to load specified as a String.
|
24
27
|
#
|
25
28
|
# == Options
|
26
29
|
#
|
@@ -63,7 +66,7 @@ module Daru
|
|
63
66
|
|
64
67
|
# Read a database query and returns a Dataset
|
65
68
|
#
|
66
|
-
# @param dbh [DBI::DatabaseHandle] A DBI connection
|
69
|
+
# @param dbh [DBI::DatabaseHandle, String] A DBI connection OR path to a SQLite3 database.
|
67
70
|
# @param query [String] The query to be executed
|
68
71
|
#
|
69
72
|
# @return A dataframe containing the data resulting from the query
|
@@ -72,6 +75,11 @@ module Daru
|
|
72
75
|
#
|
73
76
|
# dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
|
74
77
|
# Daru::DataFrame.from_sql(dbh, "SELECT * FROM test")
|
78
|
+
#
|
79
|
+
# #Alternatively
|
80
|
+
#
|
81
|
+
# require 'dbi'
|
82
|
+
# Daru::DataFrame.from_sql("path/to/sqlite.db", "SELECT * FROM test")
|
75
83
|
def from_sql dbh, query
|
76
84
|
Daru::IO.from_sql dbh, query
|
77
85
|
end
|
@@ -79,7 +87,7 @@ module Daru
|
|
79
87
|
# Read a dataframe from AR::Relation
|
80
88
|
#
|
81
89
|
# @param relation [ActiveRecord::Relation] An AR::Relation object from which data is loaded
|
82
|
-
# @
|
90
|
+
# @param fields [Array] Field names to be loaded (optional)
|
83
91
|
#
|
84
92
|
# @return A dataframe containing the data loaded from the relation
|
85
93
|
#
|
@@ -112,6 +120,49 @@ module Daru
|
|
112
120
|
Daru::IO.from_plaintext path, fields
|
113
121
|
end
|
114
122
|
|
123
|
+
# Read the table data from a remote html file. Please note that this module
|
124
|
+
# works only for static table elements on a HTML page, and won't work in
|
125
|
+
# cases where the data is being loaded into the HTML table by Javascript.
|
126
|
+
#
|
127
|
+
# By default - all <th> tag elements in the first proper row are considered
|
128
|
+
# as the order, and all the <th> tag elements in the first column are
|
129
|
+
# considered as the index.
|
130
|
+
#
|
131
|
+
# == Arguments
|
132
|
+
#
|
133
|
+
# * path [String] - URL of the target HTML file.
|
134
|
+
# * fields [Hash] -
|
135
|
+
#
|
136
|
+
# +:match+ - A *String* to match and choose a particular table(s) from multiple tables of a HTML page.
|
137
|
+
#
|
138
|
+
# +:order+ - An *Array* which would act as the user-defined order, to override the parsed *Daru::DataFrame*.
|
139
|
+
#
|
140
|
+
# +:index+ - An *Array* which would act as the user-defined index, to override the parsed *Daru::DataFrame*.
|
141
|
+
#
|
142
|
+
# +:name+ - A *String* that manually assigns a name to the scraped *Daru::DataFrame*, for user's preference.
|
143
|
+
#
|
144
|
+
# == Returns
|
145
|
+
# An Array of +Daru::DataFrame+s, with each dataframe corresponding to a
|
146
|
+
# HTML table on that webpage.
|
147
|
+
#
|
148
|
+
# == Usage
|
149
|
+
# dfs = Daru::DataFrame.from_html("http://www.moneycontrol.com/", match: "Sun Pharma")
|
150
|
+
# dfs.count
|
151
|
+
# # => 4
|
152
|
+
#
|
153
|
+
# dfs.first
|
154
|
+
# #
|
155
|
+
# # => <Daru::DataFrame(5x4)>
|
156
|
+
# # Company Price Change Value (Rs
|
157
|
+
# # 0 Sun Pharma 502.60 -65.05 2,117.87
|
158
|
+
# # 1 Reliance 1356.90 19.60 745.10
|
159
|
+
# # 2 Tech Mahin 379.45 -49.70 650.22
|
160
|
+
# # 3 ITC 315.85 6.75 621.12
|
161
|
+
# # 4 HDFC 1598.85 50.95 553.91
|
162
|
+
def from_html path, fields={}
|
163
|
+
Daru::IO.from_html path, fields
|
164
|
+
end
|
165
|
+
|
115
166
|
# Create DataFrame by specifying rows as an Array of Arrays or Array of
|
116
167
|
# Daru::Vector objects.
|
117
168
|
def rows source, opts={}
|
@@ -229,6 +280,17 @@ module Daru
|
|
229
280
|
# Default to *true*.
|
230
281
|
#
|
231
282
|
# == Usage
|
283
|
+
#
|
284
|
+
# df = Daru::DataFrame.new
|
285
|
+
# # =>
|
286
|
+
# # <Daru::DataFrame(0x0)>
|
287
|
+
# # Creates an empty DataFrame with no rows or columns.
|
288
|
+
#
|
289
|
+
# df = Daru::DataFrame.new({}, order: [:a, :b])
|
290
|
+
# #<Daru::DataFrame(0x2)>
|
291
|
+
# a b
|
292
|
+
# # Creates a DataFrame with no rows and columns :a and :b
|
293
|
+
#
|
232
294
|
# df = Daru::DataFrame.new({a: [1,2,3,4], b: [6,7,8,9]}, order: [:b, :a],
|
233
295
|
# index: [:a, :b, :c, :d], name: :spider_man)
|
234
296
|
#
|
@@ -239,26 +301,67 @@ module Daru
|
|
239
301
|
# # b 7 2
|
240
302
|
# # c 8 3
|
241
303
|
# # d 9 4
|
242
|
-
|
304
|
+
#
|
305
|
+
# df = Daru::DataFrame.new([[1,2,3,4],[6,7,8,9]], name: :bat_man)
|
306
|
+
#
|
307
|
+
# # =>
|
308
|
+
# # #<Daru::DataFrame: bat_man (4x2)>
|
309
|
+
# # 0 1
|
310
|
+
# # 0 1 6
|
311
|
+
# # 1 2 7
|
312
|
+
# # 2 3 8
|
313
|
+
# # 3 4 9
|
314
|
+
#
|
315
|
+
# # Dataframe having Index name
|
316
|
+
#
|
317
|
+
# df = Daru::DataFrame.new({a: [1,2,3,4], b: [6,7,8,9]}, order: [:b, :a],
|
318
|
+
# index: Daru::Index.new([:a, :b, :c, :d], name: 'idx_name'),
|
319
|
+
# name: :spider_man)
|
320
|
+
#
|
321
|
+
# # =>
|
322
|
+
# # <Daru::DataFrame:80766980 @name = spider_man @size = 4>
|
323
|
+
# # idx_name b a
|
324
|
+
# # a 6 1
|
325
|
+
# # b 7 2
|
326
|
+
# # c 8 3
|
327
|
+
# # d 9 4
|
328
|
+
#
|
329
|
+
#
|
330
|
+
# idx = Daru::Index.new [100, 99, 101, 1, 2], name: "s1"
|
331
|
+
# => #<Daru::Index(5): s1 {100, 99, 101, 1, 2}>
|
332
|
+
#
|
333
|
+
# df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
334
|
+
# c: [11,22,33,44,55]},
|
335
|
+
# order: [:a, :b, :c],
|
336
|
+
# index: idx)
|
337
|
+
# # =>
|
338
|
+
# #<Daru::DataFrame(5x3)>
|
339
|
+
# # s1 a b c
|
340
|
+
# # 100 1 11 11
|
341
|
+
# # 99 2 12 22
|
342
|
+
# # 101 3 13 33
|
343
|
+
# # 1 4 14 44
|
344
|
+
# # 2 5 15 55
|
345
|
+
|
346
|
+
def initialize source={}, opts={} # rubocop:disable Metrics/MethodLength
|
243
347
|
vectors, index = opts[:order], opts[:index] # FIXME: just keyword arges after Ruby 2.1
|
244
348
|
@data = []
|
245
349
|
@name = opts[:name]
|
246
350
|
|
247
351
|
case source
|
248
|
-
when
|
249
|
-
|
250
|
-
@index = Index.coerce index
|
251
|
-
create_empty_vectors
|
352
|
+
when [], {}
|
353
|
+
create_empty_vectors(vectors, index)
|
252
354
|
when Array
|
253
355
|
initialize_from_array source, vectors, index, opts
|
254
356
|
when Hash
|
255
357
|
initialize_from_hash source, vectors, index, opts
|
358
|
+
when ->(s) { s.empty? } # TODO: likely want to remove this case
|
359
|
+
create_empty_vectors(vectors, index)
|
256
360
|
end
|
257
361
|
|
258
362
|
set_size
|
259
363
|
validate
|
260
364
|
update
|
261
|
-
self.plotting_library = Daru.plotting_library
|
262
365
|
end
|
263
366
|
|
264
367
|
def plotting_library= lib
|
@@ -271,11 +374,18 @@ module Daru
|
|
271
374
|
)
|
272
375
|
end
|
273
376
|
else
|
274
|
-
raise
|
377
|
+
raise ArgumentError, "Plotting library #{lib} not supported. "\
|
275
378
|
'Supported libraries are :nyaplot and :gruff'
|
276
379
|
end
|
277
380
|
end
|
278
381
|
|
382
|
+
# this method is overwritten: see Daru::DataFrame#plotting_library=
|
383
|
+
def plot(*args, **options, &b)
|
384
|
+
init_plotting_library
|
385
|
+
|
386
|
+
plot(*args, **options, &b)
|
387
|
+
end
|
388
|
+
|
279
389
|
# Access row or vector. Specify name of row/vector followed by axis(:row, :vector).
|
280
390
|
# Defaults to *:vector*. Use of this method is not recommended for accessing
|
281
391
|
# rows. Use df.row[:a] for accessing row with index ':a'.
|
@@ -285,7 +395,7 @@ module Daru
|
|
285
395
|
end
|
286
396
|
|
287
397
|
# Retrieve rows by positions
|
288
|
-
# @param [Array<Integer>]
|
398
|
+
# @param [Array<Integer>] positions of rows to retrieve
|
289
399
|
# @return [Daru::Vector, Daru::DataFrame] vector for single position and dataframe for multiple positions
|
290
400
|
# @example
|
291
401
|
# df = Daru::DataFrame.new({
|
@@ -303,19 +413,17 @@ module Daru
|
|
303
413
|
validate_positions(*positions, nrows)
|
304
414
|
|
305
415
|
if positions.is_a? Integer
|
306
|
-
|
307
|
-
|
416
|
+
row = get_rows_for([positions])
|
417
|
+
Daru::Vector.new row, index: @vectors
|
308
418
|
else
|
309
|
-
new_rows =
|
310
|
-
|
311
|
-
index: @index.at(*original_positions),
|
312
|
-
order: @vectors
|
419
|
+
new_rows = get_rows_for(original_positions)
|
420
|
+
Daru::DataFrame.new new_rows, index: @index.at(*original_positions), order: @vectors
|
313
421
|
end
|
314
422
|
end
|
315
423
|
|
316
424
|
# Set rows by positions
|
317
425
|
# @param [Array<Integer>] positions positions of rows to set
|
318
|
-
# @
|
426
|
+
# @param [Array, Daru::Vector] vector vector to be assigned
|
319
427
|
# @example
|
320
428
|
# df = Daru::DataFrame.new({
|
321
429
|
# a: [1, 2, 3],
|
@@ -348,7 +456,7 @@ module Daru
|
|
348
456
|
end
|
349
457
|
|
350
458
|
# Retrieve vectors by positions
|
351
|
-
# @param [Array<Integer>]
|
459
|
+
# @param [Array<Integer>] positions of vectors to retrieve
|
352
460
|
# @return [Daru::Vector, Daru::DataFrame] vector for single position and dataframe for multiple positions
|
353
461
|
# @example
|
354
462
|
# df = Daru::DataFrame.new({
|
@@ -432,13 +540,24 @@ module Daru
|
|
432
540
|
end
|
433
541
|
|
434
542
|
def add_row row, index=nil
|
435
|
-
self.row[index || @size] = row
|
543
|
+
self.row[*(index || @size)] = row
|
436
544
|
end
|
437
545
|
|
438
546
|
def add_vector n, vector
|
439
547
|
self[n] = vector
|
440
548
|
end
|
441
549
|
|
550
|
+
def insert_vector n, name, source
|
551
|
+
raise ArgumentError unless source.is_a? Array
|
552
|
+
vector = Daru::Vector.new(source, index: @index, name: @name)
|
553
|
+
@data << vector
|
554
|
+
@vectors = @vectors.add name
|
555
|
+
ordr = @vectors.dup.to_a
|
556
|
+
elmnt = ordr.pop
|
557
|
+
ordr = ordr.insert n, elmnt
|
558
|
+
self.order=ordr
|
559
|
+
end
|
560
|
+
|
442
561
|
# Access a row or set/create a row. Refer #[] and #[]= docs for details.
|
443
562
|
#
|
444
563
|
# == Usage
|
@@ -448,6 +567,20 @@ module Daru
|
|
448
567
|
Daru::Accessors::DataFrameByRow.new(self)
|
449
568
|
end
|
450
569
|
|
570
|
+
# Extract a dataframe given row indexes or positions
|
571
|
+
# @param keys [Array] can be positions (if by_position is true) or indexes (if by_position is false)
|
572
|
+
# @return [Daru::DataFrame]
|
573
|
+
def get_sub_dataframe(keys, by_position: true)
|
574
|
+
return Daru::DataFrame.new({}) if keys == []
|
575
|
+
|
576
|
+
keys = @index.pos(*keys) unless by_position
|
577
|
+
|
578
|
+
sub_df = row_at(*keys)
|
579
|
+
sub_df = sub_df.to_df.transpose if sub_df.is_a?(Daru::Vector)
|
580
|
+
|
581
|
+
sub_df
|
582
|
+
end
|
583
|
+
|
451
584
|
# Duplicate the DataFrame entirely.
|
452
585
|
#
|
453
586
|
# == Arguments
|
@@ -457,7 +590,7 @@ module Daru
|
|
457
590
|
def dup vectors_to_dup=nil
|
458
591
|
vectors_to_dup = @vectors.to_a unless vectors_to_dup
|
459
592
|
|
460
|
-
src = vectors_to_dup.map { |vec| @data[@vectors
|
593
|
+
src = vectors_to_dup.map { |vec| @data[@vectors.pos(vec)].dup }
|
461
594
|
new_order = Daru::Index.new(vectors_to_dup)
|
462
595
|
|
463
596
|
Daru::DataFrame.new src, order: new_order, index: @index.dup, name: @name, clone: true
|
@@ -506,8 +639,8 @@ module Daru
|
|
506
639
|
deprecate :dup_only_valid, :reject_values, 2016, 10
|
507
640
|
|
508
641
|
# Returns a dataframe in which rows with any of the mentioned values
|
509
|
-
#
|
510
|
-
# @param [Array]
|
642
|
+
# are ignored.
|
643
|
+
# @param [Array] values to reject to form the new dataframe
|
511
644
|
# @return [Daru::DataFrame] Data Frame with only rows which doesn't
|
512
645
|
# contain the mentioned values
|
513
646
|
# @example
|
@@ -544,7 +677,7 @@ module Daru
|
|
544
677
|
# b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
|
545
678
|
# c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
|
546
679
|
# }, index: 11..18)
|
547
|
-
# df
|
680
|
+
# df.replace_values nil, Float::NAN
|
548
681
|
# # => #<Daru::DataFrame(8x3)>
|
549
682
|
# # a b c
|
550
683
|
# # 11 1 a a
|
@@ -560,6 +693,89 @@ module Daru
|
|
560
693
|
self
|
561
694
|
end
|
562
695
|
|
696
|
+
# Rolling fillna
|
697
|
+
# replace all Float::NAN and nil values with the preceding or following value
|
698
|
+
#
|
699
|
+
# @param direction [Symbol] (:forward, :backward) whether replacement value is preceding or following
|
700
|
+
#
|
701
|
+
# @example
|
702
|
+
# df = Daru::DataFrame.new({
|
703
|
+
# a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
|
704
|
+
# b: [:a, :b, nil, Float::NAN, nil, 3, 5, nil],
|
705
|
+
# c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
|
706
|
+
# })
|
707
|
+
#
|
708
|
+
# => #<Daru::DataFrame(8x3)>
|
709
|
+
# a b c
|
710
|
+
# 0 1 a a
|
711
|
+
# 1 2 b NaN
|
712
|
+
# 2 3 nil 3
|
713
|
+
# 3 nil NaN 4
|
714
|
+
# 4 NaN nil 3
|
715
|
+
# 5 nil 3 5
|
716
|
+
# 6 1 5 nil
|
717
|
+
# 7 7 nil 7
|
718
|
+
#
|
719
|
+
# 2.3.3 :068 > df.rolling_fillna(:forward)
|
720
|
+
# => #<Daru::DataFrame(8x3)>
|
721
|
+
# a b c
|
722
|
+
# 0 1 a a
|
723
|
+
# 1 2 b a
|
724
|
+
# 2 3 b 3
|
725
|
+
# 3 3 b 4
|
726
|
+
# 4 3 b 3
|
727
|
+
# 5 3 3 5
|
728
|
+
# 6 1 5 5
|
729
|
+
# 7 7 5 7
|
730
|
+
#
|
731
|
+
def rolling_fillna!(direction=:forward)
|
732
|
+
@data.each { |vec| vec.rolling_fillna!(direction) }
|
733
|
+
self
|
734
|
+
end
|
735
|
+
|
736
|
+
def rolling_fillna(direction=:forward)
|
737
|
+
dup.rolling_fillna!(direction)
|
738
|
+
end
|
739
|
+
|
740
|
+
# Return unique rows by vector specified or all vectors
|
741
|
+
#
|
742
|
+
# @param vtrs [String][Symbol] vector name(s) that should be considered
|
743
|
+
#
|
744
|
+
# @example
|
745
|
+
#
|
746
|
+
# => #<Daru::DataFrame(6x2)>
|
747
|
+
# a b
|
748
|
+
# 0 1 a
|
749
|
+
# 1 2 b
|
750
|
+
# 2 3 c
|
751
|
+
# 3 4 d
|
752
|
+
# 2 3 c
|
753
|
+
# 3 4 f
|
754
|
+
#
|
755
|
+
# 2.3.3 :> df.uniq
|
756
|
+
# => #<Daru::DataFrame(5x2)>
|
757
|
+
# a b
|
758
|
+
# 0 1 a
|
759
|
+
# 1 2 b
|
760
|
+
# 2 3 c
|
761
|
+
# 3 4 d
|
762
|
+
# 3 4 f
|
763
|
+
#
|
764
|
+
# 2.3.3 :> df.uniq(:a)
|
765
|
+
# => #<Daru::DataFrame(4x2)>
|
766
|
+
# a b
|
767
|
+
# 0 1 a
|
768
|
+
# 1 2 b
|
769
|
+
# 2 3 c
|
770
|
+
# 3 4 d
|
771
|
+
#
|
772
|
+
def uniq(*vtrs)
|
773
|
+
vecs = vtrs.empty? ? vectors.to_a : Array(vtrs)
|
774
|
+
grouped = group_by(vecs)
|
775
|
+
indexes = grouped.groups.values.map { |v| v[0] }.sort
|
776
|
+
row[*indexes]
|
777
|
+
end
|
778
|
+
|
563
779
|
# Iterate over each index of the DataFrame.
|
564
780
|
def each_index &block
|
565
781
|
return to_enum(:each_index) unless block_given?
|
@@ -679,7 +895,7 @@ module Daru
|
|
679
895
|
# * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
|
680
896
|
# Default to :vector.
|
681
897
|
def map! axis=:vector, &block
|
682
|
-
if
|
898
|
+
if %i[vector column].include?(axis)
|
683
899
|
map_vectors!(&block)
|
684
900
|
elsif axis == :row
|
685
901
|
map_rows!(&block)
|
@@ -807,6 +1023,18 @@ module Daru
|
|
807
1023
|
self
|
808
1024
|
end
|
809
1025
|
|
1026
|
+
def apply_method(method, keys: nil, by_position: true)
|
1027
|
+
df = keys ? get_sub_dataframe(keys, by_position: by_position) : self
|
1028
|
+
|
1029
|
+
case method
|
1030
|
+
when Symbol then df.send(method)
|
1031
|
+
when Proc then method.call(df)
|
1032
|
+
when Array then method.map(&:to_proc).map { |proc| proc.call(df) } # works with Array of both Symbol and/or Proc
|
1033
|
+
else raise
|
1034
|
+
end
|
1035
|
+
end
|
1036
|
+
alias :apply_method_on_sub_df :apply_method
|
1037
|
+
|
810
1038
|
# Retrieves a Daru::Vector, based on the result of calculation
|
811
1039
|
# performed on each row.
|
812
1040
|
def collect_rows &block
|
@@ -913,7 +1141,7 @@ module Daru
|
|
913
1141
|
|
914
1142
|
# creates a new vector with the data of a given field which the block returns true
|
915
1143
|
def filter_vector vec, &block
|
916
|
-
Daru::Vector.new
|
1144
|
+
Daru::Vector.new(each_row.select(&block).map { |row| row[vec] })
|
917
1145
|
end
|
918
1146
|
|
919
1147
|
# Iterates over each row and retains it in a new DataFrame if the block returns
|
@@ -934,9 +1162,9 @@ module Daru
|
|
934
1162
|
dup.tap { |df| df.keep_vector_if(&block) }
|
935
1163
|
end
|
936
1164
|
|
937
|
-
# Test each row with one or more tests.
|
938
|
-
#
|
939
|
-
#
|
1165
|
+
# Test each row with one or more tests.
|
1166
|
+
# @param tests [Proc] Each test is a Proc with the form
|
1167
|
+
# *Proc.new {|row| row[:age] > 0}*
|
940
1168
|
# The function returns an array with all errors.
|
941
1169
|
#
|
942
1170
|
# FIXME: description here is too sparse. As far as I can get,
|
@@ -1031,14 +1259,14 @@ module Daru
|
|
1031
1259
|
alias :vector_missing_values :missing_values_rows
|
1032
1260
|
|
1033
1261
|
def has_missing_data?
|
1034
|
-
|
1262
|
+
@data.any? { |vec| vec.include_values?(*Daru::MISSING_VALUES) }
|
1035
1263
|
end
|
1036
1264
|
alias :flawed? :has_missing_data?
|
1037
1265
|
deprecate :has_missing_data?, :include_values?, 2016, 10
|
1038
1266
|
deprecate :flawed?, :include_values?, 2016, 10
|
1039
1267
|
|
1040
1268
|
# Check if any of given values occur in the data frame
|
1041
|
-
# @param [Array]
|
1269
|
+
# @param [Array] values to check for
|
1042
1270
|
# @return [true, false] true if any of the given values occur in the
|
1043
1271
|
# dataframe, false otherwise
|
1044
1272
|
# @example
|
@@ -1119,7 +1347,7 @@ module Daru
|
|
1119
1347
|
# row[:a] < 3 and row[:b] == 'b'
|
1120
1348
|
# end #=> true
|
1121
1349
|
def any? axis=:vector, &block
|
1122
|
-
if
|
1350
|
+
if %i[vector column].include?(axis)
|
1123
1351
|
@data.any?(&block)
|
1124
1352
|
elsif axis == :row
|
1125
1353
|
each_row do |row|
|
@@ -1141,7 +1369,7 @@ module Daru
|
|
1141
1369
|
# row[:a] < 10
|
1142
1370
|
# end #=> true
|
1143
1371
|
def all? axis=:vector, &block
|
1144
|
-
if
|
1372
|
+
if %i[vector column].include?(axis)
|
1145
1373
|
@data.all?(&block)
|
1146
1374
|
elsif axis == :row
|
1147
1375
|
each_row.all?(&block)
|
@@ -1169,13 +1397,60 @@ module Daru
|
|
1169
1397
|
|
1170
1398
|
alias :last :tail
|
1171
1399
|
|
1172
|
-
#
|
1173
|
-
#
|
1174
|
-
|
1400
|
+
# Sum all numeric/specified vectors in the DataFrame.
|
1401
|
+
#
|
1402
|
+
# Returns a new vector containing the sum of all numeric
|
1403
|
+
# or specified vectors of the DataFrame. By default, if the vector
|
1404
|
+
# contains a nil, the sum is nil.
|
1405
|
+
# With :skipnil argument set to true, nil values are assumed to be
|
1406
|
+
# 0 (zero) and the sum vector is returned.
|
1407
|
+
#
|
1408
|
+
# @param args [Array] List of vectors to sum. Default is nil in which case
|
1409
|
+
# all numeric vectors are summed.
|
1410
|
+
#
|
1411
|
+
# @option opts [Boolean] :skipnil Consider nils as 0. Default is false.
|
1412
|
+
#
|
1413
|
+
# @return Vector with sum of all vectors specified in the argument.
|
1414
|
+
# If vecs parameter is empty, sum all numeric vectors.
|
1415
|
+
#
|
1416
|
+
# @example
|
1417
|
+
# df = Daru::DataFrame.new({
|
1418
|
+
# a: [1, 2, nil],
|
1419
|
+
# b: [2, 1, 3],
|
1420
|
+
# c: [1, 1, 1]
|
1421
|
+
# })
|
1422
|
+
# => #<Daru::DataFrame(3x3)>
|
1423
|
+
# a b c
|
1424
|
+
# 0 1 2 1
|
1425
|
+
# 1 2 1 1
|
1426
|
+
# 2 nil 3 1
|
1427
|
+
# df.vector_sum [:a, :c]
|
1428
|
+
# => #<Daru::Vector(3)>
|
1429
|
+
# 0 2
|
1430
|
+
# 1 3
|
1431
|
+
# 2 nil
|
1432
|
+
# df.vector_sum
|
1433
|
+
# => #<Daru::Vector(3)>
|
1434
|
+
# 0 4
|
1435
|
+
# 1 4
|
1436
|
+
# 2 nil
|
1437
|
+
# df.vector_sum skipnil: true
|
1438
|
+
# => #<Daru::Vector(3)>
|
1439
|
+
# c
|
1440
|
+
# 0 4
|
1441
|
+
# 1 4
|
1442
|
+
# 2 4
|
1443
|
+
#
|
1444
|
+
def vector_sum(*args)
|
1445
|
+
defaults = {vecs: nil, skipnil: false}
|
1446
|
+
options = args.last.is_a?(::Hash) ? args.pop : {}
|
1447
|
+
options = defaults.merge(options)
|
1448
|
+
vecs = args[0] || options[:vecs]
|
1449
|
+
skipnil = args[1] || options[:skipnil]
|
1450
|
+
|
1175
1451
|
vecs ||= numeric_vectors
|
1176
1452
|
sum = Daru::Vector.new [0]*@size, index: @index, name: @name, dtype: @dtype
|
1177
|
-
|
1178
|
-
vecs.inject(sum) { |memo, n| memo + self[n] }
|
1453
|
+
vecs.inject(sum) { |memo, n| self[n].add(memo, skipnil: skipnil) }
|
1179
1454
|
end
|
1180
1455
|
|
1181
1456
|
# Calculate mean of the rows of the dataframe.
|
@@ -1220,11 +1495,10 @@ module Daru
|
|
1220
1495
|
# # ["foo", "two", 3]=>[2, 4]}
|
1221
1496
|
def group_by *vectors
|
1222
1497
|
vectors.flatten!
|
1223
|
-
|
1224
|
-
|
1225
|
-
|
1226
|
-
|
1227
|
-
}
|
1498
|
+
missing = vectors - @vectors.to_a
|
1499
|
+
unless missing.empty?
|
1500
|
+
raise(ArgumentError, "Vector(s) missing: #{missing.join(', ')}")
|
1501
|
+
end
|
1228
1502
|
|
1229
1503
|
vectors = [@vectors.first] if vectors.empty?
|
1230
1504
|
|
@@ -1234,7 +1508,7 @@ module Daru
|
|
1234
1508
|
def reindex_vectors new_vectors
|
1235
1509
|
unless new_vectors.is_a?(Daru::Index)
|
1236
1510
|
raise ArgumentError, 'Must pass the new index of type Index or its '\
|
1237
|
-
"subclasses, not #{
|
1511
|
+
"subclasses, not #{new_vectors.class}"
|
1238
1512
|
end
|
1239
1513
|
|
1240
1514
|
cl = Daru::DataFrame.new({}, order: new_vectors, index: @index, name: @name)
|
@@ -1272,14 +1546,52 @@ module Daru
|
|
1272
1546
|
df
|
1273
1547
|
end
|
1274
1548
|
|
1549
|
+
module SetSingleIndexStrategy
|
1550
|
+
def self.uniq_size(df, col)
|
1551
|
+
df[col].uniq.size
|
1552
|
+
end
|
1553
|
+
|
1554
|
+
def self.new_index(df, col)
|
1555
|
+
Daru::Index.new(df[col].to_a)
|
1556
|
+
end
|
1557
|
+
|
1558
|
+
def self.delete_vector(df, col)
|
1559
|
+
df.delete_vector(col)
|
1560
|
+
end
|
1561
|
+
end
|
1562
|
+
|
1563
|
+
module SetMultiIndexStrategy
|
1564
|
+
def self.uniq_size(df, cols)
|
1565
|
+
df[*cols].uniq.size
|
1566
|
+
end
|
1567
|
+
|
1568
|
+
def self.new_index(df, cols)
|
1569
|
+
Daru::MultiIndex.from_arrays(df[*cols].map_vectors(&:to_a)).tap do |mi|
|
1570
|
+
mi.name = cols
|
1571
|
+
mi
|
1572
|
+
end
|
1573
|
+
end
|
1574
|
+
|
1575
|
+
def self.delete_vector(df, cols)
|
1576
|
+
df.delete_vectors(*cols)
|
1577
|
+
end
|
1578
|
+
end
|
1579
|
+
|
1275
1580
|
# Set a particular column as the new DF
|
1276
|
-
def set_index
|
1277
|
-
|
1278
|
-
|
1581
|
+
def set_index new_index_col, opts={}
|
1582
|
+
if new_index_col.respond_to?(:to_a)
|
1583
|
+
strategy = SetMultiIndexStrategy
|
1584
|
+
new_index_col = new_index_col.to_a
|
1585
|
+
else
|
1586
|
+
strategy = SetSingleIndexStrategy
|
1587
|
+
end
|
1279
1588
|
|
1280
|
-
|
1281
|
-
|
1589
|
+
uniq_size = strategy.uniq_size(self, new_index_col)
|
1590
|
+
raise ArgumentError, 'All elements in new index must be unique.' if
|
1591
|
+
@size != uniq_size
|
1282
1592
|
|
1593
|
+
self.index = strategy.new_index(self, new_index_col)
|
1594
|
+
strategy.delete_vector(self, new_index_col) unless opts[:keep]
|
1283
1595
|
self
|
1284
1596
|
end
|
1285
1597
|
|
@@ -1317,11 +1629,24 @@ module Daru
|
|
1317
1629
|
end
|
1318
1630
|
end
|
1319
1631
|
|
1632
|
+
def reset_index
|
1633
|
+
index_df = index.to_df
|
1634
|
+
names = index.name
|
1635
|
+
names = [names] unless names.instance_of?(Array)
|
1636
|
+
new_vectors = names + vectors.to_a
|
1637
|
+
self.index = index_df.index
|
1638
|
+
names.each do |name|
|
1639
|
+
self[name] = index_df[name]
|
1640
|
+
end
|
1641
|
+
self.order = new_vectors
|
1642
|
+
self
|
1643
|
+
end
|
1644
|
+
|
1320
1645
|
# Reassign index with a new index of type Daru::Index or any of its subclasses.
|
1321
1646
|
#
|
1322
1647
|
# @param [Daru::Index] idx New index object on which the rows of the dataframe
|
1323
1648
|
# are to be indexed.
|
1324
|
-
# @example
|
1649
|
+
# @example Reassigning index of a DataFrame
|
1325
1650
|
# df = Daru::DataFrame.new({a: [1,2,3,4], b: [11,22,33,44]})
|
1326
1651
|
# df.index.to_a #=> [0,1,2,3]
|
1327
1652
|
#
|
@@ -1337,7 +1662,7 @@ module Daru
|
|
1337
1662
|
|
1338
1663
|
# Reassign vectors with a new index of type Daru::Index or any of its subclasses.
|
1339
1664
|
#
|
1340
|
-
# @param [Daru::Index] idx The new index object on which the vectors are to
|
1665
|
+
# @param new_index [Daru::Index] idx The new index object on which the vectors are to
|
1341
1666
|
# be indexed. Must of the same size as ncols.
|
1342
1667
|
# @example Reassigning vectors of a DataFrame
|
1343
1668
|
# df = Daru::DataFrame.new({a: [1,2,3,4], b: [:a,:b,:c,:d], c: [11,22,33,44]})
|
@@ -1377,13 +1702,31 @@ module Daru
|
|
1377
1702
|
# df.rename_vectors :a => :alpha, :c => :gamma
|
1378
1703
|
# df.vectors.to_a #=> [:alpha, :b, :gamma]
|
1379
1704
|
def rename_vectors name_map
|
1380
|
-
existing_targets = name_map.
|
1705
|
+
existing_targets = name_map.reject { |k,v| k == v }.values & vectors.to_a
|
1381
1706
|
delete_vectors(*existing_targets)
|
1382
1707
|
|
1383
1708
|
new_names = vectors.to_a.map { |v| name_map[v] ? name_map[v] : v }
|
1384
1709
|
self.vectors = Daru::Index.new new_names
|
1385
1710
|
end
|
1386
1711
|
|
1712
|
+
# Renames the vectors and returns itself
|
1713
|
+
#
|
1714
|
+
# == Arguments
|
1715
|
+
#
|
1716
|
+
# * name_map - A hash where the keys are the exising vector names and
|
1717
|
+
# the values are the new names. If a vector is renamed
|
1718
|
+
# to a vector name that is already in use, the existing
|
1719
|
+
# one is overwritten.
|
1720
|
+
#
|
1721
|
+
# == Usage
|
1722
|
+
#
|
1723
|
+
# df = Daru::DataFrame.new({ a: [1,2,3,4], b: [:a,:b,:c,:d], c: [11,22,33,44] })
|
1724
|
+
# df.rename_vectors! :a => :alpha, :c => :gamma # df
|
1725
|
+
def rename_vectors! name_map
|
1726
|
+
rename_vectors(name_map)
|
1727
|
+
self
|
1728
|
+
end
|
1729
|
+
|
1387
1730
|
# Return the indexes of all the numeric vectors. Will include vectors with nils
|
1388
1731
|
# alongwith numbers.
|
1389
1732
|
def numeric_vectors
|
@@ -1408,27 +1751,24 @@ module Daru
|
|
1408
1751
|
Daru::DataFrame.new(arry, clone: cln, order: order, index: @index)
|
1409
1752
|
end
|
1410
1753
|
|
1411
|
-
# Generate a summary of this DataFrame
|
1412
|
-
|
1413
|
-
|
1414
|
-
|
1415
|
-
|
1416
|
-
|
1417
|
-
|
1418
|
-
|
1419
|
-
@vectors.each do |v|
|
1420
|
-
g.text "Element:[#{v}]"
|
1421
|
-
g.parse_element(self[v])
|
1422
|
-
end
|
1754
|
+
# Generate a summary of this DataFrame based on individual vectors in the DataFrame
|
1755
|
+
# @return [String] String containing the summary of the DataFrame
|
1756
|
+
def summary
|
1757
|
+
summary = "= #{name}"
|
1758
|
+
summary << "\n Number of rows: #{nrows}"
|
1759
|
+
@vectors.each do |v|
|
1760
|
+
summary << "\n Element:[#{v}]\n"
|
1761
|
+
summary << self[v].summary(1)
|
1423
1762
|
end
|
1763
|
+
summary
|
1424
1764
|
end
|
1425
1765
|
|
1426
1766
|
# Sorts a dataframe (ascending/descending) in the given pripority sequence of
|
1427
1767
|
# vectors, with or without a block.
|
1428
1768
|
#
|
1429
|
-
# @param
|
1769
|
+
# @param vector_order [Array] The order of vector names in which the DataFrame
|
1430
1770
|
# should be sorted.
|
1431
|
-
# @param [Hash] opts The options to sort with.
|
1771
|
+
# @param opts [Hash] opts The options to sort with.
|
1432
1772
|
# @option opts [TrueClass,FalseClass,Array] :ascending (true) Sort in ascending
|
1433
1773
|
# or descending order. Specify Array corresponding to *order* for multiple
|
1434
1774
|
# sort orders.
|
@@ -1597,12 +1937,11 @@ module Daru
|
|
1597
1937
|
|
1598
1938
|
new_fields = (@vectors.to_a + other_df.vectors.to_a)
|
1599
1939
|
new_fields = ArrayHelper.recode_repeated(new_fields)
|
1600
|
-
|
1601
1940
|
DataFrame.new({}, order: new_fields).tap do |df_new|
|
1602
1941
|
(0...nrows).each do |i|
|
1603
1942
|
df_new.add_row row[i].to_a + other_df.row[i].to_a
|
1604
1943
|
end
|
1605
|
-
|
1944
|
+
df_new.index = @index if @index == other_df.index
|
1606
1945
|
df_new.update
|
1607
1946
|
end
|
1608
1947
|
end
|
@@ -1783,7 +2122,9 @@ module Daru
|
|
1783
2122
|
end
|
1784
2123
|
|
1785
2124
|
# Convert to html for IRuby.
|
1786
|
-
def to_html
|
2125
|
+
def to_html(threshold=Daru.max_rows)
|
2126
|
+
table_thead = to_html_thead
|
2127
|
+
table_tbody = to_html_tbody(threshold)
|
1787
2128
|
path = if index.is_a?(MultiIndex)
|
1788
2129
|
File.expand_path('../iruby/templates/dataframe_mi.html.erb', __FILE__)
|
1789
2130
|
else
|
@@ -1792,8 +2133,29 @@ module Daru
|
|
1792
2133
|
ERB.new(File.read(path).strip).result(binding)
|
1793
2134
|
end
|
1794
2135
|
|
2136
|
+
def to_html_thead
|
2137
|
+
table_thead_path =
|
2138
|
+
if index.is_a?(MultiIndex)
|
2139
|
+
File.expand_path('../iruby/templates/dataframe_mi_thead.html.erb', __FILE__)
|
2140
|
+
else
|
2141
|
+
File.expand_path('../iruby/templates/dataframe_thead.html.erb', __FILE__)
|
2142
|
+
end
|
2143
|
+
ERB.new(File.read(table_thead_path).strip).result(binding)
|
2144
|
+
end
|
2145
|
+
|
2146
|
+
def to_html_tbody(threshold=Daru.max_rows)
|
2147
|
+
threshold ||= @size
|
2148
|
+
table_tbody_path =
|
2149
|
+
if index.is_a?(MultiIndex)
|
2150
|
+
File.expand_path('../iruby/templates/dataframe_mi_tbody.html.erb', __FILE__)
|
2151
|
+
else
|
2152
|
+
File.expand_path('../iruby/templates/dataframe_tbody.html.erb', __FILE__)
|
2153
|
+
end
|
2154
|
+
ERB.new(File.read(table_tbody_path).strip).result(binding)
|
2155
|
+
end
|
2156
|
+
|
1795
2157
|
def to_s
|
1796
|
-
|
2158
|
+
"#<#{self.class}#{': ' + @name.to_s if @name}(#{nrows}x#{ncols})>"
|
1797
2159
|
end
|
1798
2160
|
|
1799
2161
|
# Method for updating the metadata (i.e. missing value positions) of the
|
@@ -1815,7 +2177,7 @@ module Daru
|
|
1815
2177
|
|
1816
2178
|
# Write this DataFrame to a CSV file.
|
1817
2179
|
#
|
1818
|
-
# ==
|
2180
|
+
# == Arguments
|
1819
2181
|
#
|
1820
2182
|
# * filename - Path of CSV file where the DataFrame is to be saved.
|
1821
2183
|
#
|
@@ -1899,15 +2261,15 @@ module Daru
|
|
1899
2261
|
end
|
1900
2262
|
|
1901
2263
|
# Pretty print in a nice table format for the command line (irb/pry/iruby)
|
1902
|
-
def inspect spacing=
|
1903
|
-
row_headers = index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a
|
2264
|
+
def inspect spacing=Daru.spacing, threshold=Daru.max_rows
|
1904
2265
|
name_part = @name ? ": #{@name} " : ''
|
2266
|
+
spacing = [headers.to_a.map(&:length).max, spacing].max
|
1905
2267
|
|
1906
|
-
"#<#{self.class}#{name_part}(#{nrows}x#{ncols})
|
2268
|
+
"#<#{self.class}#{name_part}(#{nrows}x#{ncols})>#{$INPUT_RECORD_SEPARATOR}" +
|
1907
2269
|
Formatters::Table.format(
|
1908
2270
|
each_row.lazy,
|
1909
2271
|
row_headers: row_headers,
|
1910
|
-
headers:
|
2272
|
+
headers: headers,
|
1911
2273
|
threshold: threshold,
|
1912
2274
|
spacing: spacing
|
1913
2275
|
)
|
@@ -1927,7 +2289,7 @@ module Daru
|
|
1927
2289
|
end
|
1928
2290
|
|
1929
2291
|
# Converts the specified non category type vectors to category type vectors
|
1930
|
-
# @param [Array]
|
2292
|
+
# @param [Array] names of non category type vectors to be converted
|
1931
2293
|
# @return [Daru::DataFrame] data frame in which specified vectors have been
|
1932
2294
|
# converted to category type
|
1933
2295
|
# @example
|
@@ -1992,7 +2354,7 @@ module Daru
|
|
1992
2354
|
# # 2 3]
|
1993
2355
|
def split_by_category cat_name
|
1994
2356
|
cat_dv = self[cat_name]
|
1995
|
-
raise
|
2357
|
+
raise ArgumentError, "#{cat_name} is not a category vector" unless
|
1996
2358
|
cat_dv.category?
|
1997
2359
|
|
1998
2360
|
cat_dv.categories.map do |cat|
|
@@ -2002,8 +2364,128 @@ module Daru
|
|
2002
2364
|
end
|
2003
2365
|
end
|
2004
2366
|
|
2367
|
+
# @param indexes [Array] index(s) at which row tuples are retrieved
|
2368
|
+
# @return [Array] returns array of row tuples at given index(s)
|
2369
|
+
# @example Using Daru::Index
|
2370
|
+
# df = Daru::DataFrame.new({
|
2371
|
+
# a: [1, 2, 3],
|
2372
|
+
# b: ['a', 'a', 'b']
|
2373
|
+
# })
|
2374
|
+
#
|
2375
|
+
# df.access_row_tuples_by_indexs(1,2)
|
2376
|
+
# # => [[2, "a"], [3, "b"]]
|
2377
|
+
#
|
2378
|
+
# df.index = Daru::Index.new([:one,:two,:three])
|
2379
|
+
# df.access_row_tuples_by_indexs(:one,:three)
|
2380
|
+
# # => [[1, "a"], [3, "b"]]
|
2381
|
+
#
|
2382
|
+
# @example Using Daru::MultiIndex
|
2383
|
+
# mi_idx = Daru::MultiIndex.from_tuples [
|
2384
|
+
# [:a,:one,:bar],
|
2385
|
+
# [:a,:one,:baz],
|
2386
|
+
# [:b,:two,:bar],
|
2387
|
+
# [:a,:two,:baz],
|
2388
|
+
# ]
|
2389
|
+
# df_mi = Daru::DataFrame.new({
|
2390
|
+
# a: 1..4,
|
2391
|
+
# b: 'a'..'d'
|
2392
|
+
# }, index: mi_idx )
|
2393
|
+
#
|
2394
|
+
# df_mi.access_row_tuples_by_indexs(:b, :two, :bar)
|
2395
|
+
# # => [[3, "c"]]
|
2396
|
+
# df_mi.access_row_tuples_by_indexs(:a)
|
2397
|
+
# # => [[1, "a"], [2, "b"], [4, "d"]]
|
2398
|
+
def access_row_tuples_by_indexs *indexes
|
2399
|
+
return get_sub_dataframe(indexes, by_position: false).map_rows(&:to_a) if
|
2400
|
+
@index.is_a?(Daru::MultiIndex)
|
2401
|
+
positions = @index.pos(*indexes)
|
2402
|
+
if positions.is_a? Numeric
|
2403
|
+
row = get_rows_for([positions])
|
2404
|
+
row.first.is_a?(Array) ? row : [row]
|
2405
|
+
else
|
2406
|
+
new_rows = get_rows_for(indexes, by_position: false)
|
2407
|
+
indexes.map { |index| new_rows.map { |r| r[index] } }
|
2408
|
+
end
|
2409
|
+
end
|
2410
|
+
|
2411
|
+
# Function to use for aggregating the data.
|
2412
|
+
#
|
2413
|
+
# @param options [Hash] options for column, you want in resultant dataframe
|
2414
|
+
#
|
2415
|
+
# @return [Daru::DataFrame]
|
2416
|
+
#
|
2417
|
+
# @example
|
2418
|
+
# df = Daru::DataFrame.new(
|
2419
|
+
# {col: [:a, :b, :c, :d, :e], num: [52,12,07,17,01]})
|
2420
|
+
# => #<Daru::DataFrame(5x2)>
|
2421
|
+
# col num
|
2422
|
+
# 0 a 52
|
2423
|
+
# 1 b 12
|
2424
|
+
# 2 c 7
|
2425
|
+
# 3 d 17
|
2426
|
+
# 4 e 1
|
2427
|
+
#
|
2428
|
+
# df.aggregate(num_100_times: ->(df) { (df.num*100).first })
|
2429
|
+
# => #<Daru::DataFrame(5x1)>
|
2430
|
+
# num_100_ti
|
2431
|
+
# 0 5200
|
2432
|
+
# 1 1200
|
2433
|
+
# 2 700
|
2434
|
+
# 3 1700
|
2435
|
+
# 4 100
|
2436
|
+
#
|
2437
|
+
# When we have duplicate index :
|
2438
|
+
#
|
2439
|
+
# idx = Daru::CategoricalIndex.new [:a, :b, :a, :a, :c]
|
2440
|
+
# df = Daru::DataFrame.new({num: [52,12,07,17,01]}, index: idx)
|
2441
|
+
# => #<Daru::DataFrame(5x1)>
|
2442
|
+
# num
|
2443
|
+
# a 52
|
2444
|
+
# b 12
|
2445
|
+
# a 7
|
2446
|
+
# a 17
|
2447
|
+
# c 1
|
2448
|
+
#
|
2449
|
+
# df.aggregate(num: :mean)
|
2450
|
+
# => #<Daru::DataFrame(3x1)>
|
2451
|
+
# num
|
2452
|
+
# a 25.3333333
|
2453
|
+
# b 12
|
2454
|
+
# c 1
|
2455
|
+
#
|
2456
|
+
# Note: `GroupBy` class `aggregate` method uses this `aggregate` method
|
2457
|
+
# internally.
|
2458
|
+
def aggregate(options={}, multi_index_level=-1)
|
2459
|
+
if block_given?
|
2460
|
+
positions_tuples, new_index = yield(@index) # note: use of yield is private for now
|
2461
|
+
else
|
2462
|
+
positions_tuples, new_index = group_index_for_aggregation(@index, multi_index_level)
|
2463
|
+
end
|
2464
|
+
|
2465
|
+
colmn_value = aggregate_by_positions_tuples(options, positions_tuples)
|
2466
|
+
|
2467
|
+
Daru::DataFrame.new(colmn_value, index: new_index, order: options.keys)
|
2468
|
+
end
|
2469
|
+
|
2470
|
+
def group_by_and_aggregate(*group_by_keys, **aggregation_map)
|
2471
|
+
group_by(*group_by_keys).aggregate(aggregation_map)
|
2472
|
+
end
|
2473
|
+
|
2005
2474
|
private
|
2006
2475
|
|
2476
|
+
# Will lazily load the plotting library being used for this dataframe
|
2477
|
+
def init_plotting_library
|
2478
|
+
self.plotting_library = Daru.plotting_library
|
2479
|
+
end
|
2480
|
+
|
2481
|
+
def headers
|
2482
|
+
Daru::Index.new(Array(index.name) + @vectors.to_a)
|
2483
|
+
end
|
2484
|
+
|
2485
|
+
def row_headers
|
2486
|
+
index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a
|
2487
|
+
end
|
2488
|
+
|
2007
2489
|
def convert_categorical_vectors names
|
2008
2490
|
names.map do |n|
|
2009
2491
|
next unless self[n].category?
|
@@ -2034,7 +2516,7 @@ module Daru
|
|
2034
2516
|
end
|
2035
2517
|
|
2036
2518
|
def dispatch_to_axis(axis, method, *args, &block)
|
2037
|
-
if
|
2519
|
+
if %i[vector column].include?(axis)
|
2038
2520
|
send("#{method}_vector", *args, &block)
|
2039
2521
|
elsif axis == :row
|
2040
2522
|
send("#{method}_row", *args, &block)
|
@@ -2044,7 +2526,7 @@ module Daru
|
|
2044
2526
|
end
|
2045
2527
|
|
2046
2528
|
def dispatch_to_axis_pl(axis, method, *args, &block)
|
2047
|
-
if
|
2529
|
+
if %i[vector column].include?(axis)
|
2048
2530
|
send("#{method}_vectors", *args, &block)
|
2049
2531
|
elsif axis == :row
|
2050
2532
|
send("#{method}_rows", *args, &block)
|
@@ -2053,7 +2535,7 @@ module Daru
|
|
2053
2535
|
end
|
2054
2536
|
end
|
2055
2537
|
|
2056
|
-
AXES = [
|
2538
|
+
AXES = %i[row vector].freeze
|
2057
2539
|
|
2058
2540
|
def extract_axis names, default=:vector
|
2059
2541
|
if AXES.include?(names.last)
|
@@ -2065,7 +2547,7 @@ module Daru
|
|
2065
2547
|
|
2066
2548
|
def access_vector *names
|
2067
2549
|
if names.first.is_a?(Range)
|
2068
|
-
dup(@vectors
|
2550
|
+
dup(@vectors.subset(names.first))
|
2069
2551
|
elsif @vectors.is_a?(MultiIndex)
|
2070
2552
|
access_vector_multi_index(*names)
|
2071
2553
|
else
|
@@ -2087,14 +2569,16 @@ module Daru
|
|
2087
2569
|
|
2088
2570
|
def access_vector_single_index *names
|
2089
2571
|
if names.count < 2
|
2090
|
-
|
2091
|
-
|
2572
|
+
begin
|
2573
|
+
pos = @vectors.is_a?(Daru::DateTimeIndex) ? @vectors[names.first] : @vectors.pos(names.first)
|
2574
|
+
rescue IndexError
|
2575
|
+
raise IndexError, "Specified vector #{names.first} does not exist"
|
2576
|
+
end
|
2092
2577
|
return @data[pos] if pos.is_a?(Numeric)
|
2093
|
-
|
2094
2578
|
names = pos
|
2095
2579
|
end
|
2096
2580
|
|
2097
|
-
new_vectors = names.map { |name| [name, @data[@vectors
|
2581
|
+
new_vectors = names.map { |name| [name, @data[@vectors.pos(name)]] }.to_h
|
2098
2582
|
|
2099
2583
|
order = names.is_a?(Array) ? Daru::Index.new(names) : names
|
2100
2584
|
Daru::DataFrame.new(new_vectors, order: order,
|
@@ -2105,19 +2589,30 @@ module Daru
|
|
2105
2589
|
positions = @index.pos(*indexes)
|
2106
2590
|
|
2107
2591
|
if positions.is_a? Numeric
|
2108
|
-
|
2109
|
-
|
2110
|
-
name: indexes.first
|
2592
|
+
row = get_rows_for([positions])
|
2593
|
+
Daru::Vector.new row, index: @vectors, name: indexes.first
|
2111
2594
|
else
|
2112
|
-
new_rows =
|
2113
|
-
|
2114
|
-
index: @index.subset(*indexes),
|
2115
|
-
order: @vectors
|
2595
|
+
new_rows = get_rows_for(indexes, by_position: false)
|
2596
|
+
Daru::DataFrame.new new_rows, index: @index.subset(*indexes), order: @vectors
|
2116
2597
|
end
|
2117
2598
|
end
|
2118
2599
|
|
2119
|
-
|
2120
|
-
|
2600
|
+
# @param keys [Array] can be an array of positions (if by_position is true) or indexes (if by_position if false)
|
2601
|
+
# because of coercion by Daru::Vector#at and Daru::Vector#[], can return either an Array of
|
2602
|
+
# values (representing a row) or an array of Vectors (that can be seen as rows)
|
2603
|
+
def get_rows_for(keys, by_position: true)
|
2604
|
+
raise unless keys.is_a?(Array)
|
2605
|
+
|
2606
|
+
if by_position
|
2607
|
+
pos = keys
|
2608
|
+
@data.map { |vector| vector.at(*pos) }
|
2609
|
+
else
|
2610
|
+
# TODO: for now (2018-07-27), it is different than using
|
2611
|
+
# get_rows_for(@index.pos(*keys))
|
2612
|
+
# because Daru::Vector#at and Daru::Vector#[] don't handle Daru::MultiIndex the same way
|
2613
|
+
indexes = keys
|
2614
|
+
@data.map { |vec| vec[*indexes] }
|
2615
|
+
end
|
2121
2616
|
end
|
2122
2617
|
|
2123
2618
|
def insert_or_modify_vector name, vector
|
@@ -2126,7 +2621,7 @@ module Daru
|
|
2126
2621
|
if @index.empty?
|
2127
2622
|
insert_vector_in_empty name, vector
|
2128
2623
|
else
|
2129
|
-
vec =
|
2624
|
+
vec = prepare_for_insert name, vector
|
2130
2625
|
|
2131
2626
|
assign_or_add_vector name, vec
|
2132
2627
|
end
|
@@ -2173,25 +2668,35 @@ module Daru
|
|
2173
2668
|
@data.map! { |v| v.empty? ? v.reindex(@index) : v }
|
2174
2669
|
end
|
2175
2670
|
|
2176
|
-
def
|
2177
|
-
if
|
2178
|
-
|
2179
|
-
|
2180
|
-
|
2181
|
-
Daru::Vector.new([], name: coerce_name(name), index: @index).tap { |v|
|
2182
|
-
@index.each do |idx|
|
2183
|
-
v[idx] = vector.index.include?(idx) ? vector[idx] : nil
|
2184
|
-
end
|
2185
|
-
}
|
2671
|
+
def prepare_for_insert name, arg
|
2672
|
+
if arg.is_a? Daru::Vector
|
2673
|
+
prepare_vector_for_insert name, arg
|
2674
|
+
elsif arg.respond_to?(:to_a)
|
2675
|
+
prepare_enum_for_insert name, arg
|
2186
2676
|
else
|
2187
|
-
|
2188
|
-
|
2189
|
-
|
2190
|
-
|
2677
|
+
prepare_value_for_insert name, arg
|
2678
|
+
end
|
2679
|
+
end
|
2680
|
+
|
2681
|
+
def prepare_vector_for_insert name, vector
|
2682
|
+
# so that index-by-index assignment is avoided when possible.
|
2683
|
+
return vector.dup if vector.index == @index
|
2684
|
+
Daru::Vector.new([], name: coerce_name(name), index: @index).tap { |v|
|
2685
|
+
@index.each do |idx|
|
2686
|
+
v[idx] = vector.index.include?(idx) ? vector[idx] : nil
|
2191
2687
|
end
|
2688
|
+
}
|
2689
|
+
end
|
2192
2690
|
|
2193
|
-
|
2691
|
+
def prepare_enum_for_insert name, enum
|
2692
|
+
if @size != enum.size
|
2693
|
+
raise "Specified vector of length #{enum.size} cannot be inserted in DataFrame of size #{@size}"
|
2194
2694
|
end
|
2695
|
+
Daru::Vector.new(enum, name: coerce_name(name), index: @index)
|
2696
|
+
end
|
2697
|
+
|
2698
|
+
def prepare_value_for_insert name, value
|
2699
|
+
Daru::Vector.new(Array(value) * @size, name: coerce_name(name), index: @index)
|
2195
2700
|
end
|
2196
2701
|
|
2197
2702
|
def insert_or_modify_row indexes, vector
|
@@ -2208,7 +2713,10 @@ module Daru
|
|
2208
2713
|
set_size
|
2209
2714
|
end
|
2210
2715
|
|
2211
|
-
def create_empty_vectors
|
2716
|
+
def create_empty_vectors(vectors, index)
|
2717
|
+
@vectors = Index.coerce vectors
|
2718
|
+
@index = Index.coerce index
|
2719
|
+
|
2212
2720
|
@data = @vectors.map do |name|
|
2213
2721
|
Daru::Vector.new([], name: coerce_name(name), index: @index)
|
2214
2722
|
end
|
@@ -2250,7 +2758,7 @@ module Daru
|
|
2250
2758
|
end
|
2251
2759
|
|
2252
2760
|
def create_vectors_index_with vectors, source
|
2253
|
-
vectors = source.keys
|
2761
|
+
vectors = source.keys if vectors.nil?
|
2254
2762
|
|
2255
2763
|
@vectors =
|
2256
2764
|
if vectors.is_a?(Index) || vectors.is_a?(MultiIndex)
|
@@ -2276,8 +2784,10 @@ module Daru
|
|
2276
2784
|
|
2277
2785
|
case source.first
|
2278
2786
|
when Array
|
2787
|
+
vectors ||= (0..source.size-1).to_a
|
2279
2788
|
initialize_from_array_of_arrays source, vectors, index, opts
|
2280
2789
|
when Vector
|
2790
|
+
vectors ||= (0..source.size-1).to_a
|
2281
2791
|
initialize_from_array_of_vectors source, vectors, index, opts
|
2282
2792
|
when Hash
|
2283
2793
|
initialize_from_array_of_hashes source, vectors, index, opts
|
@@ -2295,9 +2805,7 @@ module Daru
|
|
2295
2805
|
@index = Index.coerce(index || source[0].size)
|
2296
2806
|
@vectors = Index.coerce(vectors)
|
2297
2807
|
|
2298
|
-
|
2299
|
-
Daru::Vector.new(source[idx], index: @index, name: vectors[idx])
|
2300
|
-
end
|
2808
|
+
update_data source, vectors
|
2301
2809
|
end
|
2302
2810
|
|
2303
2811
|
def initialize_from_array_of_vectors source, vectors, index, opts
|
@@ -2528,7 +3036,6 @@ module Daru
|
|
2528
3036
|
|
2529
3037
|
# Raises IndexError when one of the positions is not a valid position
|
2530
3038
|
def validate_positions *positions, size
|
2531
|
-
positions = [positions] if positions.is_a? Integer
|
2532
3039
|
positions.each do |pos|
|
2533
3040
|
raise IndexError, "#{pos} is not a valid position." if pos >= size
|
2534
3041
|
end
|
@@ -2546,6 +3053,73 @@ module Daru
|
|
2546
3053
|
end
|
2547
3054
|
end
|
2548
3055
|
|
3056
|
+
def update_data source, vectors
|
3057
|
+
@data = @vectors.each_with_index.map do |_vec, idx|
|
3058
|
+
Daru::Vector.new(source[idx], index: @index, name: vectors[idx])
|
3059
|
+
end
|
3060
|
+
end
|
3061
|
+
|
3062
|
+
def aggregate_by_positions_tuples(options, positions_tuples)
|
3063
|
+
agg_over_vectors_only, options = cast_aggregation_options(options)
|
3064
|
+
|
3065
|
+
if agg_over_vectors_only
|
3066
|
+
options.map do |vect_name, method|
|
3067
|
+
vect = self[vect_name]
|
3068
|
+
|
3069
|
+
positions_tuples.map do |positions|
|
3070
|
+
vect.apply_method_on_sub_vector(method, keys: positions)
|
3071
|
+
end
|
3072
|
+
end
|
3073
|
+
else
|
3074
|
+
methods = options.values
|
3075
|
+
|
3076
|
+
# note: because we aggregate over rows, we don't have to re-get sub-dfs for each method (which is expensive)
|
3077
|
+
rows = positions_tuples.map do |positions|
|
3078
|
+
apply_method_on_sub_df(methods, keys: positions)
|
3079
|
+
end
|
3080
|
+
|
3081
|
+
rows.transpose
|
3082
|
+
end
|
3083
|
+
end
|
3084
|
+
|
3085
|
+
# convert operations over sub-vectors to operations over sub-dfs when it improves perf
|
3086
|
+
# note: we don't always "cast" because aggregation over a single vector / a few vector is faster
|
3087
|
+
# than aggregation over (sub-)dfs
|
3088
|
+
def cast_aggregation_options(options)
|
3089
|
+
vects, non_vects = options.keys.partition { |k| @vectors.include?(k) }
|
3090
|
+
|
3091
|
+
over_vectors = true
|
3092
|
+
|
3093
|
+
if non_vects.any?
|
3094
|
+
options = options.clone
|
3095
|
+
|
3096
|
+
vects.each do |name|
|
3097
|
+
proc_on_vect = options[name].to_proc
|
3098
|
+
options[name] = ->(sub_df) { proc_on_vect.call(sub_df[name]) }
|
3099
|
+
end
|
3100
|
+
|
3101
|
+
over_vectors = false
|
3102
|
+
end
|
3103
|
+
|
3104
|
+
[over_vectors, options]
|
3105
|
+
end
|
3106
|
+
|
3107
|
+
def group_index_for_aggregation(index, multi_index_level=-1)
|
3108
|
+
case index
|
3109
|
+
when Daru::MultiIndex
|
3110
|
+
groups_by_pos = Daru::Core::GroupBy.get_positions_group_for_aggregation(index, multi_index_level)
|
3111
|
+
|
3112
|
+
new_index = Daru::MultiIndex.from_tuples(groups_by_pos.keys).coerce_index
|
3113
|
+
pos_tuples = groups_by_pos.values
|
3114
|
+
when Daru::Index, Daru::CategoricalIndex
|
3115
|
+
new_index = Array(index).uniq
|
3116
|
+
pos_tuples = new_index.map { |idx| [*index.pos(idx)] }
|
3117
|
+
else raise
|
3118
|
+
end
|
3119
|
+
|
3120
|
+
[pos_tuples, new_index]
|
3121
|
+
end
|
3122
|
+
|
2549
3123
|
# coerce ranges, integers and array in appropriate ways
|
2550
3124
|
def coerce_positions *positions, size
|
2551
3125
|
if positions.size == 1
|
@@ -2555,7 +3129,7 @@ module Daru
|
|
2555
3129
|
when Range
|
2556
3130
|
size.times.to_a[positions.first]
|
2557
3131
|
else
|
2558
|
-
raise ArgumentError, '
|
3132
|
+
raise ArgumentError, 'Unknown position type.'
|
2559
3133
|
end
|
2560
3134
|
else
|
2561
3135
|
positions
|