red_amber 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +111 -48
- data/CHANGELOG.md +90 -1
- data/Gemfile +1 -0
- data/README.md +42 -25
- data/benchmark/basic.yml +11 -4
- data/benchmark/combine.yml +3 -4
- data/benchmark/dataframe.yml +62 -0
- data/benchmark/group.yml +7 -1
- data/benchmark/reshape.yml +6 -2
- data/benchmark/vector.yml +60 -0
- data/doc/DataFrame.md +3 -0
- data/doc/Vector.md +88 -0
- data/lib/red_amber/data_frame.rb +161 -46
- data/lib/red_amber/data_frame_combinable.rb +304 -101
- data/lib/red_amber/data_frame_displayable.rb +4 -4
- data/lib/red_amber/data_frame_indexable.rb +2 -2
- data/lib/red_amber/data_frame_loadsave.rb +4 -1
- data/lib/red_amber/data_frame_reshaping.rb +35 -10
- data/lib/red_amber/data_frame_selectable.rb +221 -116
- data/lib/red_amber/data_frame_variable_operation.rb +146 -82
- data/lib/red_amber/group.rb +16 -7
- data/lib/red_amber/helper.rb +53 -31
- data/lib/red_amber/refinements.rb +199 -0
- data/lib/red_amber/vector.rb +55 -52
- data/lib/red_amber/vector_functions.rb +23 -75
- data/lib/red_amber/vector_selectable.rb +116 -69
- data/lib/red_amber/vector_updatable.rb +136 -7
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +2 -0
- data/red_amber.gemspec +3 -2
- metadata +11 -8
@@ -0,0 +1,60 @@
|
|
1
|
+
loop_count: 10
|
2
|
+
|
3
|
+
contexts:
|
4
|
+
- name: HEAD
|
5
|
+
prelude: |
|
6
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
+
- name: 0.2.0
|
8
|
+
gems:
|
9
|
+
red_amber: 0.2.0
|
10
|
+
|
11
|
+
prelude: |
|
12
|
+
require 'red_amber'
|
13
|
+
include RedAmber
|
14
|
+
require 'datasets-arrow'
|
15
|
+
|
16
|
+
ds = Datasets::Rdatasets.new('nycflights13', 'flights')
|
17
|
+
flights = RedAmber::DataFrame.new(ds.to_arrow)
|
18
|
+
df = flights.slice { flights[:month] <= 6 }
|
19
|
+
|
20
|
+
tailnum_vector = df[:tailnum]
|
21
|
+
distance_vector = df[:distance]
|
22
|
+
|
23
|
+
strings = tailnum_vector.to_a
|
24
|
+
arrow_array = tailnum_vector.data
|
25
|
+
integers = df[:dep_delay].to_a
|
26
|
+
boolean_vector = df[:air_time].is_nil
|
27
|
+
index_vector = Vector.new(0...boolean_vector.size).filter(boolean_vector)
|
28
|
+
replacer = index_vector.data.map(&:to_s)
|
29
|
+
booleans = boolean_vector.to_a
|
30
|
+
|
31
|
+
benchmark:
|
32
|
+
'V01: Vector.new from integer Array': |
|
33
|
+
Vector.new(integers)
|
34
|
+
|
35
|
+
'V02: Vector.new from string Array': |
|
36
|
+
Vector.new(strings)
|
37
|
+
|
38
|
+
'V03: Vector.new from boolean Vector': |
|
39
|
+
Vector.new(boolean_vector)
|
40
|
+
|
41
|
+
'V04: Vector#sum': |
|
42
|
+
distance_vector.mean
|
43
|
+
|
44
|
+
'V05: Vector#*': |
|
45
|
+
distance_vector * 1.852
|
46
|
+
|
47
|
+
'V06: Vector#[booleans]': |
|
48
|
+
tailnum_vector[booleans]
|
49
|
+
|
50
|
+
'V07: Vector#[boolean_vector]': |
|
51
|
+
tailnum_vector[boolean_vector]
|
52
|
+
|
53
|
+
'V08: Vector#[index_vector]': |
|
54
|
+
tailnum_vector[index_vector]
|
55
|
+
|
56
|
+
'V09: Vector#replace': |
|
57
|
+
tailnum_vector.replace(booleans, replacer)
|
58
|
+
|
59
|
+
'V10: Vector#replace with broad casting': |
|
60
|
+
tailnum_vector.replace(booleans, 'x')
|
data/doc/DataFrame.md
CHANGED
@@ -1302,7 +1302,10 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1302
1302
|
- `join_keys` are keys shared by self and other to match with them.
|
1303
1303
|
- If `join_keys` are empty, common keys in self and other are chosen (natural join).
|
1304
1304
|
- If (common keys) > `join_keys`, duplicated keys are renamed by `suffix`.
|
1305
|
+
- If you want to match the columns with different names,
|
1306
|
+
use Hash for `join_keys` such as `{ left: :KEY1, right: :KEY2 }`.
|
1305
1307
|
|
1308
|
+
These are dataframes to use in the examples of joins.
|
1306
1309
|
```ruby
|
1307
1310
|
df = DataFrame.new(
|
1308
1311
|
KEY: %w[A B C],
|
data/doc/Vector.md
CHANGED
@@ -513,3 +513,91 @@ vector.shift(fill: Float::NAN)
|
|
513
513
|
#<RedAmber::Vector(:double, size=5):0x0000000000011d3c>
|
514
514
|
[NaN, 1.0, 2.0, 3.0, 4.0]
|
515
515
|
```
|
516
|
+
|
517
|
+
### `split_to_columns(sep = ' ', limit = 0)`
|
518
|
+
|
519
|
+
Split string type Vector with any ASCII whitespace as separator.
|
520
|
+
Returns an Array of Vectors.
|
521
|
+
|
522
|
+
```ruby
|
523
|
+
vector = Vector.new(['a b', 'c d', 'e f'])
|
524
|
+
vector.split_to_columns
|
525
|
+
|
526
|
+
#=>
|
527
|
+
[#<RedAmber::Vector(:string, size=3):0x00000000000363a8>
|
528
|
+
["a", "c", "e"]
|
529
|
+
,
|
530
|
+
#<RedAmber::Vector(:string, size=3):0x00000000000363bc>
|
531
|
+
["b", "d", "f"]
|
532
|
+
]
|
533
|
+
```
|
534
|
+
It will be used for column splitting in DataFrame.
|
535
|
+
|
536
|
+
```ruby
|
537
|
+
df = DataFrame.new(year_month: %w[2022-01 2022-02 2022-03])
|
538
|
+
.assign(:year, :month) { year_month.split_to_columns('-') }
|
539
|
+
.drop(:year_month)
|
540
|
+
|
541
|
+
#=>
|
542
|
+
#<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f974>
|
543
|
+
year month
|
544
|
+
<string> <string>
|
545
|
+
0 2022 01
|
546
|
+
1 2022 02
|
547
|
+
2 2022 03
|
548
|
+
```
|
549
|
+
|
550
|
+
### `split_to_rows(sep = ' ', limit = 0)`
|
551
|
+
|
552
|
+
Split string type Vector with any ASCII whitespace as separator.
|
553
|
+
Returns a Vector flattened into rows.
|
554
|
+
|
555
|
+
```ruby
|
556
|
+
vector = Vector.new(['a b', 'c d', 'e f'])
|
557
|
+
vector.split_to_rows
|
558
|
+
|
559
|
+
#=>
|
560
|
+
#<RedAmber::Vector(:string, size=6):0x000000000002ccf4>
|
561
|
+
["a", "b", "c", "d", "e", "f"]
|
562
|
+
```
|
563
|
+
|
564
|
+
### `merge(other, sep: ' ')`
|
565
|
+
|
566
|
+
Merge a String or another string Vector to self using a separator.
|
567
|
+
Self must be a string Vector.
|
568
|
+
Returns merged string Vector.
|
569
|
+
|
570
|
+
```ruby
|
571
|
+
# with vector
|
572
|
+
vector = Vector.new(%w[a c e])
|
573
|
+
other = Vector.new(%w[b d f])
|
574
|
+
vector.merge(other)
|
575
|
+
|
576
|
+
#=>
|
577
|
+
#<RedAmber::Vector(:string, size=3):0x0000000000038b80>
|
578
|
+
["a b", "c d", "e f"]
|
579
|
+
```
|
580
|
+
|
581
|
+
If other is a String it will be broadcasted.
|
582
|
+
|
583
|
+
```ruby
|
584
|
+
# with String
|
585
|
+
vector = Vector.new(%w[a c e])
vector.merge('x')
|
586
|
+
|
587
|
+
#=>
|
588
|
+
#<RedAmber::Vector(:string, size=3):0x00000000000446b0>
|
589
|
+
["a x", "c x", "e x"]
|
590
|
+
```
|
591
|
+
|
592
|
+
You can specify separator string by :sep.
|
593
|
+
|
594
|
+
```ruby
|
595
|
+
# with vector
|
596
|
+
vector = Vector.new(%w[a c e])
|
597
|
+
other = Vector.new(%w[b d f])
|
598
|
+
vector.merge(other, sep: '')
|
599
|
+
|
600
|
+
#=>
|
601
|
+
#<RedAmber::Vector(:string, size=3):0x0000000000038b80>
|
602
|
+
["ab", "cd", "ef"]
|
603
|
+
```
|
data/lib/red_amber/data_frame.rb
CHANGED
@@ -14,65 +14,111 @@ module RedAmber
|
|
14
14
|
include DataFrameVariableOperation
|
15
15
|
include Helper
|
16
16
|
|
17
|
-
|
17
|
+
using RefineArrowTable
|
18
|
+
using RefineHash
|
19
|
+
|
20
|
+
# Quicker DataFrame construction from a `Arrow::Table`.
|
18
21
|
#
|
19
|
-
# @
|
22
|
+
# @param table [Arrow::Table] A table to have in the DataFrame.
|
23
|
+
# @return [DataFrame] Initialized DataFrame.
|
20
24
|
#
|
21
|
-
#
|
25
|
+
# @note This method will allocate table directly and may be used in the method.
|
26
|
+
# @note `table` must have unique keys.
|
27
|
+
def self.create(table)
|
28
|
+
instance = allocate
|
29
|
+
instance.instance_variable_set(:@table, table)
|
30
|
+
instance
|
31
|
+
end
|
32
|
+
|
33
|
+
# Creates a new DataFrame.
|
22
34
|
#
|
23
35
|
# @overload initialize(table)
|
36
|
+
# Initialize DataFrame by an `Arrow::Table`
|
37
|
+
#
|
38
|
+
# @param table [Arrow::Table]
|
39
|
+
# A table to have in the DataFrame.
|
40
|
+
#
|
41
|
+
# @overload initialize(arrowable)
|
42
|
+
# Initialize DataFrame by a `#to_arrow` responsible object.
|
43
|
+
#
|
44
|
+
# @param arrowable [#to_arrow]
|
45
|
+
# Any object which responds to `#to_arrow`.
|
46
|
+
# `#to_arrow` must return `Arrow::Table`.
|
47
|
+
#
|
48
|
+
# @note `RedAmber::DataFrame` itself is readable by this.
|
49
|
+
# @note Hash is refined to respond to `#to_arrow` in this class.
|
50
|
+
#
|
51
|
+
# @overload initialize(rover_like)
|
52
|
+
# Initialize DataFrame by a `Rover::DataFrame`-like `#to_h` responsible object.
|
24
53
|
#
|
25
|
-
# @
|
54
|
+
# @param rover_like [#to_h]
|
55
|
+
# Any object which responds to `#to_h`.
|
56
|
+
# `#to_h` must return a Hash which is convertible by `Arrow::Table.new`.
|
26
57
|
#
|
27
|
-
#
|
58
|
+
# @note `Rover::DataFrame` is readable by this.
|
28
59
|
#
|
29
|
-
#
|
60
|
+
# @overload initialize()
|
61
|
+
# Create empty DataFrame
|
30
62
|
#
|
31
|
-
#
|
63
|
+
# @example DataFrame.new
|
32
64
|
#
|
33
|
-
#
|
65
|
+
# @overload initialize(empty)
|
66
|
+
# Create empty DataFrame
|
67
|
+
#
|
68
|
+
# @param empty [nil, [], {}]
|
69
|
+
#
|
70
|
+
# @example DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
|
71
|
+
#
|
72
|
+
# @overload initialize(args)
|
73
|
+
#
|
74
|
+
# @param args [values]
|
75
|
+
# Accepts any arguments which are valid for `Arrow::Table.new(args)`. See
|
76
|
+
# {https://github.com/apache/arrow/blob/master/ruby/red-arrow/lib/arrow/table.rb
|
34
77
|
#
|
35
78
|
def initialize(*args)
|
36
|
-
@variables = @keys = @vectors = @types = @data_types = nil
|
37
79
|
case args
|
38
80
|
in nil | [nil] | [] | {} | [[]] | [{}]
|
39
|
-
# DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil)
|
40
|
-
# returns empty DataFrame
|
41
81
|
@table = Arrow::Table.new({}, [])
|
42
|
-
in [
|
82
|
+
in [Arrow::Table => table]
|
83
|
+
@table = table
|
84
|
+
in [arrowable] if arrowable.respond_to?(:to_arrow)
|
43
85
|
table = arrowable.to_arrow
|
44
86
|
unless table.is_a?(Arrow::Table)
|
45
87
|
raise DataFrameTypeError,
|
46
88
|
"to_arrow must return an Arrow::Table but #{table.class}: #{arrowable}"
|
47
89
|
end
|
48
90
|
@table = table
|
49
|
-
in [
|
50
|
-
@table = table
|
51
|
-
in [rover_or_hash]
|
91
|
+
in [rover_like] if rover_like.respond_to?(:to_h)
|
52
92
|
begin
|
53
|
-
# Accepts Rover::DataFrame
|
54
|
-
@table = Arrow::Table.new(
|
93
|
+
# Accepts Rover::DataFrame
|
94
|
+
@table = Arrow::Table.new(rover_like.to_h)
|
55
95
|
rescue StandardError
|
56
|
-
raise DataFrameTypeError, "
|
96
|
+
raise DataFrameTypeError, "to_h must return Arrowable object: #{rover_like}"
|
57
97
|
end
|
58
98
|
else
|
59
|
-
|
99
|
+
begin
|
100
|
+
@table = Arrow::Table.new(*args)
|
101
|
+
rescue StandardError
|
102
|
+
raise DataFrameTypeError, "invalid argument to create Arrow::Table: #{args}"
|
103
|
+
end
|
60
104
|
end
|
61
|
-
name_unnamed_keys
|
62
105
|
|
63
|
-
|
64
|
-
|
106
|
+
name_unnamed_keys
|
107
|
+
check_duplicate_keys(keys)
|
65
108
|
end
|
66
109
|
|
110
|
+
# Returns the table having within.
|
111
|
+
#
|
112
|
+
# @return [Arrow::Table] The table within.
|
113
|
+
#
|
67
114
|
attr_reader :table
|
68
115
|
|
69
|
-
|
70
|
-
@table
|
71
|
-
end
|
116
|
+
alias_method :to_arrow, :table
|
72
117
|
|
73
118
|
# Returns the number of rows.
|
74
119
|
#
|
75
120
|
# @return [Integer] Number of rows.
|
121
|
+
#
|
76
122
|
def size
|
77
123
|
@table.n_rows
|
78
124
|
end
|
@@ -83,6 +129,7 @@ module RedAmber
|
|
83
129
|
# Returns the number of columns.
|
84
130
|
#
|
85
131
|
# @return [Integer] Number of columns.
|
132
|
+
#
|
86
133
|
def n_keys
|
87
134
|
@table.n_columns
|
88
135
|
end
|
@@ -95,6 +142,7 @@ module RedAmber
|
|
95
142
|
# @return [Array]
|
96
143
|
# Number of rows and number of columns in an array.
|
97
144
|
# Same as [size, n_keys].
|
145
|
+
#
|
98
146
|
def shape
|
99
147
|
[size, n_keys]
|
100
148
|
end
|
@@ -102,7 +150,8 @@ module RedAmber
|
|
102
150
|
# Returns a Hash of key and Vector pairs in the columns.
|
103
151
|
#
|
104
152
|
# @return [Hash]
|
105
|
-
# key => Vector pairs for each columns.
|
153
|
+
# `key => Vector` pairs for each columns.
|
154
|
+
#
|
106
155
|
def variables
|
107
156
|
@variables || @variables = init_instance_vars(:variables)
|
108
157
|
end
|
@@ -112,6 +161,7 @@ module RedAmber
|
|
112
161
|
#
|
113
162
|
# @return [Array]
|
114
163
|
# Keys in an Array.
|
164
|
+
#
|
115
165
|
def keys
|
116
166
|
@keys || @keys = init_instance_vars(:keys)
|
117
167
|
end
|
@@ -123,6 +173,7 @@ module RedAmber
|
|
123
173
|
# @param key [Symbol, String] Key to test.
|
124
174
|
# @return [Boolean]
|
125
175
|
# Returns true if self has key in Symbol.
|
176
|
+
#
|
126
177
|
def key?(key)
|
127
178
|
keys.include?(key.to_sym)
|
128
179
|
end
|
@@ -133,6 +184,7 @@ module RedAmber
|
|
133
184
|
# @param key [Symbol, String] key to know.
|
134
185
|
# @return [Integer]
|
135
186
|
# Index of key in the Array keys.
|
187
|
+
#
|
136
188
|
def key_index(key)
|
137
189
|
keys.find_index(key.to_sym)
|
138
190
|
end
|
@@ -143,14 +195,18 @@ module RedAmber
|
|
143
195
|
#
|
144
196
|
# @return [Array]
|
145
197
|
# Abbreviated Red Arrow data type names.
|
198
|
+
#
|
146
199
|
def types
|
147
|
-
@types || @types = @table.columns.map
|
200
|
+
@types || @types = @table.columns.map do |column|
|
201
|
+
column.data.value_type.nick.to_sym
|
202
|
+
end
|
148
203
|
end
|
149
204
|
|
150
205
|
# Returns an Array of Classes of data type.
|
151
206
|
#
|
152
207
|
# @return [Array]
|
153
208
|
# An Array of Red Arrow data type Classes.
|
209
|
+
#
|
154
210
|
def type_classes
|
155
211
|
@data_types || @data_types = @table.columns.map { |column| column.data_type.class }
|
156
212
|
end
|
@@ -158,50 +214,94 @@ module RedAmber
|
|
158
214
|
# Returns Vectors in an Array.
|
159
215
|
#
|
160
216
|
# @return [Array]
|
161
|
-
# An Array of RedAmber::Vector
|
217
|
+
# An Array of `RedAmber::Vector`s.
|
218
|
+
#
|
162
219
|
def vectors
|
163
220
|
@vectors || @vectors = init_instance_vars(:vectors)
|
164
221
|
end
|
165
222
|
|
166
|
-
# Returns row indices (start...(size+start)) in
|
223
|
+
# Returns row indices (start...(size+start)) in a Vector.
|
167
224
|
#
|
168
225
|
# @param start [Object]
|
169
|
-
# Object which have
|
226
|
+
# Object which have `#succ` method.
|
227
|
+
#
|
170
228
|
# @return [Array]
|
171
|
-
#
|
229
|
+
# A Vector of row indices.
|
230
|
+
#
|
172
231
|
# @example
|
173
232
|
# (when self.size == 5)
|
174
|
-
# - indices #=> [0, 1, 2, 3, 4]
|
175
|
-
# - indices(1) #=> [1, 2, 3, 4, 5]
|
176
|
-
# - indices('a') #=> ['a', 'b', 'c', 'd', 'e']
|
233
|
+
# - indices #=> Vector[0, 1, 2, 3, 4]
|
234
|
+
# - indices(1) #=> Vector[1, 2, 3, 4, 5]
|
235
|
+
# - indices('a') #=> Vector['a', 'b', 'c', 'd', 'e']
|
236
|
+
#
|
177
237
|
def indices(start = 0)
|
178
238
|
Vector.new((start..).take(size))
|
179
239
|
end
|
180
240
|
alias_method :indexes, :indices
|
181
241
|
|
242
|
+
# Returns column-oriented data in a Hash.
|
243
|
+
#
|
244
|
+
# @return [Hash] A Hash of 'key => column_in_an_array'.
|
245
|
+
#
|
182
246
|
def to_h
|
183
247
|
variables.transform_values(&:to_a)
|
184
248
|
end
|
185
249
|
|
250
|
+
# Returns a row-oriented array without header.
|
251
|
+
#
|
252
|
+
# @return [Array] Row-oriented data without header.
|
253
|
+
#
|
254
|
+
# @note If you need column-oriented array, use `.to_h.to_a`.
|
255
|
+
#
|
186
256
|
def to_a
|
187
|
-
# output an array of row-oriented data without header
|
188
|
-
# if you need column-oriented array, use `.to_h.to_a`
|
189
257
|
@table.raw_records
|
190
258
|
end
|
191
259
|
alias_method :raw_records, :to_a
|
192
260
|
|
261
|
+
# Returns column name and data type in a Hash.
|
262
|
+
#
|
263
|
+
# @return [Hash] Column name and data type.
|
264
|
+
#
|
265
|
+
# @example
|
266
|
+
# RedAmber::DataFrame.new(x: [1, 2, 3], y: %w[A B C]).schema
|
267
|
+
# # => {:x=>:uint8, :y=>:string}
|
268
|
+
#
|
193
269
|
def schema
|
194
270
|
keys.zip(types).to_h
|
195
271
|
end
|
196
272
|
|
273
|
+
# Compare DataFrames.
|
274
|
+
#
|
275
|
+
# @return [true, false]
|
276
|
+
# True if other is a DataFrame and table is same.
|
277
|
+
# Otherwise return false.
|
278
|
+
#
|
197
279
|
def ==(other)
|
198
280
|
other.is_a?(DataFrame) && @table == other.table
|
199
281
|
end
|
200
282
|
|
283
|
+
# Check if it is an empty DataFrame.
|
284
|
+
#
|
285
|
+
# @return [true, false] True if it has no columns.
|
286
|
+
#
|
201
287
|
def empty?
|
202
288
|
variables.empty?
|
203
289
|
end
|
204
290
|
|
291
|
+
# Enumerate for each row.
|
292
|
+
#
|
293
|
+
# @overload each_row
|
294
|
+
# Returns Enumerator when no block given.
|
295
|
+
#
|
296
|
+
# @return [Enumerator] Enumerator of each rows.
|
297
|
+
#
|
298
|
+
# @overload each_row(&block)
|
299
|
+
# Yields with key and row pairs.
|
300
|
+
#
|
301
|
+
# @yield [key_row_pairs] Yields with key and row pairs.
|
302
|
+
# @yieldparam [Hash] Key and row pairs.
|
303
|
+
# @yieldreturn [Integer] Size of the DataFrame.
|
304
|
+
#
|
205
305
|
def each_row
|
206
306
|
return enum_for(:each_row) unless block_given?
|
207
307
|
|
@@ -214,6 +314,10 @@ module RedAmber
|
|
214
314
|
end
|
215
315
|
end
|
216
316
|
|
317
|
+
# Returns self in a `Rover::DataFrame`.
|
318
|
+
#
|
319
|
+
# @return [Rover::DataFrame] A `Rover::DataFrame`.
|
320
|
+
#
|
217
321
|
def to_rover
|
218
322
|
require 'rover'
|
219
323
|
Rover::DataFrame.new(to_h)
|
@@ -226,7 +330,7 @@ module RedAmber
|
|
226
330
|
end
|
227
331
|
|
228
332
|
def method_missing(name, *args, &block)
|
229
|
-
return v(name) if args.empty?
|
333
|
+
return v(name) if args.empty? && key?(name)
|
230
334
|
|
231
335
|
super
|
232
336
|
end
|
@@ -241,20 +345,31 @@ module RedAmber
|
|
241
345
|
|
242
346
|
# initialize @variable, @keys, @vectors and return one of them
|
243
347
|
def init_instance_vars(var)
|
244
|
-
ary =
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
348
|
+
ary =
|
349
|
+
@table.columns
|
350
|
+
.each_with_object([{}, [], []]) do |column, (variables, keys, vectors)|
|
351
|
+
v = Vector.create(column.data)
|
352
|
+
k = column.name.to_sym
|
353
|
+
v.key = k
|
354
|
+
variables[k] = v
|
355
|
+
keys << k
|
356
|
+
vectors << v
|
357
|
+
end
|
358
|
+
|
252
359
|
@variables, @keys, @vectors = ary
|
253
360
|
ary[%i[variables keys vectors].index(var)]
|
254
361
|
end
|
255
362
|
|
363
|
+
def check_duplicate_keys(array)
|
364
|
+
org = array.dup
|
365
|
+
return unless array.uniq!
|
366
|
+
|
367
|
+
raise DataFrameArgumentError,
|
368
|
+
"duplicate keys: #{org.tally.select { |_k, v| v > 1 }.keys}"
|
369
|
+
end
|
370
|
+
|
256
371
|
def name_unnamed_keys
|
257
|
-
return unless @table
|
372
|
+
return unless @table.key?('')
|
258
373
|
|
259
374
|
# We can't use #keys because it causes mismatch of @table and @keys
|
260
375
|
keys = @table.schema.fields.map { |f| f.name.to_sym }
|