red_amber 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +12 -0
- data/CHANGELOG.md +114 -31
- data/Gemfile +4 -2
- data/README.md +41 -25
- data/benchmark/basic.yml +79 -0
- data/benchmark/combine.yml +63 -0
- data/benchmark/drop_nil.yml +15 -3
- data/benchmark/group.yml +33 -0
- data/benchmark/reshape.yml +27 -0
- data/benchmark/{csv_load_penguins.yml → rover/csv_load_penguins.yml} +3 -3
- data/benchmark/rover/flights.yml +23 -0
- data/benchmark/rover/penguins.yml +23 -0
- data/benchmark/rover/planes.yml +23 -0
- data/benchmark/rover/weather.yml +23 -0
- data/doc/DataFrame.md +332 -53
- data/doc/Vector.md +3 -0
- data/doc/image/dataframe/join.png +0 -0
- data/doc/image/dataframe/set_and_bind.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/lib/red_amber/data_frame.rb +6 -5
- data/lib/red_amber/data_frame_combinable.rb +283 -0
- data/lib/red_amber/data_frame_displayable.rb +2 -0
- data/lib/red_amber/data_frame_selectable.rb +9 -9
- data/lib/red_amber/data_frame_variable_operation.rb +4 -4
- data/lib/red_amber/group.rb +99 -18
- data/lib/red_amber/helper.rb +1 -13
- data/lib/red_amber/vector.rb +7 -0
- data/lib/red_amber/vector_functions.rb +0 -8
- data/lib/red_amber/vector_updatable.rb +60 -65
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +1 -0
- data/red_amber.gemspec +1 -1
- metadata +21 -10
@@ -0,0 +1,23 @@
|
|
1
|
+
contexts:
|
2
|
+
- gems:
|
3
|
+
red_amber: 0.2.2
|
4
|
+
- name: HEAD
|
5
|
+
prelude: |
|
6
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
+
require 'red_amber'
|
8
|
+
|
9
|
+
prelude: |
|
10
|
+
require 'rover'
|
11
|
+
require 'datasets-arrow'
|
12
|
+
ds = Datasets::Rdatasets.new('nycflights13', 'flights')
|
13
|
+
df = RedAmber::DataFrame.new(ds)
|
14
|
+
rover = Rover::DataFrame.new(df.to_h)
|
15
|
+
group_keys = [:month, :origin]
|
16
|
+
summary_key = :air_time
|
17
|
+
|
18
|
+
benchmark:
|
19
|
+
'flights Group by Rover': |
|
20
|
+
rover.group(group_keys).count
|
21
|
+
|
22
|
+
'flights Group by RedAmber': |
|
23
|
+
df.group(group_keys).count
|
@@ -0,0 +1,23 @@
|
|
1
|
+
contexts:
|
2
|
+
- gems:
|
3
|
+
red_amber: 0.2.2
|
4
|
+
- name: HEAD
|
5
|
+
prelude: |
|
6
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
+
require 'red_amber'
|
8
|
+
|
9
|
+
prelude: |
|
10
|
+
require 'rover'
|
11
|
+
require 'datasets-arrow'
|
12
|
+
ds = Datasets::Penguins.new
|
13
|
+
df = RedAmber::DataFrame.new(ds)
|
14
|
+
rover = Rover::DataFrame.new(df.to_h)
|
15
|
+
group_keys = [:species, :island]
|
16
|
+
summary_key = :body_mass_g
|
17
|
+
|
18
|
+
benchmark:
|
19
|
+
'penguins Group by Rover': |
|
20
|
+
rover.group(group_keys).mean(summary_key)
|
21
|
+
|
22
|
+
'penguins Group by RedAmber': |
|
23
|
+
df.group(group_keys).mean(summary_key)
|
@@ -0,0 +1,23 @@
|
|
1
|
+
contexts:
|
2
|
+
- gems:
|
3
|
+
red_amber: 0.2.2
|
4
|
+
- name: HEAD
|
5
|
+
prelude: |
|
6
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
+
require 'red_amber'
|
8
|
+
|
9
|
+
prelude: |
|
10
|
+
require 'rover'
|
11
|
+
require 'datasets-arrow'
|
12
|
+
ds = Datasets::Rdatasets.new('nycflights13', 'planes')
|
13
|
+
df = RedAmber::DataFrame.new(ds)
|
14
|
+
rover = Rover::DataFrame.new(df.to_h)
|
15
|
+
group_keys = [:engines, :engine]
|
16
|
+
summary_key = :seats
|
17
|
+
|
18
|
+
benchmark:
|
19
|
+
'planes Group by Rover': |
|
20
|
+
rover.group(group_keys).mean(summary_key)
|
21
|
+
|
22
|
+
'planes Group by RedAmber': |
|
23
|
+
df.group(group_keys).mean(summary_key)
|
@@ -0,0 +1,23 @@
|
|
1
|
+
contexts:
|
2
|
+
- gems:
|
3
|
+
red_amber: 0.2.2
|
4
|
+
- name: HEAD
|
5
|
+
prelude: |
|
6
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
+
require 'red_amber'
|
8
|
+
|
9
|
+
prelude: |
|
10
|
+
require 'rover'
|
11
|
+
require 'datasets-arrow'
|
12
|
+
ds = Datasets::Rdatasets.new('nycflights13', 'weather')
|
13
|
+
df = RedAmber::DataFrame.new(ds)
|
14
|
+
rover = Rover::DataFrame.new(df.to_h)
|
15
|
+
group_keys = [:month, :origin]
|
16
|
+
summary_key = :temp
|
17
|
+
|
18
|
+
benchmark:
|
19
|
+
'weather Group by Rover': |
|
20
|
+
rover.group(group_keys).mean(summary_key)
|
21
|
+
|
22
|
+
'weather Group by RedAmber': |
|
23
|
+
df.group(group_keys).mean(summary_key)
|
data/doc/DataFrame.md
CHANGED
@@ -5,7 +5,8 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
|
|
5
5
|
- A label is attached to `Vector`. We call it `key`.
|
6
6
|
- A `Vector` and associated `key` is grouped as a `variable`.
|
7
7
|
- `variable`s with same vector length are aligned and arranged to be a `DataFrame`.
|
8
|
-
- Each `
|
8
|
+
- Each `key` in a `DataFrame` must be unique.
|
9
|
+
- The values at the same position across the `Vector`s of a `DataFrame` form a set of related data. We call it a `record` or an `observation`.
|
9
10
|
|
10
11
|

|
11
12
|
|
@@ -94,13 +95,13 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
|
|
94
95
|
|
95
96
|
### `table`, `to_arrow`
|
96
97
|
|
97
|
-
-
|
98
|
+
- Returns Arrow::Table object in the DataFrame.
|
98
99
|
|
99
|
-
### `size`, `n_obs`, `n_rows`
|
100
|
+
### `size`, `n_records`, `n_obs`, `n_rows`
|
100
101
|
|
101
|
-
- Returns size of Vector (num of
|
102
|
-
|
103
|
-
### `n_keys`, `n_vars`, `n_cols`,
|
102
|
+
- Returns size of Vector (num of records).
|
103
|
+
|
104
|
+
### `n_keys`, `n_variables`, `n_vars`, `n_cols`,
|
104
105
|
|
105
106
|
- Returns num of keys (num of variables).
|
106
107
|
|
@@ -138,16 +139,7 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
|
|
138
139
|
|
139
140
|
- Returns key names in an Array.
|
140
141
|
|
141
|
-
|
142
|
-
|
143
|
-
```ruby
|
144
|
-
# update numeric variables, another solution
|
145
|
-
df.assign do
|
146
|
-
vectors.each_with_object({}) do |vector, assigner|
|
147
|
-
assigner[vector.key] = vector * -1 if vector.numeric?
|
148
|
-
end
|
149
|
-
end
|
150
|
-
```
|
142
|
+
Each key must be unique in the DataFrame.
|
151
143
|
|
152
144
|
### `types`
|
153
145
|
|
@@ -161,9 +153,20 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
|
|
161
153
|
|
162
154
|
- Returns an Array of Vectors.
|
163
155
|
|
156
|
+
When we use it, Vector#key is useful to get the key in the DataFrame.
|
157
|
+
|
158
|
+
```ruby
|
159
|
+
# update numeric variables, another solution
|
160
|
+
df.assign do
|
161
|
+
vectors.each_with_object({}) do |vector, assigner|
|
162
|
+
assigner[vector.key] = vector * -1 if vector.numeric?
|
163
|
+
end
|
164
|
+
end
|
165
|
+
```
|
166
|
+
|
164
167
|
### `indices`, `indexes`
|
165
168
|
|
166
|
-
- Returns indexes in
|
169
|
+
- Returns indexes in a Vector.
|
167
170
|
Accepts an option `start` as the first of indexes.
|
168
171
|
|
169
172
|
```ruby
|
@@ -171,15 +174,19 @@ Class `RedAmber::DataFrame` represents 2D-data. A `DataFrame` consists with:
|
|
171
174
|
df.indices
|
172
175
|
|
173
176
|
# =>
|
177
|
+
#<RedAmber::Vector(:uint8, size=5):0x0000000000013ed4>
|
174
178
|
[0, 1, 2, 3, 4]
|
175
179
|
|
176
180
|
df.indices(1)
|
177
181
|
|
178
182
|
# =>
|
183
|
+
#<RedAmber::Vector(:uint8, size=5):0x0000000000018fd8>
|
179
184
|
[1, 2, 3, 4, 5]
|
180
185
|
|
181
186
|
df.indices(:a)
|
187
|
+
|
182
188
|
# =>
|
189
|
+
#<RedAmber::Vector(:dictionary, size=5):0x000000000001bd50>
|
183
190
|
[:a, :b, :c, :d, :e]
|
184
191
|
```
|
185
192
|
|
@@ -275,6 +282,7 @@ penguins.to_rover
|
|
275
282
|
|
276
283
|
dataset = Datasets::Penguins.new
|
277
284
|
# (From 0.2.2) responsible to the object which has `to_arrow` method.
|
285
|
+
# In older versions, pass `dataset.to_arrow` in the parentheses instead.
|
278
286
|
RedAmber::DataFrame.new(dataset).tdr
|
279
287
|
|
280
288
|
# =>
|
@@ -290,10 +298,11 @@ penguins.to_rover
|
|
290
298
|
6 :sex string 3 {"male"=>168, "female"=>165, nil=>11}
|
291
299
|
7 :year uint16 3 {2007=>110, 2008=>114, 2009=>120}
|
292
300
|
```
|
293
|
-
|
301
|
+
|
302
|
+
Options:
|
294
303
|
- limit: limit of variables to show. Default value is 10.
|
295
|
-
- tally: max level to use tally mode.
|
296
|
-
- elements: max num of element to show values in each
|
304
|
+
- tally: max level to use tally mode. Default value is 5.
|
305
|
+
- elements: max num of element to show values in each records. Default value is 5.
|
297
306
|
|
298
307
|
## Selecting
|
299
308
|
|
@@ -303,13 +312,13 @@ penguins.to_rover
|
|
303
312
|
- Keys in an Array: `df[:symbol1, "string", :symbol2]`
|
304
313
|
- Keys by indices: `df[df.keys[0]]`, `df[df.keys[1,2]]`, `df[df.keys[1..]]`
|
305
314
|
|
306
|
-
Key indeces
|
315
|
+
Key indices should be used via `keys[i]` because numbers are used to select records (rows). See the next section.
|
307
316
|
|
308
317
|
- Keys by a Range:
|
309
318
|
|
310
|
-
If keys are able to represent by Range, it can be included in the arguments. See a example below.
|
319
|
+
If the keys can be represented by a Range, it can be included in the arguments. See the example below.
|
311
320
|
|
312
|
-
- You can exchange the order of variables (columns).
|
321
|
+
- You can also exchange the order of variables (columns).
|
313
322
|
|
314
323
|
```ruby
|
315
324
|
hash = {a: [1, 2, 3], b: %w[A B C], c: [1.0, 2, 3]}
|
@@ -325,7 +334,7 @@ penguins.to_rover
|
|
325
334
|
2 C 3.0 3
|
326
335
|
```
|
327
336
|
|
328
|
-
If `#[]` represents single variable (column), it returns a Vector object.
|
337
|
+
If `#[]` represents a single variable (column), it returns a Vector object.
|
329
338
|
|
330
339
|
```ruby
|
331
340
|
df[:a]
|
@@ -334,6 +343,7 @@ penguins.to_rover
|
|
334
343
|
#<RedAmber::Vector(:uint8, size=3):0x000000000000f140>
|
335
344
|
[1, 2, 3]
|
336
345
|
```
|
346
|
+
|
337
347
|
Or `#v` method also returns a Vector for a key.
|
338
348
|
|
339
349
|
```ruby
|
@@ -344,18 +354,19 @@ penguins.to_rover
|
|
344
354
|
[1, 2, 3]
|
345
355
|
```
|
346
356
|
|
347
|
-
This may be useful to use in a block of DataFrame manipulation verbs. We can write `v(:a)` rather than `self[:a]` or `df[:a]`
|
357
|
+
This method may be useful to use in a block of DataFrame manipulation verbs. We can write `v(:a)` rather than `self[:a]` or `df[:a]`
|
348
358
|
|
349
|
-
### Select
|
359
|
+
### Select records (rows in a table) by `[]` as `[index]`, `[range]`, `[array]`
|
350
360
|
|
351
|
-
- Select a
|
352
|
-
- Select obs. by indeces in a Range: `df[1..2]`
|
361
|
+
- Select a record by index: `df[0]`
|
353
362
|
|
354
|
-
|
363
|
+
- Select records by indeces in an Array: `df[1, 2]`
|
355
364
|
|
356
|
-
- Select
|
365
|
+
- Select records by indeces in a Range: `df[1..2]`
|
357
366
|
|
358
|
-
-
|
367
|
+
An end-less or a begin-less Range can be used to represent indeces.
|
368
|
+
|
369
|
+
- You can use indices in Float.
|
359
370
|
|
360
371
|
- Mixed case: `df[2, 0..]`
|
361
372
|
|
@@ -374,9 +385,9 @@ penguins.to_rover
|
|
374
385
|
3 3 C 3.0
|
375
386
|
```
|
376
387
|
|
377
|
-
- Select
|
388
|
+
- Select records by a boolean Array or a boolean RedAmber::Vector at same size as self.
|
378
389
|
|
379
|
-
It returns a sub dataframe with
|
390
|
+
It returns a sub dataframe containing the records where the boolean is true.
|
380
391
|
|
381
392
|
```ruby
|
382
393
|
# with the same dataframe `df` above
|
@@ -391,15 +402,15 @@ penguins.to_rover
|
|
391
402
|
1 1 A 1.0
|
392
403
|
```
|
393
404
|
|
394
|
-
### Select rows from top or from bottom
|
405
|
+
### Select records (rows) from top or from bottom
|
395
406
|
|
396
407
|
`head(n=5)`, `tail(n=5)`, `first(n=1)`, `last(n=1)`
|
397
408
|
|
398
409
|
## Sub DataFrame manipulations
|
399
410
|
|
400
|
-
### `pick ` - pick up variables
|
411
|
+
### `pick ` - pick up variables -
|
401
412
|
|
402
|
-
Pick up some
|
413
|
+
Pick up some variables (columns) to create a sub DataFrame.
|
403
414
|
|
404
415
|

|
405
416
|
|
@@ -491,9 +502,9 @@ penguins.to_rover
|
|
491
502
|
343 49.9 16.1 213
|
492
503
|
```
|
493
504
|
|
494
|
-
### `drop ` -
|
505
|
+
### `drop ` - counterpart of pick -
|
495
506
|
|
496
|
-
Drop some
|
507
|
+
Drop some variables (columns) to create a DataFrame of the remaining variables.
|
497
508
|
|
498
509
|

|
499
510
|
|
@@ -557,9 +568,9 @@ penguins.to_rover
|
|
557
568
|
[1, 2, 3]
|
558
569
|
```
|
559
570
|
|
560
|
-
### `slice ` -
|
571
|
+
### `slice ` - slice and select records -
|
561
572
|
|
562
|
-
Slice and select
|
573
|
+
Slice and select records (rows) to create a sub DataFrame.
|
563
574
|
|
564
575
|

|
565
576
|
|
@@ -570,7 +581,7 @@ penguins.to_rover
|
|
570
581
|
Negative index from the tail like Ruby's Array is also acceptable.
|
571
582
|
|
572
583
|
```ruby
|
573
|
-
# returns 5
|
584
|
+
# returns 5 records at start and 5 records from end
|
574
585
|
penguins.slice(0...5, -5..-1)
|
575
586
|
|
576
587
|
# =>
|
@@ -665,9 +676,9 @@ penguins.to_rover
|
|
665
676
|
0 1 A 1.000000
|
666
677
|
```
|
667
678
|
|
668
|
-
### `remove`
|
679
|
+
### `remove` - counterpart of slice -
|
669
680
|
|
670
|
-
Slice and reject
|
681
|
+
Slice and reject records (rows) to create a DataFrame of the remaining records.
|
671
682
|
|
672
683
|

|
673
684
|
|
@@ -676,7 +687,7 @@ penguins.to_rover
|
|
676
687
|
`remove(indeces)` accepts indeces as arguments. Indeces should be an Integer or a Range of Integer.
|
677
688
|
|
678
689
|
```ruby
|
679
|
-
# returns 6th to 339th
|
690
|
+
# returns 6th to 339th records
|
680
691
|
penguins.remove(0...5, -5..-1)
|
681
692
|
|
682
693
|
# =>
|
@@ -699,7 +710,7 @@ penguins.to_rover
|
|
699
710
|
`remove(booleans)` accepts booleans as an argument in an Array, a Vector or an Arrow::BooleanArray . Booleans must be same length as `size`.
|
700
711
|
|
701
712
|
```ruby
|
702
|
-
# remove all
|
713
|
+
# remove all records containing nil
|
703
714
|
removed = penguins.remove { vectors.map(&:is_nil).reduce(&:|) }
|
704
715
|
removed
|
705
716
|
|
@@ -785,7 +796,7 @@ penguins.to_rover
|
|
785
796
|
|
786
797
|
### `rename`
|
787
798
|
|
788
|
-
Rename keys (column names) to create a updated DataFrame.
|
799
|
+
Rename keys (variable/column names) to create an updated DataFrame.
|
789
800
|
|
790
801
|

|
791
802
|
|
@@ -820,7 +831,7 @@ penguins.to_rover
|
|
820
831
|
|
821
832
|
### `assign`
|
822
833
|
|
823
|
-
Assign new or updated
|
834
|
+
Assign new or updated variables (columns) and create an updated DataFrame.
|
824
835
|
|
825
836
|
- Variables with new keys will append new columns from the right.
|
826
837
|
- Variables with exisiting keys will update corresponding vectors.
|
@@ -1009,7 +1020,7 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1009
1020
|
|
1010
1021
|
### `sort`
|
1011
1022
|
|
1012
|
-
`sort` accepts parameters as sort_keys thanks to the
|
1023
|
+
`sort` accepts parameters as sort_keys thanks to Red Arrow's feature.
|
1013
1024
|
- :key, "key" or "+key" denotes ascending order
|
1014
1025
|
- "-key" denotes descending order
|
1015
1026
|
|
@@ -1040,7 +1051,7 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1040
1051
|
|
1041
1052
|
### `remove_nil`
|
1042
1053
|
|
1043
|
-
Remove any
|
1054
|
+
Remove any records containing nil.
|
1044
1055
|
|
1045
1056
|
## Grouping
|
1046
1057
|
|
@@ -1210,7 +1221,7 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1210
1221
|
|
1211
1222
|
### `to_long(*keep_keys)`
|
1212
1223
|
|
1213
|
-
Creates a 'long' (tidy) DataFrame from a 'wide' DataFrame.
|
1224
|
+
Creates a 'long' (may be tidy) DataFrame from a 'wide' DataFrame.
|
1214
1225
|
|
1215
1226
|
- Parameter `keep_keys` specifies the key names to keep.
|
1216
1227
|
|
@@ -1257,7 +1268,7 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1257
1268
|
|
1258
1269
|
### `to_wide`
|
1259
1270
|
|
1260
|
-
Creates a 'wide' (messy) DataFrame from a 'long' DataFrame.
|
1271
|
+
Creates a 'wide' (may be messy) DataFrame from a 'long' DataFrame.
|
1261
1272
|
|
1262
1273
|
- Option `:name` is the key of the column which will be expanded **to key names**.
|
1263
1274
|
The default value is `:NAME` if it is not specified.
|
@@ -1282,9 +1293,277 @@ When the option `keep_key: true` used, the column `key` will be preserved.
|
|
1282
1293
|
|
1283
1294
|
## Combine
|
1284
1295
|
|
1285
|
-
|
1296
|
+
### `join`
|
1297
|
+

|
1298
|
+
|
1299
|
+
You should use specific `*_join` methods below.
|
1300
|
+
|
1301
|
+
- `other` is a DataFrame or an Arrow::Table.
|
1302
|
+
- `join_keys` are keys shared by self and other to match with them.
|
1303
|
+
- If `join_keys` are empty, common keys in self and other are chosen (natural join).
|
1304
|
+
- If there are more common keys than `join_keys`, the duplicated keys are renamed with `suffix`.
|
1305
|
+
|
1306
|
+
```ruby
|
1307
|
+
df = DataFrame.new(
|
1308
|
+
KEY: %w[A B C],
|
1309
|
+
X1: [1, 2, 3]
|
1310
|
+
)
|
1311
|
+
#=>
|
1312
|
+
#<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000012a70>
|
1313
|
+
KEY X1
|
1314
|
+
<string> <uint8>
|
1315
|
+
0 A 1
|
1316
|
+
1 B 2
|
1317
|
+
2 C 3
|
1318
|
+
|
1319
|
+
other = DataFrame.new(
|
1320
|
+
KEY: %w[A B D],
|
1321
|
+
X2: [true, false, nil]
|
1322
|
+
)
|
1323
|
+
#=>
|
1324
|
+
#<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000017034>
|
1325
|
+
KEY X2
|
1326
|
+
<string> <boolean>
|
1327
|
+
0 A true
|
1328
|
+
1 B false
|
1329
|
+
2 D (nil)
|
1330
|
+
```
|
1331
|
+
|
1332
|
+
#### Mutating joins
|
1333
|
+
|
1334
|
+
##### `inner_join(other, join_keys = nil, suffix: '.1')`
|
1335
|
+
|
1336
|
+
Join data, leaving only the matching records.
|
1337
|
+
|
1338
|
+
```ruby
|
1339
|
+
df.inner_join(other, :KEY)
|
1340
|
+
#=>
|
1341
|
+
#<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000001e2bc>
|
1342
|
+
KEY X1 X2
|
1343
|
+
<string> <uint8> <boolean>
|
1344
|
+
0 A 1 true
|
1345
|
+
1 B 2 false
|
1346
|
+
```
|
1347
|
+
|
1348
|
+
##### `full_join(other, join_keys = nil, suffix: '.1')`
|
1349
|
+
|
1350
|
+
Join data, leaving all records.
|
1351
|
+
|
1352
|
+
```ruby
|
1353
|
+
df.full_join(other, :KEY)
|
1354
|
+
#=>
|
1355
|
+
#<RedAmber::DataFrame : 4 x 3 Vectors, 0x0000000000029fcc>
|
1356
|
+
KEY X1 X2
|
1357
|
+
<string> <uint8> <boolean>
|
1358
|
+
0 A 1 true
|
1359
|
+
1 B 2 false
|
1360
|
+
2 C 3 (nil)
|
1361
|
+
3 D (nil) (nil)
|
1362
|
+
```
|
1363
|
+
|
1364
|
+
##### `left_join(other, join_keys = nil, suffix: '.1')`
|
1365
|
+
|
1366
|
+
Join matching values to self from other.
|
1367
|
+
|
1368
|
+
```ruby
|
1369
|
+
df.left_join(other, :KEY)
|
1370
|
+
#=>
|
1371
|
+
#<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000029fcc>
|
1372
|
+
KEY X1 X2
|
1373
|
+
<string> <uint8> <boolean>
|
1374
|
+
0 A 1 true
|
1375
|
+
1 B 2 false
|
1376
|
+
2 C 3 (nil)
|
1377
|
+
```
|
1378
|
+
|
1379
|
+
##### `right_join(other, join_keys = nil, suffix: '.1')`
|
1380
|
+
|
1381
|
+
Join matching values from self to other.
|
1286
1382
|
|
1287
|
-
|
1383
|
+
```ruby
|
1384
|
+
df.right_join(other, :KEY)
|
1385
|
+
#=>
|
1386
|
+
#<RedAmber::DataFrame : 3 x 3 Vectors, 0x0000000000029fcc>
|
1387
|
+
KEY X1 X2
|
1388
|
+
<string> <uint8> <boolean>
|
1389
|
+
0 A 1 true
|
1390
|
+
1 B 2 false
|
1391
|
+
2 D (nil) (nil)
|
1392
|
+
```
|
1393
|
+
|
1394
|
+
#### Filtering join
|
1395
|
+
|
1396
|
+
##### `semi_join(other, join_keys = nil, suffix: '.1')`
|
1397
|
+
|
1398
|
+
Return records of self that have a match in other.
|
1399
|
+
|
1400
|
+
```ruby
|
1401
|
+
df.semi_join(other, :KEY)
|
1402
|
+
#=>
|
1403
|
+
#<RedAmber::DataFrame : 2 x 2 Vectors, 0x0000000000029fcc>
|
1404
|
+
KEY X1
|
1405
|
+
<string> <uint8>
|
1406
|
+
0 A 1
|
1407
|
+
1 B 2
|
1408
|
+
```
|
1409
|
+
|
1410
|
+
##### `anti_join(other, join_keys = nil, suffix: '.1')`
|
1411
|
+
|
1412
|
+
Return records of self that do not have a match in other.
|
1413
|
+
|
1414
|
+
```ruby
|
1415
|
+
df.anti_join(other, :KEY)
|
1416
|
+
#=>
|
1417
|
+
#<RedAmber::DataFrame : 1 x 2 Vectors, 0x0000000000029fcc>
|
1418
|
+
KEY X1
|
1419
|
+
<string> <uint8>
|
1420
|
+
0 C 3
|
1421
|
+
```
|
1422
|
+
|
1423
|
+
## Set operations
|
1424
|
+

|
1425
|
+
|
1426
|
+
Keys in self and other must be same in set operations.
|
1427
|
+
|
1428
|
+
```ruby
|
1429
|
+
df = DataFrame.new(
|
1430
|
+
KEY1: %w[A B C],
|
1431
|
+
KEY2: [1, 2, 3]
|
1432
|
+
)
|
1433
|
+
#=>
|
1434
|
+
#<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000012a70>
|
1435
|
+
KEY1 KEY2
|
1436
|
+
<string> <uint8>
|
1437
|
+
0 A 1
|
1438
|
+
1 B 2
|
1439
|
+
2 C 3
|
1440
|
+
|
1441
|
+
other = DataFrame.new(
|
1442
|
+
KEY1: %w[A B D],
|
1443
|
+
KEY2: [1, 4, 5]
|
1444
|
+
)
|
1445
|
+
#=>
|
1446
|
+
#<RedAmber::DataFrame : 3 x 2 Vectors, 0x0000000000017034>
|
1447
|
+
KEY1 KEY2
|
1448
|
+
<string> <uint8>
|
1449
|
+
0 A 1
|
1450
|
+
1 B 4
|
1451
|
+
2 D 5
|
1452
|
+
```
|
1453
|
+
|
1454
|
+
##### `intersect(other)`
|
1455
|
+
|
1456
|
+
Select records appearing in both self and other.
|
1457
|
+
|
1458
|
+
```ruby
|
1459
|
+
df.intersect(other)
|
1460
|
+
#=>
|
1461
|
+
#<RedAmber::DataFrame : 1 x 2 Vectors, 0x0000000000029fcc>
|
1462
|
+
KEY1 KEY2
|
1463
|
+
<string> <uint8>
|
1464
|
+
0 A 1
|
1465
|
+
```
|
1466
|
+
|
1467
|
+
##### `union(other)`
|
1468
|
+
|
1469
|
+
Select records appearing in self or other.
|
1470
|
+
|
1471
|
+
```ruby
|
1472
|
+
df.union(other)
|
1473
|
+
#=>
|
1474
|
+
#<RedAmber::DataFrame : 5 x 2 Vectors, 0x0000000000029fcc>
|
1475
|
+
KEY1 KEY2
|
1476
|
+
<string> <uint8>
|
1477
|
+
0 A 1
|
1478
|
+
1 B 2
|
1479
|
+
2 C 3
|
1480
|
+
3 B 4
|
1481
|
+
4 D 5
|
1482
|
+
```
|
1483
|
+
|
1484
|
+
##### `difference(other)`
|
1485
|
+
|
1486
|
+
Select records appearing in self but not in other.
|
1487
|
+
|
1488
|
+
It has an alias `setdiff`.
|
1489
|
+
|
1490
|
+
```ruby
|
1491
|
+
df.difference(other)
|
1492
|
+
#=>
|
1493
|
+
#<RedAmber::DataFrame : 2 x 2 Vectors, 0x0000000000029fcc>
|
1494
|
+
KEY1 KEY2
|
1495
|
+
<string> <uint8>
|
1496
|
+
0 B 2
|
1497
|
+
1 C 3
|
1498
|
+
```
|
1499
|
+
|
1500
|
+
## Binding
|
1501
|
+
|
1502
|
+
### `concatenate(other)`
|
1503
|
+
|
1504
|
+
Concatenate another DataFrame or Table onto the bottom of self. The shape and data type of other must be the same as self.
|
1505
|
+
|
1506
|
+
The alias is `concat`.
|
1507
|
+
|
1508
|
+
An array of DataFrames or Tables is also acceptable as other.
|
1509
|
+
|
1510
|
+
```ruby
|
1511
|
+
df
|
1512
|
+
#=>
|
1513
|
+
#<RedAmber::DataFrame : 2 x 2 Vectors, 0x0000000000022cb8>
|
1514
|
+
x y
|
1515
|
+
<uint8> <string>
|
1516
|
+
0 1 A
|
1517
|
+
1 2 B
|
1518
|
+
|
1519
|
+
other
|
1520
|
+
#=>
|
1521
|
+
#<RedAmber::DataFrame : 2 x 2 Vectors, 0x000000000001f6d0>
|
1522
|
+
x y
|
1523
|
+
<uint8> <string>
|
1524
|
+
0 3 C
|
1525
|
+
1 4 D
|
1526
|
+
|
1527
|
+
df.concatenate(other)
|
1528
|
+
#=>
|
1529
|
+
#<RedAmber::DataFrame : 4 x 2 Vectors, 0x0000000000022574>
|
1530
|
+
x y
|
1531
|
+
<uint8> <string>
|
1532
|
+
0 1 A
|
1533
|
+
1 2 B
|
1534
|
+
2 3 C
|
1535
|
+
3 4 D
|
1536
|
+
```
|
1537
|
+
|
1538
|
+
### `merge(other)`
|
1539
|
+
|
1540
|
+
Merge another DataFrame or Table onto the right side of self, adding its variables (columns). Other must have the same number of records as self, and its keys must not duplicate the keys of self.
|
1541
|
+
|
1542
|
+
```ruby
|
1543
|
+
df
|
1544
|
+
#=>
|
1545
|
+
#<RedAmber::DataFrame : 2 x 2 Vectors, 0x0000000000009150>
|
1546
|
+
x y
|
1547
|
+
<uint8> <uint8>
|
1548
|
+
0 1 3
|
1549
|
+
1 2 4
|
1550
|
+
|
1551
|
+
other
|
1552
|
+
#=>
|
1553
|
+
#<RedAmber::DataFrame : 2 x 2 Vectors, 0x0000000000008a0c>
|
1554
|
+
a b
|
1555
|
+
<string> <string>
|
1556
|
+
0 A C
|
1557
|
+
1 B D
|
1558
|
+
|
1559
|
+
df.merge(other)
|
1560
|
+
#=>
|
1561
|
+
#<RedAmber::DataFrame : 2 x 4 Vectors, 0x000000000000cb70>
|
1562
|
+
x y a b
|
1563
|
+
<uint8> <uint8> <string> <string>
|
1564
|
+
0 1 3 A C
|
1565
|
+
1 2 4 B D
|
1566
|
+
```
|
1288
1567
|
|
1289
1568
|
## Encoding
|
1290
1569
|
|
data/doc/Vector.md
CHANGED
@@ -24,6 +24,9 @@ Class `RedAmber::Vector` represents a series of data in the DataFrame.
|
|
24
24
|
vector = Vector.new(1..3)
|
25
25
|
# or
|
26
26
|
vector = Vector.new(Arrow::Array.new([1, 2, 3]))
|
27
|
+
# or
|
28
|
+
require 'arrow-numo-narray'
|
29
|
+
vector = Vector.new(Numo::Int8[1, 2, 3])
|
27
30
|
|
28
31
|
# =>
|
29
32
|
#<RedAmber::Vector(:uint8, size=3):0x000000000000f514>
|
Binary file
|
Binary file
|
Binary file
|