red_amber 0.1.7 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4872 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "id": "e355db8b-ebb6-4ea6-97b5-3b9fdadc302c",
6
- "metadata": {},
7
- "source": [
8
- "# 47 examples of Red Amber"
9
- ]
10
- },
11
- {
12
- "cell_type": "markdown",
13
- "id": "f20f4970-db38-4d96-9a36-d4cf9d007596",
14
- "metadata": {},
15
- "source": [
16
- "Last update: July 15, 2022 / RedAmber Version 0.1.7"
17
- ]
18
- },
19
- {
20
- "cell_type": "markdown",
21
- "id": "f6e927d0-b59a-4c4e-9f8a-4fa08f9a6b2f",
22
- "metadata": {},
23
- "source": [
24
- "## 1. Install"
25
- ]
26
- },
27
- {
28
- "cell_type": "markdown",
29
- "id": "85eacfe6-fa11-4749-844f-5914d6cd7dbc",
30
- "metadata": {},
31
- "source": [
32
- "Install requirements before you install Red Amber.\n",
33
- "\n",
34
- "- Apache Arrow GLib (>= 8.0.0)\n",
35
- "\n",
36
- "- Apache Parquet GLib (>= 8.0.0) # if you need IO from/to Parquet resource.\n",
37
- "\n",
38
- " See [Apache Arrow install document](https://arrow.apache.org/install/).\n",
39
- " \n",
40
- " Minimum installation example for the latest Ubuntu is in the ['Prepare the Apache Arrow' section in ci test](https://github.com/heronshoes/red_amber/blob/master/.github/workflows/test.yml) of Red Amber.\n",
41
- "\n",
42
- "Then add this line to your Gemfile:\n",
43
- "```\n",
44
- "gem 'red_amber'\n",
45
- "```\n",
46
- "\n",
47
- "And then execute:\n",
48
- "```\n",
49
- "$ bundle install\n",
50
- "```\n",
51
- "\n",
52
- "Or install it yourself as:\n",
53
- "```\n",
54
- "$ gem install red_amber\n",
55
- "```"
56
- ]
57
- },
58
- {
59
- "cell_type": "markdown",
60
- "id": "8c08c45d-0818-4b43-bc65-4d43dd8b6b66",
61
- "metadata": {},
62
- "source": [
63
- "## 2. Require"
64
- ]
65
- },
66
- {
67
- "cell_type": "code",
68
- "execution_count": 1,
69
- "id": "74b76022-03ea-40ae-bac8-fc8743659042",
70
- "metadata": {},
71
- "outputs": [
72
- {
73
- "data": {
74
- "text/plain": [
75
- "\"0.1.7\""
76
- ]
77
- },
78
- "execution_count": 1,
79
- "metadata": {},
80
- "output_type": "execute_result"
81
- }
82
- ],
83
- "source": [
84
- "require 'red_amber' # require 'red-amber' is also OK\n",
85
- "include RedAmber\n",
86
- "VERSION"
87
- ]
88
- },
89
- {
90
- "cell_type": "markdown",
91
- "id": "d8fb6289-39ea-4fa9-a165-b87ee6d125e9",
92
- "metadata": {
93
- "tags": []
94
- },
95
- "source": [
96
- "## 3. Initialize"
97
- ]
98
- },
99
- {
100
- "cell_type": "code",
101
- "execution_count": 2,
102
- "id": "51f81824-626a-4741-a29b-30ea357fe7b5",
103
- "metadata": {},
104
- "outputs": [
105
- {
106
- "data": {
107
- "text/html": [
108
- "RedAmber::DataFrame <3 x 2 vectors> <table><tr><th>x</th><th>y</th></tr><tr><td>1</td><td>A</td></tr><tr><td>2</td><td>B</td></tr><tr><td>3</td><td>C</td></tr></table>"
109
- ],
110
- "text/plain": [
111
- "#<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f85c>\n",
112
- " x y\n",
113
- " <uint8> <string>\n",
114
- "1 1 A\n",
115
- "2 2 B\n",
116
- "3 3 C\n"
117
- ]
118
- },
119
- "execution_count": 2,
120
- "metadata": {},
121
- "output_type": "execute_result"
122
- }
123
- ],
124
- "source": [
125
- "# From a Hash\n",
126
- "DataFrame.new(x: [1, 2, 3], y: %w[A B C])"
127
- ]
128
- },
129
- {
130
- "cell_type": "code",
131
- "execution_count": 3,
132
- "id": "20b696eb-c199-444d-a957-e0b1081f1506",
133
- "metadata": {},
134
- "outputs": [
135
- {
136
- "data": {
137
- "text/html": [
138
- "RedAmber::DataFrame <3 x 2 vectors> <table><tr><th>x</th><th>y</th></tr><tr><td>1</td><td>A</td></tr><tr><td>2</td><td>B</td></tr><tr><td>3</td><td>C</td></tr></table>"
139
- ],
140
- "text/plain": [
141
- "#<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f870>\n",
142
- " x y\n",
143
- " <uint8> <string>\n",
144
- "1 1 A\n",
145
- "2 2 B\n",
146
- "3 3 C\n"
147
- ]
148
- },
149
- "execution_count": 3,
150
- "metadata": {},
151
- "output_type": "execute_result"
152
- }
153
- ],
154
- "source": [
155
- "# From a schema and a row-oriented array\n",
156
- "DataFrame.new({ x: :uint8, y: :string }, [[1, 'A'], [2, 'B'], [3, 'C']])"
157
- ]
158
- },
159
- {
160
- "cell_type": "code",
161
- "execution_count": 4,
162
- "id": "21eab151-f977-4474-a6d1-576169e24b26",
163
- "metadata": {},
164
- "outputs": [
165
- {
166
- "data": {
167
- "text/html": [
168
- "RedAmber::DataFrame <3 x 2 vectors> <table><tr><th>x</th><th>y</th></tr><tr><td>1</td><td>A</td></tr><tr><td>2</td><td>B</td></tr><tr><td>3</td><td>C</td></tr></table>"
169
- ],
170
- "text/plain": [
171
- "#<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f884>\n",
172
- " x y\n",
173
- " <uint8> <string>\n",
174
- "1 1 A\n",
175
- "2 2 B\n",
176
- "3 3 C\n"
177
- ]
178
- },
179
- "execution_count": 4,
180
- "metadata": {},
181
- "output_type": "execute_result"
182
- }
183
- ],
184
- "source": [
185
- "# From an Arrow::Table\n",
186
- "table = Arrow::Table.new(x: [1, 2, 3], y: %w[A B C])\n",
187
- "DataFrame.new(table)"
188
- ]
189
- },
190
- {
191
- "cell_type": "code",
192
- "execution_count": 5,
193
- "id": "aa09d3da-f332-45cd-92ca-712c6a679035",
194
- "metadata": {},
195
- "outputs": [
196
- {
197
- "data": {
198
- "text/html": [
199
- "RedAmber::DataFrame <3 x 2 vectors> <table><tr><th>x</th><th>y</th></tr><tr><td>1</td><td>A</td></tr><tr><td>2</td><td>B</td></tr><tr><td>3</td><td>C</td></tr></table>"
200
- ],
201
- "text/plain": [
202
- "#<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f898>\n",
203
- " x y\n",
204
- " <uint8> <string>\n",
205
- "1 1 A\n",
206
- "2 2 B\n",
207
- "3 3 C\n"
208
- ]
209
- },
210
- "execution_count": 5,
211
- "metadata": {},
212
- "output_type": "execute_result"
213
- }
214
- ],
215
- "source": [
216
- "# From a Rover::DataFrame\n",
217
- "require 'rover'\n",
218
- "rover = Rover::DataFrame.new(x: [1, 2, 3], y: %w[A B C])\n",
219
- "DataFrame.new(rover)"
220
- ]
221
- },
222
- {
223
- "cell_type": "code",
224
- "execution_count": 6,
225
- "id": "cd2c3677-00fb-48fe-bb94-18bc0815db72",
226
- "metadata": {},
227
- "outputs": [
228
- {
229
- "data": {
230
- "text/html": [
231
- "RedAmber::DataFrame <344 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.1</td><td>18.7</td><td>181</td><td>3750</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.5</td><td>17.4</td><td>186</td><td>3800</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td>2007</td></tr><tr><td colspan='8'>&#8942;</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.4</td><td>15.7</td><td>222</td><td>5750</td><td>male</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>45.2</td><td>14.8</td><td>212</td><td>5200</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>49.9</td><td>16.1</td><td>213</td><td>5400</td><td>male</td><td>2009</td></tr></table>"
232
- ],
233
- "text/plain": [
234
- "#<RedAmber::DataFrame : 344 x 8 Vectors, 0x000000000000f8ac>\n",
235
- " species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
236
- " <string> <string> <double> <double> <uint8> ... <uint16>\n",
237
- " 1 Adelie Torgersen 39.1 18.7 181 ... 2007\n",
238
- " 2 Adelie Torgersen 39.5 17.4 186 ... 2007\n",
239
- " 3 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
240
- " 4 Adelie Torgersen (nil) (nil) (nil) ... 2007\n",
241
- " 5 Adelie Torgersen 36.7 19.3 193 ... 2007\n",
242
- " : : : : : : ... :\n",
243
- "342 Gentoo Biscoe 50.4 15.7 222 ... 2009\n",
244
- "343 Gentoo Biscoe 45.2 14.8 212 ... 2009\n",
245
- "344 Gentoo Biscoe 49.9 16.1 213 ... 2009\n"
246
- ]
247
- },
248
- "execution_count": 6,
249
- "metadata": {},
250
- "output_type": "execute_result"
251
- }
252
- ],
253
- "source": [
254
- "# From a red-datasets dataset\n",
255
- "require 'datasets-arrow'\n",
256
- "dataset = Datasets::Penguins.new\n",
257
- "penguins = DataFrame.new(dataset.to_arrow)"
258
- ]
259
- },
260
- {
261
- "cell_type": "markdown",
262
- "id": "3a2d12b4-7623-42c7-9e32-76cf303c7cea",
263
- "metadata": {},
264
- "source": [
265
- "This should become possible in a future version:\n",
266
- "```ruby\n",
267
- "require 'datasets-red-amber'\n",
268
- "penguins = Datasets::Penguins.new.to_red_amber\n",
269
- "```"
270
- ]
271
- },
272
- {
273
- "cell_type": "code",
274
- "execution_count": 7,
275
- "id": "2e4619b7-bf6d-4081-9066-b186da8fdf5b",
276
- "metadata": {},
277
- "outputs": [
278
- {
279
- "data": {
280
- "text/html": [
281
- "RedAmber::DataFrame <32 x 11 vectors> <table><tr><th>mpg</th><th>cyl</th><th>disp</th><th>hp</th><th>drat</th><th>wt</th><th>qsec</th><th>vs</th><th>am</th><th>gear</th><th>carb</th></tr><tr><td>21.0</td><td>6</td><td>160.0</td><td>110</td><td>3.9</td><td>2.62</td><td>16.46</td><td>0</td><td>1</td><td>4</td><td>4</td></tr><tr><td>21.0</td><td>6</td><td>160.0</td><td>110</td><td>3.9</td><td>2.875</td><td>17.02</td><td>0</td><td>1</td><td>4</td><td>4</td></tr><tr><td>22.8</td><td>4</td><td>108.0</td><td>93</td><td>3.85</td><td>2.32</td><td>18.61</td><td>1</td><td>1</td><td>4</td><td>1</td></tr><tr><td>21.4</td><td>6</td><td>258.0</td><td>110</td><td>3.08</td><td>3.215</td><td>19.44</td><td>1</td><td>0</td><td>3</td><td>1</td></tr><tr><td colspan='11'>&#8942;</td></tr><tr><td>19.7</td><td>6</td><td>145.0</td><td>175</td><td>3.62</td><td>2.77</td><td>15.5</td><td>0</td><td>1</td><td>5</td><td>6</td></tr><tr><td>15.0</td><td>8</td><td>301.0</td><td>335</td><td>3.54</td><td>3.57</td><td>14.6</td><td>0</td><td>1</td><td>5</td><td>8</td></tr><tr><td>21.4</td><td>4</td><td>121.0</td><td>109</td><td>4.11</td><td>2.78</td><td>18.6</td><td>1</td><td>1</td><td>4</td><td>2</td></tr></table>"
282
- ],
283
- "text/plain": [
284
- "#<RedAmber::DataFrame : 32 x 11 Vectors, 0x000000000000f8c0>\n",
285
- " mpg cyl disp hp drat wt qsec vs am ... carb\n",
286
- " <double> <uint8> <double> <uint16> <double> <double> <double> <uint8> <uint8> ... <uint8>\n",
287
- " 1 21.0 6 160.0 110 3.9 2.6 16.5 0 1 ... 4\n",
288
- " 2 21.0 6 160.0 110 3.9 2.9 17.0 0 1 ... 4\n",
289
- " 3 22.8 4 108.0 93 3.9 2.3 18.6 1 1 ... 1\n",
290
- " 4 21.4 6 258.0 110 3.1 3.2 19.4 1 0 ... 1\n",
291
- " 5 18.7 8 360.0 175 3.2 3.4 17.0 0 0 ... 2\n",
292
- " : : : : : : : : : : ... :\n",
293
- "30 19.7 6 145.0 175 3.6 2.8 15.5 0 1 ... 6\n",
294
- "31 15.0 8 301.0 335 3.5 3.6 14.6 0 1 ... 8\n",
295
- "32 21.4 4 121.0 109 4.1 2.8 18.6 1 1 ... 2\n"
296
- ]
297
- },
298
- "execution_count": 7,
299
- "metadata": {},
300
- "output_type": "execute_result"
301
- }
302
- ],
303
- "source": [
304
- "dataset = Datasets::Rdatasets.new('datasets', 'mtcars')\n",
305
- "mtcars = DataFrame.new(dataset.to_arrow)"
306
- ]
307
- },
308
- {
309
- "cell_type": "markdown",
310
- "id": "e1f77a54-3a43-4d17-bb6f-332ef13832a3",
311
- "metadata": {},
312
- "source": [
313
- "## 4. Load"
314
- ]
315
- },
316
- {
317
- "cell_type": "markdown",
318
- "id": "0fed4f43-3fbb-43e5-af0d-f93401deea78",
319
- "metadata": {},
320
- "source": [
321
- "`RedAmber::DataFrame` delegates `#load` to `Arrow::Table#load`. We can load from `[.arrow, .arrows, .csv, .csv.gz, .tsv]` files."
322
- ]
323
- },
324
- {
325
- "cell_type": "code",
326
- "execution_count": 8,
327
- "id": "4203e671-0a0a-405c-8482-53a8cd78a891",
328
- "metadata": {},
329
- "outputs": [
330
- {
331
- "data": {
332
- "text/html": [
333
- "RedAmber::DataFrame <3 x 2 vectors> <table><tr><th>name</th><th>age</th></tr><tr><td>Yasuko</td><td>68</td></tr><tr><td>Rui</td><td>49</td></tr><tr><td>Hinata</td><td>28</td></tr></table>"
334
- ],
335
- "text/plain": [
336
- "#<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f8d4>\n",
337
- " name age\n",
338
- " <string> <int64>\n",
339
- "1 Yasuko 68\n",
340
- "2 Rui 49\n",
341
- "3 Hinata 28\n"
342
- ]
343
- },
344
- "execution_count": 8,
345
- "metadata": {},
346
- "output_type": "execute_result"
347
- }
348
- ],
349
- "source": [
350
- "DataFrame.load(\"test/entity/with_header.csv\")"
351
- ]
352
- },
353
- {
354
- "cell_type": "markdown",
355
- "id": "29875147-1371-4575-a565-69c3534c15f2",
356
- "metadata": {},
357
- "source": [
358
- "## 5. Load from a URI"
359
- ]
360
- },
361
- {
362
- "cell_type": "code",
363
- "execution_count": 9,
364
- "id": "916b86e2-e3a2-4ebb-8770-9e8a29c46523",
365
- "metadata": {},
366
- "outputs": [
367
- {
368
- "data": {
369
- "text/html": [
370
- "RedAmber::DataFrame <344 x 7 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.1</td><td>18.7</td><td>181</td><td>3750</td><td>MALE</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.5</td><td>17.4</td><td>186</td><td>3800</td><td>FEMALE</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>FEMALE</td></tr><tr><td>Adelie</td><td>Torgersen</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td></td></tr><tr><td colspan='7'>&#8942;</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.4</td><td>15.7</td><td>222</td><td>5750</td><td>MALE</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>45.2</td><td>14.8</td><td>212</td><td>5200</td><td>FEMALE</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>49.9</td><td>16.1</td><td>213</td><td>5400</td><td>MALE</td></tr></table>"
371
- ],
372
- "text/plain": [
373
- "#<RedAmber::DataFrame : 344 x 7 Vectors, 0x000000000000f8e8>\n",
374
- " species island bill_length_mm bill_depth_mm flipper_length_mm ... sex\n",
375
- " <string> <string> <double> <double> <int64> ... <string>\n",
376
- " 1 Adelie Torgersen 39.1 18.7 181 ... MALE\n",
377
- " 2 Adelie Torgersen 39.5 17.4 186 ... FEMALE\n",
378
- " 3 Adelie Torgersen 40.3 18.0 195 ... FEMALE\n",
379
- " 4 Adelie Torgersen (nil) (nil) (nil) ...\n",
380
- " 5 Adelie Torgersen 36.7 19.3 193 ... FEMALE\n",
381
- " : : : : : : ... :\n",
382
- "342 Gentoo Biscoe 50.4 15.7 222 ... MALE\n",
383
- "343 Gentoo Biscoe 45.2 14.8 212 ... FEMALE\n",
384
- "344 Gentoo Biscoe 49.9 16.1 213 ... MALE\n"
385
- ]
386
- },
387
- "execution_count": 9,
388
- "metadata": {},
389
- "output_type": "execute_result"
390
- }
391
- ],
392
- "source": [
393
- "uri = URI(\"https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv\")\n",
394
- "DataFrame.load(uri)"
395
- ]
396
- },
397
- {
398
- "cell_type": "markdown",
399
- "id": "e6abe64d-e97f-437e-9c54-18f9e06e9668",
400
- "metadata": {},
401
- "source": [
402
- "## 6. Save"
403
- ]
404
- },
405
- {
406
- "cell_type": "code",
407
- "execution_count": 10,
408
- "id": "91c0fb62-7990-47f1-9fb6-b0529bc1783f",
409
- "metadata": {},
410
- "outputs": [
411
- {
412
- "data": {
413
- "text/plain": [
414
- "true"
415
- ]
416
- },
417
- "execution_count": 10,
418
- "metadata": {},
419
- "output_type": "execute_result"
420
- }
421
- ],
422
- "source": [
423
- "penguins.save(\"file.arrow\")\n",
424
- "penguins.save(\"file.arrows\")\n",
425
- "penguins.save(\"file.csv\")\n",
426
- "penguins.save(\"file.csv.gz\")\n",
427
- "penguins.save(\"file.tsv\")\n",
428
- "penguins.save(\"file.feather\")"
429
- ]
430
- },
431
- {
432
- "cell_type": "markdown",
433
- "id": "d1d30973-9e2f-406a-9f42-9e6e4c966baf",
434
- "metadata": {},
435
- "source": [
436
- "## 7. to_s/inspect"
437
- ]
438
- },
439
- {
440
- "cell_type": "markdown",
441
- "id": "a7bc9cb7-eae4-495f-831e-b747e486d0bd",
442
- "metadata": {},
443
- "source": [
444
- "`to_s` or `inspect` (it uses to_s inside) shows a preview of the dataframe.\n",
445
- "\n",
446
- "It shows the first 5 and last 3 rows if it has many rows. Columns are also omitted if a line exceeds 80 characters."
447
- ]
448
- },
449
- {
450
- "cell_type": "code",
451
- "execution_count": 11,
452
- "id": "af6d29ef-2e1c-4a08-a8b2-d69acda79ec5",
453
- "metadata": {},
454
- "outputs": [
455
- {
456
- "name": "stdout",
457
- "output_type": "stream",
458
- "text": [
459
- "#<RedAmber::DataFrame : 5 x 4 Vectors, 0x000000000000f8fc>\n",
460
- " x y s b\n",
461
- " <uint8> <double> <string> <boolean>\n",
462
- "1 1 1.0 A true\n",
463
- "2 2 2.0 B false\n",
464
- "3 3 3.0 C true\n",
465
- "4 4 NaN D false\n",
466
- "5 5 (nil) (nil) (nil)\n",
467
- "\n"
468
- ]
469
- }
470
- ],
471
- "source": [
472
- "df = DataFrame.new(\n",
473
- " x: [1, 2, 3, 4, 5],\n",
474
- " y: [1, 2, 3, 0/0.0, nil],\n",
475
- " s: %w[A B C D] << nil,\n",
476
- " b: [true, false, true, false, nil])\n",
477
- "p df; nil"
478
- ]
479
- },
480
- {
481
- "cell_type": "code",
482
- "execution_count": 12,
483
- "id": "cdff2e60-bd0a-4d12-b348-201a49bbbbbe",
484
- "metadata": {},
485
- "outputs": [
486
- {
487
- "name": "stdout",
488
- "output_type": "stream",
489
- "text": [
490
- "#<RedAmber::DataFrame : 344 x 8 Vectors, 0x000000000000f8ac>\n",
491
- " species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
492
- " <string> <string> <double> <double> <uint8> ... <uint16>\n",
493
- " 1 Adelie Torgersen 39.1 18.7 181 ... 2007\n",
494
- " 2 Adelie Torgersen 39.5 17.4 186 ... 2007\n",
495
- " 3 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
496
- " 4 Adelie Torgersen (nil) (nil) (nil) ... 2007\n",
497
- " 5 Adelie Torgersen 36.7 19.3 193 ... 2007\n",
498
- " : : : : : : ... :\n",
499
- "342 Gentoo Biscoe 50.4 15.7 222 ... 2009\n",
500
- "343 Gentoo Biscoe 45.2 14.8 212 ... 2009\n",
501
- "344 Gentoo Biscoe 49.9 16.1 213 ... 2009\n",
502
- "\n"
503
- ]
504
- }
505
- ],
506
- "source": [
507
- "p penguins; nil"
508
- ]
509
- },
510
- {
511
- "cell_type": "markdown",
512
- "id": "cb44df38-58f7-479c-b7a4-c9c305639292",
513
- "metadata": {},
514
- "source": [
515
- "## 8. Show table"
516
- ]
517
- },
518
- {
519
- "cell_type": "code",
520
- "execution_count": 13,
521
- "id": "fc710035-8134-4b18-89fe-8c58b95e0e0e",
522
- "metadata": {},
523
- "outputs": [
524
- {
525
- "data": {
526
- "text/plain": [
527
- "#<Arrow::Table:0x113637c20 ptr=0x7fcc504bb870>\n",
528
- "\tx\t y\ts\tb\n",
529
- "0\t1\t 1.000000\tA\ttrue\n",
530
- "1\t2\t 2.000000\tB\tfalse\n",
531
- "2\t3\t 3.000000\tC\ttrue\n",
532
- "3\t4\t NaN\tD\tfalse\n",
533
- "4\t5\t (null)\t(null)\t(null)\n"
534
- ]
535
- },
536
- "execution_count": 13,
537
- "metadata": {},
538
- "output_type": "execute_result"
539
- }
540
- ],
541
- "source": [
542
- "df.table"
543
- ]
544
- },
545
- {
546
- "cell_type": "code",
547
- "execution_count": 14,
548
- "id": "2634fb7b-194f-4277-94ba-05f39c497ffa",
549
- "metadata": {},
550
- "outputs": [
551
- {
552
- "data": {
553
- "text/plain": [
554
- "#<Arrow::Table:0x10fcb7c20 ptr=0x7fcc5057dc70>\n",
555
- "\tspecies\tisland\tbill_length_mm\tbill_depth_mm\tflipper_length_mm\tbody_mass_g\tsex\tyear\n",
556
- " 0\tAdelie \tTorgersen\t 39.100000\t 18.700000\t 181\t 3750\tmale\t2007\n",
557
- " 1\tAdelie \tTorgersen\t 39.500000\t 17.400000\t 186\t 3800\tfemale\t2007\n",
558
- " 2\tAdelie \tTorgersen\t 40.300000\t 18.000000\t 195\t 3250\tfemale\t2007\n",
559
- " 3\tAdelie \tTorgersen\t (null)\t (null)\t (null)\t (null)\t(null)\t2007\n",
560
- " 4\tAdelie \tTorgersen\t 36.700000\t 19.300000\t 193\t 3450\tfemale\t2007\n",
561
- " 5\tAdelie \tTorgersen\t 39.300000\t 20.600000\t 190\t 3650\tmale\t2007\n",
562
- " 6\tAdelie \tTorgersen\t 38.900000\t 17.800000\t 181\t 3625\tfemale\t2007\n",
563
- " 7\tAdelie \tTorgersen\t 39.200000\t 19.600000\t 195\t 4675\tmale\t2007\n",
564
- " 8\tAdelie \tTorgersen\t 34.100000\t 18.100000\t 193\t 3475\t(null)\t2007\n",
565
- " 9\tAdelie \tTorgersen\t 42.000000\t 20.200000\t 190\t 4250\t(null)\t2007\n",
566
- "...\n",
567
- "334\tGentoo \tBiscoe\t 46.200000\t 14.100000\t 217\t 4375\tfemale\t2009\n",
568
- "335\tGentoo \tBiscoe\t 55.100000\t 16.000000\t 230\t 5850\tmale\t2009\n",
569
- "336\tGentoo \tBiscoe\t 44.500000\t 15.700000\t 217\t 4875\t(null)\t2009\n",
570
- "337\tGentoo \tBiscoe\t 48.800000\t 16.200000\t 222\t 6000\tmale\t2009\n",
571
- "338\tGentoo \tBiscoe\t 47.200000\t 13.700000\t 214\t 4925\tfemale\t2009\n",
572
- "339\tGentoo \tBiscoe\t (null)\t (null)\t (null)\t (null)\t(null)\t2009\n",
573
- "340\tGentoo \tBiscoe\t 46.800000\t 14.300000\t 215\t 4850\tfemale\t2009\n",
574
- "341\tGentoo \tBiscoe\t 50.400000\t 15.700000\t 222\t 5750\tmale\t2009\n",
575
- "342\tGentoo \tBiscoe\t 45.200000\t 14.800000\t 212\t 5200\tfemale\t2009\n",
576
- "343\tGentoo \tBiscoe\t 49.900000\t 16.100000\t 213\t 5400\tmale\t2009\n"
577
- ]
578
- },
579
- "execution_count": 14,
580
- "metadata": {},
581
- "output_type": "execute_result"
582
- }
583
- ],
584
- "source": [
585
- "penguins.table"
586
- ]
587
- },
588
- {
589
- "cell_type": "code",
590
- "execution_count": 15,
591
- "id": "9dba2a67-ede7-4663-907b-9b2dd5db1605",
592
- "metadata": {},
593
- "outputs": [
594
- {
595
- "name": "stdout",
596
- "output_type": "stream",
597
- "text": [
598
- "x: uint8\n",
599
- "y: double\n",
600
- "s: string\n",
601
- "b: bool\n",
602
- "----\n",
603
- "x:\n",
604
- " [\n",
605
- " [\n",
606
- " 1,\n",
607
- " 2,\n",
608
- " 3,\n",
609
- " 4,\n",
610
- " 5\n",
611
- " ]\n",
612
- " ]\n",
613
- "y:\n",
614
- " [\n",
615
- " [\n",
616
- " 1,\n",
617
- " 2,\n",
618
- " 3,\n",
619
- " nan,\n",
620
- " null\n",
621
- " ]\n",
622
- " ]\n",
623
- "s:\n",
624
- " [\n",
625
- " [\n",
626
- " \"A\",\n",
627
- " \"B\",\n",
628
- " \"C\",\n",
629
- " \"D\",\n",
630
- " null\n",
631
- " ]\n",
632
- " ]\n",
633
- "b:\n",
634
- " [\n",
635
- " [\n",
636
- " true,\n",
637
- " false,\n",
638
- " true,\n",
639
- " false,\n",
640
- " null\n",
641
- " ]\n",
642
- " ]\n"
643
- ]
644
- }
645
- ],
646
- "source": [
647
- "# This is a feature of Red Arrow\n",
648
- "puts df.table.to_s(format: :column)"
649
- ]
650
- },
651
- {
652
- "cell_type": "code",
653
- "execution_count": 16,
654
- "id": "d1cc17b8-1cfc-4986-9dec-7bca02be32f0",
655
- "metadata": {},
656
- "outputs": [
657
- {
658
- "name": "stdout",
659
- "output_type": "stream",
660
- "text": [
661
- "==================== 0 ====================\n",
662
- "x: 1\n",
663
- "y: 1.000000\n",
664
- "s: A\n",
665
- "b: true\n",
666
- "==================== 1 ====================\n",
667
- "x: 2\n",
668
- "y: 2.000000\n",
669
- "s: B\n",
670
- "b: false\n",
671
- "==================== 2 ====================\n",
672
- "x: 3\n",
673
- "y: 3.000000\n",
674
- "s: C\n",
675
- "b: true\n",
676
- "==================== 3 ====================\n",
677
- "x: 4\n",
678
- "y: NaN\n",
679
- "s: D\n",
680
- "b: false\n",
681
- "==================== 4 ====================\n",
682
- "x: 5\n",
683
- "y: (null)\n",
684
- "s: (null)\n",
685
- "b: (null)\n"
686
- ]
687
- }
688
- ],
689
- "source": [
690
- "# This is also a feature of Red Arrow\n",
691
- "puts df.table.to_s(format: :list)"
692
- ]
693
- },
694
- {
695
- "cell_type": "markdown",
696
- "id": "16e4ae6b-2399-43f0-be8e-65669b95c7b6",
697
- "metadata": {},
698
- "source": [
699
- "## 9. TDR"
700
- ]
701
- },
702
- {
703
- "cell_type": "markdown",
704
- "id": "2d14eb4b-9026-4cc5-a71a-598946d40b67",
705
- "metadata": {},
706
- "source": [
707
- "TDR means 'Transposed Dataframe Representation'. It shows columns laterally, in the same shape as when initializing by a Hash. TDR has some information that is useful for exploratory data processing.\n",
708
- "\n",
709
- "- DataFrame shape: n_rows x n_columns\n",
710
- "- Data types\n",
711
- "- Levels: number of unique elements\n",
712
- "- Data preview: identical values are aggregated if the level is small (tally mode)\n",
713
- "- Counts of abnormal elements: NaN and nil"
714
- ]
715
- },
716
- {
717
- "cell_type": "code",
718
- "execution_count": 17,
719
- "id": "8050462f-7c60-41b7-a011-af11763784dc",
720
- "metadata": {},
721
- "outputs": [
722
- {
723
- "name": "stdout",
724
- "output_type": "stream",
725
- "text": [
726
- "RedAmber::DataFrame : 5 x 4 Vectors\n",
727
- "Vectors : 2 numeric, 1 string, 1 boolean\n",
728
- "# key type level data_preview\n",
729
- "1 :x uint8 5 [1, 2, 3, 4, 5]\n",
730
- "2 :y double 5 [1.0, 2.0, 3.0, NaN, nil], 1 NaN, 1 nil\n",
731
- "3 :s string 5 [\"A\", \"B\", \"C\", \"D\", nil], 1 nil\n",
732
- "4 :b boolean 3 {true=>2, false=>2, nil=>1}\n"
733
- ]
734
- }
735
- ],
736
- "source": [
737
- "# use the same dataframe as #7\n",
738
- "df.tdr"
739
- ]
740
- },
741
- {
742
- "cell_type": "code",
743
- "execution_count": 18,
744
- "id": "bb616ffe-c19a-4b02-a011-601ceb3db656",
745
- "metadata": {},
746
- "outputs": [
747
- {
748
- "name": "stdout",
749
- "output_type": "stream",
750
- "text": [
751
- "RedAmber::DataFrame : 344 x 8 Vectors\n",
752
- "Vectors : 5 numeric, 3 strings\n",
753
- "# key type level data_preview\n",
754
- "1 :species string 3 {\"Adelie\"=>152, \"Chinstrap\"=>68, \"Gentoo\"=>124}\n",
755
- "2 :island string 3 {\"Torgersen\"=>52, \"Biscoe\"=>168, \"Dream\"=>124}\n",
756
- "3 :bill_length_mm double 165 [39.1, 39.5, 40.3, nil, 36.7, ... ], 2 nils\n",
757
- "4 :bill_depth_mm double 81 [18.7, 17.4, 18.0, nil, 19.3, ... ], 2 nils\n",
758
- "5 :flipper_length_mm uint8 56 [181, 186, 195, nil, 193, ... ], 2 nils\n",
759
- "6 :body_mass_g uint16 95 [3750, 3800, 3250, nil, 3450, ... ], 2 nils\n",
760
- "7 :sex string 3 {\"male\"=>168, \"female\"=>165, nil=>11}\n",
761
- "8 :year uint16 3 {2007=>110, 2008=>114, 2009=>120}\n"
762
- ]
763
- }
764
- ],
765
- "source": [
766
- "penguins.tdr"
767
- ]
768
- },
769
- {
770
- "cell_type": "markdown",
771
- "id": "73b8dc18-079f-4d40-8d0e-239f010550da",
772
- "metadata": {},
773
- "source": [
774
- "`#tdr` has some options:\n",
775
- "\n",
776
- "`limit` : limits the number of variables to show. Default value is `limit=10`."
777
- ]
778
- },
779
- {
780
- "cell_type": "code",
781
- "execution_count": 19,
782
- "id": "0962845d-e642-4d2a-9607-43e197b46bc5",
783
- "metadata": {},
784
- "outputs": [
785
- {
786
- "name": "stdout",
787
- "output_type": "stream",
788
- "text": [
789
- "RedAmber::DataFrame : 344 x 8 Vectors\n",
790
- "Vectors : 5 numeric, 3 strings\n",
791
- "# key type level data_preview\n",
792
- "1 :species string 3 {\"Adelie\"=>152, \"Chinstrap\"=>68, \"Gentoo\"=>124}\n",
793
- "2 :island string 3 {\"Torgersen\"=>52, \"Biscoe\"=>168, \"Dream\"=>124}\n",
794
- "3 :bill_length_mm double 165 [39.1, 39.5, 40.3, nil, 36.7, ... ], 2 nils\n",
795
- " ... 5 more Vectors ...\n"
796
- ]
797
- }
798
- ],
799
- "source": [
800
- "penguins.tdr(3)"
801
- ]
802
- },
803
- {
804
- "cell_type": "markdown",
805
- "id": "573606c4-23b9-4b38-8c92-a04f1c1e8781",
806
- "metadata": {},
807
- "source": [
808
- "`elements` : max number of elements to show in observations. Default value is `elements: 5`."
809
- ]
810
- },
811
- {
812
- "cell_type": "code",
813
- "execution_count": 20,
814
- "id": "f957d2bd-e8c0-42a1-a3b4-0a9478e740bf",
815
- "metadata": {},
816
- "outputs": [
817
- {
818
- "name": "stdout",
819
- "output_type": "stream",
820
- "text": [
821
- "RedAmber::DataFrame : 344 x 8 Vectors\n",
822
- "Vectors : 5 numeric, 3 strings\n",
823
- "# key type level data_preview\n",
824
- "1 :species string 3 {\"Adelie\"=>152, \"Chinstrap\"=>68, \"Gentoo\"=>124}\n",
825
- "2 :island string 3 {\"Torgersen\"=>52, \"Biscoe\"=>168, \"Dream\"=>124}\n",
826
- "3 :bill_length_mm double 165 [39.1, 39.5, 40.3, ... ], 2 nils\n",
827
- "4 :bill_depth_mm double 81 [18.7, 17.4, 18.0, ... ], 2 nils\n",
828
- "5 :flipper_length_mm uint8 56 [181, 186, 195, ... ], 2 nils\n",
829
- "6 :body_mass_g uint16 95 [3750, 3800, 3250, ... ], 2 nils\n",
830
- "7 :sex string 3 {\"male\"=>168, \"female\"=>165, nil=>11}\n",
831
- "8 :year uint16 3 {2007=>110, 2008=>114, 2009=>120}\n"
832
- ]
833
- }
834
- ],
835
- "source": [
836
- "penguins.tdr(elements: 3) # Show first 3 items in data"
837
- ]
838
- },
839
- {
840
- "cell_type": "markdown",
841
- "id": "d37ece79-1999-49eb-a2d1-831184ee6509",
842
- "metadata": {},
843
- "source": [
844
- "`tally` : max level to use tally mode. Level means size of `tally`ed hash. Default value is `tally: 5`."
845
- ]
846
- },
847
- {
848
- "cell_type": "code",
849
- "execution_count": 21,
850
- "id": "9c1c472c-3d15-4bca-9a1b-7f86c63d3ed8",
851
- "metadata": {},
852
- "outputs": [
853
- {
854
- "name": "stdout",
855
- "output_type": "stream",
856
- "text": [
857
- "RedAmber::DataFrame : 344 x 8 Vectors\n",
858
- "Vectors : 5 numeric, 3 strings\n",
859
- "# key type level data_preview\n",
860
- "1 :species string 3 [\"Adelie\", \"Adelie\", \"Adelie\", \"Adelie\", \"Adelie\", ... ]\n",
861
- "2 :island string 3 [\"Torgersen\", \"Torgersen\", \"Torgersen\", \"Torgersen\", \"Torgersen\", ... ]\n",
862
- "3 :bill_length_mm double 165 [39.1, 39.5, 40.3, nil, 36.7, ... ], 2 nils\n",
863
- "4 :bill_depth_mm double 81 [18.7, 17.4, 18.0, nil, 19.3, ... ], 2 nils\n",
864
- "5 :flipper_length_mm uint8 56 [181, 186, 195, nil, 193, ... ], 2 nils\n",
865
- "6 :body_mass_g uint16 95 [3750, 3800, 3250, nil, 3450, ... ], 2 nils\n",
866
- "7 :sex string 3 [\"male\", \"female\", \"female\", nil, \"female\", ... ], 11 nils\n",
867
- "8 :year uint16 3 [2007, 2007, 2007, 2007, 2007, ... ]\n"
868
- ]
869
- }
870
- ],
871
- "source": [
872
- "penguins.tdr(tally: 0) # Don't use tally mode"
873
- ]
874
- },
875
- {
876
- "cell_type": "markdown",
877
- "id": "e3c38037-90a1-4fc5-9904-41fc74085908",
878
- "metadata": {},
879
- "source": [
880
- "`#tdr_str` returns a String. `#tdr` does the same thing as `puts #tdr_str`."
881
- ]
882
- },
883
- {
884
- "cell_type": "markdown",
885
- "id": "21d68764-1bc1-4915-99b6-5ae938b85999",
886
- "metadata": {},
887
- "source": [
888
- "## 10. Size and shape"
889
- ]
890
- },
891
- {
892
- "cell_type": "code",
893
- "execution_count": 22,
894
- "id": "487399f8-a3ef-467f-aa7f-ecbaee5fcb75",
895
- "metadata": {},
896
- "outputs": [
897
- {
898
- "data": {
899
- "text/plain": [
900
- "5"
901
- ]
902
- },
903
- "execution_count": 22,
904
- "metadata": {},
905
- "output_type": "execute_result"
906
- }
907
- ],
908
- "source": [
909
- "# same as n_rows, n_obs\n",
910
- "df.size"
911
- ]
912
- },
913
- {
914
- "cell_type": "code",
915
- "execution_count": 23,
916
- "id": "dc7441c3-7c85-4ce1-a20e-de8f41f280b4",
917
- "metadata": {},
918
- "outputs": [
919
- {
920
- "data": {
921
- "text/plain": [
922
- "4"
923
- ]
924
- },
925
- "execution_count": 23,
926
- "metadata": {},
927
- "output_type": "execute_result"
928
- }
929
- ],
930
- "source": [
931
- "# same as n_cols, n_vars\n",
932
- "df.n_keys"
933
- ]
934
- },
935
- {
936
- "cell_type": "code",
937
- "execution_count": 24,
938
- "id": "3d42fea6-801a-45f4-8e22-ea9d76ae070f",
939
- "metadata": {},
940
- "outputs": [
941
- {
942
- "data": {
943
- "text/plain": [
944
- "[5, 4]"
945
- ]
946
- },
947
- "execution_count": 24,
948
- "metadata": {},
949
- "output_type": "execute_result"
950
- }
951
- ],
952
- "source": [
953
- "# [df.size, df.n_keys], [df.n_rows, df.n_cols]\n",
954
- "df.shape"
955
- ]
956
- },
957
- {
958
- "cell_type": "markdown",
959
- "id": "bc5caa94-325f-4014-9c90-8ac909c2b378",
960
- "metadata": {},
961
- "source": [
962
- "## 11. Keys"
963
- ]
964
- },
965
- {
966
- "cell_type": "code",
967
- "execution_count": 25,
968
- "id": "bb47775f-fed0-42e6-8781-aa8b721d6112",
969
- "metadata": {},
970
- "outputs": [
971
- {
972
- "data": {
973
- "text/plain": [
974
- "[:x, :y, :s, :b]"
975
- ]
976
- },
977
- "execution_count": 25,
978
- "metadata": {},
979
- "output_type": "execute_result"
980
- }
981
- ],
982
- "source": [
983
- "df.keys"
984
- ]
985
- },
986
- {
987
- "cell_type": "code",
988
- "execution_count": 26,
989
- "id": "3d540ab0-3e52-47b7-b338-b4e0b3d929cb",
990
- "metadata": {},
991
- "outputs": [
992
- {
993
- "data": {
994
- "text/plain": [
995
- "[:species, :island, :bill_length_mm, :bill_depth_mm, :flipper_length_mm, :body_mass_g, :sex, :year]"
996
- ]
997
- },
998
- "execution_count": 26,
999
- "metadata": {},
1000
- "output_type": "execute_result"
1001
- }
1002
- ],
1003
- "source": [
1004
- "penguins.keys"
1005
- ]
1006
- },
1007
- {
1008
- "cell_type": "markdown",
1009
- "id": "decc6a61-9994-4d60-9827-b257cafafb70",
1010
- "metadata": {},
1011
- "source": [
1012
- "## 12. Types"
1013
- ]
1014
- },
1015
- {
1016
- "cell_type": "code",
1017
- "execution_count": 27,
1018
- "id": "bf9cd2bc-a213-427e-bc00-f2083b0e0471",
1019
- "metadata": {},
1020
- "outputs": [
1021
- {
1022
- "data": {
1023
- "text/plain": [
1024
- "[:uint8, :double, :string, :boolean]"
1025
- ]
1026
- },
1027
- "execution_count": 27,
1028
- "metadata": {},
1029
- "output_type": "execute_result"
1030
- }
1031
- ],
1032
- "source": [
1033
- "df.types"
1034
- ]
1035
- },
1036
- {
1037
- "cell_type": "code",
1038
- "execution_count": 28,
1039
- "id": "b1ecb891-98b5-4919-9f37-1847202007d8",
1040
- "metadata": {},
1041
- "outputs": [
1042
- {
1043
- "data": {
1044
- "text/plain": [
1045
- "[:string, :string, :double, :double, :uint8, :uint16, :string, :uint16]"
1046
- ]
1047
- },
1048
- "execution_count": 28,
1049
- "metadata": {},
1050
- "output_type": "execute_result"
1051
- }
1052
- ],
1053
- "source": [
1054
- "penguins.types"
1055
- ]
1056
- },
1057
- {
1058
- "cell_type": "markdown",
1059
- "id": "869b3670-62f8-4c23-807b-d6d100a1981e",
1060
- "metadata": {},
1061
- "source": [
1062
- "## 13. Data type classes"
1063
- ]
1064
- },
1065
- {
1066
- "cell_type": "code",
1067
- "execution_count": 29,
1068
- "id": "776ab4db-073b-4b30-931a-8ec77284cdc4",
1069
- "metadata": {},
1070
- "outputs": [
1071
- {
1072
- "data": {
1073
- "text/plain": [
1074
- "[Arrow::UInt8DataType, Arrow::DoubleDataType, Arrow::StringDataType, Arrow::BooleanDataType]"
1075
- ]
1076
- },
1077
- "execution_count": 29,
1078
- "metadata": {},
1079
- "output_type": "execute_result"
1080
- }
1081
- ],
1082
- "source": [
1083
- "df.type_classes"
1084
- ]
1085
- },
1086
- {
1087
- "cell_type": "code",
1088
- "execution_count": 30,
1089
- "id": "0546a5d0-cab1-4ca8-a2e5-0637d0fd48b6",
1090
- "metadata": {},
1091
- "outputs": [
1092
- {
1093
- "data": {
1094
- "text/plain": [
1095
- "[Arrow::StringDataType, Arrow::StringDataType, Arrow::DoubleDataType, Arrow::DoubleDataType, Arrow::UInt8DataType, Arrow::UInt16DataType, Arrow::StringDataType, Arrow::UInt16DataType]"
1096
- ]
1097
- },
1098
- "execution_count": 30,
1099
- "metadata": {},
1100
- "output_type": "execute_result"
1101
- }
1102
- ],
1103
- "source": [
1104
- "penguins.type_classes"
1105
- ]
1106
- },
1107
- {
1108
- "cell_type": "markdown",
1109
- "id": "1c2513f6-909e-47fd-a543-66c4f424f44e",
1110
- "metadata": {},
1111
- "source": [
1112
- "## 14. Indices"
1113
- ]
1114
- },
1115
- {
1116
- "cell_type": "code",
1117
- "execution_count": 31,
1118
- "id": "e6e9d7ef-1471-4f23-9210-56045c9fabd5",
1119
- "metadata": {},
1120
- "outputs": [
1121
- {
1122
- "data": {
1123
- "text/plain": [
1124
- "[0, 1, 2, 3, 4]"
1125
- ]
1126
- },
1127
- "execution_count": 31,
1128
- "metadata": {},
1129
- "output_type": "execute_result"
1130
- }
1131
- ],
1132
- "source": [
1133
- "df.indexes\n",
1134
- "# or\n",
1135
- "df.indices"
1136
- ]
1137
- },
1138
- {
1139
- "cell_type": "markdown",
1140
- "id": "3908395f-b086-4fbb-9855-e1ce233f0595",
1141
- "metadata": {},
1142
- "source": [
1143
- "## 15. To an Array or a Hash"
1144
- ]
1145
- },
1146
- {
1147
- "cell_type": "markdown",
1148
- "id": "22cb724e-cf61-40d9-a58b-9cc793e83645",
1149
- "metadata": {},
1150
- "source": [
1151
- "DataFrame#to_a returns an array of row-oriented data without a header."
1152
- ]
1153
- },
1154
- {
1155
- "cell_type": "code",
1156
- "execution_count": 32,
1157
- "id": "4054daad-9266-4002-8942-c0891050cb4d",
1158
- "metadata": {},
1159
- "outputs": [
1160
- {
1161
- "data": {
1162
- "text/plain": [
1163
- "[[1, 1.0, \"A\", true], [2, 2.0, \"B\", false], [3, 3.0, \"C\", true], [4, NaN, \"D\", false], [5, nil, nil, nil]]"
1164
- ]
1165
- },
1166
- "execution_count": 32,
1167
- "metadata": {},
1168
- "output_type": "execute_result"
1169
- }
1170
- ],
1171
- "source": [
1172
- "df.to_a"
1173
- ]
1174
- },
1175
- {
1176
- "cell_type": "markdown",
1177
- "id": "f6abae59-fe31-4056-9de8-7c36e35235de",
1178
- "metadata": {},
1179
- "source": [
1180
- "If you need a column-oriented array with keys, use `.to_h.to_a`"
1181
- ]
1182
- },
1183
- {
1184
- "cell_type": "code",
1185
- "execution_count": 33,
1186
- "id": "d3631290-eb74-4d21-a469-86381c668c7f",
1187
- "metadata": {},
1188
- "outputs": [
1189
- {
1190
- "data": {
1191
- "text/plain": [
1192
- "{:x=>[1, 2, 3, 4, 5], :y=>[1.0, 2.0, 3.0, NaN, nil], :s=>[\"A\", \"B\", \"C\", \"D\", nil], :b=>[true, false, true, false, nil]}"
1193
- ]
1194
- },
1195
- "execution_count": 33,
1196
- "metadata": {},
1197
- "output_type": "execute_result"
1198
- }
1199
- ],
1200
- "source": [
1201
- "df.to_h"
1202
- ]
1203
- },
1204
- {
1205
- "cell_type": "code",
1206
- "execution_count": 34,
1207
- "id": "08c45e92-f640-4e62-bc96-ee259d0ecff4",
1208
- "metadata": {},
1209
- "outputs": [
1210
- {
1211
- "data": {
1212
- "text/plain": [
1213
- "[[:x, [1, 2, 3, 4, 5]], [:y, [1.0, 2.0, 3.0, NaN, nil]], [:s, [\"A\", \"B\", \"C\", \"D\", nil]], [:b, [true, false, true, false, nil]]]"
1214
- ]
1215
- },
1216
- "execution_count": 34,
1217
- "metadata": {},
1218
- "output_type": "execute_result"
1219
- }
1220
- ],
1221
- "source": [
1222
- "df.to_h.to_a"
1223
- ]
1224
- },
1225
- {
1226
- "cell_type": "markdown",
1227
- "id": "39b65fc0-4405-4414-9a74-91c724ef587c",
1228
- "metadata": {},
1229
- "source": [
1230
- "## 16. Schema"
1231
- ]
1232
- },
1233
- {
1234
- "cell_type": "code",
1235
- "execution_count": 35,
1236
- "id": "36db7842-e9b0-4473-84d4-3aef987d427f",
1237
- "metadata": {},
1238
- "outputs": [
1239
- {
1240
- "data": {
1241
- "text/plain": [
1242
- "{:x=>:uint8, :y=>:double, :s=>:string, :b=>:boolean}"
1243
- ]
1244
- },
1245
- "execution_count": 35,
1246
- "metadata": {},
1247
- "output_type": "execute_result"
1248
- }
1249
- ],
1250
- "source": [
1251
- "df.schema"
1252
- ]
1253
- },
1254
- {
1255
- "cell_type": "markdown",
1256
- "id": "3e61237d-ac67-45bb-827c-a769dff61809",
1257
- "metadata": {},
1258
- "source": [
1259
- "## 17. Vector"
1260
- ]
1261
- },
1262
- {
1263
- "cell_type": "markdown",
1264
- "id": "27402307-aaad-49c8-88ca-65346668601d",
1265
- "metadata": {},
1266
- "source": [
1267
- "Each variable (column in the table) is represented by a Vector object."
1268
- ]
1269
- },
1270
- {
1271
- "cell_type": "code",
1272
- "execution_count": 36,
1273
- "id": "6c9ba041-231d-4057-a280-acf620b68525",
1274
- "metadata": {},
1275
- "outputs": [
1276
- {
1277
- "data": {
1278
- "text/plain": [
1279
- "#<RedAmber::Vector(:uint8, size=5):0x000000000000f910>\n",
1280
- "[1, 2, 3, 4, 5]\n"
1281
- ]
1282
- },
1283
- "execution_count": 36,
1284
- "metadata": {},
1285
- "output_type": "execute_result"
1286
- }
1287
- ],
1288
- "source": [
1289
- "df[:x] # This syntax comes later"
1290
- ]
1291
- },
1292
- {
1293
- "cell_type": "markdown",
1294
- "id": "3e13d06d-b432-45b2-9745-0c6ef9228e23",
1295
- "metadata": {},
1296
- "source": [
1297
- "Or create a new Vector with the constructor."
1298
- ]
1299
- },
1300
- {
1301
- "cell_type": "code",
1302
- "execution_count": 37,
1303
- "id": "3e18a4e0-238c-4800-8bda-a88a57dde3e9",
1304
- "metadata": {},
1305
- "outputs": [
1306
- {
1307
- "data": {
1308
- "text/plain": [
1309
- "#<RedAmber::Vector(:uint8, size=5):0x000000000000f924>\n",
1310
- "[1, 2, 3, 4, 5]\n"
1311
- ]
1312
- },
1313
- "execution_count": 37,
1314
- "metadata": {},
1315
- "output_type": "execute_result"
1316
- }
1317
- ],
1318
- "source": [
1319
- "Vector.new(1, 2, 3, 4, 5)"
1320
- ]
1321
- },
1322
- {
1323
- "cell_type": "code",
1324
- "execution_count": 38,
1325
- "id": "3bd55d9d-b988-46b2-bc11-e3dc5f4adc6c",
1326
- "metadata": {},
1327
- "outputs": [
1328
- {
1329
- "data": {
1330
- "text/plain": [
1331
- "#<RedAmber::Vector(:uint8, size=5):0x000000000000f938>\n",
1332
- "[1, 2, 3, 4, 5]\n"
1333
- ]
1334
- },
1335
- "execution_count": 38,
1336
- "metadata": {},
1337
- "output_type": "execute_result"
1338
- }
1339
- ],
1340
- "source": [
1341
- "Vector.new(1..5)"
1342
- ]
1343
- },
1344
- {
1345
- "cell_type": "code",
1346
- "execution_count": 39,
1347
- "id": "19688e6e-b59b-4a84-8c07-57e87cd0e242",
1348
- "metadata": {},
1349
- "outputs": [
1350
- {
1351
- "data": {
1352
- "text/plain": [
1353
- "#<RedAmber::Vector(:uint8, size=5):0x000000000000f94c>\n",
1354
- "[1, 2, 3, 4, 5]\n"
1355
- ]
1356
- },
1357
- "execution_count": 39,
1358
- "metadata": {},
1359
- "output_type": "execute_result"
1360
- }
1361
- ],
1362
- "source": [
1363
- "Vector.new([1, 2, 3], [4, 5])"
1364
- ]
1365
- },
1366
- {
1367
- "cell_type": "code",
1368
- "execution_count": 40,
1369
- "id": "076bd0e2-01ab-4497-9b9b-84f72a4805bc",
1370
- "metadata": {},
1371
- "outputs": [
1372
- {
1373
- "data": {
1374
- "text/plain": [
1375
- "#<RedAmber::Vector(:uint8, size=5):0x000000000000f960>\n",
1376
- "[1, 2, 3, 4, 5]\n"
1377
- ]
1378
- },
1379
- "execution_count": 40,
1380
- "metadata": {},
1381
- "output_type": "execute_result"
1382
- }
1383
- ],
1384
- "source": [
1385
- "array = Arrow::Array.new([1, 2, 3, 4, 5])\n",
1386
- "Vector.new(array)"
1387
- ]
1388
- },
1389
- {
1390
- "cell_type": "markdown",
1391
- "id": "22091661-e78a-4c66-9e48-4c3c676469b4",
1392
- "metadata": {},
1393
- "source": [
1394
- "- TODO: `Vector[1..5]` as a constructor"
1395
- ]
1396
- },
1397
- {
1398
- "cell_type": "markdown",
1399
- "id": "b729bdba-87a2-4282-bd0e-319fe17f42da",
1400
- "metadata": {},
1401
- "source": [
1402
- "## 18. Vectors"
1403
- ]
1404
- },
1405
- {
1406
- "cell_type": "markdown",
1407
- "id": "f5ddd840-2f84-467b-a9bb-feb769573b69",
1408
- "metadata": {},
1409
- "source": [
1410
- "Returns an Array of Vectors in a DataFrame."
1411
- ]
1412
- },
1413
- {
1414
- "cell_type": "code",
1415
- "execution_count": 41,
1416
- "id": "d3ae03f2-e2fe-4a15-abe1-331185448d61",
1417
- "metadata": {},
1418
- "outputs": [
1419
- {
1420
- "data": {
1421
- "text/plain": [
1422
- "[#<RedAmber::Vector(:uint8, size=5):0x000000000000f910>\n",
1423
- "[1, 2, 3, 4, 5]\n",
1424
- ", #<RedAmber::Vector(:double, size=5):0x000000000000f974>\n",
1425
- "[1.0, 2.0, 3.0, NaN, nil]\n",
1426
- ", #<RedAmber::Vector(:string, size=5):0x000000000000f988>\n",
1427
- "[\"A\", \"B\", \"C\", \"D\", nil]\n",
1428
- ", #<RedAmber::Vector(:boolean, size=5):0x000000000000f99c>\n",
1429
- "[true, false, true, false, nil]\n",
1430
- "]"
1431
- ]
1432
- },
1433
- "execution_count": 41,
1434
- "metadata": {},
1435
- "output_type": "execute_result"
1436
- }
1437
- ],
1438
- "source": [
1439
- "df.vectors"
1440
- ]
1441
- },
1442
- {
1443
- "cell_type": "markdown",
1444
- "id": "8ac88ff3-0cb6-43d6-a999-0c2e8c6defb7",
1445
- "metadata": {
1446
- "tags": []
1447
- },
1448
- "source": [
1449
- "## 19. Variables\n",
1450
- "\n",
1451
- "Returns key and Vector pairs in a Hash."
1452
- ]
1453
- },
1454
- {
1455
- "cell_type": "code",
1456
- "execution_count": 42,
1457
- "id": "3351a216-6fe5-485e-8686-53c1e754fa2e",
1458
- "metadata": {},
1459
- "outputs": [
1460
- {
1461
- "data": {
1462
- "text/plain": [
1463
- "{:x=>#<RedAmber::Vector(:uint8, size=5):0x000000000000f910>\n",
1464
- "[1, 2, 3, 4, 5]\n",
1465
- ", :y=>#<RedAmber::Vector(:double, size=5):0x000000000000f974>\n",
1466
- "[1.0, 2.0, 3.0, NaN, nil]\n",
1467
- ", :s=>#<RedAmber::Vector(:string, size=5):0x000000000000f988>\n",
1468
- "[\"A\", \"B\", \"C\", \"D\", nil]\n",
1469
- ", :b=>#<RedAmber::Vector(:boolean, size=5):0x000000000000f99c>\n",
1470
- "[true, false, true, false, nil]\n",
1471
- "}"
1472
- ]
1473
- },
1474
- "execution_count": 42,
1475
- "metadata": {},
1476
- "output_type": "execute_result"
1477
- }
1478
- ],
1479
- "source": [
1480
- "df.variables"
1481
- ]
1482
- },
1483
- {
1484
- "cell_type": "markdown",
1485
- "id": "3b518c1c-eda7-406f-a885-b2344b1726eb",
1486
- "metadata": {},
1487
- "source": [
1488
- "## 20. Select columns by #[ ]"
1489
- ]
1490
- },
1491
- {
1492
- "cell_type": "markdown",
1493
- "id": "767b4e49-19eb-4d5f-b030-91bd78f0f5b9",
1494
- "metadata": {},
1495
- "source": [
1496
- "`DataFrame#[]` is overloading column operations and row operations.\n",
1497
- "\n",
1498
- "- For columns (variables)\n",
1499
- " - Key in a Symbol: `df[:symbol]`\n",
1500
- " - Key in a String: `df[\"string\"]`\n",
1501
- " - Keys in an Array: `df[:symbol1, \"string\", :symbol2]`\n",
1502
- "  - Keys by indices: `df[df.keys[0]]`, `df[df.keys[1,2]]`, `df[df.keys[1..]]`"
1503
- ]
1504
- },
1505
- {
1506
- "cell_type": "code",
1507
- "execution_count": 43,
1508
- "id": "ccf60edc-cccf-49e3-a503-1ca532247130",
1509
- "metadata": {},
1510
- "outputs": [
1511
- {
1512
- "data": {
1513
- "text/html": [
1514
- "RedAmber::DataFrame <5 x 2 vectors> <table><tr><th>x</th><th>y</th></tr><tr><td>1</td><td>1.0</td></tr><tr><td>2</td><td>2.0</td></tr><tr><td>3</td><td>3.0</td></tr><tr><td>4</td><td>NaN</td></tr><tr><td>5</td><td><i>(nil)</i></td></tr></table>"
1515
- ],
1516
- "text/plain": [
1517
- "#<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000f9b0>\n",
1518
- " x y\n",
1519
- " <uint8> <double>\n",
1520
- "1 1 1.0\n",
1521
- "2 2 2.0\n",
1522
- "3 3 3.0\n",
1523
- "4 4 NaN\n",
1524
- "5 5 (nil)\n"
1525
- ]
1526
- },
1527
- "execution_count": 43,
1528
- "metadata": {},
1529
- "output_type": "execute_result"
1530
- }
1531
- ],
1532
- "source": [
1533
- "# Keys in a Symbol and a String\n",
1534
- "df[:x, 'y']"
1535
- ]
1536
- },
1537
- {
1538
- "cell_type": "code",
1539
- "execution_count": 44,
1540
- "id": "8500f8c0-ff5a-4537-9f47-03d675e31b18",
1541
- "metadata": {},
1542
- "outputs": [
1543
- {
1544
- "data": {
1545
- "text/html": [
1546
- "RedAmber::DataFrame <5 x 2 vectors> <table><tr><th>x</th><th>y</th></tr><tr><td>1</td><td>1.0</td></tr><tr><td>2</td><td>2.0</td></tr><tr><td>3</td><td>3.0</td></tr><tr><td>4</td><td>NaN</td></tr><tr><td>5</td><td><i>(nil)</i></td></tr></table>"
1547
- ],
1548
- "text/plain": [
1549
- "#<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000f9c4>\n",
1550
- " x y\n",
1551
- " <uint8> <double>\n",
1552
- "1 1 1.0\n",
1553
- "2 2 2.0\n",
1554
- "3 3 3.0\n",
1555
- "4 4 NaN\n",
1556
- "5 5 (nil)\n"
1557
- ]
1558
- },
1559
- "execution_count": 44,
1560
- "metadata": {},
1561
- "output_type": "execute_result"
1562
- }
1563
- ],
1564
- "source": [
1565
- "# Keys in a Range\n",
1566
- "df['x'..'y']"
1567
- ]
1568
- },
1569
- {
1570
- "cell_type": "code",
1571
- "execution_count": 45,
1572
- "id": "db35cae1-35c2-47de-a7e8-906161f21282",
1573
- "metadata": {},
1574
- "outputs": [
1575
- {
1576
- "data": {
1577
- "text/html": [
1578
- "RedAmber::DataFrame <5 x 3 vectors> <table><tr><th>s</th><th>b</th><th>x</th></tr><tr><td>A</td><td>true</td><td>1</td></tr><tr><td>B</td><td>false</td><td>2</td></tr><tr><td>C</td><td>true</td><td>3</td></tr><tr><td>D</td><td>false</td><td>4</td></tr><tr><td><i>(nil)</i></td><td><i>(nil)</i></td><td>5</td></tr></table>"
1579
- ],
1580
- "text/plain": [
1581
- "#<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000000f9d8>\n",
1582
- " s b x\n",
1583
- " <string> <boolean> <uint8>\n",
1584
- "1 A true 1\n",
1585
- "2 B false 2\n",
1586
- "3 C true 3\n",
1587
- "4 D false 4\n",
1588
- "5 (nil) (nil) 5\n"
1589
- ]
1590
- },
1591
- "execution_count": 45,
1592
- "metadata": {},
1593
- "output_type": "execute_result"
1594
- }
1595
- ],
1596
- "source": [
1597
- "# Keys with an index Range, and a symbol\n",
1598
- "df[df.keys[2..], :x]"
1599
- ]
1600
- },
1601
- {
1602
- "cell_type": "markdown",
1603
- "id": "03e14403-f7bc-4350-9e7b-715901164331",
1604
- "metadata": {},
1605
- "source": [
1606
- "## 21. Select rows by #[ ]\n",
1607
- "`DataFrame#[]` is overloading column operations and row operations.\n",
1608
- "\n",
1609
- "- For rows (observations)\n",
1610
- "  - Select rows by an Index: `df[index]`\n",
1611
- " - Select rows by Indices: `df[indices]` # Array, Arrow::Array, Vectors are acceptable for indices\n",
1612
- " - Select rows by Ranges: `df[range]`\n",
1613
- " - Select rows by Booleans: `df[booleans]` # Array, Arrow::Array, Vectors are acceptable for booleans"
1614
- ]
1615
- },
1616
- {
1617
- "cell_type": "code",
1618
- "execution_count": 46,
1619
- "id": "e3bc60a7-611e-4fd8-9770-8e0d167d3fee",
1620
- "metadata": {},
1621
- "outputs": [
1622
- {
1623
- "data": {
1624
- "text/html": [
1625
- "RedAmber::DataFrame <3 x 4 vectors> <table><tr><th>x</th><th>y</th><th>s</th><th>b</th></tr><tr><td>1</td><td>1.0</td><td>A</td><td>true</td></tr><tr><td>3</td><td>3.0</td><td>C</td><td>true</td></tr><tr><td>2</td><td>2.0</td><td>B</td><td>false</td></tr></table>"
1626
- ],
1627
- "text/plain": [
1628
- "#<RedAmber::DataFrame : 3 x 4 Vectors, 0x000000000000f9ec>\n",
1629
- " x y s b\n",
1630
- " <uint8> <double> <string> <boolean>\n",
1631
- "1 1 1.0 A true\n",
1632
- "2 3 3.0 C true\n",
1633
- "3 2 2.0 B false\n"
1634
- ]
1635
- },
1636
- "execution_count": 46,
1637
- "metadata": {},
1638
- "output_type": "execute_result"
1639
- }
1640
- ],
1641
- "source": [
1642
- "# indices\n",
1643
- "df[0, 2, 1]"
1644
- ]
1645
- },
1646
- {
1647
- "cell_type": "code",
1648
- "execution_count": 47,
1649
- "id": "2b8b3801-ae37-4629-9db5-ff937941c895",
1650
- "metadata": {},
1651
- "outputs": [
1652
- {
1653
- "data": {
1654
- "text/html": [
1655
- "RedAmber::DataFrame <3 x 4 vectors> <table><tr><th>x</th><th>y</th><th>s</th><th>b</th></tr><tr><td>2</td><td>2.0</td><td>B</td><td>false</td></tr><tr><td>3</td><td>3.0</td><td>C</td><td>true</td></tr><tr><td>5</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
1656
- ],
1657
- "text/plain": [
1658
- "#<RedAmber::DataFrame : 3 x 4 Vectors, 0x000000000000fa00>\n",
1659
- " x y s b\n",
1660
- " <uint8> <double> <string> <boolean>\n",
1661
- "1 2 2.0 B false\n",
1662
- "2 3 3.0 C true\n",
1663
- "3 5 (nil) (nil) (nil)\n"
1664
- ]
1665
- },
1666
- "execution_count": 47,
1667
- "metadata": {},
1668
- "output_type": "execute_result"
1669
- }
1670
- ],
1671
- "source": [
1672
- "# including a Range\n",
1673
- "# negative indices are also acceptable\n",
1674
- "df[1..2, -1]"
1675
- ]
1676
- },
1677
- {
1678
- "cell_type": "code",
1679
- "execution_count": 48,
1680
- "id": "3f6f8d73-a66c-4773-9bf5-0878c700f2d6",
1681
- "metadata": {},
1682
- "outputs": [
1683
- {
1684
- "data": {
1685
- "text/html": [
1686
- "RedAmber::DataFrame <3 x 4 vectors> <table><tr><th>x</th><th>y</th><th>s</th><th>b</th></tr><tr><td>2</td><td>2.0</td><td>B</td><td>false</td></tr><tr><td>3</td><td>3.0</td><td>C</td><td>true</td></tr><tr><td>5</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
1687
- ],
1688
- "text/plain": [
1689
- "#<RedAmber::DataFrame : 3 x 4 Vectors, 0x000000000000fa14>\n",
1690
- " x y s b\n",
1691
- " <uint8> <double> <string> <boolean>\n",
1692
- "1 2 2.0 B false\n",
1693
- "2 3 3.0 C true\n",
1694
- "3 5 (nil) (nil) (nil)\n"
1695
- ]
1696
- },
1697
- "execution_count": 48,
1698
- "metadata": {},
1699
- "output_type": "execute_result"
1700
- }
1701
- ],
1702
- "source": [
1703
- "# booleans\n",
1704
- "# length of boolean should be the same as self\n",
1705
- "df[false, true, true, false, true]"
1706
- ]
1707
- },
1708
- {
1709
- "cell_type": "code",
1710
- "execution_count": 49,
1711
- "id": "abe57279-54fd-48ec-a1a4-c7453211e776",
1712
- "metadata": {},
1713
- "outputs": [
1714
- {
1715
- "data": {
1716
- "text/html": [
1717
- "RedAmber::DataFrame <3 x 4 vectors> <table><tr><th>x</th><th>y</th><th>s</th><th>b</th></tr><tr><td>1</td><td>1.0</td><td>A</td><td>true</td></tr><tr><td>3</td><td>3.0</td><td>C</td><td>true</td></tr><tr><td>5</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
1718
- ],
1719
- "text/plain": [
1720
- "#<RedAmber::DataFrame : 3 x 4 Vectors, 0x000000000000fa28>\n",
1721
- " x y s b\n",
1722
- " <uint8> <double> <string> <boolean>\n",
1723
- "1 1 1.0 A true\n",
1724
- "2 3 3.0 C true\n",
1725
- "3 5 (nil) (nil) (nil)\n"
1726
- ]
1727
- },
1728
- "execution_count": 49,
1729
- "metadata": {},
1730
- "output_type": "execute_result"
1731
- }
1732
- ],
1733
- "source": [
1734
- "# Arrow::Array\n",
1735
- "indices = Arrow::UInt8Array.new([0,2,4])\n",
1736
- "df[indices]"
1737
- ]
1738
- },
1739
- {
1740
- "cell_type": "code",
1741
- "execution_count": 50,
1742
- "id": "2266611f-23d8-4645-a1e8-b07c2370fb3f",
1743
- "metadata": {},
1744
- "outputs": [
1745
- {
1746
- "data": {
1747
- "text/html": [
1748
- "RedAmber::DataFrame <3 x 4 vectors> <table><tr><th>x</th><th>y</th><th>s</th><th>b</th></tr><tr><td>3</td><td>3.0</td><td>C</td><td>true</td></tr><tr><td>4</td><td>NaN</td><td>D</td><td>false</td></tr><tr><td>5</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
1749
- ],
1750
- "text/plain": [
1751
- "#<RedAmber::DataFrame : 3 x 4 Vectors, 0x000000000000fa3c>\n",
1752
- " x y s b\n",
1753
- " <uint8> <double> <string> <boolean>\n",
1754
- "1 3 3.0 C true\n",
1755
- "2 4 NaN D false\n",
1756
- "3 5 (nil) (nil) (nil)\n"
1757
- ]
1758
- },
1759
- "execution_count": 50,
1760
- "metadata": {},
1761
- "output_type": "execute_result"
1762
- }
1763
- ],
1764
- "source": [
1765
- "# By a Vector as indices\n",
1766
- "indices = Vector.new(df.indices)\n",
1767
- "# indices > 1 returns a boolean Vector\n",
1768
- "df[indices > 1]"
1769
- ]
1770
- },
1771
- {
1772
- "cell_type": "code",
1773
- "execution_count": 51,
1774
- "id": "0ea2da7e-aeca-4874-be4a-6af563aa378b",
1775
- "metadata": {},
1776
- "outputs": [
1777
- {
1778
- "data": {
1779
- "text/plain": [
1780
- "#<RedAmber::Vector(:boolean, size=5):0x000000000000f99c>\n",
1781
- "[true, false, true, false, nil]\n"
1782
- ]
1783
- },
1784
- "execution_count": 51,
1785
- "metadata": {},
1786
- "output_type": "execute_result"
1787
- }
1788
- ],
1789
- "source": [
1790
- "# By a Vector as booleans\n",
1791
- "booleans = df[:b]"
1792
- ]
1793
- },
1794
- {
1795
- "cell_type": "code",
1796
- "execution_count": 52,
1797
- "id": "9f842890-6359-4266-9a23-2f8f813ef548",
1798
- "metadata": {},
1799
- "outputs": [
1800
- {
1801
- "data": {
1802
- "text/html": [
1803
- "RedAmber::DataFrame <2 x 4 vectors> <table><tr><th>x</th><th>y</th><th>s</th><th>b</th></tr><tr><td>1</td><td>1.0</td><td>A</td><td>true</td></tr><tr><td>3</td><td>3.0</td><td>C</td><td>true</td></tr></table>"
1804
- ],
1805
- "text/plain": [
1806
- "#<RedAmber::DataFrame : 2 x 4 Vectors, 0x000000000000fa50>\n",
1807
- " x y s b\n",
1808
- " <uint8> <double> <string> <boolean>\n",
1809
- "1 1 1.0 A true\n",
1810
- "2 3 3.0 C true\n"
1811
- ]
1812
- },
1813
- "execution_count": 52,
1814
- "metadata": {},
1815
- "output_type": "execute_result"
1816
- }
1817
- ],
1818
- "source": [
1819
- "df[booleans]"
1820
- ]
1821
- },
1822
- {
1823
- "cell_type": "markdown",
1824
- "id": "98a04874-cb2c-44c0-b410-b330b9d12b0f",
1825
- "metadata": {},
1826
- "source": [
1827
- "## 22. empty?"
1828
- ]
1829
- },
1830
- {
1831
- "cell_type": "code",
1832
- "execution_count": 53,
1833
- "id": "7b1ab319-90a7-4f09-8629-04dcd94076cb",
1834
- "metadata": {},
1835
- "outputs": [
1836
- {
1837
- "data": {
1838
- "text/plain": [
1839
- "false"
1840
- ]
1841
- },
1842
- "execution_count": 53,
1843
- "metadata": {},
1844
- "output_type": "execute_result"
1845
- }
1846
- ],
1847
- "source": [
1848
- "df.empty?"
1849
- ]
1850
- },
1851
- {
1852
- "cell_type": "code",
1853
- "execution_count": 54,
1854
- "id": "1e09c32f-20a8-4175-827f-cdb98063535a",
1855
- "metadata": {},
1856
- "outputs": [
1857
- {
1858
- "data": {
1859
- "text/plain": [
1860
- "true"
1861
- ]
1862
- },
1863
- "execution_count": 54,
1864
- "metadata": {},
1865
- "output_type": "execute_result"
1866
- }
1867
- ],
1868
- "source": [
1869
- "DataFrame.new.empty?"
1870
- ]
1871
- },
1872
- {
1873
- "cell_type": "code",
1874
- "execution_count": 55,
1875
- "id": "3f9f8771-87dd-44eb-8aac-6a3ed8b4c183",
1876
- "metadata": {},
1877
- "outputs": [
1878
- {
1879
- "data": {
1880
- "text/plain": [
1881
- "(empty DataFrame)"
1882
- ]
1883
- },
1884
- "execution_count": 55,
1885
- "metadata": {},
1886
- "output_type": "execute_result"
1887
- }
1888
- ],
1889
- "source": [
1890
- "DataFrame.new"
1891
- ]
1892
- },
1893
- {
1894
- "cell_type": "markdown",
1895
- "id": "86b826dd-10e6-4087-9162-b89ac6561a61",
1896
- "metadata": {},
1897
- "source": [
1898
- "## 23. Select columns by pick"
1899
- ]
1900
- },
1901
- {
1902
- "cell_type": "markdown",
1903
- "id": "b5aefd22-4e96-4dc5-91d2-e6826256bda6",
1904
- "metadata": {
1905
- "tags": []
1906
- },
1907
- "source": [
1908
- "`DataFrame#pick` accepts an Array of keys to pick up columns (variables). You can change the order of columns at the same time."
1909
- ]
1910
- },
1911
- {
1912
- "cell_type": "code",
1913
- "execution_count": 56,
1914
- "id": "68124521-b823-424d-9e06-d11aa927d618",
1915
- "metadata": {
1916
- "tags": []
1917
- },
1918
- "outputs": [
1919
- {
1920
- "data": {
1921
- "text/html": [
1922
- "RedAmber::DataFrame <5 x 2 vectors> <table><tr><th>s</th><th>y</th></tr><tr><td>A</td><td>1.0</td></tr><tr><td>B</td><td>2.0</td></tr><tr><td>C</td><td>3.0</td></tr><tr><td>D</td><td>NaN</td></tr><tr><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
1923
- ],
1924
- "text/plain": [
1925
- "#<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000fa64>\n",
1926
- " s y\n",
1927
- " <string> <double>\n",
1928
- "1 A 1.0\n",
1929
- "2 B 2.0\n",
1930
- "3 C 3.0\n",
1931
- "4 D NaN\n",
1932
- "5 (nil) (nil)\n"
1933
- ]
1934
- },
1935
- "execution_count": 56,
1936
- "metadata": {},
1937
- "output_type": "execute_result"
1938
- }
1939
- ],
1940
- "source": [
1941
- "df.pick(:s, :y)\n",
1942
- "# or\n",
1943
- "df.pick([:s, :y]) # OK too."
1944
- ]
1945
- },
1946
- {
1947
- "cell_type": "markdown",
1948
- "id": "a76dca00-da8f-4959-be18-7a1015a9d13c",
1949
- "metadata": {},
1950
- "source": [
1951
- "Or use a boolean Array of length `n_keys` to `pick`. This style preserves the order of variables."
1952
- ]
1953
- },
1954
- {
1955
- "cell_type": "code",
1956
- "execution_count": 57,
1957
- "id": "b91f8925-529c-43c9-93ba-e21bcac0f2f7",
1958
- "metadata": {},
1959
- "outputs": [
1960
- {
1961
- "data": {
1962
- "text/html": [
1963
- "RedAmber::DataFrame <5 x 2 vectors> <table><tr><th>y</th><th>s</th></tr><tr><td>1.0</td><td>A</td></tr><tr><td>2.0</td><td>B</td></tr><tr><td>3.0</td><td>C</td></tr><tr><td>NaN</td><td>D</td></tr><tr><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
1964
- ],
1965
- "text/plain": [
1966
- "#<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000fa78>\n",
1967
- " y s\n",
1968
- " <double> <string>\n",
1969
- "1 1.0 A\n",
1970
- "2 2.0 B\n",
1971
- "3 3.0 C\n",
1972
- "4 NaN D\n",
1973
- "5 (nil) (nil)\n"
1974
- ]
1975
- },
1976
- "execution_count": 57,
1977
- "metadata": {},
1978
- "output_type": "execute_result"
1979
- }
1980
- ],
1981
- "source": [
1982
- "df.pick(false, true, true, false)\n",
1983
- "# or\n",
1984
- "df.pick([false, true, true, false]) # OK"
1985
- ]
1986
- },
1987
- {
1988
- "cell_type": "markdown",
1989
- "id": "5f903182-745b-4923-99d8-14a9b9c6ea4c",
1990
- "metadata": {},
1991
- "source": [
1992
- "`#pick` also accepts a block in the context of self.\n",
1993
- "\n",
1994
- "Next example is picking up numeric variables."
1995
- ]
1996
- },
1997
- {
1998
- "cell_type": "code",
1999
- "execution_count": 58,
2000
- "id": "37bb0a49-c38a-484c-91d4-3e23ab43a727",
2001
- "metadata": {},
2002
- "outputs": [
2003
- {
2004
- "data": {
2005
- "text/html": [
2006
- "RedAmber::DataFrame <5 x 2 vectors> <table><tr><th>x</th><th>y</th></tr><tr><td>1</td><td>1.0</td></tr><tr><td>2</td><td>2.0</td></tr><tr><td>3</td><td>3.0</td></tr><tr><td>4</td><td>NaN</td></tr><tr><td>5</td><td><i>(nil)</i></td></tr></table>"
2007
- ],
2008
- "text/plain": [
2009
- "#<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000fa8c>\n",
2010
- " x y\n",
2011
- " <uint8> <double>\n",
2012
- "1 1 1.0\n",
2013
- "2 2 2.0\n",
2014
- "3 3 3.0\n",
2015
- "4 4 NaN\n",
2016
- "5 5 (nil)\n"
2017
- ]
2018
- },
2019
- "execution_count": 58,
2020
- "metadata": {},
2021
- "output_type": "execute_result"
2022
- }
2023
- ],
2024
- "source": [
2025
- "# receiver is required with the argument style\n",
2026
- "df.pick(df.vectors.map(&:numeric?))\n",
2027
- "\n",
2028
- "# with a block\n",
2029
- "df.pick { vectors.map(&:numeric?) }"
2030
- ]
2031
- },
2032
- {
2033
- "cell_type": "markdown",
2034
- "id": "e51f07c0-54eb-4114-8cd6-63c7780e7248",
2035
- "metadata": {},
2036
- "source": [
2037
- "The name `pick` comes from the action to pick variables(columns) according to the label keys."
2038
- ]
2039
- },
2040
- {
2041
- "cell_type": "markdown",
2042
- "id": "7c1815e4-de6c-425e-8602-b8dd66836250",
2043
- "metadata": {},
2044
- "source": [
2045
- "## 24. Reject columns by drop"
2046
- ]
2047
- },
2048
- {
2049
- "cell_type": "markdown",
2050
- "id": "d1ab045e-66f9-4922-8bf2-35aee7f2812e",
2051
- "metadata": {
2052
- "tags": []
2053
- },
2054
- "source": [
2055
- "`DataFrame#drop` accepts an Array of keys to drop columns (variables) and create the remainder DataFrame."
2056
- ]
2057
- },
2058
- {
2059
- "cell_type": "code",
2060
- "execution_count": 59,
2061
- "id": "7ccace08-62b0-4b0b-93fb-81edf673abf7",
2062
- "metadata": {},
2063
- "outputs": [
2064
- {
2065
- "data": {
2066
- "text/html": [
2067
- "RedAmber::DataFrame <5 x 2 vectors> <table><tr><th>y</th><th>s</th></tr><tr><td>1.0</td><td>A</td></tr><tr><td>2.0</td><td>B</td></tr><tr><td>3.0</td><td>C</td></tr><tr><td>NaN</td><td>D</td></tr><tr><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
2068
- ],
2069
- "text/plain": [
2070
- "#<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000faa0>\n",
2071
- " y s\n",
2072
- " <double> <string>\n",
2073
- "1 1.0 A\n",
2074
- "2 2.0 B\n",
2075
- "3 3.0 C\n",
2076
- "4 NaN D\n",
2077
- "5 (nil) (nil)\n"
2078
- ]
2079
- },
2080
- "execution_count": 59,
2081
- "metadata": {},
2082
- "output_type": "execute_result"
2083
- }
2084
- ],
2085
- "source": [
2086
- "df.drop(:x, :b)\n",
2087
- "# df.drop([:x, :b]) #is OK too."
2088
- ]
2089
- },
2090
- {
2091
- "cell_type": "markdown",
2092
- "id": "2085b349-95c5-4607-b029-f7c3d630ac1c",
2093
- "metadata": {},
2094
- "source": [
2095
- "Or use a boolean Array of length `n_keys` to `drop`."
2096
- ]
2097
- },
2098
- {
2099
- "cell_type": "code",
2100
- "execution_count": 60,
2101
- "id": "785c02f1-1e16-4722-9961-4b49223c8290",
2102
- "metadata": {},
2103
- "outputs": [
2104
- {
2105
- "data": {
2106
- "text/html": [
2107
- "RedAmber::DataFrame <5 x 2 vectors> <table><tr><th>y</th><th>s</th></tr><tr><td>1.0</td><td>A</td></tr><tr><td>2.0</td><td>B</td></tr><tr><td>3.0</td><td>C</td></tr><tr><td>NaN</td><td>D</td></tr><tr><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
2108
- ],
2109
- "text/plain": [
2110
- "#<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000fab4>\n",
2111
- " y s\n",
2112
- " <double> <string>\n",
2113
- "1 1.0 A\n",
2114
- "2 2.0 B\n",
2115
- "3 3.0 C\n",
2116
- "4 NaN D\n",
2117
- "5 (nil) (nil)\n"
2118
- ]
2119
- },
2120
- "execution_count": 60,
2121
- "metadata": {},
2122
- "output_type": "execute_result"
2123
- }
2124
- ],
2125
- "source": [
2126
- "df.drop(true, false, false, true)\n",
2127
- "# df.drop([true, false, false, true]) # is OK too"
2128
- ]
2129
- },
2130
- {
2131
- "cell_type": "markdown",
2132
- "id": "d246161e-02cc-40fb-8921-26b37eb3956f",
2133
- "metadata": {},
2134
- "source": [
2135
- "`#drop` also accepts a block in the context of self.\n",
2136
- "\n",
2137
- "Next example will drop variables which have nil or NaN values."
2138
- ]
2139
- },
2140
- {
2141
- "cell_type": "code",
2142
- "execution_count": 61,
2143
- "id": "069932e3-d393-4ede-9eb5-7aac8625e0c0",
2144
- "metadata": {},
2145
- "outputs": [
2146
- {
2147
- "data": {
2148
- "text/html": [
2149
- "RedAmber::DataFrame <5 x 1 vector> <table><tr><th>x</th></tr><tr><td>1</td></tr><tr><td>2</td></tr><tr><td>3</td></tr><tr><td>4</td></tr><tr><td>5</td></tr></table>"
2150
- ],
2151
- "text/plain": [
2152
- "#<RedAmber::DataFrame : 5 x 1 Vector, 0x000000000000fac8>\n",
2153
- " x\n",
2154
- " <uint8>\n",
2155
- "1 1\n",
2156
- "2 2\n",
2157
- "3 3\n",
2158
- "4 4\n",
2159
- "5 5\n"
2160
- ]
2161
- },
2162
- "execution_count": 61,
2163
- "metadata": {},
2164
- "output_type": "execute_result"
2165
- }
2166
- ],
2167
- "source": [
2168
- "df.drop { vectors.map { |v| v.is_na.any } }"
2169
- ]
2170
- },
2171
- {
2172
- "cell_type": "markdown",
2173
- "id": "88b064d6-7d90-4a0b-b9c8-d92e103269fb",
2174
- "metadata": {},
2175
- "source": [
2176
- "Argument style is also acceptable but it requires the receiver 'df'."
2177
- ]
2178
- },
2179
- {
2180
- "cell_type": "code",
2181
- "execution_count": 62,
2182
- "id": "3003a5c2-0966-4f2c-9643-59e8b546c8aa",
2183
- "metadata": {},
2184
- "outputs": [
2185
- {
2186
- "data": {
2187
- "text/html": [
2188
- "RedAmber::DataFrame <5 x 1 vector> <table><tr><th>x</th></tr><tr><td>1</td></tr><tr><td>2</td></tr><tr><td>3</td></tr><tr><td>4</td></tr><tr><td>5</td></tr></table>"
2189
- ],
2190
- "text/plain": [
2191
- "#<RedAmber::DataFrame : 5 x 1 Vector, 0x000000000000fadc>\n",
2192
- " x\n",
2193
- " <uint8>\n",
2194
- "1 1\n",
2195
- "2 2\n",
2196
- "3 3\n",
2197
- "4 4\n",
2198
- "5 5\n"
2199
- ]
2200
- },
2201
- "execution_count": 62,
2202
- "metadata": {},
2203
- "output_type": "execute_result"
2204
- }
2205
- ],
2206
- "source": [
2207
- "df.drop(df.vectors.map { |v| v.is_na.any })"
2208
- ]
2209
- },
2210
- {
2211
- "cell_type": "markdown",
2212
- "id": "c6fce15c-d4a9-4281-9c07-457e78d3c13e",
2213
- "metadata": {},
2214
- "source": [
2215
- "The name `drop` comes from the pair word of `pick`."
2216
- ]
2217
- },
2218
- {
2219
- "cell_type": "markdown",
2220
- "id": "0f6dc86c-828d-4f9f-8b07-fce63c30fdca",
2221
- "metadata": {},
2222
- "source": [
2223
- "## 25. Pick/drop and nil"
2224
- ]
2225
- },
2226
- {
2227
- "cell_type": "markdown",
2228
- "id": "0a108878-565b-400e-9a47-a15aae09429c",
2229
- "metadata": {},
2230
- "source": [
2231
- "When `pick` or `drop` is used with booleans, nil in the booleans is treated as false. This behavior is aligned with Ruby's `BasicObject#!`."
2232
- ]
2233
- },
2234
- {
2235
- "cell_type": "code",
2236
- "execution_count": 63,
2237
- "id": "7c01fbb4-9bfa-4afc-8e6b-45c97c0beb03",
2238
- "metadata": {},
2239
- "outputs": [
2240
- {
2241
- "data": {
2242
- "text/plain": [
2243
- "true"
2244
- ]
2245
- },
2246
- "execution_count": 63,
2247
- "metadata": {},
2248
- "output_type": "execute_result"
2249
- }
2250
- ],
2251
- "source": [
2252
- "booleans = [true, true, false, nil]\n",
2253
- "booleans_invert = booleans.map(&:!) # => [false, false, true, true] because nil.! is true\n",
2254
- "df.pick(booleans) == df.drop(booleans_invert)"
2255
- ]
2256
- },
2257
- {
2258
- "cell_type": "markdown",
2259
- "id": "12a24264-9b7a-42a1-a541-e292e3876e35",
2260
- "metadata": {},
2261
- "source": [
2262
- "## 26. Vector#invert, #primitive_invert"
2263
- ]
2264
- },
2265
- {
2266
- "cell_type": "code",
2267
- "execution_count": 64,
2268
- "id": "ea352e12-7e8a-43be-b8ac-797adbc47708",
2269
- "metadata": {},
2270
- "outputs": [
2271
- {
2272
- "data": {
2273
- "text/plain": [
2274
- "#<RedAmber::Vector(:boolean, size=4):0x000000000000faf0>\n",
2275
- "[true, true, false, nil]\n"
2276
- ]
2277
- },
2278
- "execution_count": 64,
2279
- "metadata": {},
2280
- "output_type": "execute_result"
2281
- }
2282
- ],
2283
- "source": [
2284
- "vector = Vector.new(booleans)"
2285
- ]
2286
- },
2287
- {
2288
- "cell_type": "markdown",
2289
- "id": "2a0f82e0-157b-4185-9254-0618be291f9b",
2290
- "metadata": {},
2291
- "source": [
2292
- "nil is converted to nil by `Vector#invert`."
2293
- ]
2294
- },
2295
- {
2296
- "cell_type": "code",
2297
- "execution_count": 65,
2298
- "id": "596c521f-12bf-4448-9e5d-e1b4a2c3d896",
2299
- "metadata": {},
2300
- "outputs": [
2301
- {
2302
- "data": {
2303
- "text/plain": [
2304
- "#<RedAmber::Vector(:boolean, size=4):0x000000000000fb04>\n",
2305
- "[false, false, true, nil]\n"
2306
- ]
2307
- },
2308
- "execution_count": 65,
2309
- "metadata": {},
2310
- "output_type": "execute_result"
2311
- }
2312
- ],
2313
- "source": [
2314
- "vector.invert\n",
2315
- "# or\n",
2316
- "!vector"
2317
- ]
2318
- },
2319
- {
2320
- "cell_type": "markdown",
2321
- "id": "a1aec910-3055-4627-a02b-22d45f2ceb70",
2322
- "metadata": {},
2323
- "source": [
2324
- "So `df.pick(booleans) != df.drop(booleans.invert)` when booleans have any nils.\n",
2325
- "\n",
2326
- "On the other hand, `Vector#primitive_invert` follows Ruby's `BasicObject#!`'s behavior. Then pick and drop keep 'MECE' behavior."
2327
- ]
2328
- },
2329
- {
2330
- "cell_type": "code",
2331
- "execution_count": 66,
2332
- "id": "4dcaba48-1cea-4ce9-b4a9-b079b43af7ec",
2333
- "metadata": {},
2334
- "outputs": [
2335
- {
2336
- "data": {
2337
- "text/plain": [
2338
- "#<RedAmber::Vector(:boolean, size=4):0x000000000000fb18>\n",
2339
- "[false, false, true, true]\n"
2340
- ]
2341
- },
2342
- "execution_count": 66,
2343
- "metadata": {},
2344
- "output_type": "execute_result"
2345
- }
2346
- ],
2347
- "source": [
2348
- "vector.primitive_invert"
2349
- ]
2350
- },
2351
- {
2352
- "cell_type": "code",
2353
- "execution_count": 67,
2354
- "id": "c7ae4dad-275a-49e0-a0b0-bf3686248070",
2355
- "metadata": {},
2356
- "outputs": [
2357
- {
2358
- "data": {
2359
- "text/plain": [
2360
- "true"
2361
- ]
2362
- },
2363
- "execution_count": 67,
2364
- "metadata": {},
2365
- "output_type": "execute_result"
2366
- }
2367
- ],
2368
- "source": [
2369
- "df.pick(vector) == df.drop(vector.primitive_invert)"
2370
- ]
2371
- },
2372
- {
2373
- "cell_type": "markdown",
2374
- "id": "9a6cec74-43f0-4a72-8262-25b1e311f602",
2375
- "metadata": {},
2376
- "source": [
2377
- "## 27. Pick/drop and [ ]"
2378
- ]
2379
- },
2380
- {
2381
- "cell_type": "markdown",
2382
- "id": "32c8f74d-b3ce-4305-9af7-6ea70052c773",
2383
- "metadata": {},
2384
- "source": [
2385
- "When `pick` or `drop` select a single column (variable), it returns a `DataFrame` with one column (variable)."
2386
- ]
2387
- },
2388
- {
2389
- "cell_type": "code",
2390
- "execution_count": 68,
2391
- "id": "e13aee24-cac6-41ad-b8a3-0ec26edbe5d1",
2392
- "metadata": {},
2393
- "outputs": [
2394
- {
2395
- "data": {
2396
- "text/html": [
2397
- "RedAmber::DataFrame <5 x 1 vector> <table><tr><th>x</th></tr><tr><td>1</td></tr><tr><td>2</td></tr><tr><td>3</td></tr><tr><td>4</td></tr><tr><td>5</td></tr></table>"
2398
- ],
2399
- "text/plain": [
2400
- "#<RedAmber::DataFrame : 5 x 1 Vector, 0x000000000000fb2c>\n",
2401
- " x\n",
2402
- " <uint8>\n",
2403
- "1 1\n",
2404
- "2 2\n",
2405
- "3 3\n",
2406
- "4 4\n",
2407
- "5 5\n"
2408
- ]
2409
- },
2410
- "execution_count": 68,
2411
- "metadata": {},
2412
- "output_type": "execute_result"
2413
- }
2414
- ],
2415
- "source": [
2416
- "df.pick(:x) # or\n",
2417
- "df.drop(:y, :s, :b)"
2418
- ]
2419
- },
2420
- {
2421
- "cell_type": "markdown",
2422
- "id": "3e47b9d2-929e-4674-9690-0a1fdf7b0a7d",
2423
- "metadata": {},
2424
- "source": [
2425
- "In contrast, when `[]` selects a single column (variable), it returns a `Vector`."
2426
- ]
2427
- },
2428
- {
2429
- "cell_type": "code",
2430
- "execution_count": 69,
2431
- "id": "60d228be-7357-434d-9d39-ee72c110e6fe",
2432
- "metadata": {},
2433
- "outputs": [
2434
- {
2435
- "data": {
2436
- "text/plain": [
2437
- "#<RedAmber::Vector(:uint8, size=5):0x000000000000f910>\n",
2438
- "[1, 2, 3, 4, 5]\n"
2439
- ]
2440
- },
2441
- "execution_count": 69,
2442
- "metadata": {},
2443
- "output_type": "execute_result"
2444
- }
2445
- ],
2446
- "source": [
2447
- "df[:x]"
2448
- ]
2449
- },
2450
- {
2451
- "cell_type": "markdown",
2452
- "id": "6d973934-e08b-4b45-8efb-52f9167e7238",
2453
- "metadata": {},
2454
- "source": [
2455
- "This behavior may be useful to use in a block of DataFrame manipulation verbs (like pick, drop, slice, remove, assign, rename)."
2456
- ]
2457
- },
2458
- {
2459
- "cell_type": "markdown",
2460
- "id": "34c9bcb0-889a-4190-b2b8-49765cd059c2",
2461
- "metadata": {},
2462
- "source": [
2463
- "## 28. Slice"
2464
- ]
2465
- },
2466
- {
2467
- "cell_type": "markdown",
2468
- "id": "9a428ba8-c306-4ab8-8607-51174e8e6ebe",
2469
- "metadata": {},
2470
- "source": [
2471
- "`slice` selects rows (observations) to create a subset of a DataFrame."
2472
- ]
2473
- },
2474
- {
2475
- "cell_type": "markdown",
2476
- "id": "6016d6d4-72d6-4ae2-b7dd-3d526c91ae61",
2477
- "metadata": {},
2478
- "source": [
2479
- "`slice(indices)` accepts indices as arguments. Indices should be Integers, Floats or Ranges of Integers. A negative index counting from the tail, like Ruby's Array, is also acceptable."
2480
- ]
2481
- },
2482
- {
2483
- "cell_type": "code",
2484
- "execution_count": 70,
2485
- "id": "9cdce2e4-7876-4be6-bd1f-bc8ab6e6c871",
2486
- "metadata": {},
2487
- "outputs": [
2488
- {
2489
- "data": {
2490
- "text/html": [
2491
- "RedAmber::DataFrame <10 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.1</td><td>18.7</td><td>181</td><td>3750</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.5</td><td>17.4</td><td>186</td><td>3800</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td>2007</td></tr><tr><td colspan='8'>&#8942;</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.4</td><td>15.7</td><td>222</td><td>5750</td><td>male</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>45.2</td><td>14.8</td><td>212</td><td>5200</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>49.9</td><td>16.1</td><td>213</td><td>5400</td><td>male</td><td>2009</td></tr></table>"
2492
- ],
2493
- "text/plain": [
2494
- "#<RedAmber::DataFrame : 10 x 8 Vectors, 0x000000000000fb40>\n",
2495
- " species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
2496
- " <string> <string> <double> <double> <uint8> ... <uint16>\n",
2497
- " 1 Adelie Torgersen 39.1 18.7 181 ... 2007\n",
2498
- " 2 Adelie Torgersen 39.5 17.4 186 ... 2007\n",
2499
- " 3 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
2500
- " 4 Adelie Torgersen (nil) (nil) (nil) ... 2007\n",
2501
- " 5 Adelie Torgersen 36.7 19.3 193 ... 2007\n",
2502
- " : : : : : : ... :\n",
2503
- " 8 Gentoo Biscoe 50.4 15.7 222 ... 2009\n",
2504
- " 9 Gentoo Biscoe 45.2 14.8 212 ... 2009\n",
2505
- "10 Gentoo Biscoe 49.9 16.1 213 ... 2009\n"
2506
- ]
2507
- },
2508
- "execution_count": 70,
2509
- "metadata": {},
2510
- "output_type": "execute_result"
2511
- }
2512
- ],
2513
- "source": [
2514
- "# returns 5 rows at the start and 5 rows from the end\n",
2515
- "penguins.slice(0...5, -5..-1)"
2516
- ]
2517
- },
2518
- {
2519
- "cell_type": "code",
2520
- "execution_count": 71,
2521
- "id": "380ab809-09ae-4e69-a8e6-8d53d1e7822d",
2522
- "metadata": {},
2523
- "outputs": [
2524
- {
2525
- "data": {
2526
- "text/html": [
2527
- "RedAmber::DataFrame <1 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Biscoe</td><td>42.2</td><td>19.5</td><td>197</td><td>4275</td><td>male</td><td>2009</td></tr></table>"
2528
- ],
2529
- "text/plain": [
2530
- "#<RedAmber::DataFrame : 1 x 8 Vectors, 0x000000000000fb54>\n",
2531
- " species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ... year\n",
2532
- " <string> <string> <double> <double> <uint8> <uint16> ... <uint16>\n",
2533
- "1 Adelie Biscoe 42.2 19.5 197 4275 ... 2009\n"
2534
- ]
2535
- },
2536
- "execution_count": 71,
2537
- "metadata": {},
2538
- "output_type": "execute_result"
2539
- }
2540
- ],
2541
- "source": [
2542
- "# slice accepts Float index\n",
2543
- "# 33% of 344 observations in index => 113.52 th data ??\n",
2544
- "penguins.slice(penguins.size * 0.33)"
2545
- ]
2546
- },
2547
- {
2548
- "cell_type": "markdown",
2549
- "id": "8139bb28-89f8-4058-b824-dde33ead0b60",
2550
- "metadata": {},
2551
- "source": [
2552
- "Indices in Vectors or Arrow::Arrays are also acceptable."
2553
- ]
2554
- },
2555
- {
2556
- "cell_type": "markdown",
2557
- "id": "6f79db8c-c706-4d60-949b-3f644474d375",
2558
- "metadata": {},
2559
- "source": [
2560
- "Another way to select in `slice` is to use booleans.\n",
2561
- "- Booleans can be an Array, an Arrow::Array, a Vector, or an Array of them.\n",
2562
- "- Each data type must be boolean.\n",
2563
- "- Size of booleans must be same as the size of self."
2564
- ]
2565
- },
2566
- {
2567
- "cell_type": "code",
2568
- "execution_count": 72,
2569
- "id": "f58ca131-7375-4489-90ce-6ba54b898eb5",
2570
- "metadata": {},
2571
- "outputs": [
2572
- {
2573
- "data": {
2574
- "text/plain": [
2575
- "#<RedAmber::Vector(:boolean, size=344):0x000000000000fb68>\n",
2576
- "[false, false, true, nil, false, false, false, false, false, true, false, false, ... ]\n"
2577
- ]
2578
- },
2579
- "execution_count": 72,
2580
- "metadata": {},
2581
- "output_type": "execute_result"
2582
- }
2583
- ],
2584
- "source": [
2585
- "# make booleans to check over 40\n",
2586
- "booleans = penguins[:bill_length_mm] > 40"
2587
- ]
2588
- },
2589
- {
2590
- "cell_type": "code",
2591
- "execution_count": 73,
2592
- "id": "176ab365-c66a-4712-97b9-4381a536321b",
2593
- "metadata": {},
2594
- "outputs": [
2595
- {
2596
- "data": {
2597
- "text/html": [
2598
- "RedAmber::DataFrame <242 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>42.0</td><td>20.2</td><td>190</td><td>4250</td><td><i>(nil)</i></td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>41.1</td><td>17.6</td><td>182</td><td>3200</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>42.5</td><td>20.7</td><td>197</td><td>4500</td><td>male</td><td>2007</td></tr><tr><td colspan='8'>&#8942;</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.4</td><td>15.7</td><td>222</td><td>5750</td><td>male</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>45.2</td><td>14.8</td><td>212</td><td>5200</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>49.9</td><td>16.1</td><td>213</td><td>5400</td><td>male</td><td>2009</td></tr></table>"
2599
- ],
2600
- "text/plain": [
2601
- "#<RedAmber::DataFrame : 242 x 8 Vectors, 0x000000000000fb7c>\n",
2602
- " species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
2603
- " <string> <string> <double> <double> <uint8> ... <uint16>\n",
2604
- " 1 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
2605
- " 2 Adelie Torgersen 42.0 20.2 190 ... 2007\n",
2606
- " 3 Adelie Torgersen 41.1 17.6 182 ... 2007\n",
2607
- " 4 Adelie Torgersen 42.5 20.7 197 ... 2007\n",
2608
- " 5 Adelie Torgersen 46.0 21.5 194 ... 2007\n",
2609
- " : : : : : : ... :\n",
2610
- "240 Gentoo Biscoe 50.4 15.7 222 ... 2009\n",
2611
- "241 Gentoo Biscoe 45.2 14.8 212 ... 2009\n",
2612
- "242 Gentoo Biscoe 49.9 16.1 213 ... 2009\n"
2613
- ]
2614
- },
2615
- "execution_count": 73,
2616
- "metadata": {},
2617
- "output_type": "execute_result"
2618
- }
2619
- ],
2620
- "source": [
2621
- "penguins.slice(booleans)"
2622
- ]
2623
- },
2624
- {
2625
- "cell_type": "markdown",
2626
- "id": "3264a182-6b72-461a-b712-c3b708c53516",
2627
- "metadata": {},
2628
- "source": [
2629
- "`slice` accepts a block.\n",
2630
- "- We can't use both arguments and a block at the same time.\n",
2631
- "- The block should return indices of any length or a boolean Array with the same length as `size`.\n",
2632
- "- Block is called in the context of self. So the receiver 'self' can be omitted in the block."
2633
- ]
2634
- },
2635
- {
2636
- "cell_type": "code",
2637
- "execution_count": 74,
2638
- "id": "c95d3426-0bbb-430e-8d83-6e22434d99ed",
2639
- "metadata": {},
2640
- "outputs": [
2641
- {
2642
- "data": {
2643
- "text/html": [
2644
- "RedAmber::DataFrame <204 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.1</td><td>18.7</td><td>181</td><td>3750</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.5</td><td>17.4</td><td>186</td><td>3800</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.3</td><td>20.6</td><td>190</td><td>3650</td><td>male</td><td>2007</td></tr><tr><td colspan='8'>&#8942;</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>47.2</td><td>13.7</td><td>214</td><td>4925</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>46.8</td><td>14.3</td><td>215</td><td>4850</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>45.2</td><td>14.8</td><td>212</td><td>5200</td><td>female</td><td>2009</td></tr></table>"
2645
- ],
2646
- "text/plain": [
2647
- "#<RedAmber::DataFrame : 204 x 8 Vectors, 0x000000000000fb90>\n",
2648
- " species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
2649
- " <string> <string> <double> <double> <uint8> ... <uint16>\n",
2650
- " 1 Adelie Torgersen 39.1 18.7 181 ... 2007\n",
2651
- " 2 Adelie Torgersen 39.5 17.4 186 ... 2007\n",
2652
- " 3 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
2653
- " 4 Adelie Torgersen 39.3 20.6 190 ... 2007\n",
2654
- " 5 Adelie Torgersen 38.9 17.8 181 ... 2007\n",
2655
- " : : : : : : ... :\n",
2656
- "202 Gentoo Biscoe 47.2 13.7 214 ... 2009\n",
2657
- "203 Gentoo Biscoe 46.8 14.3 215 ... 2009\n",
2658
- "204 Gentoo Biscoe 45.2 14.8 212 ... 2009\n"
2659
- ]
2660
- },
2661
- "execution_count": 74,
2662
- "metadata": {},
2663
- "output_type": "execute_result"
2664
- }
2665
- ],
2666
- "source": [
2667
- "# return a DataFrame with bill_length_mm is in 2*std range around mean\n",
2668
- "penguins.slice do\n",
2669
- " vector = self[:bill_length_mm]\n",
2670
- " min = vector.mean - vector.std\n",
2671
- " max = vector.mean + vector.std\n",
2672
- " vector.to_a.map { |e| (min..max).include? e }\n",
2673
- "end"
2674
- ]
2675
- },
2676
- {
2677
- "cell_type": "markdown",
2678
- "id": "4fa42801-64f5-4432-856b-85c26a68515d",
2679
- "metadata": {},
2680
- "source": [
2681
- "## 29. Slice and nil option"
2682
- ]
2683
- },
2684
- {
2685
- "cell_type": "markdown",
2686
- "id": "31017a7e-0923-4283-bc92-246ebe2591c3",
2687
- "metadata": {},
2688
- "source": [
2689
- "`Arrow::Table#slice` uses the `#filter` method with the option `Arrow::FilterOptions.null_selection_behavior = :emit_null`. This will propagate nil at the same row."
2690
- ]
2691
- },
2692
- {
2693
- "cell_type": "code",
2694
- "execution_count": 75,
2695
- "id": "8e4a8108-154b-4621-acd1-704ddf229d61",
2696
- "metadata": {},
2697
- "outputs": [
2698
- {
2699
- "data": {
2700
- "text/plain": [
2701
- "#<Arrow::Table:0x113e72048 ptr=0x7fcc50a542a0>\n",
2702
- "\t a\tb\t c\n",
2703
- "0\t 1\tA\t 1.000000\n",
2704
- "1\t(null)\t(null)\t (null)\n"
2705
- ]
2706
- },
2707
- "execution_count": 75,
2708
- "metadata": {},
2709
- "output_type": "execute_result"
2710
- }
2711
- ],
2712
- "source": [
2713
- "hash = { a: [1, 2, 3], b: %w[A B C], c: [1.0, 2, 3] }\n",
2714
- "table = Arrow::Table.new(hash)\n",
2715
- "table.slice([true, false, nil])"
2716
- ]
2717
- },
2718
- {
2719
- "cell_type": "markdown",
2720
- "id": "dbb57c5a-e949-42b8-a82c-9affb3fe5b7b",
2721
- "metadata": {},
2722
- "source": [
2723
- "Whereas in RedAmber, nil in the booleans passed to `DataFrame#slice` is treated as false. This behavior comes from `Arrow::FilterOptions.null_selection_behavior = :drop`. This is the default value for the `Arrow::Table.filter` method."
2724
- ]
2725
- },
2726
- {
2727
- "cell_type": "code",
2728
- "execution_count": 76,
2729
- "id": "851c3bf6-b9e9-41bd-92c5-5372ed934549",
2730
- "metadata": {},
2731
- "outputs": [
2732
- {
2733
- "data": {
2734
- "text/plain": [
2735
- "#<Arrow::Table:0x113e51438 ptr=0x7fcc4f7e4ed0>\n",
2736
- "\ta\tb\t c\n",
2737
- "0\t1\tA\t 1.000000\n"
2738
- ]
2739
- },
2740
- "execution_count": 76,
2741
- "metadata": {},
2742
- "output_type": "execute_result"
2743
- }
2744
- ],
2745
- "source": [
2746
- "RedAmber::DataFrame.new(table).slice([true, false, nil]).table"
2747
- ]
2748
- },
2749
- {
2750
- "cell_type": "markdown",
2751
- "id": "56398a3d-6146-43af-8b96-fec37730fc49",
2752
- "metadata": {},
2753
- "source": [
2754
- "## 30. Remove"
2755
- ]
2756
- },
2757
- {
2758
- "cell_type": "markdown",
2759
- "id": "9e042a97-8a5d-412e-8e4a-fda382225a2d",
2760
- "metadata": {},
2761
- "source": [
2762
- "Slice and reject rows (observations) to create a remainder DataFrame."
2763
- ]
2764
- },
2765
- {
2766
- "cell_type": "markdown",
2767
- "id": "2b4cbb97-eef3-4db8-8f25-c44c208ec554",
2768
- "metadata": {},
2769
- "source": [
2770
- "`#remove(indices)` accepts indices as arguments. Indices should be Integers or Ranges of Integers."
2771
- ]
2772
- },
2773
- {
2774
- "cell_type": "code",
2775
- "execution_count": 77,
2776
- "id": "17e38ab8-886b-4114-bcaf-ee18df7d00cd",
2777
- "metadata": {},
2778
- "outputs": [
2779
- {
2780
- "data": {
2781
- "text/html": [
2782
- "RedAmber::DataFrame <334 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.3</td><td>20.6</td><td>190</td><td>3650</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>38.9</td><td>17.8</td><td>181</td><td>3625</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.2</td><td>19.6</td><td>195</td><td>4675</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>34.1</td><td>18.1</td><td>193</td><td>3475</td><td><i>(nil)</i></td><td>2007</td></tr><tr><td colspan='8'>&#8942;</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>44.5</td><td>15.7</td><td>217</td><td>4875</td><td><i>(nil)</i></td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>48.8</td><td>16.2</td><td>222</td><td>6000</td><td>male</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>47.2</td><td>13.7</td><td>214</td><td>4925</td><td>female</td><td>2009</td></tr></table>"
2783
- ],
2784
- "text/plain": [
2785
- "#<RedAmber::DataFrame : 334 x 8 Vectors, 0x000000000000fba4>\n",
2786
- " species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
2787
- " <string> <string> <double> <double> <uint8> ... <uint16>\n",
2788
- " 1 Adelie Torgersen 39.3 20.6 190 ... 2007\n",
2789
- " 2 Adelie Torgersen 38.9 17.8 181 ... 2007\n",
2790
- " 3 Adelie Torgersen 39.2 19.6 195 ... 2007\n",
2791
- " 4 Adelie Torgersen 34.1 18.1 193 ... 2007\n",
2792
- " 5 Adelie Torgersen 42.0 20.2 190 ... 2007\n",
2793
- " : : : : : : ... :\n",
2794
- "332 Gentoo Biscoe 44.5 15.7 217 ... 2009\n",
2795
- "333 Gentoo Biscoe 48.8 16.2 222 ... 2009\n",
2796
- "334 Gentoo Biscoe 47.2 13.7 214 ... 2009\n"
2797
- ]
2798
- },
2799
- "execution_count": 77,
2800
- "metadata": {},
2801
- "output_type": "execute_result"
2802
- }
2803
- ],
2804
- "source": [
2805
- "# returns 6th to 339th obs. Remainder of 1st example of #28\n",
2806
- "penguins.remove(0...5, -5..-1)"
2807
- ]
2808
- },
2809
- {
2810
- "cell_type": "markdown",
2811
- "id": "def1c1c4-6b60-4864-ae24-c797fbf008a7",
2812
- "metadata": {},
2813
- "source": [
2814
- "`remove(booleans)` accepts booleans as an argument in an Array, a Vector or an Arrow::BooleanArray. Booleans must be the same length as `#size`."
2815
- ]
2816
- },
2817
- {
2818
- "cell_type": "code",
2819
- "execution_count": 78,
2820
- "id": "6f169420-7eb2-457f-8d59-7a5c90aa3fa5",
2821
- "metadata": {},
2822
- "outputs": [
2823
- {
2824
- "data": {
2825
- "text/html": [
2826
- "RedAmber::DataFrame <333 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.1</td><td>18.7</td><td>181</td><td>3750</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.5</td><td>17.4</td><td>186</td><td>3800</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>36.7</td><td>19.3</td><td>193</td><td>3450</td><td>female</td><td>2007</td></tr><tr><td colspan='8'>&#8942;</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.4</td><td>15.7</td><td>222</td><td>5750</td><td>male</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>45.2</td><td>14.8</td><td>212</td><td>5200</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>49.9</td><td>16.1</td><td>213</td><td>5400</td><td>male</td><td>2009</td></tr></table>"
2827
- ],
2828
- "text/plain": [
2829
- "#<RedAmber::DataFrame : 333 x 8 Vectors, 0x000000000000fbb8>\n",
2830
- " species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
2831
- " <string> <string> <double> <double> <uint8> ... <uint16>\n",
2832
- " 1 Adelie Torgersen 39.1 18.7 181 ... 2007\n",
2833
- " 2 Adelie Torgersen 39.5 17.4 186 ... 2007\n",
2834
- " 3 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
2835
- " 4 Adelie Torgersen 36.7 19.3 193 ... 2007\n",
2836
- " 5 Adelie Torgersen 39.3 20.6 190 ... 2007\n",
2837
- " : : : : : : ... :\n",
2838
- "331 Gentoo Biscoe 50.4 15.7 222 ... 2009\n",
2839
- "332 Gentoo Biscoe 45.2 14.8 212 ... 2009\n",
2840
- "333 Gentoo Biscoe 49.9 16.1 213 ... 2009\n"
2841
- ]
2842
- },
2843
- "execution_count": 78,
2844
- "metadata": {},
2845
- "output_type": "execute_result"
2846
- }
2847
- ],
2848
- "source": [
2849
- "# remove all observations containing nil\n",
2850
- "removed = penguins.remove { vectors.map(&:is_nil).reduce(&:|) }"
2851
- ]
2852
- },
2853
- {
2854
- "cell_type": "markdown",
2855
- "id": "5f1864c9-4ae4-4fcd-9840-ea424ef5e27d",
2856
- "metadata": {},
2857
- "source": [
2858
- "`remove {block}` is also acceptable. We can't use both arguments and a block at the same time. The block should return indices or a boolean Array with the same length as size. Block is called in the context of self."
2859
- ]
2860
- },
2861
- {
2862
- "cell_type": "code",
2863
- "execution_count": 79,
2864
- "id": "a6807c65-25e5-4ee1-8d1b-6018c46b3999",
2865
- "metadata": {},
2866
- "outputs": [
2867
- {
2868
- "data": {
2869
- "text/html": [
2870
- "RedAmber::DataFrame <140 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>36.7</td><td>19.3</td><td>193</td><td>3450</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>34.1</td><td>18.1</td><td>193</td><td>3475</td><td><i>(nil)</i></td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>37.8</td><td>17.1</td><td>186</td><td>3300</td><td><i>(nil)</i></td><td>2007</td></tr><tr><td colspan='8'>&#8942;</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.4</td><td>15.7</td><td>222</td><td>5750</td><td>male</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>49.9</td><td>16.1</td><td>213</td><td>5400</td><td>male</td><td>2009</td></tr></table>"
2871
- ],
2872
- "text/plain": [
2873
- "#<RedAmber::DataFrame : 140 x 8 Vectors, 0x000000000000fbcc>\n",
2874
- " species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
2875
- " <string> <string> <double> <double> <uint8> ... <uint16>\n",
2876
- " 1 Adelie Torgersen (nil) (nil) (nil) ... 2007\n",
2877
- " 2 Adelie Torgersen 36.7 19.3 193 ... 2007\n",
2878
- " 3 Adelie Torgersen 34.1 18.1 193 ... 2007\n",
2879
- " 4 Adelie Torgersen 37.8 17.1 186 ... 2007\n",
2880
- " 5 Adelie Torgersen 37.8 17.3 180 ... 2007\n",
2881
- " : : : : : : ... :\n",
2882
- "138 Gentoo Biscoe (nil) (nil) (nil) ... 2009\n",
2883
- "139 Gentoo Biscoe 50.4 15.7 222 ... 2009\n",
2884
- "140 Gentoo Biscoe 49.9 16.1 213 ... 2009\n"
2885
- ]
2886
- },
2887
- "execution_count": 79,
2888
- "metadata": {},
2889
- "output_type": "execute_result"
2890
- }
2891
- ],
2892
- "source": [
2893
- "# Remove data in 2*std range around mean\n",
2894
- "penguins.remove do\n",
2895
- " vector = self[:bill_length_mm]\n",
2896
- " min = vector.mean - vector.std\n",
2897
- " max = vector.mean + vector.std\n",
2898
- " vector.to_a.map { |e| (min..max).include? e }\n",
2899
- "end"
2900
- ]
2901
- },
2902
- {
2903
- "cell_type": "markdown",
2904
- "id": "591e6b22-da98-4336-b22e-c7bc9bcf2ebf",
2905
- "metadata": {},
2906
- "source": [
2907
- "## 31. Remove and nil"
2908
- ]
2909
- },
2910
- {
2911
- "cell_type": "markdown",
2912
- "id": "67926d1b-c76e-4cb7-b679-6545d850e7e4",
2913
- "metadata": {},
2914
- "source": [
2915
- "When `remove` is used with booleans, nil in booleans is treated as false. This behavior is aligned with Ruby's `nil#!`."
2916
- ]
2917
- },
2918
- {
2919
- "cell_type": "code",
2920
- "execution_count": 80,
2921
- "id": "8575614e-f702-4ee4-ac7b-745e9b32e803",
2922
- "metadata": {},
2923
- "outputs": [
2924
- {
2925
- "data": {
2926
- "text/html": [
2927
- "RedAmber::DataFrame <3 x 3 vectors> <table><tr><th>a</th><th>b</th><th>c</th></tr><tr><td>1</td><td>A</td><td>1.0</td></tr><tr><td>2</td><td>B</td><td>2.0</td></tr><tr><td><i>(nil)</i></td><td>C</td><td>3.0</td></tr></table>"
2928
- ],
2929
- "text/plain": [
2930
- "#<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fbe0>\n",
2931
- " a b c\n",
2932
- " <uint8> <string> <double>\n",
2933
- "1 1 A 1.0\n",
2934
- "2 2 B 2.0\n",
2935
- "3 (nil) C 3.0\n"
2936
- ]
2937
- },
2938
- "execution_count": 80,
2939
- "metadata": {},
2940
- "output_type": "execute_result"
2941
- }
2942
- ],
2943
- "source": [
2944
- "df = RedAmber::DataFrame.new(a: [1, 2, nil], b: %w[A B C], c: [1.0, 2, 3])"
2945
- ]
2946
- },
2947
- {
2948
- "cell_type": "code",
2949
- "execution_count": 81,
2950
- "id": "932a5e71-8cef-44e5-a789-ce97329bc001",
2951
- "metadata": {},
2952
- "outputs": [
2953
- {
2954
- "data": {
2955
- "text/plain": [
2956
- "#<RedAmber::Vector(:boolean, size=3):0x000000000000fbf4>\n",
2957
- "[true, false, nil]\n"
2958
- ]
2959
- },
2960
- "execution_count": 81,
2961
- "metadata": {},
2962
- "output_type": "execute_result"
2963
- }
2964
- ],
2965
- "source": [
2966
- "booleans = df[:a] < 2"
2967
- ]
2968
- },
2969
- {
2970
- "cell_type": "code",
2971
- "execution_count": 82,
2972
- "id": "74cf6aa6-8913-433d-97ad-bba2d548afe5",
2973
- "metadata": {},
2974
- "outputs": [
2975
- {
2976
- "data": {
2977
- "text/plain": [
2978
- "[false, true, true]"
2979
- ]
2980
- },
2981
- "execution_count": 82,
2982
- "metadata": {},
2983
- "output_type": "execute_result"
2984
- }
2985
- ],
2986
- "source": [
2987
- "booleans_invert = booleans.to_a.map(&:!)"
2988
- ]
2989
- },
2990
- {
2991
- "cell_type": "code",
2992
- "execution_count": 83,
2993
- "id": "5e466a06-cb17-4dc1-a5b0-34bfd3ffb78b",
2994
- "metadata": {},
2995
- "outputs": [
2996
- {
2997
- "data": {
2998
- "text/plain": [
2999
- "true"
3000
- ]
3001
- },
3002
- "execution_count": 83,
3003
- "metadata": {},
3004
- "output_type": "execute_result"
3005
- }
3006
- ],
3007
- "source": [
3008
- "df.slice(booleans) == df.remove(booleans_invert)"
3009
- ]
3010
- },
3011
- {
3012
- "cell_type": "markdown",
3013
- "id": "8bca0b06-2d08-4c28-8b4c-4fd088f2d2d3",
3014
- "metadata": {},
3015
- "source": [
3016
- "Whereas `Vector#invert` returns nil for nil elements. This will bring a different result. (See #26)"
3017
- ]
3018
- },
3019
- {
3020
- "cell_type": "code",
3021
- "execution_count": 84,
3022
- "id": "077b216f-0a08-413e-95c9-12789d15a9ba",
3023
- "metadata": {},
3024
- "outputs": [
3025
- {
3026
- "data": {
3027
- "text/plain": [
3028
- "#<RedAmber::Vector(:boolean, size=3):0x000000000000fc08>\n",
3029
- "[false, true, nil]\n"
3030
- ]
3031
- },
3032
- "execution_count": 84,
3033
- "metadata": {},
3034
- "output_type": "execute_result"
3035
- }
3036
- ],
3037
- "source": [
3038
- "booleans.invert"
3039
- ]
3040
- },
3041
- {
3042
- "cell_type": "code",
3043
- "execution_count": 85,
3044
- "id": "b3df62a6-c4a3-44cb-bde6-f6be12b120c8",
3045
- "metadata": {},
3046
- "outputs": [
3047
- {
3048
- "data": {
3049
- "text/html": [
3050
- "RedAmber::DataFrame <2 x 3 vectors> <table><tr><th>a</th><th>b</th><th>c</th></tr><tr><td>1</td><td>A</td><td>1.0</td></tr><tr><td><i>(nil)</i></td><td>C</td><td>3.0</td></tr></table>"
3051
- ],
3052
- "text/plain": [
3053
- "#<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fc1c>\n",
3054
- " a b c\n",
3055
- " <uint8> <string> <double>\n",
3056
- "1 1 A 1.0\n",
3057
- "2 (nil) C 3.0\n"
3058
- ]
3059
- },
3060
- "execution_count": 85,
3061
- "metadata": {},
3062
- "output_type": "execute_result"
3063
- }
3064
- ],
3065
- "source": [
3066
- "df.remove(booleans.invert)"
3067
- ]
3068
- },
3069
- {
3070
- "cell_type": "markdown",
3071
- "id": "e05f00b6-3bae-4650-8bbc-d4e0692f6f85",
3072
- "metadata": {},
3073
- "source": [
3074
- "Vector has a `#primitive_invert` method. This method returns the same result as `.to_a.map(&:!)` above."
3075
- ]
3076
- },
3077
- {
3078
- "cell_type": "code",
3079
- "execution_count": 86,
3080
- "id": "296ca3cd-a6da-4603-a576-d8c36a810e4f",
3081
- "metadata": {},
3082
- "outputs": [
3083
- {
3084
- "data": {
3085
- "text/plain": [
3086
- "#<RedAmber::Vector(:boolean, size=3):0x000000000000fc30>\n",
3087
- "[false, true, true]\n"
3088
- ]
3089
- },
3090
- "execution_count": 86,
3091
- "metadata": {},
3092
- "output_type": "execute_result"
3093
- }
3094
- ],
3095
- "source": [
3096
- "booleans.primitive_invert"
3097
- ]
3098
- },
3099
- {
3100
- "cell_type": "code",
3101
- "execution_count": 87,
3102
- "id": "ba5b8c0b-b94e-4209-adcd-258ea3b87bfd",
3103
- "metadata": {},
3104
- "outputs": [
3105
- {
3106
- "data": {
3107
- "text/html": [
3108
- "RedAmber::DataFrame <1 x 3 vectors> <table><tr><th>a</th><th>b</th><th>c</th></tr><tr><td>1</td><td>A</td><td>1.0</td></tr></table>"
3109
- ],
3110
- "text/plain": [
3111
- "#<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000fc44>\n",
3112
- " a b c\n",
3113
- " <uint8> <string> <double>\n",
3114
- "1 1 A 1.0\n"
3115
- ]
3116
- },
3117
- "execution_count": 87,
3118
- "metadata": {},
3119
- "output_type": "execute_result"
3120
- }
3121
- ],
3122
- "source": [
3123
- "df.remove(booleans.primitive_invert)"
3124
- ]
3125
- },
3126
- {
3127
- "cell_type": "code",
3128
- "execution_count": 88,
3129
- "id": "2446792f-0b0a-4642-acae-b4fec89261c1",
3130
- "metadata": {},
3131
- "outputs": [
3132
- {
3133
- "data": {
3134
- "text/plain": [
3135
- "true"
3136
- ]
3137
- },
3138
- "execution_count": 88,
3139
- "metadata": {},
3140
- "output_type": "execute_result"
3141
- }
3142
- ],
3143
- "source": [
3144
- "df.slice(booleans) == df.remove(booleans.primitive_invert)"
3145
- ]
3146
- },
3147
- {
3148
- "cell_type": "markdown",
3149
- "id": "7c23a4ad-0c17-4178-b58a-abfd8153d49b",
3150
- "metadata": {},
3151
- "source": [
3152
- "## 32. Remove nil"
3153
- ]
3154
- },
3155
- {
3156
- "cell_type": "markdown",
3157
- "id": "84c7238b-1029-416f-b495-9d045f77b22c",
3158
- "metadata": {},
3159
- "source": [
3160
- "Remove any observations containing nil."
3161
- ]
3162
- },
3163
- {
3164
- "cell_type": "code",
3165
- "execution_count": 89,
3166
- "id": "de4bb615-d14d-4c90-ab54-db2f375b9f00",
3167
- "metadata": {},
3168
- "outputs": [
3169
- {
3170
- "data": {
3171
- "text/html": [
3172
- "RedAmber::DataFrame <333 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.1</td><td>18.7</td><td>181</td><td>3750</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.5</td><td>17.4</td><td>186</td><td>3800</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>36.7</td><td>19.3</td><td>193</td><td>3450</td><td>female</td><td>2007</td></tr><tr><td colspan='8'>&#8942;</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.4</td><td>15.7</td><td>222</td><td>5750</td><td>male</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>45.2</td><td>14.8</td><td>212</td><td>5200</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>49.9</td><td>16.1</td><td>213</td><td>5400</td><td>male</td><td>2009</td></tr></table>"
3173
- ],
3174
- "text/plain": [
3175
- "#<RedAmber::DataFrame : 333 x 8 Vectors, 0x000000000000fc58>\n",
3176
- " species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
3177
- " <string> <string> <double> <double> <uint8> ... <uint16>\n",
3178
- " 1 Adelie Torgersen 39.1 18.7 181 ... 2007\n",
3179
- " 2 Adelie Torgersen 39.5 17.4 186 ... 2007\n",
3180
- " 3 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
3181
- " 4 Adelie Torgersen 36.7 19.3 193 ... 2007\n",
3182
- " 5 Adelie Torgersen 39.3 20.6 190 ... 2007\n",
3183
- " : : : : : : ... :\n",
3184
- "331 Gentoo Biscoe 50.4 15.7 222 ... 2009\n",
3185
- "332 Gentoo Biscoe 45.2 14.8 212 ... 2009\n",
3186
- "333 Gentoo Biscoe 49.9 16.1 213 ... 2009\n"
3187
- ]
3188
- },
3189
- "execution_count": 89,
3190
- "metadata": {},
3191
- "output_type": "execute_result"
3192
- }
3193
- ],
3194
- "source": [
3195
- "penguins.remove_nil"
3196
- ]
3197
- },
3198
- {
3199
- "cell_type": "markdown",
3200
- "id": "4a4ae8f9-dcf8-4dad-bb77-af076e9cadb5",
3201
- "metadata": {},
3202
- "source": [
3203
- "The roundabout way for this is to use `#remove`."
3204
- ]
3205
- },
3206
- {
3207
- "cell_type": "code",
3208
- "execution_count": 90,
3209
- "id": "27a3da5f-0ea2-4c5d-a6c3-c0e20f2224a3",
3210
- "metadata": {},
3211
- "outputs": [
3212
- {
3213
- "data": {
3214
- "text/html": [
3215
- "RedAmber::DataFrame <333 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.1</td><td>18.7</td><td>181</td><td>3750</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.5</td><td>17.4</td><td>186</td><td>3800</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>36.7</td><td>19.3</td><td>193</td><td>3450</td><td>female</td><td>2007</td></tr><tr><td colspan='8'>&#8942;</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.4</td><td>15.7</td><td>222</td><td>5750</td><td>male</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>45.2</td><td>14.8</td><td>212</td><td>5200</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>49.9</td><td>16.1</td><td>213</td><td>5400</td><td>male</td><td>2009</td></tr></table>"
3216
- ],
3217
- "text/plain": [
3218
- "#<RedAmber::DataFrame : 333 x 8 Vectors, 0x000000000000fc6c>\n",
3219
- " species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
3220
- " <string> <string> <double> <double> <uint8> ... <uint16>\n",
3221
- " 1 Adelie Torgersen 39.1 18.7 181 ... 2007\n",
3222
- " 2 Adelie Torgersen 39.5 17.4 186 ... 2007\n",
3223
- " 3 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
3224
- " 4 Adelie Torgersen 36.7 19.3 193 ... 2007\n",
3225
- " 5 Adelie Torgersen 39.3 20.6 190 ... 2007\n",
3226
- " : : : : : : ... :\n",
3227
- "331 Gentoo Biscoe 50.4 15.7 222 ... 2009\n",
3228
- "332 Gentoo Biscoe 45.2 14.8 212 ... 2009\n",
3229
- "333 Gentoo Biscoe 49.9 16.1 213 ... 2009\n"
3230
- ]
3231
- },
3232
- "execution_count": 90,
3233
- "metadata": {},
3234
- "output_type": "execute_result"
3235
- }
3236
- ],
3237
- "source": [
3238
- "penguins.remove { vectors.map(&:is_nil).reduce(&:|) }"
3239
- ]
3240
- },
3241
- {
3242
- "cell_type": "markdown",
3243
- "id": "4f2a58fd-f033-44f6-9eb4-ed893a2b5d1d",
3244
- "metadata": {},
3245
- "source": [
3246
- "## 33. Rename"
3247
- ]
3248
- },
3249
- {
3250
- "cell_type": "markdown",
3251
- "id": "c0d39506-8ae5-48e7-9dd2-acf38d4ec1a9",
3252
- "metadata": {},
3253
- "source": [
3254
- "Rename keys (column names) to create an updated DataFrame."
3255
- ]
3256
- },
3257
- {
3258
- "cell_type": "markdown",
3259
- "id": "3f6924ec-e86c-4089-ae40-6783027d3ce0",
3260
- "metadata": {},
3261
- "source": [
3262
- "`#rename(key_pairs)` accepts key_pairs as arguments. key_pairs should be a Hash of `{existing_key => new_key}` ."
3263
- ]
3264
- },
3265
- {
3266
- "cell_type": "code",
3267
- "execution_count": 91,
3268
- "id": "9396c96d-83d7-4b92-a4ca-27bc9e4d7b9d",
3269
- "metadata": {},
3270
- "outputs": [
3271
- {
3272
- "data": {
3273
- "text/html": [
3274
- "RedAmber::DataFrame <3 x 2 vectors> <table><tr><th>name</th><th>age</th></tr><tr><td>Yasuko</td><td>68</td></tr><tr><td>Rui</td><td>49</td></tr><tr><td>Hinata</td><td>28</td></tr></table>"
3275
- ],
3276
- "text/plain": [
3277
- "#<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000fc80>\n",
3278
- " name age\n",
3279
- " <string> <uint8>\n",
3280
- "1 Yasuko 68\n",
3281
- "2 Rui 49\n",
3282
- "3 Hinata 28\n"
3283
- ]
3284
- },
3285
- "execution_count": 91,
3286
- "metadata": {},
3287
- "output_type": "execute_result"
3288
- }
3289
- ],
3290
- "source": [
3291
- "h = { name: %w[Yasuko Rui Hinata], age: [68, 49, 28] }\n",
3292
- "comecome = RedAmber::DataFrame.new(h)"
3293
- ]
3294
- },
3295
- {
3296
- "cell_type": "code",
3297
- "execution_count": 92,
3298
- "id": "fad279c6-1ca0-4493-bd69-0e9ef011bff7",
3299
- "metadata": {},
3300
- "outputs": [
3301
- {
3302
- "data": {
3303
- "text/html": [
3304
- "RedAmber::DataFrame <3 x 2 vectors> <table><tr><th>name</th><th>age_in_1993</th></tr><tr><td>Yasuko</td><td>68</td></tr><tr><td>Rui</td><td>49</td></tr><tr><td>Hinata</td><td>28</td></tr></table>"
3305
- ],
3306
- "text/plain": [
3307
- "#<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000fc94>\n",
3308
- " name age_in_1993\n",
3309
- " <string> <uint8>\n",
3310
- "1 Yasuko 68\n",
3311
- "2 Rui 49\n",
3312
- "3 Hinata 28\n"
3313
- ]
3314
- },
3315
- "execution_count": 92,
3316
- "metadata": {},
3317
- "output_type": "execute_result"
3318
- }
3319
- ],
3320
- "source": [
3321
- "comecome.rename(:age => :age_in_1993)"
3322
- ]
3323
- },
3324
- {
3325
- "cell_type": "markdown",
3326
- "id": "9dabb005-9822-4c4b-aaa5-fa6f28f2ed43",
3327
- "metadata": {},
3328
- "source": [
3329
- "`#rename {block}` is also acceptable. We can't use both arguments and a block at the same time. The block should return key_pairs as a Hash of `{existing_key => new_key}`. The block is called in the context of self."
3330
- ]
3331
- },
3332
- {
3333
- "cell_type": "markdown",
3334
- "id": "aabbba20-6ef8-4da2-8dc0-0cb243cf3b23",
3335
- "metadata": {},
3336
- "source": [
3337
- "Symbol key and String key are distinguished."
3338
- ]
3339
- },
3340
- {
3341
- "cell_type": "markdown",
3342
- "id": "07f98b31-6123-4466-b4f8-f995c7cde474",
3343
- "metadata": {},
3344
- "source": [
3345
- "## 34. Assign"
3346
- ]
3347
- },
3348
- {
3349
- "cell_type": "markdown",
3350
- "id": "99f6787f-2b36-4360-b155-1c2d7874d25e",
3351
- "metadata": {},
3352
- "source": [
3353
- "Assign new or updated columns (variables) and create an updated DataFrame.\n",
3354
- "\n",
3355
- "- Columns with new keys will append new variables at right (bottom in TDR).\n",
3356
- "- Columns with existing keys will update corresponding vectors."
3357
- ]
3358
- },
3359
- {
3360
- "cell_type": "markdown",
3361
- "id": "b4b22da0-4ee2-4196-88e1-1cfea6a72f4d",
3362
- "metadata": {},
3363
- "source": [
3364
- "`#assign(key_pairs)` accepts pairs of key and values as arguments. key_pairs should be a Hash of `{key => array}` or `{key => Vector}` ."
3365
- ]
3366
- },
3367
- {
3368
- "cell_type": "code",
3369
- "execution_count": 93,
3370
- "id": "56dcfed8-a6f9-4d8c-bac3-e8ce7c0674a7",
3371
- "metadata": {},
3372
- "outputs": [
3373
- {
3374
- "data": {
3375
- "text/html": [
3376
- "RedAmber::DataFrame <3 x 2 vectors> <table><tr><th>name</th><th>age</th></tr><tr><td>Yasuko</td><td>68</td></tr><tr><td>Rui</td><td>49</td></tr><tr><td>Hinata</td><td>28</td></tr></table>"
3377
- ],
3378
- "text/plain": [
3379
- "#<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000fca8>\n",
3380
- " name age\n",
3381
- " <string> <uint8>\n",
3382
- "1 Yasuko 68\n",
3383
- "2 Rui 49\n",
3384
- "3 Hinata 28\n"
3385
- ]
3386
- },
3387
- "execution_count": 93,
3388
- "metadata": {},
3389
- "output_type": "execute_result"
3390
- }
3391
- ],
3392
- "source": [
3393
- "comecome = RedAmber::DataFrame.new( name: %w[Yasuko Rui Hinata], age: [68, 49, 28] )"
3394
- ]
3395
- },
3396
- {
3397
- "cell_type": "code",
3398
- "execution_count": 94,
3399
- "id": "8da8d282-8798-44d5-bb7b-7fa2df922308",
3400
- "metadata": {},
3401
- "outputs": [
3402
- {
3403
- "data": {
3404
- "text/html": [
3405
- "RedAmber::DataFrame <3 x 3 vectors> <table><tr><th>name</th><th>age</th><th>brother</th></tr><tr><td>Yasuko</td><td>97</td><td>Santa</td></tr><tr><td>Rui</td><td>78</td><td><i>(nil)</i></td></tr><tr><td>Hinata</td><td>57</td><td>Momotaro</td></tr></table>"
3406
- ],
3407
- "text/plain": [
3408
- "#<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fcbc>\n",
3409
- " name age brother\n",
3410
- " <string> <uint8> <string>\n",
3411
- "1 Yasuko 97 Santa\n",
3412
- "2 Rui 78 (nil)\n",
3413
- "3 Hinata 57 Momotaro\n"
3414
- ]
3415
- },
3416
- "execution_count": 94,
3417
- "metadata": {},
3418
- "output_type": "execute_result"
3419
- }
3420
- ],
3421
- "source": [
3422
- "# update :age and add :brother\n",
3423
- "assigner = { age: [97, 78, 57], brother: ['Santa', nil, 'Momotaro'] }\n",
3424
- "comecome.assign(assigner)"
3425
- ]
3426
- },
3427
- {
3428
- "cell_type": "markdown",
3429
- "id": "e6d3ddfc-b16d-4b20-83df-357e9cdb32e6",
3430
- "metadata": {},
3431
- "source": [
3432
- "`#assign {block}` is also acceptable. We can't use both arguments and a block at the same time. The block should return pairs of key and values as a Hash of `{key => array}` or `{key => Vector}`. The block is called in the context of self."
3433
- ]
3434
- },
3435
- {
3436
- "cell_type": "code",
3437
- "execution_count": 95,
3438
- "id": "8d69edd0-7ad7-4318-8033-1785ce2543db",
3439
- "metadata": {},
3440
- "outputs": [
3441
- {
3442
- "data": {
3443
- "text/html": [
3444
- "RedAmber::DataFrame <5 x 3 vectors> <table><tr><th>index</th><th>float</th><th>string</th></tr><tr><td>0</td><td>0.0</td><td>A</td></tr><tr><td>1</td><td>1.1</td><td>B</td></tr><tr><td>2</td><td>2.2</td><td>C</td></tr><tr><td>3</td><td>NaN</td><td>D</td></tr><tr><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
3445
- ],
3446
- "text/plain": [
3447
- "#<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000000fcd0>\n",
3448
- " index float string\n",
3449
- " <uint8> <double> <string>\n",
3450
- "1 0 0.0 A\n",
3451
- "2 1 1.1 B\n",
3452
- "3 2 2.2 C\n",
3453
- "4 3 NaN D\n",
3454
- "5 (nil) (nil) (nil)\n"
3455
- ]
3456
- },
3457
- "execution_count": 95,
3458
- "metadata": {},
3459
- "output_type": "execute_result"
3460
- }
3461
- ],
3462
- "source": [
3463
- "df = RedAmber::DataFrame.new(\n",
3464
- " index: [0, 1, 2, 3, nil],\n",
3465
- " float: [0.0, 1.1, 2.2, Float::NAN, nil],\n",
3466
- " string: ['A', 'B', 'C', 'D', nil])"
3467
- ]
3468
- },
3469
- {
3470
- "cell_type": "code",
3471
- "execution_count": 96,
3472
- "id": "e884af01-d82b-42e7-8e92-62baf19919cb",
3473
- "metadata": {},
3474
- "outputs": [
3475
- {
3476
- "data": {
3477
- "text/html": [
3478
- "RedAmber::DataFrame <5 x 3 vectors> <table><tr><th>index</th><th>float</th><th>string</th></tr><tr><td>0</td><td>-0.0</td><td>A</td></tr><tr><td>255</td><td>-1.1</td><td>B</td></tr><tr><td>254</td><td>-2.2</td><td>C</td></tr><tr><td>253</td><td>NaN</td><td>D</td></tr><tr><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
3479
- ],
3480
- "text/plain": [
3481
- "#<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000000fce4>\n",
3482
- " index float string\n",
3483
- " <uint8> <double> <string>\n",
3484
- "1 0 -0.0 A\n",
3485
- "2 255 -1.1 B\n",
3486
- "3 254 -2.2 C\n",
3487
- "4 253 NaN D\n",
3488
- "5 (nil) (nil) (nil)\n"
3489
- ]
3490
- },
3491
- "execution_count": 96,
3492
- "metadata": {},
3493
- "output_type": "execute_result"
3494
- }
3495
- ],
3496
- "source": [
3497
- "# update numeric variables\n",
3498
- "df.assign do\n",
3499
- " assigner = {}\n",
3500
- " vectors.each_with_index do |v, i|\n",
3501
- " assigner[keys[i]] = -v if v.numeric?\n",
3502
- " end\n",
3503
- " assigner\n",
3504
- "end"
3505
- ]
3506
- },
3507
- {
3508
- "cell_type": "markdown",
3509
- "id": "c5c83559-f4d8-4ed2-8b20-5c50eb1faa14",
3510
- "metadata": {},
3511
- "source": [
3512
- "## 35. Coerce (Vector)"
3513
- ]
3514
- },
3515
- {
3516
- "cell_type": "markdown",
3517
- "id": "77bdfc69-b728-4335-b76e-e4be92f94310",
3518
- "metadata": {},
3519
- "source": [
3520
- "Vector has a `coerce` method."
3521
- ]
3522
- },
3523
- {
3524
- "cell_type": "code",
3525
- "execution_count": 97,
3526
- "id": "2bfbe584-be54-486b-af32-e76b37c10e49",
3527
- "metadata": {},
3528
- "outputs": [
3529
- {
3530
- "data": {
3531
- "text/plain": [
3532
- "#<RedAmber::Vector(:uint8, size=3):0x000000000000fcf8>\n",
3533
- "[1, 2, 3]\n"
3534
- ]
3535
- },
3536
- "execution_count": 97,
3537
- "metadata": {},
3538
- "output_type": "execute_result"
3539
- }
3540
- ],
3541
- "source": [
3542
- "vector = RedAmber::Vector.new(1,2,3)"
3543
- ]
3544
- },
3545
- {
3546
- "cell_type": "code",
3547
- "execution_count": 98,
3548
- "id": "ce35d901-38a8-4f13-b2d1-29b83f6c5438",
3549
- "metadata": {},
3550
- "outputs": [
3551
- {
3552
- "data": {
3553
- "text/plain": [
3554
- "#<RedAmber::Vector(:int16, size=3):0x000000000000fd0c>\n",
3555
- "[-1, -2, -3]\n"
3556
- ]
3557
- },
3558
- "execution_count": 98,
3559
- "metadata": {},
3560
- "output_type": "execute_result"
3561
- }
3562
- ],
3563
- "source": [
3564
- "# Vector's `#*` method\n",
3565
- "vector * -1"
3566
- ]
3567
- },
3568
- {
3569
- "cell_type": "code",
3570
- "execution_count": 99,
3571
- "id": "7d5fc2be-f590-4678-92e9-faa27b618266",
3572
- "metadata": {},
3573
- "outputs": [
3574
- {
3575
- "data": {
3576
- "text/plain": [
3577
- "#<RedAmber::Vector(:int16, size=3):0x000000000000fd20>\n",
3578
- "[-1, -2, -3]\n"
3579
- ]
3580
- },
3581
- "execution_count": 99,
3582
- "metadata": {},
3583
- "output_type": "execute_result"
3584
- }
3585
- ],
3586
- "source": [
3587
- "# coerced calculation\n",
3588
- "-1 * vector"
3589
- ]
3590
- },
3591
- {
3592
- "cell_type": "code",
3593
- "execution_count": 100,
3594
- "id": "fa90a6af-add7-42f2-9707-7d726575aeb6",
3595
- "metadata": {},
3596
- "outputs": [
3597
- {
3598
- "data": {
3599
- "text/plain": [
3600
- "#<RedAmber::Vector(:uint8, size=3):0x000000000000fd34>\n",
3601
- "[255, 254, 253]\n"
3602
- ]
3603
- },
3604
- "execution_count": 100,
3605
- "metadata": {},
3606
- "output_type": "execute_result"
3607
- }
3608
- ],
3609
- "source": [
3610
- "# `-@` operator\n",
3611
- "-vector"
3612
- ]
3613
- },
3614
- {
3615
- "cell_type": "markdown",
3616
- "id": "4820b527-44e9-4738-aa0e-73604078b3b0",
3617
- "metadata": {
3618
- "tags": []
3619
- },
3620
- "source": [
3621
- "## 36. to_ary (Vector)"
3622
- ]
3623
- },
3624
- {
3625
- "cell_type": "markdown",
3626
- "id": "8507dcc4-74e3-44ad-aa54-cf43d55f2131",
3627
- "metadata": {},
3628
- "source": [
3629
- "`Vector#to_ary` will enable implicit conversion to an Array."
3630
- ]
3631
- },
3632
- {
3633
- "cell_type": "code",
3634
- "execution_count": 101,
3635
- "id": "b12bd7c8-2981-426c-8ae3-154504a8ea15",
3636
- "metadata": {},
3637
- "outputs": [
3638
- {
3639
- "data": {
3640
- "text/plain": [
3641
- "[3, 4, 5]"
3642
- ]
3643
- },
3644
- "execution_count": 101,
3645
- "metadata": {},
3646
- "output_type": "execute_result"
3647
- }
3648
- ],
3649
- "source": [
3650
- "Array(Vector.new([3, 4, 5]))"
3651
- ]
3652
- },
3653
- {
3654
- "cell_type": "code",
3655
- "execution_count": 102,
3656
- "id": "c0cb5a98-7cdf-43a8-b2f7-f9df1961c761",
3657
- "metadata": {},
3658
- "outputs": [
3659
- {
3660
- "data": {
3661
- "text/plain": [
3662
- "[1, 2, 3, 4, 5]"
3663
- ]
3664
- },
3665
- "execution_count": 102,
3666
- "metadata": {},
3667
- "output_type": "execute_result"
3668
- }
3669
- ],
3670
- "source": [
3671
- "[1, 2] + Vector.new([3, 4, 5])"
3672
- ]
3673
- },
3674
- {
3675
- "cell_type": "markdown",
3676
- "id": "216dde4f-e4d8-4f29-903a-8cbf75de5b8e",
3677
- "metadata": {},
3678
- "source": [
3679
- "## 37. Fill nil (Vector)"
3680
- ]
3681
- },
3682
- {
3683
- "cell_type": "markdown",
3684
- "id": "1959d0d7-6d09-4fa5-9365-1e2f7fc35d61",
3685
- "metadata": {},
3686
- "source": [
3687
- "`Vector#fill_nil_forward` or `Vector#fill_nil_backward` will\n",
3688
- "propagate the last valid observation forward (or backward).\n",
3689
- "A nil is preserved when there is no valid value to propagate: when all previous values are nil (forward), or when it is at the end (backward)."
3690
- ]
3691
- },
3692
- {
3693
- "cell_type": "code",
3694
- "execution_count": 103,
3695
- "id": "d003b06a-859f-4de0-9e35-803efac85169",
3696
- "metadata": {},
3697
- "outputs": [
3698
- {
3699
- "data": {
3700
- "text/plain": [
3701
- "#<RedAmber::Vector(:uint8, size=5):0x000000000000fd48>\n",
3702
- "[0, 1, 1, 3, 3]\n"
3703
- ]
3704
- },
3705
- "execution_count": 103,
3706
- "metadata": {},
3707
- "output_type": "execute_result"
3708
- }
3709
- ],
3710
- "source": [
3711
- "integer = Vector.new([0, 1, nil, 3, nil])\n",
3712
- "integer.fill_nil_forward"
3713
- ]
3714
- },
3715
- {
3716
- "cell_type": "code",
3717
- "execution_count": 104,
3718
- "id": "c5d74006-d364-4e86-8a5e-9e96e87a96e0",
3719
- "metadata": {},
3720
- "outputs": [
3721
- {
3722
- "data": {
3723
- "text/plain": [
3724
- "#<RedAmber::Vector(:uint8, size=5):0x000000000000fd5c>\n",
3725
- "[0, 1, 3, 3, nil]\n"
3726
- ]
3727
- },
3728
- "execution_count": 104,
3729
- "metadata": {},
3730
- "output_type": "execute_result"
3731
- }
3732
- ],
3733
- "source": [
3734
- "integer.fill_nil_backward"
3735
- ]
3736
- },
3737
- {
3738
- "cell_type": "markdown",
3739
- "id": "347785a6-eab0-4864-a871-2c320005211e",
3740
- "metadata": {},
3741
- "source": [
3742
- "## 38. all?/any? (Vector)"
3743
- ]
3744
- },
3745
- {
3746
- "cell_type": "markdown",
3747
- "id": "f82a6f5d-03d3-4645-85f5-d25999165378",
3748
- "metadata": {},
3749
- "source": [
3750
- "`Vector#all?` returns true if all elements are true.\n",
3751
- "\n",
3752
- "`Vector#any?` returns true if any element is true.\n",
3753
- "\n",
3754
- "These are unary aggregation functions."
3755
- ]
3756
- },
3757
- {
3758
- "cell_type": "code",
3759
- "execution_count": 105,
3760
- "id": "ebad37ad-0a09-48b1-ba3a-4e030a917837",
3761
- "metadata": {},
3762
- "outputs": [
3763
- {
3764
- "data": {
3765
- "text/plain": [
3766
- "true"
3767
- ]
3768
- },
3769
- "execution_count": 105,
3770
- "metadata": {},
3771
- "output_type": "execute_result"
3772
- }
3773
- ],
3774
- "source": [
3775
- "booleans = Vector.new([true, true, nil])\n",
3776
- "booleans.all?"
3777
- ]
3778
- },
3779
- {
3780
- "cell_type": "code",
3781
- "execution_count": 106,
3782
- "id": "97fc24da-03d4-406d-b353-562896775d60",
3783
- "metadata": {},
3784
- "outputs": [
3785
- {
3786
- "data": {
3787
- "text/plain": [
3788
- "true"
3789
- ]
3790
- },
3791
- "execution_count": 106,
3792
- "metadata": {},
3793
- "output_type": "execute_result"
3794
- }
3795
- ],
3796
- "source": [
3797
- "booleans.any?"
3798
- ]
3799
- },
3800
- {
3801
- "cell_type": "markdown",
3802
- "id": "0ff3b22d-9f7c-42f2-8d18-c89a06af681b",
3803
- "metadata": {},
3804
- "source": [
3805
- "If these methods are used with the option `skip_nulls: false`, nil is considered."
3806
- ]
3807
- },
3808
- {
3809
- "cell_type": "code",
3810
- "execution_count": 107,
3811
- "id": "3e0e5800-665a-4a05-b2cb-d152f3f077de",
3812
- "metadata": {},
3813
- "outputs": [
3814
- {
3815
- "data": {
3816
- "text/plain": [
3817
- "false"
3818
- ]
3819
- },
3820
- "execution_count": 107,
3821
- "metadata": {},
3822
- "output_type": "execute_result"
3823
- }
3824
- ],
3825
- "source": [
3826
- "booleans.all?(skip_nulls: false)"
3827
- ]
3828
- },
3829
- {
3830
- "cell_type": "code",
3831
- "execution_count": 108,
3832
- "id": "3e43f0c4-a254-4735-ac28-de14d2670c67",
3833
- "metadata": {},
3834
- "outputs": [
3835
- {
3836
- "data": {
3837
- "text/plain": [
3838
- "true"
3839
- ]
3840
- },
3841
- "execution_count": 108,
3842
- "metadata": {},
3843
- "output_type": "execute_result"
3844
- }
3845
- ],
3846
- "source": [
3847
- "booleans.any?(skip_nulls: false)"
3848
- ]
3849
- },
3850
- {
3851
- "cell_type": "markdown",
3852
- "id": "abc71a85-7958-4a21-91cf-8c96c0784525",
3853
- "metadata": {},
3854
- "source": [
3855
- "## 39. count/count_uniq (Vector)"
3856
- ]
3857
- },
3858
- {
3859
- "cell_type": "markdown",
3860
- "id": "3d556118-4105-4d12-806d-ba56c6ae3d1b",
3861
- "metadata": {},
3862
- "source": [
3863
- "`Vector#count` counts elements.\n",
3864
- "\n",
3865
- "`Vector#count_uniq` counts unique elements. `#count_distinct` is an alias (Arrow's name).\n",
3866
- "\n",
3867
- "These are unary aggregation functions."
3868
- ]
3869
- },
3870
- {
3871
- "cell_type": "code",
3872
- "execution_count": 109,
3873
- "id": "2af73e32-1d7e-4f80-b54e-c40ef08b7034",
3874
- "metadata": {},
3875
- "outputs": [
3876
- {
3877
- "data": {
3878
- "text/plain": [
3879
- "3"
3880
- ]
3881
- },
3882
- "execution_count": 109,
3883
- "metadata": {},
3884
- "output_type": "execute_result"
3885
- }
3886
- ],
3887
- "source": [
3888
- "string = Vector.new(%w[A B A])\n",
3889
- "string.count"
3890
- ]
3891
- },
3892
- {
3893
- "cell_type": "code",
3894
- "execution_count": 110,
3895
- "id": "fe6d8d85-27b0-438f-b1b4-1b15e9eb05f9",
3896
- "metadata": {},
3897
- "outputs": [
3898
- {
3899
- "data": {
3900
- "text/plain": [
3901
- "2"
3902
- ]
3903
- },
3904
- "execution_count": 110,
3905
- "metadata": {},
3906
- "output_type": "execute_result"
3907
- }
3908
- ],
3909
- "source": [
3910
- "string.count_uniq # count_distinct is also OK"
3911
- ]
3912
- },
3913
- {
3914
- "cell_type": "markdown",
3915
- "id": "70abed9f-665a-4ea7-939e-4b185ee53755",
3916
- "metadata": {},
3917
- "source": [
3918
- "## 40. stddev/variance (Vector)"
3919
- ]
3920
- },
3921
- {
3922
- "cell_type": "markdown",
3923
- "id": "965de338-b3be-4d33-92e1-5ad7e2ed18f0",
3924
- "metadata": {},
3925
- "source": [
3926
- "These are unary aggregation functions."
3927
- ]
3928
- },
3929
- {
3930
- "cell_type": "code",
3931
- "execution_count": 111,
3932
- "id": "0afec200-f377-432b-a260-ae5a0c5ce794",
3933
- "metadata": {},
3934
- "outputs": [
3935
- {
3936
- "data": {
3937
- "text/plain": [
3938
- "0.816496580927726"
3939
- ]
3940
- },
3941
- "execution_count": 111,
3942
- "metadata": {},
3943
- "output_type": "execute_result"
3944
- }
3945
- ],
3946
- "source": [
3947
- "integers = Vector.new([1, 2, 3, nil])\n",
3948
- "integers.stddev"
3949
- ]
3950
- },
3951
- {
3952
- "cell_type": "code",
3953
- "execution_count": 112,
3954
- "id": "2e40ac09-cb7f-4978-87e8-53f84f16f7c7",
3955
- "metadata": {},
3956
- "outputs": [
3957
- {
3958
- "data": {
3959
- "text/plain": [
3960
- "1.0"
3961
- ]
3962
- },
3963
- "execution_count": 112,
3964
- "metadata": {},
3965
- "output_type": "execute_result"
3966
- }
3967
- ],
3968
- "source": [
3969
- "# Unbiased standard deviation\n",
3970
- "integers.sd"
3971
- ]
3972
- },
3973
- {
3974
- "cell_type": "code",
3975
- "execution_count": 113,
3976
- "id": "e6158e3b-4af8-467c-a355-8e9f2e579548",
3977
- "metadata": {},
3978
- "outputs": [
3979
- {
3980
- "data": {
3981
- "text/plain": [
3982
- "0.6666666666666666"
3983
- ]
3984
- },
3985
- "execution_count": 113,
3986
- "metadata": {},
3987
- "output_type": "execute_result"
3988
- }
3989
- ],
3990
- "source": [
3991
- "integers.variance"
3992
- ]
3993
- },
3994
- {
3995
- "cell_type": "code",
3996
- "execution_count": 114,
3997
- "id": "d64d39f2-d979-49f1-9946-65890f40d646",
3998
- "metadata": {},
3999
- "outputs": [
4000
- {
4001
- "data": {
4002
- "text/plain": [
4003
- "1.0"
4004
- ]
4005
- },
4006
- "execution_count": 114,
4007
- "metadata": {},
4008
- "output_type": "execute_result"
4009
- }
4010
- ],
4011
- "source": [
4012
- "# Unbiased variance\n",
4013
- "integers.var"
4014
- ]
4015
- },
4016
- {
4017
- "cell_type": "markdown",
4018
- "id": "25023f5a-798a-40a5-ab84-a6615602f747",
4019
- "metadata": {},
4020
- "source": [
4021
- "## 41. negate (Vector)"
4022
- ]
4023
- },
4024
- {
4025
- "cell_type": "markdown",
4026
- "id": "00ddf322-ef50-40a1-86a6-22bf3d43f007",
4027
- "metadata": {},
4028
- "source": [
4029
- "This is a unary element-wise function."
4030
- ]
4031
- },
4032
- {
4033
- "cell_type": "code",
4034
- "execution_count": 115,
4035
- "id": "ab5a357a-e98c-40a1-9b89-0b38645e416f",
4036
- "metadata": {},
4037
- "outputs": [
4038
- {
4039
- "data": {
4040
- "text/plain": [
4041
- "#<RedAmber::Vector(:double, size=3):0x000000000000fd70>\n",
4042
- "[-1.0, 2.0, -3.0]\n"
4043
- ]
4044
- },
4045
- "execution_count": 115,
4046
- "metadata": {},
4047
- "output_type": "execute_result"
4048
- }
4049
- ],
4050
- "source": [
4051
- "double = Vector.new([1.0, -2, 3])\n",
4052
- "double.negate"
4053
- ]
4054
- },
4055
- {
4056
- "cell_type": "code",
4057
- "execution_count": 116,
4058
- "id": "8a06c856-d61c-4752-a296-1fa207ffd9a1",
4059
- "metadata": {},
4060
- "outputs": [
4061
- {
4062
- "data": {
4063
- "text/plain": [
4064
- "#<RedAmber::Vector(:double, size=3):0x000000000000fd84>\n",
4065
- "[-1.0, 2.0, -3.0]\n"
4066
- ]
4067
- },
4068
- "execution_count": 116,
4069
- "metadata": {},
4070
- "output_type": "execute_result"
4071
- }
4072
- ],
4073
- "source": [
4074
- "# Same as #negate\n",
4075
- "-double"
4076
- ]
4077
- },
4078
- {
4079
- "cell_type": "markdown",
4080
- "id": "9b145724-d165-4ef3-8a06-2948dd0c7dbb",
4081
- "metadata": {},
4082
- "source": [
4083
- "## 42. round (Vector)"
4084
- ]
4085
- },
4086
- {
4087
- "cell_type": "markdown",
4088
- "id": "b780c2f3-935c-4b2f-b18a-b277cf7c24b7",
4089
- "metadata": {},
4090
- "source": [
4091
- "Options for `#round`:\n",
4092
- "\n",
4093
- "- `:n_digits` The number of digits to show.\n",
4094
- "- `:mode` Specify rounding mode.\n",
4095
- "\n",
4096
- "This is a unary element-wise function."
4097
- ]
4098
- },
4099
- {
4100
- "cell_type": "code",
4101
- "execution_count": 117,
4102
- "id": "e7a069b0-3547-4cd2-a2f0-0740f186b191",
4103
- "metadata": {},
4104
- "outputs": [
4105
- {
4106
- "data": {
4107
- "text/plain": [
4108
- "#<RedAmber::Vector(:double, size=5):0x000000000000fd98>\n",
4109
- "[15.15, 2.5, 3.5, -4.5, -5.5]\n"
4110
- ]
4111
- },
4112
- "execution_count": 117,
4113
- "metadata": {},
4114
- "output_type": "execute_result"
4115
- }
4116
- ],
4117
- "source": [
4118
- "double = RedAmber::Vector.new([15.15, 2.5, 3.5, -4.5, -5.5])"
4119
- ]
4120
- },
4121
- {
4122
- "cell_type": "code",
4123
- "execution_count": 118,
4124
- "id": "5ee84b24-8830-4788-a404-d5e1cca22abf",
4125
- "metadata": {},
4126
- "outputs": [
4127
- {
4128
- "data": {
4129
- "text/plain": [
4130
- "#<RedAmber::Vector(:double, size=5):0x000000000000fdac>\n",
4131
- "[15.0, 2.0, 4.0, -4.0, -6.0]\n"
4132
- ]
4133
- },
4134
- "execution_count": 118,
4135
- "metadata": {},
4136
- "output_type": "execute_result"
4137
- }
4138
- ],
4139
- "source": [
4140
- "double.round"
4141
- ]
4142
- },
4143
- {
4144
- "cell_type": "code",
4145
- "execution_count": 119,
4146
- "id": "20adb1ad-473c-4245-b959-7848c239fb76",
4147
- "metadata": {},
4148
- "outputs": [
4149
- {
4150
- "data": {
4151
- "text/plain": [
4152
- "#<RedAmber::Vector(:double, size=5):0x000000000000fdc0>\n",
4153
- "[15.0, 2.0, 4.0, -4.0, -6.0]\n"
4154
- ]
4155
- },
4156
- "execution_count": 119,
4157
- "metadata": {},
4158
- "output_type": "execute_result"
4159
- }
4160
- ],
4161
- "source": [
4162
- "double.round(mode: :half_to_even)"
4163
- ]
4164
- },
4165
- {
4166
- "cell_type": "code",
4167
- "execution_count": 120,
4168
- "id": "d2777ad8-2c24-48e4-8f5f-77403e3109ea",
4169
- "metadata": {},
4170
- "outputs": [
4171
- {
4172
- "data": {
4173
- "text/plain": [
4174
- "#<RedAmber::Vector(:double, size=5):0x000000000000fdd4>\n",
4175
- "[16.0, 3.0, 4.0, -5.0, -6.0]\n"
4176
- ]
4177
- },
4178
- "execution_count": 120,
4179
- "metadata": {},
4180
- "output_type": "execute_result"
4181
- }
4182
- ],
4183
- "source": [
4184
- "double.round(mode: :towards_infinity)"
4185
- ]
4186
- },
4187
- {
4188
- "cell_type": "code",
4189
- "execution_count": 121,
4190
- "id": "a8ab2735-74cb-4cfe-a5a2-61bfa90c72ac",
4191
- "metadata": {},
4192
- "outputs": [
4193
- {
4194
- "data": {
4195
- "text/plain": [
4196
- "#<RedAmber::Vector(:double, size=5):0x000000000000fde8>\n",
4197
- "[15.0, 3.0, 4.0, -4.0, -5.0]\n"
4198
- ]
4199
- },
4200
- "execution_count": 121,
4201
- "metadata": {},
4202
- "output_type": "execute_result"
4203
- }
4204
- ],
4205
- "source": [
4206
- "double.round(mode: :half_up)"
4207
- ]
4208
- },
4209
- {
4210
- "cell_type": "code",
4211
- "execution_count": 122,
4212
- "id": "3575481c-40ed-405f-a69c-7581d4dce2cf",
4213
- "metadata": {},
4214
- "outputs": [
4215
- {
4216
- "data": {
4217
- "text/plain": [
4218
- "#<RedAmber::Vector(:double, size=5):0x000000000000fdfc>\n",
4219
- "[15.0, 2.0, 3.0, -4.0, -5.0]\n"
4220
- ]
4221
- },
4222
- "execution_count": 122,
4223
- "metadata": {},
4224
- "output_type": "execute_result"
4225
- }
4226
- ],
4227
- "source": [
4228
- "double.round(mode: :half_towards_zero)"
4229
- ]
4230
- },
4231
- {
4232
- "cell_type": "code",
4233
- "execution_count": 123,
4234
- "id": "a86e4c5c-aced-4a88-b692-4e26b90f1653",
4235
- "metadata": {},
4236
- "outputs": [
4237
- {
4238
- "data": {
4239
- "text/plain": [
4240
- "#<RedAmber::Vector(:double, size=5):0x000000000000fe10>\n",
4241
- "[15.0, 3.0, 4.0, -5.0, -6.0]\n"
4242
- ]
4243
- },
4244
- "execution_count": 123,
4245
- "metadata": {},
4246
- "output_type": "execute_result"
4247
- }
4248
- ],
4249
- "source": [
4250
- "double.round(mode: :half_towards_infinity)"
4251
- ]
4252
- },
4253
- {
4254
- "cell_type": "code",
4255
- "execution_count": 124,
4256
- "id": "73f51bab-ff46-4b99-96a5-8c6547ad9d35",
4257
- "metadata": {},
4258
- "outputs": [
4259
- {
4260
- "data": {
4261
- "text/plain": [
4262
- "#<RedAmber::Vector(:double, size=5):0x000000000000fe24>\n",
4263
- "[15.0, 3.0, 3.0, -5.0, -5.0]\n"
4264
- ]
4265
- },
4266
- "execution_count": 124,
4267
- "metadata": {},
4268
- "output_type": "execute_result"
4269
- }
4270
- ],
4271
- "source": [
4272
- "double.round(mode: :half_to_odd)"
4273
- ]
4274
- },
4275
- {
4276
- "cell_type": "code",
4277
- "execution_count": 125,
4278
- "id": "a12c684c-4a63-4dac-a81b-969978812a24",
4279
- "metadata": {},
4280
- "outputs": [
4281
- {
4282
- "data": {
4283
- "text/plain": [
4284
- "#<RedAmber::Vector(:double, size=5):0x000000000000fe38>\n",
4285
- "[15.0, 2.0, 4.0, -4.0, -6.0]\n"
4286
- ]
4287
- },
4288
- "execution_count": 125,
4289
- "metadata": {},
4290
- "output_type": "execute_result"
4291
- }
4292
- ],
4293
- "source": [
4294
- "double.round(n_digits: 0)"
4295
- ]
4296
- },
4297
- {
4298
- "cell_type": "code",
4299
- "execution_count": 126,
4300
- "id": "17370f2b-0957-411b-8145-56aa9fc956ac",
4301
- "metadata": {},
4302
- "outputs": [
4303
- {
4304
- "data": {
4305
- "text/plain": [
4306
- "#<RedAmber::Vector(:double, size=5):0x000000000000fe4c>\n",
4307
- "[15.2, 2.5, 3.5, -4.5, -5.5]\n"
4308
- ]
4309
- },
4310
- "execution_count": 126,
4311
- "metadata": {},
4312
- "output_type": "execute_result"
4313
- }
4314
- ],
4315
- "source": [
4316
- "double.round(n_digits: 1)"
4317
- ]
4318
- },
4319
- {
4320
- "cell_type": "code",
4321
- "execution_count": 127,
4322
- "id": "53072cff-b28b-4672-b30a-8ca37562bc21",
4323
- "metadata": {},
4324
- "outputs": [
4325
- {
4326
- "data": {
4327
- "text/plain": [
4328
- "#<RedAmber::Vector(:double, size=5):0x000000000000fe60>\n",
4329
- "[20.0, 0.0, 0.0, -0.0, -10.0]\n"
4330
- ]
4331
- },
4332
- "execution_count": 127,
4333
- "metadata": {},
4334
- "output_type": "execute_result"
4335
- }
4336
- ],
4337
- "source": [
4338
- "double.round(n_digits: -1)"
4339
- ]
4340
- },
4341
- {
4342
- "cell_type": "markdown",
4343
- "id": "51dedfce-51c7-4e5b-b890-a90ad9cf7596",
4344
- "metadata": {},
4345
- "source": [
4346
- "## 43. and/or (Vector)"
4347
- ]
4348
- },
4349
- {
4350
- "cell_type": "markdown",
4351
- "id": "b2c4869b-6ebf-476c-b2fd-a4b9c0638dc5",
4352
- "metadata": {},
4353
- "source": [
4354
- "RedAmber selects `and_kleene`/`or_kleene` as the default `&`/`|` methods.\n",
4355
- "\n",
4356
- "These are binary element-wise functions."
4357
- ]
4358
- },
4359
- {
4360
- "cell_type": "code",
4361
- "execution_count": 128,
4362
- "id": "2d4f5853-1ed9-4d8b-87a9-b5c1faac5fae",
4363
- "metadata": {},
4364
- "outputs": [
4365
- {
4366
- "data": {
4367
- "text/plain": [
4368
- "#<RedAmber::Vector(:boolean, size=9):0x000000000000fe74>\n",
4369
- "[true, false, nil, false, false, false, nil, false, nil]\n"
4370
- ]
4371
- },
4372
- "execution_count": 128,
4373
- "metadata": {},
4374
- "output_type": "execute_result"
4375
- }
4376
- ],
4377
- "source": [
4378
- "bool_self = Vector.new([true, true, true, false, false, false, nil, nil, nil])\n",
4379
- "bool_other = Vector.new([true, false, nil, true, false, nil, true, false, nil])\n",
4380
- "\n",
4381
- "bool_self & bool_other # same as bool_self.and_kleene(bool_other)"
4382
- ]
4383
- },
4384
- {
4385
- "cell_type": "code",
4386
- "execution_count": 129,
4387
- "id": "236c9733-8d45-467e-b288-e6c18b9c39d2",
4388
- "metadata": {},
4389
- "outputs": [
4390
- {
4391
- "data": {
4392
- "text/plain": [
4393
- "#<RedAmber::Vector(:boolean, size=9):0x000000000000fe88>\n",
4394
- "[true, false, nil, true, false, nil, true, false, nil]\n"
4395
- ]
4396
- },
4397
- "execution_count": 129,
4398
- "metadata": {},
4399
- "output_type": "execute_result"
4400
- }
4401
- ],
4402
- "source": [
4403
- "# Ruby's primitive `&&`\n",
4404
- "bool_self && bool_other"
4405
- ]
4406
- },
4407
- {
4408
- "cell_type": "code",
4409
- "execution_count": 130,
4410
- "id": "4e984a9c-7d9c-465d-bf26-0c685dedd4bf",
4411
- "metadata": {},
4412
- "outputs": [
4413
- {
4414
- "data": {
4415
- "text/plain": [
4416
- "#<RedAmber::Vector(:boolean, size=9):0x000000000000fe9c>\n",
4417
- "[true, false, nil, false, false, nil, nil, nil, nil]\n"
4418
- ]
4419
- },
4420
- "execution_count": 130,
4421
- "metadata": {},
4422
- "output_type": "execute_result"
4423
- }
4424
- ],
4425
- "source": [
4426
- "# Arrow's default `and`\n",
4427
- "bool_self.and_org(bool_other)"
4428
- ]
4429
- },
4430
- {
4431
- "cell_type": "code",
4432
- "execution_count": 131,
4433
- "id": "0120ebf5-355d-41f5-83d5-49b9802f337b",
4434
- "metadata": {},
4435
- "outputs": [
4436
- {
4437
- "data": {
4438
- "text/plain": [
4439
- "#<RedAmber::Vector(:boolean, size=9):0x000000000000feb0>\n",
4440
- "[true, true, true, true, false, nil, true, nil, nil]\n"
4441
- ]
4442
- },
4443
- "execution_count": 131,
4444
- "metadata": {},
4445
- "output_type": "execute_result"
4446
- }
4447
- ],
4448
- "source": [
4449
- "bool_self | bool_other # same as bool_self.or_kleene(bool_other)"
4450
- ]
4451
- },
4452
- {
4453
- "cell_type": "code",
4454
- "execution_count": 132,
4455
- "id": "24ceee23-79df-4fcd-afd8-f3839a087785",
4456
- "metadata": {},
4457
- "outputs": [
4458
- {
4459
- "data": {
4460
- "text/plain": [
4461
- "#<RedAmber::Vector(:boolean, size=9):0x000000000000fec4>\n",
4462
- "[true, true, true, false, false, false, nil, nil, nil]\n"
4463
- ]
4464
- },
4465
- "execution_count": 132,
4466
- "metadata": {},
4467
- "output_type": "execute_result"
4468
- }
4469
- ],
4470
- "source": [
4471
- "# Ruby's primitive `||`\n",
4472
- "bool_self || bool_other"
4473
- ]
4474
- },
4475
- {
4476
- "cell_type": "code",
4477
- "execution_count": 133,
4478
- "id": "c152d04b-71a0-4b18-acd1-b5ab9e413d00",
4479
- "metadata": {},
4480
- "outputs": [
4481
- {
4482
- "data": {
4483
- "text/plain": [
4484
- "#<RedAmber::Vector(:boolean, size=9):0x000000000000fed8>\n",
4485
- "[true, true, nil, true, false, nil, nil, nil, nil]\n"
4486
- ]
4487
- },
4488
- "execution_count": 133,
4489
- "metadata": {},
4490
- "output_type": "execute_result"
4491
- }
4492
- ],
4493
- "source": [
4494
- "# Arrow's default `or`\n",
4495
- "bool_self.or_org(bool_other)"
4496
- ]
4497
- },
4498
- {
4499
- "cell_type": "markdown",
4500
- "id": "beede237-c5ed-4e12-a432-ec7e4546d786",
4501
- "metadata": {},
4502
- "source": [
4503
- "## 44. is_finite/is_inf/is_nan/is_nil/is_na/is_valid (Vector)"
4504
- ]
4505
- },
4506
- {
4507
- "cell_type": "markdown",
4508
- "id": "77418efd-c0d7-4d63-a7db-2d43fafd386e",
4509
- "metadata": {},
4510
- "source": [
4511
- "These are unary element-wise functions."
4512
- ]
4513
- },
4514
- {
4515
- "cell_type": "code",
4516
- "execution_count": 134,
4517
- "id": "19558f9e-fdc4-46e5-90d0-724e4e8fbd8e",
4518
- "metadata": {},
4519
- "outputs": [
4520
- {
4521
- "data": {
4522
- "text/plain": [
4523
- "#<RedAmber::Vector(:double, size=5):0x000000000000feec>\n",
4524
- "[3.141592653589793, Infinity, -Infinity, NaN, nil]\n"
4525
- ]
4526
- },
4527
- "execution_count": 134,
4528
- "metadata": {},
4529
- "output_type": "execute_result"
4530
- }
4531
- ],
4532
- "source": [
4533
- "double = Vector.new([Math::PI, Float::INFINITY, -Float::INFINITY, Float::NAN, nil])"
4534
- ]
4535
- },
4536
- {
4537
- "cell_type": "code",
4538
- "execution_count": 135,
4539
- "id": "d90a7168-1f87-4363-9589-c1f161babc7d",
4540
- "metadata": {},
4541
- "outputs": [
4542
- {
4543
- "data": {
4544
- "text/plain": [
4545
- "#<RedAmber::Vector(:boolean, size=5):0x000000000000ff00>\n",
4546
- "[true, false, false, false, nil]\n"
4547
- ]
4548
- },
4549
- "execution_count": 135,
4550
- "metadata": {},
4551
- "output_type": "execute_result"
4552
- }
4553
- ],
4554
- "source": [
4555
- "double.is_finite"
4556
- ]
4557
- },
4558
- {
4559
- "cell_type": "code",
4560
- "execution_count": 136,
4561
- "id": "7d88049b-695f-4b0c-a105-8fb5797a58b1",
4562
- "metadata": {},
4563
- "outputs": [
4564
- {
4565
- "data": {
4566
- "text/plain": [
4567
- "#<RedAmber::Vector(:boolean, size=5):0x000000000000ff14>\n",
4568
- "[false, true, true, false, nil]\n"
4569
- ]
4570
- },
4571
- "execution_count": 136,
4572
- "metadata": {},
4573
- "output_type": "execute_result"
4574
- }
4575
- ],
4576
- "source": [
4577
- "double.is_inf"
4578
- ]
4579
- },
4580
- {
4581
- "cell_type": "code",
4582
- "execution_count": 137,
4583
- "id": "7d86a7b5-84bf-4031-9811-4076281920cf",
4584
- "metadata": {},
4585
- "outputs": [
4586
- {
4587
- "data": {
4588
- "text/plain": [
4589
- "#<RedAmber::Vector(:boolean, size=5):0x000000000000ff28>\n",
4590
- "[false, false, false, true, true]\n"
4591
- ]
4592
- },
4593
- "execution_count": 137,
4594
- "metadata": {},
4595
- "output_type": "execute_result"
4596
- }
4597
- ],
4598
- "source": [
4599
- "double.is_na"
4600
- ]
4601
- },
4602
- {
4603
- "cell_type": "code",
4604
- "execution_count": 138,
4605
- "id": "d562f826-7a37-4c57-8f92-777555987246",
4606
- "metadata": {},
4607
- "outputs": [
4608
- {
4609
- "data": {
4610
- "text/plain": [
4611
- "#<RedAmber::Vector(:boolean, size=5):0x000000000000ff3c>\n",
4612
- "[false, false, false, false, true]\n"
4613
- ]
4614
- },
4615
- "execution_count": 138,
4616
- "metadata": {},
4617
- "output_type": "execute_result"
4618
- }
4619
- ],
4620
- "source": [
4621
- "double.is_nil"
4622
- ]
4623
- },
4624
- {
4625
- "cell_type": "code",
4626
- "execution_count": 139,
4627
- "id": "e460dc6b-e48f-4462-9ce8-aa6069ebae27",
4628
- "metadata": {},
4629
- "outputs": [
4630
- {
4631
- "data": {
4632
- "text/plain": [
4633
- "#<RedAmber::Vector(:boolean, size=5):0x000000000000ff50>\n",
4634
- "[true, true, true, true, false]\n"
4635
- ]
4636
- },
4637
- "execution_count": 139,
4638
- "metadata": {},
4639
- "output_type": "execute_result"
4640
- }
4641
- ],
4642
- "source": [
4643
- "double.is_valid"
4644
- ]
4645
- },
4646
- {
4647
- "cell_type": "markdown",
4648
- "id": "2cca75eb-f0e8-4f85-89cb-3601512e76b0",
4649
- "metadata": {},
4650
- "source": [
4651
- "## 45. Prime-th rows"
4652
- ]
4653
- },
4654
- {
4655
- "cell_type": "code",
4656
- "execution_count": 140,
4657
- "id": "e0e56ecc-b24c-4a40-b3bb-26bb64eb59ef",
4658
- "metadata": {},
4659
- "outputs": [
4660
- {
4661
- "data": {
4662
- "text/html": [
4663
- "RedAmber::DataFrame <68 x 9 vectors> <table><tr><th>index</th><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>2</td><td>Adelie</td><td>Torgersen</td><td>39.5</td><td>17.4</td><td>186</td><td>3800</td><td>female</td><td>2007</td></tr><tr><td>3</td><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td>5</td><td>Adelie</td><td>Torgersen</td><td>36.7</td><td>19.3</td><td>193</td><td>3450</td><td>female</td><td>2007</td></tr><tr><td>7</td><td>Adelie</td><td>Torgersen</td><td>38.9</td><td>17.8</td><td>181</td><td>3625</td><td>female</td><td>2007</td></tr><tr><td colspan='9'>&#8942;</td></tr><tr><td>317</td><td>Gentoo</td><td>Biscoe</td><td>49.4</td><td>15.8</td><td>216</td><td>4925</td><td>male</td><td>2009</td></tr><tr><td>331</td><td>Gentoo</td><td>Biscoe</td><td>50.5</td><td>15.2</td><td>216</td><td>5000</td><td>female</td><td>2009</td></tr><tr><td>337</td><td>Gentoo</td><td>Biscoe</td><td>44.5</td><td>15.7</td><td>217</td><td>4875</td><td><i>(nil)</i></td><td>2009</td></tr></table>"
4664
- ],
4665
- "text/plain": [
4666
- "#<RedAmber::DataFrame : 68 x 9 Vectors, 0x000000000000ff64>\n",
4667
- " index species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
4668
- " <uint16> <string> <string> <double> <double> <uint8> ... <uint16>\n",
4669
- " 1 2 Adelie Torgersen 39.5 17.4 186 ... 2007\n",
4670
- " 2 3 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
4671
- " 3 5 Adelie Torgersen 36.7 19.3 193 ... 2007\n",
4672
- " 4 7 Adelie Torgersen 38.9 17.8 181 ... 2007\n",
4673
- " 5 11 Adelie Torgersen 37.8 17.1 186 ... 2007\n",
4674
- " : : : : : : : ... :\n",
4675
- "66 317 Gentoo Biscoe 49.4 15.8 216 ... 2009\n",
4676
- "67 331 Gentoo Biscoe 50.5 15.2 216 ... 2009\n",
4677
- "68 337 Gentoo Biscoe 44.5 15.7 217 ... 2009\n"
4678
- ]
4679
- },
4680
- "execution_count": 140,
4681
- "metadata": {},
4682
- "output_type": "execute_result"
4683
- }
4684
- ],
4685
- "source": [
4686
- "# prime-th rows ... Don't ask me what it means.\n",
4687
- "require 'prime'\n",
4688
- "penguins_with_index =\n",
4689
- " penguins.assign do\n",
4690
- " { index: Vector.new(penguins.indices) + 1 }\n",
4691
- " end.pick { [keys[-1], keys[0..-2]] }\n",
4692
- "penguins_with_index.slice { Vector.new(Prime.each(size).to_a) - 1 }"
4693
- ]
4694
- },
4695
- {
4696
- "cell_type": "markdown",
4697
- "id": "c9e8de1a-ad8f-4fdc-a65c-4d3db7123530",
4698
- "metadata": {},
4699
- "source": [
4700
- "## 46. Slice by Enumerator"
4701
- ]
4702
- },
4703
- {
4704
- "cell_type": "markdown",
4705
- "id": "32dd53a3-a822-4ae1-afe2-b5aa2bfbd3e3",
4706
- "metadata": {},
4707
- "source": [
4708
- "Slice accepts an Enumerator as an argument."
4709
- ]
4710
- },
4711
- {
4712
- "cell_type": "code",
4713
- "execution_count": 141,
4714
- "id": "b2a118fa-f3c0-4f31-9b45-6db27ccbebe6",
4715
- "metadata": {},
4716
- "outputs": [
4717
- {
4718
- "data": {
4719
- "text/html": [
4720
- "RedAmber::DataFrame <35 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.1</td><td>18.7</td><td>181</td><td>3750</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>37.8</td><td>17.1</td><td>186</td><td>3300</td><td><i>(nil)</i></td><td>2007</td></tr><tr><td>Adelie</td><td>Biscoe</td><td>37.8</td><td>18.3</td><td>174</td><td>3400</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Dream</td><td>39.5</td><td>16.7</td><td>178</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td colspan='8'>&#8942;</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>48.5</td><td>15.0</td><td>219</td><td>4850</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.5</td><td>15.2</td><td>216</td><td>5000</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>46.8</td><td>14.3</td><td>215</td><td>4850</td><td>female</td><td>2009</td></tr></table>"
4721
- ],
4722
- "text/plain": [
4723
- "#<RedAmber::DataFrame : 35 x 8 Vectors, 0x000000000000ff78>\n",
4724
- " species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
4725
- " <string> <string> <double> <double> <uint8> ... <uint16>\n",
4726
- " 1 Adelie Torgersen 39.1 18.7 181 ... 2007\n",
4727
- " 2 Adelie Torgersen 37.8 17.1 186 ... 2007\n",
4728
- " 3 Adelie Biscoe 37.8 18.3 174 ... 2007\n",
4729
- " 4 Adelie Dream 39.5 16.7 178 ... 2007\n",
4730
- " 5 Adelie Dream 36.5 18.0 182 ... 2007\n",
4731
- " : : : : : : ... :\n",
4732
- "33 Gentoo Biscoe 48.5 15.0 219 ... 2009\n",
4733
- "34 Gentoo Biscoe 50.5 15.2 216 ... 2009\n",
4734
- "35 Gentoo Biscoe 46.8 14.3 215 ... 2009\n"
4735
- ]
4736
- },
4737
- "execution_count": 141,
4738
- "metadata": {},
4739
- "output_type": "execute_result"
4740
- }
4741
- ],
4742
- "source": [
4743
- "# Select every 10th sample\n",
4744
- "penguins.slice(0.step by: 10, to: 340)"
4745
- ]
4746
- },
4747
- {
4748
- "cell_type": "markdown",
4749
- "id": "db312c2c-3a7c-4765-bfad-b3313b173a79",
4750
- "metadata": {},
4751
- "source": [
4752
- "## 47. Output mode"
4753
- ]
4754
- },
4755
- {
4756
- "cell_type": "markdown",
4757
- "id": "714ed8df-5aa3-4ac4-8b0d-6390aff73c8c",
4758
- "metadata": {},
4759
- "source": [
4760
- "Output mode of `#inspect` and `#to_iruby` is Table mode by default. If you prefer TDR mode, set the environment variable `RED_AMBER_OUTPUT_MODE` to `\"TDR\"`."
4761
- ]
4762
- },
4763
- {
4764
- "cell_type": "code",
4765
- "execution_count": 142,
4766
- "id": "a721804b-006e-44c6-8d38-885eae747eaa",
4767
- "metadata": {},
4768
- "outputs": [
4769
- {
4770
- "data": {
4771
- "text/html": [
4772
- "RedAmber::DataFrame <344 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.1</td><td>18.7</td><td>181</td><td>3750</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.5</td><td>17.4</td><td>186</td><td>3800</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td>2007</td></tr><tr><td colspan='8'>&#8942;</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.4</td><td>15.7</td><td>222</td><td>5750</td><td>male</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>45.2</td><td>14.8</td><td>212</td><td>5200</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>49.9</td><td>16.1</td><td>213</td><td>5400</td><td>male</td><td>2009</td></tr></table>"
4773
- ],
4774
- "text/plain": [
4775
- "#<RedAmber::DataFrame : 344 x 8 Vectors, 0x000000000000f8ac>\n",
4776
- " species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
4777
- " <string> <string> <double> <double> <uint8> ... <uint16>\n",
4778
- " 1 Adelie Torgersen 39.1 18.7 181 ... 2007\n",
4779
- " 2 Adelie Torgersen 39.5 17.4 186 ... 2007\n",
4780
- " 3 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
4781
- " 4 Adelie Torgersen (nil) (nil) (nil) ... 2007\n",
4782
- " 5 Adelie Torgersen 36.7 19.3 193 ... 2007\n",
4783
- " : : : : : : ... :\n",
4784
- "342 Gentoo Biscoe 50.4 15.7 222 ... 2009\n",
4785
- "343 Gentoo Biscoe 45.2 14.8 212 ... 2009\n",
4786
- "344 Gentoo Biscoe 49.9 16.1 213 ... 2009\n"
4787
- ]
4788
- },
4789
- "execution_count": 142,
4790
- "metadata": {},
4791
- "output_type": "execute_result"
4792
- }
4793
- ],
4794
- "source": [
4795
- "ENV['RED_AMBER_OUTPUT_MODE'] = 'Table' # or nil (default)\n",
4796
- "penguins # Almost same as `puts penguins.to_s` in any mode"
4797
- ]
4798
- },
4799
- {
4800
- "cell_type": "code",
4801
- "execution_count": 143,
4802
- "id": "e4c9f70c-a4b1-4a81-bbc4-e9b14a6b6cb0",
4803
- "metadata": {},
4804
- "outputs": [
4805
- {
4806
- "name": "stdout",
4807
- "output_type": "stream",
4808
- "text": [
4809
- "#<RedAmber::DataFrame : 344 x 8 Vectors, 0x000000000000f8ac>\n",
4810
- "Vectors : 5 numeric, 3 strings\n",
4811
- "# key type level data_preview\n",
4812
- "1 :species string 3 {\"Adelie\"=>152, \"Chinstrap\"=>68, \"Gentoo\"=>124}\n",
4813
- "2 :island string 3 {\"Torgersen\"=>52, \"Biscoe\"=>168, \"Dream\"=>124}\n",
4814
- "3 :bill_length_mm double 165 [39.1, 39.5, 40.3, nil, 36.7, ... ], 2 nils\n",
4815
- " ... 5 more Vectors ...\n",
4816
- "\n"
4817
- ]
4818
- }
4819
- ],
4820
- "source": [
4821
- "ENV['RED_AMBER_OUTPUT_MODE'] = 'TDR'\n",
4822
- "p penguins; nil # Almost same as `penguins.tdr` in any mode"
4823
- ]
4824
- },
4825
- {
4826
- "cell_type": "code",
4827
- "execution_count": 144,
4828
- "id": "2786e9a7-e321-43c5-b56e-9f2ca9d62f8b",
4829
- "metadata": {},
4830
- "outputs": [
4831
- {
4832
- "data": {
4833
- "text/plain": [
4834
- "RedAmber::DataFrame : 344 x 8 Vectors\n",
4835
- "Vectors : 5 numeric, 3 strings\n",
4836
- "# key type level data_preview\n",
4837
- "1 :species string 3 {\"Adelie\"=>152, \"Chinstrap\"=>68, \"Gentoo\"=>124}\n",
4838
- "2 :island string 3 {\"Torgersen\"=>52, \"Biscoe\"=>168, \"Dream\"=>124}\n",
4839
- "3 :bill_length_mm double 165 [39.1, 39.5, 40.3, nil, 36.7, ... ], 2 nils\n",
4840
- "4 :bill_depth_mm double 81 [18.7, 17.4, 18.0, nil, 19.3, ... ], 2 nils\n",
4841
- "5 :flipper_length_mm uint8 56 [181, 186, 195, nil, 193, ... ], 2 nils\n",
4842
- "6 :body_mass_g uint16 95 [3750, 3800, 3250, nil, 3450, ... ], 2 nils\n",
4843
- "7 :sex string 3 {\"male\"=>168, \"female\"=>165, nil=>11}\n",
4844
- "8 :year uint16 3 {2007=>110, 2008=>114, 2009=>120}\n"
4845
- ]
4846
- },
4847
- "execution_count": 144,
4848
- "metadata": {},
4849
- "output_type": "execute_result"
4850
- }
4851
- ],
4852
- "source": [
4853
- "penguins"
4854
- ]
4855
- }
4856
- ],
4857
- "metadata": {
4858
- "kernelspec": {
4859
- "display_name": "Ruby 3.1.1",
4860
- "language": "ruby",
4861
- "name": "ruby"
4862
- },
4863
- "language_info": {
4864
- "file_extension": ".rb",
4865
- "mimetype": "application/x-ruby",
4866
- "name": "ruby",
4867
- "version": "3.1.1"
4868
- }
4869
- },
4870
- "nbformat": 4,
4871
- "nbformat_minor": 5
4872
- }