red_amber 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/CHANGELOG.md +44 -18
- data/Gemfile +4 -1
- data/README.md +51 -76
- data/Rakefile +1 -0
- data/benchmark/csv_load_penguins.yml +1 -1
- data/doc/47_examples_of_red_amber.ipynb +4872 -0
- data/doc/DataFrame.md +370 -210
- data/doc/Vector.md +68 -15
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/lib/red-amber.rb +1 -25
- data/lib/red_amber/data_frame.rb +9 -7
- data/lib/red_amber/data_frame_displayable.rb +79 -4
- data/lib/red_amber/group.rb +61 -0
- data/lib/red_amber/vector.rb +17 -3
- data/lib/red_amber/vector_functions.rb +22 -20
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +27 -1
- data/red_amber.gemspec +0 -2
- metadata +4 -31
- data/lib/red_amber/data_frame_observation_operation.rb +0 -11
@@ -0,0 +1,4872 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "markdown",
|
5
|
+
"id": "e355db8b-ebb6-4ea6-97b5-3b9fdadc302c",
|
6
|
+
"metadata": {},
|
7
|
+
"source": [
|
8
|
+
"# 47 examples of Red Amber"
|
9
|
+
]
|
10
|
+
},
|
11
|
+
{
|
12
|
+
"cell_type": "markdown",
|
13
|
+
"id": "f20f4970-db38-4d96-9a36-d4cf9d007596",
|
14
|
+
"metadata": {},
|
15
|
+
"source": [
|
16
|
+
"Last update: July 15, 2022 / RedAmber Version 0.1.7"
|
17
|
+
]
|
18
|
+
},
|
19
|
+
{
|
20
|
+
"cell_type": "markdown",
|
21
|
+
"id": "f6e927d0-b59a-4c4e-9f8a-4fa08f9a6b2f",
|
22
|
+
"metadata": {},
|
23
|
+
"source": [
|
24
|
+
"## 1. Install"
|
25
|
+
]
|
26
|
+
},
|
27
|
+
{
|
28
|
+
"cell_type": "markdown",
|
29
|
+
"id": "85eacfe6-fa11-4749-844f-5914d6cd7dbc",
|
30
|
+
"metadata": {},
|
31
|
+
"source": [
|
32
|
+
"Install requirements before you install Red Amber.\n",
|
33
|
+
"\n",
|
34
|
+
"- Apache Arrow GLib (>= 8.0.0)\n",
|
35
|
+
"\n",
|
36
|
+
"- Apache Parquet GLib (>= 8.0.0) # if you need IO from/to Parquet resource.\n",
|
37
|
+
"\n",
|
38
|
+
" See [Apache Arrow install document](https://arrow.apache.org/install/).\n",
|
39
|
+
" \n",
|
40
|
+
" A minimal installation example for the latest Ubuntu is in the ['Prepare the Apache Arrow' section in ci test](https://github.com/heronshoes/red_amber/blob/master/.github/workflows/test.yml) of Red Amber.\n",
|
41
|
+
"\n",
|
42
|
+
"Then add this line to your Gemfile:\n",
|
43
|
+
"```\n",
|
44
|
+
"gem 'red_amber'\n",
|
45
|
+
"```\n",
|
46
|
+
"\n",
|
47
|
+
"And then execute:\n",
|
48
|
+
"```\n",
|
49
|
+
"$ bundle install\n",
|
50
|
+
"```\n",
|
51
|
+
"\n",
|
52
|
+
"Or install it yourself as:\n",
|
53
|
+
"```\n",
|
54
|
+
"$ gem install red_amber\n",
|
55
|
+
"```"
|
56
|
+
]
|
57
|
+
},
|
58
|
+
{
|
59
|
+
"cell_type": "markdown",
|
60
|
+
"id": "8c08c45d-0818-4b43-bc65-4d43dd8b6b66",
|
61
|
+
"metadata": {},
|
62
|
+
"source": [
|
63
|
+
"## 2. Require"
|
64
|
+
]
|
65
|
+
},
|
66
|
+
{
|
67
|
+
"cell_type": "code",
|
68
|
+
"execution_count": 1,
|
69
|
+
"id": "74b76022-03ea-40ae-bac8-fc8743659042",
|
70
|
+
"metadata": {},
|
71
|
+
"outputs": [
|
72
|
+
{
|
73
|
+
"data": {
|
74
|
+
"text/plain": [
|
75
|
+
"\"0.1.7\""
|
76
|
+
]
|
77
|
+
},
|
78
|
+
"execution_count": 1,
|
79
|
+
"metadata": {},
|
80
|
+
"output_type": "execute_result"
|
81
|
+
}
|
82
|
+
],
|
83
|
+
"source": [
|
84
|
+
"require 'red_amber' # require 'red-amber' is also OK\n",
|
85
|
+
"include RedAmber\n",
|
86
|
+
"VERSION"
|
87
|
+
]
|
88
|
+
},
|
89
|
+
{
|
90
|
+
"cell_type": "markdown",
|
91
|
+
"id": "d8fb6289-39ea-4fa9-a165-b87ee6d125e9",
|
92
|
+
"metadata": {
|
93
|
+
"tags": []
|
94
|
+
},
|
95
|
+
"source": [
|
96
|
+
"## 3. Initialize"
|
97
|
+
]
|
98
|
+
},
|
99
|
+
{
|
100
|
+
"cell_type": "code",
|
101
|
+
"execution_count": 2,
|
102
|
+
"id": "51f81824-626a-4741-a29b-30ea357fe7b5",
|
103
|
+
"metadata": {},
|
104
|
+
"outputs": [
|
105
|
+
{
|
106
|
+
"data": {
|
107
|
+
"text/html": [
|
108
|
+
"RedAmber::DataFrame <3 x 2 vectors> <table><tr><th>x</th><th>y</th></tr><tr><td>1</td><td>A</td></tr><tr><td>2</td><td>B</td></tr><tr><td>3</td><td>C</td></tr></table>"
|
109
|
+
],
|
110
|
+
"text/plain": [
|
111
|
+
"#<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f85c>\n",
|
112
|
+
" x y\n",
|
113
|
+
" <uint8> <string>\n",
|
114
|
+
"1 1 A\n",
|
115
|
+
"2 2 B\n",
|
116
|
+
"3 3 C\n"
|
117
|
+
]
|
118
|
+
},
|
119
|
+
"execution_count": 2,
|
120
|
+
"metadata": {},
|
121
|
+
"output_type": "execute_result"
|
122
|
+
}
|
123
|
+
],
|
124
|
+
"source": [
|
125
|
+
"# From a Hash\n",
|
126
|
+
"DataFrame.new(x: [1, 2, 3], y: %w[A B C])"
|
127
|
+
]
|
128
|
+
},
|
129
|
+
{
|
130
|
+
"cell_type": "code",
|
131
|
+
"execution_count": 3,
|
132
|
+
"id": "20b696eb-c199-444d-a957-e0b1081f1506",
|
133
|
+
"metadata": {},
|
134
|
+
"outputs": [
|
135
|
+
{
|
136
|
+
"data": {
|
137
|
+
"text/html": [
|
138
|
+
"RedAmber::DataFrame <3 x 2 vectors> <table><tr><th>x</th><th>y</th></tr><tr><td>1</td><td>A</td></tr><tr><td>2</td><td>B</td></tr><tr><td>3</td><td>C</td></tr></table>"
|
139
|
+
],
|
140
|
+
"text/plain": [
|
141
|
+
"#<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f870>\n",
|
142
|
+
" x y\n",
|
143
|
+
" <uint8> <string>\n",
|
144
|
+
"1 1 A\n",
|
145
|
+
"2 2 B\n",
|
146
|
+
"3 3 C\n"
|
147
|
+
]
|
148
|
+
},
|
149
|
+
"execution_count": 3,
|
150
|
+
"metadata": {},
|
151
|
+
"output_type": "execute_result"
|
152
|
+
}
|
153
|
+
],
|
154
|
+
"source": [
|
155
|
+
"# From a schema and a row-oriented array\n",
|
156
|
+
"DataFrame.new({ x: :uint8, y: :string }, [[1, 'A'], [2, 'B'], [3, 'C']])"
|
157
|
+
]
|
158
|
+
},
|
159
|
+
{
|
160
|
+
"cell_type": "code",
|
161
|
+
"execution_count": 4,
|
162
|
+
"id": "21eab151-f977-4474-a6d1-576169e24b26",
|
163
|
+
"metadata": {},
|
164
|
+
"outputs": [
|
165
|
+
{
|
166
|
+
"data": {
|
167
|
+
"text/html": [
|
168
|
+
"RedAmber::DataFrame <3 x 2 vectors> <table><tr><th>x</th><th>y</th></tr><tr><td>1</td><td>A</td></tr><tr><td>2</td><td>B</td></tr><tr><td>3</td><td>C</td></tr></table>"
|
169
|
+
],
|
170
|
+
"text/plain": [
|
171
|
+
"#<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f884>\n",
|
172
|
+
" x y\n",
|
173
|
+
" <uint8> <string>\n",
|
174
|
+
"1 1 A\n",
|
175
|
+
"2 2 B\n",
|
176
|
+
"3 3 C\n"
|
177
|
+
]
|
178
|
+
},
|
179
|
+
"execution_count": 4,
|
180
|
+
"metadata": {},
|
181
|
+
"output_type": "execute_result"
|
182
|
+
}
|
183
|
+
],
|
184
|
+
"source": [
|
185
|
+
"# From an Arrow::Table\n",
|
186
|
+
"table = Arrow::Table.new(x: [1, 2, 3], y: %w[A B C])\n",
|
187
|
+
"DataFrame.new(table)"
|
188
|
+
]
|
189
|
+
},
|
190
|
+
{
|
191
|
+
"cell_type": "code",
|
192
|
+
"execution_count": 5,
|
193
|
+
"id": "aa09d3da-f332-45cd-92ca-712c6a679035",
|
194
|
+
"metadata": {},
|
195
|
+
"outputs": [
|
196
|
+
{
|
197
|
+
"data": {
|
198
|
+
"text/html": [
|
199
|
+
"RedAmber::DataFrame <3 x 2 vectors> <table><tr><th>x</th><th>y</th></tr><tr><td>1</td><td>A</td></tr><tr><td>2</td><td>B</td></tr><tr><td>3</td><td>C</td></tr></table>"
|
200
|
+
],
|
201
|
+
"text/plain": [
|
202
|
+
"#<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f898>\n",
|
203
|
+
" x y\n",
|
204
|
+
" <uint8> <string>\n",
|
205
|
+
"1 1 A\n",
|
206
|
+
"2 2 B\n",
|
207
|
+
"3 3 C\n"
|
208
|
+
]
|
209
|
+
},
|
210
|
+
"execution_count": 5,
|
211
|
+
"metadata": {},
|
212
|
+
"output_type": "execute_result"
|
213
|
+
}
|
214
|
+
],
|
215
|
+
"source": [
|
216
|
+
"# From a Rover::DataFrame\n",
|
217
|
+
"require 'rover'\n",
|
218
|
+
"rover = Rover::DataFrame.new(x: [1, 2, 3], y: %w[A B C])\n",
|
219
|
+
"DataFrame.new(rover)"
|
220
|
+
]
|
221
|
+
},
|
222
|
+
{
|
223
|
+
"cell_type": "code",
|
224
|
+
"execution_count": 6,
|
225
|
+
"id": "cd2c3677-00fb-48fe-bb94-18bc0815db72",
|
226
|
+
"metadata": {},
|
227
|
+
"outputs": [
|
228
|
+
{
|
229
|
+
"data": {
|
230
|
+
"text/html": [
|
231
|
+
"RedAmber::DataFrame <344 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.1</td><td>18.7</td><td>181</td><td>3750</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.5</td><td>17.4</td><td>186</td><td>3800</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td>2007</td></tr><tr><td colspan='8'>⋮</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.4</td><td>15.7</td><td>222</td><td>5750</td><td>male</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>45.2</td><td>14.8</td><td>212</td><td>5200</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>49.9</td><td>16.1</td><td>213</td><td>5400</td><td>male</td><td>2009</td></tr></table>"
|
232
|
+
],
|
233
|
+
"text/plain": [
|
234
|
+
"#<RedAmber::DataFrame : 344 x 8 Vectors, 0x000000000000f8ac>\n",
|
235
|
+
" species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
|
236
|
+
" <string> <string> <double> <double> <uint8> ... <uint16>\n",
|
237
|
+
" 1 Adelie Torgersen 39.1 18.7 181 ... 2007\n",
|
238
|
+
" 2 Adelie Torgersen 39.5 17.4 186 ... 2007\n",
|
239
|
+
" 3 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
|
240
|
+
" 4 Adelie Torgersen (nil) (nil) (nil) ... 2007\n",
|
241
|
+
" 5 Adelie Torgersen 36.7 19.3 193 ... 2007\n",
|
242
|
+
" : : : : : : ... :\n",
|
243
|
+
"342 Gentoo Biscoe 50.4 15.7 222 ... 2009\n",
|
244
|
+
"343 Gentoo Biscoe 45.2 14.8 212 ... 2009\n",
|
245
|
+
"344 Gentoo Biscoe 49.9 16.1 213 ... 2009\n"
|
246
|
+
]
|
247
|
+
},
|
248
|
+
"execution_count": 6,
|
249
|
+
"metadata": {},
|
250
|
+
"output_type": "execute_result"
|
251
|
+
}
|
252
|
+
],
|
253
|
+
"source": [
|
254
|
+
"# From red-datasets\n",
|
255
|
+
"require 'datasets-arrow'\n",
|
256
|
+
"dataset = Datasets::Penguins.new\n",
|
257
|
+
"penguins = DataFrame.new(dataset.to_arrow)"
|
258
|
+
]
|
259
|
+
},
|
260
|
+
{
|
261
|
+
"cell_type": "markdown",
|
262
|
+
"id": "3a2d12b4-7623-42c7-9e32-76cf303c7cea",
|
263
|
+
"metadata": {},
|
264
|
+
"source": [
|
265
|
+
"In a future version, this should become:\n",
|
266
|
+
"```ruby\n",
|
267
|
+
"require 'datasets-red-amber'\n",
|
268
|
+
"penguins = Datasets::Penguins.new.to_red_amber\n",
|
269
|
+
"```"
|
270
|
+
]
|
271
|
+
},
|
272
|
+
{
|
273
|
+
"cell_type": "code",
|
274
|
+
"execution_count": 7,
|
275
|
+
"id": "2e4619b7-bf6d-4081-9066-b186da8fdf5b",
|
276
|
+
"metadata": {},
|
277
|
+
"outputs": [
|
278
|
+
{
|
279
|
+
"data": {
|
280
|
+
"text/html": [
|
281
|
+
"RedAmber::DataFrame <32 x 11 vectors> <table><tr><th>mpg</th><th>cyl</th><th>disp</th><th>hp</th><th>drat</th><th>wt</th><th>qsec</th><th>vs</th><th>am</th><th>gear</th><th>carb</th></tr><tr><td>21.0</td><td>6</td><td>160.0</td><td>110</td><td>3.9</td><td>2.62</td><td>16.46</td><td>0</td><td>1</td><td>4</td><td>4</td></tr><tr><td>21.0</td><td>6</td><td>160.0</td><td>110</td><td>3.9</td><td>2.875</td><td>17.02</td><td>0</td><td>1</td><td>4</td><td>4</td></tr><tr><td>22.8</td><td>4</td><td>108.0</td><td>93</td><td>3.85</td><td>2.32</td><td>18.61</td><td>1</td><td>1</td><td>4</td><td>1</td></tr><tr><td>21.4</td><td>6</td><td>258.0</td><td>110</td><td>3.08</td><td>3.215</td><td>19.44</td><td>1</td><td>0</td><td>3</td><td>1</td></tr><tr><td colspan='11'>⋮</td></tr><tr><td>19.7</td><td>6</td><td>145.0</td><td>175</td><td>3.62</td><td>2.77</td><td>15.5</td><td>0</td><td>1</td><td>5</td><td>6</td></tr><tr><td>15.0</td><td>8</td><td>301.0</td><td>335</td><td>3.54</td><td>3.57</td><td>14.6</td><td>0</td><td>1</td><td>5</td><td>8</td></tr><tr><td>21.4</td><td>4</td><td>121.0</td><td>109</td><td>4.11</td><td>2.78</td><td>18.6</td><td>1</td><td>1</td><td>4</td><td>2</td></tr></table>"
|
282
|
+
],
|
283
|
+
"text/plain": [
|
284
|
+
"#<RedAmber::DataFrame : 32 x 11 Vectors, 0x000000000000f8c0>\n",
|
285
|
+
" mpg cyl disp hp drat wt qsec vs am ... carb\n",
|
286
|
+
" <double> <uint8> <double> <uint16> <double> <double> <double> <uint8> <uint8> ... <uint8>\n",
|
287
|
+
" 1 21.0 6 160.0 110 3.9 2.6 16.5 0 1 ... 4\n",
|
288
|
+
" 2 21.0 6 160.0 110 3.9 2.9 17.0 0 1 ... 4\n",
|
289
|
+
" 3 22.8 4 108.0 93 3.9 2.3 18.6 1 1 ... 1\n",
|
290
|
+
" 4 21.4 6 258.0 110 3.1 3.2 19.4 1 0 ... 1\n",
|
291
|
+
" 5 18.7 8 360.0 175 3.2 3.4 17.0 0 0 ... 2\n",
|
292
|
+
" : : : : : : : : : : ... :\n",
|
293
|
+
"30 19.7 6 145.0 175 3.6 2.8 15.5 0 1 ... 6\n",
|
294
|
+
"31 15.0 8 301.0 335 3.5 3.6 14.6 0 1 ... 8\n",
|
295
|
+
"32 21.4 4 121.0 109 4.1 2.8 18.6 1 1 ... 2\n"
|
296
|
+
]
|
297
|
+
},
|
298
|
+
"execution_count": 7,
|
299
|
+
"metadata": {},
|
300
|
+
"output_type": "execute_result"
|
301
|
+
}
|
302
|
+
],
|
303
|
+
"source": [
|
304
|
+
"dataset = Datasets::Rdatasets.new('datasets', 'mtcars')\n",
|
305
|
+
"mtcars = DataFrame.new(dataset.to_arrow)"
|
306
|
+
]
|
307
|
+
},
|
308
|
+
{
|
309
|
+
"cell_type": "markdown",
|
310
|
+
"id": "e1f77a54-3a43-4d17-bb6f-332ef13832a3",
|
311
|
+
"metadata": {},
|
312
|
+
"source": [
|
313
|
+
"## 4. Load"
|
314
|
+
]
|
315
|
+
},
|
316
|
+
{
|
317
|
+
"cell_type": "markdown",
|
318
|
+
"id": "0fed4f43-3fbb-43e5-af0d-f93401deea78",
|
319
|
+
"metadata": {},
|
320
|
+
"source": [
|
321
|
+
"`RedAmber::DataFrame.load` delegates to `Arrow::Table.load`. We can load from `.arrow`, `.arrows`, `.csv`, `.csv.gz` and `.tsv` files."
|
322
|
+
]
|
323
|
+
},
|
324
|
+
{
|
325
|
+
"cell_type": "code",
|
326
|
+
"execution_count": 8,
|
327
|
+
"id": "4203e671-0a0a-405c-8482-53a8cd78a891",
|
328
|
+
"metadata": {},
|
329
|
+
"outputs": [
|
330
|
+
{
|
331
|
+
"data": {
|
332
|
+
"text/html": [
|
333
|
+
"RedAmber::DataFrame <3 x 2 vectors> <table><tr><th>name</th><th>age</th></tr><tr><td>Yasuko</td><td>68</td></tr><tr><td>Rui</td><td>49</td></tr><tr><td>Hinata</td><td>28</td></tr></table>"
|
334
|
+
],
|
335
|
+
"text/plain": [
|
336
|
+
"#<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000f8d4>\n",
|
337
|
+
" name age\n",
|
338
|
+
" <string> <int64>\n",
|
339
|
+
"1 Yasuko 68\n",
|
340
|
+
"2 Rui 49\n",
|
341
|
+
"3 Hinata 28\n"
|
342
|
+
]
|
343
|
+
},
|
344
|
+
"execution_count": 8,
|
345
|
+
"metadata": {},
|
346
|
+
"output_type": "execute_result"
|
347
|
+
}
|
348
|
+
],
|
349
|
+
"source": [
|
350
|
+
"DataFrame.load(\"test/entity/with_header.csv\")"
|
351
|
+
]
|
352
|
+
},
|
353
|
+
{
|
354
|
+
"cell_type": "markdown",
|
355
|
+
"id": "29875147-1371-4575-a565-69c3534c15f2",
|
356
|
+
"metadata": {},
|
357
|
+
"source": [
|
358
|
+
"## 5. Load from a URI"
|
359
|
+
]
|
360
|
+
},
|
361
|
+
{
|
362
|
+
"cell_type": "code",
|
363
|
+
"execution_count": 9,
|
364
|
+
"id": "916b86e2-e3a2-4ebb-8770-9e8a29c46523",
|
365
|
+
"metadata": {},
|
366
|
+
"outputs": [
|
367
|
+
{
|
368
|
+
"data": {
|
369
|
+
"text/html": [
|
370
|
+
"RedAmber::DataFrame <344 x 7 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.1</td><td>18.7</td><td>181</td><td>3750</td><td>MALE</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.5</td><td>17.4</td><td>186</td><td>3800</td><td>FEMALE</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>FEMALE</td></tr><tr><td>Adelie</td><td>Torgersen</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td></td></tr><tr><td colspan='7'>⋮</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.4</td><td>15.7</td><td>222</td><td>5750</td><td>MALE</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>45.2</td><td>14.8</td><td>212</td><td>5200</td><td>FEMALE</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>49.9</td><td>16.1</td><td>213</td><td>5400</td><td>MALE</td></tr></table>"
|
371
|
+
],
|
372
|
+
"text/plain": [
|
373
|
+
"#<RedAmber::DataFrame : 344 x 7 Vectors, 0x000000000000f8e8>\n",
|
374
|
+
" species island bill_length_mm bill_depth_mm flipper_length_mm ... sex\n",
|
375
|
+
" <string> <string> <double> <double> <int64> ... <string>\n",
|
376
|
+
" 1 Adelie Torgersen 39.1 18.7 181 ... MALE\n",
|
377
|
+
" 2 Adelie Torgersen 39.5 17.4 186 ... FEMALE\n",
|
378
|
+
" 3 Adelie Torgersen 40.3 18.0 195 ... FEMALE\n",
|
379
|
+
" 4 Adelie Torgersen (nil) (nil) (nil) ...\n",
|
380
|
+
" 5 Adelie Torgersen 36.7 19.3 193 ... FEMALE\n",
|
381
|
+
" : : : : : : ... :\n",
|
382
|
+
"342 Gentoo Biscoe 50.4 15.7 222 ... MALE\n",
|
383
|
+
"343 Gentoo Biscoe 45.2 14.8 212 ... FEMALE\n",
|
384
|
+
"344 Gentoo Biscoe 49.9 16.1 213 ... MALE\n"
|
385
|
+
]
|
386
|
+
},
|
387
|
+
"execution_count": 9,
|
388
|
+
"metadata": {},
|
389
|
+
"output_type": "execute_result"
|
390
|
+
}
|
391
|
+
],
|
392
|
+
"source": [
|
393
|
+
"uri = URI(\"https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv\")\n",
|
394
|
+
"DataFrame.load(uri)"
|
395
|
+
]
|
396
|
+
},
|
397
|
+
{
|
398
|
+
"cell_type": "markdown",
|
399
|
+
"id": "e6abe64d-e97f-437e-9c54-18f9e06e9668",
|
400
|
+
"metadata": {},
|
401
|
+
"source": [
|
402
|
+
"## 6. Save"
|
403
|
+
]
|
404
|
+
},
|
405
|
+
{
|
406
|
+
"cell_type": "code",
|
407
|
+
"execution_count": 10,
|
408
|
+
"id": "91c0fb62-7990-47f1-9fb6-b0529bc1783f",
|
409
|
+
"metadata": {},
|
410
|
+
"outputs": [
|
411
|
+
{
|
412
|
+
"data": {
|
413
|
+
"text/plain": [
|
414
|
+
"true"
|
415
|
+
]
|
416
|
+
},
|
417
|
+
"execution_count": 10,
|
418
|
+
"metadata": {},
|
419
|
+
"output_type": "execute_result"
|
420
|
+
}
|
421
|
+
],
|
422
|
+
"source": [
|
423
|
+
"penguins.save(\"file.arrow\")\n",
|
424
|
+
"penguins.save(\"file.arrows\")\n",
|
425
|
+
"penguins.save(\"file.csv\")\n",
|
426
|
+
"penguins.save(\"file.csv.gz\")\n",
|
427
|
+
"penguins.save(\"file.tsv\")\n",
|
428
|
+
"penguins.save(\"file.feather\")"
|
429
|
+
]
|
430
|
+
},
|
431
|
+
{
|
432
|
+
"cell_type": "markdown",
|
433
|
+
"id": "d1d30973-9e2f-406a-9f42-9e6e4c966baf",
|
434
|
+
"metadata": {},
|
435
|
+
"source": [
|
436
|
+
"## 7. to_s/inspect"
|
437
|
+
]
|
438
|
+
},
|
439
|
+
{
|
440
|
+
"cell_type": "markdown",
|
441
|
+
"id": "a7bc9cb7-eae4-495f-831e-b747e486d0bd",
|
442
|
+
"metadata": {},
|
443
|
+
"source": [
|
444
|
+
"`to_s` or `inspect` (which uses `to_s` internally) shows a preview of the dataframe.\n",
|
445
|
+
"\n",
|
446
|
+
"It shows the first 5 and the last 3 rows if the dataframe has many rows. Columns are also omitted if a line exceeds 80 characters."
|
447
|
+
]
|
448
|
+
},
|
449
|
+
{
|
450
|
+
"cell_type": "code",
|
451
|
+
"execution_count": 11,
|
452
|
+
"id": "af6d29ef-2e1c-4a08-a8b2-d69acda79ec5",
|
453
|
+
"metadata": {},
|
454
|
+
"outputs": [
|
455
|
+
{
|
456
|
+
"name": "stdout",
|
457
|
+
"output_type": "stream",
|
458
|
+
"text": [
|
459
|
+
"#<RedAmber::DataFrame : 5 x 4 Vectors, 0x000000000000f8fc>\n",
|
460
|
+
" x y s b\n",
|
461
|
+
" <uint8> <double> <string> <boolean>\n",
|
462
|
+
"1 1 1.0 A true\n",
|
463
|
+
"2 2 2.0 B false\n",
|
464
|
+
"3 3 3.0 C true\n",
|
465
|
+
"4 4 NaN D false\n",
|
466
|
+
"5 5 (nil) (nil) (nil)\n",
|
467
|
+
"\n"
|
468
|
+
]
|
469
|
+
}
|
470
|
+
],
|
471
|
+
"source": [
|
472
|
+
"df = DataFrame.new(\n",
|
473
|
+
" x: [1, 2, 3, 4, 5],\n",
|
474
|
+
" y: [1, 2, 3, 0/0.0, nil],\n",
|
475
|
+
" s: %w[A B C D] << nil,\n",
|
476
|
+
" b: [true, false, true, false, nil])\n",
|
477
|
+
"p df; nil"
|
478
|
+
]
|
479
|
+
},
|
480
|
+
{
|
481
|
+
"cell_type": "code",
|
482
|
+
"execution_count": 12,
|
483
|
+
"id": "cdff2e60-bd0a-4d12-b348-201a49bbbbbe",
|
484
|
+
"metadata": {},
|
485
|
+
"outputs": [
|
486
|
+
{
|
487
|
+
"name": "stdout",
|
488
|
+
"output_type": "stream",
|
489
|
+
"text": [
|
490
|
+
"#<RedAmber::DataFrame : 344 x 8 Vectors, 0x000000000000f8ac>\n",
|
491
|
+
" species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
|
492
|
+
" <string> <string> <double> <double> <uint8> ... <uint16>\n",
|
493
|
+
" 1 Adelie Torgersen 39.1 18.7 181 ... 2007\n",
|
494
|
+
" 2 Adelie Torgersen 39.5 17.4 186 ... 2007\n",
|
495
|
+
" 3 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
|
496
|
+
" 4 Adelie Torgersen (nil) (nil) (nil) ... 2007\n",
|
497
|
+
" 5 Adelie Torgersen 36.7 19.3 193 ... 2007\n",
|
498
|
+
" : : : : : : ... :\n",
|
499
|
+
"342 Gentoo Biscoe 50.4 15.7 222 ... 2009\n",
|
500
|
+
"343 Gentoo Biscoe 45.2 14.8 212 ... 2009\n",
|
501
|
+
"344 Gentoo Biscoe 49.9 16.1 213 ... 2009\n",
|
502
|
+
"\n"
|
503
|
+
]
|
504
|
+
}
|
505
|
+
],
|
506
|
+
"source": [
|
507
|
+
"p penguins; nil"
|
508
|
+
]
|
509
|
+
},
|
510
|
+
{
|
511
|
+
"cell_type": "markdown",
|
512
|
+
"id": "cb44df38-58f7-479c-b7a4-c9c305639292",
|
513
|
+
"metadata": {},
|
514
|
+
"source": [
|
515
|
+
"## 8. Show table"
|
516
|
+
]
|
517
|
+
},
|
518
|
+
{
|
519
|
+
"cell_type": "code",
|
520
|
+
"execution_count": 13,
|
521
|
+
"id": "fc710035-8134-4b18-89fe-8c58b95e0e0e",
|
522
|
+
"metadata": {},
|
523
|
+
"outputs": [
|
524
|
+
{
|
525
|
+
"data": {
|
526
|
+
"text/plain": [
|
527
|
+
"#<Arrow::Table:0x113637c20 ptr=0x7fcc504bb870>\n",
|
528
|
+
"\tx\t y\ts\tb\n",
|
529
|
+
"0\t1\t 1.000000\tA\ttrue\n",
|
530
|
+
"1\t2\t 2.000000\tB\tfalse\n",
|
531
|
+
"2\t3\t 3.000000\tC\ttrue\n",
|
532
|
+
"3\t4\t NaN\tD\tfalse\n",
|
533
|
+
"4\t5\t (null)\t(null)\t(null)\n"
|
534
|
+
]
|
535
|
+
},
|
536
|
+
"execution_count": 13,
|
537
|
+
"metadata": {},
|
538
|
+
"output_type": "execute_result"
|
539
|
+
}
|
540
|
+
],
|
541
|
+
"source": [
|
542
|
+
"df.table"
|
543
|
+
]
|
544
|
+
},
|
545
|
+
{
|
546
|
+
"cell_type": "code",
|
547
|
+
"execution_count": 14,
|
548
|
+
"id": "2634fb7b-194f-4277-94ba-05f39c497ffa",
|
549
|
+
"metadata": {},
|
550
|
+
"outputs": [
|
551
|
+
{
|
552
|
+
"data": {
|
553
|
+
"text/plain": [
|
554
|
+
"#<Arrow::Table:0x10fcb7c20 ptr=0x7fcc5057dc70>\n",
|
555
|
+
"\tspecies\tisland\tbill_length_mm\tbill_depth_mm\tflipper_length_mm\tbody_mass_g\tsex\tyear\n",
|
556
|
+
" 0\tAdelie \tTorgersen\t 39.100000\t 18.700000\t 181\t 3750\tmale\t2007\n",
|
557
|
+
" 1\tAdelie \tTorgersen\t 39.500000\t 17.400000\t 186\t 3800\tfemale\t2007\n",
|
558
|
+
" 2\tAdelie \tTorgersen\t 40.300000\t 18.000000\t 195\t 3250\tfemale\t2007\n",
|
559
|
+
" 3\tAdelie \tTorgersen\t (null)\t (null)\t (null)\t (null)\t(null)\t2007\n",
|
560
|
+
" 4\tAdelie \tTorgersen\t 36.700000\t 19.300000\t 193\t 3450\tfemale\t2007\n",
|
561
|
+
" 5\tAdelie \tTorgersen\t 39.300000\t 20.600000\t 190\t 3650\tmale\t2007\n",
|
562
|
+
" 6\tAdelie \tTorgersen\t 38.900000\t 17.800000\t 181\t 3625\tfemale\t2007\n",
|
563
|
+
" 7\tAdelie \tTorgersen\t 39.200000\t 19.600000\t 195\t 4675\tmale\t2007\n",
|
564
|
+
" 8\tAdelie \tTorgersen\t 34.100000\t 18.100000\t 193\t 3475\t(null)\t2007\n",
|
565
|
+
" 9\tAdelie \tTorgersen\t 42.000000\t 20.200000\t 190\t 4250\t(null)\t2007\n",
|
566
|
+
"...\n",
|
567
|
+
"334\tGentoo \tBiscoe\t 46.200000\t 14.100000\t 217\t 4375\tfemale\t2009\n",
|
568
|
+
"335\tGentoo \tBiscoe\t 55.100000\t 16.000000\t 230\t 5850\tmale\t2009\n",
|
569
|
+
"336\tGentoo \tBiscoe\t 44.500000\t 15.700000\t 217\t 4875\t(null)\t2009\n",
|
570
|
+
"337\tGentoo \tBiscoe\t 48.800000\t 16.200000\t 222\t 6000\tmale\t2009\n",
|
571
|
+
"338\tGentoo \tBiscoe\t 47.200000\t 13.700000\t 214\t 4925\tfemale\t2009\n",
|
572
|
+
"339\tGentoo \tBiscoe\t (null)\t (null)\t (null)\t (null)\t(null)\t2009\n",
|
573
|
+
"340\tGentoo \tBiscoe\t 46.800000\t 14.300000\t 215\t 4850\tfemale\t2009\n",
|
574
|
+
"341\tGentoo \tBiscoe\t 50.400000\t 15.700000\t 222\t 5750\tmale\t2009\n",
|
575
|
+
"342\tGentoo \tBiscoe\t 45.200000\t 14.800000\t 212\t 5200\tfemale\t2009\n",
|
576
|
+
"343\tGentoo \tBiscoe\t 49.900000\t 16.100000\t 213\t 5400\tmale\t2009\n"
|
577
|
+
]
|
578
|
+
},
|
579
|
+
"execution_count": 14,
|
580
|
+
"metadata": {},
|
581
|
+
"output_type": "execute_result"
|
582
|
+
}
|
583
|
+
],
|
584
|
+
"source": [
|
585
|
+
"penguins.table"
|
586
|
+
]
|
587
|
+
},
|
588
|
+
{
|
589
|
+
"cell_type": "code",
|
590
|
+
"execution_count": 15,
|
591
|
+
"id": "9dba2a67-ede7-4663-907b-9b2dd5db1605",
|
592
|
+
"metadata": {},
|
593
|
+
"outputs": [
|
594
|
+
{
|
595
|
+
"name": "stdout",
|
596
|
+
"output_type": "stream",
|
597
|
+
"text": [
|
598
|
+
"x: uint8\n",
|
599
|
+
"y: double\n",
|
600
|
+
"s: string\n",
|
601
|
+
"b: bool\n",
|
602
|
+
"----\n",
|
603
|
+
"x:\n",
|
604
|
+
" [\n",
|
605
|
+
" [\n",
|
606
|
+
" 1,\n",
|
607
|
+
" 2,\n",
|
608
|
+
" 3,\n",
|
609
|
+
" 4,\n",
|
610
|
+
" 5\n",
|
611
|
+
" ]\n",
|
612
|
+
" ]\n",
|
613
|
+
"y:\n",
|
614
|
+
" [\n",
|
615
|
+
" [\n",
|
616
|
+
" 1,\n",
|
617
|
+
" 2,\n",
|
618
|
+
" 3,\n",
|
619
|
+
" nan,\n",
|
620
|
+
" null\n",
|
621
|
+
" ]\n",
|
622
|
+
" ]\n",
|
623
|
+
"s:\n",
|
624
|
+
" [\n",
|
625
|
+
" [\n",
|
626
|
+
" \"A\",\n",
|
627
|
+
" \"B\",\n",
|
628
|
+
" \"C\",\n",
|
629
|
+
" \"D\",\n",
|
630
|
+
" null\n",
|
631
|
+
" ]\n",
|
632
|
+
" ]\n",
|
633
|
+
"b:\n",
|
634
|
+
" [\n",
|
635
|
+
" [\n",
|
636
|
+
" true,\n",
|
637
|
+
" false,\n",
|
638
|
+
" true,\n",
|
639
|
+
" false,\n",
|
640
|
+
" null\n",
|
641
|
+
" ]\n",
|
642
|
+
" ]\n"
|
643
|
+
]
|
644
|
+
}
|
645
|
+
],
|
646
|
+
"source": [
|
647
|
+
"# This is a Red Arrow feature\n",
|
648
|
+
"puts df.table.to_s(format: :column)"
|
649
|
+
]
|
650
|
+
},
|
651
|
+
{
|
652
|
+
"cell_type": "code",
|
653
|
+
"execution_count": 16,
|
654
|
+
"id": "d1cc17b8-1cfc-4986-9dec-7bca02be32f0",
|
655
|
+
"metadata": {},
|
656
|
+
"outputs": [
|
657
|
+
{
|
658
|
+
"name": "stdout",
|
659
|
+
"output_type": "stream",
|
660
|
+
"text": [
|
661
|
+
"==================== 0 ====================\n",
|
662
|
+
"x: 1\n",
|
663
|
+
"y: 1.000000\n",
|
664
|
+
"s: A\n",
|
665
|
+
"b: true\n",
|
666
|
+
"==================== 1 ====================\n",
|
667
|
+
"x: 2\n",
|
668
|
+
"y: 2.000000\n",
|
669
|
+
"s: B\n",
|
670
|
+
"b: false\n",
|
671
|
+
"==================== 2 ====================\n",
|
672
|
+
"x: 3\n",
|
673
|
+
"y: 3.000000\n",
|
674
|
+
"s: C\n",
|
675
|
+
"b: true\n",
|
676
|
+
"==================== 3 ====================\n",
|
677
|
+
"x: 4\n",
|
678
|
+
"y: NaN\n",
|
679
|
+
"s: D\n",
|
680
|
+
"b: false\n",
|
681
|
+
"==================== 4 ====================\n",
|
682
|
+
"x: 5\n",
|
683
|
+
"y: (null)\n",
|
684
|
+
"s: (null)\n",
|
685
|
+
"b: (null)\n"
|
686
|
+
]
|
687
|
+
}
|
688
|
+
],
|
689
|
+
"source": [
|
690
|
+
"# This is also a Red Arrow feature\n",
|
691
|
+
"puts df.table.to_s(format: :list)"
|
692
|
+
]
|
693
|
+
},
|
694
|
+
{
|
695
|
+
"cell_type": "markdown",
|
696
|
+
"id": "16e4ae6b-2399-43f0-be8e-65669b95c7b6",
|
697
|
+
"metadata": {},
|
698
|
+
"source": [
|
699
|
+
"## 9. TDR"
|
700
|
+
]
|
701
|
+
},
|
702
|
+
{
|
703
|
+
"cell_type": "markdown",
|
704
|
+
"id": "2d14eb4b-9026-4cc5-a71a-598946d40b67",
|
705
|
+
"metadata": {},
|
706
|
+
"source": [
|
707
|
+
"TDR means 'Transposed Dataframe Representation'. It lists each column laterally, one column per line, in the same shape as when initializing from a Hash. TDR provides some information that is useful for exploratory data processing:\n",
|
708
|
+
"\n",
|
709
|
+
"- DataFrame shape: n_rows x n_columns\n",
|
710
|
+
"- Data types\n",
|
711
|
+
"- Levels: number of unique elements\n",
|
712
|
+
"- Data preview: identical values are aggregated into counts if the level is small (tally mode)\n",
|
713
|
+
"- Counts of abnormal elements: NaN and nil"
|
714
|
+
]
|
715
|
+
},
|
716
|
+
{
|
717
|
+
"cell_type": "code",
|
718
|
+
"execution_count": 17,
|
719
|
+
"id": "8050462f-7c60-41b7-a011-af11763784dc",
|
720
|
+
"metadata": {},
|
721
|
+
"outputs": [
|
722
|
+
{
|
723
|
+
"name": "stdout",
|
724
|
+
"output_type": "stream",
|
725
|
+
"text": [
|
726
|
+
"RedAmber::DataFrame : 5 x 4 Vectors\n",
|
727
|
+
"Vectors : 2 numeric, 1 string, 1 boolean\n",
|
728
|
+
"# key type level data_preview\n",
|
729
|
+
"1 :x uint8 5 [1, 2, 3, 4, 5]\n",
|
730
|
+
"2 :y double 5 [1.0, 2.0, 3.0, NaN, nil], 1 NaN, 1 nil\n",
|
731
|
+
"3 :s string 5 [\"A\", \"B\", \"C\", \"D\", nil], 1 nil\n",
|
732
|
+
"4 :b boolean 3 {true=>2, false=>2, nil=>1}\n"
|
733
|
+
]
|
734
|
+
}
|
735
|
+
],
|
736
|
+
"source": [
|
737
|
+
"# use the same dataframe as #7\n",
|
738
|
+
"df.tdr"
|
739
|
+
]
|
740
|
+
},
|
741
|
+
{
|
742
|
+
"cell_type": "code",
|
743
|
+
"execution_count": 18,
|
744
|
+
"id": "bb616ffe-c19a-4b02-a011-601ceb3db656",
|
745
|
+
"metadata": {},
|
746
|
+
"outputs": [
|
747
|
+
{
|
748
|
+
"name": "stdout",
|
749
|
+
"output_type": "stream",
|
750
|
+
"text": [
|
751
|
+
"RedAmber::DataFrame : 344 x 8 Vectors\n",
|
752
|
+
"Vectors : 5 numeric, 3 strings\n",
|
753
|
+
"# key type level data_preview\n",
|
754
|
+
"1 :species string 3 {\"Adelie\"=>152, \"Chinstrap\"=>68, \"Gentoo\"=>124}\n",
|
755
|
+
"2 :island string 3 {\"Torgersen\"=>52, \"Biscoe\"=>168, \"Dream\"=>124}\n",
|
756
|
+
"3 :bill_length_mm double 165 [39.1, 39.5, 40.3, nil, 36.7, ... ], 2 nils\n",
|
757
|
+
"4 :bill_depth_mm double 81 [18.7, 17.4, 18.0, nil, 19.3, ... ], 2 nils\n",
|
758
|
+
"5 :flipper_length_mm uint8 56 [181, 186, 195, nil, 193, ... ], 2 nils\n",
|
759
|
+
"6 :body_mass_g uint16 95 [3750, 3800, 3250, nil, 3450, ... ], 2 nils\n",
|
760
|
+
"7 :sex string 3 {\"male\"=>168, \"female\"=>165, nil=>11}\n",
|
761
|
+
"8 :year uint16 3 {2007=>110, 2008=>114, 2009=>120}\n"
|
762
|
+
]
|
763
|
+
}
|
764
|
+
],
|
765
|
+
"source": [
|
766
|
+
"penguins.tdr"
|
767
|
+
]
|
768
|
+
},
|
769
|
+
{
|
770
|
+
"cell_type": "markdown",
|
771
|
+
"id": "73b8dc18-079f-4d40-8d0e-239f010550da",
|
772
|
+
"metadata": {},
|
773
|
+
"source": [
|
774
|
+
"`#tdr` has some options:\n",
|
775
|
+
"\n",
|
776
|
+
"`limit` : the maximum number of variables to show. Default value is `limit=10`."
|
777
|
+
]
|
778
|
+
},
|
779
|
+
{
|
780
|
+
"cell_type": "code",
|
781
|
+
"execution_count": 19,
|
782
|
+
"id": "0962845d-e642-4d2a-9607-43e197b46bc5",
|
783
|
+
"metadata": {},
|
784
|
+
"outputs": [
|
785
|
+
{
|
786
|
+
"name": "stdout",
|
787
|
+
"output_type": "stream",
|
788
|
+
"text": [
|
789
|
+
"RedAmber::DataFrame : 344 x 8 Vectors\n",
|
790
|
+
"Vectors : 5 numeric, 3 strings\n",
|
791
|
+
"# key type level data_preview\n",
|
792
|
+
"1 :species string 3 {\"Adelie\"=>152, \"Chinstrap\"=>68, \"Gentoo\"=>124}\n",
|
793
|
+
"2 :island string 3 {\"Torgersen\"=>52, \"Biscoe\"=>168, \"Dream\"=>124}\n",
|
794
|
+
"3 :bill_length_mm double 165 [39.1, 39.5, 40.3, nil, 36.7, ... ], 2 nils\n",
|
795
|
+
" ... 5 more Vectors ...\n"
|
796
|
+
]
|
797
|
+
}
|
798
|
+
],
|
799
|
+
"source": [
|
800
|
+
"penguins.tdr(3)"
|
801
|
+
]
|
802
|
+
},
|
803
|
+
{
|
804
|
+
"cell_type": "markdown",
|
805
|
+
"id": "573606c4-23b9-4b38-8c92-a04f1c1e8781",
|
806
|
+
"metadata": {},
|
807
|
+
"source": [
|
808
|
+
"`elements` : the maximum number of elements to show in the data preview. Default value is `elements: 5`."
|
809
|
+
]
|
810
|
+
},
|
811
|
+
{
|
812
|
+
"cell_type": "code",
|
813
|
+
"execution_count": 20,
|
814
|
+
"id": "f957d2bd-e8c0-42a1-a3b4-0a9478e740bf",
|
815
|
+
"metadata": {},
|
816
|
+
"outputs": [
|
817
|
+
{
|
818
|
+
"name": "stdout",
|
819
|
+
"output_type": "stream",
|
820
|
+
"text": [
|
821
|
+
"RedAmber::DataFrame : 344 x 8 Vectors\n",
|
822
|
+
"Vectors : 5 numeric, 3 strings\n",
|
823
|
+
"# key type level data_preview\n",
|
824
|
+
"1 :species string 3 {\"Adelie\"=>152, \"Chinstrap\"=>68, \"Gentoo\"=>124}\n",
|
825
|
+
"2 :island string 3 {\"Torgersen\"=>52, \"Biscoe\"=>168, \"Dream\"=>124}\n",
|
826
|
+
"3 :bill_length_mm double 165 [39.1, 39.5, 40.3, ... ], 2 nils\n",
|
827
|
+
"4 :bill_depth_mm double 81 [18.7, 17.4, 18.0, ... ], 2 nils\n",
|
828
|
+
"5 :flipper_length_mm uint8 56 [181, 186, 195, ... ], 2 nils\n",
|
829
|
+
"6 :body_mass_g uint16 95 [3750, 3800, 3250, ... ], 2 nils\n",
|
830
|
+
"7 :sex string 3 {\"male\"=>168, \"female\"=>165, nil=>11}\n",
|
831
|
+
"8 :year uint16 3 {2007=>110, 2008=>114, 2009=>120}\n"
|
832
|
+
]
|
833
|
+
}
|
834
|
+
],
|
835
|
+
"source": [
|
836
|
+
"penguins.tdr(elements: 3) # Show first 3 items in data"
|
837
|
+
]
|
838
|
+
},
|
839
|
+
{
|
840
|
+
"cell_type": "markdown",
|
841
|
+
"id": "d37ece79-1999-49eb-a2d1-831184ee6509",
|
842
|
+
"metadata": {},
|
843
|
+
"source": [
|
844
|
+
"`tally` : max level to use tally mode. Level means size of `tally`ed hash. Default value is `tally: 5`."
|
845
|
+
]
|
846
|
+
},
|
847
|
+
{
|
848
|
+
"cell_type": "code",
|
849
|
+
"execution_count": 21,
|
850
|
+
"id": "9c1c472c-3d15-4bca-9a1b-7f86c63d3ed8",
|
851
|
+
"metadata": {},
|
852
|
+
"outputs": [
|
853
|
+
{
|
854
|
+
"name": "stdout",
|
855
|
+
"output_type": "stream",
|
856
|
+
"text": [
|
857
|
+
"RedAmber::DataFrame : 344 x 8 Vectors\n",
|
858
|
+
"Vectors : 5 numeric, 3 strings\n",
|
859
|
+
"# key type level data_preview\n",
|
860
|
+
"1 :species string 3 [\"Adelie\", \"Adelie\", \"Adelie\", \"Adelie\", \"Adelie\", ... ]\n",
|
861
|
+
"2 :island string 3 [\"Torgersen\", \"Torgersen\", \"Torgersen\", \"Torgersen\", \"Torgersen\", ... ]\n",
|
862
|
+
"3 :bill_length_mm double 165 [39.1, 39.5, 40.3, nil, 36.7, ... ], 2 nils\n",
|
863
|
+
"4 :bill_depth_mm double 81 [18.7, 17.4, 18.0, nil, 19.3, ... ], 2 nils\n",
|
864
|
+
"5 :flipper_length_mm uint8 56 [181, 186, 195, nil, 193, ... ], 2 nils\n",
|
865
|
+
"6 :body_mass_g uint16 95 [3750, 3800, 3250, nil, 3450, ... ], 2 nils\n",
|
866
|
+
"7 :sex string 3 [\"male\", \"female\", \"female\", nil, \"female\", ... ], 11 nils\n",
|
867
|
+
"8 :year uint16 3 [2007, 2007, 2007, 2007, 2007, ... ]\n"
|
868
|
+
]
|
869
|
+
}
|
870
|
+
],
|
871
|
+
"source": [
|
872
|
+
"penguins.tdr(tally: 0) # Don't use tally mode"
|
873
|
+
]
|
874
|
+
},
|
875
|
+
{
|
876
|
+
"cell_type": "markdown",
|
877
|
+
"id": "e3c38037-90a1-4fc5-9904-41fc74085908",
|
878
|
+
"metadata": {},
|
879
|
+
"source": [
|
880
|
+
"`#tdr_str` returns a String. `#tdr` does the same thing as `puts #tdr_str`"
|
881
|
+
]
|
882
|
+
},
|
883
|
+
{
|
884
|
+
"cell_type": "markdown",
|
885
|
+
"id": "21d68764-1bc1-4915-99b6-5ae938b85999",
|
886
|
+
"metadata": {},
|
887
|
+
"source": [
|
888
|
+
"## 10. Size and shape"
|
889
|
+
]
|
890
|
+
},
|
891
|
+
{
|
892
|
+
"cell_type": "code",
|
893
|
+
"execution_count": 22,
|
894
|
+
"id": "487399f8-a3ef-467f-aa7f-ecbaee5fcb75",
|
895
|
+
"metadata": {},
|
896
|
+
"outputs": [
|
897
|
+
{
|
898
|
+
"data": {
|
899
|
+
"text/plain": [
|
900
|
+
"5"
|
901
|
+
]
|
902
|
+
},
|
903
|
+
"execution_count": 22,
|
904
|
+
"metadata": {},
|
905
|
+
"output_type": "execute_result"
|
906
|
+
}
|
907
|
+
],
|
908
|
+
"source": [
|
909
|
+
"# same as n_rows, n_obs\n",
|
910
|
+
"df.size"
|
911
|
+
]
|
912
|
+
},
|
913
|
+
{
|
914
|
+
"cell_type": "code",
|
915
|
+
"execution_count": 23,
|
916
|
+
"id": "dc7441c3-7c85-4ce1-a20e-de8f41f280b4",
|
917
|
+
"metadata": {},
|
918
|
+
"outputs": [
|
919
|
+
{
|
920
|
+
"data": {
|
921
|
+
"text/plain": [
|
922
|
+
"4"
|
923
|
+
]
|
924
|
+
},
|
925
|
+
"execution_count": 23,
|
926
|
+
"metadata": {},
|
927
|
+
"output_type": "execute_result"
|
928
|
+
}
|
929
|
+
],
|
930
|
+
"source": [
|
931
|
+
"# same as n_cols, n_vars\n",
|
932
|
+
"df.n_keys"
|
933
|
+
]
|
934
|
+
},
|
935
|
+
{
|
936
|
+
"cell_type": "code",
|
937
|
+
"execution_count": 24,
|
938
|
+
"id": "3d42fea6-801a-45f4-8e22-ea9d76ae070f",
|
939
|
+
"metadata": {},
|
940
|
+
"outputs": [
|
941
|
+
{
|
942
|
+
"data": {
|
943
|
+
"text/plain": [
|
944
|
+
"[5, 4]"
|
945
|
+
]
|
946
|
+
},
|
947
|
+
"execution_count": 24,
|
948
|
+
"metadata": {},
|
949
|
+
"output_type": "execute_result"
|
950
|
+
}
|
951
|
+
],
|
952
|
+
"source": [
|
953
|
+
"# [df.size, df.n_keys], [df.n_rows, df.n_cols]\n",
|
954
|
+
"df.shape"
|
955
|
+
]
|
956
|
+
},
|
957
|
+
{
|
958
|
+
"cell_type": "markdown",
|
959
|
+
"id": "bc5caa94-325f-4014-9c90-8ac909c2b378",
|
960
|
+
"metadata": {},
|
961
|
+
"source": [
|
962
|
+
"## 11. Keys"
|
963
|
+
]
|
964
|
+
},
|
965
|
+
{
|
966
|
+
"cell_type": "code",
|
967
|
+
"execution_count": 25,
|
968
|
+
"id": "bb47775f-fed0-42e6-8781-aa8b721d6112",
|
969
|
+
"metadata": {},
|
970
|
+
"outputs": [
|
971
|
+
{
|
972
|
+
"data": {
|
973
|
+
"text/plain": [
|
974
|
+
"[:x, :y, :s, :b]"
|
975
|
+
]
|
976
|
+
},
|
977
|
+
"execution_count": 25,
|
978
|
+
"metadata": {},
|
979
|
+
"output_type": "execute_result"
|
980
|
+
}
|
981
|
+
],
|
982
|
+
"source": [
|
983
|
+
"df.keys"
|
984
|
+
]
|
985
|
+
},
|
986
|
+
{
|
987
|
+
"cell_type": "code",
|
988
|
+
"execution_count": 26,
|
989
|
+
"id": "3d540ab0-3e52-47b7-b338-b4e0b3d929cb",
|
990
|
+
"metadata": {},
|
991
|
+
"outputs": [
|
992
|
+
{
|
993
|
+
"data": {
|
994
|
+
"text/plain": [
|
995
|
+
"[:species, :island, :bill_length_mm, :bill_depth_mm, :flipper_length_mm, :body_mass_g, :sex, :year]"
|
996
|
+
]
|
997
|
+
},
|
998
|
+
"execution_count": 26,
|
999
|
+
"metadata": {},
|
1000
|
+
"output_type": "execute_result"
|
1001
|
+
}
|
1002
|
+
],
|
1003
|
+
"source": [
|
1004
|
+
"penguins.keys"
|
1005
|
+
]
|
1006
|
+
},
|
1007
|
+
{
|
1008
|
+
"cell_type": "markdown",
|
1009
|
+
"id": "decc6a61-9994-4d60-9827-b257cafafb70",
|
1010
|
+
"metadata": {},
|
1011
|
+
"source": [
|
1012
|
+
"## 12. Types"
|
1013
|
+
]
|
1014
|
+
},
|
1015
|
+
{
|
1016
|
+
"cell_type": "code",
|
1017
|
+
"execution_count": 27,
|
1018
|
+
"id": "bf9cd2bc-a213-427e-bc00-f2083b0e0471",
|
1019
|
+
"metadata": {},
|
1020
|
+
"outputs": [
|
1021
|
+
{
|
1022
|
+
"data": {
|
1023
|
+
"text/plain": [
|
1024
|
+
"[:uint8, :double, :string, :boolean]"
|
1025
|
+
]
|
1026
|
+
},
|
1027
|
+
"execution_count": 27,
|
1028
|
+
"metadata": {},
|
1029
|
+
"output_type": "execute_result"
|
1030
|
+
}
|
1031
|
+
],
|
1032
|
+
"source": [
|
1033
|
+
"df.types"
|
1034
|
+
]
|
1035
|
+
},
|
1036
|
+
{
|
1037
|
+
"cell_type": "code",
|
1038
|
+
"execution_count": 28,
|
1039
|
+
"id": "b1ecb891-98b5-4919-9f37-1847202007d8",
|
1040
|
+
"metadata": {},
|
1041
|
+
"outputs": [
|
1042
|
+
{
|
1043
|
+
"data": {
|
1044
|
+
"text/plain": [
|
1045
|
+
"[:string, :string, :double, :double, :uint8, :uint16, :string, :uint16]"
|
1046
|
+
]
|
1047
|
+
},
|
1048
|
+
"execution_count": 28,
|
1049
|
+
"metadata": {},
|
1050
|
+
"output_type": "execute_result"
|
1051
|
+
}
|
1052
|
+
],
|
1053
|
+
"source": [
|
1054
|
+
"penguins.types"
|
1055
|
+
]
|
1056
|
+
},
|
1057
|
+
{
|
1058
|
+
"cell_type": "markdown",
|
1059
|
+
"id": "869b3670-62f8-4c23-807b-d6d100a1981e",
|
1060
|
+
"metadata": {},
|
1061
|
+
"source": [
|
1062
|
+
"## 13. Data type classes"
|
1063
|
+
]
|
1064
|
+
},
|
1065
|
+
{
|
1066
|
+
"cell_type": "code",
|
1067
|
+
"execution_count": 29,
|
1068
|
+
"id": "776ab4db-073b-4b30-931a-8ec77284cdc4",
|
1069
|
+
"metadata": {},
|
1070
|
+
"outputs": [
|
1071
|
+
{
|
1072
|
+
"data": {
|
1073
|
+
"text/plain": [
|
1074
|
+
"[Arrow::UInt8DataType, Arrow::DoubleDataType, Arrow::StringDataType, Arrow::BooleanDataType]"
|
1075
|
+
]
|
1076
|
+
},
|
1077
|
+
"execution_count": 29,
|
1078
|
+
"metadata": {},
|
1079
|
+
"output_type": "execute_result"
|
1080
|
+
}
|
1081
|
+
],
|
1082
|
+
"source": [
|
1083
|
+
"df.type_classes"
|
1084
|
+
]
|
1085
|
+
},
|
1086
|
+
{
|
1087
|
+
"cell_type": "code",
|
1088
|
+
"execution_count": 30,
|
1089
|
+
"id": "0546a5d0-cab1-4ca8-a2e5-0637d0fd48b6",
|
1090
|
+
"metadata": {},
|
1091
|
+
"outputs": [
|
1092
|
+
{
|
1093
|
+
"data": {
|
1094
|
+
"text/plain": [
|
1095
|
+
"[Arrow::StringDataType, Arrow::StringDataType, Arrow::DoubleDataType, Arrow::DoubleDataType, Arrow::UInt8DataType, Arrow::UInt16DataType, Arrow::StringDataType, Arrow::UInt16DataType]"
|
1096
|
+
]
|
1097
|
+
},
|
1098
|
+
"execution_count": 30,
|
1099
|
+
"metadata": {},
|
1100
|
+
"output_type": "execute_result"
|
1101
|
+
}
|
1102
|
+
],
|
1103
|
+
"source": [
|
1104
|
+
"penguins.type_classes"
|
1105
|
+
]
|
1106
|
+
},
|
1107
|
+
{
|
1108
|
+
"cell_type": "markdown",
|
1109
|
+
"id": "1c2513f6-909e-47fd-a543-66c4f424f44e",
|
1110
|
+
"metadata": {},
|
1111
|
+
"source": [
|
1112
|
+
"## 14. Indices"
|
1113
|
+
]
|
1114
|
+
},
|
1115
|
+
{
|
1116
|
+
"cell_type": "code",
|
1117
|
+
"execution_count": 31,
|
1118
|
+
"id": "e6e9d7ef-1471-4f23-9210-56045c9fabd5",
|
1119
|
+
"metadata": {},
|
1120
|
+
"outputs": [
|
1121
|
+
{
|
1122
|
+
"data": {
|
1123
|
+
"text/plain": [
|
1124
|
+
"[0, 1, 2, 3, 4]"
|
1125
|
+
]
|
1126
|
+
},
|
1127
|
+
"execution_count": 31,
|
1128
|
+
"metadata": {},
|
1129
|
+
"output_type": "execute_result"
|
1130
|
+
}
|
1131
|
+
],
|
1132
|
+
"source": [
|
1133
|
+
"df.indexes\n",
|
1134
|
+
"# or\n",
|
1135
|
+
"df.indices"
|
1136
|
+
]
|
1137
|
+
},
|
1138
|
+
{
|
1139
|
+
"cell_type": "markdown",
|
1140
|
+
"id": "3908395f-b086-4fbb-9855-e1ce233f0595",
|
1141
|
+
"metadata": {},
|
1142
|
+
"source": [
|
1143
|
+
"## 15. To an Array or a Hash"
|
1144
|
+
]
|
1145
|
+
},
|
1146
|
+
{
|
1147
|
+
"cell_type": "markdown",
|
1148
|
+
"id": "22cb724e-cf61-40d9-a58b-9cc793e83645",
|
1149
|
+
"metadata": {},
|
1150
|
+
"source": [
|
1151
|
+
"DataFrame#to_a returns an array of row-oriented data without a header."
|
1152
|
+
]
|
1153
|
+
},
|
1154
|
+
{
|
1155
|
+
"cell_type": "code",
|
1156
|
+
"execution_count": 32,
|
1157
|
+
"id": "4054daad-9266-4002-8942-c0891050cb4d",
|
1158
|
+
"metadata": {},
|
1159
|
+
"outputs": [
|
1160
|
+
{
|
1161
|
+
"data": {
|
1162
|
+
"text/plain": [
|
1163
|
+
"[[1, 1.0, \"A\", true], [2, 2.0, \"B\", false], [3, 3.0, \"C\", true], [4, NaN, \"D\", false], [5, nil, nil, nil]]"
|
1164
|
+
]
|
1165
|
+
},
|
1166
|
+
"execution_count": 32,
|
1167
|
+
"metadata": {},
|
1168
|
+
"output_type": "execute_result"
|
1169
|
+
}
|
1170
|
+
],
|
1171
|
+
"source": [
|
1172
|
+
"df.to_a"
|
1173
|
+
]
|
1174
|
+
},
|
1175
|
+
{
|
1176
|
+
"cell_type": "markdown",
|
1177
|
+
"id": "f6abae59-fe31-4056-9de8-7c36e35235de",
|
1178
|
+
"metadata": {},
|
1179
|
+
"source": [
|
1180
|
+
"If you need a column-oriented array with keys, use `.to_h.to_a`"
|
1181
|
+
]
|
1182
|
+
},
|
1183
|
+
{
|
1184
|
+
"cell_type": "code",
|
1185
|
+
"execution_count": 33,
|
1186
|
+
"id": "d3631290-eb74-4d21-a469-86381c668c7f",
|
1187
|
+
"metadata": {},
|
1188
|
+
"outputs": [
|
1189
|
+
{
|
1190
|
+
"data": {
|
1191
|
+
"text/plain": [
|
1192
|
+
"{:x=>[1, 2, 3, 4, 5], :y=>[1.0, 2.0, 3.0, NaN, nil], :s=>[\"A\", \"B\", \"C\", \"D\", nil], :b=>[true, false, true, false, nil]}"
|
1193
|
+
]
|
1194
|
+
},
|
1195
|
+
"execution_count": 33,
|
1196
|
+
"metadata": {},
|
1197
|
+
"output_type": "execute_result"
|
1198
|
+
}
|
1199
|
+
],
|
1200
|
+
"source": [
|
1201
|
+
"df.to_h"
|
1202
|
+
]
|
1203
|
+
},
|
1204
|
+
{
|
1205
|
+
"cell_type": "code",
|
1206
|
+
"execution_count": 34,
|
1207
|
+
"id": "08c45e92-f640-4e62-bc96-ee259d0ecff4",
|
1208
|
+
"metadata": {},
|
1209
|
+
"outputs": [
|
1210
|
+
{
|
1211
|
+
"data": {
|
1212
|
+
"text/plain": [
|
1213
|
+
"[[:x, [1, 2, 3, 4, 5]], [:y, [1.0, 2.0, 3.0, NaN, nil]], [:s, [\"A\", \"B\", \"C\", \"D\", nil]], [:b, [true, false, true, false, nil]]]"
|
1214
|
+
]
|
1215
|
+
},
|
1216
|
+
"execution_count": 34,
|
1217
|
+
"metadata": {},
|
1218
|
+
"output_type": "execute_result"
|
1219
|
+
}
|
1220
|
+
],
|
1221
|
+
"source": [
|
1222
|
+
"df.to_h.to_a"
|
1223
|
+
]
|
1224
|
+
},
|
1225
|
+
{
|
1226
|
+
"cell_type": "markdown",
|
1227
|
+
"id": "39b65fc0-4405-4414-9a74-91c724ef587c",
|
1228
|
+
"metadata": {},
|
1229
|
+
"source": [
|
1230
|
+
"## 16. Schema"
|
1231
|
+
]
|
1232
|
+
},
|
1233
|
+
{
|
1234
|
+
"cell_type": "code",
|
1235
|
+
"execution_count": 35,
|
1236
|
+
"id": "36db7842-e9b0-4473-84d4-3aef987d427f",
|
1237
|
+
"metadata": {},
|
1238
|
+
"outputs": [
|
1239
|
+
{
|
1240
|
+
"data": {
|
1241
|
+
"text/plain": [
|
1242
|
+
"{:x=>:uint8, :y=>:double, :s=>:string, :b=>:boolean}"
|
1243
|
+
]
|
1244
|
+
},
|
1245
|
+
"execution_count": 35,
|
1246
|
+
"metadata": {},
|
1247
|
+
"output_type": "execute_result"
|
1248
|
+
}
|
1249
|
+
],
|
1250
|
+
"source": [
|
1251
|
+
"df.schema"
|
1252
|
+
]
|
1253
|
+
},
|
1254
|
+
{
|
1255
|
+
"cell_type": "markdown",
|
1256
|
+
"id": "3e61237d-ac67-45bb-827c-a769dff61809",
|
1257
|
+
"metadata": {},
|
1258
|
+
"source": [
|
1259
|
+
"## 17. Vector"
|
1260
|
+
]
|
1261
|
+
},
|
1262
|
+
{
|
1263
|
+
"cell_type": "markdown",
|
1264
|
+
"id": "27402307-aaad-49c8-88ca-65346668601d",
|
1265
|
+
"metadata": {},
|
1266
|
+
"source": [
|
1267
|
+
"Each variable (column in the table) is represented by a Vector object."
|
1268
|
+
]
|
1269
|
+
},
|
1270
|
+
{
|
1271
|
+
"cell_type": "code",
|
1272
|
+
"execution_count": 36,
|
1273
|
+
"id": "6c9ba041-231d-4057-a280-acf620b68525",
|
1274
|
+
"metadata": {},
|
1275
|
+
"outputs": [
|
1276
|
+
{
|
1277
|
+
"data": {
|
1278
|
+
"text/plain": [
|
1279
|
+
"#<RedAmber::Vector(:uint8, size=5):0x000000000000f910>\n",
|
1280
|
+
"[1, 2, 3, 4, 5]\n"
|
1281
|
+
]
|
1282
|
+
},
|
1283
|
+
"execution_count": 36,
|
1284
|
+
"metadata": {},
|
1285
|
+
"output_type": "execute_result"
|
1286
|
+
}
|
1287
|
+
],
|
1288
|
+
"source": [
|
1289
|
+
"df[:x] # This syntax comes later"
|
1290
|
+
]
|
1291
|
+
},
|
1292
|
+
{
|
1293
|
+
"cell_type": "markdown",
|
1294
|
+
"id": "3e13d06d-b432-45b2-9745-0c6ef9228e23",
|
1295
|
+
"metadata": {},
|
1296
|
+
"source": [
|
1297
|
+
"Or create a new Vector with the constructor."
|
1298
|
+
]
|
1299
|
+
},
|
1300
|
+
{
|
1301
|
+
"cell_type": "code",
|
1302
|
+
"execution_count": 37,
|
1303
|
+
"id": "3e18a4e0-238c-4800-8bda-a88a57dde3e9",
|
1304
|
+
"metadata": {},
|
1305
|
+
"outputs": [
|
1306
|
+
{
|
1307
|
+
"data": {
|
1308
|
+
"text/plain": [
|
1309
|
+
"#<RedAmber::Vector(:uint8, size=5):0x000000000000f924>\n",
|
1310
|
+
"[1, 2, 3, 4, 5]\n"
|
1311
|
+
]
|
1312
|
+
},
|
1313
|
+
"execution_count": 37,
|
1314
|
+
"metadata": {},
|
1315
|
+
"output_type": "execute_result"
|
1316
|
+
}
|
1317
|
+
],
|
1318
|
+
"source": [
|
1319
|
+
"Vector.new(1, 2, 3, 4, 5)"
|
1320
|
+
]
|
1321
|
+
},
|
1322
|
+
{
|
1323
|
+
"cell_type": "code",
|
1324
|
+
"execution_count": 38,
|
1325
|
+
"id": "3bd55d9d-b988-46b2-bc11-e3dc5f4adc6c",
|
1326
|
+
"metadata": {},
|
1327
|
+
"outputs": [
|
1328
|
+
{
|
1329
|
+
"data": {
|
1330
|
+
"text/plain": [
|
1331
|
+
"#<RedAmber::Vector(:uint8, size=5):0x000000000000f938>\n",
|
1332
|
+
"[1, 2, 3, 4, 5]\n"
|
1333
|
+
]
|
1334
|
+
},
|
1335
|
+
"execution_count": 38,
|
1336
|
+
"metadata": {},
|
1337
|
+
"output_type": "execute_result"
|
1338
|
+
}
|
1339
|
+
],
|
1340
|
+
"source": [
|
1341
|
+
"Vector.new(1..5)"
|
1342
|
+
]
|
1343
|
+
},
|
1344
|
+
{
|
1345
|
+
"cell_type": "code",
|
1346
|
+
"execution_count": 39,
|
1347
|
+
"id": "19688e6e-b59b-4a84-8c07-57e87cd0e242",
|
1348
|
+
"metadata": {},
|
1349
|
+
"outputs": [
|
1350
|
+
{
|
1351
|
+
"data": {
|
1352
|
+
"text/plain": [
|
1353
|
+
"#<RedAmber::Vector(:uint8, size=5):0x000000000000f94c>\n",
|
1354
|
+
"[1, 2, 3, 4, 5]\n"
|
1355
|
+
]
|
1356
|
+
},
|
1357
|
+
"execution_count": 39,
|
1358
|
+
"metadata": {},
|
1359
|
+
"output_type": "execute_result"
|
1360
|
+
}
|
1361
|
+
],
|
1362
|
+
"source": [
|
1363
|
+
"Vector.new([1, 2, 3], [4, 5])"
|
1364
|
+
]
|
1365
|
+
},
|
1366
|
+
{
|
1367
|
+
"cell_type": "code",
|
1368
|
+
"execution_count": 40,
|
1369
|
+
"id": "076bd0e2-01ab-4497-9b9b-84f72a4805bc",
|
1370
|
+
"metadata": {},
|
1371
|
+
"outputs": [
|
1372
|
+
{
|
1373
|
+
"data": {
|
1374
|
+
"text/plain": [
|
1375
|
+
"#<RedAmber::Vector(:uint8, size=5):0x000000000000f960>\n",
|
1376
|
+
"[1, 2, 3, 4, 5]\n"
|
1377
|
+
]
|
1378
|
+
},
|
1379
|
+
"execution_count": 40,
|
1380
|
+
"metadata": {},
|
1381
|
+
"output_type": "execute_result"
|
1382
|
+
}
|
1383
|
+
],
|
1384
|
+
"source": [
|
1385
|
+
"array = Arrow::Array.new([1, 2, 3, 4, 5])\n",
|
1386
|
+
"Vector.new(array)"
|
1387
|
+
]
|
1388
|
+
},
|
1389
|
+
{
|
1390
|
+
"cell_type": "markdown",
|
1391
|
+
"id": "22091661-e78a-4c66-9e48-4c3c676469b4",
|
1392
|
+
"metadata": {},
|
1393
|
+
"source": [
|
1394
|
+
"- TODO: `Vector[1..5]` as a constructor"
|
1395
|
+
]
|
1396
|
+
},
|
1397
|
+
{
|
1398
|
+
"cell_type": "markdown",
|
1399
|
+
"id": "b729bdba-87a2-4282-bd0e-319fe17f42da",
|
1400
|
+
"metadata": {},
|
1401
|
+
"source": [
|
1402
|
+
"## 18. Vectors"
|
1403
|
+
]
|
1404
|
+
},
|
1405
|
+
{
|
1406
|
+
"cell_type": "markdown",
|
1407
|
+
"id": "f5ddd840-2f84-467b-a9bb-feb769573b69",
|
1408
|
+
"metadata": {},
|
1409
|
+
"source": [
|
1410
|
+
"Returns an Array of Vectors in a DataFrame."
|
1411
|
+
]
|
1412
|
+
},
|
1413
|
+
{
|
1414
|
+
"cell_type": "code",
|
1415
|
+
"execution_count": 41,
|
1416
|
+
"id": "d3ae03f2-e2fe-4a15-abe1-331185448d61",
|
1417
|
+
"metadata": {},
|
1418
|
+
"outputs": [
|
1419
|
+
{
|
1420
|
+
"data": {
|
1421
|
+
"text/plain": [
|
1422
|
+
"[#<RedAmber::Vector(:uint8, size=5):0x000000000000f910>\n",
|
1423
|
+
"[1, 2, 3, 4, 5]\n",
|
1424
|
+
", #<RedAmber::Vector(:double, size=5):0x000000000000f974>\n",
|
1425
|
+
"[1.0, 2.0, 3.0, NaN, nil]\n",
|
1426
|
+
", #<RedAmber::Vector(:string, size=5):0x000000000000f988>\n",
|
1427
|
+
"[\"A\", \"B\", \"C\", \"D\", nil]\n",
|
1428
|
+
", #<RedAmber::Vector(:boolean, size=5):0x000000000000f99c>\n",
|
1429
|
+
"[true, false, true, false, nil]\n",
|
1430
|
+
"]"
|
1431
|
+
]
|
1432
|
+
},
|
1433
|
+
"execution_count": 41,
|
1434
|
+
"metadata": {},
|
1435
|
+
"output_type": "execute_result"
|
1436
|
+
}
|
1437
|
+
],
|
1438
|
+
"source": [
|
1439
|
+
"df.vectors"
|
1440
|
+
]
|
1441
|
+
},
|
1442
|
+
{
|
1443
|
+
"cell_type": "markdown",
|
1444
|
+
"id": "8ac88ff3-0cb6-43d6-a999-0c2e8c6defb7",
|
1445
|
+
"metadata": {
|
1446
|
+
"tags": []
|
1447
|
+
},
|
1448
|
+
"source": [
|
1449
|
+
"## 19. Variables\n",
|
1450
|
+
"\n",
|
1451
|
+
"Returns key and Vector pairs in a Hash."
|
1452
|
+
]
|
1453
|
+
},
|
1454
|
+
{
|
1455
|
+
"cell_type": "code",
|
1456
|
+
"execution_count": 42,
|
1457
|
+
"id": "3351a216-6fe5-485e-8686-53c1e754fa2e",
|
1458
|
+
"metadata": {},
|
1459
|
+
"outputs": [
|
1460
|
+
{
|
1461
|
+
"data": {
|
1462
|
+
"text/plain": [
|
1463
|
+
"{:x=>#<RedAmber::Vector(:uint8, size=5):0x000000000000f910>\n",
|
1464
|
+
"[1, 2, 3, 4, 5]\n",
|
1465
|
+
", :y=>#<RedAmber::Vector(:double, size=5):0x000000000000f974>\n",
|
1466
|
+
"[1.0, 2.0, 3.0, NaN, nil]\n",
|
1467
|
+
", :s=>#<RedAmber::Vector(:string, size=5):0x000000000000f988>\n",
|
1468
|
+
"[\"A\", \"B\", \"C\", \"D\", nil]\n",
|
1469
|
+
", :b=>#<RedAmber::Vector(:boolean, size=5):0x000000000000f99c>\n",
|
1470
|
+
"[true, false, true, false, nil]\n",
|
1471
|
+
"}"
|
1472
|
+
]
|
1473
|
+
},
|
1474
|
+
"execution_count": 42,
|
1475
|
+
"metadata": {},
|
1476
|
+
"output_type": "execute_result"
|
1477
|
+
}
|
1478
|
+
],
|
1479
|
+
"source": [
|
1480
|
+
"df.variables"
|
1481
|
+
]
|
1482
|
+
},
|
1483
|
+
{
|
1484
|
+
"cell_type": "markdown",
|
1485
|
+
"id": "3b518c1c-eda7-406f-a885-b2344b1726eb",
|
1486
|
+
"metadata": {},
|
1487
|
+
"source": [
|
1488
|
+
"## 20. Select columns by #[ ]"
|
1489
|
+
]
|
1490
|
+
},
|
1491
|
+
{
|
1492
|
+
"cell_type": "markdown",
|
1493
|
+
"id": "767b4e49-19eb-4d5f-b030-91bd78f0f5b9",
|
1494
|
+
"metadata": {},
|
1495
|
+
"source": [
|
1496
|
+
"`DataFrame#[]` is overloading column operations and row operations.\n",
|
1497
|
+
"\n",
|
1498
|
+
"- For columns (variables)\n",
|
1499
|
+
" - Key in a Symbol: `df[:symbol]`\n",
|
1500
|
+
" - Key in a String: `df[\"string\"]`\n",
|
1501
|
+
" - Keys in an Array: `df[:symbol1, \"string\", :symbol2]`\n",
|
1502
|
+
" - Keys by indices: `df[df.keys[0]]`, `df[df.keys[1,2]]`, `df[df.keys[1..]]`"
|
1503
|
+
]
|
1504
|
+
},
|
1505
|
+
{
|
1506
|
+
"cell_type": "code",
|
1507
|
+
"execution_count": 43,
|
1508
|
+
"id": "ccf60edc-cccf-49e3-a503-1ca532247130",
|
1509
|
+
"metadata": {},
|
1510
|
+
"outputs": [
|
1511
|
+
{
|
1512
|
+
"data": {
|
1513
|
+
"text/html": [
|
1514
|
+
"RedAmber::DataFrame <5 x 2 vectors> <table><tr><th>x</th><th>y</th></tr><tr><td>1</td><td>1.0</td></tr><tr><td>2</td><td>2.0</td></tr><tr><td>3</td><td>3.0</td></tr><tr><td>4</td><td>NaN</td></tr><tr><td>5</td><td><i>(nil)</i></td></tr></table>"
|
1515
|
+
],
|
1516
|
+
"text/plain": [
|
1517
|
+
"#<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000f9b0>\n",
|
1518
|
+
" x y\n",
|
1519
|
+
" <uint8> <double>\n",
|
1520
|
+
"1 1 1.0\n",
|
1521
|
+
"2 2 2.0\n",
|
1522
|
+
"3 3 3.0\n",
|
1523
|
+
"4 4 NaN\n",
|
1524
|
+
"5 5 (nil)\n"
|
1525
|
+
]
|
1526
|
+
},
|
1527
|
+
"execution_count": 43,
|
1528
|
+
"metadata": {},
|
1529
|
+
"output_type": "execute_result"
|
1530
|
+
}
|
1531
|
+
],
|
1532
|
+
"source": [
|
1533
|
+
"# Keys in a Symbol and a String\n",
|
1534
|
+
"df[:x, 'y']"
|
1535
|
+
]
|
1536
|
+
},
|
1537
|
+
{
|
1538
|
+
"cell_type": "code",
|
1539
|
+
"execution_count": 44,
|
1540
|
+
"id": "8500f8c0-ff5a-4537-9f47-03d675e31b18",
|
1541
|
+
"metadata": {},
|
1542
|
+
"outputs": [
|
1543
|
+
{
|
1544
|
+
"data": {
|
1545
|
+
"text/html": [
|
1546
|
+
"RedAmber::DataFrame <5 x 2 vectors> <table><tr><th>x</th><th>y</th></tr><tr><td>1</td><td>1.0</td></tr><tr><td>2</td><td>2.0</td></tr><tr><td>3</td><td>3.0</td></tr><tr><td>4</td><td>NaN</td></tr><tr><td>5</td><td><i>(nil)</i></td></tr></table>"
|
1547
|
+
],
|
1548
|
+
"text/plain": [
|
1549
|
+
"#<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000f9c4>\n",
|
1550
|
+
" x y\n",
|
1551
|
+
" <uint8> <double>\n",
|
1552
|
+
"1 1 1.0\n",
|
1553
|
+
"2 2 2.0\n",
|
1554
|
+
"3 3 3.0\n",
|
1555
|
+
"4 4 NaN\n",
|
1556
|
+
"5 5 (nil)\n"
|
1557
|
+
]
|
1558
|
+
},
|
1559
|
+
"execution_count": 44,
|
1560
|
+
"metadata": {},
|
1561
|
+
"output_type": "execute_result"
|
1562
|
+
}
|
1563
|
+
],
|
1564
|
+
"source": [
|
1565
|
+
"# Keys in a Range\n",
|
1566
|
+
"df['x'..'y']"
|
1567
|
+
]
|
1568
|
+
},
|
1569
|
+
{
|
1570
|
+
"cell_type": "code",
|
1571
|
+
"execution_count": 45,
|
1572
|
+
"id": "db35cae1-35c2-47de-a7e8-906161f21282",
|
1573
|
+
"metadata": {},
|
1574
|
+
"outputs": [
|
1575
|
+
{
|
1576
|
+
"data": {
|
1577
|
+
"text/html": [
|
1578
|
+
"RedAmber::DataFrame <5 x 3 vectors> <table><tr><th>s</th><th>b</th><th>x</th></tr><tr><td>A</td><td>true</td><td>1</td></tr><tr><td>B</td><td>false</td><td>2</td></tr><tr><td>C</td><td>true</td><td>3</td></tr><tr><td>D</td><td>false</td><td>4</td></tr><tr><td><i>(nil)</i></td><td><i>(nil)</i></td><td>5</td></tr></table>"
|
1579
|
+
],
|
1580
|
+
"text/plain": [
|
1581
|
+
"#<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000000f9d8>\n",
|
1582
|
+
" s b x\n",
|
1583
|
+
" <string> <boolean> <uint8>\n",
|
1584
|
+
"1 A true 1\n",
|
1585
|
+
"2 B false 2\n",
|
1586
|
+
"3 C true 3\n",
|
1587
|
+
"4 D false 4\n",
|
1588
|
+
"5 (nil) (nil) 5\n"
|
1589
|
+
]
|
1590
|
+
},
|
1591
|
+
"execution_count": 45,
|
1592
|
+
"metadata": {},
|
1593
|
+
"output_type": "execute_result"
|
1594
|
+
}
|
1595
|
+
],
|
1596
|
+
"source": [
|
1597
|
+
"# Keys with an index Range, and a symbol\n",
|
1598
|
+
"df[df.keys[2..], :x]"
|
1599
|
+
]
|
1600
|
+
},
|
1601
|
+
{
|
1602
|
+
"cell_type": "markdown",
|
1603
|
+
"id": "03e14403-f7bc-4350-9e7b-715901164331",
|
1604
|
+
"metadata": {},
|
1605
|
+
"source": [
|
1606
|
+
"## 21. Select rows by #[ ]\n",
|
1607
|
+
"`DataFrame#[]` is overloading column operations and row operations.\n",
|
1608
|
+
"\n",
|
1609
|
+
"- For rows (observations)\n",
|
1610
|
+
" - Select rows by an Index: `df[index]`\n",
|
1611
|
+
" - Select rows by Indices: `df[indices]` # Array, Arrow::Array, Vectors are acceptable for indices\n",
|
1612
|
+
" - Select rows by Ranges: `df[range]`\n",
|
1613
|
+
" - Select rows by Booleans: `df[booleans]` # Array, Arrow::Array, Vectors are acceptable for booleans"
|
1614
|
+
]
|
1615
|
+
},
|
1616
|
+
{
|
1617
|
+
"cell_type": "code",
|
1618
|
+
"execution_count": 46,
|
1619
|
+
"id": "e3bc60a7-611e-4fd8-9770-8e0d167d3fee",
|
1620
|
+
"metadata": {},
|
1621
|
+
"outputs": [
|
1622
|
+
{
|
1623
|
+
"data": {
|
1624
|
+
"text/html": [
|
1625
|
+
"RedAmber::DataFrame <3 x 4 vectors> <table><tr><th>x</th><th>y</th><th>s</th><th>b</th></tr><tr><td>1</td><td>1.0</td><td>A</td><td>true</td></tr><tr><td>3</td><td>3.0</td><td>C</td><td>true</td></tr><tr><td>2</td><td>2.0</td><td>B</td><td>false</td></tr></table>"
|
1626
|
+
],
|
1627
|
+
"text/plain": [
|
1628
|
+
"#<RedAmber::DataFrame : 3 x 4 Vectors, 0x000000000000f9ec>\n",
|
1629
|
+
" x y s b\n",
|
1630
|
+
" <uint8> <double> <string> <boolean>\n",
|
1631
|
+
"1 1 1.0 A true\n",
|
1632
|
+
"2 3 3.0 C true\n",
|
1633
|
+
"3 2 2.0 B false\n"
|
1634
|
+
]
|
1635
|
+
},
|
1636
|
+
"execution_count": 46,
|
1637
|
+
"metadata": {},
|
1638
|
+
"output_type": "execute_result"
|
1639
|
+
}
|
1640
|
+
],
|
1641
|
+
"source": [
|
1642
|
+
"# indices\n",
|
1643
|
+
"df[0, 2, 1]"
|
1644
|
+
]
|
1645
|
+
},
|
1646
|
+
{
|
1647
|
+
"cell_type": "code",
|
1648
|
+
"execution_count": 47,
|
1649
|
+
"id": "2b8b3801-ae37-4629-9db5-ff937941c895",
|
1650
|
+
"metadata": {},
|
1651
|
+
"outputs": [
|
1652
|
+
{
|
1653
|
+
"data": {
|
1654
|
+
"text/html": [
|
1655
|
+
"RedAmber::DataFrame <3 x 4 vectors> <table><tr><th>x</th><th>y</th><th>s</th><th>b</th></tr><tr><td>2</td><td>2.0</td><td>B</td><td>false</td></tr><tr><td>3</td><td>3.0</td><td>C</td><td>true</td></tr><tr><td>5</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
|
1656
|
+
],
|
1657
|
+
"text/plain": [
|
1658
|
+
"#<RedAmber::DataFrame : 3 x 4 Vectors, 0x000000000000fa00>\n",
|
1659
|
+
" x y s b\n",
|
1660
|
+
" <uint8> <double> <string> <boolean>\n",
|
1661
|
+
"1 2 2.0 B false\n",
|
1662
|
+
"2 3 3.0 C true\n",
|
1663
|
+
"3 5 (nil) (nil) (nil)\n"
|
1664
|
+
]
|
1665
|
+
},
|
1666
|
+
"execution_count": 47,
|
1667
|
+
"metadata": {},
|
1668
|
+
"output_type": "execute_result"
|
1669
|
+
}
|
1670
|
+
],
|
1671
|
+
"source": [
|
1672
|
+
"# including a Range\n",
|
1673
|
+
"# negative indices are also acceptable\n",
|
1674
|
+
"df[1..2, -1]"
|
1675
|
+
]
|
1676
|
+
},
|
1677
|
+
{
|
1678
|
+
"cell_type": "code",
|
1679
|
+
"execution_count": 48,
|
1680
|
+
"id": "3f6f8d73-a66c-4773-9bf5-0878c700f2d6",
|
1681
|
+
"metadata": {},
|
1682
|
+
"outputs": [
|
1683
|
+
{
|
1684
|
+
"data": {
|
1685
|
+
"text/html": [
|
1686
|
+
"RedAmber::DataFrame <3 x 4 vectors> <table><tr><th>x</th><th>y</th><th>s</th><th>b</th></tr><tr><td>2</td><td>2.0</td><td>B</td><td>false</td></tr><tr><td>3</td><td>3.0</td><td>C</td><td>true</td></tr><tr><td>5</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
|
1687
|
+
],
|
1688
|
+
"text/plain": [
|
1689
|
+
"#<RedAmber::DataFrame : 3 x 4 Vectors, 0x000000000000fa14>\n",
|
1690
|
+
" x y s b\n",
|
1691
|
+
" <uint8> <double> <string> <boolean>\n",
|
1692
|
+
"1 2 2.0 B false\n",
|
1693
|
+
"2 3 3.0 C true\n",
|
1694
|
+
"3 5 (nil) (nil) (nil)\n"
|
1695
|
+
]
|
1696
|
+
},
|
1697
|
+
"execution_count": 48,
|
1698
|
+
"metadata": {},
|
1699
|
+
"output_type": "execute_result"
|
1700
|
+
}
|
1701
|
+
],
|
1702
|
+
"source": [
|
1703
|
+
"# booleans\n",
|
1704
|
+
"# length of boolean should be the same as self\n",
|
1705
|
+
"df[false, true, true, false, true]"
|
1706
|
+
]
|
1707
|
+
},
|
1708
|
+
{
|
1709
|
+
"cell_type": "code",
|
1710
|
+
"execution_count": 49,
|
1711
|
+
"id": "abe57279-54fd-48ec-a1a4-c7453211e776",
|
1712
|
+
"metadata": {},
|
1713
|
+
"outputs": [
|
1714
|
+
{
|
1715
|
+
"data": {
|
1716
|
+
"text/html": [
|
1717
|
+
"RedAmber::DataFrame <3 x 4 vectors> <table><tr><th>x</th><th>y</th><th>s</th><th>b</th></tr><tr><td>1</td><td>1.0</td><td>A</td><td>true</td></tr><tr><td>3</td><td>3.0</td><td>C</td><td>true</td></tr><tr><td>5</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
|
1718
|
+
],
|
1719
|
+
"text/plain": [
|
1720
|
+
"#<RedAmber::DataFrame : 3 x 4 Vectors, 0x000000000000fa28>\n",
|
1721
|
+
" x y s b\n",
|
1722
|
+
" <uint8> <double> <string> <boolean>\n",
|
1723
|
+
"1 1 1.0 A true\n",
|
1724
|
+
"2 3 3.0 C true\n",
|
1725
|
+
"3 5 (nil) (nil) (nil)\n"
|
1726
|
+
]
|
1727
|
+
},
|
1728
|
+
"execution_count": 49,
|
1729
|
+
"metadata": {},
|
1730
|
+
"output_type": "execute_result"
|
1731
|
+
}
|
1732
|
+
],
|
1733
|
+
"source": [
|
1734
|
+
"# Arrow::Array\n",
|
1735
|
+
"indices = Arrow::UInt8Array.new([0,2,4])\n",
|
1736
|
+
"df[indices]"
|
1737
|
+
]
|
1738
|
+
},
|
1739
|
+
{
|
1740
|
+
"cell_type": "code",
|
1741
|
+
"execution_count": 50,
|
1742
|
+
"id": "2266611f-23d8-4645-a1e8-b07c2370fb3f",
|
1743
|
+
"metadata": {},
|
1744
|
+
"outputs": [
|
1745
|
+
{
|
1746
|
+
"data": {
|
1747
|
+
"text/html": [
|
1748
|
+
"RedAmber::DataFrame <3 x 4 vectors> <table><tr><th>x</th><th>y</th><th>s</th><th>b</th></tr><tr><td>3</td><td>3.0</td><td>C</td><td>true</td></tr><tr><td>4</td><td>NaN</td><td>D</td><td>false</td></tr><tr><td>5</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
|
1749
|
+
],
|
1750
|
+
"text/plain": [
|
1751
|
+
"#<RedAmber::DataFrame : 3 x 4 Vectors, 0x000000000000fa3c>\n",
|
1752
|
+
" x y s b\n",
|
1753
|
+
" <uint8> <double> <string> <boolean>\n",
|
1754
|
+
"1 3 3.0 C true\n",
|
1755
|
+
"2 4 NaN D false\n",
|
1756
|
+
"3 5 (nil) (nil) (nil)\n"
|
1757
|
+
]
|
1758
|
+
},
|
1759
|
+
"execution_count": 50,
|
1760
|
+
"metadata": {},
|
1761
|
+
"output_type": "execute_result"
|
1762
|
+
}
|
1763
|
+
],
|
1764
|
+
"source": [
|
1765
|
+
"# By a Vector as indices\n",
|
1766
|
+
"indices = Vector.new(df.indices)\n",
|
1767
|
+
"# indices > 1 returns a boolean Vector\n",
|
1768
|
+
"df[indices > 1]"
|
1769
|
+
]
|
1770
|
+
},
|
1771
|
+
{
|
1772
|
+
"cell_type": "code",
|
1773
|
+
"execution_count": 51,
|
1774
|
+
"id": "0ea2da7e-aeca-4874-be4a-6af563aa378b",
|
1775
|
+
"metadata": {},
|
1776
|
+
"outputs": [
|
1777
|
+
{
|
1778
|
+
"data": {
|
1779
|
+
"text/plain": [
|
1780
|
+
"#<RedAmber::Vector(:boolean, size=5):0x000000000000f99c>\n",
|
1781
|
+
"[true, false, true, false, nil]\n"
|
1782
|
+
]
|
1783
|
+
},
|
1784
|
+
"execution_count": 51,
|
1785
|
+
"metadata": {},
|
1786
|
+
"output_type": "execute_result"
|
1787
|
+
}
|
1788
|
+
],
|
1789
|
+
"source": [
|
1790
|
+
"# By a Vector as booleans\n",
|
1791
|
+
"booleans = df[:b]"
|
1792
|
+
]
|
1793
|
+
},
|
1794
|
+
{
|
1795
|
+
"cell_type": "code",
|
1796
|
+
"execution_count": 52,
|
1797
|
+
"id": "9f842890-6359-4266-9a23-2f8f813ef548",
|
1798
|
+
"metadata": {},
|
1799
|
+
"outputs": [
|
1800
|
+
{
|
1801
|
+
"data": {
|
1802
|
+
"text/html": [
|
1803
|
+
"RedAmber::DataFrame <2 x 4 vectors> <table><tr><th>x</th><th>y</th><th>s</th><th>b</th></tr><tr><td>1</td><td>1.0</td><td>A</td><td>true</td></tr><tr><td>3</td><td>3.0</td><td>C</td><td>true</td></tr></table>"
|
1804
|
+
],
|
1805
|
+
"text/plain": [
|
1806
|
+
"#<RedAmber::DataFrame : 2 x 4 Vectors, 0x000000000000fa50>\n",
|
1807
|
+
" x y s b\n",
|
1808
|
+
" <uint8> <double> <string> <boolean>\n",
|
1809
|
+
"1 1 1.0 A true\n",
|
1810
|
+
"2 3 3.0 C true\n"
|
1811
|
+
]
|
1812
|
+
},
|
1813
|
+
"execution_count": 52,
|
1814
|
+
"metadata": {},
|
1815
|
+
"output_type": "execute_result"
|
1816
|
+
}
|
1817
|
+
],
|
1818
|
+
"source": [
|
1819
|
+
"df[booleans]"
|
1820
|
+
]
|
1821
|
+
},
|
1822
|
+
{
|
1823
|
+
"cell_type": "markdown",
|
1824
|
+
"id": "98a04874-cb2c-44c0-b410-b330b9d12b0f",
|
1825
|
+
"metadata": {},
|
1826
|
+
"source": [
|
1827
|
+
"## 22. empty?"
|
1828
|
+
]
|
1829
|
+
},
|
1830
|
+
{
|
1831
|
+
"cell_type": "code",
|
1832
|
+
"execution_count": 53,
|
1833
|
+
"id": "7b1ab319-90a7-4f09-8629-04dcd94076cb",
|
1834
|
+
"metadata": {},
|
1835
|
+
"outputs": [
|
1836
|
+
{
|
1837
|
+
"data": {
|
1838
|
+
"text/plain": [
|
1839
|
+
"false"
|
1840
|
+
]
|
1841
|
+
},
|
1842
|
+
"execution_count": 53,
|
1843
|
+
"metadata": {},
|
1844
|
+
"output_type": "execute_result"
|
1845
|
+
}
|
1846
|
+
],
|
1847
|
+
"source": [
|
1848
|
+
"df.empty?"
|
1849
|
+
]
|
1850
|
+
},
|
1851
|
+
{
|
1852
|
+
"cell_type": "code",
|
1853
|
+
"execution_count": 54,
|
1854
|
+
"id": "1e09c32f-20a8-4175-827f-cdb98063535a",
|
1855
|
+
"metadata": {},
|
1856
|
+
"outputs": [
|
1857
|
+
{
|
1858
|
+
"data": {
|
1859
|
+
"text/plain": [
|
1860
|
+
"true"
|
1861
|
+
]
|
1862
|
+
},
|
1863
|
+
"execution_count": 54,
|
1864
|
+
"metadata": {},
|
1865
|
+
"output_type": "execute_result"
|
1866
|
+
}
|
1867
|
+
],
|
1868
|
+
"source": [
|
1869
|
+
"DataFrame.new.empty?"
|
1870
|
+
]
|
1871
|
+
},
|
1872
|
+
{
|
1873
|
+
"cell_type": "code",
|
1874
|
+
"execution_count": 55,
|
1875
|
+
"id": "3f9f8771-87dd-44eb-8aac-6a3ed8b4c183",
|
1876
|
+
"metadata": {},
|
1877
|
+
"outputs": [
|
1878
|
+
{
|
1879
|
+
"data": {
|
1880
|
+
"text/plain": [
|
1881
|
+
"(empty DataFrame)"
|
1882
|
+
]
|
1883
|
+
},
|
1884
|
+
"execution_count": 55,
|
1885
|
+
"metadata": {},
|
1886
|
+
"output_type": "execute_result"
|
1887
|
+
}
|
1888
|
+
],
|
1889
|
+
"source": [
|
1890
|
+
"DataFrame.new"
|
1891
|
+
]
|
1892
|
+
},
|
1893
|
+
{
|
1894
|
+
"cell_type": "markdown",
|
1895
|
+
"id": "86b826dd-10e6-4087-9162-b89ac6561a61",
|
1896
|
+
"metadata": {},
|
1897
|
+
"source": [
|
1898
|
+
"## 23. Select columns by pick"
|
1899
|
+
]
|
1900
|
+
},
|
1901
|
+
{
|
1902
|
+
"cell_type": "markdown",
|
1903
|
+
"id": "b5aefd22-4e96-4dc5-91d2-e6826256bda6",
|
1904
|
+
"metadata": {
|
1905
|
+
"tags": []
|
1906
|
+
},
|
1907
|
+
"source": [
|
1908
|
+
"`DataFrame#pick` accepts an Array of keys to pick up columns (variables). You can change the order of columns at a same time."
|
1909
|
+
]
|
1910
|
+
},
|
1911
|
+
{
|
1912
|
+
"cell_type": "code",
|
1913
|
+
"execution_count": 56,
|
1914
|
+
"id": "68124521-b823-424d-9e06-d11aa927d618",
|
1915
|
+
"metadata": {
|
1916
|
+
"tags": []
|
1917
|
+
},
|
1918
|
+
"outputs": [
|
1919
|
+
{
|
1920
|
+
"data": {
|
1921
|
+
"text/html": [
|
1922
|
+
"RedAmber::DataFrame <5 x 2 vectors> <table><tr><th>s</th><th>y</th></tr><tr><td>A</td><td>1.0</td></tr><tr><td>B</td><td>2.0</td></tr><tr><td>C</td><td>3.0</td></tr><tr><td>D</td><td>NaN</td></tr><tr><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
|
1923
|
+
],
|
1924
|
+
"text/plain": [
|
1925
|
+
"#<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000fa64>\n",
|
1926
|
+
" s y\n",
|
1927
|
+
" <string> <double>\n",
|
1928
|
+
"1 A 1.0\n",
|
1929
|
+
"2 B 2.0\n",
|
1930
|
+
"3 C 3.0\n",
|
1931
|
+
"4 D NaN\n",
|
1932
|
+
"5 (nil) (nil)\n"
|
1933
|
+
]
|
1934
|
+
},
|
1935
|
+
"execution_count": 56,
|
1936
|
+
"metadata": {},
|
1937
|
+
"output_type": "execute_result"
|
1938
|
+
}
|
1939
|
+
],
|
1940
|
+
"source": [
|
1941
|
+
"df.pick(:s, :y)\n",
|
1942
|
+
"# or\n",
|
1943
|
+
"df.pick([:s, :y]) # OK too."
|
1944
|
+
]
|
1945
|
+
},
|
1946
|
+
{
|
1947
|
+
"cell_type": "markdown",
|
1948
|
+
"id": "a76dca00-da8f-4959-be18-7a1015a9d13c",
|
1949
|
+
"metadata": {},
|
1950
|
+
"source": [
|
1951
|
+
"Or use a boolean Array of lengeh `n_key` to `pick`. This style remains the order of variables."
|
1952
|
+
]
|
1953
|
+
},
|
1954
|
+
{
|
1955
|
+
"cell_type": "code",
|
1956
|
+
"execution_count": 57,
|
1957
|
+
"id": "b91f8925-529c-43c9-93ba-e21bcac0f2f7",
|
1958
|
+
"metadata": {},
|
1959
|
+
"outputs": [
|
1960
|
+
{
|
1961
|
+
"data": {
|
1962
|
+
"text/html": [
|
1963
|
+
"RedAmber::DataFrame <5 x 2 vectors> <table><tr><th>y</th><th>s</th></tr><tr><td>1.0</td><td>A</td></tr><tr><td>2.0</td><td>B</td></tr><tr><td>3.0</td><td>C</td></tr><tr><td>NaN</td><td>D</td></tr><tr><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
|
1964
|
+
],
|
1965
|
+
"text/plain": [
|
1966
|
+
"#<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000fa78>\n",
|
1967
|
+
" y s\n",
|
1968
|
+
" <double> <string>\n",
|
1969
|
+
"1 1.0 A\n",
|
1970
|
+
"2 2.0 B\n",
|
1971
|
+
"3 3.0 C\n",
|
1972
|
+
"4 NaN D\n",
|
1973
|
+
"5 (nil) (nil)\n"
|
1974
|
+
]
|
1975
|
+
},
|
1976
|
+
"execution_count": 57,
|
1977
|
+
"metadata": {},
|
1978
|
+
"output_type": "execute_result"
|
1979
|
+
}
|
1980
|
+
],
|
1981
|
+
"source": [
|
1982
|
+
"df.pick(false, true, true, false)\n",
|
1983
|
+
"# or\n",
|
1984
|
+
"df.pick([false, true, true, false]) # OK"
|
1985
|
+
]
|
1986
|
+
},
|
1987
|
+
{
|
1988
|
+
"cell_type": "markdown",
|
1989
|
+
"id": "5f903182-745b-4923-99d8-14a9b9c6ea4c",
|
1990
|
+
"metadata": {},
|
1991
|
+
"source": [
|
1992
|
+
"`#pick` also accepts a block in the context of self.\n",
|
1993
|
+
"\n",
|
1994
|
+
"Next example is picking up numeric variables."
|
1995
|
+
]
|
1996
|
+
},
|
1997
|
+
{
|
1998
|
+
"cell_type": "code",
|
1999
|
+
"execution_count": 58,
|
2000
|
+
"id": "37bb0a49-c38a-484c-91d4-3e23ab43a727",
|
2001
|
+
"metadata": {},
|
2002
|
+
"outputs": [
|
2003
|
+
{
|
2004
|
+
"data": {
|
2005
|
+
"text/html": [
|
2006
|
+
"RedAmber::DataFrame <5 x 2 vectors> <table><tr><th>x</th><th>y</th></tr><tr><td>1</td><td>1.0</td></tr><tr><td>2</td><td>2.0</td></tr><tr><td>3</td><td>3.0</td></tr><tr><td>4</td><td>NaN</td></tr><tr><td>5</td><td><i>(nil)</i></td></tr></table>"
|
2007
|
+
],
|
2008
|
+
"text/plain": [
|
2009
|
+
"#<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000fa8c>\n",
|
2010
|
+
" x y\n",
|
2011
|
+
" <uint8> <double>\n",
|
2012
|
+
"1 1 1.0\n",
|
2013
|
+
"2 2 2.0\n",
|
2014
|
+
"3 3 3.0\n",
|
2015
|
+
"4 4 NaN\n",
|
2016
|
+
"5 5 (nil)\n"
|
2017
|
+
]
|
2018
|
+
},
|
2019
|
+
"execution_count": 58,
|
2020
|
+
"metadata": {},
|
2021
|
+
"output_type": "execute_result"
|
2022
|
+
}
|
2023
|
+
],
|
2024
|
+
"source": [
|
2025
|
+
"# reciever is required with the argument style\n",
|
2026
|
+
"df.pick(df.vectors.map(&:numeric?))\n",
|
2027
|
+
"\n",
|
2028
|
+
"# with a block\n",
|
2029
|
+
"df.pick { vectors.map(&:numeric?) }"
|
2030
|
+
]
|
2031
|
+
},
|
2032
|
+
{
|
2033
|
+
"cell_type": "markdown",
|
2034
|
+
"id": "e51f07c0-54eb-4114-8cd6-63c7780e7248",
|
2035
|
+
"metadata": {},
|
2036
|
+
"source": [
|
2037
|
+
"The name `pick` comes from the action to pick variables(columns) according to the label keys."
|
2038
|
+
]
|
2039
|
+
},
|
2040
|
+
{
|
2041
|
+
"cell_type": "markdown",
|
2042
|
+
"id": "7c1815e4-de6c-425e-8602-b8dd66836250",
|
2043
|
+
"metadata": {},
|
2044
|
+
"source": [
|
2045
|
+
"## 24. Reject columns by drop"
|
2046
|
+
]
|
2047
|
+
},
|
2048
|
+
{
|
2049
|
+
"cell_type": "markdown",
|
2050
|
+
"id": "d1ab045e-66f9-4922-8bf2-35aee7f2812e",
|
2051
|
+
"metadata": {
|
2052
|
+
"tags": []
|
2053
|
+
},
|
2054
|
+
"source": [
|
2055
|
+
"`DataFrame#drop` accepts an Array keys to drop columns (variables) to create remainer DataFrame."
|
2056
|
+
]
|
2057
|
+
},
|
2058
|
+
{
|
2059
|
+
"cell_type": "code",
|
2060
|
+
"execution_count": 59,
|
2061
|
+
"id": "7ccace08-62b0-4b0b-93fb-81edf673abf7",
|
2062
|
+
"metadata": {},
|
2063
|
+
"outputs": [
|
2064
|
+
{
|
2065
|
+
"data": {
|
2066
|
+
"text/html": [
|
2067
|
+
"RedAmber::DataFrame <5 x 2 vectors> <table><tr><th>y</th><th>s</th></tr><tr><td>1.0</td><td>A</td></tr><tr><td>2.0</td><td>B</td></tr><tr><td>3.0</td><td>C</td></tr><tr><td>NaN</td><td>D</td></tr><tr><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
|
2068
|
+
],
|
2069
|
+
"text/plain": [
|
2070
|
+
"#<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000faa0>\n",
|
2071
|
+
" y s\n",
|
2072
|
+
" <double> <string>\n",
|
2073
|
+
"1 1.0 A\n",
|
2074
|
+
"2 2.0 B\n",
|
2075
|
+
"3 3.0 C\n",
|
2076
|
+
"4 NaN D\n",
|
2077
|
+
"5 (nil) (nil)\n"
|
2078
|
+
]
|
2079
|
+
},
|
2080
|
+
"execution_count": 59,
|
2081
|
+
"metadata": {},
|
2082
|
+
"output_type": "execute_result"
|
2083
|
+
}
|
2084
|
+
],
|
2085
|
+
"source": [
|
2086
|
+
"df.drop(:x, :b)\n",
|
2087
|
+
"# df.drop([:x, :b]) #is OK too."
|
2088
|
+
]
|
2089
|
+
},
|
2090
|
+
{
|
2091
|
+
"cell_type": "markdown",
|
2092
|
+
"id": "2085b349-95c5-4607-b029-f7c3d630ac1c",
|
2093
|
+
"metadata": {},
|
2094
|
+
"source": [
|
2095
|
+
"Or use a boolean Array of lengeh `n_key` to `drop`."
|
2096
|
+
]
|
2097
|
+
},
|
2098
|
+
{
|
2099
|
+
"cell_type": "code",
|
2100
|
+
"execution_count": 60,
|
2101
|
+
"id": "785c02f1-1e16-4722-9961-4b49223c8290",
|
2102
|
+
"metadata": {},
|
2103
|
+
"outputs": [
|
2104
|
+
{
|
2105
|
+
"data": {
|
2106
|
+
"text/html": [
|
2107
|
+
"RedAmber::DataFrame <5 x 2 vectors> <table><tr><th>y</th><th>s</th></tr><tr><td>1.0</td><td>A</td></tr><tr><td>2.0</td><td>B</td></tr><tr><td>3.0</td><td>C</td></tr><tr><td>NaN</td><td>D</td></tr><tr><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
|
2108
|
+
],
|
2109
|
+
"text/plain": [
|
2110
|
+
"#<RedAmber::DataFrame : 5 x 2 Vectors, 0x000000000000fab4>\n",
|
2111
|
+
" y s\n",
|
2112
|
+
" <double> <string>\n",
|
2113
|
+
"1 1.0 A\n",
|
2114
|
+
"2 2.0 B\n",
|
2115
|
+
"3 3.0 C\n",
|
2116
|
+
"4 NaN D\n",
|
2117
|
+
"5 (nil) (nil)\n"
|
2118
|
+
]
|
2119
|
+
},
|
2120
|
+
"execution_count": 60,
|
2121
|
+
"metadata": {},
|
2122
|
+
"output_type": "execute_result"
|
2123
|
+
}
|
2124
|
+
],
|
2125
|
+
"source": [
|
2126
|
+
"df.drop(true, false, false, true)\n",
|
2127
|
+
"# df.drop([true, false, false, true]) # is OK too"
|
2128
|
+
]
|
2129
|
+
},
|
2130
|
+
{
|
2131
|
+
"cell_type": "markdown",
|
2132
|
+
"id": "d246161e-02cc-40fb-8921-26b37eb3956f",
|
2133
|
+
"metadata": {},
|
2134
|
+
"source": [
|
2135
|
+
"`#drop` also accepts a block in the context of self.\n",
|
2136
|
+
"\n",
|
2137
|
+
"Next example will drop variables which have nil or NaN values."
|
2138
|
+
]
|
2139
|
+
},
|
2140
|
+
{
|
2141
|
+
"cell_type": "code",
|
2142
|
+
"execution_count": 61,
|
2143
|
+
"id": "069932e3-d393-4ede-9eb5-7aac8625e0c0",
|
2144
|
+
"metadata": {},
|
2145
|
+
"outputs": [
|
2146
|
+
{
|
2147
|
+
"data": {
|
2148
|
+
"text/html": [
|
2149
|
+
"RedAmber::DataFrame <5 x 1 vector> <table><tr><th>x</th></tr><tr><td>1</td></tr><tr><td>2</td></tr><tr><td>3</td></tr><tr><td>4</td></tr><tr><td>5</td></tr></table>"
|
2150
|
+
],
|
2151
|
+
"text/plain": [
|
2152
|
+
"#<RedAmber::DataFrame : 5 x 1 Vector, 0x000000000000fac8>\n",
|
2153
|
+
" x\n",
|
2154
|
+
" <uint8>\n",
|
2155
|
+
"1 1\n",
|
2156
|
+
"2 2\n",
|
2157
|
+
"3 3\n",
|
2158
|
+
"4 4\n",
|
2159
|
+
"5 5\n"
|
2160
|
+
]
|
2161
|
+
},
|
2162
|
+
"execution_count": 61,
|
2163
|
+
"metadata": {},
|
2164
|
+
"output_type": "execute_result"
|
2165
|
+
}
|
2166
|
+
],
|
2167
|
+
"source": [
|
2168
|
+
"df.drop { vectors.map { |v| v.is_na.any } }"
|
2169
|
+
]
|
2170
|
+
},
|
2171
|
+
{
|
2172
|
+
"cell_type": "markdown",
|
2173
|
+
"id": "88b064d6-7d90-4a0b-b9c8-d92e103269fb",
|
2174
|
+
"metadata": {},
|
2175
|
+
"source": [
|
2176
|
+
"Argument style is also acceptable but it requires the reciever 'df'."
|
2177
|
+
]
|
2178
|
+
},
|
2179
|
+
{
|
2180
|
+
"cell_type": "code",
|
2181
|
+
"execution_count": 62,
|
2182
|
+
"id": "3003a5c2-0966-4f2c-9643-59e8b546c8aa",
|
2183
|
+
"metadata": {},
|
2184
|
+
"outputs": [
|
2185
|
+
{
|
2186
|
+
"data": {
|
2187
|
+
"text/html": [
|
2188
|
+
"RedAmber::DataFrame <5 x 1 vector> <table><tr><th>x</th></tr><tr><td>1</td></tr><tr><td>2</td></tr><tr><td>3</td></tr><tr><td>4</td></tr><tr><td>5</td></tr></table>"
|
2189
|
+
],
|
2190
|
+
"text/plain": [
|
2191
|
+
"#<RedAmber::DataFrame : 5 x 1 Vector, 0x000000000000fadc>\n",
|
2192
|
+
" x\n",
|
2193
|
+
" <uint8>\n",
|
2194
|
+
"1 1\n",
|
2195
|
+
"2 2\n",
|
2196
|
+
"3 3\n",
|
2197
|
+
"4 4\n",
|
2198
|
+
"5 5\n"
|
2199
|
+
]
|
2200
|
+
},
|
2201
|
+
"execution_count": 62,
|
2202
|
+
"metadata": {},
|
2203
|
+
"output_type": "execute_result"
|
2204
|
+
}
|
2205
|
+
],
|
2206
|
+
"source": [
|
2207
|
+
"df.drop(df.vectors.map { |v| v.is_na.any })"
|
2208
|
+
]
|
2209
|
+
},
|
2210
|
+
{
|
2211
|
+
"cell_type": "markdown",
|
2212
|
+
"id": "c6fce15c-d4a9-4281-9c07-457e78d3c13e",
|
2213
|
+
"metadata": {},
|
2214
|
+
"source": [
|
2215
|
+
"The name `drop` comes from the pair word of `pick`."
|
2216
|
+
]
|
2217
|
+
},
|
2218
|
+
{
|
2219
|
+
"cell_type": "markdown",
|
2220
|
+
"id": "0f6dc86c-828d-4f9f-8b07-fce63c30fdca",
|
2221
|
+
"metadata": {},
|
2222
|
+
"source": [
|
2223
|
+
"## 25. Pick/drop and nil"
|
2224
|
+
]
|
2225
|
+
},
|
2226
|
+
{
|
2227
|
+
"cell_type": "markdown",
|
2228
|
+
"id": "0a108878-565b-400e-9a47-a15aae09429c",
|
2229
|
+
"metadata": {},
|
2230
|
+
"source": [
|
2231
|
+
"When `pick` or `drop` is used with booleans, nil in the booleans is treated as false. This behavior is aligned with Ruby's `BasicObject#!`."
|
2232
|
+
]
|
2233
|
+
},
|
2234
|
+
{
|
2235
|
+
"cell_type": "code",
|
2236
|
+
"execution_count": 63,
|
2237
|
+
"id": "7c01fbb4-9bfa-4afc-8e6b-45c97c0beb03",
|
2238
|
+
"metadata": {},
|
2239
|
+
"outputs": [
|
2240
|
+
{
|
2241
|
+
"data": {
|
2242
|
+
"text/plain": [
|
2243
|
+
"true"
|
2244
|
+
]
|
2245
|
+
},
|
2246
|
+
"execution_count": 63,
|
2247
|
+
"metadata": {},
|
2248
|
+
"output_type": "execute_result"
|
2249
|
+
}
|
2250
|
+
],
|
2251
|
+
"source": [
|
2252
|
+
"booleans = [true, true, false, nil]\n",
|
2253
|
+
"booleans_invert = booleans.map(&:!) # => [false, false, true, true] because nil.! is true\n",
|
2254
|
+
"df.pick(booleans) == df.drop(booleans_invert)"
|
2255
|
+
]
|
2256
|
+
},
|
2257
|
+
{
|
2258
|
+
"cell_type": "markdown",
|
2259
|
+
"id": "12a24264-9b7a-42a1-a541-e292e3876e35",
|
2260
|
+
"metadata": {},
|
2261
|
+
"source": [
|
2262
|
+
"## 26. Vector#invert, #primitive_invert"
|
2263
|
+
]
|
2264
|
+
},
|
2265
|
+
{
|
2266
|
+
"cell_type": "code",
|
2267
|
+
"execution_count": 64,
|
2268
|
+
"id": "ea352e12-7e8a-43be-b8ac-797adbc47708",
|
2269
|
+
"metadata": {},
|
2270
|
+
"outputs": [
|
2271
|
+
{
|
2272
|
+
"data": {
|
2273
|
+
"text/plain": [
|
2274
|
+
"#<RedAmber::Vector(:boolean, size=4):0x000000000000faf0>\n",
|
2275
|
+
"[true, true, false, nil]\n"
|
2276
|
+
]
|
2277
|
+
},
|
2278
|
+
"execution_count": 64,
|
2279
|
+
"metadata": {},
|
2280
|
+
"output_type": "execute_result"
|
2281
|
+
}
|
2282
|
+
],
|
2283
|
+
"source": [
|
2284
|
+
"vector = Vector.new(booleans)"
|
2285
|
+
]
|
2286
|
+
},
|
2287
|
+
{
|
2288
|
+
"cell_type": "markdown",
|
2289
|
+
"id": "2a0f82e0-157b-4185-9254-0618be291f9b",
|
2290
|
+
"metadata": {},
|
2291
|
+
"source": [
|
2292
|
+
"nil is converted to nil by `Vector#invert`."
|
2293
|
+
]
|
2294
|
+
},
|
2295
|
+
{
|
2296
|
+
"cell_type": "code",
|
2297
|
+
"execution_count": 65,
|
2298
|
+
"id": "596c521f-12bf-4448-9e5d-e1b4a2c3d896",
|
2299
|
+
"metadata": {},
|
2300
|
+
"outputs": [
|
2301
|
+
{
|
2302
|
+
"data": {
|
2303
|
+
"text/plain": [
|
2304
|
+
"#<RedAmber::Vector(:boolean, size=4):0x000000000000fb04>\n",
|
2305
|
+
"[false, false, true, nil]\n"
|
2306
|
+
]
|
2307
|
+
},
|
2308
|
+
"execution_count": 65,
|
2309
|
+
"metadata": {},
|
2310
|
+
"output_type": "execute_result"
|
2311
|
+
}
|
2312
|
+
],
|
2313
|
+
"source": [
|
2314
|
+
"vector.invert\n",
|
2315
|
+
"# or\n",
|
2316
|
+
"!vector"
|
2317
|
+
]
|
2318
|
+
},
|
2319
|
+
{
|
2320
|
+
"cell_type": "markdown",
|
2321
|
+
"id": "a1aec910-3055-4627-a02b-22d45f2ceb70",
|
2322
|
+
"metadata": {},
|
2323
|
+
"source": [
|
2324
|
+
"So `df.pick(booleans) != df.drop(booleans.invert)` when booleans have any nils.\n",
|
2325
|
+
"\n",
|
2326
|
+
"On the other hand, `Vector#primitive_invert` follows Ruby's `BasicObject#!`'s behavior. Then pick and drop keep 'MECE' behavior."
|
2327
|
+
]
|
2328
|
+
},
|
2329
|
+
{
|
2330
|
+
"cell_type": "code",
|
2331
|
+
"execution_count": 66,
|
2332
|
+
"id": "4dcaba48-1cea-4ce9-b4a9-b079b43af7ec",
|
2333
|
+
"metadata": {},
|
2334
|
+
"outputs": [
|
2335
|
+
{
|
2336
|
+
"data": {
|
2337
|
+
"text/plain": [
|
2338
|
+
"#<RedAmber::Vector(:boolean, size=4):0x000000000000fb18>\n",
|
2339
|
+
"[false, false, true, true]\n"
|
2340
|
+
]
|
2341
|
+
},
|
2342
|
+
"execution_count": 66,
|
2343
|
+
"metadata": {},
|
2344
|
+
"output_type": "execute_result"
|
2345
|
+
}
|
2346
|
+
],
|
2347
|
+
"source": [
|
2348
|
+
"vector.primitive_invert"
|
2349
|
+
]
|
2350
|
+
},
|
2351
|
+
{
|
2352
|
+
"cell_type": "code",
|
2353
|
+
"execution_count": 67,
|
2354
|
+
"id": "c7ae4dad-275a-49e0-a0b0-bf3686248070",
|
2355
|
+
"metadata": {},
|
2356
|
+
"outputs": [
|
2357
|
+
{
|
2358
|
+
"data": {
|
2359
|
+
"text/plain": [
|
2360
|
+
"true"
|
2361
|
+
]
|
2362
|
+
},
|
2363
|
+
"execution_count": 67,
|
2364
|
+
"metadata": {},
|
2365
|
+
"output_type": "execute_result"
|
2366
|
+
}
|
2367
|
+
],
|
2368
|
+
"source": [
|
2369
|
+
"df.pick(vector) == df.drop(vector.primitive_invert)"
|
2370
|
+
]
|
2371
|
+
},
|
2372
|
+
{
|
2373
|
+
"cell_type": "markdown",
|
2374
|
+
"id": "9a6cec74-43f0-4a72-8262-25b1e311f602",
|
2375
|
+
"metadata": {},
|
2376
|
+
"source": [
|
2377
|
+
"## 27. Pick/drop and [ ]"
|
2378
|
+
]
|
2379
|
+
},
|
2380
|
+
{
|
2381
|
+
"cell_type": "markdown",
|
2382
|
+
"id": "32c8f74d-b3ce-4305-9af7-6ea70052c773",
|
2383
|
+
"metadata": {},
|
2384
|
+
"source": [
|
2385
|
+
"When `pick` or `drop` select a single column (variable), it returns a `DataFrame` with one column (variable)."
|
2386
|
+
]
|
2387
|
+
},
|
2388
|
+
{
|
2389
|
+
"cell_type": "code",
|
2390
|
+
"execution_count": 68,
|
2391
|
+
"id": "e13aee24-cac6-41ad-b8a3-0ec26edbe5d1",
|
2392
|
+
"metadata": {},
|
2393
|
+
"outputs": [
|
2394
|
+
{
|
2395
|
+
"data": {
|
2396
|
+
"text/html": [
|
2397
|
+
"RedAmber::DataFrame <5 x 1 vector> <table><tr><th>x</th></tr><tr><td>1</td></tr><tr><td>2</td></tr><tr><td>3</td></tr><tr><td>4</td></tr><tr><td>5</td></tr></table>"
|
2398
|
+
],
|
2399
|
+
"text/plain": [
|
2400
|
+
"#<RedAmber::DataFrame : 5 x 1 Vector, 0x000000000000fb2c>\n",
|
2401
|
+
" x\n",
|
2402
|
+
" <uint8>\n",
|
2403
|
+
"1 1\n",
|
2404
|
+
"2 2\n",
|
2405
|
+
"3 3\n",
|
2406
|
+
"4 4\n",
|
2407
|
+
"5 5\n"
|
2408
|
+
]
|
2409
|
+
},
|
2410
|
+
"execution_count": 68,
|
2411
|
+
"metadata": {},
|
2412
|
+
"output_type": "execute_result"
|
2413
|
+
}
|
2414
|
+
],
|
2415
|
+
"source": [
|
2416
|
+
"df.pick(:x) # or\n",
|
2417
|
+
"df.drop(:y, :s, :b)"
|
2418
|
+
]
|
2419
|
+
},
|
2420
|
+
{
|
2421
|
+
"cell_type": "markdown",
|
2422
|
+
"id": "3e47b9d2-929e-4674-9690-0a1fdf7b0a7d",
|
2423
|
+
"metadata": {},
|
2424
|
+
"source": [
|
2425
|
+
"In contrast, when `[]` selects a single column (variable), it returns a `Vector`."
|
2426
|
+
]
|
2427
|
+
},
|
2428
|
+
{
|
2429
|
+
"cell_type": "code",
|
2430
|
+
"execution_count": 69,
|
2431
|
+
"id": "60d228be-7357-434d-9d39-ee72c110e6fe",
|
2432
|
+
"metadata": {},
|
2433
|
+
"outputs": [
|
2434
|
+
{
|
2435
|
+
"data": {
|
2436
|
+
"text/plain": [
|
2437
|
+
"#<RedAmber::Vector(:uint8, size=5):0x000000000000f910>\n",
|
2438
|
+
"[1, 2, 3, 4, 5]\n"
|
2439
|
+
]
|
2440
|
+
},
|
2441
|
+
"execution_count": 69,
|
2442
|
+
"metadata": {},
|
2443
|
+
"output_type": "execute_result"
|
2444
|
+
}
|
2445
|
+
],
|
2446
|
+
"source": [
|
2447
|
+
"df[:x]"
|
2448
|
+
]
|
2449
|
+
},
|
2450
|
+
{
|
2451
|
+
"cell_type": "markdown",
|
2452
|
+
"id": "6d973934-e08b-4b45-8efb-52f9167e7238",
|
2453
|
+
"metadata": {},
|
2454
|
+
"source": [
|
2455
|
+
"This behavior may be useful to use in a block of DataFrame manipulation verbs (like pick, drop, slice, remove, assign, rename)."
|
2456
|
+
]
|
2457
|
+
},
|
2458
|
+
{
|
2459
|
+
"cell_type": "markdown",
|
2460
|
+
"id": "34c9bcb0-889a-4190-b2b8-49765cd059c2",
|
2461
|
+
"metadata": {},
|
2462
|
+
"source": [
|
2463
|
+
"## 28. Slice"
|
2464
|
+
]
|
2465
|
+
},
|
2466
|
+
{
|
2467
|
+
"cell_type": "markdown",
|
2468
|
+
"id": "9a428ba8-c306-4ab8-8607-51174e8e6ebe",
|
2469
|
+
"metadata": {},
|
2470
|
+
"source": [
|
2471
|
+
"`slice` selects rows (observations) to create a subset of a DataFrame."
|
2472
|
+
]
|
2473
|
+
},
|
2474
|
+
{
|
2475
|
+
"cell_type": "markdown",
|
2476
|
+
"id": "6016d6d4-72d6-4ae2-b7dd-3d526c91ae61",
|
2477
|
+
"metadata": {},
|
2478
|
+
"source": [
|
2479
|
+
"`slice(indeces)` accepts indices as arguments. Indices should be Integers, Floats or Ranges of Integers. Negative index from the tail like Ruby's Array is also acceptable."
|
2480
|
+
]
|
2481
|
+
},
|
2482
|
+
{
|
2483
|
+
"cell_type": "code",
|
2484
|
+
"execution_count": 70,
|
2485
|
+
"id": "9cdce2e4-7876-4be6-bd1f-bc8ab6e6c871",
|
2486
|
+
"metadata": {},
|
2487
|
+
"outputs": [
|
2488
|
+
{
|
2489
|
+
"data": {
|
2490
|
+
"text/html": [
|
2491
|
+
"RedAmber::DataFrame <10 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.1</td><td>18.7</td><td>181</td><td>3750</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.5</td><td>17.4</td><td>186</td><td>3800</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td>2007</td></tr><tr><td colspan='8'>⋮</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.4</td><td>15.7</td><td>222</td><td>5750</td><td>male</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>45.2</td><td>14.8</td><td>212</td><td>5200</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>49.9</td><td>16.1</td><td>213</td><td>5400</td><td>male</td><td>2009</td></tr></table>"
|
2492
|
+
],
|
2493
|
+
"text/plain": [
|
2494
|
+
"#<RedAmber::DataFrame : 10 x 8 Vectors, 0x000000000000fb40>\n",
|
2495
|
+
" species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
|
2496
|
+
" <string> <string> <double> <double> <uint8> ... <uint16>\n",
|
2497
|
+
" 1 Adelie Torgersen 39.1 18.7 181 ... 2007\n",
|
2498
|
+
" 2 Adelie Torgersen 39.5 17.4 186 ... 2007\n",
|
2499
|
+
" 3 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
|
2500
|
+
" 4 Adelie Torgersen (nil) (nil) (nil) ... 2007\n",
|
2501
|
+
" 5 Adelie Torgersen 36.7 19.3 193 ... 2007\n",
|
2502
|
+
" : : : : : : ... :\n",
|
2503
|
+
" 8 Gentoo Biscoe 50.4 15.7 222 ... 2009\n",
|
2504
|
+
" 9 Gentoo Biscoe 45.2 14.8 212 ... 2009\n",
|
2505
|
+
"10 Gentoo Biscoe 49.9 16.1 213 ... 2009\n"
|
2506
|
+
]
|
2507
|
+
},
|
2508
|
+
"execution_count": 70,
|
2509
|
+
"metadata": {},
|
2510
|
+
"output_type": "execute_result"
|
2511
|
+
}
|
2512
|
+
],
|
2513
|
+
"source": [
|
2514
|
+
"# returns 5 rows at the start and 5 rows from the end\n",
|
2515
|
+
"penguins.slice(0...5, -5..-1)"
|
2516
|
+
]
|
2517
|
+
},
|
2518
|
+
{
|
2519
|
+
"cell_type": "code",
|
2520
|
+
"execution_count": 71,
|
2521
|
+
"id": "380ab809-09ae-4e69-a8e6-8d53d1e7822d",
|
2522
|
+
"metadata": {},
|
2523
|
+
"outputs": [
|
2524
|
+
{
|
2525
|
+
"data": {
|
2526
|
+
"text/html": [
|
2527
|
+
"RedAmber::DataFrame <1 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Biscoe</td><td>42.2</td><td>19.5</td><td>197</td><td>4275</td><td>male</td><td>2009</td></tr></table>"
|
2528
|
+
],
|
2529
|
+
"text/plain": [
|
2530
|
+
"#<RedAmber::DataFrame : 1 x 8 Vectors, 0x000000000000fb54>\n",
|
2531
|
+
" species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g ... year\n",
|
2532
|
+
" <string> <string> <double> <double> <uint8> <uint16> ... <uint16>\n",
|
2533
|
+
"1 Adelie Biscoe 42.2 19.5 197 4275 ... 2009\n"
|
2534
|
+
]
|
2535
|
+
},
|
2536
|
+
"execution_count": 71,
|
2537
|
+
"metadata": {},
|
2538
|
+
"output_type": "execute_result"
|
2539
|
+
}
|
2540
|
+
],
|
2541
|
+
"source": [
|
2542
|
+
"# slice accepts Float index\n",
|
2543
|
+
"# 33% of 344 observations in index => 113.52 th data ??\n",
|
2544
|
+
"penguins.slice(penguins.size * 0.33)"
|
2545
|
+
]
|
2546
|
+
},
|
2547
|
+
{
|
2548
|
+
"cell_type": "markdown",
|
2549
|
+
"id": "8139bb28-89f8-4058-b824-dde33ead0b60",
|
2550
|
+
"metadata": {},
|
2551
|
+
"source": [
|
2552
|
+
"Indices in Vectors or Arrow::Arrays are also acceptable."
|
2553
|
+
]
|
2554
|
+
},
|
2555
|
+
{
|
2556
|
+
"cell_type": "markdown",
|
2557
|
+
"id": "6f79db8c-c706-4d60-949b-3f644474d375",
|
2558
|
+
"metadata": {},
|
2559
|
+
"source": [
|
2560
|
+
"Another way to select in `slice` is to use booleans.\n",
|
2561
|
+
"- Booleans is an Array, Arrow::Array, Vector or their Array.\n",
|
2562
|
+
"- Each data type must be boolean.\n",
|
2563
|
+
"- Size of booleans must be same as the size of self."
|
2564
|
+
]
|
2565
|
+
},
|
2566
|
+
{
|
2567
|
+
"cell_type": "code",
|
2568
|
+
"execution_count": 72,
|
2569
|
+
"id": "f58ca131-7375-4489-90ce-6ba54b898eb5",
|
2570
|
+
"metadata": {},
|
2571
|
+
"outputs": [
|
2572
|
+
{
|
2573
|
+
"data": {
|
2574
|
+
"text/plain": [
|
2575
|
+
"#<RedAmber::Vector(:boolean, size=344):0x000000000000fb68>\n",
|
2576
|
+
"[false, false, true, nil, false, false, false, false, false, true, false, false, ... ]\n"
|
2577
|
+
]
|
2578
|
+
},
|
2579
|
+
"execution_count": 72,
|
2580
|
+
"metadata": {},
|
2581
|
+
"output_type": "execute_result"
|
2582
|
+
}
|
2583
|
+
],
|
2584
|
+
"source": [
|
2585
|
+
"# make booleans to check over 40\n",
|
2586
|
+
"booleans = penguins[:bill_length_mm] > 40"
|
2587
|
+
]
|
2588
|
+
},
|
2589
|
+
{
|
2590
|
+
"cell_type": "code",
|
2591
|
+
"execution_count": 73,
|
2592
|
+
"id": "176ab365-c66a-4712-97b9-4381a536321b",
|
2593
|
+
"metadata": {},
|
2594
|
+
"outputs": [
|
2595
|
+
{
|
2596
|
+
"data": {
|
2597
|
+
"text/html": [
|
2598
|
+
"RedAmber::DataFrame <242 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>42.0</td><td>20.2</td><td>190</td><td>4250</td><td><i>(nil)</i></td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>41.1</td><td>17.6</td><td>182</td><td>3200</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>42.5</td><td>20.7</td><td>197</td><td>4500</td><td>male</td><td>2007</td></tr><tr><td colspan='8'>⋮</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.4</td><td>15.7</td><td>222</td><td>5750</td><td>male</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>45.2</td><td>14.8</td><td>212</td><td>5200</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>49.9</td><td>16.1</td><td>213</td><td>5400</td><td>male</td><td>2009</td></tr></table>"
|
2599
|
+
],
|
2600
|
+
"text/plain": [
|
2601
|
+
"#<RedAmber::DataFrame : 242 x 8 Vectors, 0x000000000000fb7c>\n",
|
2602
|
+
" species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
|
2603
|
+
" <string> <string> <double> <double> <uint8> ... <uint16>\n",
|
2604
|
+
" 1 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
|
2605
|
+
" 2 Adelie Torgersen 42.0 20.2 190 ... 2007\n",
|
2606
|
+
" 3 Adelie Torgersen 41.1 17.6 182 ... 2007\n",
|
2607
|
+
" 4 Adelie Torgersen 42.5 20.7 197 ... 2007\n",
|
2608
|
+
" 5 Adelie Torgersen 46.0 21.5 194 ... 2007\n",
|
2609
|
+
" : : : : : : ... :\n",
|
2610
|
+
"240 Gentoo Biscoe 50.4 15.7 222 ... 2009\n",
|
2611
|
+
"241 Gentoo Biscoe 45.2 14.8 212 ... 2009\n",
|
2612
|
+
"242 Gentoo Biscoe 49.9 16.1 213 ... 2009\n"
|
2613
|
+
]
|
2614
|
+
},
|
2615
|
+
"execution_count": 73,
|
2616
|
+
"metadata": {},
|
2617
|
+
"output_type": "execute_result"
|
2618
|
+
}
|
2619
|
+
],
|
2620
|
+
"source": [
|
2621
|
+
"penguins.slice(booleans)"
|
2622
|
+
]
|
2623
|
+
},
|
2624
|
+
{
|
2625
|
+
"cell_type": "markdown",
|
2626
|
+
"id": "3264a182-6b72-461a-b712-c3b708c53516",
|
2627
|
+
"metadata": {},
|
2628
|
+
"source": [
|
2629
|
+
"`slice` accepts a block.\n",
|
2630
|
+
"- We can't use both arguments and a block at a same time.\n",
|
2631
|
+
"- The block should return indeces in any length or a boolean Array with a same length as `size`.\n",
|
2632
|
+
"- Block is called in the context of self. So reciever 'self' can be omitted in the block."
|
2633
|
+
]
|
2634
|
+
},
|
2635
|
+
{
|
2636
|
+
"cell_type": "code",
|
2637
|
+
"execution_count": 74,
|
2638
|
+
"id": "c95d3426-0bbb-430e-8d83-6e22434d99ed",
|
2639
|
+
"metadata": {},
|
2640
|
+
"outputs": [
|
2641
|
+
{
|
2642
|
+
"data": {
|
2643
|
+
"text/html": [
|
2644
|
+
"RedAmber::DataFrame <204 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.1</td><td>18.7</td><td>181</td><td>3750</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.5</td><td>17.4</td><td>186</td><td>3800</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.3</td><td>20.6</td><td>190</td><td>3650</td><td>male</td><td>2007</td></tr><tr><td colspan='8'>⋮</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>47.2</td><td>13.7</td><td>214</td><td>4925</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>46.8</td><td>14.3</td><td>215</td><td>4850</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>45.2</td><td>14.8</td><td>212</td><td>5200</td><td>female</td><td>2009</td></tr></table>"
|
2645
|
+
],
|
2646
|
+
"text/plain": [
|
2647
|
+
"#<RedAmber::DataFrame : 204 x 8 Vectors, 0x000000000000fb90>\n",
|
2648
|
+
" species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
|
2649
|
+
" <string> <string> <double> <double> <uint8> ... <uint16>\n",
|
2650
|
+
" 1 Adelie Torgersen 39.1 18.7 181 ... 2007\n",
|
2651
|
+
" 2 Adelie Torgersen 39.5 17.4 186 ... 2007\n",
|
2652
|
+
" 3 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
|
2653
|
+
" 4 Adelie Torgersen 39.3 20.6 190 ... 2007\n",
|
2654
|
+
" 5 Adelie Torgersen 38.9 17.8 181 ... 2007\n",
|
2655
|
+
" : : : : : : ... :\n",
|
2656
|
+
"202 Gentoo Biscoe 47.2 13.7 214 ... 2009\n",
|
2657
|
+
"203 Gentoo Biscoe 46.8 14.3 215 ... 2009\n",
|
2658
|
+
"204 Gentoo Biscoe 45.2 14.8 212 ... 2009\n"
|
2659
|
+
]
|
2660
|
+
},
|
2661
|
+
"execution_count": 74,
|
2662
|
+
"metadata": {},
|
2663
|
+
"output_type": "execute_result"
|
2664
|
+
}
|
2665
|
+
],
|
2666
|
+
"source": [
|
2667
|
+
"# return a DataFrame with bill_length_mm is in 2*std range around mean\n",
|
2668
|
+
"penguins.slice do\n",
|
2669
|
+
" vector = self[:bill_length_mm]\n",
|
2670
|
+
" min = vector.mean - vector.std\n",
|
2671
|
+
" max = vector.mean + vector.std\n",
|
2672
|
+
" vector.to_a.map { |e| (min..max).include? e }\n",
|
2673
|
+
"end"
|
2674
|
+
]
|
2675
|
+
},
|
2676
|
+
{
|
2677
|
+
"cell_type": "markdown",
|
2678
|
+
"id": "4fa42801-64f5-4432-856b-85c26a68515d",
|
2679
|
+
"metadata": {},
|
2680
|
+
"source": [
|
2681
|
+
"## 29. Slice and nil option"
|
2682
|
+
]
|
2683
|
+
},
|
2684
|
+
{
|
2685
|
+
"cell_type": "markdown",
|
2686
|
+
"id": "31017a7e-0923-4283-bc92-246ebe2591c3",
|
2687
|
+
"metadata": {},
|
2688
|
+
"source": [
|
2689
|
+
"`Arrow::Table#slice` uses `#filter` method with a option `Arrow::FilterOptions.null_selection_behavior = :emit_null`. This will propagate nil at the same row."
|
2690
|
+
]
|
2691
|
+
},
|
2692
|
+
{
|
2693
|
+
"cell_type": "code",
|
2694
|
+
"execution_count": 75,
|
2695
|
+
"id": "8e4a8108-154b-4621-acd1-704ddf229d61",
|
2696
|
+
"metadata": {},
|
2697
|
+
"outputs": [
|
2698
|
+
{
|
2699
|
+
"data": {
|
2700
|
+
"text/plain": [
|
2701
|
+
"#<Arrow::Table:0x113e72048 ptr=0x7fcc50a542a0>\n",
|
2702
|
+
"\t a\tb\t c\n",
|
2703
|
+
"0\t 1\tA\t 1.000000\n",
|
2704
|
+
"1\t(null)\t(null)\t (null)\n"
|
2705
|
+
]
|
2706
|
+
},
|
2707
|
+
"execution_count": 75,
|
2708
|
+
"metadata": {},
|
2709
|
+
"output_type": "execute_result"
|
2710
|
+
}
|
2711
|
+
],
|
2712
|
+
"source": [
|
2713
|
+
"hash = { a: [1, 2, 3], b: %w[A B C], c: [1.0, 2, 3] }\n",
|
2714
|
+
"table = Arrow::Table.new(hash)\n",
|
2715
|
+
"table.slice([true, false, nil])"
|
2716
|
+
]
|
2717
|
+
},
|
2718
|
+
{
|
2719
|
+
"cell_type": "markdown",
|
2720
|
+
"id": "dbb57c5a-e949-42b8-a82c-9affb3fe5b7b",
|
2721
|
+
"metadata": {},
|
2722
|
+
"source": [
|
2723
|
+
"Whereas in RedAmber, nil in the booleans passed to `DataFrame#slice` is treated as false. This behavior comes from `Arrow::FilterOptions.null_selection_behavior = :drop`, which is the default value for the `Arrow::Table.filter` method."
|
2724
|
+
]
|
2725
|
+
},
|
2726
|
+
{
|
2727
|
+
"cell_type": "code",
|
2728
|
+
"execution_count": 76,
|
2729
|
+
"id": "851c3bf6-b9e9-41bd-92c5-5372ed934549",
|
2730
|
+
"metadata": {},
|
2731
|
+
"outputs": [
|
2732
|
+
{
|
2733
|
+
"data": {
|
2734
|
+
"text/plain": [
|
2735
|
+
"#<Arrow::Table:0x113e51438 ptr=0x7fcc4f7e4ed0>\n",
|
2736
|
+
"\ta\tb\t c\n",
|
2737
|
+
"0\t1\tA\t 1.000000\n"
|
2738
|
+
]
|
2739
|
+
},
|
2740
|
+
"execution_count": 76,
|
2741
|
+
"metadata": {},
|
2742
|
+
"output_type": "execute_result"
|
2743
|
+
}
|
2744
|
+
],
|
2745
|
+
"source": [
|
2746
|
+
"RedAmber::DataFrame.new(table).slice([true, false, nil]).table"
|
2747
|
+
]
|
2748
|
+
},
|
2749
|
+
{
|
2750
|
+
"cell_type": "markdown",
|
2751
|
+
"id": "56398a3d-6146-43af-8b96-fec37730fc49",
|
2752
|
+
"metadata": {},
|
2753
|
+
"source": [
|
2754
|
+
"## 30. Remove"
|
2755
|
+
]
|
2756
|
+
},
|
2757
|
+
{
|
2758
|
+
"cell_type": "markdown",
|
2759
|
+
"id": "9e042a97-8a5d-412e-8e4a-fda382225a2d",
|
2760
|
+
"metadata": {},
|
2761
|
+
"source": [
|
2762
|
+
"Slice and reject rows (observations) to create a remainder DataFrame."
|
2763
|
+
]
|
2764
|
+
},
|
2765
|
+
{
|
2766
|
+
"cell_type": "markdown",
|
2767
|
+
"id": "2b4cbb97-eef3-4db8-8f25-c44c208ec554",
|
2768
|
+
"metadata": {},
|
2769
|
+
"source": [
|
2770
|
+
"`#remove(indices)` accepts indices as arguments. Each index should be an Integer or a Range of Integers."
|
2771
|
+
]
|
2772
|
+
},
|
2773
|
+
{
|
2774
|
+
"cell_type": "code",
|
2775
|
+
"execution_count": 77,
|
2776
|
+
"id": "17e38ab8-886b-4114-bcaf-ee18df7d00cd",
|
2777
|
+
"metadata": {},
|
2778
|
+
"outputs": [
|
2779
|
+
{
|
2780
|
+
"data": {
|
2781
|
+
"text/html": [
|
2782
|
+
"RedAmber::DataFrame <334 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.3</td><td>20.6</td><td>190</td><td>3650</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>38.9</td><td>17.8</td><td>181</td><td>3625</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.2</td><td>19.6</td><td>195</td><td>4675</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>34.1</td><td>18.1</td><td>193</td><td>3475</td><td><i>(nil)</i></td><td>2007</td></tr><tr><td colspan='8'>⋮</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>44.5</td><td>15.7</td><td>217</td><td>4875</td><td><i>(nil)</i></td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>48.8</td><td>16.2</td><td>222</td><td>6000</td><td>male</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>47.2</td><td>13.7</td><td>214</td><td>4925</td><td>female</td><td>2009</td></tr></table>"
|
2783
|
+
],
|
2784
|
+
"text/plain": [
|
2785
|
+
"#<RedAmber::DataFrame : 334 x 8 Vectors, 0x000000000000fba4>\n",
|
2786
|
+
" species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
|
2787
|
+
" <string> <string> <double> <double> <uint8> ... <uint16>\n",
|
2788
|
+
" 1 Adelie Torgersen 39.3 20.6 190 ... 2007\n",
|
2789
|
+
" 2 Adelie Torgersen 38.9 17.8 181 ... 2007\n",
|
2790
|
+
" 3 Adelie Torgersen 39.2 19.6 195 ... 2007\n",
|
2791
|
+
" 4 Adelie Torgersen 34.1 18.1 193 ... 2007\n",
|
2792
|
+
" 5 Adelie Torgersen 42.0 20.2 190 ... 2007\n",
|
2793
|
+
" : : : : : : ... :\n",
|
2794
|
+
"332 Gentoo Biscoe 44.5 15.7 217 ... 2009\n",
|
2795
|
+
"333 Gentoo Biscoe 48.8 16.2 222 ... 2009\n",
|
2796
|
+
"334 Gentoo Biscoe 47.2 13.7 214 ... 2009\n"
|
2797
|
+
]
|
2798
|
+
},
|
2799
|
+
"execution_count": 77,
|
2800
|
+
"metadata": {},
|
2801
|
+
"output_type": "execute_result"
|
2802
|
+
}
|
2803
|
+
],
|
2804
|
+
"source": [
|
2805
|
+
"# returns 6th to 339th obs. Remainder of 1st example of #30\n",
|
2806
|
+
"penguins.remove(0...5, -5..-1)"
|
2807
|
+
]
|
2808
|
+
},
|
2809
|
+
{
|
2810
|
+
"cell_type": "markdown",
|
2811
|
+
"id": "def1c1c4-6b60-4864-ae24-c797fbf008a7",
|
2812
|
+
"metadata": {},
|
2813
|
+
"source": [
|
2814
|
+
"`remove(booleans)` accepts booleans as an argument in an Array, a Vector or an Arrow::BooleanArray. Booleans must be the same length as `#size`."
|
2815
|
+
]
|
2816
|
+
},
|
2817
|
+
{
|
2818
|
+
"cell_type": "code",
|
2819
|
+
"execution_count": 78,
|
2820
|
+
"id": "6f169420-7eb2-457f-8d59-7a5c90aa3fa5",
|
2821
|
+
"metadata": {},
|
2822
|
+
"outputs": [
|
2823
|
+
{
|
2824
|
+
"data": {
|
2825
|
+
"text/html": [
|
2826
|
+
"RedAmber::DataFrame <333 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.1</td><td>18.7</td><td>181</td><td>3750</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.5</td><td>17.4</td><td>186</td><td>3800</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>36.7</td><td>19.3</td><td>193</td><td>3450</td><td>female</td><td>2007</td></tr><tr><td colspan='8'>⋮</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.4</td><td>15.7</td><td>222</td><td>5750</td><td>male</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>45.2</td><td>14.8</td><td>212</td><td>5200</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>49.9</td><td>16.1</td><td>213</td><td>5400</td><td>male</td><td>2009</td></tr></table>"
|
2827
|
+
],
|
2828
|
+
"text/plain": [
|
2829
|
+
"#<RedAmber::DataFrame : 333 x 8 Vectors, 0x000000000000fbb8>\n",
|
2830
|
+
" species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
|
2831
|
+
" <string> <string> <double> <double> <uint8> ... <uint16>\n",
|
2832
|
+
" 1 Adelie Torgersen 39.1 18.7 181 ... 2007\n",
|
2833
|
+
" 2 Adelie Torgersen 39.5 17.4 186 ... 2007\n",
|
2834
|
+
" 3 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
|
2835
|
+
" 4 Adelie Torgersen 36.7 19.3 193 ... 2007\n",
|
2836
|
+
" 5 Adelie Torgersen 39.3 20.6 190 ... 2007\n",
|
2837
|
+
" : : : : : : ... :\n",
|
2838
|
+
"331 Gentoo Biscoe 50.4 15.7 222 ... 2009\n",
|
2839
|
+
"332 Gentoo Biscoe 45.2 14.8 212 ... 2009\n",
|
2840
|
+
"333 Gentoo Biscoe 49.9 16.1 213 ... 2009\n"
|
2841
|
+
]
|
2842
|
+
},
|
2843
|
+
"execution_count": 78,
|
2844
|
+
"metadata": {},
|
2845
|
+
"output_type": "execute_result"
|
2846
|
+
}
|
2847
|
+
],
|
2848
|
+
"source": [
|
2849
|
+
"# remove all observations containing nil\n",
|
2850
|
+
"removed = penguins.remove { vectors.map(&:is_nil).reduce(&:|) }"
|
2851
|
+
]
|
2852
|
+
},
|
2853
|
+
{
|
2854
|
+
"cell_type": "markdown",
|
2855
|
+
"id": "5f1864c9-4ae4-4fcd-9840-ea424ef5e27d",
|
2856
|
+
"metadata": {},
|
2857
|
+
"source": [
|
2858
|
+
"`remove {block}` is also acceptable. We can't use both arguments and a block at the same time. The block should return indices or a boolean Array with the same length as `#size`. The block is called in the context of self."
|
2859
|
+
]
|
2860
|
+
},
|
2861
|
+
{
|
2862
|
+
"cell_type": "code",
|
2863
|
+
"execution_count": 79,
|
2864
|
+
"id": "a6807c65-25e5-4ee1-8d1b-6018c46b3999",
|
2865
|
+
"metadata": {},
|
2866
|
+
"outputs": [
|
2867
|
+
{
|
2868
|
+
"data": {
|
2869
|
+
"text/html": [
|
2870
|
+
"RedAmber::DataFrame <140 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>36.7</td><td>19.3</td><td>193</td><td>3450</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>34.1</td><td>18.1</td><td>193</td><td>3475</td><td><i>(nil)</i></td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>37.8</td><td>17.1</td><td>186</td><td>3300</td><td><i>(nil)</i></td><td>2007</td></tr><tr><td colspan='8'>⋮</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.4</td><td>15.7</td><td>222</td><td>5750</td><td>male</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>49.9</td><td>16.1</td><td>213</td><td>5400</td><td>male</td><td>2009</td></tr></table>"
|
2871
|
+
],
|
2872
|
+
"text/plain": [
|
2873
|
+
"#<RedAmber::DataFrame : 140 x 8 Vectors, 0x000000000000fbcc>\n",
|
2874
|
+
" species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
|
2875
|
+
" <string> <string> <double> <double> <uint8> ... <uint16>\n",
|
2876
|
+
" 1 Adelie Torgersen (nil) (nil) (nil) ... 2007\n",
|
2877
|
+
" 2 Adelie Torgersen 36.7 19.3 193 ... 2007\n",
|
2878
|
+
" 3 Adelie Torgersen 34.1 18.1 193 ... 2007\n",
|
2879
|
+
" 4 Adelie Torgersen 37.8 17.1 186 ... 2007\n",
|
2880
|
+
" 5 Adelie Torgersen 37.8 17.3 180 ... 2007\n",
|
2881
|
+
" : : : : : : ... :\n",
|
2882
|
+
"138 Gentoo Biscoe (nil) (nil) (nil) ... 2009\n",
|
2883
|
+
"139 Gentoo Biscoe 50.4 15.7 222 ... 2009\n",
|
2884
|
+
"140 Gentoo Biscoe 49.9 16.1 213 ... 2009\n"
|
2885
|
+
]
|
2886
|
+
},
|
2887
|
+
"execution_count": 79,
|
2888
|
+
"metadata": {},
|
2889
|
+
"output_type": "execute_result"
|
2890
|
+
}
|
2891
|
+
],
|
2892
|
+
"source": [
|
2893
|
+
"# Remove data in 2*std range around mean\n",
|
2894
|
+
"penguins.remove do\n",
|
2895
|
+
" vector = self[:bill_length_mm]\n",
|
2896
|
+
" min = vector.mean - vector.std\n",
|
2897
|
+
" max = vector.mean + vector.std\n",
|
2898
|
+
" vector.to_a.map { |e| (min..max).include? e }\n",
|
2899
|
+
"end"
|
2900
|
+
]
|
2901
|
+
},
|
2902
|
+
{
|
2903
|
+
"cell_type": "markdown",
|
2904
|
+
"id": "591e6b22-da98-4336-b22e-c7bc9bcf2ebf",
|
2905
|
+
"metadata": {},
|
2906
|
+
"source": [
|
2907
|
+
"## 31. Remove and nil"
|
2908
|
+
]
|
2909
|
+
},
|
2910
|
+
{
|
2911
|
+
"cell_type": "markdown",
|
2912
|
+
"id": "67926d1b-c76e-4cb7-b679-6545d850e7e4",
|
2913
|
+
"metadata": {},
|
2914
|
+
"source": [
|
2915
|
+
"When `remove` is used with booleans, nil in booleans is treated as false. This behavior is aligned with Ruby's `nil#!`."
|
2916
|
+
]
|
2917
|
+
},
|
2918
|
+
{
|
2919
|
+
"cell_type": "code",
|
2920
|
+
"execution_count": 80,
|
2921
|
+
"id": "8575614e-f702-4ee4-ac7b-745e9b32e803",
|
2922
|
+
"metadata": {},
|
2923
|
+
"outputs": [
|
2924
|
+
{
|
2925
|
+
"data": {
|
2926
|
+
"text/html": [
|
2927
|
+
"RedAmber::DataFrame <3 x 3 vectors> <table><tr><th>a</th><th>b</th><th>c</th></tr><tr><td>1</td><td>A</td><td>1.0</td></tr><tr><td>2</td><td>B</td><td>2.0</td></tr><tr><td><i>(nil)</i></td><td>C</td><td>3.0</td></tr></table>"
|
2928
|
+
],
|
2929
|
+
"text/plain": [
|
2930
|
+
"#<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fbe0>\n",
|
2931
|
+
" a b c\n",
|
2932
|
+
" <uint8> <string> <double>\n",
|
2933
|
+
"1 1 A 1.0\n",
|
2934
|
+
"2 2 B 2.0\n",
|
2935
|
+
"3 (nil) C 3.0\n"
|
2936
|
+
]
|
2937
|
+
},
|
2938
|
+
"execution_count": 80,
|
2939
|
+
"metadata": {},
|
2940
|
+
"output_type": "execute_result"
|
2941
|
+
}
|
2942
|
+
],
|
2943
|
+
"source": [
|
2944
|
+
"df = RedAmber::DataFrame.new(a: [1, 2, nil], b: %w[A B C], c: [1.0, 2, 3])"
|
2945
|
+
]
|
2946
|
+
},
|
2947
|
+
{
|
2948
|
+
"cell_type": "code",
|
2949
|
+
"execution_count": 81,
|
2950
|
+
"id": "932a5e71-8cef-44e5-a789-ce97329bc001",
|
2951
|
+
"metadata": {},
|
2952
|
+
"outputs": [
|
2953
|
+
{
|
2954
|
+
"data": {
|
2955
|
+
"text/plain": [
|
2956
|
+
"#<RedAmber::Vector(:boolean, size=3):0x000000000000fbf4>\n",
|
2957
|
+
"[true, false, nil]\n"
|
2958
|
+
]
|
2959
|
+
},
|
2960
|
+
"execution_count": 81,
|
2961
|
+
"metadata": {},
|
2962
|
+
"output_type": "execute_result"
|
2963
|
+
}
|
2964
|
+
],
|
2965
|
+
"source": [
|
2966
|
+
"booleans = df[:a] < 2"
|
2967
|
+
]
|
2968
|
+
},
|
2969
|
+
{
|
2970
|
+
"cell_type": "code",
|
2971
|
+
"execution_count": 82,
|
2972
|
+
"id": "74cf6aa6-8913-433d-97ad-bba2d548afe5",
|
2973
|
+
"metadata": {},
|
2974
|
+
"outputs": [
|
2975
|
+
{
|
2976
|
+
"data": {
|
2977
|
+
"text/plain": [
|
2978
|
+
"[false, true, true]"
|
2979
|
+
]
|
2980
|
+
},
|
2981
|
+
"execution_count": 82,
|
2982
|
+
"metadata": {},
|
2983
|
+
"output_type": "execute_result"
|
2984
|
+
}
|
2985
|
+
],
|
2986
|
+
"source": [
|
2987
|
+
"booleans_invert = booleans.to_a.map(&:!)"
|
2988
|
+
]
|
2989
|
+
},
|
2990
|
+
{
|
2991
|
+
"cell_type": "code",
|
2992
|
+
"execution_count": 83,
|
2993
|
+
"id": "5e466a06-cb17-4dc1-a5b0-34bfd3ffb78b",
|
2994
|
+
"metadata": {},
|
2995
|
+
"outputs": [
|
2996
|
+
{
|
2997
|
+
"data": {
|
2998
|
+
"text/plain": [
|
2999
|
+
"true"
|
3000
|
+
]
|
3001
|
+
},
|
3002
|
+
"execution_count": 83,
|
3003
|
+
"metadata": {},
|
3004
|
+
"output_type": "execute_result"
|
3005
|
+
}
|
3006
|
+
],
|
3007
|
+
"source": [
|
3008
|
+
"df.slice(booleans) == df.remove(booleans_invert)"
|
3009
|
+
]
|
3010
|
+
},
|
3011
|
+
{
|
3012
|
+
"cell_type": "markdown",
|
3013
|
+
"id": "8bca0b06-2d08-4c28-8b4c-4fd088f2d2d3",
|
3014
|
+
"metadata": {},
|
3015
|
+
"source": [
|
3016
|
+
"Whereas `Vector#invert` returns nil for nil elements. This will produce a different result. (See #26)"
|
3017
|
+
]
|
3018
|
+
},
|
3019
|
+
{
|
3020
|
+
"cell_type": "code",
|
3021
|
+
"execution_count": 84,
|
3022
|
+
"id": "077b216f-0a08-413e-95c9-12789d15a9ba",
|
3023
|
+
"metadata": {},
|
3024
|
+
"outputs": [
|
3025
|
+
{
|
3026
|
+
"data": {
|
3027
|
+
"text/plain": [
|
3028
|
+
"#<RedAmber::Vector(:boolean, size=3):0x000000000000fc08>\n",
|
3029
|
+
"[false, true, nil]\n"
|
3030
|
+
]
|
3031
|
+
},
|
3032
|
+
"execution_count": 84,
|
3033
|
+
"metadata": {},
|
3034
|
+
"output_type": "execute_result"
|
3035
|
+
}
|
3036
|
+
],
|
3037
|
+
"source": [
|
3038
|
+
"booleans.invert"
|
3039
|
+
]
|
3040
|
+
},
|
3041
|
+
{
|
3042
|
+
"cell_type": "code",
|
3043
|
+
"execution_count": 85,
|
3044
|
+
"id": "b3df62a6-c4a3-44cb-bde6-f6be12b120c8",
|
3045
|
+
"metadata": {},
|
3046
|
+
"outputs": [
|
3047
|
+
{
|
3048
|
+
"data": {
|
3049
|
+
"text/html": [
|
3050
|
+
"RedAmber::DataFrame <2 x 3 vectors> <table><tr><th>a</th><th>b</th><th>c</th></tr><tr><td>1</td><td>A</td><td>1.0</td></tr><tr><td><i>(nil)</i></td><td>C</td><td>3.0</td></tr></table>"
|
3051
|
+
],
|
3052
|
+
"text/plain": [
|
3053
|
+
"#<RedAmber::DataFrame : 2 x 3 Vectors, 0x000000000000fc1c>\n",
|
3054
|
+
" a b c\n",
|
3055
|
+
" <uint8> <string> <double>\n",
|
3056
|
+
"1 1 A 1.0\n",
|
3057
|
+
"2 (nil) C 3.0\n"
|
3058
|
+
]
|
3059
|
+
},
|
3060
|
+
"execution_count": 85,
|
3061
|
+
"metadata": {},
|
3062
|
+
"output_type": "execute_result"
|
3063
|
+
}
|
3064
|
+
],
|
3065
|
+
"source": [
|
3066
|
+
"df.remove(booleans.invert)"
|
3067
|
+
]
|
3068
|
+
},
|
3069
|
+
{
|
3070
|
+
"cell_type": "markdown",
|
3071
|
+
"id": "e05f00b6-3bae-4650-8bbc-d4e0692f6f85",
|
3072
|
+
"metadata": {},
|
3073
|
+
"source": [
|
3074
|
+
"Vector has a `#primitive_invert` method. This method returns the same result as `.to_a.map(&:!)` above."
|
3075
|
+
]
|
3076
|
+
},
|
3077
|
+
{
|
3078
|
+
"cell_type": "code",
|
3079
|
+
"execution_count": 86,
|
3080
|
+
"id": "296ca3cd-a6da-4603-a576-d8c36a810e4f",
|
3081
|
+
"metadata": {},
|
3082
|
+
"outputs": [
|
3083
|
+
{
|
3084
|
+
"data": {
|
3085
|
+
"text/plain": [
|
3086
|
+
"#<RedAmber::Vector(:boolean, size=3):0x000000000000fc30>\n",
|
3087
|
+
"[false, true, true]\n"
|
3088
|
+
]
|
3089
|
+
},
|
3090
|
+
"execution_count": 86,
|
3091
|
+
"metadata": {},
|
3092
|
+
"output_type": "execute_result"
|
3093
|
+
}
|
3094
|
+
],
|
3095
|
+
"source": [
|
3096
|
+
"booleans.primitive_invert"
|
3097
|
+
]
|
3098
|
+
},
|
3099
|
+
{
|
3100
|
+
"cell_type": "code",
|
3101
|
+
"execution_count": 87,
|
3102
|
+
"id": "ba5b8c0b-b94e-4209-adcd-258ea3b87bfd",
|
3103
|
+
"metadata": {},
|
3104
|
+
"outputs": [
|
3105
|
+
{
|
3106
|
+
"data": {
|
3107
|
+
"text/html": [
|
3108
|
+
"RedAmber::DataFrame <1 x 3 vectors> <table><tr><th>a</th><th>b</th><th>c</th></tr><tr><td>1</td><td>A</td><td>1.0</td></tr></table>"
|
3109
|
+
],
|
3110
|
+
"text/plain": [
|
3111
|
+
"#<RedAmber::DataFrame : 1 x 3 Vectors, 0x000000000000fc44>\n",
|
3112
|
+
" a b c\n",
|
3113
|
+
" <uint8> <string> <double>\n",
|
3114
|
+
"1 1 A 1.0\n"
|
3115
|
+
]
|
3116
|
+
},
|
3117
|
+
"execution_count": 87,
|
3118
|
+
"metadata": {},
|
3119
|
+
"output_type": "execute_result"
|
3120
|
+
}
|
3121
|
+
],
|
3122
|
+
"source": [
|
3123
|
+
"df.remove(booleans.primitive_invert)"
|
3124
|
+
]
|
3125
|
+
},
|
3126
|
+
{
|
3127
|
+
"cell_type": "code",
|
3128
|
+
"execution_count": 88,
|
3129
|
+
"id": "2446792f-0b0a-4642-acae-b4fec89261c1",
|
3130
|
+
"metadata": {},
|
3131
|
+
"outputs": [
|
3132
|
+
{
|
3133
|
+
"data": {
|
3134
|
+
"text/plain": [
|
3135
|
+
"true"
|
3136
|
+
]
|
3137
|
+
},
|
3138
|
+
"execution_count": 88,
|
3139
|
+
"metadata": {},
|
3140
|
+
"output_type": "execute_result"
|
3141
|
+
}
|
3142
|
+
],
|
3143
|
+
"source": [
|
3144
|
+
"df.slice(booleans) == df.remove(booleans.primitive_invert)"
|
3145
|
+
]
|
3146
|
+
},
|
3147
|
+
{
|
3148
|
+
"cell_type": "markdown",
|
3149
|
+
"id": "7c23a4ad-0c17-4178-b58a-abfd8153d49b",
|
3150
|
+
"metadata": {},
|
3151
|
+
"source": [
|
3152
|
+
"## 32. Remove nil"
|
3153
|
+
]
|
3154
|
+
},
|
3155
|
+
{
|
3156
|
+
"cell_type": "markdown",
|
3157
|
+
"id": "84c7238b-1029-416f-b495-9d045f77b22c",
|
3158
|
+
"metadata": {},
|
3159
|
+
"source": [
|
3160
|
+
"Remove any observations containing nil."
|
3161
|
+
]
|
3162
|
+
},
|
3163
|
+
{
|
3164
|
+
"cell_type": "code",
|
3165
|
+
"execution_count": 89,
|
3166
|
+
"id": "de4bb615-d14d-4c90-ab54-db2f375b9f00",
|
3167
|
+
"metadata": {},
|
3168
|
+
"outputs": [
|
3169
|
+
{
|
3170
|
+
"data": {
|
3171
|
+
"text/html": [
|
3172
|
+
"RedAmber::DataFrame <333 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.1</td><td>18.7</td><td>181</td><td>3750</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.5</td><td>17.4</td><td>186</td><td>3800</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>36.7</td><td>19.3</td><td>193</td><td>3450</td><td>female</td><td>2007</td></tr><tr><td colspan='8'>⋮</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.4</td><td>15.7</td><td>222</td><td>5750</td><td>male</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>45.2</td><td>14.8</td><td>212</td><td>5200</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>49.9</td><td>16.1</td><td>213</td><td>5400</td><td>male</td><td>2009</td></tr></table>"
|
3173
|
+
],
|
3174
|
+
"text/plain": [
|
3175
|
+
"#<RedAmber::DataFrame : 333 x 8 Vectors, 0x000000000000fc58>\n",
|
3176
|
+
" species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
|
3177
|
+
" <string> <string> <double> <double> <uint8> ... <uint16>\n",
|
3178
|
+
" 1 Adelie Torgersen 39.1 18.7 181 ... 2007\n",
|
3179
|
+
" 2 Adelie Torgersen 39.5 17.4 186 ... 2007\n",
|
3180
|
+
" 3 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
|
3181
|
+
" 4 Adelie Torgersen 36.7 19.3 193 ... 2007\n",
|
3182
|
+
" 5 Adelie Torgersen 39.3 20.6 190 ... 2007\n",
|
3183
|
+
" : : : : : : ... :\n",
|
3184
|
+
"331 Gentoo Biscoe 50.4 15.7 222 ... 2009\n",
|
3185
|
+
"332 Gentoo Biscoe 45.2 14.8 212 ... 2009\n",
|
3186
|
+
"333 Gentoo Biscoe 49.9 16.1 213 ... 2009\n"
|
3187
|
+
]
|
3188
|
+
},
|
3189
|
+
"execution_count": 89,
|
3190
|
+
"metadata": {},
|
3191
|
+
"output_type": "execute_result"
|
3192
|
+
}
|
3193
|
+
],
|
3194
|
+
"source": [
|
3195
|
+
"penguins.remove_nil"
|
3196
|
+
]
|
3197
|
+
},
|
3198
|
+
{
|
3199
|
+
"cell_type": "markdown",
|
3200
|
+
"id": "4a4ae8f9-dcf8-4dad-bb77-af076e9cadb5",
|
3201
|
+
"metadata": {},
|
3202
|
+
"source": [
|
3203
|
+
"The roundabout way for this is to use `#remove`."
|
3204
|
+
]
|
3205
|
+
},
|
3206
|
+
{
|
3207
|
+
"cell_type": "code",
|
3208
|
+
"execution_count": 90,
|
3209
|
+
"id": "27a3da5f-0ea2-4c5d-a6c3-c0e20f2224a3",
|
3210
|
+
"metadata": {},
|
3211
|
+
"outputs": [
|
3212
|
+
{
|
3213
|
+
"data": {
|
3214
|
+
"text/html": [
|
3215
|
+
"RedAmber::DataFrame <333 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.1</td><td>18.7</td><td>181</td><td>3750</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.5</td><td>17.4</td><td>186</td><td>3800</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>36.7</td><td>19.3</td><td>193</td><td>3450</td><td>female</td><td>2007</td></tr><tr><td colspan='8'>⋮</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.4</td><td>15.7</td><td>222</td><td>5750</td><td>male</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>45.2</td><td>14.8</td><td>212</td><td>5200</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>49.9</td><td>16.1</td><td>213</td><td>5400</td><td>male</td><td>2009</td></tr></table>"
|
3216
|
+
],
|
3217
|
+
"text/plain": [
|
3218
|
+
"#<RedAmber::DataFrame : 333 x 8 Vectors, 0x000000000000fc6c>\n",
|
3219
|
+
" species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
|
3220
|
+
" <string> <string> <double> <double> <uint8> ... <uint16>\n",
|
3221
|
+
" 1 Adelie Torgersen 39.1 18.7 181 ... 2007\n",
|
3222
|
+
" 2 Adelie Torgersen 39.5 17.4 186 ... 2007\n",
|
3223
|
+
" 3 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
|
3224
|
+
" 4 Adelie Torgersen 36.7 19.3 193 ... 2007\n",
|
3225
|
+
" 5 Adelie Torgersen 39.3 20.6 190 ... 2007\n",
|
3226
|
+
" : : : : : : ... :\n",
|
3227
|
+
"331 Gentoo Biscoe 50.4 15.7 222 ... 2009\n",
|
3228
|
+
"332 Gentoo Biscoe 45.2 14.8 212 ... 2009\n",
|
3229
|
+
"333 Gentoo Biscoe 49.9 16.1 213 ... 2009\n"
|
3230
|
+
]
|
3231
|
+
},
|
3232
|
+
"execution_count": 90,
|
3233
|
+
"metadata": {},
|
3234
|
+
"output_type": "execute_result"
|
3235
|
+
}
|
3236
|
+
],
|
3237
|
+
"source": [
|
3238
|
+
"penguins.remove { vectors.map(&:is_nil).reduce(&:|) }"
|
3239
|
+
]
|
3240
|
+
},
|
3241
|
+
{
|
3242
|
+
"cell_type": "markdown",
|
3243
|
+
"id": "4f2a58fd-f033-44f6-9eb4-ed893a2b5d1d",
|
3244
|
+
"metadata": {},
|
3245
|
+
"source": [
|
3246
|
+
"## 33. Rename"
|
3247
|
+
]
|
3248
|
+
},
|
3249
|
+
{
|
3250
|
+
"cell_type": "markdown",
|
3251
|
+
"id": "c0d39506-8ae5-48e7-9dd2-acf38d4ec1a9",
|
3252
|
+
"metadata": {},
|
3253
|
+
"source": [
|
3254
|
+
"Rename keys (column names) to create an updated DataFrame."
|
3255
|
+
]
|
3256
|
+
},
|
3257
|
+
{
|
3258
|
+
"cell_type": "markdown",
|
3259
|
+
"id": "3f6924ec-e86c-4089-ae40-6783027d3ce0",
|
3260
|
+
"metadata": {},
|
3261
|
+
"source": [
|
3262
|
+
"`#rename(key_pairs)` accepts key_pairs as arguments. key_pairs should be a Hash of `{existing_key => new_key}` ."
|
3263
|
+
]
|
3264
|
+
},
|
3265
|
+
{
|
3266
|
+
"cell_type": "code",
|
3267
|
+
"execution_count": 91,
|
3268
|
+
"id": "9396c96d-83d7-4b92-a4ca-27bc9e4d7b9d",
|
3269
|
+
"metadata": {},
|
3270
|
+
"outputs": [
|
3271
|
+
{
|
3272
|
+
"data": {
|
3273
|
+
"text/html": [
|
3274
|
+
"RedAmber::DataFrame <3 x 2 vectors> <table><tr><th>name</th><th>age</th></tr><tr><td>Yasuko</td><td>68</td></tr><tr><td>Rui</td><td>49</td></tr><tr><td>Hinata</td><td>28</td></tr></table>"
|
3275
|
+
],
|
3276
|
+
"text/plain": [
|
3277
|
+
"#<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000fc80>\n",
|
3278
|
+
" name age\n",
|
3279
|
+
" <string> <uint8>\n",
|
3280
|
+
"1 Yasuko 68\n",
|
3281
|
+
"2 Rui 49\n",
|
3282
|
+
"3 Hinata 28\n"
|
3283
|
+
]
|
3284
|
+
},
|
3285
|
+
"execution_count": 91,
|
3286
|
+
"metadata": {},
|
3287
|
+
"output_type": "execute_result"
|
3288
|
+
}
|
3289
|
+
],
|
3290
|
+
"source": [
|
3291
|
+
"h = { name: %w[Yasuko Rui Hinata], age: [68, 49, 28] }\n",
|
3292
|
+
"comecome = RedAmber::DataFrame.new(h)"
|
3293
|
+
]
|
3294
|
+
},
|
3295
|
+
{
|
3296
|
+
"cell_type": "code",
|
3297
|
+
"execution_count": 92,
|
3298
|
+
"id": "fad279c6-1ca0-4493-bd69-0e9ef011bff7",
|
3299
|
+
"metadata": {},
|
3300
|
+
"outputs": [
|
3301
|
+
{
|
3302
|
+
"data": {
|
3303
|
+
"text/html": [
|
3304
|
+
"RedAmber::DataFrame <3 x 2 vectors> <table><tr><th>name</th><th>age_in_1993</th></tr><tr><td>Yasuko</td><td>68</td></tr><tr><td>Rui</td><td>49</td></tr><tr><td>Hinata</td><td>28</td></tr></table>"
|
3305
|
+
],
|
3306
|
+
"text/plain": [
|
3307
|
+
"#<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000fc94>\n",
|
3308
|
+
" name age_in_1993\n",
|
3309
|
+
" <string> <uint8>\n",
|
3310
|
+
"1 Yasuko 68\n",
|
3311
|
+
"2 Rui 49\n",
|
3312
|
+
"3 Hinata 28\n"
|
3313
|
+
]
|
3314
|
+
},
|
3315
|
+
"execution_count": 92,
|
3316
|
+
"metadata": {},
|
3317
|
+
"output_type": "execute_result"
|
3318
|
+
}
|
3319
|
+
],
|
3320
|
+
"source": [
|
3321
|
+
"comecome.rename(:age => :age_in_1993)"
|
3322
|
+
]
|
3323
|
+
},
|
3324
|
+
{
|
3325
|
+
"cell_type": "markdown",
|
3326
|
+
"id": "9dabb005-9822-4c4b-aaa5-fa6f28f2ed43",
|
3327
|
+
"metadata": {},
|
3328
|
+
"source": [
|
3329
|
+
"`#rename {block}` is also acceptable. We can't use both arguments and a block at the same time. The block should return key_pairs as a Hash of `{existing_key => new_key}`. The block is called in the context of self."
|
3330
|
+
]
|
3331
|
+
},
|
3332
|
+
{
|
3333
|
+
"cell_type": "markdown",
|
3334
|
+
"id": "aabbba20-6ef8-4da2-8dc0-0cb243cf3b23",
|
3335
|
+
"metadata": {},
|
3336
|
+
"source": [
|
3337
|
+
"Symbol key and String key are distinguished."
|
3338
|
+
]
|
3339
|
+
},
|
3340
|
+
{
|
3341
|
+
"cell_type": "markdown",
|
3342
|
+
"id": "07f98b31-6123-4466-b4f8-f995c7cde474",
|
3343
|
+
"metadata": {},
|
3344
|
+
"source": [
|
3345
|
+
"## 34. Assign"
|
3346
|
+
]
|
3347
|
+
},
|
3348
|
+
{
|
3349
|
+
"cell_type": "markdown",
|
3350
|
+
"id": "99f6787f-2b36-4360-b155-1c2d7874d25e",
|
3351
|
+
"metadata": {},
|
3352
|
+
"source": [
|
3353
|
+
"Assign new or updated columns (variables) and create an updated DataFrame.\n",
|
3354
|
+
"\n",
|
3355
|
+
"- Columns with new keys will append new variables at right (bottom in TDR).\n",
|
3356
|
+
"- Columns with existing keys will update corresponding vectors."
|
3357
|
+
]
|
3358
|
+
},
|
3359
|
+
{
|
3360
|
+
"cell_type": "markdown",
|
3361
|
+
"id": "b4b22da0-4ee2-4196-88e1-1cfea6a72f4d",
|
3362
|
+
"metadata": {},
|
3363
|
+
"source": [
|
3364
|
+
"`#assign(key_pairs)` accepts pairs of key and values as arguments. key_pairs should be a Hash of `{key => array}` or `{key => Vector}` ."
|
3365
|
+
]
|
3366
|
+
},
|
3367
|
+
{
|
3368
|
+
"cell_type": "code",
|
3369
|
+
"execution_count": 93,
|
3370
|
+
"id": "56dcfed8-a6f9-4d8c-bac3-e8ce7c0674a7",
|
3371
|
+
"metadata": {},
|
3372
|
+
"outputs": [
|
3373
|
+
{
|
3374
|
+
"data": {
|
3375
|
+
"text/html": [
|
3376
|
+
"RedAmber::DataFrame <3 x 2 vectors> <table><tr><th>name</th><th>age</th></tr><tr><td>Yasuko</td><td>68</td></tr><tr><td>Rui</td><td>49</td></tr><tr><td>Hinata</td><td>28</td></tr></table>"
|
3377
|
+
],
|
3378
|
+
"text/plain": [
|
3379
|
+
"#<RedAmber::DataFrame : 3 x 2 Vectors, 0x000000000000fca8>\n",
|
3380
|
+
" name age\n",
|
3381
|
+
" <string> <uint8>\n",
|
3382
|
+
"1 Yasuko 68\n",
|
3383
|
+
"2 Rui 49\n",
|
3384
|
+
"3 Hinata 28\n"
|
3385
|
+
]
|
3386
|
+
},
|
3387
|
+
"execution_count": 93,
|
3388
|
+
"metadata": {},
|
3389
|
+
"output_type": "execute_result"
|
3390
|
+
}
|
3391
|
+
],
|
3392
|
+
"source": [
|
3393
|
+
"comecome = RedAmber::DataFrame.new( name: %w[Yasuko Rui Hinata], age: [68, 49, 28] )"
|
3394
|
+
]
|
3395
|
+
},
|
3396
|
+
{
|
3397
|
+
"cell_type": "code",
|
3398
|
+
"execution_count": 94,
|
3399
|
+
"id": "8da8d282-8798-44d5-bb7b-7fa2df922308",
|
3400
|
+
"metadata": {},
|
3401
|
+
"outputs": [
|
3402
|
+
{
|
3403
|
+
"data": {
|
3404
|
+
"text/html": [
|
3405
|
+
"RedAmber::DataFrame <3 x 3 vectors> <table><tr><th>name</th><th>age</th><th>brother</th></tr><tr><td>Yasuko</td><td>97</td><td>Santa</td></tr><tr><td>Rui</td><td>78</td><td><i>(nil)</i></td></tr><tr><td>Hinata</td><td>57</td><td>Momotaro</td></tr></table>"
|
3406
|
+
],
|
3407
|
+
"text/plain": [
|
3408
|
+
"#<RedAmber::DataFrame : 3 x 3 Vectors, 0x000000000000fcbc>\n",
|
3409
|
+
" name age brother\n",
|
3410
|
+
" <string> <uint8> <string>\n",
|
3411
|
+
"1 Yasuko 97 Santa\n",
|
3412
|
+
"2 Rui 78 (nil)\n",
|
3413
|
+
"3 Hinata 57 Momotaro\n"
|
3414
|
+
]
|
3415
|
+
},
|
3416
|
+
"execution_count": 94,
|
3417
|
+
"metadata": {},
|
3418
|
+
"output_type": "execute_result"
|
3419
|
+
}
|
3420
|
+
],
|
3421
|
+
"source": [
|
3422
|
+
"# update :age and add :brother\n",
|
3423
|
+
"assigner = { age: [97, 78, 57], brother: ['Santa', nil, 'Momotaro'] }\n",
|
3424
|
+
"comecome.assign(assigner)"
|
3425
|
+
]
|
3426
|
+
},
|
3427
|
+
{
|
3428
|
+
"cell_type": "markdown",
|
3429
|
+
"id": "e6d3ddfc-b16d-4b20-83df-357e9cdb32e6",
|
3430
|
+
"metadata": {},
|
3431
|
+
"source": [
|
3432
|
+
"`#assign {block}` is also acceptable. We can't use both arguments and a block at the same time. The block should return pairs of key and values as a Hash of `{key => array}` or `{key => Vector}`. The block is called in the context of self."
|
3433
|
+
]
|
3434
|
+
},
|
3435
|
+
{
|
3436
|
+
"cell_type": "code",
|
3437
|
+
"execution_count": 95,
|
3438
|
+
"id": "8d69edd0-7ad7-4318-8033-1785ce2543db",
|
3439
|
+
"metadata": {},
|
3440
|
+
"outputs": [
|
3441
|
+
{
|
3442
|
+
"data": {
|
3443
|
+
"text/html": [
|
3444
|
+
"RedAmber::DataFrame <5 x 3 vectors> <table><tr><th>index</th><th>float</th><th>string</th></tr><tr><td>0</td><td>0.0</td><td>A</td></tr><tr><td>1</td><td>1.1</td><td>B</td></tr><tr><td>2</td><td>2.2</td><td>C</td></tr><tr><td>3</td><td>NaN</td><td>D</td></tr><tr><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
|
3445
|
+
],
|
3446
|
+
"text/plain": [
|
3447
|
+
"#<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000000fcd0>\n",
|
3448
|
+
" index float string\n",
|
3449
|
+
" <uint8> <double> <string>\n",
|
3450
|
+
"1 0 0.0 A\n",
|
3451
|
+
"2 1 1.1 B\n",
|
3452
|
+
"3 2 2.2 C\n",
|
3453
|
+
"4 3 NaN D\n",
|
3454
|
+
"5 (nil) (nil) (nil)\n"
|
3455
|
+
]
|
3456
|
+
},
|
3457
|
+
"execution_count": 95,
|
3458
|
+
"metadata": {},
|
3459
|
+
"output_type": "execute_result"
|
3460
|
+
}
|
3461
|
+
],
|
3462
|
+
"source": [
|
3463
|
+
"df = RedAmber::DataFrame.new(\n",
|
3464
|
+
" index: [0, 1, 2, 3, nil],\n",
|
3465
|
+
" float: [0.0, 1.1, 2.2, Float::NAN, nil],\n",
|
3466
|
+
" string: ['A', 'B', 'C', 'D', nil])"
|
3467
|
+
]
|
3468
|
+
},
|
3469
|
+
{
|
3470
|
+
"cell_type": "code",
|
3471
|
+
"execution_count": 96,
|
3472
|
+
"id": "e884af01-d82b-42e7-8e92-62baf19919cb",
|
3473
|
+
"metadata": {},
|
3474
|
+
"outputs": [
|
3475
|
+
{
|
3476
|
+
"data": {
|
3477
|
+
"text/html": [
|
3478
|
+
"RedAmber::DataFrame <5 x 3 vectors> <table><tr><th>index</th><th>float</th><th>string</th></tr><tr><td>0</td><td>-0.0</td><td>A</td></tr><tr><td>255</td><td>-1.1</td><td>B</td></tr><tr><td>254</td><td>-2.2</td><td>C</td></tr><tr><td>253</td><td>NaN</td><td>D</td></tr><tr><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td></tr></table>"
|
3479
|
+
],
|
3480
|
+
"text/plain": [
|
3481
|
+
"#<RedAmber::DataFrame : 5 x 3 Vectors, 0x000000000000fce4>\n",
|
3482
|
+
" index float string\n",
|
3483
|
+
" <uint8> <double> <string>\n",
|
3484
|
+
"1 0 -0.0 A\n",
|
3485
|
+
"2 255 -1.1 B\n",
|
3486
|
+
"3 254 -2.2 C\n",
|
3487
|
+
"4 253 NaN D\n",
|
3488
|
+
"5 (nil) (nil) (nil)\n"
|
3489
|
+
]
|
3490
|
+
},
|
3491
|
+
"execution_count": 96,
|
3492
|
+
"metadata": {},
|
3493
|
+
"output_type": "execute_result"
|
3494
|
+
}
|
3495
|
+
],
|
3496
|
+
"source": [
|
3497
|
+
"# update numeric variables\n",
|
3498
|
+
"df.assign do\n",
|
3499
|
+
" assigner = {}\n",
|
3500
|
+
" vectors.each_with_index do |v, i|\n",
|
3501
|
+
" assigner[keys[i]] = -v if v.numeric?\n",
|
3502
|
+
" end\n",
|
3503
|
+
" assigner\n",
|
3504
|
+
"end"
|
3505
|
+
]
|
3506
|
+
},
|
3507
|
+
{
|
3508
|
+
"cell_type": "markdown",
|
3509
|
+
"id": "c5c83559-f4d8-4ed2-8b20-5c50eb1faa14",
|
3510
|
+
"metadata": {},
|
3511
|
+
"source": [
|
3512
|
+
"## 35. Coerce (Vector)"
|
3513
|
+
]
|
3514
|
+
},
|
3515
|
+
{
|
3516
|
+
"cell_type": "markdown",
|
3517
|
+
"id": "77bdfc69-b728-4335-b76e-e4be92f94310",
|
3518
|
+
"metadata": {},
|
3519
|
+
"source": [
|
3520
|
+
"Vector has a `coerce` method, so a Vector can also be the right-hand operand of a plain Ruby Numeric (e.g. `-1 * vector`)."
|
3521
|
+
]
|
3522
|
+
},
|
3523
|
+
{
|
3524
|
+
"cell_type": "code",
|
3525
|
+
"execution_count": 97,
|
3526
|
+
"id": "2bfbe584-be54-486b-af32-e76b37c10e49",
|
3527
|
+
"metadata": {},
|
3528
|
+
"outputs": [
|
3529
|
+
{
|
3530
|
+
"data": {
|
3531
|
+
"text/plain": [
|
3532
|
+
"#<RedAmber::Vector(:uint8, size=3):0x000000000000fcf8>\n",
|
3533
|
+
"[1, 2, 3]\n"
|
3534
|
+
]
|
3535
|
+
},
|
3536
|
+
"execution_count": 97,
|
3537
|
+
"metadata": {},
|
3538
|
+
"output_type": "execute_result"
|
3539
|
+
}
|
3540
|
+
],
|
3541
|
+
"source": [
|
3542
|
+
"vector = RedAmber::Vector.new(1,2,3)"
|
3543
|
+
]
|
3544
|
+
},
|
3545
|
+
{
|
3546
|
+
"cell_type": "code",
|
3547
|
+
"execution_count": 98,
|
3548
|
+
"id": "ce35d901-38a8-4f13-b2d1-29b83f6c5438",
|
3549
|
+
"metadata": {},
|
3550
|
+
"outputs": [
|
3551
|
+
{
|
3552
|
+
"data": {
|
3553
|
+
"text/plain": [
|
3554
|
+
"#<RedAmber::Vector(:int16, size=3):0x000000000000fd0c>\n",
|
3555
|
+
"[-1, -2, -3]\n"
|
3556
|
+
]
|
3557
|
+
},
|
3558
|
+
"execution_count": 98,
|
3559
|
+
"metadata": {},
|
3560
|
+
"output_type": "execute_result"
|
3561
|
+
}
|
3562
|
+
],
|
3563
|
+
"source": [
|
3564
|
+
"# Vector's `#*` method\n",
|
3565
|
+
"vector * -1"
|
3566
|
+
]
|
3567
|
+
},
|
3568
|
+
{
|
3569
|
+
"cell_type": "code",
|
3570
|
+
"execution_count": 99,
|
3571
|
+
"id": "7d5fc2be-f590-4678-92e9-faa27b618266",
|
3572
|
+
"metadata": {},
|
3573
|
+
"outputs": [
|
3574
|
+
{
|
3575
|
+
"data": {
|
3576
|
+
"text/plain": [
|
3577
|
+
"#<RedAmber::Vector(:int16, size=3):0x000000000000fd20>\n",
|
3578
|
+
"[-1, -2, -3]\n"
|
3579
|
+
]
|
3580
|
+
},
|
3581
|
+
"execution_count": 99,
|
3582
|
+
"metadata": {},
|
3583
|
+
"output_type": "execute_result"
|
3584
|
+
}
|
3585
|
+
],
|
3586
|
+
"source": [
|
3587
|
+
"# coerced calculation\n",
|
3588
|
+
"-1 * vector"
|
3589
|
+
]
|
3590
|
+
},
|
3591
|
+
{
|
3592
|
+
"cell_type": "code",
|
3593
|
+
"execution_count": 100,
|
3594
|
+
"id": "fa90a6af-add7-42f2-9707-7d726575aeb6",
|
3595
|
+
"metadata": {},
|
3596
|
+
"outputs": [
|
3597
|
+
{
|
3598
|
+
"data": {
|
3599
|
+
"text/plain": [
|
3600
|
+
"#<RedAmber::Vector(:uint8, size=3):0x000000000000fd34>\n",
|
3601
|
+
"[255, 254, 253]\n"
|
3602
|
+
]
|
3603
|
+
},
|
3604
|
+
"execution_count": 100,
|
3605
|
+
"metadata": {},
|
3606
|
+
"output_type": "execute_result"
|
3607
|
+
}
|
3608
|
+
],
|
3609
|
+
"source": [
|
3610
|
+
"# `-@` operator\n",
|
3611
|
+
"-vector"
|
3612
|
+
]
|
3613
|
+
},
|
3614
|
+
{
|
3615
|
+
"cell_type": "markdown",
|
3616
|
+
"id": "4820b527-44e9-4738-aa0e-73604078b3b0",
|
3617
|
+
"metadata": {
|
3618
|
+
"tags": []
|
3619
|
+
},
|
3620
|
+
"source": [
|
3621
|
+
"## 36. to_ary (Vector)"
|
3622
|
+
]
|
3623
|
+
},
|
3624
|
+
{
|
3625
|
+
"cell_type": "markdown",
|
3626
|
+
"id": "8507dcc4-74e3-44ad-aa54-cf43d55f2131",
|
3627
|
+
"metadata": {},
|
3628
|
+
"source": [
|
3629
|
+
"`Vector#to_ary` will enable implicit conversion to an Array."
|
3630
|
+
]
|
3631
|
+
},
|
3632
|
+
{
|
3633
|
+
"cell_type": "code",
|
3634
|
+
"execution_count": 101,
|
3635
|
+
"id": "b12bd7c8-2981-426c-8ae3-154504a8ea15",
|
3636
|
+
"metadata": {},
|
3637
|
+
"outputs": [
|
3638
|
+
{
|
3639
|
+
"data": {
|
3640
|
+
"text/plain": [
|
3641
|
+
"[3, 4, 5]"
|
3642
|
+
]
|
3643
|
+
},
|
3644
|
+
"execution_count": 101,
|
3645
|
+
"metadata": {},
|
3646
|
+
"output_type": "execute_result"
|
3647
|
+
}
|
3648
|
+
],
|
3649
|
+
"source": [
|
3650
|
+
"Array(Vector.new([3, 4, 5]))"
|
3651
|
+
]
|
3652
|
+
},
|
3653
|
+
{
|
3654
|
+
"cell_type": "code",
|
3655
|
+
"execution_count": 102,
|
3656
|
+
"id": "c0cb5a98-7cdf-43a8-b2f7-f9df1961c761",
|
3657
|
+
"metadata": {},
|
3658
|
+
"outputs": [
|
3659
|
+
{
|
3660
|
+
"data": {
|
3661
|
+
"text/plain": [
|
3662
|
+
"[1, 2, 3, 4, 5]"
|
3663
|
+
]
|
3664
|
+
},
|
3665
|
+
"execution_count": 102,
|
3666
|
+
"metadata": {},
|
3667
|
+
"output_type": "execute_result"
|
3668
|
+
}
|
3669
|
+
],
|
3670
|
+
"source": [
|
3671
|
+
"[1, 2] + Vector.new([3, 4, 5])"
|
3672
|
+
]
|
3673
|
+
},
|
3674
|
+
{
|
3675
|
+
"cell_type": "markdown",
|
3676
|
+
"id": "216dde4f-e4d8-4f29-903a-8cbf75de5b8e",
|
3677
|
+
"metadata": {},
|
3678
|
+
"source": [
|
3679
|
+
"## 37. Fill nil (Vector)"
|
3680
|
+
]
|
3681
|
+
},
|
3682
|
+
{
|
3683
|
+
"cell_type": "markdown",
|
3684
|
+
"id": "1959d0d7-6d09-4fa5-9365-1e2f7fc35d61",
|
3685
|
+
"metadata": {},
|
3686
|
+
"source": [
|
3687
|
+
"`Vector#fill_nil_forward` or `Vector#fill_nil_backward` will\n",
|
3688
|
+
"propagate the last valid observation forward (or backward).\n",
|
3689
|
+
"A nil is preserved when there is no valid value to propagate: leading nils for `fill_nil_forward`, trailing nils for `fill_nil_backward`."
|
3690
|
+
]
|
3691
|
+
},
|
3692
|
+
{
|
3693
|
+
"cell_type": "code",
|
3694
|
+
"execution_count": 103,
|
3695
|
+
"id": "d003b06a-859f-4de0-9e35-803efac85169",
|
3696
|
+
"metadata": {},
|
3697
|
+
"outputs": [
|
3698
|
+
{
|
3699
|
+
"data": {
|
3700
|
+
"text/plain": [
|
3701
|
+
"#<RedAmber::Vector(:uint8, size=5):0x000000000000fd48>\n",
|
3702
|
+
"[0, 1, 1, 3, 3]\n"
|
3703
|
+
]
|
3704
|
+
},
|
3705
|
+
"execution_count": 103,
|
3706
|
+
"metadata": {},
|
3707
|
+
"output_type": "execute_result"
|
3708
|
+
}
|
3709
|
+
],
|
3710
|
+
"source": [
|
3711
|
+
"integer = Vector.new([0, 1, nil, 3, nil])\n",
|
3712
|
+
"integer.fill_nil_forward"
|
3713
|
+
]
|
3714
|
+
},
|
3715
|
+
{
|
3716
|
+
"cell_type": "code",
|
3717
|
+
"execution_count": 104,
|
3718
|
+
"id": "c5d74006-d364-4e86-8a5e-9e96e87a96e0",
|
3719
|
+
"metadata": {},
|
3720
|
+
"outputs": [
|
3721
|
+
{
|
3722
|
+
"data": {
|
3723
|
+
"text/plain": [
|
3724
|
+
"#<RedAmber::Vector(:uint8, size=5):0x000000000000fd5c>\n",
|
3725
|
+
"[0, 1, 3, 3, nil]\n"
|
3726
|
+
]
|
3727
|
+
},
|
3728
|
+
"execution_count": 104,
|
3729
|
+
"metadata": {},
|
3730
|
+
"output_type": "execute_result"
|
3731
|
+
}
|
3732
|
+
],
|
3733
|
+
"source": [
|
3734
|
+
"integer.fill_nil_backward"
|
3735
|
+
]
|
3736
|
+
},
|
3737
|
+
{
|
3738
|
+
"cell_type": "markdown",
|
3739
|
+
"id": "347785a6-eab0-4864-a871-2c320005211e",
|
3740
|
+
"metadata": {},
|
3741
|
+
"source": [
|
3742
|
+
"## 38. all?/any? (Vector)"
|
3743
|
+
]
|
3744
|
+
},
|
3745
|
+
{
|
3746
|
+
"cell_type": "markdown",
|
3747
|
+
"id": "f82a6f5d-03d3-4645-85f5-d25999165378",
|
3748
|
+
"metadata": {},
|
3749
|
+
"source": [
|
3750
|
+
"`Vector#all?` returns true if all elements are true.\n",
|
3751
|
+
"\n",
|
3752
|
+
"`Vector#any?` returns true if any element is true.\n",
|
3753
|
+
"\n",
|
3754
|
+
"These are unary aggregation functions."
|
3755
|
+
]
|
3756
|
+
},
|
3757
|
+
{
|
3758
|
+
"cell_type": "code",
|
3759
|
+
"execution_count": 105,
|
3760
|
+
"id": "ebad37ad-0a09-48b1-ba3a-4e030a917837",
|
3761
|
+
"metadata": {},
|
3762
|
+
"outputs": [
|
3763
|
+
{
|
3764
|
+
"data": {
|
3765
|
+
"text/plain": [
|
3766
|
+
"true"
|
3767
|
+
]
|
3768
|
+
},
|
3769
|
+
"execution_count": 105,
|
3770
|
+
"metadata": {},
|
3771
|
+
"output_type": "execute_result"
|
3772
|
+
}
|
3773
|
+
],
|
3774
|
+
"source": [
|
3775
|
+
"booleans = Vector.new([true, true, nil])\n",
|
3776
|
+
"booleans.all?"
|
3777
|
+
]
|
3778
|
+
},
|
3779
|
+
{
|
3780
|
+
"cell_type": "code",
|
3781
|
+
"execution_count": 106,
|
3782
|
+
"id": "97fc24da-03d4-406d-b353-562896775d60",
|
3783
|
+
"metadata": {},
|
3784
|
+
"outputs": [
|
3785
|
+
{
|
3786
|
+
"data": {
|
3787
|
+
"text/plain": [
|
3788
|
+
"true"
|
3789
|
+
]
|
3790
|
+
},
|
3791
|
+
"execution_count": 106,
|
3792
|
+
"metadata": {},
|
3793
|
+
"output_type": "execute_result"
|
3794
|
+
}
|
3795
|
+
],
|
3796
|
+
"source": [
|
3797
|
+
"booleans.any?"
|
3798
|
+
]
|
3799
|
+
},
|
3800
|
+
{
|
3801
|
+
"cell_type": "markdown",
|
3802
|
+
"id": "0ff3b22d-9f7c-42f2-8d18-c89a06af681b",
|
3803
|
+
"metadata": {},
|
3804
|
+
"source": [
|
3805
|
+
"If these methods are called with the option `skip_nulls: false`, nil is taken into account."
|
3806
|
+
]
|
3807
|
+
},
|
3808
|
+
{
|
3809
|
+
"cell_type": "code",
|
3810
|
+
"execution_count": 107,
|
3811
|
+
"id": "3e0e5800-665a-4a05-b2cb-d152f3f077de",
|
3812
|
+
"metadata": {},
|
3813
|
+
"outputs": [
|
3814
|
+
{
|
3815
|
+
"data": {
|
3816
|
+
"text/plain": [
|
3817
|
+
"false"
|
3818
|
+
]
|
3819
|
+
},
|
3820
|
+
"execution_count": 107,
|
3821
|
+
"metadata": {},
|
3822
|
+
"output_type": "execute_result"
|
3823
|
+
}
|
3824
|
+
],
|
3825
|
+
"source": [
|
3826
|
+
"booleans.all?(skip_nulls: false)"
|
3827
|
+
]
|
3828
|
+
},
|
3829
|
+
{
|
3830
|
+
"cell_type": "code",
|
3831
|
+
"execution_count": 108,
|
3832
|
+
"id": "3e43f0c4-a254-4735-ac28-de14d2670c67",
|
3833
|
+
"metadata": {},
|
3834
|
+
"outputs": [
|
3835
|
+
{
|
3836
|
+
"data": {
|
3837
|
+
"text/plain": [
|
3838
|
+
"true"
|
3839
|
+
]
|
3840
|
+
},
|
3841
|
+
"execution_count": 108,
|
3842
|
+
"metadata": {},
|
3843
|
+
"output_type": "execute_result"
|
3844
|
+
}
|
3845
|
+
],
|
3846
|
+
"source": [
|
3847
|
+
"booleans.any?(skip_nulls: false)"
|
3848
|
+
]
|
3849
|
+
},
|
3850
|
+
{
|
3851
|
+
"cell_type": "markdown",
|
3852
|
+
"id": "abc71a85-7958-4a21-91cf-8c96c0784525",
|
3853
|
+
"metadata": {},
|
3854
|
+
"source": [
|
3855
|
+
"## 39. count/count_uniq (Vector)"
|
3856
|
+
]
|
3857
|
+
},
|
3858
|
+
{
|
3859
|
+
"cell_type": "markdown",
|
3860
|
+
"id": "3d556118-4105-4d12-806d-ba56c6ae3d1b",
|
3861
|
+
"metadata": {},
|
3862
|
+
"source": [
|
3863
|
+
"`Vector#count` counts elements.\n",
|
3864
|
+
"\n",
|
3865
|
+
"`Vector#count_uniq` counts unique elements. `#count_distinct` is an alias (Arrow's name).\n",
|
3866
|
+
"\n",
|
3867
|
+
"These are unary aggregation functions."
|
3868
|
+
]
|
3869
|
+
},
|
3870
|
+
{
|
3871
|
+
"cell_type": "code",
|
3872
|
+
"execution_count": 109,
|
3873
|
+
"id": "2af73e32-1d7e-4f80-b54e-c40ef08b7034",
|
3874
|
+
"metadata": {},
|
3875
|
+
"outputs": [
|
3876
|
+
{
|
3877
|
+
"data": {
|
3878
|
+
"text/plain": [
|
3879
|
+
"3"
|
3880
|
+
]
|
3881
|
+
},
|
3882
|
+
"execution_count": 109,
|
3883
|
+
"metadata": {},
|
3884
|
+
"output_type": "execute_result"
|
3885
|
+
}
|
3886
|
+
],
|
3887
|
+
"source": [
|
3888
|
+
"string = Vector.new(%w[A B A])\n",
|
3889
|
+
"string.count"
|
3890
|
+
]
|
3891
|
+
},
|
3892
|
+
{
|
3893
|
+
"cell_type": "code",
|
3894
|
+
"execution_count": 110,
|
3895
|
+
"id": "fe6d8d85-27b0-438f-b1b4-1b15e9eb05f9",
|
3896
|
+
"metadata": {},
|
3897
|
+
"outputs": [
|
3898
|
+
{
|
3899
|
+
"data": {
|
3900
|
+
"text/plain": [
|
3901
|
+
"2"
|
3902
|
+
]
|
3903
|
+
},
|
3904
|
+
"execution_count": 110,
|
3905
|
+
"metadata": {},
|
3906
|
+
"output_type": "execute_result"
|
3907
|
+
}
|
3908
|
+
],
|
3909
|
+
"source": [
|
3910
|
+
"string.count_uniq # count_distinct is also OK"
|
3911
|
+
]
|
3912
|
+
},
|
3913
|
+
{
|
3914
|
+
"cell_type": "markdown",
|
3915
|
+
"id": "70abed9f-665a-4ea7-939e-4b185ee53755",
|
3916
|
+
"metadata": {},
|
3917
|
+
"source": [
|
3918
|
+
"## 40. stddev/variance (Vector)"
|
3919
|
+
]
|
3920
|
+
},
|
3921
|
+
{
|
3922
|
+
"cell_type": "markdown",
|
3923
|
+
"id": "965de338-b3be-4d33-92e1-5ad7e2ed18f0",
|
3924
|
+
"metadata": {},
|
3925
|
+
"source": [
|
3926
|
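+
"These are unary aggregation functions. `#stddev`/`#variance` return the population (biased) statistics; `#sd`/`#var` return the unbiased ones."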
|
3927
|
+
]
|
3928
|
+
},
|
3929
|
+
{
|
3930
|
+
"cell_type": "code",
|
3931
|
+
"execution_count": 111,
|
3932
|
+
"id": "0afec200-f377-432b-a260-ae5a0c5ce794",
|
3933
|
+
"metadata": {},
|
3934
|
+
"outputs": [
|
3935
|
+
{
|
3936
|
+
"data": {
|
3937
|
+
"text/plain": [
|
3938
|
+
"0.816496580927726"
|
3939
|
+
]
|
3940
|
+
},
|
3941
|
+
"execution_count": 111,
|
3942
|
+
"metadata": {},
|
3943
|
+
"output_type": "execute_result"
|
3944
|
+
}
|
3945
|
+
],
|
3946
|
+
"source": [
|
3947
|
+
"integers = Vector.new([1, 2, 3, nil])\n",
|
3948
|
+
"integers.stddev"
|
3949
|
+
]
|
3950
|
+
},
|
3951
|
+
{
|
3952
|
+
"cell_type": "code",
|
3953
|
+
"execution_count": 112,
|
3954
|
+
"id": "2e40ac09-cb7f-4978-87e8-53f84f16f7c7",
|
3955
|
+
"metadata": {},
|
3956
|
+
"outputs": [
|
3957
|
+
{
|
3958
|
+
"data": {
|
3959
|
+
"text/plain": [
|
3960
|
+
"1.0"
|
3961
|
+
]
|
3962
|
+
},
|
3963
|
+
"execution_count": 112,
|
3964
|
+
"metadata": {},
|
3965
|
+
"output_type": "execute_result"
|
3966
|
+
}
|
3967
|
+
],
|
3968
|
+
"source": [
|
3969
|
+
"# Unbiased standard deviation\n",
|
3970
|
+
"integers.sd"
|
3971
|
+
]
|
3972
|
+
},
|
3973
|
+
{
|
3974
|
+
"cell_type": "code",
|
3975
|
+
"execution_count": 113,
|
3976
|
+
"id": "e6158e3b-4af8-467c-a355-8e9f2e579548",
|
3977
|
+
"metadata": {},
|
3978
|
+
"outputs": [
|
3979
|
+
{
|
3980
|
+
"data": {
|
3981
|
+
"text/plain": [
|
3982
|
+
"0.6666666666666666"
|
3983
|
+
]
|
3984
|
+
},
|
3985
|
+
"execution_count": 113,
|
3986
|
+
"metadata": {},
|
3987
|
+
"output_type": "execute_result"
|
3988
|
+
}
|
3989
|
+
],
|
3990
|
+
"source": [
|
3991
|
+
"integers.variance"
|
3992
|
+
]
|
3993
|
+
},
|
3994
|
+
{
|
3995
|
+
"cell_type": "code",
|
3996
|
+
"execution_count": 114,
|
3997
|
+
"id": "d64d39f2-d979-49f1-9946-65890f40d646",
|
3998
|
+
"metadata": {},
|
3999
|
+
"outputs": [
|
4000
|
+
{
|
4001
|
+
"data": {
|
4002
|
+
"text/plain": [
|
4003
|
+
"1.0"
|
4004
|
+
]
|
4005
|
+
},
|
4006
|
+
"execution_count": 114,
|
4007
|
+
"metadata": {},
|
4008
|
+
"output_type": "execute_result"
|
4009
|
+
}
|
4010
|
+
],
|
4011
|
+
"source": [
|
4012
|
+
"# Unbiased variance\n",
|
4013
|
+
"integers.var"
|
4014
|
+
]
|
4015
|
+
},
|
4016
|
+
{
|
4017
|
+
"cell_type": "markdown",
|
4018
|
+
"id": "25023f5a-798a-40a5-ab84-a6615602f747",
|
4019
|
+
"metadata": {},
|
4020
|
+
"source": [
|
4021
|
+
"## 41. negate (Vector)"
|
4022
|
+
]
|
4023
|
+
},
|
4024
|
+
{
|
4025
|
+
"cell_type": "markdown",
|
4026
|
+
"id": "00ddf322-ef50-40a1-86a6-22bf3d43f007",
|
4027
|
+
"metadata": {},
|
4028
|
+
"source": [
|
4029
|
+
"This is a unary element-wise function."
|
4030
|
+
]
|
4031
|
+
},
|
4032
|
+
{
|
4033
|
+
"cell_type": "code",
|
4034
|
+
"execution_count": 115,
|
4035
|
+
"id": "ab5a357a-e98c-40a1-9b89-0b38645e416f",
|
4036
|
+
"metadata": {},
|
4037
|
+
"outputs": [
|
4038
|
+
{
|
4039
|
+
"data": {
|
4040
|
+
"text/plain": [
|
4041
|
+
"#<RedAmber::Vector(:double, size=3):0x000000000000fd70>\n",
|
4042
|
+
"[-1.0, 2.0, -3.0]\n"
|
4043
|
+
]
|
4044
|
+
},
|
4045
|
+
"execution_count": 115,
|
4046
|
+
"metadata": {},
|
4047
|
+
"output_type": "execute_result"
|
4048
|
+
}
|
4049
|
+
],
|
4050
|
+
"source": [
|
4051
|
+
"double = Vector.new([1.0, -2, 3])\n",
|
4052
|
+
"double.negate"
|
4053
|
+
]
|
4054
|
+
},
|
4055
|
+
{
|
4056
|
+
"cell_type": "code",
|
4057
|
+
"execution_count": 116,
|
4058
|
+
"id": "8a06c856-d61c-4752-a296-1fa207ffd9a1",
|
4059
|
+
"metadata": {},
|
4060
|
+
"outputs": [
|
4061
|
+
{
|
4062
|
+
"data": {
|
4063
|
+
"text/plain": [
|
4064
|
+
"#<RedAmber::Vector(:double, size=3):0x000000000000fd84>\n",
|
4065
|
+
"[-1.0, 2.0, -3.0]\n"
|
4066
|
+
]
|
4067
|
+
},
|
4068
|
+
"execution_count": 116,
|
4069
|
+
"metadata": {},
|
4070
|
+
"output_type": "execute_result"
|
4071
|
+
}
|
4072
|
+
],
|
4073
|
+
"source": [
|
4074
|
+
"# Same as #negate\n",
|
4075
|
+
"-double"
|
4076
|
+
]
|
4077
|
+
},
|
4078
|
+
{
|
4079
|
+
"cell_type": "markdown",
|
4080
|
+
"id": "9b145724-d165-4ef3-8a06-2948dd0c7dbb",
|
4081
|
+
"metadata": {},
|
4082
|
+
"source": [
|
4083
|
+
"## 42. round (Vector)"
|
4084
|
+
]
|
4085
|
+
},
|
4086
|
+
{
|
4087
|
+
"cell_type": "markdown",
|
4088
|
+
"id": "b780c2f3-935c-4b2f-b18a-b277cf7c24b7",
|
4089
|
+
"metadata": {},
|
4090
|
+
"source": [
|
4091
|
+
"Options for `#round`:\n",
|
4092
|
+
"\n",
|
4093
|
+
"- `:n_digits` The number of digits to show.\n",
|
4094
|
+
"- `:mode` Specify rounding mode.\n",
|
4095
|
+
"\n",
|
4096
|
+
"This is a unary element-wise function."
|
4097
|
+
]
|
4098
|
+
},
|
4099
|
+
{
|
4100
|
+
"cell_type": "code",
|
4101
|
+
"execution_count": 117,
|
4102
|
+
"id": "e7a069b0-3547-4cd2-a2f0-0740f186b191",
|
4103
|
+
"metadata": {},
|
4104
|
+
"outputs": [
|
4105
|
+
{
|
4106
|
+
"data": {
|
4107
|
+
"text/plain": [
|
4108
|
+
"#<RedAmber::Vector(:double, size=5):0x000000000000fd98>\n",
|
4109
|
+
"[15.15, 2.5, 3.5, -4.5, -5.5]\n"
|
4110
|
+
]
|
4111
|
+
},
|
4112
|
+
"execution_count": 117,
|
4113
|
+
"metadata": {},
|
4114
|
+
"output_type": "execute_result"
|
4115
|
+
}
|
4116
|
+
],
|
4117
|
+
"source": [
|
4118
|
+
"double = RedAmber::Vector.new([15.15, 2.5, 3.5, -4.5, -5.5])"
|
4119
|
+
]
|
4120
|
+
},
|
4121
|
+
{
|
4122
|
+
"cell_type": "code",
|
4123
|
+
"execution_count": 118,
|
4124
|
+
"id": "5ee84b24-8830-4788-a404-d5e1cca22abf",
|
4125
|
+
"metadata": {},
|
4126
|
+
"outputs": [
|
4127
|
+
{
|
4128
|
+
"data": {
|
4129
|
+
"text/plain": [
|
4130
|
+
"#<RedAmber::Vector(:double, size=5):0x000000000000fdac>\n",
|
4131
|
+
"[15.0, 2.0, 4.0, -4.0, -6.0]\n"
|
4132
|
+
]
|
4133
|
+
},
|
4134
|
+
"execution_count": 118,
|
4135
|
+
"metadata": {},
|
4136
|
+
"output_type": "execute_result"
|
4137
|
+
}
|
4138
|
+
],
|
4139
|
+
"source": [
|
4140
|
+
"double.round"
|
4141
|
+
]
|
4142
|
+
},
|
4143
|
+
{
|
4144
|
+
"cell_type": "code",
|
4145
|
+
"execution_count": 119,
|
4146
|
+
"id": "20adb1ad-473c-4245-b959-7848c239fb76",
|
4147
|
+
"metadata": {},
|
4148
|
+
"outputs": [
|
4149
|
+
{
|
4150
|
+
"data": {
|
4151
|
+
"text/plain": [
|
4152
|
+
"#<RedAmber::Vector(:double, size=5):0x000000000000fdc0>\n",
|
4153
|
+
"[15.0, 2.0, 4.0, -4.0, -6.0]\n"
|
4154
|
+
]
|
4155
|
+
},
|
4156
|
+
"execution_count": 119,
|
4157
|
+
"metadata": {},
|
4158
|
+
"output_type": "execute_result"
|
4159
|
+
}
|
4160
|
+
],
|
4161
|
+
"source": [
|
4162
|
+
"double.round(mode: :half_to_even)"
|
4163
|
+
]
|
4164
|
+
},
|
4165
|
+
{
|
4166
|
+
"cell_type": "code",
|
4167
|
+
"execution_count": 120,
|
4168
|
+
"id": "d2777ad8-2c24-48e4-8f5f-77403e3109ea",
|
4169
|
+
"metadata": {},
|
4170
|
+
"outputs": [
|
4171
|
+
{
|
4172
|
+
"data": {
|
4173
|
+
"text/plain": [
|
4174
|
+
"#<RedAmber::Vector(:double, size=5):0x000000000000fdd4>\n",
|
4175
|
+
"[16.0, 3.0, 4.0, -5.0, -6.0]\n"
|
4176
|
+
]
|
4177
|
+
},
|
4178
|
+
"execution_count": 120,
|
4179
|
+
"metadata": {},
|
4180
|
+
"output_type": "execute_result"
|
4181
|
+
}
|
4182
|
+
],
|
4183
|
+
"source": [
|
4184
|
+
"double.round(mode: :towards_infinity)"
|
4185
|
+
]
|
4186
|
+
},
|
4187
|
+
{
|
4188
|
+
"cell_type": "code",
|
4189
|
+
"execution_count": 121,
|
4190
|
+
"id": "a8ab2735-74cb-4cfe-a5a2-61bfa90c72ac",
|
4191
|
+
"metadata": {},
|
4192
|
+
"outputs": [
|
4193
|
+
{
|
4194
|
+
"data": {
|
4195
|
+
"text/plain": [
|
4196
|
+
"#<RedAmber::Vector(:double, size=5):0x000000000000fde8>\n",
|
4197
|
+
"[15.0, 3.0, 4.0, -4.0, -5.0]\n"
|
4198
|
+
]
|
4199
|
+
},
|
4200
|
+
"execution_count": 121,
|
4201
|
+
"metadata": {},
|
4202
|
+
"output_type": "execute_result"
|
4203
|
+
}
|
4204
|
+
],
|
4205
|
+
"source": [
|
4206
|
+
"double.round(mode: :half_up)"
|
4207
|
+
]
|
4208
|
+
},
|
4209
|
+
{
|
4210
|
+
"cell_type": "code",
|
4211
|
+
"execution_count": 122,
|
4212
|
+
"id": "3575481c-40ed-405f-a69c-7581d4dce2cf",
|
4213
|
+
"metadata": {},
|
4214
|
+
"outputs": [
|
4215
|
+
{
|
4216
|
+
"data": {
|
4217
|
+
"text/plain": [
|
4218
|
+
"#<RedAmber::Vector(:double, size=5):0x000000000000fdfc>\n",
|
4219
|
+
"[15.0, 2.0, 3.0, -4.0, -5.0]\n"
|
4220
|
+
]
|
4221
|
+
},
|
4222
|
+
"execution_count": 122,
|
4223
|
+
"metadata": {},
|
4224
|
+
"output_type": "execute_result"
|
4225
|
+
}
|
4226
|
+
],
|
4227
|
+
"source": [
|
4228
|
+
"double.round(mode: :half_towards_zero)"
|
4229
|
+
]
|
4230
|
+
},
|
4231
|
+
{
|
4232
|
+
"cell_type": "code",
|
4233
|
+
"execution_count": 123,
|
4234
|
+
"id": "a86e4c5c-aced-4a88-b692-4e26b90f1653",
|
4235
|
+
"metadata": {},
|
4236
|
+
"outputs": [
|
4237
|
+
{
|
4238
|
+
"data": {
|
4239
|
+
"text/plain": [
|
4240
|
+
"#<RedAmber::Vector(:double, size=5):0x000000000000fe10>\n",
|
4241
|
+
"[15.0, 3.0, 4.0, -5.0, -6.0]\n"
|
4242
|
+
]
|
4243
|
+
},
|
4244
|
+
"execution_count": 123,
|
4245
|
+
"metadata": {},
|
4246
|
+
"output_type": "execute_result"
|
4247
|
+
}
|
4248
|
+
],
|
4249
|
+
"source": [
|
4250
|
+
"double.round(mode: :half_towards_infinity)"
|
4251
|
+
]
|
4252
|
+
},
|
4253
|
+
{
|
4254
|
+
"cell_type": "code",
|
4255
|
+
"execution_count": 124,
|
4256
|
+
"id": "73f51bab-ff46-4b99-96a5-8c6547ad9d35",
|
4257
|
+
"metadata": {},
|
4258
|
+
"outputs": [
|
4259
|
+
{
|
4260
|
+
"data": {
|
4261
|
+
"text/plain": [
|
4262
|
+
"#<RedAmber::Vector(:double, size=5):0x000000000000fe24>\n",
|
4263
|
+
"[15.0, 3.0, 3.0, -5.0, -5.0]\n"
|
4264
|
+
]
|
4265
|
+
},
|
4266
|
+
"execution_count": 124,
|
4267
|
+
"metadata": {},
|
4268
|
+
"output_type": "execute_result"
|
4269
|
+
}
|
4270
|
+
],
|
4271
|
+
"source": [
|
4272
|
+
"double.round(mode: :half_to_odd)"
|
4273
|
+
]
|
4274
|
+
},
|
4275
|
+
{
|
4276
|
+
"cell_type": "code",
|
4277
|
+
"execution_count": 125,
|
4278
|
+
"id": "a12c684c-4a63-4dac-a81b-969978812a24",
|
4279
|
+
"metadata": {},
|
4280
|
+
"outputs": [
|
4281
|
+
{
|
4282
|
+
"data": {
|
4283
|
+
"text/plain": [
|
4284
|
+
"#<RedAmber::Vector(:double, size=5):0x000000000000fe38>\n",
|
4285
|
+
"[15.0, 2.0, 4.0, -4.0, -6.0]\n"
|
4286
|
+
]
|
4287
|
+
},
|
4288
|
+
"execution_count": 125,
|
4289
|
+
"metadata": {},
|
4290
|
+
"output_type": "execute_result"
|
4291
|
+
}
|
4292
|
+
],
|
4293
|
+
"source": [
|
4294
|
+
"double.round(n_digits: 0)"
|
4295
|
+
]
|
4296
|
+
},
|
4297
|
+
{
|
4298
|
+
"cell_type": "code",
|
4299
|
+
"execution_count": 126,
|
4300
|
+
"id": "17370f2b-0957-411b-8145-56aa9fc956ac",
|
4301
|
+
"metadata": {},
|
4302
|
+
"outputs": [
|
4303
|
+
{
|
4304
|
+
"data": {
|
4305
|
+
"text/plain": [
|
4306
|
+
"#<RedAmber::Vector(:double, size=5):0x000000000000fe4c>\n",
|
4307
|
+
"[15.2, 2.5, 3.5, -4.5, -5.5]\n"
|
4308
|
+
]
|
4309
|
+
},
|
4310
|
+
"execution_count": 126,
|
4311
|
+
"metadata": {},
|
4312
|
+
"output_type": "execute_result"
|
4313
|
+
}
|
4314
|
+
],
|
4315
|
+
"source": [
|
4316
|
+
"double.round(n_digits: 1)"
|
4317
|
+
]
|
4318
|
+
},
|
4319
|
+
{
|
4320
|
+
"cell_type": "code",
|
4321
|
+
"execution_count": 127,
|
4322
|
+
"id": "53072cff-b28b-4672-b30a-8ca37562bc21",
|
4323
|
+
"metadata": {},
|
4324
|
+
"outputs": [
|
4325
|
+
{
|
4326
|
+
"data": {
|
4327
|
+
"text/plain": [
|
4328
|
+
"#<RedAmber::Vector(:double, size=5):0x000000000000fe60>\n",
|
4329
|
+
"[20.0, 0.0, 0.0, -0.0, -10.0]\n"
|
4330
|
+
]
|
4331
|
+
},
|
4332
|
+
"execution_count": 127,
|
4333
|
+
"metadata": {},
|
4334
|
+
"output_type": "execute_result"
|
4335
|
+
}
|
4336
|
+
],
|
4337
|
+
"source": [
|
4338
|
+
"double.round(n_digits: -1)"
|
4339
|
+
]
|
4340
|
+
},
|
4341
|
+
{
|
4342
|
+
"cell_type": "markdown",
|
4343
|
+
"id": "51dedfce-51c7-4e5b-b890-a90ad9cf7596",
|
4344
|
+
"metadata": {},
|
4345
|
+
"source": [
|
4346
|
+
"## 43. and/or (Vector)"
|
4347
|
+
]
|
4348
|
+
},
|
4349
|
+
{
|
4350
|
+
"cell_type": "markdown",
|
4351
|
+
"id": "b2c4869b-6ebf-476c-b2fd-a4b9c0638dc5",
|
4352
|
+
"metadata": {},
|
4353
|
+
"source": [
|
4354
|
+
"RedAmber selects `and_kleene`/`or_kleene` as the default `&`/`|` methods.\n",
|
4355
|
+
"\n",
|
4356
|
+
"These are binary element-wise functions."
|
4357
|
+
]
|
4358
|
+
},
|
4359
|
+
{
|
4360
|
+
"cell_type": "code",
|
4361
|
+
"execution_count": 128,
|
4362
|
+
"id": "2d4f5853-1ed9-4d8b-87a9-b5c1faac5fae",
|
4363
|
+
"metadata": {},
|
4364
|
+
"outputs": [
|
4365
|
+
{
|
4366
|
+
"data": {
|
4367
|
+
"text/plain": [
|
4368
|
+
"#<RedAmber::Vector(:boolean, size=9):0x000000000000fe74>\n",
|
4369
|
+
"[true, false, nil, false, false, false, nil, false, nil]\n"
|
4370
|
+
]
|
4371
|
+
},
|
4372
|
+
"execution_count": 128,
|
4373
|
+
"metadata": {},
|
4374
|
+
"output_type": "execute_result"
|
4375
|
+
}
|
4376
|
+
],
|
4377
|
+
"source": [
|
4378
|
+
"bool_self = Vector.new([true, true, true, false, false, false, nil, nil, nil])\n",
|
4379
|
+
"bool_other = Vector.new([true, false, nil, true, false, nil, true, false, nil])\n",
|
4380
|
+
"\n",
|
4381
|
+
"bool_self & bool_other # same as bool_self.and_kleene(bool_other)"
|
4382
|
+
]
|
4383
|
+
},
|
4384
|
+
{
|
4385
|
+
"cell_type": "code",
|
4386
|
+
"execution_count": 129,
|
4387
|
+
"id": "236c9733-8d45-467e-b288-e6c18b9c39d2",
|
4388
|
+
"metadata": {},
|
4389
|
+
"outputs": [
|
4390
|
+
{
|
4391
|
+
"data": {
|
4392
|
+
"text/plain": [
|
4393
|
+
"#<RedAmber::Vector(:boolean, size=9):0x000000000000fe88>\n",
|
4394
|
+
"[true, false, nil, true, false, nil, true, false, nil]\n"
|
4395
|
+
]
|
4396
|
+
},
|
4397
|
+
"execution_count": 129,
|
4398
|
+
"metadata": {},
|
4399
|
+
"output_type": "execute_result"
|
4400
|
+
}
|
4401
|
+
],
|
4402
|
+
"source": [
|
4403
|
+
"# Ruby's primitive `&&`\n",
|
4404
|
+
"bool_self && bool_other"
|
4405
|
+
]
|
4406
|
+
},
|
4407
|
+
{
|
4408
|
+
"cell_type": "code",
|
4409
|
+
"execution_count": 130,
|
4410
|
+
"id": "4e984a9c-7d9c-465d-bf26-0c685dedd4bf",
|
4411
|
+
"metadata": {},
|
4412
|
+
"outputs": [
|
4413
|
+
{
|
4414
|
+
"data": {
|
4415
|
+
"text/plain": [
|
4416
|
+
"#<RedAmber::Vector(:boolean, size=9):0x000000000000fe9c>\n",
|
4417
|
+
"[true, false, nil, false, false, nil, nil, nil, nil]\n"
|
4418
|
+
]
|
4419
|
+
},
|
4420
|
+
"execution_count": 130,
|
4421
|
+
"metadata": {},
|
4422
|
+
"output_type": "execute_result"
|
4423
|
+
}
|
4424
|
+
],
|
4425
|
+
"source": [
|
4426
|
+
"# Arrow's default `and`\n",
|
4427
|
+
"bool_self.and_org(bool_other)"
|
4428
|
+
]
|
4429
|
+
},
|
4430
|
+
{
|
4431
|
+
"cell_type": "code",
|
4432
|
+
"execution_count": 131,
|
4433
|
+
"id": "0120ebf5-355d-41f5-83d5-49b9802f337b",
|
4434
|
+
"metadata": {},
|
4435
|
+
"outputs": [
|
4436
|
+
{
|
4437
|
+
"data": {
|
4438
|
+
"text/plain": [
|
4439
|
+
"#<RedAmber::Vector(:boolean, size=9):0x000000000000feb0>\n",
|
4440
|
+
"[true, true, true, true, false, nil, true, nil, nil]\n"
|
4441
|
+
]
|
4442
|
+
},
|
4443
|
+
"execution_count": 131,
|
4444
|
+
"metadata": {},
|
4445
|
+
"output_type": "execute_result"
|
4446
|
+
}
|
4447
|
+
],
|
4448
|
+
"source": [
|
4449
|
+
"bool_self | bool_other # same as bool_self.or_kleene(bool_other)"
|
4450
|
+
]
|
4451
|
+
},
|
4452
|
+
{
|
4453
|
+
"cell_type": "code",
|
4454
|
+
"execution_count": 132,
|
4455
|
+
"id": "24ceee23-79df-4fcd-afd8-f3839a087785",
|
4456
|
+
"metadata": {},
|
4457
|
+
"outputs": [
|
4458
|
+
{
|
4459
|
+
"data": {
|
4460
|
+
"text/plain": [
|
4461
|
+
"#<RedAmber::Vector(:boolean, size=9):0x000000000000fec4>\n",
|
4462
|
+
"[true, true, true, false, false, false, nil, nil, nil]\n"
|
4463
|
+
]
|
4464
|
+
},
|
4465
|
+
"execution_count": 132,
|
4466
|
+
"metadata": {},
|
4467
|
+
"output_type": "execute_result"
|
4468
|
+
}
|
4469
|
+
],
|
4470
|
+
"source": [
|
4471
|
+
"# Ruby's primitive `||`\n",
|
4472
|
+
"bool_self || bool_other"
|
4473
|
+
]
|
4474
|
+
},
|
4475
|
+
{
|
4476
|
+
"cell_type": "code",
|
4477
|
+
"execution_count": 133,
|
4478
|
+
"id": "c152d04b-71a0-4b18-acd1-b5ab9e413d00",
|
4479
|
+
"metadata": {},
|
4480
|
+
"outputs": [
|
4481
|
+
{
|
4482
|
+
"data": {
|
4483
|
+
"text/plain": [
|
4484
|
+
"#<RedAmber::Vector(:boolean, size=9):0x000000000000fed8>\n",
|
4485
|
+
"[true, true, nil, true, false, nil, nil, nil, nil]\n"
|
4486
|
+
]
|
4487
|
+
},
|
4488
|
+
"execution_count": 133,
|
4489
|
+
"metadata": {},
|
4490
|
+
"output_type": "execute_result"
|
4491
|
+
}
|
4492
|
+
],
|
4493
|
+
"source": [
|
4494
|
+
"# Arrow's default `or`\n",
|
4495
|
+
"bool_self.or_org(bool_other)"
|
4496
|
+
]
|
4497
|
+
},
|
4498
|
+
{
|
4499
|
+
"cell_type": "markdown",
|
4500
|
+
"id": "beede237-c5ed-4e12-a432-ec7e4546d786",
|
4501
|
+
"metadata": {},
|
4502
|
+
"source": [
|
4503
|
+
"## 44. is_finite/is_nan/is_nil/is_na (Vector)"
|
4504
|
+
]
|
4505
|
+
},
|
4506
|
+
{
|
4507
|
+
"cell_type": "markdown",
|
4508
|
+
"id": "77418efd-c0d7-4d63-a7db-2d43fafd386e",
|
4509
|
+
"metadata": {},
|
4510
|
+
"source": [
|
4511
|
+
"These are unary element-wise functions."
|
4512
|
+
]
|
4513
|
+
},
|
4514
|
+
{
|
4515
|
+
"cell_type": "code",
|
4516
|
+
"execution_count": 134,
|
4517
|
+
"id": "19558f9e-fdc4-46e5-90d0-724e4e8fbd8e",
|
4518
|
+
"metadata": {},
|
4519
|
+
"outputs": [
|
4520
|
+
{
|
4521
|
+
"data": {
|
4522
|
+
"text/plain": [
|
4523
|
+
"#<RedAmber::Vector(:double, size=5):0x000000000000feec>\n",
|
4524
|
+
"[3.141592653589793, Infinity, -Infinity, NaN, nil]\n"
|
4525
|
+
]
|
4526
|
+
},
|
4527
|
+
"execution_count": 134,
|
4528
|
+
"metadata": {},
|
4529
|
+
"output_type": "execute_result"
|
4530
|
+
}
|
4531
|
+
],
|
4532
|
+
"source": [
|
4533
|
+
"double = Vector.new([Math::PI, Float::INFINITY, -Float::INFINITY, Float::NAN, nil])"
|
4534
|
+
]
|
4535
|
+
},
|
4536
|
+
{
|
4537
|
+
"cell_type": "code",
|
4538
|
+
"execution_count": 135,
|
4539
|
+
"id": "d90a7168-1f87-4363-9589-c1f161babc7d",
|
4540
|
+
"metadata": {},
|
4541
|
+
"outputs": [
|
4542
|
+
{
|
4543
|
+
"data": {
|
4544
|
+
"text/plain": [
|
4545
|
+
"#<RedAmber::Vector(:boolean, size=5):0x000000000000ff00>\n",
|
4546
|
+
"[true, false, false, false, nil]\n"
|
4547
|
+
]
|
4548
|
+
},
|
4549
|
+
"execution_count": 135,
|
4550
|
+
"metadata": {},
|
4551
|
+
"output_type": "execute_result"
|
4552
|
+
}
|
4553
|
+
],
|
4554
|
+
"source": [
|
4555
|
+
"double.is_finite"
|
4556
|
+
]
|
4557
|
+
},
|
4558
|
+
{
|
4559
|
+
"cell_type": "code",
|
4560
|
+
"execution_count": 136,
|
4561
|
+
"id": "7d88049b-695f-4b0c-a105-8fb5797a58b1",
|
4562
|
+
"metadata": {},
|
4563
|
+
"outputs": [
|
4564
|
+
{
|
4565
|
+
"data": {
|
4566
|
+
"text/plain": [
|
4567
|
+
"#<RedAmber::Vector(:boolean, size=5):0x000000000000ff14>\n",
|
4568
|
+
"[false, true, true, false, nil]\n"
|
4569
|
+
]
|
4570
|
+
},
|
4571
|
+
"execution_count": 136,
|
4572
|
+
"metadata": {},
|
4573
|
+
"output_type": "execute_result"
|
4574
|
+
}
|
4575
|
+
],
|
4576
|
+
"source": [
|
4577
|
+
"double.is_inf"
|
4578
|
+
]
|
4579
|
+
},
|
4580
|
+
{
|
4581
|
+
"cell_type": "code",
|
4582
|
+
"execution_count": 137,
|
4583
|
+
"id": "7d86a7b5-84bf-4031-9811-4076281920cf",
|
4584
|
+
"metadata": {},
|
4585
|
+
"outputs": [
|
4586
|
+
{
|
4587
|
+
"data": {
|
4588
|
+
"text/plain": [
|
4589
|
+
"#<RedAmber::Vector(:boolean, size=5):0x000000000000ff28>\n",
|
4590
|
+
"[false, false, false, true, true]\n"
|
4591
|
+
]
|
4592
|
+
},
|
4593
|
+
"execution_count": 137,
|
4594
|
+
"metadata": {},
|
4595
|
+
"output_type": "execute_result"
|
4596
|
+
}
|
4597
|
+
],
|
4598
|
+
"source": [
|
4599
|
+
"double.is_na"
|
4600
|
+
]
|
4601
|
+
},
|
4602
|
+
{
|
4603
|
+
"cell_type": "code",
|
4604
|
+
"execution_count": 138,
|
4605
|
+
"id": "d562f826-7a37-4c57-8f92-777555987246",
|
4606
|
+
"metadata": {},
|
4607
|
+
"outputs": [
|
4608
|
+
{
|
4609
|
+
"data": {
|
4610
|
+
"text/plain": [
|
4611
|
+
"#<RedAmber::Vector(:boolean, size=5):0x000000000000ff3c>\n",
|
4612
|
+
"[false, false, false, false, true]\n"
|
4613
|
+
]
|
4614
|
+
},
|
4615
|
+
"execution_count": 138,
|
4616
|
+
"metadata": {},
|
4617
|
+
"output_type": "execute_result"
|
4618
|
+
}
|
4619
|
+
],
|
4620
|
+
"source": [
|
4621
|
+
"double.is_nil"
|
4622
|
+
]
|
4623
|
+
},
|
4624
|
+
{
|
4625
|
+
"cell_type": "code",
|
4626
|
+
"execution_count": 139,
|
4627
|
+
"id": "e460dc6b-e48f-4462-9ce8-aa6069ebae27",
|
4628
|
+
"metadata": {},
|
4629
|
+
"outputs": [
|
4630
|
+
{
|
4631
|
+
"data": {
|
4632
|
+
"text/plain": [
|
4633
|
+
"#<RedAmber::Vector(:boolean, size=5):0x000000000000ff50>\n",
|
4634
|
+
"[true, true, true, true, false]\n"
|
4635
|
+
]
|
4636
|
+
},
|
4637
|
+
"execution_count": 139,
|
4638
|
+
"metadata": {},
|
4639
|
+
"output_type": "execute_result"
|
4640
|
+
}
|
4641
|
+
],
|
4642
|
+
"source": [
|
4643
|
+
"double.is_valid"
|
4644
|
+
]
|
4645
|
+
},
|
4646
|
+
{
|
4647
|
+
"cell_type": "markdown",
|
4648
|
+
"id": "2cca75eb-f0e8-4f85-89cb-3601512e76b0",
|
4649
|
+
"metadata": {},
|
4650
|
+
"source": [
|
4651
|
+
"## 45. Prime-th rows"
|
4652
|
+
]
|
4653
|
+
},
|
4654
|
+
{
|
4655
|
+
"cell_type": "code",
|
4656
|
+
"execution_count": 140,
|
4657
|
+
"id": "e0e56ecc-b24c-4a40-b3bb-26bb64eb59ef",
|
4658
|
+
"metadata": {},
|
4659
|
+
"outputs": [
|
4660
|
+
{
|
4661
|
+
"data": {
|
4662
|
+
"text/html": [
|
4663
|
+
"RedAmber::DataFrame <68 x 9 vectors> <table><tr><th>index</th><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>2</td><td>Adelie</td><td>Torgersen</td><td>39.5</td><td>17.4</td><td>186</td><td>3800</td><td>female</td><td>2007</td></tr><tr><td>3</td><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td>5</td><td>Adelie</td><td>Torgersen</td><td>36.7</td><td>19.3</td><td>193</td><td>3450</td><td>female</td><td>2007</td></tr><tr><td>7</td><td>Adelie</td><td>Torgersen</td><td>38.9</td><td>17.8</td><td>181</td><td>3625</td><td>female</td><td>2007</td></tr><tr><td colspan='9'>⋮</td></tr><tr><td>317</td><td>Gentoo</td><td>Biscoe</td><td>49.4</td><td>15.8</td><td>216</td><td>4925</td><td>male</td><td>2009</td></tr><tr><td>331</td><td>Gentoo</td><td>Biscoe</td><td>50.5</td><td>15.2</td><td>216</td><td>5000</td><td>female</td><td>2009</td></tr><tr><td>337</td><td>Gentoo</td><td>Biscoe</td><td>44.5</td><td>15.7</td><td>217</td><td>4875</td><td><i>(nil)</i></td><td>2009</td></tr></table>"
|
4664
|
+
],
|
4665
|
+
"text/plain": [
|
4666
|
+
"#<RedAmber::DataFrame : 68 x 9 Vectors, 0x000000000000ff64>\n",
|
4667
|
+
" index species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
|
4668
|
+
" <uint16> <string> <string> <double> <double> <uint8> ... <uint16>\n",
|
4669
|
+
" 1 2 Adelie Torgersen 39.5 17.4 186 ... 2007\n",
|
4670
|
+
" 2 3 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
|
4671
|
+
" 3 5 Adelie Torgersen 36.7 19.3 193 ... 2007\n",
|
4672
|
+
" 4 7 Adelie Torgersen 38.9 17.8 181 ... 2007\n",
|
4673
|
+
" 5 11 Adelie Torgersen 37.8 17.1 186 ... 2007\n",
|
4674
|
+
" : : : : : : : ... :\n",
|
4675
|
+
"66 317 Gentoo Biscoe 49.4 15.8 216 ... 2009\n",
|
4676
|
+
"67 331 Gentoo Biscoe 50.5 15.2 216 ... 2009\n",
|
4677
|
+
"68 337 Gentoo Biscoe 44.5 15.7 217 ... 2009\n"
|
4678
|
+
]
|
4679
|
+
},
|
4680
|
+
"execution_count": 140,
|
4681
|
+
"metadata": {},
|
4682
|
+
"output_type": "execute_result"
|
4683
|
+
}
|
4684
|
+
],
|
4685
|
+
"source": [
|
4686
|
+
"# prime-th rows ... Don't ask me what it means.\n",
|
4687
|
+
"require 'prime'\n",
|
4688
|
+
"penguins_with_index =\n",
|
4689
|
+
" penguins.assign do\n",
|
4690
|
+
" { index: Vector.new(penguins.indices) + 1 }\n",
|
4691
|
+
" end.pick { [keys[-1], keys[0..-2]] }\n",
|
4692
|
+
"penguins_with_index.slice { Vector.new(Prime.each(size).to_a) - 1 }"
|
4693
|
+
]
|
4694
|
+
},
|
4695
|
+
{
|
4696
|
+
"cell_type": "markdown",
|
4697
|
+
"id": "c9e8de1a-ad8f-4fdc-a65c-4d3db7123530",
|
4698
|
+
"metadata": {},
|
4699
|
+
"source": [
|
4700
|
+
"## 46. Slice by Enumerator"
|
4701
|
+
]
|
4702
|
+
},
|
4703
|
+
{
|
4704
|
+
"cell_type": "markdown",
|
4705
|
+
"id": "32dd53a3-a822-4ae1-afe2-b5aa2bfbd3e3",
|
4706
|
+
"metadata": {},
|
4707
|
+
"source": [
|
4708
|
+
"Slice accepts Enumerator as an option."
|
4709
|
+
]
|
4710
|
+
},
|
4711
|
+
{
|
4712
|
+
"cell_type": "code",
|
4713
|
+
"execution_count": 141,
|
4714
|
+
"id": "b2a118fa-f3c0-4f31-9b45-6db27ccbebe6",
|
4715
|
+
"metadata": {},
|
4716
|
+
"outputs": [
|
4717
|
+
{
|
4718
|
+
"data": {
|
4719
|
+
"text/html": [
|
4720
|
+
"RedAmber::DataFrame <35 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.1</td><td>18.7</td><td>181</td><td>3750</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>37.8</td><td>17.1</td><td>186</td><td>3300</td><td><i>(nil)</i></td><td>2007</td></tr><tr><td>Adelie</td><td>Biscoe</td><td>37.8</td><td>18.3</td><td>174</td><td>3400</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Dream</td><td>39.5</td><td>16.7</td><td>178</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td colspan='8'>⋮</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>48.5</td><td>15.0</td><td>219</td><td>4850</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.5</td><td>15.2</td><td>216</td><td>5000</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>46.8</td><td>14.3</td><td>215</td><td>4850</td><td>female</td><td>2009</td></tr></table>"
|
4721
|
+
],
|
4722
|
+
"text/plain": [
|
4723
|
+
"#<RedAmber::DataFrame : 35 x 8 Vectors, 0x000000000000ff78>\n",
|
4724
|
+
" species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
|
4725
|
+
" <string> <string> <double> <double> <uint8> ... <uint16>\n",
|
4726
|
+
" 1 Adelie Torgersen 39.1 18.7 181 ... 2007\n",
|
4727
|
+
" 2 Adelie Torgersen 37.8 17.1 186 ... 2007\n",
|
4728
|
+
" 3 Adelie Biscoe 37.8 18.3 174 ... 2007\n",
|
4729
|
+
" 4 Adelie Dream 39.5 16.7 178 ... 2007\n",
|
4730
|
+
" 5 Adelie Dream 36.5 18.0 182 ... 2007\n",
|
4731
|
+
" : : : : : : ... :\n",
|
4732
|
+
"33 Gentoo Biscoe 48.5 15.0 219 ... 2009\n",
|
4733
|
+
"34 Gentoo Biscoe 50.5 15.2 216 ... 2009\n",
|
4734
|
+
"35 Gentoo Biscoe 46.8 14.3 215 ... 2009\n"
|
4735
|
+
]
|
4736
|
+
},
|
4737
|
+
"execution_count": 141,
|
4738
|
+
"metadata": {},
|
4739
|
+
"output_type": "execute_result"
|
4740
|
+
}
|
4741
|
+
],
|
4742
|
+
"source": [
|
4743
|
+
"# Select every 10 samples\n",
|
4744
|
+
"penguins.slice(0.step by: 10, to: 340)"
|
4745
|
+
]
|
4746
|
+
},
|
4747
|
+
{
|
4748
|
+
"cell_type": "markdown",
|
4749
|
+
"id": "db312c2c-3a7c-4765-bfad-b3313b173a79",
|
4750
|
+
"metadata": {},
|
4751
|
+
"source": [
|
4752
|
+
"## 47. Output mode"
|
4753
|
+
]
|
4754
|
+
},
|
4755
|
+
{
|
4756
|
+
"cell_type": "markdown",
|
4757
|
+
"id": "714ed8df-5aa3-4ac4-8b0d-6390aff73c8c",
|
4758
|
+
"metadata": {},
|
4759
|
+
"source": [
|
4760
|
+
"Output mode of `#inspect` and `#to_iruby` is Table mode by default. If you prefer TDR mode set the environment variable `RED_AMBER_OUTPUT_MODE` to `\"TDR\"`."
|
4761
|
+
]
|
4762
|
+
},
|
4763
|
+
{
|
4764
|
+
"cell_type": "code",
|
4765
|
+
"execution_count": 142,
|
4766
|
+
"id": "a721804b-006e-44c6-8d38-885eae747eaa",
|
4767
|
+
"metadata": {},
|
4768
|
+
"outputs": [
|
4769
|
+
{
|
4770
|
+
"data": {
|
4771
|
+
"text/html": [
|
4772
|
+
"RedAmber::DataFrame <344 x 8 vectors> <table><tr><th>species</th><th>island</th><th>bill_length_mm</th><th>bill_depth_mm</th><th>flipper_length_mm</th><th>body_mass_g</th><th>sex</th><th>year</th></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.1</td><td>18.7</td><td>181</td><td>3750</td><td>male</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>39.5</td><td>17.4</td><td>186</td><td>3800</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td>40.3</td><td>18.0</td><td>195</td><td>3250</td><td>female</td><td>2007</td></tr><tr><td>Adelie</td><td>Torgersen</td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td><i>(nil)</i></td><td>2007</td></tr><tr><td colspan='8'>⋮</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>50.4</td><td>15.7</td><td>222</td><td>5750</td><td>male</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>45.2</td><td>14.8</td><td>212</td><td>5200</td><td>female</td><td>2009</td></tr><tr><td>Gentoo</td><td>Biscoe</td><td>49.9</td><td>16.1</td><td>213</td><td>5400</td><td>male</td><td>2009</td></tr></table>"
|
4773
|
+
],
|
4774
|
+
"text/plain": [
|
4775
|
+
"#<RedAmber::DataFrame : 344 x 8 Vectors, 0x000000000000f8ac>\n",
|
4776
|
+
" species island bill_length_mm bill_depth_mm flipper_length_mm ... year\n",
|
4777
|
+
" <string> <string> <double> <double> <uint8> ... <uint16>\n",
|
4778
|
+
" 1 Adelie Torgersen 39.1 18.7 181 ... 2007\n",
|
4779
|
+
" 2 Adelie Torgersen 39.5 17.4 186 ... 2007\n",
|
4780
|
+
" 3 Adelie Torgersen 40.3 18.0 195 ... 2007\n",
|
4781
|
+
" 4 Adelie Torgersen (nil) (nil) (nil) ... 2007\n",
|
4782
|
+
" 5 Adelie Torgersen 36.7 19.3 193 ... 2007\n",
|
4783
|
+
" : : : : : : ... :\n",
|
4784
|
+
"342 Gentoo Biscoe 50.4 15.7 222 ... 2009\n",
|
4785
|
+
"343 Gentoo Biscoe 45.2 14.8 212 ... 2009\n",
|
4786
|
+
"344 Gentoo Biscoe 49.9 16.1 213 ... 2009\n"
|
4787
|
+
]
|
4788
|
+
},
|
4789
|
+
"execution_count": 142,
|
4790
|
+
"metadata": {},
|
4791
|
+
"output_type": "execute_result"
|
4792
|
+
}
|
4793
|
+
],
|
4794
|
+
"source": [
|
4795
|
+
"ENV['RED_AMBER_OUTPUT_MODE'] = 'Table' # or nil (default)\n",
|
4796
|
+
"penguins # Almost same as `puts penguins.to_s` in any mode"
|
4797
|
+
]
|
4798
|
+
},
|
4799
|
+
{
|
4800
|
+
"cell_type": "code",
|
4801
|
+
"execution_count": 143,
|
4802
|
+
"id": "e4c9f70c-a4b1-4a81-bbc4-e9b14a6b6cb0",
|
4803
|
+
"metadata": {},
|
4804
|
+
"outputs": [
|
4805
|
+
{
|
4806
|
+
"name": "stdout",
|
4807
|
+
"output_type": "stream",
|
4808
|
+
"text": [
|
4809
|
+
"#<RedAmber::DataFrame : 344 x 8 Vectors, 0x000000000000f8ac>\n",
|
4810
|
+
"Vectors : 5 numeric, 3 strings\n",
|
4811
|
+
"# key type level data_preview\n",
|
4812
|
+
"1 :species string 3 {\"Adelie\"=>152, \"Chinstrap\"=>68, \"Gentoo\"=>124}\n",
|
4813
|
+
"2 :island string 3 {\"Torgersen\"=>52, \"Biscoe\"=>168, \"Dream\"=>124}\n",
|
4814
|
+
"3 :bill_length_mm double 165 [39.1, 39.5, 40.3, nil, 36.7, ... ], 2 nils\n",
|
4815
|
+
" ... 5 more Vectors ...\n",
|
4816
|
+
"\n"
|
4817
|
+
]
|
4818
|
+
}
|
4819
|
+
],
|
4820
|
+
"source": [
|
4821
|
+
"ENV['RED_AMBER_OUTPUT_MODE'] = 'TDR'\n",
|
4822
|
+
"p penguins; nil # Almost same as `penguins.tdr` in any mode"
|
4823
|
+
]
|
4824
|
+
},
|
4825
|
+
{
|
4826
|
+
"cell_type": "code",
|
4827
|
+
"execution_count": 144,
|
4828
|
+
"id": "2786e9a7-e321-43c5-b56e-9f2ca9d62f8b",
|
4829
|
+
"metadata": {},
|
4830
|
+
"outputs": [
|
4831
|
+
{
|
4832
|
+
"data": {
|
4833
|
+
"text/plain": [
|
4834
|
+
"RedAmber::DataFrame : 344 x 8 Vectors\n",
|
4835
|
+
"Vectors : 5 numeric, 3 strings\n",
|
4836
|
+
"# key type level data_preview\n",
|
4837
|
+
"1 :species string 3 {\"Adelie\"=>152, \"Chinstrap\"=>68, \"Gentoo\"=>124}\n",
|
4838
|
+
"2 :island string 3 {\"Torgersen\"=>52, \"Biscoe\"=>168, \"Dream\"=>124}\n",
|
4839
|
+
"3 :bill_length_mm double 165 [39.1, 39.5, 40.3, nil, 36.7, ... ], 2 nils\n",
|
4840
|
+
"4 :bill_depth_mm double 81 [18.7, 17.4, 18.0, nil, 19.3, ... ], 2 nils\n",
|
4841
|
+
"5 :flipper_length_mm uint8 56 [181, 186, 195, nil, 193, ... ], 2 nils\n",
|
4842
|
+
"6 :body_mass_g uint16 95 [3750, 3800, 3250, nil, 3450, ... ], 2 nils\n",
|
4843
|
+
"7 :sex string 3 {\"male\"=>168, \"female\"=>165, nil=>11}\n",
|
4844
|
+
"8 :year uint16 3 {2007=>110, 2008=>114, 2009=>120}\n"
|
4845
|
+
]
|
4846
|
+
},
|
4847
|
+
"execution_count": 144,
|
4848
|
+
"metadata": {},
|
4849
|
+
"output_type": "execute_result"
|
4850
|
+
}
|
4851
|
+
],
|
4852
|
+
"source": [
|
4853
|
+
"penguins"
|
4854
|
+
]
|
4855
|
+
}
|
4856
|
+
],
|
4857
|
+
"metadata": {
|
4858
|
+
"kernelspec": {
|
4859
|
+
"display_name": "Ruby 3.1.1",
|
4860
|
+
"language": "ruby",
|
4861
|
+
"name": "ruby"
|
4862
|
+
},
|
4863
|
+
"language_info": {
|
4864
|
+
"file_extension": ".rb",
|
4865
|
+
"mimetype": "application/x-ruby",
|
4866
|
+
"name": "ruby",
|
4867
|
+
"version": "3.1.1"
|
4868
|
+
}
|
4869
|
+
},
|
4870
|
+
"nbformat": 4,
|
4871
|
+
"nbformat_minor": 5
|
4872
|
+
}
|