red_amber 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +20 -5
- data/CHANGELOG.md +104 -4
- data/README.md +18 -16
- data/benchmark/basic.yml +8 -8
- data/benchmark/combine.yml +3 -3
- data/benchmark/dataframe.yml +15 -9
- data/benchmark/group.yml +6 -6
- data/benchmark/reshape.yml +6 -6
- data/benchmark/vector.yml +6 -6
- data/doc/CODE_OF_CONDUCT.md +1 -1
- data/docker/.env +4 -0
- data/docker/Dockerfile +66 -0
- data/docker/Gemfile +21 -0
- data/docker/Gemfile.lock +80 -0
- data/docker/docker-compose.yml +21 -0
- data/docker/example +74 -0
- data/docker/notebook/examples_of_red_amber.ipynb +8562 -0
- data/docker/notebook/red-amber.ipynb +188 -0
- data/docker/readme.md +118 -0
- data/lib/red_amber/data_frame.rb +25 -10
- data/lib/red_amber/data_frame_combinable.rb +117 -73
- data/lib/red_amber/data_frame_displayable.rb +100 -51
- data/lib/red_amber/data_frame_indexable.rb +4 -4
- data/lib/red_amber/data_frame_reshaping.rb +1 -1
- data/lib/red_amber/data_frame_selectable.rb +1 -4
- data/lib/red_amber/data_frame_variable_operation.rb +7 -2
- data/lib/red_amber/group.rb +17 -18
- data/lib/red_amber/helper.rb +4 -4
- data/lib/red_amber/refinements.rb +15 -2
- data/lib/red_amber/subframes.rb +319 -191
- data/lib/red_amber/vector.rb +7 -30
- data/lib/red_amber/vector_binary_element_wise.rb +149 -1
- data/lib/red_amber/vector_selectable.rb +49 -12
- data/lib/red_amber/vector_unary_element_wise.rb +93 -0
- data/lib/red_amber/version.rb +1 -1
- data/red_amber.gemspec +3 -3
- metadata +16 -7
@@ -0,0 +1,188 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "markdown",
|
5
|
+
"metadata": {},
|
6
|
+
"source": [
|
7
|
+
"# RedAmber Examples\n",
|
8
|
+
"\n",
|
9
|
+
"This notebook walks through the [README of RedAmber](https://github.com/heronshoes/red_amber#readme)."
|
10
|
+
]
|
11
|
+
},
|
12
|
+
{
|
13
|
+
"cell_type": "markdown",
|
14
|
+
"metadata": {},
|
15
|
+
"source": [
|
16
|
+
"## `RedAmber::DataFrame`"
|
17
|
+
]
|
18
|
+
},
|
19
|
+
{
|
20
|
+
"cell_type": "code",
|
21
|
+
"execution_count": null,
|
22
|
+
"metadata": {
|
23
|
+
"tags": []
|
24
|
+
},
|
25
|
+
"outputs": [],
|
26
|
+
"source": [
|
27
|
+
"require 'red_amber'\n",
|
28
|
+
"include RedAmber\n",
|
29
|
+
"require 'datasets-arrow'\n",
|
30
|
+
"\n",
|
31
|
+
"{RedAmber: VERSION, Datasets: Datasets::VERSION}"
|
32
|
+
]
|
33
|
+
},
|
34
|
+
{
|
35
|
+
"cell_type": "markdown",
|
36
|
+
"metadata": {},
|
37
|
+
"source": [
|
38
|
+
"## Example: diamonds dataset\n",
|
39
|
+
"\n",
|
40
|
+
"The first time Datasets::Diamonds is loaded, it will take some time to download."
|
41
|
+
]
|
42
|
+
},
|
43
|
+
{
|
44
|
+
"cell_type": "code",
|
45
|
+
"execution_count": null,
|
46
|
+
"metadata": {
|
47
|
+
"tags": []
|
48
|
+
},
|
49
|
+
"outputs": [],
|
50
|
+
"source": [
|
51
|
+
"dataset = Datasets::Diamonds.new\n",
|
52
|
+
"diamonds = DataFrame.new(dataset)"
|
53
|
+
]
|
54
|
+
},
|
55
|
+
{
|
56
|
+
"cell_type": "code",
|
57
|
+
"execution_count": null,
|
58
|
+
"metadata": {
|
59
|
+
"tags": []
|
60
|
+
},
|
61
|
+
"outputs": [],
|
62
|
+
"source": [
|
63
|
+
"df = diamonds\n",
|
64
|
+
" .slice { carat > 1 } # or use #filter instead of #slice\n",
|
65
|
+
" .group(:cut)\n",
|
66
|
+
" .mean(:price) # `pick` prior to `group` is not required if `:price` is specified here.\n",
|
67
|
+
" .sort('-mean(price)')"
|
68
|
+
]
|
69
|
+
},
|
70
|
+
{
|
71
|
+
"cell_type": "code",
|
72
|
+
"execution_count": null,
|
73
|
+
"metadata": {
|
74
|
+
"tags": []
|
75
|
+
},
|
76
|
+
"outputs": [],
|
77
|
+
"source": [
|
78
|
+
"usdjpy = 110.0 # when the yen was stronger\n",
|
79
|
+
"\n",
|
80
|
+
"df.rename('mean(price)': :mean_price_USD)\n",
|
81
|
+
" .assign(:mean_price_JPY) { mean_price_USD * usdjpy }"
|
82
|
+
]
|
83
|
+
},
|
84
|
+
{
|
85
|
+
"cell_type": "markdown",
|
86
|
+
"metadata": {
|
87
|
+
"tags": []
|
88
|
+
},
|
89
|
+
"source": [
|
90
|
+
"## Example: starwars dataset"
|
91
|
+
]
|
92
|
+
},
|
93
|
+
{
|
94
|
+
"cell_type": "code",
|
95
|
+
"execution_count": null,
|
96
|
+
"metadata": {
|
97
|
+
"tags": []
|
98
|
+
},
|
99
|
+
"outputs": [],
|
100
|
+
"source": [
|
101
|
+
"uri = URI('https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv')\n",
|
102
|
+
"\n",
|
103
|
+
"starwars = DataFrame.load(uri)"
|
104
|
+
]
|
105
|
+
},
|
106
|
+
{
|
107
|
+
"cell_type": "code",
|
108
|
+
"execution_count": null,
|
109
|
+
"metadata": {
|
110
|
+
"tags": []
|
111
|
+
},
|
112
|
+
"outputs": [],
|
113
|
+
"source": [
|
114
|
+
"starwars\n",
|
115
|
+
" .drop(0) # delete unnecessary index column\n",
|
116
|
+
" .remove { species == \"NA\" } # delete unnecessary rows\n",
|
117
|
+
" .group(:species) { [count(:species), mean(:height, :mass)] }\n",
|
118
|
+
" .slice { count > 1 } # or use #filter instead of slice"
|
119
|
+
]
|
120
|
+
},
|
121
|
+
{
|
122
|
+
"cell_type": "markdown",
|
123
|
+
"metadata": {},
|
124
|
+
"source": [
|
125
|
+
"## `RedAmber::Vector`"
|
126
|
+
]
|
127
|
+
},
|
128
|
+
{
|
129
|
+
"cell_type": "code",
|
130
|
+
"execution_count": null,
|
131
|
+
"metadata": {
|
132
|
+
"tags": []
|
133
|
+
},
|
134
|
+
"outputs": [],
|
135
|
+
"source": [
|
136
|
+
"penguins = DataFrame.new(Datasets::Penguins.new)"
|
137
|
+
]
|
138
|
+
},
|
139
|
+
{
|
140
|
+
"cell_type": "code",
|
141
|
+
"execution_count": null,
|
142
|
+
"metadata": {
|
143
|
+
"tags": []
|
144
|
+
},
|
145
|
+
"outputs": [],
|
146
|
+
"source": [
|
147
|
+
"penguins[:bill_length_mm]"
|
148
|
+
]
|
149
|
+
},
|
150
|
+
{
|
151
|
+
"cell_type": "code",
|
152
|
+
"execution_count": null,
|
153
|
+
"metadata": {
|
154
|
+
"tags": []
|
155
|
+
},
|
156
|
+
"outputs": [],
|
157
|
+
"source": [
|
158
|
+
"penguins[:bill_length_mm] < 40"
|
159
|
+
]
|
160
|
+
},
|
161
|
+
{
|
162
|
+
"cell_type": "code",
|
163
|
+
"execution_count": null,
|
164
|
+
"metadata": {
|
165
|
+
"tags": []
|
166
|
+
},
|
167
|
+
"outputs": [],
|
168
|
+
"source": [
|
169
|
+
"penguins[:bill_length_mm].mean"
|
170
|
+
]
|
171
|
+
}
|
172
|
+
],
|
173
|
+
"metadata": {
|
174
|
+
"kernelspec": {
|
175
|
+
"display_name": "Ruby 3.0.2",
|
176
|
+
"language": "ruby",
|
177
|
+
"name": "ruby"
|
178
|
+
},
|
179
|
+
"language_info": {
|
180
|
+
"file_extension": ".rb",
|
181
|
+
"mimetype": "application/x-ruby",
|
182
|
+
"name": "ruby",
|
183
|
+
"version": "3.0.2"
|
184
|
+
}
|
185
|
+
},
|
186
|
+
"nbformat": 4,
|
187
|
+
"nbformat_minor": 4
|
188
|
+
}
|
data/docker/readme.md
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
# RedAmber Minimal Notebook
|
2
|
+
|
3
|
+
This is a docker image containing RedAmber created from
|
4
|
+
[jupyter/minimal-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-minimal-notebook)
|
5
|
+
|
6
|
+
## Contents
|
7
|
+
|
8
|
+
- From jupyter/minimal-notebook:
|
9
|
+
- Based on 2023-03-13 (295612d3ade4)
|
10
|
+
- x86-64
|
11
|
+
- Ubuntu-22.04
|
12
|
+
- python-3.10.9
|
13
|
+
- lab-3.6.1
|
14
|
+
- notebook-6.5.3
|
15
|
+
- System ruby-dev:
|
16
|
+
- Ruby 3.0.2
|
17
|
+
- Arrow 11.0.0 for Ubuntu:
|
18
|
+
- libarrow-dev
|
19
|
+
- libarrow-glib-dev
|
20
|
+
- libparquet-dev
|
21
|
+
- libparquet-glib-dev
|
22
|
+
- Locally installed iruby:
|
23
|
+
- Using Ruby 3.0.2
|
24
|
+
- Locally installed bundler and Gemfile:
|
25
|
+
- RedAmber 0.4.1
|
26
|
+
- Others (see Gemfile)
|
27
|
+
|
28
|
+
## Install
|
29
|
+
|
30
|
+
```
|
31
|
+
git clone https://github.com/heronshoes/red_amber.git
|
32
|
+
cd red_amber/docker
|
33
|
+
```
|
34
|
+
|
35
|
+
Edit ENV variable in `.env` as you like.
|
36
|
+
|
37
|
+
[note] NB_USER is fixed to `jovyan`, the common user name in Jupyter,
|
38
|
+
and cannot be changed in this version.
|
39
|
+
|
40
|
+
If TZ is not used in your host system, define it here.
|
41
|
+
Otherwise UTC is used in the container.
|
42
|
+
|
43
|
+
TOKEN will be used for token-based authentication.
|
44
|
+
|
45
|
+
```
|
46
|
+
# Example
|
47
|
+
TZ=Asia/Tokyo
|
48
|
+
TOKEN='something'
|
49
|
+
```
|
50
|
+
|
51
|
+
Then build the `red_amber-minimal-notebook` container. It will take a while.
|
52
|
+
|
53
|
+
```
|
54
|
+
docker-compose build
|
55
|
+
```
|
56
|
+
|
57
|
+
## Start Jupyter Lab
|
58
|
+
|
59
|
+
After the build, start the container. Adding the `-d` option runs it detached in the background.
|
60
|
+
|
61
|
+
```
|
62
|
+
docker-compose up
|
63
|
+
```
|
64
|
+
|
65
|
+
You can access Jupyter Lab from `http://localhost:8888/` in your browser.
|
66
|
+
|
67
|
+
- `red-amber.ipynb`:
|
68
|
+
- Walks through the [README of RedAmber](https://github.com/heronshoes/red_amber#readme).
|
69
|
+
- `examples_of_red_amber.ipynb`:
|
70
|
+
- [Examples of RedAmber](https://github.com/heronshoes/red_amber/blob/main/docker/notebook/examples_of_red_amber.ipynb) in Notebook style.
|
71
|
+
|
72
|
+
## Example in REPL
|
73
|
+
|
74
|
+
You can try RedAmber in irb with pre-loaded datasets.
|
75
|
+
|
76
|
+
Start `terminal` in Jupyter.
|
77
|
+
|
78
|
+
For the first run,
|
79
|
+
|
80
|
+
```
|
81
|
+
source ~/.bashrc
|
82
|
+
../example
|
83
|
+
|
84
|
+
```
|
85
|
+
|
86
|
+
The first run will take a while to fetch and prepare the red-datasets cache.
|
87
|
+
|
88
|
+
When irb starts, you will see:
|
89
|
+
|
90
|
+
```ruby
|
91
|
+
|
92
|
+
69: # Welcome to RedAmber example!
|
93
|
+
70: # This environment will offer these pre-loaded datasets:
|
94
|
+
71: # penguins, diamonds, iris, starwars, simpsons_paradox_covid,
|
95
|
+
72: # mtcars, band_members, band_instruments, band_instruments2
|
96
|
+
73: # (original) import_cars, comecome, dataframe, subframes
|
97
|
+
=> 74: binding.irb
|
98
|
+
|
99
|
+
irb(main):001:0>
|
100
|
+
```
|
101
|
+
|
102
|
+
RedAmber is already loaded in this environment with some datasets shown above.
|
103
|
+
|
104
|
+
```ruby
|
105
|
+
irb(main):002:0> dataframe
|
106
|
+
=>
|
107
|
+
#<RedAmber::DataFrame : 6 x 3 Vectors, 0x0000000000003818>
|
108
|
+
x y z
|
109
|
+
<uint8> <string> <boolean>
|
110
|
+
0 1 A false
|
111
|
+
1 2 A true
|
112
|
+
2 3 B false
|
113
|
+
3 4 B (nil)
|
114
|
+
4 5 B true
|
115
|
+
5 6 C false
|
116
|
+
```
|
117
|
+
|
118
|
+
The next time you start this environment, you can simply run `../example`.
|
data/lib/red_amber/data_frame.rb
CHANGED
@@ -33,6 +33,23 @@ module RedAmber
|
|
33
33
|
instance.instance_variable_set(:@table, table)
|
34
34
|
instance
|
35
35
|
end
|
36
|
+
|
37
|
+
# Return new DataFrame for specified schema and value.
|
38
|
+
#
|
39
|
+
# @param dataframe_for_schema [DataFrame]
|
40
|
+
# schema of this dataframe will be used.
|
41
|
+
# @param dataframe_for_value [DataFrame]
|
42
|
+
# column values of this dataframe will be used.
|
43
|
+
# @return [DataFrame]
|
44
|
+
# created DataFrame.
|
45
|
+
# @since 0.4.1
|
46
|
+
#
|
47
|
+
def new_dataframe_with_schema(dataframe_for_schema, dataframe_for_value)
|
48
|
+
DataFrame.create(
|
49
|
+
Arrow::Table.new(dataframe_for_schema.table.schema,
|
50
|
+
dataframe_for_value.table.columns)
|
51
|
+
)
|
52
|
+
end
|
36
53
|
end
|
37
54
|
|
38
55
|
# Creates a new DataFrame.
|
@@ -194,7 +211,7 @@ module RedAmber
|
|
194
211
|
# `key => Vector` pairs for each column.
|
195
212
|
#
|
196
213
|
def variables
|
197
|
-
@variables
|
214
|
+
@variables ||= init_instance_vars(:variables)
|
198
215
|
end
|
199
216
|
alias_method :vars, :variables
|
200
217
|
|
@@ -204,7 +221,7 @@ module RedAmber
|
|
204
221
|
# keys in an Array.
|
205
222
|
#
|
206
223
|
def keys
|
207
|
-
@keys
|
224
|
+
@keys ||= init_instance_vars(:keys)
|
208
225
|
end
|
209
226
|
alias_method :column_names, :keys
|
210
227
|
alias_method :var_names, :keys
|
@@ -240,7 +257,7 @@ module RedAmber
|
|
240
257
|
# abbreviated Red Arrow data type names.
|
241
258
|
#
|
242
259
|
def types
|
243
|
-
@types
|
260
|
+
@types ||= @table.columns.map do |column|
|
244
261
|
column.data.value_type.nick.to_sym
|
245
262
|
end
|
246
263
|
end
|
@@ -251,7 +268,7 @@ module RedAmber
|
|
251
268
|
# an Array of Red Arrow data type Classes.
|
252
269
|
#
|
253
270
|
def type_classes
|
254
|
-
@
|
271
|
+
@type_classes ||= @table.columns.map { |column| column.data_type.class }
|
255
272
|
end
|
256
273
|
|
257
274
|
# Returns Vectors in an Array.
|
@@ -260,7 +277,7 @@ module RedAmber
|
|
260
277
|
# an Array of Vector.
|
261
278
|
#
|
262
279
|
def vectors
|
263
|
-
@vectors
|
280
|
+
@vectors ||= init_instance_vars(:vectors)
|
264
281
|
end
|
265
282
|
|
266
283
|
# Returns column-oriented data in a Hash.
|
@@ -682,7 +699,7 @@ module RedAmber
|
|
682
699
|
|
683
700
|
# Catch variable (column) key as method name.
|
684
701
|
def method_missing(name, *args, &block)
|
685
|
-
return
|
702
|
+
return variables[name] if args.empty? && key?(name)
|
686
703
|
|
687
704
|
super
|
688
705
|
end
|
@@ -723,11 +740,9 @@ module RedAmber
|
|
723
740
|
end
|
724
741
|
|
725
742
|
def name_unnamed_keys
|
726
|
-
return unless @table.key?('')
|
743
|
+
return unless @table.key?(:'')
|
727
744
|
|
728
|
-
|
729
|
-
keys = @table.schema.fields.map { |f| f.name.to_sym }
|
730
|
-
unnamed = (:unnamed1..).find { |e| !keys.include?(e) }
|
745
|
+
unnamed = (:unnamed1..).find { |name| !@table.key?(name) }
|
731
746
|
fields =
|
732
747
|
@table.schema.fields.map do |field|
|
733
748
|
if field.name.empty?
|