red_amber 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,188 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# RedAmber Examples\n",
8
+ "\n",
9
+ "This notebook walks through the [README of RedAmber](https://github.com/heronshoes/red_amber#readme)."
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "markdown",
14
+ "metadata": {},
15
+ "source": [
16
+ "## `RedAmber::DataFrame`"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": null,
22
+ "metadata": {
23
+ "tags": []
24
+ },
25
+ "outputs": [],
26
+ "source": [
27
+ "require 'red_amber'\n",
28
+ "include RedAmber\n",
29
+ "require 'datasets-arrow'\n",
30
+ "\n",
31
+ "{RedAmber: VERSION, Datasets: Datasets::VERSION}"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "markdown",
36
+ "metadata": {},
37
+ "source": [
38
+ "## Example: diamonds dataset\n",
39
+ "\n",
40
+ "The first time Datasets::Diamonds is loaded, it will take some time to download the data."
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": null,
46
+ "metadata": {
47
+ "tags": []
48
+ },
49
+ "outputs": [],
50
+ "source": [
51
+ "dataset = Datasets::Diamonds.new\n",
52
+ "diamonds = DataFrame.new(dataset)"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": null,
58
+ "metadata": {
59
+ "tags": []
60
+ },
61
+ "outputs": [],
62
+ "source": [
63
+ "df = diamonds\n",
64
+ " .slice { carat > 1 } # or use #filter instead of #slice\n",
65
+ " .group(:cut)\n",
66
+ " .mean(:price) # `pick` prior to `group` is not required if `:price` is specified here.\n",
67
+ " .sort('-mean(price)')"
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "code",
72
+ "execution_count": null,
73
+ "metadata": {
74
+ "tags": []
75
+ },
76
+ "outputs": [],
77
+ "source": [
78
+ "usdjpy = 110.0 # when the yen was stronger\n",
79
+ "\n",
80
+ "df.rename('mean(price)': :mean_price_USD)\n",
81
+ " .assign(:mean_price_JPY) { mean_price_USD * usdjpy }"
82
+ ]
83
+ },
84
+ {
85
+ "cell_type": "markdown",
86
+ "metadata": {
87
+ "tags": []
88
+ },
89
+ "source": [
90
+ "## Example: starwars dataset"
91
+ ]
92
+ },
93
+ {
94
+ "cell_type": "code",
95
+ "execution_count": null,
96
+ "metadata": {
97
+ "tags": []
98
+ },
99
+ "outputs": [],
100
+ "source": [
101
+ "uri = URI('https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv')\n",
102
+ "\n",
103
+ "starwars = DataFrame.load(uri)"
104
+ ]
105
+ },
106
+ {
107
+ "cell_type": "code",
108
+ "execution_count": null,
109
+ "metadata": {
110
+ "tags": []
111
+ },
112
+ "outputs": [],
113
+ "source": [
114
+ "starwars\n",
115
+ " .drop(0) # delete unnecessary index column\n",
116
+ " .remove { species == \"NA\" } # delete unnecessary rows\n",
117
+ " .group(:species) { [count(:species), mean(:height, :mass)] }\n",
118
+ " .slice { count > 1 } # or use #filter instead of slice"
119
+ ]
120
+ },
121
+ {
122
+ "cell_type": "markdown",
123
+ "metadata": {},
124
+ "source": [
125
+ "## `RedAmber::Vector`"
126
+ ]
127
+ },
128
+ {
129
+ "cell_type": "code",
130
+ "execution_count": null,
131
+ "metadata": {
132
+ "tags": []
133
+ },
134
+ "outputs": [],
135
+ "source": [
136
+ "penguins = DataFrame.new(Datasets::Penguins.new)"
137
+ ]
138
+ },
139
+ {
140
+ "cell_type": "code",
141
+ "execution_count": null,
142
+ "metadata": {
143
+ "tags": []
144
+ },
145
+ "outputs": [],
146
+ "source": [
147
+ "penguins[:bill_length_mm]"
148
+ ]
149
+ },
150
+ {
151
+ "cell_type": "code",
152
+ "execution_count": null,
153
+ "metadata": {
154
+ "tags": []
155
+ },
156
+ "outputs": [],
157
+ "source": [
158
+ "penguins[:bill_length_mm] < 40"
159
+ ]
160
+ },
161
+ {
162
+ "cell_type": "code",
163
+ "execution_count": null,
164
+ "metadata": {
165
+ "tags": []
166
+ },
167
+ "outputs": [],
168
+ "source": [
169
+ "penguins[:bill_length_mm].mean"
170
+ ]
171
+ }
172
+ ],
173
+ "metadata": {
174
+ "kernelspec": {
175
+ "display_name": "Ruby 3.0.2",
176
+ "language": "ruby",
177
+ "name": "ruby"
178
+ },
179
+ "language_info": {
180
+ "file_extension": ".rb",
181
+ "mimetype": "application/x-ruby",
182
+ "name": "ruby",
183
+ "version": "3.0.2"
184
+ }
185
+ },
186
+ "nbformat": 4,
187
+ "nbformat_minor": 4
188
+ }
data/docker/readme.md ADDED
@@ -0,0 +1,118 @@
1
+ # RedAmber Minimal Notebook
2
+
3
+ This is a docker image containing RedAmber created from
4
+ [jupyter/minimal-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-minimal-notebook)
5
+
6
+ ## Contents
7
+
8
+ - From jupyter/minimal-notebook:
9
+ - Based on 2023-03-13 (295612d3ade4)
10
+ - x86-64
11
+ - Ubuntu-22.04
12
+ - python-3.10.9
13
+ - lab-3.6.1
14
+ - notebook-6.5.3
15
+ - System ruby-dev:
16
+ - Ruby 3.0.2
17
+ - Arrow 11.0.0 for Ubuntu:
18
+ - libarrow-dev
19
+ - libarrow-glib-dev
20
+ - libparquet-dev
21
+ - libparquet-glib-dev
22
+ - Locally installed iruby:
23
+ - Using Ruby 3.0.2
24
+ - Locally installed bundler and Gemfile:
25
+ - RedAmber 0.4.1
26
+ - Others (see Gemfile)
27
+
28
+ ## Install
29
+
30
+ ```
31
+ git clone https://github.com/heronshoes/red_amber.git
32
+ cd red_amber/docker
33
+ ```
34
+
35
+ Edit ENV variable in `.env` as you like.
36
+
37
+ [note] NB_USER is fixed to `jovyan`, the common user name in Jupyter,
38
+ and cannot be changed in this version.
39
+
40
+ If TZ is not used in your host system, define it here.
41
+ Otherwise UTC is used in the container.
42
+
43
+ TOKEN will be used for token-based authentication.
44
+
45
+ ```
46
+ # Example
47
+ TZ=Asia/Tokyo
48
+ TOKEN='something'
49
+ ```
50
+
51
+ Then build the `red_amber-minimal-notebook` container. It will take a while.
52
+
53
+ ```
54
+ docker-compose build
55
+ ```
56
+
57
+ ## Start Jupyter Lab
58
+
59
+ After the build, start the container. Adding the `-d` option runs it detached in the background.
60
+
61
+ ```
62
+ docker-compose up
63
+ ```
64
+
65
+ You can access Jupyter Lab from `http://localhost:8888/` in your browser.
66
+
67
+ - `red-amber.ipynb`:
68
+ - Walks through the [README of RedAmber](https://github.com/heronshoes/red_amber#readme).
69
+ - `examples_of_red_amber.ipynb`:
70
+ - [Examples of RedAmber](https://github.com/heronshoes/red_amber/blob/main/docker/notebook/examples_of_red_amber.ipynb) in Notebook style.
71
+
72
+ ## Example in REPL
73
+
74
+ You can try RedAmber in irb with pre-loaded datasets.
75
+
76
+ Start `terminal` in Jupyter.
77
+
78
+ For the first run,
79
+
80
+ ```
81
+ source ~/.bashrc
82
+ ../example
83
+
84
+ ```
85
+
86
+ It will take a while for the first run to fetch and prepare red-datasets cache.
87
+
88
+ Once irb starts, you will see:
89
+
90
+ ```ruby
91
+
92
+ 69: # Welcome to RedAmber example!
93
+ 70: # This environment will offer these pre-loaded datasets:
94
+ 71: # penguins, diamonds, iris, starwars, simpsons_paradox_covid,
95
+ 72: # mtcars, band_members, band_instruments, band_instruments2
96
+ 73: # (original) import_cars, comecome, dataframe, subframes
97
+ => 74: binding.irb
98
+
99
+ irb(main):001:0>
100
+ ```
101
+
102
+ RedAmber is already loaded in this environment with some datasets shown above.
103
+
104
+ ```ruby
105
+ irb(main):002:0> dataframe
106
+ =>
107
+ #<RedAmber::DataFrame : 6 x 3 Vectors, 0x0000000000003818>
108
+ x y z
109
+ <uint8> <string> <boolean>
110
+ 0 1 A false
111
+ 1 2 A true
112
+ 2 3 B false
113
+ 3 4 B (nil)
114
+ 4 5 B true
115
+ 5 6 C false
116
+ ```
117
+
118
+ The next time you start this environment, you can simply run `../example`.
@@ -33,6 +33,23 @@ module RedAmber
33
33
  instance.instance_variable_set(:@table, table)
34
34
  instance
35
35
  end
36
+
37
+ # Return new DataFrame for specified schema and value.
38
+ #
39
+ # @param dataframe_for_schema [DataFrame]
40
+ # schema of this dataframe will be used.
41
+ # @param dataframe_for_value [DataFrame]
42
+ # column values of this dataframe will be used.
43
+ # @return [DataFrame]
44
+ # created DataFrame.
45
+ # @since 0.4.1
46
+ #
47
+ def new_dataframe_with_schema(dataframe_for_schema, dataframe_for_value)
48
+ DataFrame.create(
49
+ Arrow::Table.new(dataframe_for_schema.table.schema,
50
+ dataframe_for_value.table.columns)
51
+ )
52
+ end
36
53
  end
37
54
 
38
55
  # Creates a new DataFrame.
@@ -194,7 +211,7 @@ module RedAmber
194
211
  # `key => Vector` pairs for each columns.
195
212
  #
196
213
  def variables
197
- @variables || @variables = init_instance_vars(:variables)
214
+ @variables ||= init_instance_vars(:variables)
198
215
  end
199
216
  alias_method :vars, :variables
200
217
 
@@ -204,7 +221,7 @@ module RedAmber
204
221
  # keys in an Array.
205
222
  #
206
223
  def keys
207
- @keys || @keys = init_instance_vars(:keys)
224
+ @keys ||= init_instance_vars(:keys)
208
225
  end
209
226
  alias_method :column_names, :keys
210
227
  alias_method :var_names, :keys
@@ -240,7 +257,7 @@ module RedAmber
240
257
  # abbreviated Red Arrow data type names.
241
258
  #
242
259
  def types
243
- @types || @types = @table.columns.map do |column|
260
+ @types ||= @table.columns.map do |column|
244
261
  column.data.value_type.nick.to_sym
245
262
  end
246
263
  end
@@ -251,7 +268,7 @@ module RedAmber
251
268
  # an Array of Red Arrow data type Classes.
252
269
  #
253
270
  def type_classes
254
- @data_types || @data_types = @table.columns.map { |column| column.data_type.class }
271
+ @type_classes ||= @table.columns.map { |column| column.data_type.class }
255
272
  end
256
273
 
257
274
  # Returns Vectors in an Array.
@@ -260,7 +277,7 @@ module RedAmber
260
277
  # an Array of Vector.
261
278
  #
262
279
  def vectors
263
- @vectors || @vectors = init_instance_vars(:vectors)
280
+ @vectors ||= init_instance_vars(:vectors)
264
281
  end
265
282
 
266
283
  # Returns column-oriented data in a Hash.
@@ -682,7 +699,7 @@ module RedAmber
682
699
 
683
700
  # Catch variable (column) key as method name.
684
701
  def method_missing(name, *args, &block)
685
- return v(name) if args.empty? && key?(name)
702
+ return variables[name] if args.empty? && key?(name)
686
703
 
687
704
  super
688
705
  end
@@ -723,11 +740,9 @@ module RedAmber
723
740
  end
724
741
 
725
742
  def name_unnamed_keys
726
- return unless @table.key?('')
743
+ return unless @table.key?(:'')
727
744
 
728
- # We can't use #keys because it causes mismatch of @table and @keys
729
- keys = @table.schema.fields.map { |f| f.name.to_sym }
730
- unnamed = (:unnamed1..).find { |e| !keys.include?(e) }
745
+ unnamed = (:unnamed1..).find { |name| !@table.key?(name) }
731
746
  fields =
732
747
  @table.schema.fields.map do |field|
733
748
  if field.name.empty?