red_amber 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -3
- data/CHANGELOG.md +36 -1
- data/README.md +13 -11
- data/benchmark/basic.yml +1 -1
- data/doc/CODE_OF_CONDUCT.md +1 -1
- data/docker/.env +4 -0
- data/docker/Dockerfile +66 -0
- data/docker/Gemfile +21 -0
- data/docker/Gemfile.lock +80 -0
- data/docker/docker-compose.yml +21 -0
- data/docker/example +74 -0
- data/docker/notebook/examples_of_red_amber.ipynb +8562 -0
- data/docker/notebook/red-amber.ipynb +188 -0
- data/docker/readme.md +118 -0
- data/lib/red_amber/group.rb +16 -16
- data/lib/red_amber/subframes.rb +158 -65
- data/lib/red_amber/vector_binary_element_wise.rb +54 -25
- data/lib/red_amber/version.rb +1 -1
- data/red_amber.gemspec +3 -3
- metadata +16 -7
@@ -0,0 +1,188 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "markdown",
|
5
|
+
"metadata": {},
|
6
|
+
"source": [
|
7
|
+
"# RedAmber Examples\n",
|
8
|
+
"\n",
|
9
|
+
"This notebook walks through the [README of RedAmber](https://github.com/heronshoes/red_amber#readme)."
|
10
|
+
]
|
11
|
+
},
|
12
|
+
{
|
13
|
+
"cell_type": "markdown",
|
14
|
+
"metadata": {},
|
15
|
+
"source": [
|
16
|
+
"## `RedAmber::DataFrame`"
|
17
|
+
]
|
18
|
+
},
|
19
|
+
{
|
20
|
+
"cell_type": "code",
|
21
|
+
"execution_count": null,
|
22
|
+
"metadata": {
|
23
|
+
"tags": []
|
24
|
+
},
|
25
|
+
"outputs": [],
|
26
|
+
"source": [
|
27
|
+
"require 'red_amber'\n",
|
28
|
+
"include RedAmber\n",
|
29
|
+
"require 'datasets-arrow'\n",
|
30
|
+
"\n",
|
31
|
+
"{RedAmber: VERSION, Datasets: Datasets::VERSION}"
|
32
|
+
]
|
33
|
+
},
|
34
|
+
{
|
35
|
+
"cell_type": "markdown",
|
36
|
+
"metadata": {},
|
37
|
+
"source": [
|
38
|
+
"## Example: diamonds dataset\n",
|
39
|
+
"\n",
|
40
|
+
"The first time Datasets::Diamonds is loaded, it will take some time to download."
|
41
|
+
]
|
42
|
+
},
|
43
|
+
{
|
44
|
+
"cell_type": "code",
|
45
|
+
"execution_count": null,
|
46
|
+
"metadata": {
|
47
|
+
"tags": []
|
48
|
+
},
|
49
|
+
"outputs": [],
|
50
|
+
"source": [
|
51
|
+
"dataset = Datasets::Diamonds.new\n",
|
52
|
+
"diamonds = DataFrame.new(dataset)"
|
53
|
+
]
|
54
|
+
},
|
55
|
+
{
|
56
|
+
"cell_type": "code",
|
57
|
+
"execution_count": null,
|
58
|
+
"metadata": {
|
59
|
+
"tags": []
|
60
|
+
},
|
61
|
+
"outputs": [],
|
62
|
+
"source": [
|
63
|
+
"df = diamonds\n",
|
64
|
+
" .slice { carat > 1 } # or use #filter instead of #slice\n",
|
65
|
+
" .group(:cut)\n",
|
66
|
+
" .mean(:price) # `pick` prior to `group` is not required if `:price` is specified here.\n",
|
67
|
+
" .sort('-mean(price)')"
|
68
|
+
]
|
69
|
+
},
|
70
|
+
{
|
71
|
+
"cell_type": "code",
|
72
|
+
"execution_count": null,
|
73
|
+
"metadata": {
|
74
|
+
"tags": []
|
75
|
+
},
|
76
|
+
"outputs": [],
|
77
|
+
"source": [
|
78
|
+
"usdjpy = 110.0 # when the yen was stronger\n",
|
79
|
+
"\n",
|
80
|
+
"df.rename('mean(price)': :mean_price_USD)\n",
|
81
|
+
" .assign(:mean_price_JPY) { mean_price_USD * usdjpy }"
|
82
|
+
]
|
83
|
+
},
|
84
|
+
{
|
85
|
+
"cell_type": "markdown",
|
86
|
+
"metadata": {
|
87
|
+
"tags": []
|
88
|
+
},
|
89
|
+
"source": [
|
90
|
+
"## Example: starwars dataset"
|
91
|
+
]
|
92
|
+
},
|
93
|
+
{
|
94
|
+
"cell_type": "code",
|
95
|
+
"execution_count": null,
|
96
|
+
"metadata": {
|
97
|
+
"tags": []
|
98
|
+
},
|
99
|
+
"outputs": [],
|
100
|
+
"source": [
|
101
|
+
"uri = URI('https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv')\n",
|
102
|
+
"\n",
|
103
|
+
"starwars = DataFrame.load(uri)"
|
104
|
+
]
|
105
|
+
},
|
106
|
+
{
|
107
|
+
"cell_type": "code",
|
108
|
+
"execution_count": null,
|
109
|
+
"metadata": {
|
110
|
+
"tags": []
|
111
|
+
},
|
112
|
+
"outputs": [],
|
113
|
+
"source": [
|
114
|
+
"starwars\n",
|
115
|
+
" .drop(0) # delete unnecessary index column\n",
|
116
|
+
" .remove { species == \"NA\" } # delete unnecessary rows\n",
|
117
|
+
" .group(:species) { [count(:species), mean(:height, :mass)] }\n",
|
118
|
+
" .slice { count > 1 } # or use #filter instead of slice"
|
119
|
+
]
|
120
|
+
},
|
121
|
+
{
|
122
|
+
"cell_type": "markdown",
|
123
|
+
"metadata": {},
|
124
|
+
"source": [
|
125
|
+
"## `RedAmber::Vector`"
|
126
|
+
]
|
127
|
+
},
|
128
|
+
{
|
129
|
+
"cell_type": "code",
|
130
|
+
"execution_count": null,
|
131
|
+
"metadata": {
|
132
|
+
"tags": []
|
133
|
+
},
|
134
|
+
"outputs": [],
|
135
|
+
"source": [
|
136
|
+
"penguins = DataFrame.new(Datasets::Penguins.new)"
|
137
|
+
]
|
138
|
+
},
|
139
|
+
{
|
140
|
+
"cell_type": "code",
|
141
|
+
"execution_count": null,
|
142
|
+
"metadata": {
|
143
|
+
"tags": []
|
144
|
+
},
|
145
|
+
"outputs": [],
|
146
|
+
"source": [
|
147
|
+
"penguins[:bill_length_mm]"
|
148
|
+
]
|
149
|
+
},
|
150
|
+
{
|
151
|
+
"cell_type": "code",
|
152
|
+
"execution_count": null,
|
153
|
+
"metadata": {
|
154
|
+
"tags": []
|
155
|
+
},
|
156
|
+
"outputs": [],
|
157
|
+
"source": [
|
158
|
+
"penguins[:bill_length_mm] < 40"
|
159
|
+
]
|
160
|
+
},
|
161
|
+
{
|
162
|
+
"cell_type": "code",
|
163
|
+
"execution_count": null,
|
164
|
+
"metadata": {
|
165
|
+
"tags": []
|
166
|
+
},
|
167
|
+
"outputs": [],
|
168
|
+
"source": [
|
169
|
+
"penguins[:bill_length_mm].mean"
|
170
|
+
]
|
171
|
+
}
|
172
|
+
],
|
173
|
+
"metadata": {
|
174
|
+
"kernelspec": {
|
175
|
+
"display_name": "Ruby 3.0.2",
|
176
|
+
"language": "ruby",
|
177
|
+
"name": "ruby"
|
178
|
+
},
|
179
|
+
"language_info": {
|
180
|
+
"file_extension": ".rb",
|
181
|
+
"mimetype": "application/x-ruby",
|
182
|
+
"name": "ruby",
|
183
|
+
"version": "3.0.2"
|
184
|
+
}
|
185
|
+
},
|
186
|
+
"nbformat": 4,
|
187
|
+
"nbformat_minor": 4
|
188
|
+
}
|
data/docker/readme.md
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
# RedAmber Minimal Notebook
|
2
|
+
|
3
|
+
This is a docker image containing RedAmber created from
|
4
|
+
[jupyter/minimal-notebook](https://jupyter-docker-stacks.readthedocs.io/en/latest/using/selecting.html#jupyter-minimal-notebook)
|
5
|
+
|
6
|
+
## Contents
|
7
|
+
|
8
|
+
- From jupyter/minimal-notebook:
|
9
|
+
- Based on 2023-03-13 (295612d3ade4)
|
10
|
+
- x86-64
|
11
|
+
- Ubuntu-22.04
|
12
|
+
- python-3.10.9
|
13
|
+
- lab-3.6.1
|
14
|
+
- notebook-6.5.3
|
15
|
+
- System ruby-dev:
|
16
|
+
- Ruby 3.0.2
|
17
|
+
- Arrow 11.0.0 for Ubuntu:
|
18
|
+
- libarrow-dev
|
19
|
+
- libarrow-glib-dev
|
20
|
+
- libparquet-dev
|
21
|
+
- libparquet-glib-dev
|
22
|
+
- Locally installed iruby:
|
23
|
+
- Using Ruby 3.0.2
|
24
|
+
- Locally installed bundler and Gemfile:
|
25
|
+
- RedAmber 0.4.1
|
26
|
+
- Others (see Gemfile)
|
27
|
+
|
28
|
+
## Install
|
29
|
+
|
30
|
+
```
|
31
|
+
git clone https://github.com/heronshoes/red_amber.git
|
32
|
+
cd red_amber/docker
|
33
|
+
```
|
34
|
+
|
35
|
+
Edit the environment variables in `.env` as you like.
|
36
|
+
|
37
|
+
[note] NB_USER is fixed to `jovyan`, the common user name in Jupyter,
|
38
|
+
and cannot be changed in this version.
|
39
|
+
|
40
|
+
If TZ is not set on your host system, define it here.
|
41
|
+
Otherwise UTC is used in the container.
|
42
|
+
|
43
|
+
TOKEN will be used for token-based authentication.
|
44
|
+
|
45
|
+
```
|
46
|
+
# Example
|
47
|
+
TZ=Asia/Tokyo
|
48
|
+
TOKEN='something'
|
49
|
+
```
|
50
|
+
|
51
|
+
Then build the `red_amber-minimal-notebook` container. It will take a while.
|
52
|
+
|
53
|
+
```
|
54
|
+
docker-compose build
|
55
|
+
```
|
56
|
+
|
57
|
+
## Start Jupyter Lab
|
58
|
+
|
59
|
+
After the build, start the container. Adding the `-d` option runs it detached in the background.
|
60
|
+
|
61
|
+
```
|
62
|
+
docker-compose up
|
63
|
+
```
|
64
|
+
|
65
|
+
You can access Jupyter Lab from `http://localhost:8888/` in your browser.
|
66
|
+
|
67
|
+
- `red-amber.ipynb`:
|
68
|
+
- Walks through the [README of RedAmber](https://github.com/heronshoes/red_amber#readme).
|
69
|
+
- `examples_of_red_amber.ipynb`:
|
70
|
+
- [Examples of RedAmber](https://github.com/heronshoes/red_amber/blob/main/docker/notebook/examples_of_red_amber.ipynb) in Notebook style.
|
71
|
+
|
72
|
+
## Example in REPL
|
73
|
+
|
74
|
+
You can try RedAmber in irb with pre-loaded datasets.
|
75
|
+
|
76
|
+
Start `terminal` in Jupyter.
|
77
|
+
|
78
|
+
For the first run,
|
79
|
+
|
80
|
+
```
|
81
|
+
source ~/.bashrc
|
82
|
+
../example
|
83
|
+
|
84
|
+
```
|
85
|
+
|
86
|
+
The first run will take a while to fetch and prepare the red-datasets cache.
|
87
|
+
|
88
|
+
When irb starts, you will see:
|
89
|
+
|
90
|
+
```ruby
|
91
|
+
|
92
|
+
69: # Welcome to RedAmber example!
|
93
|
+
70: # This environment will offer these pre-loaded datasets:
|
94
|
+
71: # penguins, diamonds, iris, starwars, simpsons_paradox_covid,
|
95
|
+
72: # mtcars, band_members, band_instruments, band_instruments2
|
96
|
+
73: # (original) import_cars, comecome, dataframe, subframes
|
97
|
+
=> 74: binding.irb
|
98
|
+
|
99
|
+
irb(main):001:0>
|
100
|
+
```
|
101
|
+
|
102
|
+
RedAmber is already loaded in this environment with some datasets shown above.
|
103
|
+
|
104
|
+
```ruby
|
105
|
+
irb(main):002:0> dataframe
|
106
|
+
=>
|
107
|
+
#<RedAmber::DataFrame : 6 x 3 Vectors, 0x0000000000003818>
|
108
|
+
x y z
|
109
|
+
<uint8> <string> <boolean>
|
110
|
+
0 1 A false
|
111
|
+
1 2 A true
|
112
|
+
2 3 B false
|
113
|
+
3 4 B (nil)
|
114
|
+
4 5 B true
|
115
|
+
5 6 C false
|
116
|
+
```
|
117
|
+
|
118
|
+
The next time you start this environment, you can simply run `../example`.
|
data/lib/red_amber/group.rb
CHANGED
@@ -60,11 +60,11 @@ module RedAmber
|
|
60
60
|
#
|
61
61
|
# # =>
|
62
62
|
# #<RedAmber::Group : 0x000000000000f410>
|
63
|
-
# species
|
64
|
-
# <string>
|
65
|
-
# 0 Adelie
|
66
|
-
# 1 Chinstrap
|
67
|
-
# 2 Gentoo
|
63
|
+
# species count
|
64
|
+
# <string> <uint8>
|
65
|
+
# 0 Adelie 152
|
66
|
+
# 1 Chinstrap 68
|
67
|
+
# 2 Gentoo 124
|
68
68
|
#
|
69
69
|
def initialize(dataframe, *group_keys)
|
70
70
|
@dataframe = dataframe
|
@@ -186,14 +186,14 @@ module RedAmber
|
|
186
186
|
#
|
187
187
|
# # =>
|
188
188
|
# #<RedAmber::Group : 0x0000000000003a98>
|
189
|
-
# species
|
190
|
-
# <string>
|
191
|
-
# 0 Adelie
|
192
|
-
# 1 Chinstrap
|
193
|
-
# 2 Gentoo
|
189
|
+
# species count
|
190
|
+
# <string> <uint8>
|
191
|
+
# 0 Adelie 152
|
192
|
+
# 1 Chinstrap 68
|
193
|
+
# 2 Gentoo 124
|
194
194
|
#
|
195
195
|
def inspect
|
196
|
-
"#<#{self.class} : #{format('0x%016x', object_id)}>\n#{
|
196
|
+
"#<#{self.class} : #{format('0x%016x', object_id)}>\n#{count(@group_keys)}"
|
197
197
|
end
|
198
198
|
|
199
199
|
# Summarize Group by aggregation functions from the block.
|
@@ -210,11 +210,11 @@ module RedAmber
|
|
210
210
|
#
|
211
211
|
# # =>
|
212
212
|
# #<RedAmber::Group : 0x000000000000c314>
|
213
|
-
# species
|
214
|
-
# <string>
|
215
|
-
# 0 Adelie
|
216
|
-
# 1 Chinstrap
|
217
|
-
# 2 Gentoo
|
213
|
+
# species count
|
214
|
+
# <string> <uint8>
|
215
|
+
# 0 Adelie 152
|
216
|
+
# 1 Chinstrap 68
|
217
|
+
# 2 Gentoo 124
|
218
218
|
#
|
219
219
|
# group.summarize { mean(:bill_length_mm) }
|
220
220
|
#
|
data/lib/red_amber/subframes.rb
CHANGED
@@ -10,6 +10,38 @@ module RedAmber
|
|
10
10
|
using RefineArray
|
11
11
|
using RefineArrayLike
|
12
12
|
|
13
|
+
# Entity to select sub-dataframes
|
14
|
+
class Selectors
|
15
|
+
attr_reader :selectors, :size, :sizes
|
16
|
+
|
17
|
+
def initialize(selectors)
|
18
|
+
@selectors = selectors
|
19
|
+
@size = selectors.size
|
20
|
+
@sizes = []
|
21
|
+
end
|
22
|
+
|
23
|
+
def each
|
24
|
+
@selectors.each
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# Boolean selectors of sub-dataframes
|
29
|
+
class Filters < Selectors
|
30
|
+
def sizes
|
31
|
+
# count true
|
32
|
+
@sizes = @selectors.map { |s| s.to_a.count { _1 } } # rubocop:disable Performance/Size
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# Index selectors of sub-dataframes
|
37
|
+
class Indices < Selectors
|
38
|
+
def sizes
|
39
|
+
@sizes = @selectors.map(&:size)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
private_constant :Selectors, :Filters, :Indices
|
44
|
+
|
13
45
|
class << self
|
14
46
|
# Create SubFrames from a Group.
|
15
47
|
#
|
@@ -79,13 +111,8 @@ module RedAmber
|
|
79
111
|
def by_indices(dataframe, subset_indices)
|
80
112
|
instance = allocate
|
81
113
|
instance.instance_variable_set(:@baseframe, dataframe)
|
82
|
-
|
83
|
-
|
84
|
-
subset_indices.each do |i|
|
85
|
-
y.yield DataFrame.new_dataframe_with_schema(dataframe, dataframe.take(i))
|
86
|
-
end
|
87
|
-
end
|
88
|
-
instance.instance_variable_set(:@enum, enum)
|
114
|
+
instance.instance_variable_set(:@selectors, Indices.new(subset_indices))
|
115
|
+
instance.instance_variable_set(:@frames, [])
|
89
116
|
instance
|
90
117
|
end
|
91
118
|
|
@@ -105,13 +132,8 @@ module RedAmber
|
|
105
132
|
def by_filters(dataframe, subset_filters)
|
106
133
|
instance = allocate
|
107
134
|
instance.instance_variable_set(:@baseframe, dataframe)
|
108
|
-
|
109
|
-
|
110
|
-
subset_filters.each do |i|
|
111
|
-
y.yield DataFrame.new_dataframe_with_schema(dataframe, dataframe.filter(i))
|
112
|
-
end
|
113
|
-
end
|
114
|
-
instance.instance_variable_set(:@enum, enum)
|
135
|
+
instance.instance_variable_set(:@selectors, Filters.new(subset_filters))
|
136
|
+
instance.instance_variable_set(:@frames, [])
|
115
137
|
instance
|
116
138
|
end
|
117
139
|
|
@@ -130,18 +152,13 @@ module RedAmber
|
|
130
152
|
case Array(dataframes)
|
131
153
|
when [] || [nil]
|
132
154
|
instance.instance_variable_set(:@baseframe, DataFrame.new)
|
155
|
+
instance.instance_variable_set(:@selectors, [])
|
133
156
|
instance.instance_variable_set(:@frames, [])
|
134
|
-
enum = [].each
|
135
157
|
else
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
y.yield i
|
140
|
-
end
|
141
|
-
end
|
142
|
-
instance.instance_variable_set(:@baseframe, enum.lazy)
|
158
|
+
instance.instance_variable_set(:@baseframe, nil)
|
159
|
+
instance.instance_variable_set(:@selectors, nil)
|
160
|
+
instance.instance_variable_set(:@frames, dataframes)
|
143
161
|
end
|
144
|
-
instance.instance_variable_set(:@enum, enum)
|
145
162
|
instance
|
146
163
|
end
|
147
164
|
|
@@ -261,40 +278,34 @@ module RedAmber
|
|
261
278
|
#
|
262
279
|
# @since 0.4.0
|
263
280
|
#
|
264
|
-
def initialize(dataframe,
|
281
|
+
def initialize(dataframe, selectors = nil, &block)
|
265
282
|
unless dataframe.is_a?(DataFrame)
|
266
283
|
raise SubFramesArgumentError, "not a DataFrame: #{dataframe}"
|
267
284
|
end
|
268
285
|
|
269
286
|
if block
|
270
|
-
unless
|
287
|
+
unless selectors.nil?
|
271
288
|
raise SubFramesArgumentError, 'Must not specify both arguments and block.'
|
272
289
|
end
|
273
290
|
|
274
|
-
|
291
|
+
selectors = yield(dataframe)
|
275
292
|
end
|
276
293
|
|
277
|
-
if dataframe.empty? ||
|
294
|
+
if dataframe.empty? || selectors.nil? || selectors.empty?
|
278
295
|
@baseframe = DataFrame.new
|
279
|
-
@
|
280
|
-
@enum = @frames.each
|
296
|
+
@selectors = Selectors.new([])
|
281
297
|
else
|
282
|
-
@baseframe =
|
283
|
-
@
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
dataframe.filter(i)
|
291
|
-
else
|
292
|
-
raise SubFramesArgumentError, "illegal type: #{i}"
|
293
|
-
end
|
294
|
-
yielder.yield DataFrame.new_dataframe_with_schema(dataframe, df)
|
295
|
-
end
|
298
|
+
@baseframe = dataframe
|
299
|
+
@selectors =
|
300
|
+
if selectors[0].boolean?
|
301
|
+
Filters.new(selectors)
|
302
|
+
elsif selectors[0].numeric?
|
303
|
+
Indices.new(selectors)
|
304
|
+
else
|
305
|
+
raise SubFramesArgumentError, "illegal type: #{selectors}"
|
296
306
|
end
|
297
307
|
end
|
308
|
+
@frames = []
|
298
309
|
end
|
299
310
|
|
300
311
|
# Return concatenated SubFrames as a DataFrame.
|
@@ -305,11 +316,7 @@ module RedAmber
|
|
305
316
|
# @since 0.4.0
|
306
317
|
#
|
307
318
|
def baseframe
|
308
|
-
|
309
|
-
@baseframe = reduce(&:concatenate)
|
310
|
-
else
|
311
|
-
@baseframe
|
312
|
-
end
|
319
|
+
@baseframe ||= reduce(&:concatenate)
|
313
320
|
end
|
314
321
|
alias_method :concatenate, :baseframe
|
315
322
|
alias_method :concat, :baseframe
|
@@ -384,7 +391,19 @@ module RedAmber
|
|
384
391
|
def each(&block)
|
385
392
|
return enum_for(__method__) { size } unless block
|
386
393
|
|
387
|
-
|
394
|
+
if @selectors
|
395
|
+
@selectors.each.with_index do |selector, i|
|
396
|
+
if i < @frames.size
|
397
|
+
yield @frames[i]
|
398
|
+
else
|
399
|
+
frame = get_subframe(selector)
|
400
|
+
@frames << frame
|
401
|
+
yield frame
|
402
|
+
end
|
403
|
+
end
|
404
|
+
else
|
405
|
+
@frames.each(&block)
|
406
|
+
end
|
388
407
|
nil
|
389
408
|
end
|
390
409
|
|
@@ -916,6 +935,26 @@ module RedAmber
|
|
916
935
|
#
|
917
936
|
define_subframable_method :filter_map
|
918
937
|
|
938
|
+
# Return 0...num sub-dataframes in self.
|
939
|
+
#
|
940
|
+
# @param num [Integer, Float]
|
941
|
+
# num of sub-dataframes to pick up. `num` must be positive or zero.
|
942
|
+
# @return [SubFrames]
|
943
|
+
# A new SubFrames.
|
944
|
+
# If num == 0, it returns an empty SubFrames.
|
945
|
+
# If num >= size, it returns self.
|
946
|
+
# @since 0.4.2
|
947
|
+
#
|
948
|
+
def take(num)
|
949
|
+
if num.zero?
|
950
|
+
SubFrames.new(DataFrame.new, [])
|
951
|
+
elsif num >= size
|
952
|
+
self
|
953
|
+
else
|
954
|
+
SubFrames.by_dataframes(frames(num))
|
955
|
+
end
|
956
|
+
end
|
957
|
+
|
919
958
|
# Number of subsets.
|
920
959
|
#
|
921
960
|
# @return [Integer]
|
@@ -923,7 +962,12 @@ module RedAmber
|
|
923
962
|
# @since 0.4.0
|
924
963
|
#
|
925
964
|
def size
|
926
|
-
@size ||=
|
965
|
+
@size ||=
|
966
|
+
if @selectors
|
967
|
+
@selectors.size
|
968
|
+
else
|
969
|
+
@frames.size
|
970
|
+
end
|
927
971
|
end
|
928
972
|
|
929
973
|
# Size list of subsets.
|
@@ -933,7 +977,12 @@ module RedAmber
|
|
933
977
|
# @since 0.4.0
|
934
978
|
#
|
935
979
|
def sizes
|
936
|
-
@sizes ||=
|
980
|
+
@sizes ||=
|
981
|
+
if @selectors
|
982
|
+
@selectors.sizes
|
983
|
+
else
|
984
|
+
@frames.map(&:size)
|
985
|
+
end
|
937
986
|
end
|
938
987
|
|
939
988
|
# Indices at the top of each sub DataFrames.
|
@@ -945,10 +994,17 @@ module RedAmber
|
|
945
994
|
# @since 0.4.0
|
946
995
|
#
|
947
996
|
def offset_indices
|
948
|
-
|
949
|
-
|
950
|
-
|
951
|
-
|
997
|
+
case @selectors
|
998
|
+
when Filters
|
999
|
+
@selectors.selectors.map do |selector|
|
1000
|
+
selector.each.with_index.find { |x, _| x }[1]
|
1001
|
+
end
|
1002
|
+
else # Indices, nil
|
1003
|
+
sum = 0
|
1004
|
+
sizes.map do |size|
|
1005
|
+
sum += size
|
1006
|
+
sum - size
|
1007
|
+
end
|
952
1008
|
end
|
953
1009
|
end
|
954
1010
|
|
@@ -965,11 +1021,11 @@ module RedAmber
|
|
965
1021
|
# Test if self has only one subset and it is comprehensive.
|
966
1022
|
#
|
967
1023
|
# @return [true, false]
|
968
|
-
# true if only member of self is equal to universal DataFrame.
|
1024
|
+
# true if the only member of self is equal to universal DataFrame.
|
969
1025
|
# @since 0.4.0
|
970
1026
|
#
|
971
1027
|
def universal?
|
972
|
-
size == 1 &&
|
1028
|
+
size == 1 && first == @baseframe
|
973
1029
|
end
|
974
1030
|
|
975
1031
|
# Return string representation of self.
|
@@ -1012,7 +1068,7 @@ module RedAmber
|
|
1012
1068
|
#
|
1013
1069
|
# @since 0.4.0
|
1014
1070
|
#
|
1015
|
-
def to_s(limit:
|
1071
|
+
def to_s(limit: 5)
|
1016
1072
|
_to_s(limit: limit)
|
1017
1073
|
end
|
1018
1074
|
|
@@ -1064,10 +1120,10 @@ module RedAmber
|
|
1064
1120
|
#
|
1065
1121
|
# @since 0.4.0
|
1066
1122
|
#
|
1067
|
-
def inspect(limit:
|
1123
|
+
def inspect(limit: 5)
|
1068
1124
|
shape =
|
1069
|
-
if @baseframe.
|
1070
|
-
|
1125
|
+
if @baseframe.nil?
|
1126
|
+
'(Not prepared)'
|
1071
1127
|
else
|
1072
1128
|
baseframe.shape_str(with_id: true)
|
1073
1129
|
end
|
@@ -1079,14 +1135,51 @@ module RedAmber
|
|
1079
1135
|
"---\n#{_to_s(limit: limit, with_id: true)}"
|
1080
1136
|
end
|
1081
1137
|
|
1138
|
+
# Return an Array of sub DataFrames
|
1139
|
+
#
|
1140
|
+
# @overload frames
|
1141
|
+
# Returns all sub dataframes.
|
1142
|
+
#
|
1143
|
+
# @return [Array<DataFrame>]
|
1144
|
+
# sub DataFrames.
|
1145
|
+
#
|
1146
|
+
# @overload frames(n_frames)
|
1147
|
+
# Returns partial sub dataframes.
|
1148
|
+
#
|
1149
|
+
# @param n_frames [Integer]
|
1150
|
+
# num of dataframes to retrieve.
|
1151
|
+
# @return [Array<DataFrame>]
|
1152
|
+
# sub DataFrames.
|
1153
|
+
#
|
1154
|
+
# @since 0.4.2
|
1155
|
+
#
|
1156
|
+
def frames(n_frames = nil)
|
1157
|
+
n_frames = size if n_frames.nil?
|
1158
|
+
|
1159
|
+
if @frames.size < n_frames
|
1160
|
+
@frames = each.take(n_frames)
|
1161
|
+
else
|
1162
|
+
@frames.take(n_frames)
|
1163
|
+
end
|
1164
|
+
end
|
1165
|
+
|
1082
1166
|
private
|
1083
1167
|
|
1084
|
-
|
1085
|
-
|
1168
|
+
# Get sub dataframe specified by 'selector'
|
1169
|
+
def get_subframe(selector)
|
1170
|
+
df =
|
1171
|
+
case @selectors
|
1172
|
+
when Filters
|
1173
|
+
@baseframe.filter(selector)
|
1174
|
+
when Indices
|
1175
|
+
@baseframe.take(selector)
|
1176
|
+
end
|
1177
|
+
DataFrame.new_dataframe_with_schema(@baseframe, df)
|
1086
1178
|
end
|
1087
1179
|
|
1088
|
-
|
1089
|
-
|
1180
|
+
# Subcontractor of to_s
|
1181
|
+
def _to_s(limit: 5, with_id: false)
|
1182
|
+
a = each.take(limit).map do |df|
|
1090
1183
|
if with_id
|
1091
1184
|
"#<#{df.shape_str(with_id: with_id)}>\n" \
|
1092
1185
|
"#{df.to_s(head: 2, tail: 2)}"
|