red_amber 0.2.1 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +15 -0
- data/CHANGELOG.md +170 -20
- data/Gemfile +4 -2
- data/README.md +121 -302
- data/benchmark/basic.yml +79 -0
- data/benchmark/combine.yml +63 -0
- data/benchmark/drop_nil.yml +15 -3
- data/benchmark/group.yml +33 -0
- data/benchmark/reshape.yml +27 -0
- data/benchmark/{csv_load_penguins.yml → rover/csv_load_penguins.yml} +3 -3
- data/benchmark/rover/flights.yml +23 -0
- data/benchmark/rover/penguins.yml +23 -0
- data/benchmark/rover/planes.yml +23 -0
- data/benchmark/rover/weather.yml +23 -0
- data/doc/DataFrame.md +611 -318
- data/doc/Vector.md +31 -36
- data/doc/image/basic_verbs.png +0 -0
- data/doc/image/dataframe/assign.png +0 -0
- data/doc/image/dataframe/assign_operation.png +0 -0
- data/doc/image/dataframe/drop.png +0 -0
- data/doc/image/dataframe/join.png +0 -0
- data/doc/image/dataframe/pick.png +0 -0
- data/doc/image/dataframe/pick_operation.png +0 -0
- data/doc/image/dataframe/remove.png +0 -0
- data/doc/image/dataframe/rename.png +0 -0
- data/doc/image/dataframe/rename_operation.png +0 -0
- data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
- data/doc/image/dataframe/set_and_bind.png +0 -0
- data/doc/image/dataframe/slice.png +0 -0
- data/doc/image/dataframe/slice_operation.png +0 -0
- data/doc/image/dataframe_model.png +0 -0
- data/doc/image/group_operation.png +0 -0
- data/doc/image/replace-if_then.png +0 -0
- data/doc/image/reshaping_dataframe.png +0 -0
- data/doc/image/screenshot.png +0 -0
- data/doc/image/vector/binary_element_wise.png +0 -0
- data/doc/image/vector/unary_aggregation.png +0 -0
- data/doc/image/vector/unary_aggregation_w_option.png +0 -0
- data/doc/image/vector/unary_element_wise.png +0 -0
- data/lib/red_amber/data_frame.rb +16 -42
- data/lib/red_amber/data_frame_combinable.rb +283 -0
- data/lib/red_amber/data_frame_displayable.rb +58 -3
- data/lib/red_amber/data_frame_loadsave.rb +36 -0
- data/lib/red_amber/data_frame_reshaping.rb +8 -6
- data/lib/red_amber/data_frame_selectable.rb +9 -9
- data/lib/red_amber/data_frame_variable_operation.rb +27 -21
- data/lib/red_amber/group.rb +100 -17
- data/lib/red_amber/helper.rb +20 -30
- data/lib/red_amber/vector.rb +56 -30
- data/lib/red_amber/vector_functions.rb +0 -8
- data/lib/red_amber/vector_selectable.rb +9 -1
- data/lib/red_amber/vector_updatable.rb +61 -63
- data/lib/red_amber/version.rb +1 -1
- data/lib/red_amber.rb +2 -0
- data/red_amber.gemspec +1 -1
- metadata +32 -11
- data/doc/examples_of_red_amber.ipynb +0 -8979
data/benchmark/drop_nil.yml
CHANGED
@@ -1,11 +1,23 @@
|
|
1
|
+
contexts:
|
2
|
+
- gems:
|
3
|
+
red_amber: 0.1.8
|
4
|
+
- gems:
|
5
|
+
red_amber: 0.2.2
|
6
|
+
- name: HEAD
|
7
|
+
prelude: |
|
8
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
9
|
+
require 'red_amber'
|
10
|
+
|
1
11
|
prelude: |
|
2
12
|
require 'datasets-arrow'
|
3
13
|
require 'red_amber'
|
4
14
|
|
5
15
|
penguins = RedAmber::DataFrame.new(Datasets::Penguins.new.to_arrow)
|
6
16
|
|
7
|
-
def
|
8
|
-
penguins.remove { vectors.map
|
17
|
+
def remove_nil(penguins)
|
18
|
+
penguins.remove { vectors.map(&:is_nil).reduce(&:|) }
|
9
19
|
end
|
10
20
|
|
11
|
-
benchmark:
|
21
|
+
benchmark:
|
22
|
+
'Remove and reduce': remove_nil(penguins)
|
23
|
+
'remove_nil method': penguins.remove_nil
|
data/benchmark/group.yml
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
contexts:
|
2
|
+
- name: HEAD
|
3
|
+
prelude: |
|
4
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
5
|
+
- gems:
|
6
|
+
red_amber: 0.2.2
|
7
|
+
|
8
|
+
prelude: |
|
9
|
+
require 'red_amber'
|
10
|
+
require 'datasets-arrow'
|
11
|
+
|
12
|
+
ds = Datasets::Rdatasets.new('nycflights13', 'flights')
|
13
|
+
df = RedAmber::DataFrame.new(ds.to_arrow)
|
14
|
+
.assign(:flight) { flight.map(&:to_s) }
|
15
|
+
|
16
|
+
slicer = df[:distance] > 1000
|
17
|
+
distance_km = df[:distance] * 1.852
|
18
|
+
|
19
|
+
benchmark:
|
20
|
+
'G01: sum distance by destination': |
|
21
|
+
df.group(:dest).sum(:distance)
|
22
|
+
|
23
|
+
'G02: sum arr_delay by month and day': |
|
24
|
+
df.group(:month, :day).sum(:arr_delay)
|
25
|
+
|
26
|
+
'G03: sum arr_delay, mean distance by flight': |
|
27
|
+
df.group(:flight) { [sum(:arr_delay), mean(:distance)] }
|
28
|
+
|
29
|
+
'G04: mean air_time, distance by flight': |
|
30
|
+
df.group(:flight).mean(:air_time, :distance)
|
31
|
+
|
32
|
+
'G05: sum dep_delay, arr_delay by carrier': |
|
33
|
+
df.group(:carrier).sum(:dep_delay, :arr_delay)
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# --repeat-count 3
|
2
|
+
|
3
|
+
contexts:
|
4
|
+
- name: HEAD
|
5
|
+
prelude: |
|
6
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
+
- gems:
|
8
|
+
red_amber: 0.2.2
|
9
|
+
|
10
|
+
prelude: |
|
11
|
+
require 'red_amber'
|
12
|
+
require 'datasets-arrow'
|
13
|
+
|
14
|
+
ds = Datasets::Rdatasets.new('tidyr', 'billboard')
|
15
|
+
df = RedAmber::DataFrame.new(ds.to_arrow)
|
16
|
+
sub_df = df.pick(:track, df.keys.select{ |k| k.start_with? 'wk' })
|
17
|
+
long_df = df.to_long(:artist, :track, :'date.entered', name: :week, value: :rank)
|
18
|
+
|
19
|
+
benchmark:
|
20
|
+
'R01: Transpose a DataFrame': |
|
21
|
+
sub_df.transpose(name: :week)
|
22
|
+
|
23
|
+
'R02: Reshape to longer DataFrame': |
|
24
|
+
df.to_long(:artist, :track, :'date.entered', name: :week, value: :rank)
|
25
|
+
|
26
|
+
'R03: Reshape to wider DataFrame': |
|
27
|
+
long_df.to_wide(name: :week, value: :rank)
|
@@ -2,12 +2,12 @@ prelude: |
|
|
2
2
|
require 'rover'
|
3
3
|
require 'red_amber'
|
4
4
|
|
5
|
-
penguins_csv = '
|
5
|
+
penguins_csv = 'tmp/penguins.csv'
|
6
6
|
|
7
7
|
unless File.exist?(penguins_csv)
|
8
8
|
require 'datasets-arrow'
|
9
|
-
|
10
|
-
RedAmber::DataFrame.new(
|
9
|
+
ds = Datasets::Penguins.new
|
10
|
+
RedAmber::DataFrame.new(ds).save(penguins_csv)
|
11
11
|
end
|
12
12
|
|
13
13
|
benchmark:
|
@@ -0,0 +1,23 @@
|
|
1
|
+
contexts:
|
2
|
+
- gems:
|
3
|
+
red_amber: 0.2.2
|
4
|
+
- name: HEAD
|
5
|
+
prelude: |
|
6
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
+
require 'red_amber'
|
8
|
+
|
9
|
+
prelude: |
|
10
|
+
require 'rover'
|
11
|
+
require 'datasets-arrow'
|
12
|
+
ds = Datasets::Rdatasets.new('nycflights13', 'flights')
|
13
|
+
df = RedAmber::DataFrame.new(ds)
|
14
|
+
rover = Rover::DataFrame.new(df.to_h)
|
15
|
+
group_keys = [:month, :origin]
|
16
|
+
summary_key = :air_time
|
17
|
+
|
18
|
+
benchmark:
|
19
|
+
'flights Group by Rover': |
|
20
|
+
rover.group(group_keys).count
|
21
|
+
|
22
|
+
'flights Group by RedAmber': |
|
23
|
+
df.group(group_keys).count
|
@@ -0,0 +1,23 @@
|
|
1
|
+
contexts:
|
2
|
+
- gems:
|
3
|
+
red_amber: 0.2.2
|
4
|
+
- name: HEAD
|
5
|
+
prelude: |
|
6
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
+
require 'red_amber'
|
8
|
+
|
9
|
+
prelude: |
|
10
|
+
require 'rover'
|
11
|
+
require 'datasets-arrow'
|
12
|
+
ds = Datasets::Penguins.new
|
13
|
+
df = RedAmber::DataFrame.new(ds)
|
14
|
+
rover = Rover::DataFrame.new(df.to_h)
|
15
|
+
group_keys = [:species, :island]
|
16
|
+
summary_key = :body_mass_g
|
17
|
+
|
18
|
+
benchmark:
|
19
|
+
'penguins Group by Rover': |
|
20
|
+
rover.group(group_keys).mean(summary_key)
|
21
|
+
|
22
|
+
'penguins Group by RedAmber': |
|
23
|
+
df.group(group_keys).mean(summary_key)
|
@@ -0,0 +1,23 @@
|
|
1
|
+
contexts:
|
2
|
+
- gems:
|
3
|
+
red_amber: 0.2.2
|
4
|
+
- name: HEAD
|
5
|
+
prelude: |
|
6
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
+
require 'red_amber'
|
8
|
+
|
9
|
+
prelude: |
|
10
|
+
require 'rover'
|
11
|
+
require 'datasets-arrow'
|
12
|
+
ds = Datasets::Rdatasets.new('nycflights13', 'planes')
|
13
|
+
df = RedAmber::DataFrame.new(ds)
|
14
|
+
rover = Rover::DataFrame.new(df.to_h)
|
15
|
+
group_keys = [:engines, :engine]
|
16
|
+
summary_key = :seats
|
17
|
+
|
18
|
+
benchmark:
|
19
|
+
'planes Group by Rover': |
|
20
|
+
rover.group(group_keys).mean(summary_key)
|
21
|
+
|
22
|
+
'planes Group by RedAmber': |
|
23
|
+
df.group(group_keys).mean(summary_key)
|
@@ -0,0 +1,23 @@
|
|
1
|
+
contexts:
|
2
|
+
- gems:
|
3
|
+
red_amber: 0.2.2
|
4
|
+
- name: HEAD
|
5
|
+
prelude: |
|
6
|
+
$LOAD_PATH.unshift(File.expand_path('lib'))
|
7
|
+
require 'red_amber'
|
8
|
+
|
9
|
+
prelude: |
|
10
|
+
require 'rover'
|
11
|
+
require 'datasets-arrow'
|
12
|
+
ds = Datasets::Rdatasets.new('nycflights13', 'weather')
|
13
|
+
df = RedAmber::DataFrame.new(ds)
|
14
|
+
rover = Rover::DataFrame.new(df.to_h)
|
15
|
+
group_keys = [:month, :origin]
|
16
|
+
summary_key = :temp
|
17
|
+
|
18
|
+
benchmark:
|
19
|
+
'weather Group by Rover': |
|
20
|
+
rover.group(group_keys).mean(summary_key)
|
21
|
+
|
22
|
+
'weather Group by RedAmber': |
|
23
|
+
df.group(group_keys).mean(summary_key)
|