red_amber 0.2.1 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +15 -0
  3. data/CHANGELOG.md +170 -20
  4. data/Gemfile +4 -2
  5. data/README.md +121 -302
  6. data/benchmark/basic.yml +79 -0
  7. data/benchmark/combine.yml +63 -0
  8. data/benchmark/drop_nil.yml +15 -3
  9. data/benchmark/group.yml +33 -0
  10. data/benchmark/reshape.yml +27 -0
  11. data/benchmark/{csv_load_penguins.yml → rover/csv_load_penguins.yml} +3 -3
  12. data/benchmark/rover/flights.yml +23 -0
  13. data/benchmark/rover/penguins.yml +23 -0
  14. data/benchmark/rover/planes.yml +23 -0
  15. data/benchmark/rover/weather.yml +23 -0
  16. data/doc/DataFrame.md +611 -318
  17. data/doc/Vector.md +31 -36
  18. data/doc/image/basic_verbs.png +0 -0
  19. data/doc/image/dataframe/assign.png +0 -0
  20. data/doc/image/dataframe/assign_operation.png +0 -0
  21. data/doc/image/dataframe/drop.png +0 -0
  22. data/doc/image/dataframe/join.png +0 -0
  23. data/doc/image/dataframe/pick.png +0 -0
  24. data/doc/image/dataframe/pick_operation.png +0 -0
  25. data/doc/image/dataframe/remove.png +0 -0
  26. data/doc/image/dataframe/rename.png +0 -0
  27. data/doc/image/dataframe/rename_operation.png +0 -0
  28. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  29. data/doc/image/dataframe/set_and_bind.png +0 -0
  30. data/doc/image/dataframe/slice.png +0 -0
  31. data/doc/image/dataframe/slice_operation.png +0 -0
  32. data/doc/image/dataframe_model.png +0 -0
  33. data/doc/image/group_operation.png +0 -0
  34. data/doc/image/replace-if_then.png +0 -0
  35. data/doc/image/reshaping_dataframe.png +0 -0
  36. data/doc/image/screenshot.png +0 -0
  37. data/doc/image/vector/binary_element_wise.png +0 -0
  38. data/doc/image/vector/unary_aggregation.png +0 -0
  39. data/doc/image/vector/unary_aggregation_w_option.png +0 -0
  40. data/doc/image/vector/unary_element_wise.png +0 -0
  41. data/lib/red_amber/data_frame.rb +16 -42
  42. data/lib/red_amber/data_frame_combinable.rb +283 -0
  43. data/lib/red_amber/data_frame_displayable.rb +58 -3
  44. data/lib/red_amber/data_frame_loadsave.rb +36 -0
  45. data/lib/red_amber/data_frame_reshaping.rb +8 -6
  46. data/lib/red_amber/data_frame_selectable.rb +9 -9
  47. data/lib/red_amber/data_frame_variable_operation.rb +27 -21
  48. data/lib/red_amber/group.rb +100 -17
  49. data/lib/red_amber/helper.rb +20 -30
  50. data/lib/red_amber/vector.rb +56 -30
  51. data/lib/red_amber/vector_functions.rb +0 -8
  52. data/lib/red_amber/vector_selectable.rb +9 -1
  53. data/lib/red_amber/vector_updatable.rb +61 -63
  54. data/lib/red_amber/version.rb +1 -1
  55. data/lib/red_amber.rb +2 -0
  56. data/red_amber.gemspec +1 -1
  57. metadata +32 -11
  58. data/doc/examples_of_red_amber.ipynb +0 -8979
@@ -1,11 +1,23 @@
1
+ contexts:
2
+ - gems:
3
+ red_amber: 0.1.8
4
+ - gems:
5
+ red_amber: 0.2.2
6
+ - name: HEAD
7
+ prelude: |
8
+ $LOAD_PATH.unshift(File.expand_path('lib'))
9
+ require 'red_amber'
10
+
1
11
  prelude: |
2
12
  require 'datasets-arrow'
3
13
  require 'red_amber'
4
14
 
5
15
  penguins = RedAmber::DataFrame.new(Datasets::Penguins.new.to_arrow)
6
16
 
7
- def drop_nil(penguins)
8
- penguins.remove { vectors.map { |v| v.is_nil} }
17
+ def remove_nil(penguins)
18
+ penguins.remove { vectors.map(&:is_nil).reduce(&:|) }
9
19
  end
10
20
 
11
- benchmark: drop_nil(penguins)
21
+ benchmark:
22
+ 'Remove and reduce': remove_nil(penguins)
23
+ 'remove_nil method': penguins.remove_nil
@@ -0,0 +1,33 @@
1
+ contexts:
2
+ - name: HEAD
3
+ prelude: |
4
+ $LOAD_PATH.unshift(File.expand_path('lib'))
5
+ - gems:
6
+ red_amber: 0.2.2
7
+
8
+ prelude: |
9
+ require 'red_amber'
10
+ require 'datasets-arrow'
11
+
12
+ ds = Datasets::Rdatasets.new('nycflights13', 'flights')
13
+ df = RedAmber::DataFrame.new(ds.to_arrow)
14
+ .assign(:flight) { flight.map(&:to_s) }
15
+
16
+ slicer = df[:distance] > 1000
17
+ distance_km = df[:distance] * 1.852
18
+
19
+ benchmark:
20
+ 'G01: sum distance by destination': |
21
+ df.group(:dest).sum(:distance)
22
+
23
+ 'G02: sum arr_delay by month and day': |
24
+ df.group(:month, :day).sum(:arr_delay)
25
+
26
+ 'G03: sum arr_delay, mean distance by flight': |
27
+ df.group(:flight) { [sum(:arr_delay), mean(:distance)] }
28
+
29
+ 'G04: mean air_time, distance by flight': |
30
+ df.group(:flight).mean(:air_time, :distance)
31
+
32
+ 'G05: sum dep_delay, arr_delay by carrier': |
33
+ df.group(:carrier).sum(:dep_delay, :arr_delay)
@@ -0,0 +1,27 @@
1
+ # --repeat-count 3
2
+
3
+ contexts:
4
+ - name: HEAD
5
+ prelude: |
6
+ $LOAD_PATH.unshift(File.expand_path('lib'))
7
+ - gems:
8
+ red_amber: 0.2.2
9
+
10
+ prelude: |
11
+ require 'red_amber'
12
+ require 'datasets-arrow'
13
+
14
+ ds = Datasets::Rdatasets.new('tidyr', 'billboard')
15
+ df = RedAmber::DataFrame.new(ds.to_arrow)
16
+ sub_df = df.pick(:track, df.keys.select{ |k| k.start_with? 'wk' })
17
+ long_df = df.to_long(:artist, :track, :'date.entered', name: :week, value: :rank)
18
+
19
+ benchmark:
20
+ 'R01: Transpose a DataFrame': |
21
+ sub_df.transpose(name: :week)
22
+
23
+ 'R02: Reshape to longer DataFrame': |
24
+ df.to_long(:artist, :track, :'date.entered', name: :week, value: :rank)
25
+
26
+ 'R03: Reshape to wider DataFrame': |
27
+ long_df.to_wide(name: :week, value: :rank)
@@ -2,12 +2,12 @@ prelude: |
2
2
  require 'rover'
3
3
  require 'red_amber'
4
4
 
5
- penguins_csv = 'benchmark/cache/penguins.csv'
5
+ penguins_csv = 'tmp/penguins.csv'
6
6
 
7
7
  unless File.exist?(penguins_csv)
8
8
  require 'datasets-arrow'
9
- arrow = Datasets::Penguins.new.to_arrow
10
- RedAmber::DataFrame.new(arrow).save(penguins_csv)
9
+ ds = Datasets::Penguins.new
10
+ RedAmber::DataFrame.new(ds).save(penguins_csv)
11
11
  end
12
12
 
13
13
  benchmark:
@@ -0,0 +1,23 @@
1
+ contexts:
2
+ - gems:
3
+ red_amber: 0.2.2
4
+ - name: HEAD
5
+ prelude: |
6
+ $LOAD_PATH.unshift(File.expand_path('lib'))
7
+ require 'red_amber'
8
+
9
+ prelude: |
10
+ require 'rover'
11
+ require 'datasets-arrow'
12
+ ds = Datasets::Rdatasets.new('nycflights13', 'flights')
13
+ df = RedAmber::DataFrame.new(ds)
14
+ rover = Rover::DataFrame.new(df.to_h)
15
+ group_keys = [:month, :origin]
16
+ summary_key = :air_time
17
+
18
+ benchmark:
19
+ 'flights Group by Rover': |
20
+ rover.group(group_keys).count
21
+
22
+ 'flights Group by RedAmber': |
23
+ df.group(group_keys).count
@@ -0,0 +1,23 @@
1
+ contexts:
2
+ - gems:
3
+ red_amber: 0.2.2
4
+ - name: HEAD
5
+ prelude: |
6
+ $LOAD_PATH.unshift(File.expand_path('lib'))
7
+ require 'red_amber'
8
+
9
+ prelude: |
10
+ require 'rover'
11
+ require 'datasets-arrow'
12
+ ds = Datasets::Penguins.new
13
+ df = RedAmber::DataFrame.new(ds)
14
+ rover = Rover::DataFrame.new(df.to_h)
15
+ group_keys = [:species, :island]
16
+ summary_key = :body_mass_g
17
+
18
+ benchmark:
19
+ 'penguins Group by Rover': |
20
+ rover.group(group_keys).mean(summary_key)
21
+
22
+ 'penguins Group by RedAmber': |
23
+ df.group(group_keys).mean(summary_key)
@@ -0,0 +1,23 @@
1
+ contexts:
2
+ - gems:
3
+ red_amber: 0.2.2
4
+ - name: HEAD
5
+ prelude: |
6
+ $LOAD_PATH.unshift(File.expand_path('lib'))
7
+ require 'red_amber'
8
+
9
+ prelude: |
10
+ require 'rover'
11
+ require 'datasets-arrow'
12
+ ds = Datasets::Rdatasets.new('nycflights13', 'planes')
13
+ df = RedAmber::DataFrame.new(ds)
14
+ rover = Rover::DataFrame.new(df.to_h)
15
+ group_keys = [:engines, :engine]
16
+ summary_key = :seats
17
+
18
+ benchmark:
19
+ 'planes Group by Rover': |
20
+ rover.group(group_keys).mean(summary_key)
21
+
22
+ 'planes Group by RedAmber': |
23
+ df.group(group_keys).mean(summary_key)
@@ -0,0 +1,23 @@
1
+ contexts:
2
+ - gems:
3
+ red_amber: 0.2.2
4
+ - name: HEAD
5
+ prelude: |
6
+ $LOAD_PATH.unshift(File.expand_path('lib'))
7
+ require 'red_amber'
8
+
9
+ prelude: |
10
+ require 'rover'
11
+ require 'datasets-arrow'
12
+ ds = Datasets::Rdatasets.new('nycflights13', 'weather')
13
+ df = RedAmber::DataFrame.new(ds)
14
+ rover = Rover::DataFrame.new(df.to_h)
15
+ group_keys = [:month, :origin]
16
+ summary_key = :temp
17
+
18
+ benchmark:
19
+ 'weather Group by Rover': |
20
+ rover.group(group_keys).mean(summary_key)
21
+
22
+ 'weather Group by RedAmber': |
23
+ df.group(group_keys).mean(summary_key)