red_amber 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +15 -0
  3. data/CHANGELOG.md +170 -20
  4. data/Gemfile +4 -2
  5. data/README.md +121 -302
  6. data/benchmark/basic.yml +79 -0
  7. data/benchmark/combine.yml +63 -0
  8. data/benchmark/drop_nil.yml +15 -3
  9. data/benchmark/group.yml +33 -0
  10. data/benchmark/reshape.yml +27 -0
  11. data/benchmark/{csv_load_penguins.yml → rover/csv_load_penguins.yml} +3 -3
  12. data/benchmark/rover/flights.yml +23 -0
  13. data/benchmark/rover/penguins.yml +23 -0
  14. data/benchmark/rover/planes.yml +23 -0
  15. data/benchmark/rover/weather.yml +23 -0
  16. data/doc/DataFrame.md +611 -318
  17. data/doc/Vector.md +31 -36
  18. data/doc/image/basic_verbs.png +0 -0
  19. data/doc/image/dataframe/assign.png +0 -0
  20. data/doc/image/dataframe/assign_operation.png +0 -0
  21. data/doc/image/dataframe/drop.png +0 -0
  22. data/doc/image/dataframe/join.png +0 -0
  23. data/doc/image/dataframe/pick.png +0 -0
  24. data/doc/image/dataframe/pick_operation.png +0 -0
  25. data/doc/image/dataframe/remove.png +0 -0
  26. data/doc/image/dataframe/rename.png +0 -0
  27. data/doc/image/dataframe/rename_operation.png +0 -0
  28. data/doc/image/dataframe/reshaping_DataFrames.png +0 -0
  29. data/doc/image/dataframe/set_and_bind.png +0 -0
  30. data/doc/image/dataframe/slice.png +0 -0
  31. data/doc/image/dataframe/slice_operation.png +0 -0
  32. data/doc/image/dataframe_model.png +0 -0
  33. data/doc/image/group_operation.png +0 -0
  34. data/doc/image/replace-if_then.png +0 -0
  35. data/doc/image/reshaping_dataframe.png +0 -0
  36. data/doc/image/screenshot.png +0 -0
  37. data/doc/image/vector/binary_element_wise.png +0 -0
  38. data/doc/image/vector/unary_aggregation.png +0 -0
  39. data/doc/image/vector/unary_aggregation_w_option.png +0 -0
  40. data/doc/image/vector/unary_element_wise.png +0 -0
  41. data/lib/red_amber/data_frame.rb +16 -42
  42. data/lib/red_amber/data_frame_combinable.rb +283 -0
  43. data/lib/red_amber/data_frame_displayable.rb +58 -3
  44. data/lib/red_amber/data_frame_loadsave.rb +36 -0
  45. data/lib/red_amber/data_frame_reshaping.rb +8 -6
  46. data/lib/red_amber/data_frame_selectable.rb +9 -9
  47. data/lib/red_amber/data_frame_variable_operation.rb +27 -21
  48. data/lib/red_amber/group.rb +100 -17
  49. data/lib/red_amber/helper.rb +20 -30
  50. data/lib/red_amber/vector.rb +56 -30
  51. data/lib/red_amber/vector_functions.rb +0 -8
  52. data/lib/red_amber/vector_selectable.rb +9 -1
  53. data/lib/red_amber/vector_updatable.rb +61 -63
  54. data/lib/red_amber/version.rb +1 -1
  55. data/lib/red_amber.rb +2 -0
  56. data/red_amber.gemspec +1 -1
  57. metadata +32 -11
  58. data/doc/examples_of_red_amber.ipynb +0 -8979
@@ -1,11 +1,23 @@
1
+ contexts:
2
+ - gems:
3
+ red_amber: 0.1.8
4
+ - gems:
5
+ red_amber: 0.2.2
6
+ - name: HEAD
7
+ prelude: |
8
+ $LOAD_PATH.unshift(File.expand_path('lib'))
9
+ require 'red_amber'
10
+
1
11
  prelude: |
2
12
  require 'datasets-arrow'
3
13
  require 'red_amber'
4
14
 
5
15
  penguins = RedAmber::DataFrame.new(Datasets::Penguins.new.to_arrow)
6
16
 
7
- def drop_nil(penguins)
8
- penguins.remove { vectors.map { |v| v.is_nil} }
17
+ def remove_nil(penguins)
18
+ penguins.remove { vectors.map(&:is_nil).reduce(&:|) }
9
19
  end
10
20
 
11
- benchmark: drop_nil(penguins)
21
+ benchmark:
22
+ 'Remove and reduce': remove_nil(penguins)
23
+ 'remove_nil method': penguins.remove_nil
@@ -0,0 +1,33 @@
1
+ contexts:
2
+ - name: HEAD
3
+ prelude: |
4
+ $LOAD_PATH.unshift(File.expand_path('lib'))
5
+ - gems:
6
+ red_amber: 0.2.2
7
+
8
+ prelude: |
9
+ require 'red_amber'
10
+ require 'datasets-arrow'
11
+
12
+ ds = Datasets::Rdatasets.new('nycflights13', 'flights')
13
+ df = RedAmber::DataFrame.new(ds.to_arrow)
14
+ .assign(:flight) { flight.map(&:to_s) }
15
+
16
+ slicer = df[:distance] > 1000
17
+ distance_km = df[:distance] * 1.852
18
+
19
+ benchmark:
20
+ 'G01: sum distance by destination': |
21
+ df.group(:dest).sum(:distance)
22
+
23
+ 'G02: sum arr_delay by month and day': |
24
+ df.group(:month, :day).sum(:arr_delay)
25
+
26
+ 'G03: sum arr_delay, mean distance by flight': |
27
+ df.group(:flight) { [sum(:arr_delay), mean(:distance)] }
28
+
29
+ 'G04: mean air_time, distance by flight': |
30
+ df.group(:flight).mean(:air_time, :distance)
31
+
32
+ 'G05: sum dep_delay, arr_delay by carrier': |
33
+ df.group(:carrier).sum(:dep_delay, :arr_delay)
@@ -0,0 +1,27 @@
1
+ # --repeat-count 3
2
+
3
+ contexts:
4
+ - name: HEAD
5
+ prelude: |
6
+ $LOAD_PATH.unshift(File.expand_path('lib'))
7
+ - gems:
8
+ red_amber: 0.2.2
9
+
10
+ prelude: |
11
+ require 'red_amber'
12
+ require 'datasets-arrow'
13
+
14
+ ds = Datasets::Rdatasets.new('tidyr', 'billboard')
15
+ df = RedAmber::DataFrame.new(ds.to_arrow)
16
+ sub_df = df.pick(:track, df.keys.select{ |k| k.start_with? 'wk' })
17
+ long_df = df.to_long(:artist, :track, :'date.entered', name: :week, value: :rank)
18
+
19
+ benchmark:
20
+ 'R01: Transpose a DataFrame': |
21
+ sub_df.transpose(name: :week)
22
+
23
+ 'R02: Reshape to longer DataFrame': |
24
+ df.to_long(:artist, :track, :'date.entered', name: :week, value: :rank)
25
+
26
+ 'R03: Reshape to wider DataFrame': |
27
+ long_df.to_wide(name: :week, value: :rank)
@@ -2,12 +2,12 @@ prelude: |
2
2
  require 'rover'
3
3
  require 'red_amber'
4
4
 
5
- penguins_csv = 'benchmark/cache/penguins.csv'
5
+ penguins_csv = 'tmp/penguins.csv'
6
6
 
7
7
  unless File.exist?(penguins_csv)
8
8
  require 'datasets-arrow'
9
- arrow = Datasets::Penguins.new.to_arrow
10
- RedAmber::DataFrame.new(arrow).save(penguins_csv)
9
+ ds = Datasets::Penguins.new
10
+ RedAmber::DataFrame.new(ds).save(penguins_csv)
11
11
  end
12
12
 
13
13
  benchmark:
@@ -0,0 +1,23 @@
1
+ contexts:
2
+ - gems:
3
+ red_amber: 0.2.2
4
+ - name: HEAD
5
+ prelude: |
6
+ $LOAD_PATH.unshift(File.expand_path('lib'))
7
+ require 'red_amber'
8
+
9
+ prelude: |
10
+ require 'rover'
11
+ require 'datasets-arrow'
12
+ ds = Datasets::Rdatasets.new('nycflights13', 'flights')
13
+ df = RedAmber::DataFrame.new(ds)
14
+ rover = Rover::DataFrame.new(df.to_h)
15
+ group_keys = [:month, :origin]
16
+ summary_key = :air_time
17
+
18
+ benchmark:
19
+ 'flights Group by Rover': |
20
+ rover.group(group_keys).count
21
+
22
+ 'flights Group by RedAmber': |
23
+ df.group(group_keys).count
@@ -0,0 +1,23 @@
1
+ contexts:
2
+ - gems:
3
+ red_amber: 0.2.2
4
+ - name: HEAD
5
+ prelude: |
6
+ $LOAD_PATH.unshift(File.expand_path('lib'))
7
+ require 'red_amber'
8
+
9
+ prelude: |
10
+ require 'rover'
11
+ require 'datasets-arrow'
12
+ ds = Datasets::Penguins.new
13
+ df = RedAmber::DataFrame.new(ds)
14
+ rover = Rover::DataFrame.new(df.to_h)
15
+ group_keys = [:species, :island]
16
+ summary_key = :body_mass_g
17
+
18
+ benchmark:
19
+ 'penguins Group by Rover': |
20
+ rover.group(group_keys).mean(summary_key)
21
+
22
+ 'penguins Group by RedAmber': |
23
+ df.group(group_keys).mean(summary_key)
@@ -0,0 +1,23 @@
1
+ contexts:
2
+ - gems:
3
+ red_amber: 0.2.2
4
+ - name: HEAD
5
+ prelude: |
6
+ $LOAD_PATH.unshift(File.expand_path('lib'))
7
+ require 'red_amber'
8
+
9
+ prelude: |
10
+ require 'rover'
11
+ require 'datasets-arrow'
12
+ ds = Datasets::Rdatasets.new('nycflights13', 'planes')
13
+ df = RedAmber::DataFrame.new(ds)
14
+ rover = Rover::DataFrame.new(df.to_h)
15
+ group_keys = [:engines, :engine]
16
+ summary_key = :seats
17
+
18
+ benchmark:
19
+ 'planes Group by Rover': |
20
+ rover.group(group_keys).mean(summary_key)
21
+
22
+ 'planes Group by RedAmber': |
23
+ df.group(group_keys).mean(summary_key)
@@ -0,0 +1,23 @@
1
+ contexts:
2
+ - gems:
3
+ red_amber: 0.2.2
4
+ - name: HEAD
5
+ prelude: |
6
+ $LOAD_PATH.unshift(File.expand_path('lib'))
7
+ require 'red_amber'
8
+
9
+ prelude: |
10
+ require 'rover'
11
+ require 'datasets-arrow'
12
+ ds = Datasets::Rdatasets.new('nycflights13', 'weather')
13
+ df = RedAmber::DataFrame.new(ds)
14
+ rover = Rover::DataFrame.new(df.to_h)
15
+ group_keys = [:month, :origin]
16
+ summary_key = :temp
17
+
18
+ benchmark:
19
+ 'weather Group by Rover': |
20
+ rover.group(group_keys).mean(summary_key)
21
+
22
+ 'weather Group by RedAmber': |
23
+ df.group(group_keys).mean(summary_key)