rabbit-slide-kou-rubykaigi-takeout-2021 2021.9.11.0 → 2021.9.11.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4e7819063a4ebcbedeb7a6ea8ffad85ca4e15a57ce9aa6dbcecd02f338610d7c
4
- data.tar.gz: 6a9734a5a02321f2ce4c1e20676556c0c6efa9aa18649cfa263d1dd3832ce702
3
+ metadata.gz: 5d4e14fc04effc4f025a441319f3e66fe5abd2debdadf30c60018a42747a7f52
4
+ data.tar.gz: 60c90b5cca89e7c4a4927b5d3266b3a1cce3e4d9ab05a0ad4759b0105117c35f
5
5
  SHA512:
6
- metadata.gz: a89c84882adff989423129df414b42cdc4e3f49bc61fb2651bb722105d1a4d06aa5271892cfcb790cc25a54cb6de2d11e1f335977fcf7ba59c436c97d2002eb8
7
- data.tar.gz: 8a2d3ea6e38fda7c3972a8f31f5d5dd75c38ac94d852968133f27f1c99e24e0a7aa6a26155f2be330660a1012433310ca0d52bf3fe76395414cfa18b571f269c
6
+ metadata.gz: 9ef2cdeea3f7b003dbbbac777c1011ff4ff85bd4547790140e68e7e89728df51ac8dbe835f1d41a5589ff1868783b76327696f09af0123b56d5150ecc4abb0c6
7
+ data.tar.gz: be5b7964361a8d3bf1324ebcfc4cedf7a7723830faee85043a55ecbbca37eca7091159cad9ea5703dbe1d9395e67132c1afb45743ad4d575629c2a6332247b10
data/Rakefile CHANGED
@@ -5,8 +5,8 @@ require "rabbit/task/slide"
5
5
  spec = nil
6
6
  Rabbit::Task::Slide.new do |task|
7
7
  spec = task.spec
8
- # spec.files += Dir.glob("doc/**/*.*")
9
8
  spec.files += Dir.glob("images/**/*.*")
9
+ spec.files += Dir.glob("benchmarks/*.rb")
10
10
  # spec.files -= Dir.glob("private/**/*.*")
11
11
  spec.add_runtime_dependency("rabbit-theme-clear-code")
12
12
  end
data/benchmarks/add.rb ADDED
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Rehearsal ------------------------------------------------
4
+ # Array 0.040361 0.004003 0.044364 ( 0.044364)
5
+ # Arrow::Array 0.004211 0.000001 0.004212 ( 0.023220)
6
+ # Numo::UInt8 0.000217 0.000001 0.000218 ( 0.000217)
7
+ # --------------------------------------- total: 0.048794sec
8
+ #
9
+ # user system total real
10
+ # Array 0.042996 0.000000 0.042996 ( 0.042996)
11
+ # Arrow::Array 0.001329 0.000000 0.001329 ( 0.001329)
12
+ # Numo::UInt8 0.000108 0.000000 0.000108 ( 0.000106)
13
+
14
+ require "benchmark"
15
+
16
+ require "arrow"
17
+ require "numo/narray"
18
+
19
+ array = 1000000.times.to_a
20
+ arrow_array = Arrow::Array.new(array)
21
+ numo_array = Numo::UInt8[array]
22
+
23
+ Benchmark.bmbm do |job|
24
+ job.report("Array") do
25
+ array.collect do |value|
26
+ value + 1
27
+ end
28
+ end
29
+
30
+ job.report("Arrow::Array") do
31
+ Arrow::Function.find("add").execute([arrow_array, 1]).value
32
+ end
33
+
34
+ job.report("Numo::UInt8") do
35
+ numo_array + 1
36
+ end
37
+ end
data/benchmarks/filter.rb ADDED
@@ -0,0 +1,44 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Rehearsal ------------------------------------------
4
+ # Ruby 1.254892 0.031662 1.286554 ( 1.286584)
5
+ # Arrow 0.002342 0.000113 0.002455 ( 0.002430)
6
+ # DuckDB 0.459440 0.000000 0.459440 ( 0.459448)
7
+ # --------------------------------- total: 1.748449sec
8
+ #
9
+ # user system total real
10
+ # Ruby 1.252897 0.003972 1.256869 ( 1.256864)
11
+ # Arrow 0.001361 0.000037 0.001398 ( 0.001395)
12
+ # DuckDB 0.459227 0.000000 0.459227 ( 0.459101)
13
+
14
+ require "benchmark"
15
+
16
+ require "datasets-arrow"
17
+ require "arrow-duckdb"
18
+
19
+ dataset = Datasets::PostalCodeJapan.new
20
+ arrow_dataset = dataset.to_arrow
21
+ db = DuckDB::Database.open
22
+ connection = db.connect
23
+ connection.register("codes", arrow_dataset)
24
+
25
+ Benchmark.bmbm do |job|
26
+ job.report("Ruby") do
27
+ dataset.find_all do |row|
28
+ row.prefecture == "東京都"
29
+ end
30
+ end
31
+
32
+ job.report("Arrow") do
33
+ arrow_dataset.slice do |slicer|
34
+ slicer.prefecture == "東京都"
35
+ end
36
+ end
37
+
38
+ job.report("DuckDB") do
39
+ result = connection.query("SELECT * FROM codes WHERE prefecture = ?",
40
+ "東京都",
41
+ output: :arrow)
42
+ result.to_table
43
+ end
44
+ end
data/benchmarks/load.rb ADDED
@@ -0,0 +1,65 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Rehearsal -----------------------------------------------------
4
+ # CSV: Ruby: 0 0.867305 0.043896 0.911201 ( 0.911251)
5
+ # CSV: Red Arrow: 0 0.168953 0.044898 0.213851 ( 0.016073)
6
+ # Apache Arrow: 0 0.000396 0.000040 0.000436 ( 0.000437)
7
+ # CSV: Ruby: 1 1.820866 0.057001 1.877867 ( 1.874802)
8
+ # CSV: Red Arrow: 1 0.358689 0.055034 0.413723 ( 0.021555)
9
+ # Apache Arrow: 1 0.000393 0.000028 0.000421 ( 0.000421)
10
+ # CSV: Ruby: 2 4.141544 0.089525 4.231069 ( 4.215467)
11
+ # CSV: Red Arrow: 2 0.704372 0.133196 0.837568 ( 0.040835)
12
+ # Apache Arrow: 2 0.000447 0.000025 0.000472 ( 0.000472)
13
+ # CSV: Ruby: 3 7.919982 0.188867 8.108849 ( 8.082351)
14
+ # CSV: Red Arrow: 3 1.352553 0.235089 1.587642 ( 0.080943)
15
+ # Apache Arrow: 3 0.000544 0.000028 0.000572 ( 0.000573)
16
+ # CSV: Ruby: 4 20.722920 0.449558 21.172478 ( 21.123315)
17
+ # CSV: Red Arrow: 4 2.570883 0.427440 2.998323 ( 0.146126)
18
+ # Apache Arrow: 4 0.000898 0.000000 0.000898 ( 0.000899)
19
+ # ------------------------------------------- total: 42.355370sec
20
+ #
21
+ # user system total real
22
+ # CSV: Ruby: 0 0.792350 0.104316 0.896666 ( 0.828678)
23
+ # CSV: Red Arrow: 0 0.173136 0.007682 0.180818 ( 0.012443)
24
+ # Apache Arrow: 0 0.000574 0.000026 0.000600 ( 0.000592)
25
+ # CSV: Ruby: 1 1.816272 0.043755 1.860027 ( 1.840314)
26
+ # CSV: Red Arrow: 1 0.387305 0.058731 0.446036 ( 0.021403)
27
+ # Apache Arrow: 1 0.000662 0.000031 0.000693 ( 0.000685)
28
+ # CSV: Ruby: 2 3.755837 0.055756 3.811593 ( 3.797536)
29
+ # CSV: Red Arrow: 2 0.702905 0.145476 0.848381 ( 0.040435)
30
+ # Apache Arrow: 2 0.000833 0.000038 0.000871 ( 0.000862)
31
+ # CSV: Ruby: 3 8.003925 0.227114 8.231039 ( 8.205680)
32
+ # CSV: Red Arrow: 3 1.250248 0.320944 1.571192 ( 0.074629)
33
+ # Apache Arrow: 3 0.004287 0.000013 0.004300 ( 0.004290)
34
+ # CSV: Ruby: 4 19.384487 0.514361 19.898848 ( 19.850910)
35
+ # CSV: Red Arrow: 4 2.586117 0.454729 3.040846 ( 0.138448)
36
+ # Apache Arrow: 4 0.008140 0.000005 0.008145 ( 0.008139)
37
+
38
+ require "benchmark"
39
+
40
+ require "datasets-arrow"
41
+
42
+ dataset = Datasets::PostalCodeJapan.new
43
+ table = dataset.to_arrow
44
+ n = 5
45
+ # n.times do |i|
46
+ # table.save("/tmp/codes.#{i}.csv")
47
+ # table.save("/tmp/codes.#{i}.arrow")
48
+ # table = table.concatenate([table])
49
+ # end
50
+
51
+ Benchmark.bmbm do |job|
52
+ n.times do |i|
53
+ job.report("CSV: Ruby: #{i}") do
54
+ CSV.read("/tmp/codes.#{i}.csv").size
55
+ end
56
+
57
+ job.report("CSV: Red Arrow: #{i}") do
58
+ Arrow::Table.load("/tmp/codes.#{i}.csv")
59
+ end
60
+
61
+ job.report("Apache Arrow: #{i}") do
62
+ Arrow::Table.load("/tmp/codes.#{i}.arrow")
63
+ end
64
+ end
65
+ end
data/benchmarks/ractor.rb ADDED
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Rehearsal ----------------------------------------------
4
+ # Sequential 4.371320 0.279862 4.651182 ( 4.560637)
5
+ # Ractor 4.930339 0.494265 5.424604 ( 1.454782)
6
+ # ------------------------------------ total: 10.075786sec
7
+ #
8
+ # user system total real
9
+ # Sequential 4.434695 0.396617 4.831312 ( 4.573742)
10
+ # Ractor 4.871155 0.518646 5.389801 ( 1.454987)
11
+
12
+ require "benchmark"
13
+
14
+ require "datasets-arrow"
15
+
16
+ table = Datasets::PostalCodeJapan.new.to_arrow
17
+ Ractor.make_shareable(table)
18
+
19
+ # Show warning
20
+ Ractor.new {}.take
21
+
22
+ n_ractors = 4
23
+ n_jobs_per_ractor = 1000
24
+
25
+ Benchmark.bmbm do |job|
26
+ job.report("Sequential") do
27
+ (n_ractors * n_jobs_per_ractor).times do
28
+ table.slice do |slicer|
29
+ slicer.prefecture == "東京都"
30
+ end
31
+ end
32
+ end
33
+
34
+ job.report("Ractor") do
35
+ ractors = n_ractors.times.collect do
36
+ Ractor.new(table, n_jobs_per_ractor) do |t, n|
37
+ n.times do
38
+ t.slice do |slicer|
39
+ slicer.prefecture == "東京都"
40
+ end
41
+ end
42
+ end
43
+ end
44
+ ractors.each(&:take)
45
+ end
46
+ end
data/benchmarks/sum.rb ADDED
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Rehearsal ------------------------------------------------
4
+ # Array 0.000759 0.000000 0.000759 ( 0.000754)
5
+ # Arrow::Array 0.002483 0.000000 0.002483 ( 0.021204)
6
+ # Numo::Int8 0.000610 0.000000 0.000610 ( 0.000610)
7
+ # --------------------------------------- total: 0.003852sec
8
+ #
9
+ # user system total real
10
+ # Array 0.000763 0.000006 0.000769 ( 0.000766)
11
+ # Arrow::Array 0.000333 0.000002 0.000335 ( 0.000333)
12
+ # Numo::Int8 0.000634 0.000004 0.000638 ( 0.000636)
13
+
14
+ require "benchmark"
15
+
16
+ require "arrow"
17
+ require "numo/narray"
18
+
19
+ array = 1000000.times.to_a
20
+ arrow_array = Arrow::Array.new(array)
21
+ numo_array = Numo::Int8[array]
22
+
23
+ Benchmark.bmbm do |job|
24
+ job.report("Array") do
25
+ array.sum
26
+ end
27
+
28
+ job.report("Arrow::Array") do
29
+ arrow_array.sum
30
+ end
31
+
32
+ job.report("Numo::Int8") do
33
+ numo_array.sum
34
+ end
35
+ end
data/config.yaml CHANGED
@@ -7,14 +7,13 @@ tags:
7
7
  - ruby
8
8
  - apache_arrow
9
9
  presentation_date: 2021-09-11
10
- version: 2021.9.11.0
10
+ version: 2021.9.11.2
11
11
  licenses:
12
12
  - CC-BY-SA-4.0
13
- slideshare_id:
13
+ slideshare_id: rubykaigi-takeout-2021
14
14
  speaker_deck_id:
15
- ustream_id:
16
15
  vimeo_id:
17
- youtube_id:
16
+ youtube_id: okXiuYiP2C4
18
17
  author:
19
18
  markup_language: :rd
20
19
  name: Sutou Kouhei
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rabbit-slide-kou-rubykaigi-takeout-2021
3
3
  version: !ruby/object:Gem::Version
4
- version: 2021.9.11.0
4
+ version: 2021.9.11.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sutou Kouhei
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-08-23 00:00:00.000000000 Z
11
+ date: 2021-09-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rabbit
@@ -59,6 +59,11 @@ files:
59
59
  - ".rabbit"
60
60
  - README.rd
61
61
  - Rakefile
62
+ - benchmarks/add.rb
63
+ - benchmarks/filter.rb
64
+ - benchmarks/load.rb
65
+ - benchmarks/ractor.rb
66
+ - benchmarks/sum.rb
62
67
  - config.yaml
63
68
  - images/apache-arrow-commits-kou.png
64
69
  - images/clear-code-rubykaigi-takeout-2021-gold-sponsor.png