rabbit-slide-kou-bakusokudb-2024-12 2024.12.16.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rabbit +1 -0
- data/README.rd +47 -0
- data/Rakefile +18 -0
- data/config.yaml +22 -0
- data/images/amazon-athena-improvement.svg +58 -0
- data/images/apache-arrow-and-amazon-athena.svg +1846 -0
- data/images/apache-arrow-and-apache-spark.svg +1306 -0
- data/images/apache-arrow-and-data-interchange.svg +833 -0
- data/images/apache-spark-improvement.svg +58 -0
- data/images/columnar.svg +641 -0
- data/images/simd-null.svg +348 -0
- data/pdf/bakusokudb-2024-12-why-apache-arrow-format-is-fast.pdf +0 -0
- data/theme.rb +1 -0
- data/tools/amazon-athena-improvement.R +14 -0
- data/tools/apache-spark-improvement.R +11 -0
- data/why-apache-arrow-format-is-fast.rab +506 -0
- metadata +88 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: dfce8b59746d3f4806b7841e914b018dbe07a74f0c013e2ead64a185fa2519ea
|
4
|
+
data.tar.gz: 74b305bb048194e3b9cb0579b520e617d6e84aebaabfd1326ce8578b3b0e4ecb
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 05ffb2e97247e79fa61e4fff7f4e0c8743676f124add70e20003c398b1d5083d71c2576f5e565e640d400f02e2905e3a044b7b47444c5878e3f226708b70ae49
|
7
|
+
data.tar.gz: 85cc1d253a56e4c47f54db60978e0940fa8c58c1f1cd0cc09f21b5ed0b9ccd210fb6176029d55c36a70b0d8ae8634379b6b38bf414f04b55e258e5acb3c8b786
|
data/.rabbit
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--size=1920,1080 why-apache-arrow-format-is-fast.rab
|
data/README.rd
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
= Apache Arrowフォーマットはなぜ速いのか
|
2
|
+
|
3
|
+
ビッグデータをどう扱えばよいか。これまでは各プロダクト毎に効率的な扱い方を実装していましたが、これからはそんな時代ではありません!ビッグデータの扱いでも、共通で必要なものはプロダクトを超えて協力して開発して共有する、そんな時代です!ビッグデータのための共通基盤、それがオープンソースのApache Arrowです。
|
4
|
+
|
5
|
+
このセッションではApache Arrow開発チームの主要メンバーがApache Arrowフォーマットがなぜ速いのかを説明します。
|
6
|
+
|
7
|
+
== ライセンス
|
8
|
+
|
9
|
+
=== スライド
|
10
|
+
|
11
|
+
CC BY-SA 4.0
|
12
|
+
|
13
|
+
原著作者:須藤功平
|
14
|
+
|
15
|
+
==== 株式会社クリアコードのロゴ
|
16
|
+
|
17
|
+
CC BY-SA 4.0
|
18
|
+
|
19
|
+
原著作者:株式会社クリアコード
|
20
|
+
|
21
|
+
ページヘッダーで使っています。
|
22
|
+
|
23
|
+
==== Apache Arrowのロゴ
|
24
|
+
|
25
|
+
Apache License 2.0
|
26
|
+
|
27
|
+
原著作者:The Apache Software Foundation
|
28
|
+
|
29
|
+
== 作者向け
|
30
|
+
|
31
|
+
=== 表示
|
32
|
+
|
33
|
+
rake
|
34
|
+
|
35
|
+
=== 公開
|
36
|
+
|
37
|
+
rake publish
|
38
|
+
|
39
|
+
== 閲覧者向け
|
40
|
+
|
41
|
+
=== インストール
|
42
|
+
|
43
|
+
gem install rabbit-slide-kou-bakusokudb-2024-12
|
44
|
+
|
45
|
+
=== 表示
|
46
|
+
|
47
|
+
rabbit rabbit-slide-kou-bakusokudb-2024-12.gem
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require "rabbit/task/slide"
|
2
|
+
|
3
|
+
# Edit ./config.yaml to customize meta data
|
4
|
+
|
5
|
+
spec = nil
|
6
|
+
Rabbit::Task::Slide.new do |task|
|
7
|
+
spec = task.spec
|
8
|
+
spec.files += Dir.glob("images/**/*.*")
|
9
|
+
spec.files += Dir.glob("tools/**/*.*")
|
10
|
+
# spec.files -= Dir.glob("private/**/*.*")
|
11
|
+
spec.add_runtime_dependency("rabbit-theme-clear-code")
|
12
|
+
end
|
13
|
+
|
14
|
+
desc "Tag #{spec.version}"
|
15
|
+
task :tag do
|
16
|
+
sh("git", "tag", "-a", spec.version.to_s, "-m", "Publish #{spec.version}")
|
17
|
+
sh("git", "push", "--tags")
|
18
|
+
end
|
data/config.yaml
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
---
|
2
|
+
id: bakusokudb-2024-12
|
3
|
+
base_name: why-apache-arrow-format-is-fast
|
4
|
+
tags:
|
5
|
+
- rabbit
|
6
|
+
- ApacheArrow
|
7
|
+
presentation_date: 2024-12-16
|
8
|
+
version: 2024.12.16.0
|
9
|
+
licenses:
|
10
|
+
- CC-BY-SA-4.0
|
11
|
+
slideshare_id: bakusokudb-2024-12
|
12
|
+
speaker_deck_id:
|
13
|
+
ustream_id:
|
14
|
+
vimeo_id:
|
15
|
+
youtube_id:
|
16
|
+
author:
|
17
|
+
markup_language: :rd
|
18
|
+
name: 須藤功平
|
19
|
+
email: kou@clear-code.com
|
20
|
+
rubygems_user: kou
|
21
|
+
slideshare_user: kou
|
22
|
+
speaker_deck_user:
|
@@ -0,0 +1,58 @@
|
|
1
|
+
<?xml version='1.0' encoding='UTF-8' ?>
|
2
|
+
<svg xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' viewBox='0 0 504.00 273.60'>
|
3
|
+
<defs>
|
4
|
+
<style type='text/css'><![CDATA[
|
5
|
+
line, polyline, polygon, path, rect, circle {
|
6
|
+
fill: none;
|
7
|
+
stroke: #000000;
|
8
|
+
stroke-linecap: round;
|
9
|
+
stroke-linejoin: round;
|
10
|
+
stroke-miterlimit: 10.00;
|
11
|
+
}
|
12
|
+
]]></style>
|
13
|
+
</defs>
|
14
|
+
<rect width='100%' height='100%' style='stroke: none; fill: #FFFFFF;'/>
|
15
|
+
<rect x='0.000000000000064' y='0.00' width='504.00' height='273.60' style='stroke-width: 1.07; stroke: #FFFFFF; fill: #FFFFFF;' />
|
16
|
+
<defs>
|
17
|
+
<clipPath id='cpMTY5LjI1fDQ5OC41MnwyMjkuMzZ8MjIuNzg='>
|
18
|
+
<rect x='169.25' y='22.78' width='329.27' height='206.58' />
|
19
|
+
</clipPath>
|
20
|
+
</defs>
|
21
|
+
<rect x='169.25' y='22.78' width='329.27' height='206.58' style='stroke-width: 1.07; stroke: none; fill: #EBEBEB;' clip-path='url(#cpMTY5LjI1fDQ5OC41MnwyMjkuMzZ8MjIuNzg=)' />
|
22
|
+
<polyline points='220.14,229.36 220.14,22.78 ' style='stroke-width: 0.53; stroke: #FFFFFF; stroke-linecap: butt;' clip-path='url(#cpMTY5LjI1fDQ5OC41MnwyMjkuMzZ8MjIuNzg=)' />
|
23
|
+
<polyline points='291.98,229.36 291.98,22.78 ' style='stroke-width: 0.53; stroke: #FFFFFF; stroke-linecap: butt;' clip-path='url(#cpMTY5LjI1fDQ5OC41MnwyMjkuMzZ8MjIuNzg=)' />
|
24
|
+
<polyline points='363.82,229.36 363.82,22.78 ' style='stroke-width: 0.53; stroke: #FFFFFF; stroke-linecap: butt;' clip-path='url(#cpMTY5LjI1fDQ5OC41MnwyMjkuMzZ8MjIuNzg=)' />
|
25
|
+
<polyline points='435.66,229.36 435.66,22.78 ' style='stroke-width: 0.53; stroke: #FFFFFF; stroke-linecap: butt;' clip-path='url(#cpMTY5LjI1fDQ5OC41MnwyMjkuMzZ8MjIuNzg=)' />
|
26
|
+
<polyline points='169.25,173.02 498.52,173.02 ' style='stroke-width: 1.07; stroke: #FFFFFF; stroke-linecap: butt;' clip-path='url(#cpMTY5LjI1fDQ5OC41MnwyMjkuMzZ8MjIuNzg=)' />
|
27
|
+
<polyline points='169.25,79.12 498.52,79.12 ' style='stroke-width: 1.07; stroke: #FFFFFF; stroke-linecap: butt;' clip-path='url(#cpMTY5LjI1fDQ5OC41MnwyMjkuMzZ8MjIuNzg=)' />
|
28
|
+
<polyline points='184.22,229.36 184.22,22.78 ' style='stroke-width: 1.07; stroke: #FFFFFF; stroke-linecap: butt;' clip-path='url(#cpMTY5LjI1fDQ5OC41MnwyMjkuMzZ8MjIuNzg=)' />
|
29
|
+
<polyline points='256.06,229.36 256.06,22.78 ' style='stroke-width: 1.07; stroke: #FFFFFF; stroke-linecap: butt;' clip-path='url(#cpMTY5LjI1fDQ5OC41MnwyMjkuMzZ8MjIuNzg=)' />
|
30
|
+
<polyline points='327.90,229.36 327.90,22.78 ' style='stroke-width: 1.07; stroke: #FFFFFF; stroke-linecap: butt;' clip-path='url(#cpMTY5LjI1fDQ5OC41MnwyMjkuMzZ8MjIuNzg=)' />
|
31
|
+
<polyline points='399.74,229.36 399.74,22.78 ' style='stroke-width: 1.07; stroke: #FFFFFF; stroke-linecap: butt;' clip-path='url(#cpMTY5LjI1fDQ5OC41MnwyMjkuMzZ8MjIuNzg=)' />
|
32
|
+
<polyline points='471.58,229.36 471.58,22.78 ' style='stroke-width: 1.07; stroke: #FFFFFF; stroke-linecap: butt;' clip-path='url(#cpMTY5LjI1fDQ5OC41MnwyMjkuMzZ8MjIuNzg=)' />
|
33
|
+
<rect x='184.22' y='130.76' width='299.34' height='84.51' style='stroke-width: 1.07; stroke: none; stroke-linecap: square; stroke-linejoin: miter; fill: #595959;' clip-path='url(#cpMTY5LjI1fDQ5OC41MnwyMjkuMzZ8MjIuNzg=)' />
|
34
|
+
<rect x='184.22' y='36.86' width='23.95' height='84.51' style='stroke-width: 1.07; stroke: none; stroke-linecap: square; stroke-linejoin: miter; fill: #595959;' clip-path='url(#cpMTY5LjI1fDQ5OC41MnwyMjkuMzZ8MjIuNzg=)' />
|
35
|
+
<defs>
|
36
|
+
<clipPath id='cpMC4wMHw1MDQuMDB8MjczLjYwfDAuMDA='>
|
37
|
+
<rect x='0.00' y='0.00' width='504.00' height='273.60' />
|
38
|
+
</clipPath>
|
39
|
+
</defs>
|
40
|
+
<g clip-path='url(#cpMC4wMHw1MDQuMDB8MjczLjYwfDAuMDA=)'><text x='56.45' y='176.05' style='font-size: 8.80px; fill: #4D4D4D; font-family: Liberation Sans;' textLength='107.86px' lengthAdjust='spacingAndGlyphs'>Athena + CSV on S3 Query</text></g>
|
41
|
+
<g clip-path='url(#cpMC4wMHw1MDQuMDB8MjczLjYwfDAuMDA=)'><text x='18.07' y='82.15' style='font-size: 8.80px; fill: #4D4D4D; font-family: Liberation Sans;' textLength='146.25px' lengthAdjust='spacingAndGlyphs'>Federated S3 Query w/Apache Arrow</text></g>
|
42
|
+
<polyline points='166.51,173.02 169.25,173.02 ' style='stroke-width: 1.07; stroke: #333333; stroke-linecap: butt;' clip-path='url(#cpMC4wMHw1MDQuMDB8MjczLjYwfDAuMDA=)' />
|
43
|
+
<polyline points='166.51,79.12 169.25,79.12 ' style='stroke-width: 1.07; stroke: #333333; stroke-linecap: butt;' clip-path='url(#cpMC4wMHw1MDQuMDB8MjczLjYwfDAuMDA=)' />
|
44
|
+
<polyline points='184.22,232.10 184.22,229.36 ' style='stroke-width: 1.07; stroke: #333333; stroke-linecap: butt;' clip-path='url(#cpMC4wMHw1MDQuMDB8MjczLjYwfDAuMDA=)' />
|
45
|
+
<polyline points='256.06,232.10 256.06,229.36 ' style='stroke-width: 1.07; stroke: #333333; stroke-linecap: butt;' clip-path='url(#cpMC4wMHw1MDQuMDB8MjczLjYwfDAuMDA=)' />
|
46
|
+
<polyline points='327.90,232.10 327.90,229.36 ' style='stroke-width: 1.07; stroke: #333333; stroke-linecap: butt;' clip-path='url(#cpMC4wMHw1MDQuMDB8MjczLjYwfDAuMDA=)' />
|
47
|
+
<polyline points='399.74,232.10 399.74,229.36 ' style='stroke-width: 1.07; stroke: #333333; stroke-linecap: butt;' clip-path='url(#cpMC4wMHw1MDQuMDB8MjczLjYwfDAuMDA=)' />
|
48
|
+
<polyline points='471.58,232.10 471.58,229.36 ' style='stroke-width: 1.07; stroke: #333333; stroke-linecap: butt;' clip-path='url(#cpMC4wMHw1MDQuMDB8MjczLjYwfDAuMDA=)' />
|
49
|
+
<g clip-path='url(#cpMC4wMHw1MDQuMDB8MjczLjYwfDAuMDA=)'><text x='181.77' y='240.34' style='font-size: 8.80px; fill: #4D4D4D; font-family: Liberation Sans;' textLength='4.89px' lengthAdjust='spacingAndGlyphs'>0</text></g>
|
50
|
+
<g clip-path='url(#cpMC4wMHw1MDQuMDB8MjczLjYwfDAuMDA=)'><text x='253.61' y='240.34' style='font-size: 8.80px; fill: #4D4D4D; font-family: Liberation Sans;' textLength='4.89px' lengthAdjust='spacingAndGlyphs'>2</text></g>
|
51
|
+
<g clip-path='url(#cpMC4wMHw1MDQuMDB8MjczLjYwfDAuMDA=)'><text x='325.45' y='240.34' style='font-size: 8.80px; fill: #4D4D4D; font-family: Liberation Sans;' textLength='4.89px' lengthAdjust='spacingAndGlyphs'>4</text></g>
|
52
|
+
<g clip-path='url(#cpMC4wMHw1MDQuMDB8MjczLjYwfDAuMDA=)'><text x='397.29' y='240.34' style='font-size: 8.80px; fill: #4D4D4D; font-family: Liberation Sans;' textLength='4.89px' lengthAdjust='spacingAndGlyphs'>6</text></g>
|
53
|
+
<g clip-path='url(#cpMC4wMHw1MDQuMDB8MjczLjYwfDAuMDA=)'><text x='469.13' y='240.34' style='font-size: 8.80px; fill: #4D4D4D; font-family: Liberation Sans;' textLength='4.89px' lengthAdjust='spacingAndGlyphs'>8</text></g>
|
54
|
+
<g clip-path='url(#cpMC4wMHw1MDQuMDB8MjczLjYwfDAuMDA=)'><text x='197.86' y='252.48' style='font-size: 11.00px; font-family: Liberation Sans;' textLength='272.05px' lengthAdjust='spacingAndGlyphs'>Elapsed time to process 1 billion rows (Shorter is faster)</text></g>
|
55
|
+
<g clip-path='url(#cpMC4wMHw1MDQuMDB8MjczLjYwfDAuMDA=)'><text transform='translate(13.05,144.41) rotate(-90)' style='font-size: 11.00px; font-family: Liberation Sans;' textLength='36.69px' lengthAdjust='spacingAndGlyphs'>Method</text></g>
|
56
|
+
<g clip-path='url(#cpMC4wMHw1MDQuMDB8MjczLjYwfDAuMDA=)'><text x='169.25' y='14.56' style='font-size: 13.20px; font-family: Liberation Sans;' textLength='319.90px' lengthAdjust='spacingAndGlyphs'>Apache Arrow improves data interchange performance</text></g>
|
57
|
+
<g clip-path='url(#cpMC4wMHw1MDQuMDB8MjczLjYwfDAuMDA=)'><text x='59.75' y='266.29' style='font-size: 8.80px; font-family: Liberation Sans;' textLength='438.77px' lengthAdjust='spacingAndGlyphs'>Data at https://github.com/awslabs/aws-athena-query-federation/tree/master/athena-federation-sdk#performance</text></g>
|
58
|
+
</svg>
|