rabbit-slide-kou-db-tech-showcase-online-2020 2020.12.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rabbit +1 -0
- data/README.rd +48 -0
- data/Rakefile +18 -0
- data/config.yaml +23 -0
- data/images/amazon-athena-improvement.svg +58 -0
- data/images/apache-arrow-and-amazon-athena.svg +1846 -0
- data/images/apache-arrow-and-apache-spark.svg +1306 -0
- data/images/apache-arrow-and-data-interchange.svg +833 -0
- data/images/apache-spark-improvement.svg +58 -0
- data/images/columnar.svg +641 -0
- data/images/simd-null.svg +285 -0
- data/pdf/db-tech-showcase-online-2020-why-apache-arrow-format-is-fast.pdf +0 -0
- data/theme.rb +5 -0
- data/thumbnail.png +0 -0
- data/why-apache-arrow-format-is-fast.rab +460 -0
- metadata +89 -0
@@ -0,0 +1,285 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
2
|
+
<svg
|
3
|
+
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
4
|
+
xmlns:cc="http://creativecommons.org/ns#"
|
5
|
+
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
6
|
+
xmlns:svg="http://www.w3.org/2000/svg"
|
7
|
+
xmlns="http://www.w3.org/2000/svg"
|
8
|
+
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
9
|
+
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
10
|
+
width="182.15048mm"
|
11
|
+
height="125.74078mm"
|
12
|
+
viewBox="0 0 182.15048 125.74078"
|
13
|
+
version="1.1"
|
14
|
+
id="svg8"
|
15
|
+
inkscape:version="1.0.1 (3bc2e813f5, 2020-09-07)"
|
16
|
+
sodipodi:docname="simd-null.svg">
|
17
|
+
<defs
|
18
|
+
id="defs2" />
|
19
|
+
<sodipodi:namedview
|
20
|
+
id="base"
|
21
|
+
pagecolor="#ffffff"
|
22
|
+
bordercolor="#666666"
|
23
|
+
borderopacity="1.0"
|
24
|
+
inkscape:pageopacity="0.0"
|
25
|
+
inkscape:pageshadow="2"
|
26
|
+
inkscape:zoom="0.98994949"
|
27
|
+
inkscape:cx="251.26811"
|
28
|
+
inkscape:cy="332.35173"
|
29
|
+
inkscape:document-units="mm"
|
30
|
+
inkscape:current-layer="layer1"
|
31
|
+
showgrid="false"
|
32
|
+
fit-margin-top="5"
|
33
|
+
fit-margin-left="5"
|
34
|
+
fit-margin-bottom="5"
|
35
|
+
fit-margin-right="5"
|
36
|
+
inkscape:window-width="3440"
|
37
|
+
inkscape:window-height="1376"
|
38
|
+
inkscape:window-x="0"
|
39
|
+
inkscape:window-y="27"
|
40
|
+
inkscape:window-maximized="1"
|
41
|
+
inkscape:document-rotation="0" />
|
42
|
+
<metadata
|
43
|
+
id="metadata5">
|
44
|
+
<rdf:RDF>
|
45
|
+
<cc:Work
|
46
|
+
rdf:about="">
|
47
|
+
<dc:format>image/svg+xml</dc:format>
|
48
|
+
<dc:type
|
49
|
+
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
50
|
+
<dc:title></dc:title>
|
51
|
+
</cc:Work>
|
52
|
+
</rdf:RDF>
|
53
|
+
</metadata>
|
54
|
+
<g
|
55
|
+
inkscape:label="レイヤー 1"
|
56
|
+
inkscape:groupmode="layer"
|
57
|
+
id="layer1"
|
58
|
+
transform="translate(18.78782,-3.4599898)">
|
59
|
+
<rect
|
60
|
+
style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
61
|
+
id="rect815"
|
62
|
+
width="41.015972"
|
63
|
+
height="33.886806"
|
64
|
+
x="13.229166"
|
65
|
+
y="21.455357" />
|
66
|
+
<text
|
67
|
+
xml:space="preserve"
|
68
|
+
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.5833px;line-height:100%;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-indent:0;text-align:center;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:0px;word-spacing:0px;text-transform:none;writing-mode:lr-tb;direction:ltr;baseline-shift:baseline;text-anchor:middle;white-space:normal;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
69
|
+
x="-3.3676131"
|
70
|
+
y="34.808594"
|
71
|
+
id="text827"><tspan
|
72
|
+
sodipodi:role="line"
|
73
|
+
id="tspan825"
|
74
|
+
x="-3.3676131"
|
75
|
+
y="34.808594"
|
76
|
+
style="stroke-width:1">null</tspan></text>
|
77
|
+
<text
|
78
|
+
xml:space="preserve"
|
79
|
+
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.5833px;line-height:100%;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-indent:0;text-align:center;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:0px;word-spacing:0px;text-transform:none;writing-mode:lr-tb;direction:ltr;baseline-shift:baseline;text-anchor:middle;white-space:normal;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
80
|
+
x="33.79007"
|
81
|
+
y="34.808594"
|
82
|
+
id="text831"><tspan
|
83
|
+
sodipodi:role="line"
|
84
|
+
id="tspan829"
|
85
|
+
x="33.79007"
|
86
|
+
y="34.808594"
|
87
|
+
style="stroke-width:1">1 0 1</tspan></text>
|
88
|
+
<text
|
89
|
+
xml:space="preserve"
|
90
|
+
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.5833px;line-height:100%;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-indent:0;text-align:center;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:0px;word-spacing:0px;text-transform:none;writing-mode:lr-tb;direction:ltr;baseline-shift:baseline;text-anchor:middle;white-space:normal;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
91
|
+
x="-3.6807373"
|
92
|
+
y="49.937122"
|
93
|
+
id="text835"><tspan
|
94
|
+
sodipodi:role="line"
|
95
|
+
id="tspan833"
|
96
|
+
x="-3.6807373"
|
97
|
+
y="49.937122"
|
98
|
+
style="stroke-width:1">data</tspan></text>
|
99
|
+
<text
|
100
|
+
xml:space="preserve"
|
101
|
+
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.5833px;line-height:100%;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-indent:0;text-align:center;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:0px;word-spacing:0px;text-transform:none;writing-mode:lr-tb;direction:ltr;baseline-shift:baseline;text-anchor:middle;white-space:normal;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
102
|
+
x="33.297943"
|
103
|
+
y="50.042957"
|
104
|
+
id="text839"><tspan
|
105
|
+
sodipodi:role="line"
|
106
|
+
id="tspan837"
|
107
|
+
x="35.943775"
|
108
|
+
y="50.042957"
|
109
|
+
style="stroke-width:1">1 X 3 </tspan></text>
|
110
|
+
<rect
|
111
|
+
y="21.455357"
|
112
|
+
x="86.995552"
|
113
|
+
height="33.886806"
|
114
|
+
width="41.015972"
|
115
|
+
id="rect843"
|
116
|
+
style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" />
|
117
|
+
<text
|
118
|
+
id="text851"
|
119
|
+
y="34.808594"
|
120
|
+
x="107.50354"
|
121
|
+
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.5833px;line-height:100%;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-indent:0;text-align:center;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:0px;word-spacing:0px;text-transform:none;writing-mode:lr-tb;direction:ltr;baseline-shift:baseline;text-anchor:middle;white-space:normal;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
122
|
+
xml:space="preserve"><tspan
|
123
|
+
style="stroke-width:1"
|
124
|
+
y="34.808594"
|
125
|
+
x="107.50354"
|
126
|
+
id="tspan849"
|
127
|
+
sodipodi:role="line">0 1 1</tspan></text>
|
128
|
+
<text
|
129
|
+
id="text855"
|
130
|
+
y="50.042957"
|
131
|
+
x="107.60938"
|
132
|
+
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.5833px;line-height:100%;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-indent:0;text-align:center;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:0px;word-spacing:0px;text-transform:none;writing-mode:lr-tb;direction:ltr;baseline-shift:baseline;text-anchor:middle;white-space:normal;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
133
|
+
xml:space="preserve"><tspan
|
134
|
+
style="stroke-width:1"
|
135
|
+
y="50.042957"
|
136
|
+
x="107.60938"
|
137
|
+
id="tspan853"
|
138
|
+
sodipodi:role="line">X 2 5</tspan></text>
|
139
|
+
<text
|
140
|
+
xml:space="preserve"
|
141
|
+
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.5833px;line-height:100%;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-indent:0;text-align:center;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:0px;word-spacing:0px;text-transform:none;writing-mode:lr-tb;direction:ltr;baseline-shift:baseline;text-anchor:middle;white-space:normal;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
142
|
+
x="70.829102"
|
143
|
+
y="17.364624"
|
144
|
+
id="text859"><tspan
|
145
|
+
sodipodi:role="line"
|
146
|
+
id="tspan857"
|
147
|
+
x="70.829102"
|
148
|
+
y="17.364624"
|
149
|
+
style="stroke-width:1">+</tspan></text>
|
150
|
+
<text
|
151
|
+
xml:space="preserve"
|
152
|
+
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.5833px;line-height:100%;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-indent:0;text-align:center;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:0px;word-spacing:0px;text-transform:none;writing-mode:lr-tb;direction:ltr;baseline-shift:baseline;text-anchor:middle;white-space:normal;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
153
|
+
x="33.125305"
|
154
|
+
y="16.767906"
|
155
|
+
id="text863"><tspan
|
156
|
+
sodipodi:role="line"
|
157
|
+
id="tspan861"
|
158
|
+
x="33.125305"
|
159
|
+
y="16.767906"
|
160
|
+
style="stroke-width:1">[1, null, 3]</tspan></text>
|
161
|
+
<text
|
162
|
+
xml:space="preserve"
|
163
|
+
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.5833px;line-height:100%;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-indent:0;text-align:center;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:0px;word-spacing:0px;text-transform:none;writing-mode:lr-tb;direction:ltr;baseline-shift:baseline;text-anchor:middle;white-space:normal;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
164
|
+
x="108.5329"
|
165
|
+
y="16.767906"
|
166
|
+
id="text867"><tspan
|
167
|
+
sodipodi:role="line"
|
168
|
+
id="tspan865"
|
169
|
+
x="108.5329"
|
170
|
+
y="16.767906"
|
171
|
+
style="stroke-width:1">[null, 2, 5]</tspan></text>
|
172
|
+
<rect
|
173
|
+
y="74.641991"
|
174
|
+
x="11.892818"
|
175
|
+
height="33.886806"
|
176
|
+
width="41.015972"
|
177
|
+
id="rect869"
|
178
|
+
style="color:#000000;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate" />
|
179
|
+
<text
|
180
|
+
id="text877"
|
181
|
+
y="87.995232"
|
182
|
+
x="32.400803"
|
183
|
+
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.5833px;line-height:100%;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-indent:0;text-align:center;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:0px;word-spacing:0px;text-transform:none;writing-mode:lr-tb;direction:ltr;baseline-shift:baseline;text-anchor:middle;white-space:normal;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
184
|
+
xml:space="preserve"><tspan
|
185
|
+
style="stroke-width:1"
|
186
|
+
y="87.995232"
|
187
|
+
x="32.400803"
|
188
|
+
id="tspan875"
|
189
|
+
sodipodi:role="line">0 0 1</tspan></text>
|
190
|
+
<text
|
191
|
+
id="text885"
|
192
|
+
y="103.22959"
|
193
|
+
x="32.136223"
|
194
|
+
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.5833px;line-height:100%;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-indent:0;text-align:center;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:0px;word-spacing:0px;text-transform:none;writing-mode:lr-tb;direction:ltr;baseline-shift:baseline;text-anchor:middle;white-space:normal;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
195
|
+
xml:space="preserve"><tspan
|
196
|
+
style="stroke-width:1"
|
197
|
+
y="103.22959"
|
198
|
+
x="34.782055"
|
199
|
+
id="tspan883"
|
200
|
+
sodipodi:role="line">X X 8 </tspan></text>
|
201
|
+
<text
|
202
|
+
id="text912"
|
203
|
+
y="122.74027"
|
204
|
+
x="32.48177"
|
205
|
+
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.5833px;line-height:100%;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-indent:0;text-align:center;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:0px;word-spacing:0px;text-transform:none;writing-mode:lr-tb;direction:ltr;baseline-shift:baseline;text-anchor:middle;white-space:normal;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
206
|
+
xml:space="preserve"><tspan
|
207
|
+
style="stroke-width:1"
|
208
|
+
y="122.74027"
|
209
|
+
x="32.48177"
|
210
|
+
id="tspan910"
|
211
|
+
sodipodi:role="line">[null, null, 8]</tspan></text>
|
212
|
+
<path
|
213
|
+
style="fill:none;fill-rule:evenodd;stroke:#204a87;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
|
214
|
+
d="M 48.643053,31.601359 H 61.887955 V 85.055264 H 48.108515"
|
215
|
+
id="path934"
|
216
|
+
inkscape:connector-curvature="0"
|
217
|
+
sodipodi:nodetypes="cccc" />
|
218
|
+
<path
|
219
|
+
style="fill:none;fill-rule:evenodd;stroke:#204a87;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
|
220
|
+
d="M 91.216399,31.601359 H 57.730217"
|
221
|
+
id="path938"
|
222
|
+
inkscape:connector-curvature="0"
|
223
|
+
sodipodi:nodetypes="cc" />
|
224
|
+
<text
|
225
|
+
xml:space="preserve"
|
226
|
+
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.5833px;line-height:100%;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-indent:0;text-align:center;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:0px;word-spacing:0px;text-transform:none;writing-mode:lr-tb;direction:ltr;baseline-shift:baseline;text-anchor:middle;white-space:normal;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
227
|
+
x="19.087797"
|
228
|
+
y="69.269341"
|
229
|
+
id="text942"><tspan
|
230
|
+
sodipodi:role="line"
|
231
|
+
id="tspan940"
|
232
|
+
x="19.087797"
|
233
|
+
y="69.269341"
|
234
|
+
style="stroke-width:1">ビット単位の&amp;</tspan></text>
|
235
|
+
<path
|
236
|
+
style="fill:none;fill-rule:evenodd;stroke:#a40000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:1.00000003, 1.00000003;stroke-dashoffset:0;stroke-opacity:1"
|
237
|
+
d="M 48.380951,46.590772 H 72.004464 V 98.373511 H 49.136905"
|
238
|
+
id="path944"
|
239
|
+
inkscape:connector-curvature="0" />
|
240
|
+
<path
|
241
|
+
style="fill:none;fill-rule:evenodd;stroke:#a40000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:1.00000003, 1.00000003;stroke-dashoffset:0;stroke-opacity:1"
|
242
|
+
d="M 91.281248,46.590772 H 72.004464"
|
243
|
+
id="path946"
|
244
|
+
inkscape:connector-curvature="0"
|
245
|
+
sodipodi:nodetypes="cc" />
|
246
|
+
<text
|
247
|
+
xml:space="preserve"
|
248
|
+
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.5833px;line-height:100%;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:0px;word-spacing:0px;text-transform:none;writing-mode:lr-tb;direction:ltr;baseline-shift:baseline;text-anchor:start;white-space:normal;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
249
|
+
x="79.452759"
|
250
|
+
y="69.093643"
|
251
|
+
id="text950"><tspan
|
252
|
+
sodipodi:role="line"
|
253
|
+
id="tspan948"
|
254
|
+
x="79.452759"
|
255
|
+
y="69.093643"
|
256
|
+
style="text-align:start;text-anchor:start;stroke-width:1">nullのところも</tspan><tspan
|
257
|
+
sodipodi:role="line"
|
258
|
+
x="79.452759"
|
259
|
+
y="79.887405"
|
260
|
+
style="text-align:start;text-anchor:start;stroke-width:1"
|
261
|
+
id="tspan954">気にせずSIMDで+</tspan></text>
|
262
|
+
<text
|
263
|
+
id="text958"
|
264
|
+
y="87.889397"
|
265
|
+
x="-3.3676131"
|
266
|
+
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.5833px;line-height:100%;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-indent:0;text-align:center;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:0px;word-spacing:0px;text-transform:none;writing-mode:lr-tb;direction:ltr;baseline-shift:baseline;text-anchor:middle;white-space:normal;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
267
|
+
xml:space="preserve"><tspan
|
268
|
+
style="stroke-width:1"
|
269
|
+
y="87.889397"
|
270
|
+
x="-3.3676131"
|
271
|
+
id="tspan956"
|
272
|
+
sodipodi:role="line">null</tspan></text>
|
273
|
+
<text
|
274
|
+
id="text962"
|
275
|
+
y="103.12376"
|
276
|
+
x="-3.6807373"
|
277
|
+
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:10.5833px;line-height:100%;font-family:sans-serif;-inkscape-font-specification:'sans-serif, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-indent:0;text-align:center;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:0px;word-spacing:0px;text-transform:none;writing-mode:lr-tb;direction:ltr;baseline-shift:baseline;text-anchor:middle;white-space:normal;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
278
|
+
xml:space="preserve"><tspan
|
279
|
+
style="stroke-width:1"
|
280
|
+
y="103.12376"
|
281
|
+
x="-3.6807373"
|
282
|
+
id="tspan960"
|
283
|
+
sodipodi:role="line">data</tspan></text>
|
284
|
+
</g>
|
285
|
+
</svg>
|
data/theme.rb
ADDED
data/thumbnail.png
ADDED
Binary file
|
@@ -0,0 +1,460 @@
|
|
1
|
+
= Apache Arrowフォーマットは\nなぜ速いのか
|
2
|
+
|
3
|
+
: author
|
4
|
+
須藤功平
|
5
|
+
: institution
|
6
|
+
株式会社クリアコード
|
7
|
+
: content-source
|
8
|
+
db tech showcase ONLINE 2020
|
9
|
+
: date
|
10
|
+
2020-12-08
|
11
|
+
: start-time
|
12
|
+
2020-12-08T15:30:00+09:00
|
13
|
+
: end-time
|
14
|
+
2020-12-08T16:10:00+09:00
|
15
|
+
: theme
|
16
|
+
.
|
17
|
+
|
18
|
+
= Apache Arrowと私
|
19
|
+
|
20
|
+
* 2016-12-21に最初のコミット
|
21
|
+
* 2017-05-10にコミッター
|
22
|
+
* 2017-09-15にPMCメンバー
|
23
|
+
* 2020-11-25現在コミット数2位(508人中)
|
24
|
+
|
25
|
+
= Apache Arrow
|
26
|
+
|
27
|
+
* データ分析ツールの基盤を提供
|
28
|
+
* ツールで必要になるやつ全部入り
|
29
|
+
* 各種プログラミング言語をサポート
|
30
|
+
|
31
|
+
= 全部ってなに!?
|
32
|
+
|
33
|
+
(('tag:left'))
|
34
|
+
(('tag:wrap-word-char'))
|
35
|
+
(('tag:justify'))
|
36
|
+
データフォーマットとかそのデータを高速処理する機能とか他の各種データフォーマットと変換する機能とかローカル・リモートにあるデータを透過的に読み書きする機能とか高速RPCとかなんだけど、全部を説明すると「すごそうだけどよくわからないね!」と言われる!
|
37
|
+
|
38
|
+
= 今日のトピック
|
39
|
+
|
40
|
+
Apache Arrow\nフォーマット
|
41
|
+
|
42
|
+
= Apache Arrowフォーマット
|
43
|
+
|
44
|
+
* データフォーマット
|
45
|
+
* 通信用とインメモリー用の両方
|
46
|
+
* 表形式のデータ用
|
47
|
+
* =データフレーム形式のデータ用
|
48
|
+
* 速い!
|
49
|
+
|
50
|
+
= 速い!
|
51
|
+
|
52
|
+
* データ((*交換*))が速い!
|
53
|
+
* データ((*処理*))が速い!
|
54
|
+
|
55
|
+
= データ交換が速い!
|
56
|
+
|
57
|
+
(('tag:center'))
|
58
|
+
(('tag:margin-bottom * -0.5'))
|
59
|
+
Apache Arrowフォーマットにすると高速化!
|
60
|
+
|
61
|
+
# image
|
62
|
+
# src = images/apache-arrow-and-data-interchange.svg
|
63
|
+
# relative-height = 100
|
64
|
+
|
65
|
+
= 利用事例:Apache Spark
|
66
|
+
|
67
|
+
# image
|
68
|
+
# src = images/apache-arrow-and-apache-spark.svg
|
69
|
+
# relative-height = 100
|
70
|
+
|
71
|
+
== スライドプロパティー
|
72
|
+
|
73
|
+
: enable-title-on-image
|
74
|
+
false
|
75
|
+
|
76
|
+
= 利用事例:Amazon Athena
|
77
|
+
|
78
|
+
# image
|
79
|
+
# src = images/apache-arrow-and-amazon-athena.svg
|
80
|
+
# relative-height = 100
|
81
|
+
|
82
|
+
== スライドプロパティー
|
83
|
+
|
84
|
+
: enable-title-on-image
|
85
|
+
false
|
86
|
+
|
87
|
+
= 利用事例:RAPIDS
|
88
|
+
|
89
|
+
# image
|
90
|
+
# src = https://docs.rapids.ai/overview/RAPIDS%200.15%20Release%20Deck.pdf
|
91
|
+
# page = 3
|
92
|
+
# relative-height = 130
|
93
|
+
# relative-clip-y = 15
|
94
|
+
# relative-clip-height = 70
|
95
|
+
|
96
|
+
(('tag:right'))
|
97
|
+
(('tag:margin-top * 6'))
|
98
|
+
(('note:((<URL:https://docs.rapids.ai/overview/RAPIDS%200.15%20Release%20Deck.pdf#page=3>))'))
|
99
|
+
|
100
|
+
== スライドプロパティー
|
101
|
+
|
102
|
+
: enable-clear-blue-slide-body-vertical-centering
|
103
|
+
false
|
104
|
+
|
105
|
+
: enable-title-on-image
|
106
|
+
false
|
107
|
+
|
108
|
+
= 利用事例:RAPIDS
|
109
|
+
|
110
|
+
# image
|
111
|
+
# src = https://docs.rapids.ai/overview/RAPIDS%200.15%20Release%20Deck.pdf
|
112
|
+
# page = 8
|
113
|
+
# relative-height = 120
|
114
|
+
# relative-clip-y = 15
|
115
|
+
# relative-clip-height = 80
|
116
|
+
|
117
|
+
(('tag:right'))
|
118
|
+
(('tag:margin-top * 5'))
|
119
|
+
(('note:((<URL:https://docs.rapids.ai/overview/RAPIDS%200.15%20Release%20Deck.pdf#page=8>))'))
|
120
|
+
|
121
|
+
== スライドプロパティー
|
122
|
+
|
123
|
+
: enable-clear-blue-slide-body-vertical-centering
|
124
|
+
false
|
125
|
+
|
126
|
+
: enable-title-on-image
|
127
|
+
false
|
128
|
+
|
129
|
+
= どうして速いの?
|
130
|
+
|
131
|
+
* シリアライズコストが低い
|
132
|
+
* すぐに送れるようになる
|
133
|
+
* デシリアライズコストが低い
|
134
|
+
* 受け取ったデータをすぐに使えるようになる
|
135
|
+
|
136
|
+
= シリアライズ処理
|
137
|
+
|
138
|
+
(1) メタデータを用意
|
139
|
+
(2) メタデータ+元データそのものを送信
|
140
|
+
* 元データを加工しないから速い!
|
141
|
+
* なにもしないのが最速!
|
142
|
+
|
143
|
+
= 元データを加工する例:JSON
|
144
|
+
|
145
|
+
0x01 0x02(8bit数値の配列)
|
146
|
+
↓
|
147
|
+
"[1,2]"(JSON)
|
148
|
+
0x01→0x31(数値→ASCIIの文字:'1')
|
149
|
+
0x02→0x32(数値→ASCIIの文字:'2')
|
150
|
+
|
151
|
+
= 元データそのものを使うと…
|
152
|
+
|
153
|
+
* 変換処理にCPUを使わなくてよい
|
154
|
+
* 速い
|
155
|
+
* 変換後のデータ用のメモリー確保ゼロ
|
156
|
+
* 大きなメモリー確保はコストが高い
|
157
|
+
* 一定の作業領域を使い回すとかしなくてよい
|
158
|
+
* 速い
|
159
|
+
|
160
|
+
= デシリアライズ処理
|
161
|
+
|
162
|
+
(1) メタデータをパース
|
163
|
+
(2) メタデータを基に元データを取り出す
|
164
|
+
* 元データをそのまま使えるから速い!
|
165
|
+
* なにもしないのが最速!
|
166
|
+
|
167
|
+
= 元データを元に戻す例:JSON
|
168
|
+
|
169
|
+
"[1,2]"(JSON)
|
170
|
+
↓
|
171
|
+
0x01 0x02(8bit数値の配列)
|
172
|
+
0x31→0x01(ASCIIの文字:'1'→数値)
|
173
|
+
0x32→0x02(ASCIIの文字:'2'→数値)
|
174
|
+
|
175
|
+
= 元データを取り出せると…
|
176
|
+
|
177
|
+
* 変換処理にCPUを使わなくてよい
|
178
|
+
* 速い
|
179
|
+
* 変換後のデータ用のメモリー確保ゼロ
|
180
|
+
* すでにあるデータをそのまま使うのでゼロコピー
|
181
|
+
* 速い
|
182
|
+
* メモリーマップで直接データを使える
|
183
|
+
* ディスク上にあるメモリー容量以上のデータを扱える
|
184
|
+
|
185
|
+
= {,デ}シリアライズコスト
|
186
|
+
|
187
|
+
* Apache Arrowフォーマット
|
188
|
+
* ほぼメタデータのパースコストだけ
|
189
|
+
* それ以外の多くのフォーマット
|
190
|
+
* データ変換処理(CPU)
|
191
|
+
* 作業用メモリー確保処理(メモリー)
|
192
|
+
|
193
|
+
= データ交換が速い!
|
194
|
+
|
195
|
+
(('tag:center'))
|
196
|
+
(('tag:margin-bottom * -0.5'))
|
197
|
+
Apache Arrowフォーマットにすると高速化!
|
198
|
+
|
199
|
+
# image
|
200
|
+
# src = images/apache-arrow-and-data-interchange.svg
|
201
|
+
# relative-height = 100
|
202
|
+
|
203
|
+
= 利用事例:Apache Spark
|
204
|
+
|
205
|
+
# image
|
206
|
+
# src = images/apache-arrow-and-apache-spark.svg
|
207
|
+
# relative-height = 100
|
208
|
+
|
209
|
+
== スライドプロパティー
|
210
|
+
|
211
|
+
: enable-title-on-image
|
212
|
+
false
|
213
|
+
|
214
|
+
= 利用事例:Amazon Athena
|
215
|
+
|
216
|
+
# image
|
217
|
+
# src = images/apache-arrow-and-amazon-athena.svg
|
218
|
+
# relative-height = 100
|
219
|
+
|
220
|
+
== スライドプロパティー
|
221
|
+
|
222
|
+
: enable-title-on-image
|
223
|
+
false
|
224
|
+
|
225
|
+
= 利用事例:RAPIDS
|
226
|
+
|
227
|
+
# image
|
228
|
+
# src = https://docs.rapids.ai/overview/RAPIDS%200.15%20Release%20Deck.pdf
|
229
|
+
# page = 8
|
230
|
+
# relative-height = 120
|
231
|
+
# relative-clip-y = 15
|
232
|
+
# relative-clip-height = 80
|
233
|
+
|
234
|
+
(('tag:right'))
|
235
|
+
(('tag:margin-top * 5'))
|
236
|
+
(('note:((<URL:https://docs.rapids.ai/overview/RAPIDS%200.15%20Release%20Deck.pdf#page=8>))'))
|
237
|
+
|
238
|
+
== スライドプロパティー
|
239
|
+
|
240
|
+
: enable-clear-blue-slide-body-vertical-centering
|
241
|
+
false
|
242
|
+
|
243
|
+
: enable-title-on-image
|
244
|
+
false
|
245
|
+
|
246
|
+
= データサイズは?
|
247
|
+
|
248
|
+
* CPU・メモリーにやさしくて速いんだね!
|
249
|
+
* じゃあ、データサイズはどうなの?
|
250
|
+
* 大きいとネットワーク・IOがボトルネック
|
251
|
+
* 本来のデータ処理に最大限リソースを使いたい
|
252
|
+
* データ交換をボトルネックにしたくない
|
253
|
+
|
254
|
+
= データサイズ
|
255
|
+
|
256
|
+
* 別に小さくない
|
257
|
+
* 元データそのままなのでデータ量に応じて増加
|
258
|
+
* Zstandard・LZ4での圧縮をサポート
|
259
|
+
* ゼロコピーではなくなるがサイズは数分の1
|
260
|
+
* 圧縮・展開が必要→元データそのものを使えない
|
261
|
+
* CPU・メモリー負荷は上がるが\n
|
262
|
+
ネットワーク・IO負荷は下がる
|
263
|
+
* ネットワーク・IOがボトルネックになるなら効く
|
264
|
+
|
265
|
+
= 圧縮時のデータサイズと読み込み速度
|
266
|
+
|
267
|
+
# image
|
268
|
+
# src = https://ursalabs.org/20200414_file_sizes.png
|
269
|
+
# align = left
|
270
|
+
# vertical-align = top
|
271
|
+
# relative-width = 75
|
272
|
+
|
273
|
+
# image
|
274
|
+
# src = https://ursalabs.org/20200414_read_py.png
|
275
|
+
# align = right
|
276
|
+
# vertical-align = bottom
|
277
|
+
# relative-width = 75
|
278
|
+
# relative-clip-y = 12
|
279
|
+
# relative-margin-top = 12
|
280
|
+
# relative-margin-left = -3.8
|
281
|
+
|
282
|
+
(('tag:right'))
|
283
|
+
(('tag:margin-top * 15'))
|
284
|
+
(('note:((<URL:https://ursalabs.org/blog/2020-feather-v2/>))'))
|
285
|
+
|
286
|
+
== スライドプロパティー
|
287
|
+
|
288
|
+
: enable-clear-blue-slide-body-vertical-centering
|
289
|
+
false
|
290
|
+
|
291
|
+
: enable-title-on-image
|
292
|
+
false
|
293
|
+
|
294
|
+
= データ交換が速い!のまとめ
|
295
|
+
|
296
|
+
* {,デ}シリアライズが速い
|
297
|
+
* 元データをそのまま使うので処理が少ない
|
298
|
+
* CPU・メモリーにやさしい
|
299
|
+
* 圧縮もサポート
|
300
|
+
* ネットワーク・IOがボトルネックならこれ
|
301
|
+
* CPU・メモリー負荷は上がるが\n
|
302
|
+
データ交換のボトルネックを解消できるかも
|
303
|
+
|
304
|
+
= 交換したデータの扱い
|
305
|
+
|
306
|
+
* データ分析はデータ交換だけじゃない
|
307
|
+
* データ交換だけ速くしても基盤とは言えない
|
308
|
+
* データ処理も速くしないと!
|
309
|
+
* データ処理を速くするにはデータ構造が大事
|
310
|
+
|
311
|
+
= 高速処理のためのデータ構造
|
312
|
+
|
313
|
+
* 基本方針:
|
314
|
+
* 関連するデータを近くに置く
|
315
|
+
* 効果:
|
316
|
+
* CPUキャッシュミスを減らす
|
317
|
+
* SIMDを活用できる
|
318
|
+
|
319
|
+
= データ分析時の関連データ
|
320
|
+
|
321
|
+
* 分析時はカラムごとの処理が多い
|
322
|
+
* 集計・ソート・絞り込み…
|
323
|
+
* 同じカラムのデータを近くに置く
|
324
|
+
* カラムナーフォーマット
|
325
|
+
|
326
|
+
= カラムナーフォーマット
|
327
|
+
|
328
|
+
# img
|
329
|
+
# src = images/columnar.svg
|
330
|
+
# relative_height = 100
|
331
|
+
|
332
|
+
== スライドプロパティー
|
333
|
+
|
334
|
+
: enable-title-on-image
|
335
|
+
false
|
336
|
+
|
337
|
+
= 各カラムでのデータの配置
|
338
|
+
|
339
|
+
* 関連するデータを近くに置く
|
340
|
+
* 定数時間でアクセスできるように置く
|
341
|
+
* SIMDできるように置く
|
342
|
+
* アラインする\n
|
343
|
+
(('note:アライン:データの境界を64バイトの倍数とかに揃える'))
|
344
|
+
* 条件分岐をなくす
|
345
|
+
|
346
|
+
= 真偽値・数値のデータの配置
|
347
|
+
|
348
|
+
固定長データなので連続して配置
|
349
|
+
32ビット整数:[1, 2, 3]
|
350
|
+
0x01 0x00 0x00 0x00 0x02 0x00 0x00 0x00 0x03 ...
|
351
|
+
|
352
|
+
= 文字列・バイト列:データの配置
|
353
|
+
|
354
|
+
実データバイト列+長さ配列に配置
|
355
|
+
UTF-8文字列:["Hello", "", "!"]
|
356
|
+
実データバイト列:"Hello!"
|
357
|
+
長さ配列:[0, 5, 5, 6]
|
358
|
+
i番目の長さ:長さ配列[i+1] - 長さ配列[i]
|
359
|
+
i番目のデータ:
|
360
|
+
実データバイト列[長さ配列[i]..長さ配列[i+1]]
|
361
|
+
注:長さ→データ→長さ→データ→…で置くと
|
362
|
+
定数時間でi番目にアクセスできない
|
363
|
+
|
364
|
+
= nullと条件分岐
|
365
|
+
|
366
|
+
* null対応のアプローチ:
|
367
|
+
* null値:Julia: ((<(({missing}))|URL:https://docs.julialang.org/en/v1/manual/missing/>)), R: ((<(({NA}))|URL:https://cran.r-project.org/doc/manuals/r-release/R-lang.html#NA-handling>))
|
368
|
+
* 別途ビットマップを用意:Apache Arrow
|
369
|
+
* ビットマップを使うと条件分岐をなくせる
|
370
|
+
|
371
|
+
= nullと条件分岐とSIMD
|
372
|
+
|
373
|
+
# img
|
374
|
+
# src = images/simd-null.svg
|
375
|
+
# relative_height = 100
|
376
|
+
|
377
|
+
== スライドプロパティー
|
378
|
+
|
379
|
+
: enable-title-on-image
|
380
|
+
false
|
381
|
+
|
382
|
+
= nullと条件分岐とSIMD
|
383
|
+
|
384
|
+
# image
|
385
|
+
# src = https://wesmckinney.com/images/bitmaps_vs_sentinels.png
|
386
|
+
# relative-height = 90
|
387
|
+
|
388
|
+
(('tag:right'))
|
389
|
+
(('note:((<URL:https://wesmckinney.com/blog/bitmaps-vs-sentinel-values/>))'))
|
390
|
+
|
391
|
+
== スライドプロパティー
|
392
|
+
|
393
|
+
: enable-clear-blue-slide-body-vertical-centering
|
394
|
+
false
|
395
|
+
|
396
|
+
: enable-title-on-image
|
397
|
+
false
|
398
|
+
|
399
|
+
= 高速なデータ処理のまとめ
|
400
|
+
|
401
|
+
* 高速なデータ処理にはデータ構造が重要
|
402
|
+
* データ分析にはカラムナーフォーマットが適切
|
403
|
+
* 定数時間でアクセス可能なデータの配置
|
404
|
+
* SIMDにやさしいデータの持ち方
|
405
|
+
* アライン・null用のビットマップ
|
406
|
+
|
407
|
+
= まとめ
|
408
|
+
|
409
|
+
* Apache Arrow
|
410
|
+
* なんかデータ分析に便利そうなすごいやつ!
|
411
|
+
* 「よくわからん」と言われるからといって\n
|
412
|
+
全体像を説明してもらえなかった!
|
413
|
+
* 使い方も説明してもらえなかった!
|
414
|
+
* Apache Arrowフォーマット
|
415
|
+
* これだけ説明してもらえた!
|
416
|
+
|
417
|
+
= まとめ:Apache Arrowフォーマット
|
418
|
+
|
419
|
+
* Apache Arrowフォーマット
|
420
|
+
* 通信用・インメモリー用のデータフォーマット
|
421
|
+
* 表形式のデータ用
|
422
|
+
* Apache Arrowフォーマットは速い
|
423
|
+
* データ((*交換*))が速い
|
424
|
+
* データ((*処理*))が速い
|
425
|
+
* データ交換してすぐに高速処理できるフォーマット
|
426
|
+
|
427
|
+
= まとめ:なぜデータ交換が速いのか
|
428
|
+
|
429
|
+
* 元データをそのままやりとりできるから
|
430
|
+
* {,デ}シリアライズコストが低い
|
431
|
+
* CPU・メモリーにやさしい
|
432
|
+
* ネットワーク・IOの負荷を下げたい
|
433
|
+
* Zstandard・LZ4による圧縮
|
434
|
+
* CPU・メモリー負荷は上がるが\n
|
435
|
+
ネットワーク・IO負荷は下がる
|
436
|
+
|
437
|
+
= まとめ:なぜデータ処理が速いのか
|
438
|
+
|
439
|
+
* 最適化されたデータ構造
|
440
|
+
* カラムナーフォーマット
|
441
|
+
* SIMDを使えるデータの持ち方
|
442
|
+
* 最適化された実装
|
443
|
+
* (('wait'))今回は紹介していない!!!
|
444
|
+
|
445
|
+
= 次回予告!
|
446
|
+
|
447
|
+
* 案1:Apache Arrowデータの高速処理
|
448
|
+
* 案2:Apache Arrowデータの高速RPC
|
449
|
+
* 案3:○○言語でApache Arrowを使う方法
|
450
|
+
* ...
|
451
|
+
|
452
|
+
= 次のステップ
|
453
|
+
|
454
|
+
* もっと詳しく知りたくなったから\n
|
455
|
+
イベント・社内・…で紹介して!
|
456
|
+
* ((<URL:https://www.clear-code.com/contact/>))
|
457
|
+
* 使いたくなったから技術サポートして!
|
458
|
+
* ((<URL:https://www.clear-code.com/contact/>))
|
459
|
+
* Apache Arrowの開発に参加したい!
|
460
|
+
* ((<URL:https://arrow.apache.org/community/>))
|