rabbit-slide-kou-the-data-thread 2022.6.23.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rabbit +2 -0
- data/README.rd +24 -0
- data/Rakefile +17 -0
- data/config.yaml +26 -0
- data/pdf/the-data-thread-why-apache-arrow-is-important-for-ruby.pdf +0 -0
- data/why-apache-arrow-is-important-for-ruby.rab +280 -0
- metadata +77 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3a582887c2909c23b74c8bbcc201d7dc7ce28add0fce163e60ddb77078c3f8c8
|
4
|
+
data.tar.gz: 25d55acc603431bab0c8e80799ff66bd0c15421e166d461e29d404647e7b569f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 64df77296cf1448074cf2e70ea50a0fe88891b75c21100692d8bd384e10345fa940711637bb2d698f093ecd9e9f32aee350b6982da68d17a4f97f48a2a703a0b
|
7
|
+
data.tar.gz: ca32124b43f01a52996457fe262a848d319f21b333fa5a343e82972f06c49eae2e6b631fec4276531d35c057c7b19b919d2738db4c549fe147bff0b61a4933b1
|
data/.rabbit
ADDED
data/README.rd
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
= Why Apache Arrow is important for Ruby
|
2
|
+
|
3
|
+
This talk describes why Apache Arrow is important for Ruby.
|
4
|
+
|
5
|
+
== For author
|
6
|
+
|
7
|
+
=== Show
|
8
|
+
|
9
|
+
rake
|
10
|
+
|
11
|
+
=== Publish
|
12
|
+
|
13
|
+
rake publish
|
14
|
+
|
15
|
+
== For viewers
|
16
|
+
|
17
|
+
=== Install
|
18
|
+
|
19
|
+
gem install rabbit-slide-kou-the-data-thread
|
20
|
+
|
21
|
+
=== Show
|
22
|
+
|
23
|
+
rabbit rabbit-slide-kou-the-data-thread.gem
|
24
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require "rabbit/task/slide"
|
2
|
+
|
3
|
+
# Edit ./config.yaml to customize meta data
|
4
|
+
|
5
|
+
spec = nil
|
6
|
+
Rabbit::Task::Slide.new do |task|
|
7
|
+
spec = task.spec
|
8
|
+
# spec.files += Dir.glob("doc/**/*.*")
|
9
|
+
# spec.files -= Dir.glob("private/**/*.*")
|
10
|
+
spec.add_runtime_dependency("rabbit-theme-clear-code")
|
11
|
+
end
|
12
|
+
|
13
|
+
desc "Tag #{spec.version}"
|
14
|
+
task :tag do
|
15
|
+
sh("git", "tag", "-a", spec.version.to_s, "-m", "Publish #{spec.version}")
|
16
|
+
sh("git", "push", "--tags")
|
17
|
+
end
|
data/config.yaml
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
---
|
2
|
+
id: the-data-thread
|
3
|
+
base_name: why-apache-arrow-is-important-for-ruby
|
4
|
+
tags:
|
5
|
+
- rabbit
|
6
|
+
- thedatathread
|
7
|
+
- apachearrow
|
8
|
+
- ruby
|
9
|
+
presentation_date: 2022-06-23
|
10
|
+
version: 2022.6.23.0
|
11
|
+
licenses:
|
12
|
+
- CC-BY-SA-4.0
|
13
|
+
slideshare_id:
|
14
|
+
speaker_deck_id:
|
15
|
+
vimeo_id:
|
16
|
+
youtube_id:
|
17
|
+
width: 1920
|
18
|
+
height: 1080
|
19
|
+
source_code_uri:
|
20
|
+
author:
|
21
|
+
markup_language: :rd
|
22
|
+
name: Sutou Kouhei
|
23
|
+
email: kou@clear-code.com
|
24
|
+
rubygems_user: kou
|
25
|
+
slideshare_user: kou
|
26
|
+
speaker_deck_user:
|
Binary file
|
@@ -0,0 +1,280 @@
|
|
1
|
+
= Why Apache Arrow is important for Ruby
|
2
|
+
|
3
|
+
: author
|
4
|
+
Sutou Kouhei
|
5
|
+
: institution
|
6
|
+
ClearCode Inc.
|
7
|
+
: content-source
|
8
|
+
The Data Thread
|
9
|
+
: date
|
10
|
+
2022-06-23
|
11
|
+
: allotted-time
|
12
|
+
25m
|
13
|
+
: theme
|
14
|
+
clear-code
|
15
|
+
|
16
|
+
= Me
|
17
|
+
|
18
|
+
* Name: Sutou Kouhei\n
|
19
|
+
(('note:(Family Given)'))
|
20
|
+
* ID: kou (call me kou)\n
|
21
|
+
(('note:(ktou or kous when I can't use kou)'))
|
22
|
+
* Ruby committer since 2004
|
23
|
+
* This year's Apache Arrow PMC chair
|
24
|
+
|
25
|
+
# image
|
26
|
+
# src = http://www.gravatar.com/avatar/ee6ffca720cc428d70247dcd7377dd48.jpeg?s=800
|
27
|
+
# align = right
|
28
|
+
# vertical-align = top
|
29
|
+
# relative-width = 30
|
30
|
+
# relative-margin-right = -10
|
31
|
+
# relative-margin-bottom = 0.5
|
32
|
+
# caption = My profile picture is my "Shocker combatman" figure on my Happy Hacking Keyboard
|
33
|
+
# caption-font-size = 1
|
34
|
+
|
35
|
+
= Why I work on Apache Arrow
|
36
|
+
|
37
|
+
For Ruby!\n
|
38
|
+
(I love Ruby!)
|
39
|
+
|
40
|
+
= Ruby
|
41
|
+
|
42
|
+
* Widely used for Web applications\n
|
43
|
+
(('note:(I rarely write Web app)'))
|
44
|
+
* Ruby on Rails is a useful Web app framework
|
45
|
+
* e.g.: GitHub, GitLab, Shopify, Discourse, ...
|
46
|
+
* Not widely used for data processing
|
47
|
+
* Even though Ruby is a general purpose programming language...
|
48
|
+
|
49
|
+
= Ruby and data processing\n(('note:Negative spiral'))
|
50
|
+
|
51
|
+
# mermaid
|
52
|
+
# relative_width = 90
|
53
|
+
graph LR;
|
54
|
+
A[Few users]-->B[Small community];
|
55
|
+
B-->C[Few developers];
|
56
|
+
C-->D[Few useful tools];
|
57
|
+
D-->A;
|
58
|
+
|
59
|
+
== Slide properties
|
60
|
+
|
61
|
+
: enable-title-on-image
|
62
|
+
false
|
63
|
+
|
64
|
+
= How to break\nthe negative spiral?
|
65
|
+
|
66
|
+
# mermaid
|
67
|
+
# relative_width = 40
|
68
|
+
# align = right
|
69
|
+
# vertical-align = top
|
70
|
+
# relative-margin-right = -10
|
71
|
+
# relative-margin-bottom = 0.5
|
72
|
+
graph LR;
|
73
|
+
A[Few users]-->B[Small community];
|
74
|
+
B-->C[Few developers];
|
75
|
+
C-->D[Few useful tools];
|
76
|
+
D-->A;
|
77
|
+
|
78
|
+
* Few users: Expand useful tools?
|
79
|
+
* Small community: Increase # of users?
|
80
|
+
* Few developers: Expand community?
|
81
|
+
* Few useful tools:\n
|
82
|
+
Increase # of developers?
|
83
|
+
|
84
|
+
= Expand useful tools\nwith few developers
|
85
|
+
|
86
|
+
# mermaid
|
87
|
+
# relative_width = 90
|
88
|
+
graph LR;
|
89
|
+
subgraph all[" "]
|
90
|
+
direction TB
|
91
|
+
subgraph Negative spiral
|
92
|
+
N0[Few users]-->N1[Small community];
|
93
|
+
N1-->N2[Few developers];
|
94
|
+
N2-->N3[Few useful tools];
|
95
|
+
N3-->N0;
|
96
|
+
end
|
97
|
+
subgraph Positive spiral
|
98
|
+
P0[More users]-->P1[Larger community];
|
99
|
+
P1-->P2[More developers];
|
100
|
+
P2-->P3[More useful tools];
|
101
|
+
P3-->P0;
|
102
|
+
end
|
103
|
+
N2-.->P3;
|
104
|
+
end
|
105
|
+
style all fill-opacity:0,stroke-width:0px
|
106
|
+
|
107
|
+
== Slide properties
|
108
|
+
|
109
|
+
: enable-title-on-image
|
110
|
+
false
|
111
|
+
|
112
|
+
= But how?
|
113
|
+
|
114
|
+
Apache Arrow
|
115
|
+
|
116
|
+
= Apache Arrow
|
117
|
+
|
118
|
+
* Cross-language dev platform
|
119
|
+
* Ruby community doesn't need to dev everything
|
120
|
+
* We can share common implementations
|
121
|
+
* Apache Arrow and Ruby
|
122
|
+
* I donated the Ruby bindings for C++ in 2017
|
123
|
+
* Ruby bindings: Red Arrow
|
124
|
+
* Many features are already bound:\n
|
125
|
+
Parquet, Dataset, Gandiva, Flight, ...
|
126
|
+
|
127
|
+
= Red Data Tools
|
128
|
+
|
129
|
+
I started a new project in 2017:
|
130
|
+
|
131
|
+
# blockquote
|
132
|
+
# title = https://red-data-tools.github.io/
|
133
|
+
Red Data Tools is a project that provides data processing tools for Ruby.
|
134
|
+
|
135
|
+
= Red Data Tools: Policy 1
|
136
|
+
|
137
|
+
# blockquote
|
138
|
+
# title = https://red-data-tools.github.io/
|
139
|
+
|
140
|
+
Collaborate across the Ruby community
|
141
|
+
|
142
|
+
We collaborate with the Ruby community and other communities. For example, we use Apache Arrow, shared with many languages, and join in development of Apache Arrow to share benefits.
|
143
|
+
|
144
|
+
= What fields I work on
|
145
|
+
|
146
|
+
* Not only Ruby related features
|
147
|
+
* To be a good Apache Arrow community member
|
148
|
+
* Community support
|
149
|
+
* Answer questions from users
|
150
|
+
* Review pull requests
|
151
|
+
|
152
|
+
= What features I work on
|
153
|
+
|
154
|
+
* Ruby related
|
155
|
+
* C++ impl., C GLib bindings, Linux packages, Homebrew, MSYS2, Release, CI, ...
|
156
|
+
* Not Ruby related
|
157
|
+
* wheel, jar, MATLAB bindings, Julia impl., ...
|
158
|
+
|
159
|
+
= What fields\nRed Data Tools members work on
|
160
|
+
|
161
|
+
* C GLib bindings
|
162
|
+
* Red Arrow
|
163
|
+
* Tensor
|
164
|
+
* Big endian
|
165
|
+
* C++ compute functions
|
166
|
+
|
167
|
+
= What skills I have\n(('note:not used for Apache Arrow yet'))
|
168
|
+
|
169
|
+
Develop MySQL/PostgreSQL plugin
|
170
|
+
|
171
|
+
* I'm a developer of Mroonga/PGroonga
|
172
|
+
* Mroonga: A MySQL plugin for full text search\n
|
173
|
+
(('note:(múlúnɡά)'))
|
174
|
+
* PGroonga: A PG plugin for full text search\n
|
175
|
+
(('note:(píːzí:lúnɡά)'))
|
176
|
+
* Use case: Impl. Flight SQL adapter?
|
177
|
+
|
178
|
+
and more...
|
179
|
+
|
180
|
+
= Apache Arrow and Ruby community
|
181
|
+
|
182
|
+
* Ruby community uses Arrow's work
|
183
|
+
* Ruby community joins in Arrow dev
|
184
|
+
|
185
|
+
= What feature is useful for Ruby?
|
186
|
+
|
187
|
+
Fast data interchange
|
188
|
+
|
189
|
+
= Fast data interchange
|
190
|
+
|
191
|
+
* It's still difficult to use Ruby\n
|
192
|
+
for full data processing
|
193
|
+
* Because Apache Arrow doesn't solve everything
|
194
|
+
* Increase usage of Ruby step by step
|
195
|
+
* Because Ruby can integrate with other languages by Apache Arrow's fast data interchange feature
|
196
|
+
|
197
|
+
= Integration examples
|
198
|
+
|
199
|
+
* DuckDB:\n
|
200
|
+
Arrow ready in-process SQL OLAP DBMS
|
201
|
+
* ((<(('note:https://github.com/red-data-tools/red-arrow-duckdb'))>))
|
202
|
+
* DataFusion:\n
|
203
|
+
Arrow native SQL query engine
|
204
|
+
* WIP: Export C API #1113\n
|
205
|
+
((<(('note:https://github.com/apache/arrow-datafusion/issues/1113'))>))
|
206
|
+
|
207
|
+
= What feature is useful for Ruby?
|
208
|
+
|
209
|
+
Web app related features\n
|
210
|
+
(('note:Because many Ruby users develop Web apps with Ruby on Rails'))
|
211
|
+
|
212
|
+
= What features are useful\nfor Web apps
|
213
|
+
|
214
|
+
* Visualization related features
|
215
|
+
* For dashboard
|
216
|
+
* Fast data interchange with RDBMS
|
217
|
+
* Web app may have batch jobs to process large data in RDBMS
|
218
|
+
* See also: mrkn's talk at RubyKaigi 2019\n
|
219
|
+
(('note:(mrkn is an Apache Arrow committer from Red Data Tools)'))\n
|
220
|
+
((<(('note:https://speakerdeck.com/mrkn/reducing-activerecord-memory-consumption-using-apache-arrow'))>))
|
221
|
+
|
222
|
+
= Fast data interchange with RDBMS
|
223
|
+
|
224
|
+
* Apache Arrow Flight SQL
|
225
|
+
* Apache Arrow Database Connectivity: ADBC\n
|
226
|
+
((<(('note:https://docs.google.com/document/d/1t7NrC76SyxL_OffATmjzZs2xcj1owdUsIF2WKL_Zw1U/'))>))
|
227
|
+
|
228
|
+
= Fast data interchange with RDBMS
|
229
|
+
|
230
|
+
# mermaid
|
231
|
+
# relative_width = 90
|
232
|
+
graph LR;
|
233
|
+
subgraph all[" "]
|
234
|
+
direction TB
|
235
|
+
subgraph Apache Arrow Flight SQL
|
236
|
+
FLIGHT0[RDBMS] -->|Apache Arrow Flight| FLIGHT1[Library];
|
237
|
+
FLIGHT1 -->|No conversion| FLIGHT2[Web app];
|
238
|
+
end
|
239
|
+
subgraph Apache Arrow Database Connectivity
|
240
|
+
ADBC0[RDBMS] -->|Own protocol| ADBC1[Library];
|
241
|
+
ADBC1 -->|"Own format→Apache Arrow"| ADBC2[Web app];
|
242
|
+
end
|
243
|
+
end
|
244
|
+
style all fill:#fff,stroke-width:0px
|
245
|
+
|
246
|
+
== Slide properties
|
247
|
+
|
248
|
+
: enable-title-on-image
|
249
|
+
false
|
250
|
+
|
251
|
+
= Apache Arrow data⇄Ruby objects
|
252
|
+
|
253
|
+
* Red Arrow has a fast converter
|
254
|
+
* Implemented in C++
|
255
|
+
* Faster than\n
|
256
|
+
RDBMS's own format data⇄Ruby objects
|
257
|
+
* Both Flight SQL and ADBC will improve performance
|
258
|
+
|
259
|
+
= Wrap up
|
260
|
+
|
261
|
+
* Ruby community joins in Arrow dev
|
262
|
+
* To use Ruby for data processing
|
263
|
+
* Ruby community is interested in:
|
264
|
+
* Integration with other data processing systems
|
265
|
+
* RDBMS related improvements
|
266
|
+
|
267
|
+
= Topics I didn't talk about today
|
268
|
+
|
269
|
+
* GObject Introspection (GI)
|
270
|
+
* Ruby bindings are generated at run-time not compile-time
|
271
|
+
* How does GI work for it?
|
272
|
+
* Linux packaging
|
273
|
+
* How to build deb/rpm for Debian/Ubuntu/CentOS/AlmaLinux/Amazon Linux on x86_64 and arm64?
|
274
|
+
|
275
|
+
= Acknowledgment
|
276
|
+
|
277
|
+
* Voltron Data
|
278
|
+
* Most of my Apache Arrow related work is being done with financial support from Voltron Data since 2022-04
|
279
|
+
* Yukiko Yoshimoto at ClearCode
|
280
|
+
* Added English subtitles to this video
|
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rabbit-slide-kou-the-data-thread
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 2022.6.23.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Sutou Kouhei
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-05-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rabbit
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.0.2
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.0.2
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rabbit-theme-clear-code
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: This talk describes why Apache Arrow is important for Ruby.
|
42
|
+
email:
|
43
|
+
- kou@clear-code.com
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- ".rabbit"
|
49
|
+
- README.rd
|
50
|
+
- Rakefile
|
51
|
+
- config.yaml
|
52
|
+
- pdf/the-data-thread-why-apache-arrow-is-important-for-ruby.pdf
|
53
|
+
- why-apache-arrow-is-important-for-ruby.rab
|
54
|
+
homepage: https://slide.rabbit-shocker.org/authors/kou/the-data-thread/
|
55
|
+
licenses:
|
56
|
+
- CC-BY-SA-4.0
|
57
|
+
metadata: {}
|
58
|
+
post_install_message:
|
59
|
+
rdoc_options: []
|
60
|
+
require_paths:
|
61
|
+
- lib
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '0'
|
72
|
+
requirements: []
|
73
|
+
rubygems_version: 3.4.0.dev
|
74
|
+
signing_key:
|
75
|
+
specification_version: 4
|
76
|
+
summary: Why Apache Arrow is important for Ruby
|
77
|
+
test_files: []
|