rabbit-slide-kou-the-data-thread 2022.6.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rabbit +2 -0
- data/README.rd +24 -0
- data/Rakefile +17 -0
- data/config.yaml +26 -0
- data/pdf/the-data-thread-why-apache-arrow-is-important-for-ruby.pdf +0 -0
- data/why-apache-arrow-is-important-for-ruby.rab +280 -0
- metadata +77 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3a582887c2909c23b74c8bbcc201d7dc7ce28add0fce163e60ddb77078c3f8c8
|
4
|
+
data.tar.gz: 25d55acc603431bab0c8e80799ff66bd0c15421e166d461e29d404647e7b569f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 64df77296cf1448074cf2e70ea50a0fe88891b75c21100692d8bd384e10345fa940711637bb2d698f093ecd9e9f32aee350b6982da68d17a4f97f48a2a703a0b
|
7
|
+
data.tar.gz: ca32124b43f01a52996457fe262a848d319f21b333fa5a343e82972f06c49eae2e6b631fec4276531d35c057c7b19b919d2738db4c549fe147bff0b61a4933b1
|
data/.rabbit
ADDED
data/README.rd
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
= Why Apache Arrow is important for Ruby
|
2
|
+
|
3
|
+
This talk describes why Apache Arrow is important for Ruby.
|
4
|
+
|
5
|
+
== For author
|
6
|
+
|
7
|
+
=== Show
|
8
|
+
|
9
|
+
rake
|
10
|
+
|
11
|
+
=== Publish
|
12
|
+
|
13
|
+
rake publish
|
14
|
+
|
15
|
+
== For viewers
|
16
|
+
|
17
|
+
=== Install
|
18
|
+
|
19
|
+
gem install rabbit-slide-kou-the-data-thread
|
20
|
+
|
21
|
+
=== Show
|
22
|
+
|
23
|
+
rabbit rabbit-slide-kou-the-data-thread.gem
|
24
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require "rabbit/task/slide"
|
2
|
+
|
3
|
+
# Edit ./config.yaml to customize meta data
|
4
|
+
|
5
|
+
spec = nil
|
6
|
+
Rabbit::Task::Slide.new do |task|
|
7
|
+
spec = task.spec
|
8
|
+
# spec.files += Dir.glob("doc/**/*.*")
|
9
|
+
# spec.files -= Dir.glob("private/**/*.*")
|
10
|
+
spec.add_runtime_dependency("rabbit-theme-clear-code")
|
11
|
+
end
|
12
|
+
|
13
|
+
desc "Tag #{spec.version}"
|
14
|
+
task :tag do
|
15
|
+
sh("git", "tag", "-a", spec.version.to_s, "-m", "Publish #{spec.version}")
|
16
|
+
sh("git", "push", "--tags")
|
17
|
+
end
|
data/config.yaml
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
---
|
2
|
+
id: the-data-thread
|
3
|
+
base_name: why-apache-arrow-is-important-for-ruby
|
4
|
+
tags:
|
5
|
+
- rabbit
|
6
|
+
- thedatathread
|
7
|
+
- apachearrow
|
8
|
+
- ruby
|
9
|
+
presentation_date: 2022-06-23
|
10
|
+
version: 2022.6.23.0
|
11
|
+
licenses:
|
12
|
+
- CC-BY-SA-4.0
|
13
|
+
slideshare_id:
|
14
|
+
speaker_deck_id:
|
15
|
+
vimeo_id:
|
16
|
+
youtube_id:
|
17
|
+
width: 1920
|
18
|
+
height: 1080
|
19
|
+
source_code_uri:
|
20
|
+
author:
|
21
|
+
markup_language: :rd
|
22
|
+
name: Sutou Kouhei
|
23
|
+
email: kou@clear-code.com
|
24
|
+
rubygems_user: kou
|
25
|
+
slideshare_user: kou
|
26
|
+
speaker_deck_user:
|
Binary file
|
@@ -0,0 +1,280 @@
|
|
1
|
+
= Why Apache Arrow is important for Ruby
|
2
|
+
|
3
|
+
: author
|
4
|
+
Sutou Kouhei
|
5
|
+
: institution
|
6
|
+
ClearCode Inc.
|
7
|
+
: content-source
|
8
|
+
The Data Thread
|
9
|
+
: date
|
10
|
+
2022-06-23
|
11
|
+
: allotted-time
|
12
|
+
25m
|
13
|
+
: theme
|
14
|
+
clear-code
|
15
|
+
|
16
|
+
= Me
|
17
|
+
|
18
|
+
* Name: Sutou Kouhei\n
|
19
|
+
(('note:(Family Given)'))
|
20
|
+
* ID: kou (call me kou)\n
|
21
|
+
(('note:(ktou or kous when I can't use kou)'))
|
22
|
+
* Ruby committer since 2004
|
23
|
+
* This year's Apache Arrow PMC chair
|
24
|
+
|
25
|
+
# image
|
26
|
+
# src = http://www.gravatar.com/avatar/ee6ffca720cc428d70247dcd7377dd48.jpeg?s=800
|
27
|
+
# align = right
|
28
|
+
# vertical-align = top
|
29
|
+
# relative-width = 30
|
30
|
+
# relative-margin-right = -10
|
31
|
+
# relative-margin-bottom = 0.5
|
32
|
+
# caption = My profile picture is my "Shocker combatman" figure on my Happy Hacking Keyboard
|
33
|
+
# caption-font-size = 1
|
34
|
+
|
35
|
+
= Why I work on Apache Arrow
|
36
|
+
|
37
|
+
For Ruby!\n
|
38
|
+
(I love Ruby!)
|
39
|
+
|
40
|
+
= Ruby
|
41
|
+
|
42
|
+
* Widely used for Web applications\n
|
43
|
+
(('note:(I rarely write Web apps)'))
|
44
|
+
* Ruby on Rails is a useful Web app framework
|
45
|
+
* e.g.: GitHub, GitLab, Shopify, Discourse, ...
|
46
|
+
* Not widely used for data processing
|
47
|
+
* Even though Ruby is a general purpose programming language...
|
48
|
+
|
49
|
+
= Ruby and data processing\n(('note:Negative spiral'))
|
50
|
+
|
51
|
+
# mermaid
|
52
|
+
# relative_width = 90
|
53
|
+
graph LR;
|
54
|
+
A[Few users]-->B[Small community];
|
55
|
+
B-->C[Few developers];
|
56
|
+
C-->D[Few useful tools];
|
57
|
+
D-->A;
|
58
|
+
|
59
|
+
== Slide properties
|
60
|
+
|
61
|
+
: enable-title-on-image
|
62
|
+
false
|
63
|
+
|
64
|
+
= How to break\nthe negative spiral?
|
65
|
+
|
66
|
+
# mermaid
|
67
|
+
# relative_width = 40
|
68
|
+
# align = right
|
69
|
+
# vertical-align = top
|
70
|
+
# relative-margin-right = -10
|
71
|
+
# relative-margin-bottom = 0.5
|
72
|
+
graph LR;
|
73
|
+
A[Few users]-->B[Small community];
|
74
|
+
B-->C[Few developers];
|
75
|
+
C-->D[Few useful tools];
|
76
|
+
D-->A;
|
77
|
+
|
78
|
+
* Few users: Expand useful tools?
|
79
|
+
* Small community: Increase # of users?
|
80
|
+
* Few developers: Expand community?
|
81
|
+
* Few useful tools:\n
|
82
|
+
Increase # of developers?
|
83
|
+
|
84
|
+
= Expand useful tools\nwith few developers
|
85
|
+
|
86
|
+
# mermaid
|
87
|
+
# relative_width = 90
|
88
|
+
graph LR;
|
89
|
+
subgraph all[" "]
|
90
|
+
direction TB
|
91
|
+
subgraph Negative spiral
|
92
|
+
N0[Few users]-->N1[Small community];
|
93
|
+
N1-->N2[Few developers];
|
94
|
+
N2-->N3[Few useful tools];
|
95
|
+
N3-->N0;
|
96
|
+
end
|
97
|
+
subgraph Positive spiral
|
98
|
+
P0[More users]-->P1[Larger community];
|
99
|
+
P1-->P2[More developers];
|
100
|
+
P2-->P3[More useful tools];
|
101
|
+
P3-->P0;
|
102
|
+
end
|
103
|
+
N2-.->P3;
|
104
|
+
end
|
105
|
+
style all fill-opacity:0,stroke-width:0px
|
106
|
+
|
107
|
+
== Slide properties
|
108
|
+
|
109
|
+
: enable-title-on-image
|
110
|
+
false
|
111
|
+
|
112
|
+
= But how?
|
113
|
+
|
114
|
+
Apache Arrow
|
115
|
+
|
116
|
+
= Apache Arrow
|
117
|
+
|
118
|
+
* Cross-language dev platform
|
119
|
+
* Ruby community doesn't need to dev everything
|
120
|
+
* We can share common implementations
|
121
|
+
* Apache Arrow and Ruby
|
122
|
+
* I donated the Ruby bindings for C++ in 2017
|
123
|
+
* Ruby bindings: Red Arrow
|
124
|
+
* Many features are already bound:\n
|
125
|
+
Parquet, Dataset, Gandiva, Flight, ...
|
126
|
+
|
127
|
+
= Red Data Tools
|
128
|
+
|
129
|
+
I started a new project in 2017:
|
130
|
+
|
131
|
+
# blockquote
|
132
|
+
# title = https://red-data-tools.github.io/
|
133
|
+
Red Data Tools is a project that provides data processing tools for Ruby.
|
134
|
+
|
135
|
+
= Red Data Tools: Policy 1
|
136
|
+
|
137
|
+
# blockquote
|
138
|
+
# title = https://red-data-tools.github.io/
|
139
|
+
|
140
|
+
Collaborate across the Ruby community
|
141
|
+
|
142
|
+
We collaborate with the Ruby community and other communities. For example, we use Apache Arrow, shared with many languages, and join in development of Apache Arrow to share benefits.
|
143
|
+
|
144
|
+
= What fields I work on
|
145
|
+
|
146
|
+
* Not only Ruby related features
|
147
|
+
* To be a good Apache Arrow community member
|
148
|
+
* Community support
|
149
|
+
* Answer questions from users
|
150
|
+
* Review pull requests
|
151
|
+
|
152
|
+
= What features I work on
|
153
|
+
|
154
|
+
* Ruby related
|
155
|
+
* C++ impl., C GLib bindings, Linux packages, Homebrew, MSYS2, Release, CI, ...
|
156
|
+
* Not Ruby related
|
157
|
+
* wheel, jar, MATLAB bindings, Julia impl., ...
|
158
|
+
|
159
|
+
= What fields\nRed Data Tools members work on
|
160
|
+
|
161
|
+
* C GLib bindings
|
162
|
+
* Red Arrow
|
163
|
+
* Tensor
|
164
|
+
* Big endian
|
165
|
+
* C++ compute functions
|
166
|
+
|
167
|
+
= What skills I have\n(('note:not used for Apache Arrow yet'))
|
168
|
+
|
169
|
+
Develop MySQL/PostgreSQL plugin
|
170
|
+
|
171
|
+
* I'm a developer of Mroonga/PGroonga
|
172
|
+
* Mroonga: A MySQL plugin for full text search\n
|
173
|
+
(('note:(múlúnɡά)'))
|
174
|
+
* PGroonga: A PG plugin for full text search\n
|
175
|
+
(('note:(píːzí:lúnɡά)'))
|
176
|
+
* Use case: Impl. Flight SQL adapter?
|
177
|
+
|
178
|
+
and more...
|
179
|
+
|
180
|
+
= Apache Arrow and Ruby community
|
181
|
+
|
182
|
+
* Ruby community uses Arrow's work
|
183
|
+
* Ruby community joins in Arrow dev
|
184
|
+
|
185
|
+
= What feature is useful for Ruby?
|
186
|
+
|
187
|
+
Fast data interchange
|
188
|
+
|
189
|
+
= Fast data interchange
|
190
|
+
|
191
|
+
* It's still difficult to use Ruby\n
|
192
|
+
for full data processing
|
193
|
+
* Because Apache Arrow doesn't solve everything
|
194
|
+
* Increase usage of Ruby step by step
|
195
|
+
* Because Ruby can integrate with other languages via Apache Arrow's fast data interchange feature
|
196
|
+
|
197
|
+
= Integration examples
|
198
|
+
|
199
|
+
* DuckDB:\n
|
200
|
+
Arrow ready in-process SQL OLAP DBMS
|
201
|
+
* ((<(('note:https://github.com/red-data-tools/red-arrow-duckdb'))>))
|
202
|
+
* DataFusion:\n
|
203
|
+
Arrow native SQL query engine
|
204
|
+
* WIP: Export C API #1113\n
|
205
|
+
((<(('note:https://github.com/apache/arrow-datafusion/issues/1113'))>))
|
206
|
+
|
207
|
+
= What feature is useful for Ruby?
|
208
|
+
|
209
|
+
Web app related features\n
|
210
|
+
(('note:Because many Ruby users develop Web apps with Ruby on Rails'))
|
211
|
+
|
212
|
+
= What features are useful\nfor Web apps
|
213
|
+
|
214
|
+
* Visualization related features
|
215
|
+
* For dashboard
|
216
|
+
* Fast data interchange with RDBMS
|
217
|
+
* A Web app may have batch jobs to process large data in an RDBMS
|
218
|
+
* See also: mrkn's talk at RubyKaigi 2019\n
|
219
|
+
(('note:(mrkn is an Apache Arrow committer from Red Data Tools)'))\n
|
220
|
+
((<(('note:https://speakerdeck.com/mrkn/reducing-activerecord-memory-consumption-using-apache-arrow'))>))
|
221
|
+
|
222
|
+
= Fast data interchange with RDBMS
|
223
|
+
|
224
|
+
* Apache Arrow Flight SQL
|
225
|
+
* Apache Arrow Database Connectivity: ADBC\n
|
226
|
+
((<(('note:https://docs.google.com/document/d/1t7NrC76SyxL_OffATmjzZs2xcj1owdUsIF2WKL_Zw1U/'))>))
|
227
|
+
|
228
|
+
= Fast data interchange with RDBMS
|
229
|
+
|
230
|
+
# mermaid
|
231
|
+
# relative_width = 90
|
232
|
+
graph LR;
|
233
|
+
subgraph all[" "]
|
234
|
+
direction TB
|
235
|
+
subgraph Apache Arrow Flight SQL
|
236
|
+
FLIGHT0[RDBMS] -->|Apache Arrow Flight| FLIGHT1[Library];
|
237
|
+
FLIGHT1 -->|No conversion| FLIGHT2[Web app];
|
238
|
+
end
|
239
|
+
subgraph Apache Arrow Database Connectivity
|
240
|
+
ADBC0[RDBMS] -->|Own protocol| ADBC1[Library];
|
241
|
+
ADBC1 -->|"Own format→Apache Arrow"| ADBC2[Web app];
|
242
|
+
end
|
243
|
+
end
|
244
|
+
style all fill:#fff,stroke-width:0px
|
245
|
+
|
246
|
+
== Slide properties
|
247
|
+
|
248
|
+
: enable-title-on-image
|
249
|
+
false
|
250
|
+
|
251
|
+
= Apache Arrow data⇄Ruby objects
|
252
|
+
|
253
|
+
* Red Arrow has a fast converter
|
254
|
+
* Implemented in C++
|
255
|
+
* Faster than\n
|
256
|
+
RDBMS's own format data⇄Ruby objects
|
257
|
+
* Both Flight SQL and ADBC will improve performance
|
258
|
+
|
259
|
+
= Wrap up
|
260
|
+
|
261
|
+
* Ruby community joins in Arrow dev
|
262
|
+
* To use Ruby for data processing
|
263
|
+
* Ruby community is interested in:
|
264
|
+
* Integration with other data processing systems
|
265
|
+
* RDBMS related improvements
|
266
|
+
|
267
|
+
= Topics I didn't talk about today
|
268
|
+
|
269
|
+
* GObject Introspection (GI)
|
270
|
+
* Ruby bindings are generated at run-time not compile-time
|
271
|
+
* How does GI work for it?
|
272
|
+
* Linux packaging
|
273
|
+
* How to build deb/rpm for Debian/Ubuntu/CentOS/AlmaLinux/Amazon Linux on x86_64 and arm64?
|
274
|
+
|
275
|
+
= Acknowledgment
|
276
|
+
|
277
|
+
* Voltron Data
|
278
|
+
* Most of my Apache Arrow related work is being done with financial support from Voltron Data since 2022-04
|
279
|
+
* Yukiko Yoshimoto at ClearCode
|
280
|
+
* Added English subtitles to this video
|
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rabbit-slide-kou-the-data-thread
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 2022.6.23.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Sutou Kouhei
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-05-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rabbit
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.0.2
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.0.2
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rabbit-theme-clear-code
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: This talk describes why Apache Arrow is important for Ruby.
|
42
|
+
email:
|
43
|
+
- kou@clear-code.com
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- ".rabbit"
|
49
|
+
- README.rd
|
50
|
+
- Rakefile
|
51
|
+
- config.yaml
|
52
|
+
- pdf/the-data-thread-why-apache-arrow-is-important-for-ruby.pdf
|
53
|
+
- why-apache-arrow-is-important-for-ruby.rab
|
54
|
+
homepage: https://slide.rabbit-shocker.org/authors/kou/the-data-thread/
|
55
|
+
licenses:
|
56
|
+
- CC-BY-SA-4.0
|
57
|
+
metadata: {}
|
58
|
+
post_install_message:
|
59
|
+
rdoc_options: []
|
60
|
+
require_paths:
|
61
|
+
- lib
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '0'
|
72
|
+
requirements: []
|
73
|
+
rubygems_version: 3.4.0.dev
|
74
|
+
signing_key:
|
75
|
+
specification_version: 4
|
76
|
+
summary: Why Apache Arrow is important for Ruby
|
77
|
+
test_files: []
|