rabbit-slide-kou-the-data-thread 2022.6.23.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 3a582887c2909c23b74c8bbcc201d7dc7ce28add0fce163e60ddb77078c3f8c8
4
+ data.tar.gz: 25d55acc603431bab0c8e80799ff66bd0c15421e166d461e29d404647e7b569f
5
+ SHA512:
6
+ metadata.gz: 64df77296cf1448074cf2e70ea50a0fe88891b75c21100692d8bd384e10345fa940711637bb2d698f093ecd9e9f32aee350b6982da68d17a4f97f48a2a703a0b
7
+ data.tar.gz: ca32124b43f01a52996457fe262a848d319f21b333fa5a343e82972f06c49eae2e6b631fec4276531d35c057c7b19b919d2738db4c549fe147bff0b61a4933b1
data/.rabbit ADDED
@@ -0,0 +1,2 @@
1
+ --size 1920,1080
2
+ why-apache-arrow-is-important-for-ruby.rab
data/README.rd ADDED
@@ -0,0 +1,24 @@
1
+ = Why Apache Arrow is important for Ruby
2
+
3
+ This talk describes why Apache Arrow is important for Ruby.
4
+
5
+ == For author
6
+
7
+ === Show
8
+
9
+ rake
10
+
11
+ === Publish
12
+
13
+ rake publish
14
+
15
+ == For viewers
16
+
17
+ === Install
18
+
19
+ gem install rabbit-slide-kou-the-data-thread
20
+
21
+ === Show
22
+
23
+ rabbit rabbit-slide-kou-the-data-thread.gem
24
+
data/Rakefile ADDED
@@ -0,0 +1,17 @@
1
+ require "rabbit/task/slide"
2
+
3
+ # Edit ./config.yaml to customize meta data
4
+
5
+ spec = nil
6
+ Rabbit::Task::Slide.new do |task|
7
+ spec = task.spec
8
+ # spec.files += Dir.glob("doc/**/*.*")
9
+ # spec.files -= Dir.glob("private/**/*.*")
10
+ spec.add_runtime_dependency("rabbit-theme-clear-code")
11
+ end
12
+
13
+ desc "Tag #{spec.version}"
14
+ task :tag do
15
+ sh("git", "tag", "-a", spec.version.to_s, "-m", "Publish #{spec.version}")
16
+ sh("git", "push", "--tags")
17
+ end
data/config.yaml ADDED
@@ -0,0 +1,26 @@
1
+ ---
2
+ id: the-data-thread
3
+ base_name: why-apache-arrow-is-important-for-ruby
4
+ tags:
5
+ - rabbit
6
+ - thedatathread
7
+ - apachearrow
8
+ - ruby
9
+ presentation_date: 2022-06-23
10
+ version: 2022.6.23.0
11
+ licenses:
12
+ - CC-BY-SA-4.0
13
+ slideshare_id:
14
+ speaker_deck_id:
15
+ vimeo_id:
16
+ youtube_id:
17
+ width: 1920
18
+ height: 1080
19
+ source_code_uri:
20
+ author:
21
+ markup_language: :rd
22
+ name: Sutou Kouhei
23
+ email: kou@clear-code.com
24
+ rubygems_user: kou
25
+ slideshare_user: kou
26
+ speaker_deck_user:
@@ -0,0 +1,280 @@
1
+ = Why Apache Arrow is important for Ruby
2
+
3
+ : author
4
+ Sutou Kouhei
5
+ : institution
6
+ ClearCode Inc.
7
+ : content-source
8
+ The Data Thread
9
+ : date
10
+ 2022-06-23
11
+ : allotted-time
12
+ 25m
13
+ : theme
14
+ clear-code
15
+
16
+ = Me
17
+
18
+ * Name: Sutou Kouhei\n
19
+ (('note:(Family Given)'))
20
+ * ID: kou (call me kou)\n
21
+ (('note:(ktou or kous when I can't use kou)'))
22
+ * Ruby committer since 2004
23
+ * This year's Apache Arrow PMC chair
24
+
25
+ # image
26
+ # src = http://www.gravatar.com/avatar/ee6ffca720cc428d70247dcd7377dd48.jpeg?s=800
27
+ # align = right
28
+ # vertical-align = top
29
+ # relative-width = 30
30
+ # relative-margin-right = -10
31
+ # relative-margin-bottom = 0.5
32
+ # caption = My profile picture is my "Shocker combatman" figure on my Happy Hacking Keyboard
33
+ # caption-font-size = 1
34
+
35
+ = Why I work on Apache Arrow
36
+
37
+ For Ruby!\n
38
+ (I love Ruby!)
39
+
40
+ = Ruby
41
+
42
+ * Widely used for Web applications\n
43
+ (('note:(I rarely write Web app)'))
44
+ * Ruby on Rails is a useful Web app framework
45
+ * e.g.: GitHub, GitLab, Shopify, Discourse, ...
46
+ * Not widely used for data processing
47
+ * Even though Ruby is a general purpose programming language...
48
+
49
+ = Ruby and data processing\n(('note:Negative spiral'))
50
+
51
+ # mermaid
52
+ # relative_width = 90
53
+ graph LR;
54
+ A[Few users]-->B[Small community];
55
+ B-->C[Few developers];
56
+ C-->D[Few useful tools];
57
+ D-->A;
58
+
59
+ == Slide properties
60
+
61
+ : enable-title-on-image
62
+ false
63
+
64
+ = How to break\nthe negative spiral?
65
+
66
+ # mermaid
67
+ # relative_width = 40
68
+ # align = right
69
+ # vertical-align = top
70
+ # relative-margin-right = -10
71
+ # relative-margin-bottom = 0.5
72
+ graph LR;
73
+ A[Few users]-->B[Small community];
74
+ B-->C[Few developers];
75
+ C-->D[Few useful tools];
76
+ D-->A;
77
+
78
+ * Few users: Expand useful tools?
79
+ * Small community: Increase # of users?
80
+ * Few developers: Expand community?
81
+ * Few useful tools:\n
82
+ Increase # of developers?
83
+
84
+ = Expand useful tools\nwith few developers
85
+
86
+ # mermaid
87
+ # relative_width = 90
88
+ graph LR;
89
+ subgraph all[" "]
90
+ direction TB
91
+ subgraph Negative spiral
92
+ N0[Few users]-->N1[Small community];
93
+ N1-->N2[Few developers];
94
+ N2-->N3[Few useful tools];
95
+ N3-->N0;
96
+ end
97
+ subgraph Positive spiral
98
+ P0[More users]-->P1[Larger community];
99
+ P1-->P2[More developers];
100
+ P2-->P3[More useful tools];
101
+ P3-->P0;
102
+ end
103
+ N2-.->P3;
104
+ end
105
+ style all fill-opacity:0,stroke-width:0px
106
+
107
+ == Slide properties
108
+
109
+ : enable-title-on-image
110
+ false
111
+
112
+ = But how?
113
+
114
+ Apache Arrow
115
+
116
+ = Apache Arrow
117
+
118
+ * Cross-language dev platform
119
+ * Ruby community doesn't need to dev everything
120
+ * We can share common implementations
121
+ * Apache Arrow and Ruby
122
+ * I donated the Ruby bindings for C++ in 2017
123
+ * Ruby bindings: Red Arrow
124
+ * Many features are already bound:\n
125
+ Parquet, Dataset, Gandiva, Flight, ...
126
+
127
+ = Red Data Tools
128
+
129
+ I started a new project in 2017:
130
+
131
+ # blockquote
132
+ # title = https://red-data-tools.github.io/
133
+ Red Data Tools is a project that provides data processing tools for Ruby.
134
+
135
+ = Red Data Tools: Policy 1
136
+
137
+ # blockquote
138
+ # title = https://red-data-tools.github.io/
139
+
140
+ Collaborate across the Ruby community
141
+
142
+ We collaborate with the Ruby community and other communities. For example, we use Apache Arrow, shared with many languages, and join in development of Apache Arrow to share benefits.
143
+
144
+ = What fields I work on
145
+
146
+ * Not only Ruby related features
147
+ * To be a good Apache Arrow community member
148
+ * Community support
149
+ * Answer questions from users
150
+ * Review pull requests
151
+
152
+ = What features I work on
153
+
154
+ * Ruby related
155
+ * C++ impl., C GLib bindings, Linux packages, Homebrew, MSYS2, Release, CI, ...
156
+ * Not Ruby related
157
+ * wheel, jar, MATLAB bindings, Julia impl., ...
158
+
159
+ = What fields\nRed Data Tools members work on
160
+
161
+ * C GLib bindings
162
+ * Red Arrow
163
+ * Tensor
164
+ * Big endian
165
+ * C++ compute functions
166
+
167
+ = What skills I have\n(('note:not used for Apache Arrow yet'))
168
+
169
+ Develop MySQL/PostgreSQL plugin
170
+
171
+ * I'm a developer of Mroonga/PGroonga
172
+ * Mroonga: A MySQL plugin for full text search\n
173
+ (('note:(múlúnɡά)'))
174
+ * PGroonga: A PG plugin for full text search\n
175
+ (('note:(píːzí:lúnɡά)'))
176
+ * Use case: Impl. Flight SQL adapter?
177
+
178
+ and more...
179
+
180
+ = Apache Arrow and Ruby community
181
+
182
+ * Ruby community uses Arrow's work
183
+ * Ruby community joins in Arrow dev
184
+
185
+ = What feature is useful for Ruby?
186
+
187
+ Fast data interchange
188
+
189
+ = Fast data interchange
190
+
191
+ * It's still difficult to use Ruby\n
192
+ for full data processing
193
+ * Because Apache Arrow doesn't solve everything
194
+ * Increase usage of Ruby step by step
195
+ * Because Ruby can integrate with other languages by Apache Arrow's fast data interchange feature
196
+
197
+ = Integration examples
198
+
199
+ * DuckDB:\n
200
+ Arrow ready in-process SQL OLAP DBMS
201
+ * ((<(('note:https://github.com/red-data-tools/red-arrow-duckdb'))>))
202
+ * DataFusion:\n
203
+ Arrow native SQL query engine
204
+ * WIP: Export C API #1113\n
205
+ ((<(('note:https://github.com/apache/arrow-datafusion/issues/1113'))>))
206
+
207
+ = What feature is useful for Ruby?
208
+
209
+ Web app related features\n
210
+ (('note:Because many Ruby users develop Web apps with Ruby on Rails'))
211
+
212
+ = What features are useful\nfor Web apps
213
+
214
+ * Visualization related features
215
+ * For dashboard
216
+ * Fast data interchange with RDBMS
217
+ * Web app may have batch jobs to process large data in RDBMS
218
+ * See also: mrkn's talk at RubyKaigi 2019\n
219
+ (('note:(mrkn is an Apache Arrow committer from Red Data Tools)'))\n
220
+ ((<(('note:https://speakerdeck.com/mrkn/reducing-activerecord-memory-consumption-using-apache-arrow'))>))
221
+
222
+ = Fast data interchange with RDBMS
223
+
224
+ * Apache Arrow Flight SQL
225
+ * Apache Arrow Database Connectivity: ADBC\n
226
+ ((<(('note:https://docs.google.com/document/d/1t7NrC76SyxL_OffATmjzZs2xcj1owdUsIF2WKL_Zw1U/'))>))
227
+
228
+ = Fast data interchange with RDBMS
229
+
230
+ # mermaid
231
+ # relative_width = 90
232
+ graph LR;
233
+ subgraph all[" "]
234
+ direction TB
235
+ subgraph Apache Arrow Flight SQL
236
+ FLIGHT0[RDBMS] -->|Apache Arrow Flight| FLIGHT1[Library];
237
+ FLIGHT1 -->|No conversion| FLIGHT2[Web app];
238
+ end
239
+ subgraph Apache Arrow Database Connectivity
240
+ ADBC0[RDBMS] -->|Own protocol| ADBC1[Library];
241
+ ADBC1 -->|"Own format→Apache Arrow"| ADBC2[Web app];
242
+ end
243
+ end
244
+ style all fill:#fff,stroke-width:0px
245
+
246
+ == Slide properties
247
+
248
+ : enable-title-on-image
249
+ false
250
+
251
+ = Apache Arrow data⇄Ruby objects
252
+
253
+ * Red Arrow has a fast converter
254
+ * Implemented in C++
255
+ * Faster than\n
256
+ RDBMS's own format data⇄Ruby objects
257
+ * Both Flight SQL and ADBC will improve performance
258
+
259
+ = Wrap up
260
+
261
+ * Ruby community joins in Arrow dev
262
+ * To use Ruby for data processing
263
+ * Ruby community is interested in:
264
+ * Integration with other data processing systems
265
+ * RDBMS related improvements
266
+
267
+ = Topics I didn't talk about today
268
+
269
+ * GObject Introspection (GI)
270
+ * Ruby bindings are generated at run-time not compile-time
271
+ * How does GI work for it?
272
+ * Linux packaging
273
+ * How to build deb/rpm for Debian/Ubuntu/CentOS/AlmaLinux/Amazon Linux on x86_64 and arm64?
274
+
275
+ = Acknowledgment
276
+
277
+ * Voltron Data
278
+ * Most of my Apache Arrow related work has been done with financial support from Voltron Data since 2022-04
279
+ * Yukiko Yoshimoto at ClearCode
280
+ * Added English subtitles to this video
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rabbit-slide-kou-the-data-thread
3
+ version: !ruby/object:Gem::Version
4
+ version: 2022.6.23.0
5
+ platform: ruby
6
+ authors:
7
+ - Sutou Kouhei
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-05-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rabbit
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 2.0.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 2.0.2
27
+ - !ruby/object:Gem::Dependency
28
+ name: rabbit-theme-clear-code
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: This talk describes why Apache Arrow is important for Ruby.
42
+ email:
43
+ - kou@clear-code.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".rabbit"
49
+ - README.rd
50
+ - Rakefile
51
+ - config.yaml
52
+ - pdf/the-data-thread-why-apache-arrow-is-important-for-ruby.pdf
53
+ - why-apache-arrow-is-important-for-ruby.rab
54
+ homepage: https://slide.rabbit-shocker.org/authors/kou/the-data-thread/
55
+ licenses:
56
+ - CC-BY-SA-4.0
57
+ metadata: {}
58
+ post_install_message:
59
+ rdoc_options: []
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ requirements: []
73
+ rubygems_version: 3.4.0.dev
74
+ signing_key:
75
+ specification_version: 4
76
+ summary: Why Apache Arrow is important for Ruby
77
+ test_files: []