rabbit-slide-kou-the-data-thread 2022.6.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 3a582887c2909c23b74c8bbcc201d7dc7ce28add0fce163e60ddb77078c3f8c8
4
+ data.tar.gz: 25d55acc603431bab0c8e80799ff66bd0c15421e166d461e29d404647e7b569f
5
+ SHA512:
6
+ metadata.gz: 64df77296cf1448074cf2e70ea50a0fe88891b75c21100692d8bd384e10345fa940711637bb2d698f093ecd9e9f32aee350b6982da68d17a4f97f48a2a703a0b
7
+ data.tar.gz: ca32124b43f01a52996457fe262a848d319f21b333fa5a343e82972f06c49eae2e6b631fec4276531d35c057c7b19b919d2738db4c549fe147bff0b61a4933b1
data/.rabbit ADDED
@@ -0,0 +1,2 @@
1
+ --size 1920,1080
2
+ why-apache-arrow-is-important-for-ruby.rab
data/README.rd ADDED
@@ -0,0 +1,24 @@
1
+ = Why Apache Arrow is important for Ruby
2
+
3
+ This talk describes why Apache Arrow is important for Ruby.
4
+
5
+ == For author
6
+
7
+ === Show
8
+
9
+ rake
10
+
11
+ === Publish
12
+
13
+ rake publish
14
+
15
+ == For viewers
16
+
17
+ === Install
18
+
19
+ gem install rabbit-slide-kou-the-data-thread
20
+
21
+ === Show
22
+
23
+ rabbit rabbit-slide-kou-the-data-thread.gem
24
+
data/Rakefile ADDED
@@ -0,0 +1,17 @@
1
+ require "rabbit/task/slide"
2
+
3
+ # Edit ./config.yaml to customize meta data
4
+
5
+ spec = nil
6
+ Rabbit::Task::Slide.new do |task|
7
+ spec = task.spec
8
+ # spec.files += Dir.glob("doc/**/*.*")
9
+ # spec.files -= Dir.glob("private/**/*.*")
10
+ spec.add_runtime_dependency("rabbit-theme-clear-code")
11
+ end
12
+
13
+ desc "Tag #{spec.version}"
14
+ task :tag do
15
+ sh("git", "tag", "-a", spec.version.to_s, "-m", "Publish #{spec.version}")
16
+ sh("git", "push", "--tags")
17
+ end
data/config.yaml ADDED
@@ -0,0 +1,26 @@
1
+ ---
2
+ id: the-data-thread
3
+ base_name: why-apache-arrow-is-important-for-ruby
4
+ tags:
5
+ - rabbit
6
+ - thedatathread
7
+ - apachearrow
8
+ - ruby
9
+ presentation_date: 2022-06-23
10
+ version: 2022.6.23.0
11
+ licenses:
12
+ - CC-BY-SA-4.0
13
+ slideshare_id:
14
+ speaker_deck_id:
15
+ vimeo_id:
16
+ youtube_id:
17
+ width: 1920
18
+ height: 1080
19
+ source_code_uri:
20
+ author:
21
+ markup_language: :rd
22
+ name: Sutou Kouhei
23
+ email: kou@clear-code.com
24
+ rubygems_user: kou
25
+ slideshare_user: kou
26
+ speaker_deck_user:
data/why-apache-arrow-is-important-for-ruby.rab ADDED
@@ -0,0 +1,280 @@
1
+ = Why Apache Arrow is important for Ruby
2
+
3
+ : author
4
+ Sutou Kouhei
5
+ : institution
6
+ ClearCode Inc.
7
+ : content-source
8
+ The Data Thread
9
+ : date
10
+ 2022-06-23
11
+ : allotted-time
12
+ 25m
13
+ : theme
14
+ clear-code
15
+
16
+ = Me
17
+
18
+ * Name: Sutou Kouhei\n
19
+ (('note:(Family Given)'))
20
+ * ID: kou (call me kou)\n
21
+ (('note:(ktou or kous when I can't use kou)'))
22
+ * Ruby committer since 2004
23
+ * This year's Apache Arrow PMC chair
24
+
25
+ # image
26
+ # src = http://www.gravatar.com/avatar/ee6ffca720cc428d70247dcd7377dd48.jpeg?s=800
27
+ # align = right
28
+ # vertical-align = top
29
+ # relative-width = 30
30
+ # relative-margin-right = -10
31
+ # relative-margin-bottom = 0.5
32
+ # caption = My profile picture is my "Shocker combatman" figure on my Happy Hacking Keyboard
33
+ # caption-font-size = 1
34
+
35
+ = Why I work on Apache Arrow
36
+
37
+ For Ruby!\n
38
+ (I love Ruby!)
39
+
40
+ = Ruby
41
+
42
+ * Widely used for Web applications\n
43
+ (('note:(I rarely write Web app)'))
44
+ * Ruby on Rails is a useful Web app framework
45
+ * e.g.: GitHub, GitLab, Shopify, Discourse, ...
46
+ * Not widely used for data processing
47
+ * Even though Ruby is a general purpose programming language...
48
+
49
+ = Ruby and data processing\n(('note:Negative spiral'))
50
+
51
+ # mermaid
52
+ # relative_width = 90
53
+ graph LR;
54
+ A[Few users]-->B[Small community];
55
+ B-->C[Few developers];
56
+ C-->D[Few useful tools];
57
+ D-->A;
58
+
59
+ == Slide properties
60
+
61
+ : enable-title-on-image
62
+ false
63
+
64
+ = How to break\nthe negative spiral?
65
+
66
+ # mermaid
67
+ # relative_width = 40
68
+ # align = right
69
+ # vertical-align = top
70
+ # relative-margin-right = -10
71
+ # relative-margin-bottom = 0.5
72
+ graph LR;
73
+ A[Few users]-->B[Small community];
74
+ B-->C[Few developers];
75
+ C-->D[Few useful tools];
76
+ D-->A;
77
+
78
+ * Few users: Expand useful tools?
79
+ * Small community: Increase # of users?
80
+ * Few developers: Expand community?
81
+ * Few useful tools:\n
82
+ Increase # of developers?
83
+
84
+ = Expand useful tools\nwith few developers
85
+
86
+ # mermaid
87
+ # relative_width = 90
88
+ graph LR;
89
+ subgraph all[" "]
90
+ direction TB
91
+ subgraph Negative spiral
92
+ N0[Few users]-->N1[Small community];
93
+ N1-->N2[Few developers];
94
+ N2-->N3[Few useful tools];
95
+ N3-->N0;
96
+ end
97
+ subgraph Positive spiral
98
+ P0[More users]-->P1[Larger community];
99
+ P1-->P2[More developers];
100
+ P2-->P3[More useful tools];
101
+ P3-->P0;
102
+ end
103
+ N2-.->P3;
104
+ end
105
+ style all fill-opacity:0,stroke-width:0px
106
+
107
+ == Slide properties
108
+
109
+ : enable-title-on-image
110
+ false
111
+
112
+ = But how?
113
+
114
+ Apache Arrow
115
+
116
+ = Apache Arrow
117
+
118
+ * Cross-language dev platform
119
+ * Ruby community doesn't need to dev everything
120
+ * We can share common implementations
121
+ * Apache Arrow and Ruby
122
+ * I donated the Ruby bindings for C++ in 2017
123
+ * Ruby bindings: Red Arrow
124
+ * Many features are already bound:\n
125
+ Parquet, Dataset, Gandiva, Flight, ...
126
+
127
+ = Red Data Tools
128
+
129
+ I started a new project in 2017:
130
+
131
+ # blockquote
132
+ # title = https://red-data-tools.github.io/
133
+ Red Data Tools is a project that provides data processing tools for Ruby.
134
+
135
+ = Red Data Tools: Policy 1
136
+
137
+ # blockquote
138
+ # title = https://red-data-tools.github.io/
139
+
140
+ Collaborate across the Ruby community
141
+
142
+ We collaborate with the Ruby community and other communities. For example, we use Apache Arrow, shared with many languages, and join in development of Apache Arrow to share benefits.
143
+
144
+ = What fields I work on
145
+
146
+ * Not only Ruby related features
147
+ * To be a good Apache Arrow community member
148
+ * Community support
149
+ * Answer questions from users
150
+ * Review pull requests
151
+
152
+ = What features I work on
153
+
154
+ * Ruby related
155
+ * C++ impl., C GLib bindings, Linux packages, Homebrew, MSYS2, Release, CI, ...
156
+ * Not Ruby related
157
+ * wheel, jar, MATLAB bindings, Julia impl., ...
158
+
159
+ = What fields\nRed Data Tools members work on
160
+
161
+ * C GLib bindings
162
+ * Red Arrow
163
+ * Tensor
164
+ * Big endian
165
+ * C++ compute functions
166
+
167
+ = What skills I have\n(('note:not used for Apache Arrow yet'))
168
+
169
+ Develop MySQL/PostgreSQL plugin
170
+
171
+ * I'm a developer of Mroonga/PGroonga
172
+ * Mroonga: A MySQL plugin for full text search\n
173
+ (('note:(múlúnɡά)'))
174
+ * PGroonga: A PG plugin for full text search\n
175
+ (('note:(píːzí:lúnɡά)'))
176
+ * Use case: Impl. Flight SQL adapter?
177
+
178
+ and more...
179
+
180
+ = Apache Arrow and Ruby community
181
+
182
+ * Ruby community uses Arrow's work
183
+ * Ruby community joins in Arrow dev
184
+
185
+ = What feature is useful for Ruby?
186
+
187
+ Fast data interchange
188
+
189
+ = Fast data interchange
190
+
191
+ * It's still difficult to use Ruby\n
192
+ for full data processing
193
+ * Because Apache Arrow doesn't solve everything
194
+ * Increase usage of Ruby step by step
195
+ * Because Ruby can integrate with other languages through Apache Arrow's fast data interchange feature
196
+
197
+ = Integration examples
198
+
199
+ * DuckDB:\n
200
+ Arrow ready in-process SQL OLAP DBMS
201
+ * ((<(('note:https://github.com/red-data-tools/red-arrow-duckdb'))>))
202
+ * DataFusion:\n
203
+ Arrow native SQL query engine
204
+ * WIP: Export C API #1113\n
205
+ ((<(('note:https://github.com/apache/arrow-datafusion/issues/1113'))>))
206
+
207
+ = What feature is useful for Ruby?
208
+
209
+ Web app related features\n
210
+ (('note:Because many Ruby users develop Web apps with Ruby on Rails'))
211
+
212
+ = What features are useful\nfor Web apps
213
+
214
+ * Visualization related features
215
+ * For dashboard
216
+ * Fast data interchange with RDBMS
217
+ * A Web app may have batch jobs to process large data in an RDBMS
218
+ * See also: mrkn's talk at RubyKaigi 2019\n
219
+ (('note:(mrkn is an Apache Arrow committer from Red Data Tools)'))\n
220
+ ((<(('note:https://speakerdeck.com/mrkn/reducing-activerecord-memory-consumption-using-apache-arrow'))>))
221
+
222
+ = Fast data interchange with RDBMS
223
+
224
+ * Apache Arrow Flight SQL
225
+ * Apache Arrow Database Connectivity: ADBC\n
226
+ ((<(('note:https://docs.google.com/document/d/1t7NrC76SyxL_OffATmjzZs2xcj1owdUsIF2WKL_Zw1U/'))>))
227
+
228
+ = Fast data interchange with RDBMS
229
+
230
+ # mermaid
231
+ # relative_width = 90
232
+ graph LR;
233
+ subgraph all[" "]
234
+ direction TB
235
+ subgraph Apache Arrow Flight SQL
236
+ FLIGHT0[RDBMS] -->|Apache Arrow Flight| FLIGHT1[Library];
237
+ FLIGHT1 -->|No conversion| FLIGHT2[Web app];
238
+ end
239
+ subgraph Apache Arrow Database Connectivity
240
+ ADBC0[RDBMS] -->|Own protocol| ADBC1[Library];
241
+ ADBC1 -->|"Own format→Apache Arrow"| ADBC2[Web app];
242
+ end
243
+ end
244
+ style all fill:#fff,stroke-width:0px
245
+
246
+ == Slide properties
247
+
248
+ : enable-title-on-image
249
+ false
250
+
251
+ = Apache Arrow data⇄Ruby objects
252
+
253
+ * Red Arrow has a fast converter
254
+ * Implemented in C++
255
+ * Faster than\n
256
+ RDBMS's own format data⇄Ruby objects
257
+ * Both Flight SQL and ADBC will improve performance
258
+
259
+ = Wrap up
260
+
261
+ * Ruby community joins in Arrow dev
262
+ * To use Ruby for data processing
263
+ * Ruby community is interested in:
264
+ * Integration with other data processing systems
265
+ * RDBMS related improvements
266
+
267
+ = Topics I didn't talk about today
268
+
269
+ * GObject Introspection (GI)
270
+ * Ruby bindings are generated at run-time not compile-time
271
+ * How does GI work for it?
272
+ * Linux packaging
273
+ * How to build deb/rpm for Debian/Ubuntu/CentOS/AlmaLinux/Amazon Linux on x86_64 and arm64?
274
+
275
+ = Acknowledgment
276
+
277
+ * Voltron Data
278
+ * Most of my Apache Arrow related work is being done with financial support from Voltron Data since 2022-04
279
+ * Yukiko Yoshimoto at ClearCode
280
+ * Added English subtitles to this video
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rabbit-slide-kou-the-data-thread
3
+ version: !ruby/object:Gem::Version
4
+ version: 2022.6.23.0
5
+ platform: ruby
6
+ authors:
7
+ - Sutou Kouhei
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-05-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rabbit
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 2.0.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 2.0.2
27
+ - !ruby/object:Gem::Dependency
28
+ name: rabbit-theme-clear-code
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: This talk describes why Apache Arrow is important for Ruby.
42
+ email:
43
+ - kou@clear-code.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".rabbit"
49
+ - README.rd
50
+ - Rakefile
51
+ - config.yaml
52
+ - pdf/the-data-thread-why-apache-arrow-is-important-for-ruby.pdf
53
+ - why-apache-arrow-is-important-for-ruby.rab
54
+ homepage: https://slide.rabbit-shocker.org/authors/kou/the-data-thread/
55
+ licenses:
56
+ - CC-BY-SA-4.0
57
+ metadata: {}
58
+ post_install_message:
59
+ rdoc_options: []
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ requirements: []
73
+ rubygems_version: 3.4.0.dev
74
+ signing_key:
75
+ specification_version: 4
76
+ summary: Why Apache Arrow is important for Ruby
77
+ test_files: []