RubyGems - rabbit-slide-kou-rubykaigi-2022 - Versions diffs - 2022.9.10 - Mend

rabbit-slide-kou-rubykaigi-2022 2022.9.10

Files changed (12) hide show

checksums.yaml +7 -0
data/.rabbit +2 -0
data/README.rd +48 -0
data/Rakefile +18 -0
data/config.yaml +24 -0
data/fast-data-processing-with-ruby-and-apache-arrow.rab +803 -0
data/images/apache-arrow-commits-kou-with-mark.png +0 -0
data/images/apache-arrow-commits-kou.png +0 -0
data/images/clear-code-rubykaigi-2022-silver-sponsor.png +0 -0
data/pdf/rubykaigi-2022-fast-data-processing-with-ruby-and-apache-arrow.pdf +0 -0
data/theme.rb +1 -0
metadata +89 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 1599f5092cdaeb9b59633f195c0b5392633b2cbab26c5fb98eb72cced8212be5
+  data.tar.gz: c2fad8b0605d89c25cbc3d22c48a908c663b558416d792ee32df9bdb02894072
+SHA512:
+  metadata.gz: 2d8bf9c9a96d49f7fd2dfe8e54b8ddc94e0ad9d9ed844e59b62b85e3a89e02a5be98f121351cb41508f07921b6f6b51546a49fdf00b414c409a98ee9ba2e0d11
+  data.tar.gz: 65075214f316ffeb6faa536ee8526c1050d0b9ac4c0eb994aa34e23f846d0922e5cdb05ba63c6ff9a04226ca0e53d59967092035124baadcd48bcacd7bf31f54

data/.rabbit ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ --size 960,540
2	+ fast-data-processing-with-ruby-and-apache-arrow.rab

data/README.rd ADDED Viewed

@@ -0,0 +1,48 @@
+= Fast data processing with Ruby and Apache Arrow
+I introduced Ruby and Apache Arrow integration including the "super fast large data interchange and processing" Apache Arrow feature at RubyKaigi Takeout 2021.
+This talk introduces how we can use the "super fast large data interchange and processing" Apache Arrow feature in Ruby. Here are some use cases:
+* Fast data retrieval (fast (({pluck}))) from DB such as MySQL and PostgreSQL for batch processes in a Ruby on Rails application
+* Fast data interchange with JavaScript for dynamic visualization in a Ruby on Rails application
+* Fast OLAP with in-process DB such as DuckDB and Apache Arrow DataFusion in a Ruby on Rails application or irb session
+== License
+=== Slide
+CC BY-SA 4.0
+Use the following for notation of the author:
+  * Sutou Kouhei
+==== ClearCode Inc. logo
+CC BY-SA 4.0
+Author: ClearCode Inc.
+It is used in page header and some pages in the slide.
+== For author
+=== Show
+  rake
+=== Publish
+  rake publish
+== For viewers
+=== Install
+  gem install rabbit-slide-kou-rubykaigi-2022
+=== Show
+  rabbit rabbit-slide-kou-rubykaigi-2022.gem

data/Rakefile ADDED Viewed

@@ -0,0 +1,18 @@
+require "rabbit/task/slide"
+# Edit ./config.yaml to customize meta data
+spec = nil
+Rabbit::Task::Slide.new do |task|
+  spec = task.spec
+  spec.files += Dir.glob("images/**/*.*")
+  # spec.files += Dir.glob("doc/**/*.*")
+  # spec.files -= Dir.glob("private/**/*.*")
+  spec.add_runtime_dependency("rabbit-theme-clear-code")
+end
+desc "Tag #{spec.version}"
+task :tag do
+  sh("git", "tag", "-a", spec.version.to_s, "-m", "Publish #{spec.version}")
+  sh("git", "push", "--tags")
+end

data/config.yaml ADDED Viewed

@@ -0,0 +1,24 @@
+---
+id: rubykaigi-2022
+base_name: fast-data-processing-with-ruby-and-apache-arrow
+tags:
+- rabbit
+- rubykaigi
+- ruby
+- apache_arrow
+presentation_date: 2022-09-10
+version: 2022.9.10
+licenses:
+- CC-BY-SA-4.0
+slideshare_id: rubykaigi-2022
+speaker_deck_id:
+vimeo_id:
+youtube_id:
+source_code_uri: "https://gitlab.com/ktou/rabbit-slide-kou-rubykaigi-2022"
+author:
+  markup_language: :rd
+  name: Sutou Kouhei
+  email: kou@clear-code.com
+  rubygems_user: kou
+  slideshare_user: kou
+  speaker_deck_user:

data/fast-data-processing-with-ruby-and-apache-arrow.rab ADDED Viewed

@@ -0,0 +1,803 @@
+= Fast data processing\n(('tag:small: with Ruby and Apache Arrow'))
+: author
+   Sutou Kouhei
+: institution
+   ClearCode Inc.
+: content-source
+   RubyKaigi 2022
+: date
+   2022-09-10
+: start-time
+   2022-09-10T14:10:00+09:00
+: end-time
+   2022-09-10T14:40:00+09:00
+: theme
+   .
+= Sutou Kouhei\nA president Ruby committer
+The president of ClearCode Inc.\n
+(('note:クリアコードの社長'))
+  # img
+  # src = images/clear-code-rubykaigi-2022-silver-sponsor.png
+  # relative_height = 100
+  # reflect_ratio = 0.1
+= Sutou Kouhei\nThe Apache Arrow PMC chair
+  * PMC: Project Management Committee\n
+    (('note:Apache Arrowのプロジェクト管理委員会のリーダー'))
+  * #2 commits(('note:（コミット数2位）'))
+  # img
+  # src = images/apache-arrow-commits-kou-with-mark.png
+  # relative_height = 120
+  # reflect_ratio = 0.1
+= Sutou Kouhei\nThe pioneer in Ruby and Arrow
+  * A Ruby committer
+    * Maintain some standard libraries/default gems\n
+      (('note:標準ライブラリーとかデフォルトgemのメンテナンスをしている'))
+  * The author of Red Arrow
+  * Red Arrow:
+    * The official Apache Arrow library for Ruby\n
+      (('note:公式のRuby用のApache Arrowライブラリー'))
+= Why do I work on Red Arrow?\n(('note:なぜRed Arrowの開発をしているか'))
+  * To use Ruby for data processing too!\n
+    (('note:データ処理でもRubyを使いたい！'))
+    * At least a part of data processing\n
+      (('note:データ処理の全部と言わず一部だけでも'))
+  * Data processing is an important task\n
+    (('note:データ処理は最近の重要なタスクの1つ'))
+    * # of Rubyists will increase thanks to this\n
+      (('note:データ処理にRubyを使えるようになるとRubyistが増えるはず'))
+= Current situation\n(('note:Negative spiral'))\n(('note:今は負のスパイラル'))
+  # mermaid
+  # relative_width = 90
+  graph LR;
+    A[Few users]-->B[Small community];
+    B-->C[Few developers];
+    C-->D[Few useful tools];
+    D-->A;
+(('tag:margin-top * 4'))
+(('tag:center'))
+How to break the negative spiral?\n
+(('note:どうやってこの負のスパイラルを打開する？'))
+== Slide properties
+: enable-title-on-image
+   false
+= Expand useful tools\nwith few developers\n(('note:少人数で便利なツールを増やせればいいんじゃない？'))
+  # mermaid
+  # relative_width = 90
+  graph LR;
+    subgraph all[" "]
+      direction TB
+      subgraph Negative spiral
+        N0[Few users]-->N1[Small community];
+        N1-->N2(Few developers);
+        N2-->N3[Few useful tools];
+        N3-->N0;
+      end
+      subgraph Positive spiral
+        P0[More users]-->P1[Larger community];
+        P1-->P2[More developers];
+        P2-->P3(More useful tools);
+        P3-->P0;
+      end
+      N2-.->P3;
+    end
+    style all fill-opacity:0,stroke-width:0px
+    style N2 stroke-width:5px
+    style P3 stroke-width:5px
+== Slide properties
+: enable-title-on-image
+   false
+= But how?\n(('note:でもどうやって？'))
+Apache Arrow
+= Apache Arrow
+  * ((*Cross-language*)) dev platform for data\n
+    (('note:複数言語対応のデータ用の開発プラットフォーム'))
+    * Ruby community doesn't need to dev everything\n
+      (('note:Rubyコミュニティーがすべてを開発しなくてもよい'))
+    * We can share common implementations\n
+      (('note:共通の実装を言語を超えて共有できる'))
+  * Today's highlighted features\n
+    (('note:今日注目する機能'))
+    * Fast data processing(('note:（高速データ処理）'))
+    * Fast data interchange(('note:（高速データ交換）'))
+= My approach\n(('note:私のアプローチ'))
+  # mermaid
+  # relative_width = 90
+  graph LR;
+    subgraph all[" "]
+      direction TB
+      subgraph Negative spiral
+        N0[Few users]-->N1[Small community];
+        N1-->N2(Few developers);
+        N2-->N3[Few useful tools];
+        N3-->N0;
+      end
+      subgraph Positive spiral
+        P0(More users)-->P1[Larger community];
+        P1-->P2[More developers];
+        P2-->P3(More useful tools);
+        P3-->P0;
+      end
+      N2-. Apache Arrow .->P3;
+    end
+    style all fill-opacity:0,stroke-width:0px
+    style N2 stroke-width:5px
+    style P0 stroke-width:5px
+    style P3 stroke-width:5px
+== Slide properties
+: enable-title-on-image
+   false
+= Goal of this talk\n(('note:このトークのゴール'))
+    # mermaid
+    # relative_width = 35
+    # align = right
+    # vertical-align = top
+    # relative-margin-right = -10
+    # relative-margin-top = -7
+    graph LR;
+      subgraph all[" "]
+        direction TB
+        subgraph Negative spiral
+          N0[Few users]-->N1[Small community];
+          N1-->N2(Few developers);
+          N2-->N3[Few useful tools];
+          N3-->N0;
+        end
+        subgraph Positive spiral
+          P0(More users)-->P1[Larger community];
+          P1-->P2[More developers];
+          P2-->P3(More useful tools);
+          P3-->P0;
+        end
+        N2-. Apache Arrow .->P3;
+      end
+      style all fill-opacity:0,stroke-width:0px
+      style N2 stroke-width:5px
+      style P0 stroke-width:5px
+      style P3 stroke-width:5px
+  * You want to use Ruby\n
+    for some data processing\n
+    (('note:いくつかのデータ処理でRubyを使いたくなる'))
+    * Especially, you want to implement a BI tool\n
+      (('note:特にBIツールを作りたくなる'))
+  * You join Red Data Tools project\n
+    (('note:Red Data Toolsプロジェクトに参加する'))
+    * It provides data processing tools for Ruby\n
+      (('note:Ruby用のデータ処理ツールを提供するプロジェクト'))\n
+      (('note:((<URL:https://red-data-tools.github.io/>))'))
+= Fast data processing\n(('note:高速データ処理'))
+  * Ruby is slow to process data\n
+    (('note:Rubyでデータを処理すると遅い'))
+  * Resolve in external process:(('note:（別プロセスで解決）'))\n
+    (('note:Use case: Web app, batch process for Web app'))
+    * Use fast data processing module (e.g.: DB)\n
+      (('note:DBとか速いデータ処理モジュールを使う'))
+  * Resolve in the same process:(('note:（プロセス内で解決）'))\n
+    (('note:Use cases: IRB, batch process for Web app'))
+    * Implement core features in other fast lang\n
+      (('note:他の速い言語でコアの機能を実装'))
+= External process\n(('note:別プロセス'))
+    # mermaid
+    # relative_width = 35
+    # align = right
+    # vertical-align = top
+    # relative-margin-right = -10
+    # relative-margin-top = 0
+    sequenceDiagram
+      Ruby->>+External process: Request
+      Note right of External process: Fast data processing
+      External process-->>-Ruby: Response
+  * Popular case\n
+    in current Ruby usage\n
+    (('note:今のRubyの使われ方だとよくあるケース'))
+  * Small response: No problem\n
+    (('note:レスポンスが小さい場合は問題ない'))
+  * Large response:\n
+    (('note:レスポンスが大きい場合：'))
+    * Sending/receiving response are slow\n
+      (('note:レスポンスの送信・受信処理が遅い'))
+= Sending/receiving response\n(('note:レスポンスの送受信'))
+    # mermaid
+    # relative_width = 40
+    # align = right
+    # vertical-align = top
+    # relative-margin-right = -10
+    # relative-margin-top = 0
+    sequenceDiagram
+      participant Ruby
+      participant External process
+      Note right of External process: Serialize
+      External process-->>Ruby: Send
+      Note left of Ruby: Deserialize
+  * Serialize/deserialize\n
+    are slow\n
+    (('note:シリアライズ・デシリアライズが遅い'))
+  * How to speed them up?\n
+    (('note:どうやって高速化すればよいか'))
+    * Apache Arrow format
+    * Serialize/deserialize cost ≒ 0\n
+      (('note:シリアライズ・デシリアライズコストがほぼ0'))
+= Why Apache Arrow format is fast\n(('note:Apache Arrowフォーマットはなぜ速いのか'))
+  # img
+  # src = https://slide.rabbit-shocker.org/authors/kou/db-tech-showcase-online-2020/why-apache-arrow-format-is-fast.pdf
+  # relative_height = 80
+(('tag:center'))
+(('note:((<URL:https://slide.rabbit-shocker.org/authors/kou/db-tech-showcase-online-2020/>))'))
+== Slide properties
+: enable-title-on-image
+   false
+= Apache Arrow Flight SQL
+    # mermaid
+    # relative_width = 35
+    # align = right
+    # vertical-align = top
+    # relative-margin-right = -10
+    # relative-margin-top = 0
+    sequenceDiagram
+      participant Ruby
+      participant SQL DB
+      Ruby->>SQL DB: Request (SQL)
+      Note right of SQL DB: Fast data processing
+      SQL DB-->>Ruby: Response (Apache Arrow data)
+  * gRPC based protocol\n
+    (('note:gRPCベースのプロトコル'))
+    * NOTE: Other network libraries\nsuch as UCX can be used\n
+      (('note:UCXなど他のネットワークライブラリーも使える'))
+  * Specialized to Apache Arrow format\n
+    (('note:Apache Arrowフォーマットに特化'))
+    * Serialize/deserialize cost ≒ 0\n
+      (('note:シリアライズ・デシリアライズコストがほぼ0'))
+= Red Arrow Flight SQL
+  # rouge ruby
+  require "arrow-flight-sql"
+  location = "grpc://server:2929"
+  client = ArrowFlight::Client.new(location)
+  sql_client = ArrowFlightSQL::Client.new(client)
+  info = sql_client.execute("SELECT * FROM logs")
+  info.endpoints.each do |endpoint|
+    reader = sql_client.do_get(endpoint.ticket)
+    reader.read_all
+  end
+= Which SQL DBs support\nApache Arrow Flight SQL?
+  # RT
+  SQL DB, Support?
+  MySQL, No
+  PostgreSQL, No
+  BigQuery, No
+  Trino, No
+  Dremio, Yes
+= Why don't most SQL DBs support it?\n(('note:どうしてほとんどのSQL DBはサポートしていないの？'))
+  * Flight SQL is a new protocol\n
+    (('note:Apache Arrow Flight SQLは新しいプロトコルだから'))
+    * The first release: 2022-02(('note:（最初のリリース）'))
+    * Still experimental(('note:（まだ実験的扱い）'))
+  * Traditional SQL DBs may not support it\n
+    (('note:MySQL・PostgreSQLとか昔からあるSQL DBはサポートしないかも'))
+  * New SQL DBs will support because...\n
+    (('note:新しいSQL DBはサポートするはず。なぜなら…'))
+= Compatibility is important\n(('note:互換性が重要だから'))
+  * New SQL DBs often use major protocols\n
+    (('note:新しいSQL DBは既存のメジャーなプロトコルを使うことが多い'))
+    * To reuse existing client libraries\n
+      (('note:ユーザーは既存のクライアントライブラリーで新しいSQL DBを使える'))
+  * For example:
+    * MySQL protocol: TiDB, ...
+    * PostgreSQL protocol: (('tag:x-small:Cloud Spanner, CockroachDB, ...'))
+= Future\n(('note:将来'))
+  * (('tag:small:Flight SQL client libraries will increase'))\n
+    (('note:Flight SQLのクライアントライブラリーが充実するだろう'))
+  * New SQL DBs will support Flight SQL\n
+    (('note:新しいSQL DBはFlight SQLをサポートするだろう'))
+    * To reuse existing client libraries\n
+      (('note:既存のクライアントライブラリーを再利用するため'))
+  * (('tag:small:BI tools will support Flight SQL by default'))\n
+    (('note:BIツールはデフォルトでFlight SQLをサポートするだろう'))
+= What should we do next?\n(('note:私たちは次はなにをするべき？'))
+    # mermaid
+    # relative_width = 30
+    # align = right
+    # vertical-align = top
+    # relative-margin-right = -10
+    # relative-margin-top = 0
+    graph LR;
+      subgraph all[" "]
+        direction TB
+        subgraph Negative spiral
+          N0[Few users]-->N1[Small community];
+          N1-->N2(Few developers);
+          N2-->N3[Few useful tools];
+          N3-->N0;
+        end
+        subgraph Positive spiral
+          P0[More users]-->P1[Larger community];
+          P1-->P2[More developers];
+          P2-->P3(More useful tools);
+          P3-->P0;
+        end
+        N2-.->P3;
+      end
+      style all fill-opacity:0,stroke-width:0px
+      style N2 stroke-width:5px
+      style P3 stroke-width:5px
+      style P0 stroke-width:5px
+  * Implement an Active Record\n
+    adapter for Flight SQL\n
+    (('note:Flight SQL用のActive Recordアダプターを'))\n
+    (('note:実装するといいんじゃないかな'))
+    * For ease of use from Ruby on Rails apps\n
+      (('note:Ruby on Railsアプリから使いやすくなるはず'))
+  * Join Red Data Tools!\n
+    (('note:Red Data Toolsで開発しようぜ！'))\n
+    (('note:((<URL:https://red-data-tools.github.io/>))'))
+= But I'm using MySQL/PostgreSQL...\n(('note:でも、MySQL/PostgreSQLを使っているし。。。'))
+  # img
+  # src = https://1zbpvb1efqtf3zvbfn3m51uy-wpengine.netdna-ssl.com/wp-content/uploads/2022/08/adbc-3.png
+  # caption = ADBC: Apache Arrow Database Connectivity
+  # relative_height = 70
+(('tag:xx-small'))
+((<URL:https://voltrondata.com/news/simplifying-database-connectivity-with-arrow-flight-sql-and-adbc/>))
+= ADBC
+    # img
+    # src = https://1zbpvb1efqtf3zvbfn3m51uy-wpengine.netdna-ssl.com/wp-content/uploads/2022/08/adbc-3.png
+    # align = right
+    # vertical-align = top
+    # relative_width = 40
+    # relative-margin-right = -10
+    # relative-margin-top = 0
+  * Generic ((*fast*))\n
+    SQL DB client API\n
+    (('note:任意のSQL DBに接続できる高速なAPI'))
+    * We can use Flight SQL\n
+      through ADBC\n
+      (('note:ADBC経由でFlight SQLも使える'))
+  * Flight SQL is the fastest driver\n
+    (('note:Flight SQLが最速のドライバー'))
+    * But the same API for all SQL DBs is useful\n
+      like Active Record for Rubyists\n
+      (('note:なんだけど、すべてのSQL DBに同じAPIでアクセスできるのは便利'))\n
+      (('note:Active Recordも便利でしょ？'))
+= ADBC and Ruby
+  * Implementing Ruby bindings\n
+    (('note:Rubyバインディングを実装中'))
+  * Join Red Data Tools\n
+    to implement an Active Record adapter for ADBC!\n
+    (('note:Red Data ToolsでADBC用のActive Recordアダプターを開発しようぜ！'))\n
+    (('note:((<URL:https://red-data-tools.github.io/>))'))
+= Red ADBC
+  # rouge ruby
+  require "adbc"
+  options = {
+    driver: "adbc_driver_sqlite",
+    filename: ":memory:",
+  }
+  ADBC::Database.open(**options) do |database|
+    database.connect do |connection|
+      puts(connection.query("SELECT 1"))
+    end
+  end
+= Wrap up: External process case\n(('note:まとめ：別プロセスの場合'))
+  * Large response is slow\n
+    (('note:レスポンスが大きいときに遅い'))
+    * Bottleneck is serialize/deserialize\n
+      (('note:ボトルネックはシリアライズ・デシリアライズ'))
+  * Apache Arrow Flight SQL/ADBC\n
+    (('note:そこでApache Arrow Flight SQL/ADBCですよ！'))
+  * Let's implement AR adapters for them\n
+    (('note:Active Recordのアダプターを実装しようぜ！'))
+= In-process\n(('note:同一プロセス'))
+    # mermaid
+    # relative_width = 35
+    # align = right
+    # vertical-align = top
+    # relative-margin-right = -10
+    # relative-margin-top = -10
+    sequenceDiagram
+      Ruby->>+Library: Pass data
+      Note right of Library: Fast data processing
+      Library-->>-Ruby: Return processed data
+  * Not popular case\n
+    in current Ruby usage\n
+    (('note:今のRubyの使われ方だとあまりないケース'))
+    * Use case: process data\n
+      on local/remote storage\n
+      (('note:IRB, batch process for Web app'))\n
+      (('note:ユースケース：ローカル・リモートストレージにあるデータの処理'))\n
+  * Need a fast data processing library\n
+    implemented in other fast language\n
+    (('note:高速にデータ処理できる他の速い言語で実装されたライブラリーが必要'))
+= Fast language?\n(('note:速い言語？'))
+  * C/C++
+  * Rust
+  * Julia
+  * ...
+= A C++ case: Apache Arrow
+  * Apache Arrow has a computation module\n
+    (('note:Apache Arrowは計算モジュールも提供している'))
+  * Ruby bindings: Red Arrow/red-arrow\n
+    (('note:RubyバインディングはRed Arrow/red-arrow'))
+  * Data frame based on Red Arrow\n
+    (('note:Red Arrowベースのデータフレームもある'))
+    * RedAmber/red_amber by @heronshoes\n
+      (('note:Red Data Toolsメンバーでもある鈴木さんが開発'))\n
+      (('note:((<URL:https://mybinder.org/v2/gh/RubyData/docker-stacks/master?filepath=red-amber.ipynb>))'))
+= Red Arrow
+  # rouge ruby
+  require "datasets-arrow"
+  codes = Datasets::PostalCodeJapan.new.to_arrow
+  require "arrow"
+  sliced_codes = codes.slice do |slicer|
+    slicer.prefecture == "東京都"
+  end
+  puts(sliced_codes)
+= Red Amber
+  # rouge ruby
+  require "datasets-arrow"
+  codes = Datasets::PostalCodeJapan.new.to_arrow
+  require "red_amber"
+  data_frame = RedAmber::DataFrame.new(codes)
+  prefecture = data_frame[:prefecture]
+  puts(data_frame[prefecture == "東京都"])
+= A C++ case: DuckDB
+  * Similar to SQLite\n
+    but for data analytics\n
+    (('note:データ分析向けのSQLiteみたいなやつ'))
+    * Fast aggregation/filter/sort\n
+      (('note:高速な集計・フィルター・ソート'))
+  * Ruby bindings: ruby-duckdb by @suketa\n
+    (('note:Rubyコミッターでもある助田さんがRubyバインディングを開発'))
+    * We can impl. fast data processing with DuckDB\n
+      (('note:DuckDBを使って高速データ処理を実現できる！'))
+    * (('wait'))If we can interchange data w/ DuckDB fast...\n
+      (('note:DuckDBと高速にデータ交換できればね…'))
+= Is fast data interchange important?\n(('note:高速データ交換は重要なの？'))
+  # mermaid
+  # relative_height = 70
+  sequenceDiagram
+    Ruby->>+DuckDB: Load data (!)
+    Note right of DuckDB: Fast data processing
+    DuckDB->>-Ruby: Read result (!)
+(('tag:center'))
+(('tag:x-small'))
+If (!) are slow, total data processing is also slow\n
+(('note:(!)が遅いと全体のデータ処理も遅くなる'))
+== Slide properties
+: enable-title-on-image
+   false
+= Fast data interchange\n(('note:高速なデータ交換'))
+    # mermaid
+    # relative_height = 40
+    # align = right
+    # vertical-align = top
+    # relative-margin-right = -12
+    # relative-margin-top = -5
+    sequenceDiagram
+      Ruby->>+DuckDB: Pass Apache Arrow data directly
+      Note right of DuckDB: Fast data processing
+      DuckDB->>-Ruby: Read result with C data interface
+  * Use data as-is: zero-copy\n
+    (('note:データをそのまま使う：ゼロコピー'))
+  * Apache Arrow C data/stream interface
+    * C ABI for fast data interchange\n
+      (('note:高速にデータ交換するためのC ABI'))
+    * FYI: C ABI in Ruby: MemoryView\n
+      (('note:参考：RubyもMemoryViewというC ABIを提供している'))
+= C data interface
+  # rouge c
+  struct ArrowArray {
+    // Array data description
+    int64_t length;
+    int64_t null_count;
+    int64_t offset;
+    int64_t n_buffers;
+    int64_t n_children;
+    const void** buffers;
+    struct ArrowArray** children;
+    struct ArrowArray* dictionary;
+    // Release callback
+    void (*release)(struct ArrowArray*);
+    // Opaque producer-specific data
+    void* private_data;
+  };
+= DuckDB with Apache Arrow
+  # rouge ruby
+  require "datasets-arrow"
+  codes = Datasets::PostalCodeJapan.new.to_arrow
+  require "arrow-duckdb"
+  db = DuckDB::Database.open
+  c = db.connect
+  c.register("codes", codes) do # Use Apache Arrow data as-is
+    c.query("SELECT * FROM codes WHERE prefecture = ?",
+            "東京都", # Tokyo
+            output: :arrow) # Output as Apache Arrow data
+     .to_table # C data interface
+  end
+= C data interface on Web\n(('note:Web上でもC data interface'))
+  * Some unofficial WebAssembly ports exist\n
+    (('note:非公式ながらいくつかWebAssembly対応のApache Arrowライブラリーがある'))
+    * Rust based, Go based, ...
+  * WASM Ruby + C data I/F is useful?\n
+    (('note:WebAssembly版のRubyとC data interfaceでなんかできるかも？'))
+    * FYI: DuckDB supports WebAssembly too\n
+      (('note:参考：DuckDBもWebAssemblyをサポートしている'))
+= A Rust case: Arrow DataFusion
+  * SQL query engine
+    * Internal memory layout is Apache Arrow\n
+      (('note:DuckDBと似ているが内部のメモリーレイアウトはApache Arrow'))
+  * Direct Ruby bindings:(('note:（直接のバインディング）'))
+    * arrow-datafusion by @jychen7 with Magnus
+  * Ruby bindings via C API:(('note:（C API経由）'))
+    * datafusion-c with cargo-c
+    * Red DataFusion with datafusion-c
+= Ruby bindings via C API\n(('note:C API経由のRubyバインディング'))
+      # mermaid
+      # relative_width = 35
+      # align = right
+      # vertical-align = top
+      # relative-margin-right = -10
+      # relative-margin-top = -7
+      graph LR;
+        subgraph all[" "]
+          direction TB
+          subgraph Negative spiral
+            N0[Few users]-->N1[Small community];
+            N1-->N2(Few developers);
+            N2-->N3[Few useful tools];
+            N3-->N0;
+          end
+          subgraph Positive spiral
+            P0(More users)-->P1[Larger community];
+            P1-->P2[More developers];
+            P2-->P3(More useful tools);
+            P3-->P0;
+          end
+          N2-. Apache Arrow .->P3;
+        end
+        style all fill-opacity:0,stroke-width:0px
+        style N2 stroke-width:5px
+        style P3 stroke-width:5px
+  * To develop with\ndevs from other langs\n
+    (('note:他言語の開発者と一緒に開発するため'))
+    * C API is useful for other languages too\n
+      (('note:C APIはJavaやGoなど他の言語のバインディング開発でも有用'))
+  * C API provides a normal C library\n
+    (('note:C APIは普通のCライブラリーを提供する'))
+    * Headers: Generated by cbindgen automatically
+    * Shared libraries: Built with cargo-c
+= Red DataFusion
+  # rouge ruby
+  require "datasets-arrow"
+  codes = Datasets::PostalCodeJapan.new.to_arrow
+  require "datafusion"
+  context = DataFusion::SessionContext.new
+  context.register("codes", codes) # C data interface
+  data_frame = context.sql(<<-SQL)
+  SELECT * FROM codes WHERE prefecture = '東京都'
+  SQL
+  puts(data_frame.to_table) # C data interface
+= Remote data\n(('note:リモートデータ'))
+  * Recent modules can read remote data\n
+    (('note:最近のモジュールはリモートデータを読み込める'))
+  * At least these modules support it:\n
+    (('note:少なくとも次のモジュールはできる'))
+    * Apache Arrow
+    * DuckDB
+    * Apache Arrow DataFusion
+= Remote data example: DuckDB
+  # rouge sql
+  SELECT COUNT(*)
+    FROM parquet_scan('s3://ookla-open-data/parquet/performance/*/*/*/*.parquet',
+                      HIVE_PARTITIONING=1);
+  -- ┌───────┐
+  -- │ count_star() │
+  -- ├───────┤
+  -- │ 144567188    │
+  -- └───────┘
+= Wrap up: In-process case\n(('note:まとめ：同一プロセスの場合'))
+    # mermaid
+    # relative_width = 35
+    # align = right
+    # vertical-align = top
+    # relative-margin-right = -10
+    # relative-margin-top = 0
+    sequenceDiagram
+      Ruby->>+Library: Pass data
+      Note right of Library: Fast data processing
+      Library-->>-Ruby: Return processed data
+  * To process data\n
+    on local/remote storage\n
+    (('note:ローカル・リモートストレージにあるデータの処理'))
+  * Low-level APIs (bindings)\n
+    are getting ready\n
+    (('note:低レベルのAPIは整備できてきた'))
+  * Let's implement high-level API!\n
+    (('note:高レベルのAPIを実装しようぜ！'))
+    * e.g.: RedAmber, Active Record adapters, ...\n
+      (('note:((<URL:https://red-data-tools.github.io/>))'))
+= Acknowledgment\n(('note:謝辞'))
+  * Voltron Data
+    * Supports my Apache Arrow related work\n
+      (('note:私のApache Arrow関係の作業にお金を払ってくれている'))
+    * You can join Voltron Data or ClearCode\n
+      to work on Apache Arrow as a job\n
+      (('note:Voltron Dataさんかクリアコードに転職すると'))\n
+      (('note:仕事でApache Arrowを開発できるよ！'))\n
+      (('note:((<URL:https://www.clear-code.com/recruitment/>))'))
+  * Red Data Tools members
+    * They develop data processing tools for Ruby!\n
+      (('note:Ruby用のデータ処理ツールを開発しているよ！'))
+= Wrap up\n(('note:まとめ'))
+  * Can use Ruby for fast data processing\n
+    with Apache Arrow (('note:in some cases'))\n
+    (('note:Apache Arrowを使えば高速なデータ処理にRubyを使える…こともある'))
+    * External process: Fast data interchange\n
+      (('note:別プロセスの場合：高速データ交換機能を使う'))
+    * In-process: Fast data processing/interchange\n
+      (('note:同一プロセスの場合：高速データ処理・交換機能を使う'))
+  * But we still have missing pieces\n
+    (('note:でも、まだ足りないところがある'))
+= Goal of this talk\n(('note:このトークのゴール'))
+    # mermaid
+    # relative_width = 35
+    # align = right
+    # vertical-align = top
+    # relative-margin-right = -10
+    # relative-margin-top = -7
+    graph LR;
+      subgraph all[" "]
+        direction TB
+        subgraph Negative spiral
+          N0[Few users]-->N1[Small community];
+          N1-->N2(Few developers);
+          N2-->N3[Few useful tools];
+          N3-->N0;
+        end
+        subgraph Positive spiral
+          P0(More users)-->P1[Larger community];
+          P1-->P2[More developers];
+          P2-->P3(More useful tools);
+          P3-->P0;
+        end
+        N2-. Apache Arrow .->P3;
+      end
+      style all fill-opacity:0,stroke-width:0px
+      style N2 stroke-width:5px
+      style P0 stroke-width:5px
+      style P3 stroke-width:5px
+  * You want to use Ruby\n
+    for some data processing\n
+    (('note:いくつかのデータ処理でRubyを使いたくなる'))
+    * Especially, you want to implement a BI tool\n
+      (('note:特にBIツールを作りたくなる'))
+  * You join Red Data Tools project\n
+    (('note:Red Data Toolsプロジェクトに参加する'))
+    * It provides data processing tools for Ruby\n
+      (('note:Ruby用のデータ処理ツールを提供するプロジェクト'))\n
+      (('note:((<URL:https://red-data-tools.github.io/>))'))

data/images/apache-arrow-commits-kou-with-mark.png ADDED Viewed

Binary file

data/images/apache-arrow-commits-kou.png ADDED Viewed

Binary file

data/images/clear-code-rubykaigi-2022-silver-sponsor.png ADDED Viewed

Binary file

data/pdf/rubykaigi-2022-fast-data-processing-with-ruby-and-apache-arrow.pdf ADDED Viewed

Binary file

data/theme.rb ADDED Viewed

	@@ -0,0 +1 @@
1	+ include_theme("clear-code")

metadata ADDED Viewed

@@ -0,0 +1,89 @@
+--- !ruby/object:Gem::Specification
+name: rabbit-slide-kou-rubykaigi-2022
+version: !ruby/object:Gem::Version
+  version: 2022.9.10
+platform: ruby
+authors:
+- Sutou Kouhei
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2022-09-09 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: rabbit
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 2.0.2
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 2.0.2
+- !ruby/object:Gem::Dependency
+  name: rabbit-theme-clear-code
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+description: |-
+  I introduced Ruby and Apache Arrow integration including the "super fast large data interchange and processing" Apache Arrow feature at RubyKaigi Takeout 2021.
+  This talk introduces how we can use the "super fast large data interchange and processing" Apache Arrow feature in Ruby. Here are some use cases:
+  * Fast data retrieval (fast (({pluck}))) from DB such as MySQL and PostgreSQL for batch processes in a Ruby on Rails application
+  * Fast data interchange with JavaScript for dynamic visualization in a Ruby on Rails application
+  * Fast OLAP with in-process DB such as DuckDB and Apache Arrow DataFusion in a Ruby on Rails application or irb session
+email:
+- kou@clear-code.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- ".rabbit"
+- README.rd
+- Rakefile
+- config.yaml
+- fast-data-processing-with-ruby-and-apache-arrow.rab
+- images/apache-arrow-commits-kou-with-mark.png
+- images/apache-arrow-commits-kou.png
+- images/clear-code-rubykaigi-2022-silver-sponsor.png
+- pdf/rubykaigi-2022-fast-data-processing-with-ruby-and-apache-arrow.pdf
+- theme.rb
+homepage: https://slide.rabbit-shocker.org/authors/kou/rubykaigi-2022/
+licenses:
+- CC-BY-SA-4.0
+metadata:
+  source_code_uri: https://gitlab.com/ktou/rabbit-slide-kou-rubykaigi-2022
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubygems_version: 3.4.0.dev
+signing_key:
+specification_version: 4
+summary: Fast data processing with Ruby and Apache Arrow
+test_files: []