rabbit-slide-abetomo-apache-arrow-meetup-in-tokyo-2025-spring 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rabbit +2 -0
- data/README.md +24 -0
- data/Rakefile +17 -0
- data/config.yaml +23 -0
- data/groonga-and-apache-arrow.md +104 -0
- data/pdf/apache-arrow-meetup-in-tokyo-2025-spring-groonga-and-apache-arrow.pdf +0 -0
- metadata +62 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 512ebe3a058cf615117be3f7e2f651d72febb5b1d8cfdbd46e1b6753280d4457
|
4
|
+
data.tar.gz: feb12fb97436878f41c07cabc0aee4916bf78237f0326f4bb37890371637d081
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f1b4116c585c49437cb250c2e998553f01324030be4a4474d195e3fc6a78e033aed1c9360d0e7b4a152da7c712c36f2762ff6281a8f0fac2cd8358922d9bab6a
|
7
|
+
data.tar.gz: 9410083634d8318dca1d80698a34a6e0859e3f7dfe8616284089a176800b72c7dbceb1c3d9e370c5e3095e40dffc763ab65a47f4e72329459cb8495ae9a754b3
|
data/.rabbit
ADDED
data/README.md
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# Groonga and Apache Arrow
|
2
|
+
|
3
|
+
https://red-data-tools.connpass.com/event/349680/
|
4
|
+
|
5
|
+
## For author
|
6
|
+
|
7
|
+
### Show
|
8
|
+
|
9
|
+
rake
|
10
|
+
|
11
|
+
### Publish
|
12
|
+
|
13
|
+
rake publish
|
14
|
+
|
15
|
+
## For viewers
|
16
|
+
|
17
|
+
### Install
|
18
|
+
|
19
|
+
gem install rabbit-slide-abetomo-apache-arrow-meetup-in-tokyo-2025-spring
|
20
|
+
|
21
|
+
### Show
|
22
|
+
|
23
|
+
rabbit rabbit-slide-abetomo-apache-arrow-meetup-in-tokyo-2025-spring.gem
|
24
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require "rabbit/task/slide"
|
2
|
+
|
3
|
+
# Edit ./config.yaml to customize metadata
|
4
|
+
|
5
|
+
# Declared outside the block so the code after the block can read the
# specification that is assigned inside it.
spec = nil
|
6
|
+
# The block receives the task object being configured.
# NOTE(review): presumably this registers the slide rake tasks provided by
# rabbit/task/slide (not visible here) -- confirm against the Rabbit docs.
Rabbit::Task::Slide.new do |task|
|
7
|
+
# Capture the task's specification in the outer local declared above.
spec = task.spec
|
8
|
+
# Optional customizations, disabled by default:
# spec.files += Dir.glob("doc/**/*.*")
|
9
|
+
# spec.files -= Dir.glob("private/**/*.*")
|
10
|
+
# spec.add_runtime_dependency("rabbit-theme-YOUR-THEME")
|
11
|
+
end
|
12
|
+
|
13
|
+
# Rake task `tag`: tags the current version in git and pushes tags.
# The description interpolates spec.version when this file is loaded, so
# spec must already have been assigned by the time this line runs.
desc "Tag #{spec.version}"
|
14
|
+
task :tag do
|
15
|
+
# Create an annotated tag (-a) named after the version, with the message
# "Publish <version>".
sh("git", "tag", "-a", spec.version.to_s, "-m", "Publish #{spec.version}")
|
16
|
+
# Push tags via `git push --tags`.
sh("git", "push", "--tags")
|
17
|
+
end
|
data/config.yaml
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
---
|
2
|
+
id: apache-arrow-meetup-in-tokyo-2025-spring
|
3
|
+
base_name: groonga-and-apache-arrow
|
4
|
+
tags: []
|
5
|
+
presentation_date:
|
6
|
+
presentation_start_time:
|
7
|
+
presentation_end_time:
|
8
|
+
version: 1.0.0
|
9
|
+
licenses: []
|
10
|
+
slideshare_id:
|
11
|
+
speaker_deck_id:
|
12
|
+
vimeo_id:
|
13
|
+
youtube_id:
|
14
|
+
width: 800
|
15
|
+
height: 600
|
16
|
+
source_code_uri:
|
17
|
+
author:
|
18
|
+
markup_language: :markdown
|
19
|
+
name: Abe Tomoaki
|
20
|
+
email: abe@clear-code.com
|
21
|
+
rubygems_user: abetomo
|
22
|
+
slideshare_user:
|
23
|
+
speaker_deck_user:
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# Groonga and Apache Arrow
|
2
|
+
|
3
|
+
author
|
4
|
+
: Abe Tomoaki
|
5
|
+
|
6
|
+
institution
|
7
|
+
: ClearCode Inc.
|
8
|
+
|
9
|
+
theme
|
10
|
+
: clear-code
|
11
|
+
|
12
|
+
# GroongaはApache Arrowフォーマットで入出力できるよ!
|
13
|
+
|
14
|
+
# Groongaとは?
|
15
|
+
|
16
|
+
* オープンソースのカラムストア機能付き全文検索エンジン
|
17
|
+
* 即時更新
|
18
|
+
* 参照ロックフリー
|
19
|
+
* 集計もできる
|
20
|
+
|
21
|
+
# 例で紹介Groonga: 検索
|
22
|
+
|
23
|
+
```js
|
24
|
+
// SQLで書くとこういう感じ
|
25
|
+
// select _key, introduction
|
26
|
+
// from Users
|
27
|
+
// where introduction &@ 'engine'
|
28
|
+
query = new URLSearchParams({
|
29
|
+
table: 'Users',
|
30
|
+
output_columns: '_key,introduction',
|
31
|
+
match_columns: 'introduction',
|
32
|
+
query: 'engine', // 「engine」で全文検索
|
33
|
+
// output_type: 'apache-arrow', このパラメータを付与するとApache Arrowフォーマットになる
|
34
|
+
command_version: 3
|
35
|
+
});
|
36
|
+
response = await fetch(`http://localhost:10041/d/select?${query}`);
|
37
|
+
(await response.json()).body.records
|
38
|
+
// [
|
39
|
+
// [ "Groonga", "Groonga is an open-source fulltext search engine and column store." ],
|
40
|
+
// [ "Mroonga", "A MySQL pluggable storage engine based on Groonga." ]
|
41
|
+
// ]
|
42
|
+
```
|
43
|
+
|
44
|
+
# 例で紹介Groonga: 集計
|
45
|
+
|
46
|
+
```js
|
47
|
+
// SQLで書くとこういう感じ
|
48
|
+
// select department, avg(age)
|
49
|
+
// from Users
|
50
|
+
// group by department
|
51
|
+
|
52
|
+
// drilldownというので集計できる
|
53
|
+
query = new URLSearchParams({
|
54
|
+
table: 'Users',
|
55
|
+
output_columns: '_id',
|
56
|
+
drilldown: 'department',
|
57
|
+
drilldown_calc_types: 'AVG',
|
58
|
+
drilldown_calc_target: 'age',
|
59
|
+
drilldown_output_columns: '_key,_avg',
|
60
|
+
command_version: 3
|
61
|
+
});
|
62
|
+
response = await fetch(`http://localhost:10041/d/select?${query}`);
|
63
|
+
(await response.json()).body.drilldowns.department.records
|
64
|
+
// [
|
65
|
+
// [ "A", 37 ],
|
66
|
+
// [ "B", 15.5 ]
|
67
|
+
// ]
|
68
|
+
```
|
69
|
+
|
70
|
+
# 事例紹介
|
71
|
+
|
72
|
+
# 活用事例1: Groonga delta
|
73
|
+
|
74
|
+
* MySQL/MariaDBのデータをリアルタイムでGroongaに同期するツール
|
75
|
+
* 差分情報をApache Parquet形式で保存
|
76
|
+
* Groongaへの取り込みを高速に行える
|
77
|
+
|
78
|
+
# Groonga delta補足
|
79
|
+
|
80
|
+
* MySQL/MariaDBのbinlog(差分情報)を一度ストレージに書き出す
|
81
|
+
* これにより途中で失敗してもやり直しがしやすい
|
82
|
+
* この書き出すフォーマットがApache Parquet形式
|
83
|
+
|
84
|
+
# 活用事例2: ログ収集製品
|
85
|
+
|
86
|
+
* 某ログ収集製品でGroongaが利用されている
|
87
|
+
* データのやり取りにApache Arrowフォーマットを使うことで高速化できた
|
88
|
+
|
89
|
+
# まとめ
|
90
|
+
|
91
|
+
* GroongaはApache Arrowフォーマットで入出力できるよ!
|
92
|
+
* Groongaは全文検索も集計もできるよ!
|
93
|
+
|
94
|
+
# 宣伝
|
95
|
+
|
96
|
+
何かとApache Parquet形式でデータを保存する時代なので、Groongaでデータ分析の前処理とかしてみるといいよ!
|
97
|
+
|
98
|
+
# 参考
|
99
|
+
|
100
|
+
* Groonga
|
101
|
+
* https://groonga.org/
|
102
|
+
* Groonga delta
|
103
|
+
* https://github.com/groonga/groonga-delta
|
104
|
+
* https://www.clear-code.com/blog/2022/5/20/groonga-delta.html
|
metadata
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rabbit-slide-abetomo-apache-arrow-meetup-in-tokyo-2025-spring
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Abe Tomoaki
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2025-04-11 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rabbit
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.0.2
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.0.2
|
27
|
+
description: https://red-data-tools.connpass.com/event/349680/
|
28
|
+
email:
|
29
|
+
- abe@clear-code.com
|
30
|
+
executables: []
|
31
|
+
extensions: []
|
32
|
+
extra_rdoc_files: []
|
33
|
+
files:
|
34
|
+
- ".rabbit"
|
35
|
+
- README.md
|
36
|
+
- Rakefile
|
37
|
+
- config.yaml
|
38
|
+
- groonga-and-apache-arrow.md
|
39
|
+
- pdf/apache-arrow-meetup-in-tokyo-2025-spring-groonga-and-apache-arrow.pdf
|
40
|
+
homepage: https://slide.rabbit-shocker.org/authors/abetomo/apache-arrow-meetup-in-tokyo-2025-spring/
|
41
|
+
licenses: []
|
42
|
+
metadata: {}
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options: []
|
45
|
+
require_paths:
|
46
|
+
- lib
|
47
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
requirements: []
|
58
|
+
rubygems_version: 3.3.5
|
59
|
+
signing_key:
|
60
|
+
specification_version: 4
|
61
|
+
summary: Groonga and Apache Arrow
|
62
|
+
test_files: []
|