rabbit-slide-kou-mariadb-community-event-2016-07-21 2016.7.21.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,396 @@
1
+ = Mroonga\n2016
2
+
3
+ : subtitle
4
+ 高速日本語全文検索 for MariaDB\n
5
+ (('note:Super fast full text search for MariaDB'))
6
+ : author
7
+ Kouhei Sutou
8
+ : institution
9
+ ClearCode Inc.
10
+ : content-source
11
+ MariaDB Community Event in Tokyo
12
+ : date
13
+ 2016-07-21
14
+ : allotted-time
15
+ 30m
16
+ : theme
17
+ .
18
+
19
+ = Mroonga
20
+
21
+ * 読み方:むるんが\n
22
+ (('note:Pronunciation: múlúnɡά'))
23
+ * ストレージエンジン\n
24
+ (('note:Storage engine'))
25
+ * (('wait'))MariaDBバンドル\n
26
+ (('note:Bundled in MariaDB'))
27
+ * 別途インストールしなくてもよい\n
28
+ (('note:No need to install Mroonga separately'))
29
+
30
+ = 特徴\n(('note:Characteristics'))
31
+
32
+ * (('wait'))
33
+ 高速日本語全文検索(('note:(全言語OK)'))\n
34
+ (('note:Super fast full text search for all languages'))
35
+ * (('wait'))
36
+ カラムストアによる高速処理\n
37
+ (('note:Super fast processing by column store architecture'))
38
+ * (('wait'))
39
+ 全文検索初心者でも使える\n
40
+ (('note:Easy to use for full text search beginners'))
41
+ * (('wait'))
42
+ 全文検索上級者は活用できる\n
43
+ (('note:Features for full text search specialists'))
44
+
45
+ = 高速日本語全文検索\n(('note:Super fast full text search'))
46
+
47
+ (1) ベンチマーク\n
48
+ (('note:Benchmark'))
49
+ (2) 速さの秘密\n
50
+ (('note:The reason why Mroonga is fast'))
51
+
52
+ = ベンチマーク環境\n(('note:Benchmark environment'))
53
+
54
+ * 対象:Wikipedia日本語版\n
55
+ (('note:Target: Japanese Wikipedia'))
56
+ * レコード数:約185万件\n
57
+ (('note:The number of records: About 1.85 million'))
58
+ * データサイズ:約7GB\n
59
+ (('note:Data size: About 7GB'))
60
+ * メモリー4GB・SSD250GB(('note:(ConoHa)'))\n
61
+ (('note:Memory: 4GB, SSD: 250GB'))
62
+
63
+ = 補足\n(('note:Supplement'))
64
+
65
+ * MySQL 5.7を使用\n
66
+ (('note:MySQL 5.7 is used'))
67
+ * MariaDBのInnoDBは日本語未対応\n
68
+ (('note:InnoDB in MariaDB doesn't support Japanese yet'))
69
+ * 他人のベンチマークは参考程度\n
70
+ (('note:Treat benchmark results by others as reference only'))
71
+ * 検討時は実環境でベンチマークを!\n
72
+ (('note:Run benchmarks with real data in your real environment'))
73
+
74
+ (('note:詳細(Detail):'))\n
75
+ (('note:https://github.com/groonga/wikipedia-search/issues/4'))
76
+
77
+ = 検索1\n(('note:Search1'))
78
+
79
+ (('tag:center'))
80
+ キーワード:テレビアニメ\n
81
+ (('note:(ヒット数:約2万3千件)'))\n
82
+ (('note:Keyword: TV animation'))\n
83
+ (('note:(N hits: About 23K)'))
84
+
85
+ # RT
86
+ delimiter = [|]
87
+
88
+ InnoDB ngram | 3m2s
89
+ InnoDB MeCab | 6m20s
90
+ Mroonga:((*1*)) | 0.11s
91
+
92
+ = 検索2\n(('note:Search2'))
93
+
94
+ (('tag:center'))
95
+ キーワード:データベース\n
96
+ (('note:(ヒット数:約1万7千件)'))\n
97
+ (('note:Keyword: Database'))\n
98
+ (('note:(N hits: About 17K)'))
99
+
100
+ # RT
101
+ delimiter = [|]
102
+
103
+ InnoDB ngram | 36s
104
+ InnoDB MeCab:((*1*)) | 0.03s
105
+ Mroonga:((*2*)) | 0.09s
106
+
107
+ = 検索3\n(('note:Search3'))
108
+
109
+ (('tag:center'))
110
+ キーワード:PostgreSQL OR MySQL\n
111
+ (('note:(ヒット数:約400件)'))\n
112
+ (('note:Keyword: PostgreSQL OR MySQL'))\n
113
+ (('note:(N hits: About 400)'))
114
+
115
+ # RT
116
+ delimiter = [|]
117
+
118
+ InnoDB ngram | N/A(Error)
119
+ InnoDB MeCab:((*1*)) | 0.005s
120
+ Mroonga:((*2*)) | 0.028s
121
+
122
+ = 検索4\n(('note:Search4'))
123
+
124
+ (('tag:center'))
125
+ キーワード:日本\n
126
+ (('note:(ヒット数:約63万件)'))\n
127
+ (('note:Keyword: Japan'))\n
128
+ (('note:(N hits: About 630K)'))
129
+
130
+ # RT
131
+ delimiter = [|]
132
+
133
+ InnoDB ngram | 1.3s
134
+ InnoDB MeCab | 1.3s
135
+ Mroonga:((*1*)) | 0.21s
136
+
137
+ = 検索まとめ\n(('note:Wrap up search'))
138
+
139
+ * (('wait'))Mroonga:安定して速い\n
140
+ (('note:Always fast'))
141
+ * (('wait'))InnoDB FTS MeCab
142
+ * ハマれば速い\n
143
+ (('note:Fast only for single-token queries'))
144
+ * (('wait'))InnoDB FTS ngram
145
+ * 安定して遅い\n
146
+ (('note:Always slow'))
147
+
148
+ = 速さの秘密\n(('note:The reason why Mroonga is fast'))
149
+
150
+ * 最適化された転置索引実装\n
151
+ (('note:Optimized inverted index implementation'))
152
+ * (('wait'))2段階のデータ圧縮\n
153
+ (('note:2-level data compression'))
154
+ * (('wait'))高速なポスティングリスト探索\n
155
+ (('note:Fast posting list search'))
156
+ * (('wait'))検索だけでなく更新も速い\n
157
+ (('note:Not only search but also update is fast'))
158
+
159
+ (('wait'))
160
+ (('note:11年以上開発が続いている全文検索エンジンGroongaを使用'))\n
161
+ (('note:Groonga full text search engine (11 years old) is used'))
162
+
163
+ = もっと速さの秘密\n(('note:More reasons why Mroonga is fast'))
164
+
165
+ * カラムストアを活かした最適化\n
166
+ (('note:Optimizations based on column store architecture'))
167
+ * ポイント1:余計なI/Oを減らす\n
168
+ (('note:Point1: Reduce needless I/O'))
169
+ * ポイント2:I/Oを局所化\n
170
+ (('note:Point2: Localize I/O'))
171
+
172
+ = カラムストア\n(('note:Column store'))
173
+
174
+ # image
175
+ # src = images/column-store.svg
176
+ # relative_height = 100
177
+
178
+ = 必要なカラムのみアクセス\n(('note:Access to only needed columns'))
179
+
180
+ # coderay sql
181
+ -- Access to only a
182
+ SELECT a
183
+ FROM table
184
+ -- Access to only c
185
+ WHERE c = XXX;
186
+ -- b isn't accessed
187
+
188
+ = 減ったI/O\n(('note:Reduced I/O'))
189
+
190
+ # image
191
+ # src = images/not-access-to-needless-columns.svg
192
+ # relative_height = 100
193
+
194
+ = 行カウント\n(('note:Row count'))
195
+
196
+ # coderay sql
197
+ -- No column values are needed
198
+ SELECT COUNT(*)
199
+ FROM table
200
+ -- Access to only full text search index of c
201
+ WHERE MATCH(c)
202
+ AGAINST('+keyword' IN BOOLEAN MODE);
203
+ -- a, b and c aren't accessed
204
+
205
+ = 減ったI/O\n(('note:Reduced I/O'))
206
+
207
+ # image
208
+ # src = images/count-star.svg
209
+ # relative_height = 100
210
+
211
+ = (({ORDER BY LIMIT}))
212
+
213
+ # coderay sql
214
+ SELECT *
215
+ FROM table
216
+ WHERE MATCH(c)
217
+ AGAINST('+keyword' IN BOOLEAN MODE)
218
+ -- Mroonga processes ORDER BY LIMIT
219
+ -- instead of MariaDB
220
+ -- → Mroonga returns only 10 records
221
+ -- to MariaDB instead of all matched records
222
+ ORDER BY a LIMIT 10;
223
+
224
+ = Optimized (({ORDER BY LIMIT}))
225
+
226
+ * (('wait'))検索(('note:(Search)')) by Mroonga
227
+ * カラム毎の処理でI/Oを局所化\n
228
+ (('note:(索引非使用時)'))\n
229
+ (('note:Localize I/O by per column processing'))\n
230
+ (('note:(when no index is used)'))
231
+ * (('wait'))ソート(('note:(Sort)')) by Mroonga
232
+ * カラム毎の処理でI/Oを局所化\n
233
+ (('note:Localize I/O by per column processing'))
234
+ * (('wait'))(({OFFSET}))/(({LIMIT})) by Mroonga
235
+
236
+ = カラム毎の処理は速い\n(('note:Per column processing is fast'))
237
+
238
+ # image
239
+ # src = images/per-column-processing.svg
240
+ # relative_height = 100
241
+
242
+ = 最適化のまとめ\n(('note:Wrap up optimization'))
243
+
244
+ * 転置索引実装が速い\n
245
+ (('note:Inverted index implementation is fast'))
246
+ * 検索も更新も速い\n
247
+ (('note:Both search and update are fast'))
248
+ * カラムストアで速い\n
249
+ (('note:Fast by column store architecture'))
250
+ * ポイント:I/O削減・I/O局所化\n
251
+ (('note:Points: Reduce and localize I/O'))
252
+
253
+ = 全文検索初心者でも使える\n(('note:Easy to use by beginners'))
254
+
255
+ * (('wait'))インストールが簡単\n
256
+ (('note:Easy to install'))
257
+ * (('wait'))MySQLの標準機能のみで使える\n
258
+ (('note:Usable with only MySQL standard features'))
259
+
260
+ = インストールが簡単\n(('note:Easy to install'))
261
+
262
+ * (('wait'))MariaDBバンドル\n
263
+ (('note:MariaDB bundles Mroonga'))
264
+ * (('wait'))Apt/Yumリポジトリー\n
265
+ (('note:Apt/Yum repositories'))
266
+ * (('wait'))MariaDB込みのWindowsバイナリ\n
267
+ (('note:Windows binary with MariaDB'))
268
+
269
+ = 標準機能のみで使える\n(('note:Require only MySQL standard features'))
270
+
271
+ # coderay sql
272
+ -- Create
273
+ CREATE TABLE table (
274
+ -- ...,
275
+ FULLTEXT INDEX (column)
276
+ ) ENGINE=Mroonga;
277
+
278
+ = 標準機能のみで使える\n(('note:Require only MySQL standard features'))
279
+
280
+ # coderay sql
281
+ -- Convert
282
+ ALTER TABLE table
283
+ ADD FULLTEXT INDEX (column)
284
+ ENGINE=Mroonga;
285
+
286
+ = 標準機能のみで使える\n(('note:Require only MySQL standard features'))
287
+
288
+ # coderay sql
289
+ SELECT * FROM table
290
+ WHERE
291
+ MATCH(column)
292
+ AGAINST('+keyword'
293
+ IN BOOLEAN MODE);
294
+
295
+ = 全文検索上級者向け機能\n(('note:Features for specialists'))
296
+
297
+ * (('wait'))
298
+ カスタマイズ\n
299
+ (('note:Customizable'))
300
+ * デフォルト値はいい感じ\n
301
+ →初心者はカスタマイズなしでよい\n
302
+ (('note:Suitable default values'))\n
303
+ (('note:→Beginners don't need to customize'))
304
+ * (('wait'))
305
+ Groongaの機能をもっと使える\n
306
+ (('note:(高速・高機能)'))\n
307
+ (('note:Specialists can use more Groonga features'))\n
308
+ (('note:(Fast and high functionality)'))
309
+
310
+ = 文字正規化ルール変更\n(('note:Change normalizer'))
311
+
312
+ # coderay sql
313
+ CREATE TABLE table (
314
+ -- ...,
315
+ FULLTEXT INDEX (column)
316
+ --
317
+ -- Specify a parameter as comment
318
+ COMMENT='normalizer "NormalizerAuto"'
319
+ ) ENGINE=Mroonga;
320
+
321
+ = 文字正規化ルール変更\n(('note:Change normalizer'))
322
+
323
+ # coderay sql
324
+ CREATE TABLE table (
325
+ -- ...,
326
+ FULLTEXT INDEX (column)
327
+ -- MariaDB:
328
+ -- A custom parameter can be used
329
+ NORMALIZER='NormalizerAuto'
330
+ ) ENGINE=Mroonga;
331
+
332
+ = Groongaの検索機能を使う\n(('note:Use full Groonga search features'))
333
+
334
+ # coderay sql
335
+ SELECT * FROM table
336
+ WHERE
337
+ -- "c1" is meaningless with "*SS" pragma
338
+ MATCH(c1)
339
+ -- "*SS" is a pragma to use
340
+ -- full Groonga search features
341
+ -- Multiple indexes can be used in a query
342
+ AGAINST('*SS c1 @ "keyword" && c2 < 100'
343
+ IN BOOLEAN MODE);
344
+
345
+ = 今後\n(('note:Future plans'))
346
+
347
+ * (('wait'))
348
+ 最新機能サポート\n
349
+ (('note:Support the latest features'))
350
+ * JSONを全文検索\n
351
+ (('note:(JSON型のデータの読み書きは対応済み)'))\n
352
+ (('note:Full text search against JSON'))\n
353
+ (('note:(Storing/fetching JSON is already supported)'))
354
+ * virtual column/generated column
355
+ * (('wait'))
356
+ 最新版をMariaDBにバンドル\n
357
+ (('note:Bundle the latest Mroonga in MariaDB'))
358
+
359
+ = 最新版をバンドル\n(('note:Bundle the latest Mroonga'))
360
+
361
+ * (('wait'))
362
+ Mroongaは毎月リリース\n
363
+ (('note:Mroonga is released monthly'))
364
+ * (('wait'))
365
+ MariaDB 10.2.1 bundles Mroonga ((*5.04*))
366
+ * The latest Mroonga is 6.06
367
+ * Mroonga supports MariaDB 10.2 since ((*6.03*))
368
+ * How can we improve this?
369
+
370
+ = まとめ1\n(('note:Wrap up1'))
371
+
372
+ * (('wait'))
373
+ 高速日本語全文検索(('note:(全言語OK)'))\n
374
+ (('note:Super fast full text search for all languages'))
375
+ * (('wait'))
376
+ カラムストアによる高速処理\n
377
+ (('note:Super fast processing by column store architecture'))
378
+ * (('wait'))
379
+ 全文検索初心者でも使える\n
380
+ (('note:Easy to use for full text search beginners'))
381
+ * (('wait'))
382
+ 全文検索上級者は活用できる\n
383
+ (('note:Features for full text search specialists'))
384
+
385
+ = まとめ2\n(('note:Wrap up2'))
386
+
387
+ * (('wait'))
388
+ 今後もMroongaは便利になる\n
389
+ (('note:We continue to improve Mroonga'))
390
+ * (('wait'))
391
+ MariaDBで最新Mroongaを使える\n
392
+ (('note:MariaDB will bundle the latest Mroonga'))
393
+
394
+ (('wait'))
395
+ MariaDBで全文検索ならMroonga!\n
396
+ (('note:Mroonga is the best for full text search on MariaDB!'))
@@ -0,0 +1,3 @@
1
+ @groonga_product = "mroonga"
2
+
3
+ include_theme("groonga")
metadata ADDED
@@ -0,0 +1,84 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rabbit-slide-kou-mariadb-community-event-2016-07-21
3
+ version: !ruby/object:Gem::Version
4
+ version: 2016.7.21.0
5
+ platform: ruby
6
+ authors:
7
+ - Kouhei Sutou
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-07-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rabbit
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 2.0.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 2.0.2
27
+ - !ruby/object:Gem::Dependency
28
+ name: rabbit-theme-groonga
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: 2016年7月時点でのMroongaの最新情報を紹介します。MariaDBコミュニティイベント in Tokyoでの紹介なのでMariaDBに関連する情報を多めに紹介します。
42
+ email:
43
+ - kou@clear-code.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".rabbit"
49
+ - README.rd
50
+ - Rakefile
51
+ - config.yaml
52
+ - images/column-store.svg
53
+ - images/count-star.svg
54
+ - images/not-access-to-needless-columns.svg
55
+ - images/per-column-processing.svg
56
+ - mroonga.rab
57
+ - pdf/mariadb-community-event-2016-07-21-mroonga.pdf
58
+ - theme.rb
59
+ homepage: http://slide.rabbit-shocker.org/authors/kou/mariadb-community-event-2016-07-21/
60
+ licenses:
61
+ - CC BY-SA 4.0
62
+ - CC BY 3.0
63
+ metadata: {}
64
+ post_install_message:
65
+ rdoc_options: []
66
+ require_paths:
67
+ - lib
68
+ required_ruby_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ requirements: []
79
+ rubyforge_project:
80
+ rubygems_version: 2.5.1
81
+ signing_key:
82
+ specification_version: 4
83
+ summary: Mroonga最新情報2016
84
+ test_files: []