rabbit-slide-kou-jpmug-db-study-1 2018.1.30.0

Sign up to get free protection for your applications and to get access to all the features.
data/memo.md ADDED
@@ -0,0 +1,304 @@
1
+ # メモ
2
+
3
+ ## ベンチマーク
4
+
5
+ ### データ
6
+
7
+ シカゴの犯罪データ。651万レコード。
8
+
9
+ https://catalog.data.gov/dataset/crimes-2001-to-present-398a4
10
+
11
+ ### バージョン
12
+
13
+ MariaDBは10.3.2。Mroongaは7.10。
14
+
15
+ `innodb_buffer_pool_size`は512M。
16
+
17
+ ### インデックス作成時間
18
+
19
+ Mroongaのテーブルの`block`に全文検索インデックスを作成。7秒。
20
+
21
+ ```text
22
+ MariaDB> create fulltext index block_index on crimes (block);
23
+ Query OK, 0 rows affected (6.989 sec)
24
+ Records: 0 Duplicates: 0 Warnings: 0
25
+ ```
26
+
27
+ Mroongaのテーブルの`description`に全文検索インデックスを作成。6秒。
28
+
29
+ ```text
30
+ MariaDB> create fulltext index description_index on crimes (description);
31
+ Query OK, 0 rows affected (5.873 sec)
32
+ Records: 0 Duplicates: 0 Warnings: 0
33
+ ```
34
+
35
+ InnoDBのテーブルの`block`に全文検索インデックスを作成。35秒。
36
+
37
+ ```text
38
+ MariaDB> create fulltext index block_index on crimes2 (block);
39
+ Query OK, 0 rows affected (35.382 sec)
40
+ Records: 0 Duplicates: 0 Warnings: 0
41
+ ```
42
+
43
+ InnoDBのテーブルの`description`に全文検索インデックスを作成。1分50秒。
44
+
45
+ ```text
46
+ MariaDB> create fulltext index description_index on crimes2 (description);
47
+ Query OK, 0 rows affected, 1 warning (1 min 49.443 sec)
48
+ Records: 0 Duplicates: 0 Warnings: 1
49
+ ```
50
+
51
+ ### 全文検索だけ
52
+
53
+ 半分くらい(300万件くらい)ヒットするケース。Mroongaは1秒。
54
+
55
+ ```text
56
+ MariaDB> select count(*) from crimes where match(block) against('+ave' in boolean mode);
57
+ +----------+
58
+ | count(*) |
59
+ +----------+
60
+ | 3126521 |
61
+ +----------+
62
+ 1 row in set (1.066 sec)
63
+ ```
64
+
65
+ 半分くらい(300万件くらい)ヒットするケース。Groongaは0.3秒。
66
+
67
+ ```text
68
+ MariaDB> select mroonga_command('select --table crimes --query "block:@ave" --limit 0 --output_columns _key') as response;
69
+ +----------------------------------+
70
+ | response |
71
+ +----------------------------------+
72
+ | [[[3126521],[["_key","Int32"]]]] |
73
+ +----------------------------------+
74
+ 1 row in set (0.305 sec)
75
+ ```
76
+
77
+ 半分くらい(300万件くらい)ヒットするケース。InnoDBは18秒。
78
+
79
+ ```text
80
+ MariaDB> select count(*) from crimes2 where match(block) against('+ave' in boolean mode);
81
+ +----------+
82
+ | count(*) |
83
+ +----------+
84
+ | 3126521 |
85
+ +----------+
86
+ 1 row in set (17.563 sec)
87
+ ```
88
+
89
+ 4万件くらいヒットするケース。Mroongaは0.02秒。
90
+
91
+ ```text
92
+ MariaDB> select count(*) from crimes where match(block) against('+milwaukee' in boolean mode);
93
+ +----------+
94
+ | count(*) |
95
+ +----------+
96
+ | 38781 |
97
+ +----------+
98
+ 1 row in set (0.022 sec)
99
+ ```
100
+
101
+ 4万件くらいヒットするケース。Groongaは0.01秒。
102
+
103
+ ```text
104
+ MariaDB> select mroonga_command('select --table crimes --query "block:@milwaukee" --limit 0 --output_columns _key') as response;
105
+ +--------------------------------+
106
+ | response |
107
+ +--------------------------------+
108
+ | [[[38781],[["_key","Int32"]]]] |
109
+ +--------------------------------+
110
+ 1 row in set (0.010 sec)
111
+ ```
112
+
113
+ 4万件くらいヒットするケース。InnoDBは0.2秒。
114
+
115
+ ```text
116
+ MariaDB> select count(*) from crimes2 where match(block) against('+milwaukee' in boolean mode);
117
+ +----------+
118
+ | count(*) |
119
+ +----------+
120
+ | 38781 |
121
+ +----------+
122
+ 1 row in set (0.189 sec)
123
+ ```
124
+
125
+ ### 通常の検索だけ
126
+
127
+ 数値の等価条件1つで、26万件ヒットするケース。Mroongaの通常バージョンは1.3秒。
128
+
129
+ ```text
130
+ MariaDB> select count(*) from crimes where year = 2017;
131
+ +----------+
132
+ | count(*) |
133
+ +----------+
134
+ | 265156 |
135
+ +----------+
136
+ 1 row in set (1.285 sec)
137
+ ```
138
+
139
+ 数値の等価条件1つで、26万件ヒットするケース。Mroongaの最適化ONバージョンは0.4秒。
140
+
141
+ ```text
142
+ MariaDB> set mroonga_condition_push_down_type = all;
143
+ Query OK, 0 rows affected (0.000 sec)
144
+
145
+ MariaDB> select count(*) from crimes where year = 2017;
146
+ +----------+
147
+ | count(*) |
148
+ +----------+
149
+ | 265156 |
150
+ +----------+
151
+ 1 row in set (0.395 sec)
152
+ ```
153
+
154
+ 数値の等価条件1つで、26万件ヒットするケース。Groongaは0.4秒。
155
+
156
+ ```text
157
+ MariaDB> select mroonga_command('select --table crimes --filter "year == 2017" --limit 0 --output_columns _key') as response;
158
+ +---------------------------------+
159
+ | response |
160
+ +---------------------------------+
161
+ | [[[265156],[["_key","Int32"]]]] |
162
+ +---------------------------------+
163
+ 1 row in set (0.361 sec)
164
+ ```
165
+
166
+ 数値の等価条件1つで、26万件ヒットするケース。InnoDBは1.3秒。
167
+
168
+ ```text
169
+ MariaDB> select count(*) from crimes2 where year = 2017;
170
+ +----------+
171
+ | count(*) |
172
+ +----------+
173
+ | 265156 |
174
+ +----------+
175
+ 1 row in set (1.304 sec)
176
+ ```
177
+
178
+ 数値の等価条件1つと真偽値の等価条件2つで、7000件ヒットするケース。Mroongaの通常バージョンは2.3秒。
179
+
180
+ ```text
181
+ MariaDB> set mroonga_condition_push_down_type = default;
182
+ Query OK, 0 rows affected (0.000 sec)
183
+
184
+ MariaDB> select count(*) from crimes where year = 2017 and domestic = true and arrest = true;
185
+ +----------+
186
+ | count(*) |
187
+ +----------+
188
+ | 7148 |
189
+ +----------+
190
+ 1 row in set (2.262 sec)
191
+ ```
192
+
193
+ 数値の等価条件1つと真偽値の等価条件2つで、7000件ヒットするケース。Mroongaの最適化ONバージョンは0.4秒。
194
+
195
+ ```text
196
+ MariaDB> set mroonga_condition_push_down_type = all;
197
+ Query OK, 0 rows affected (0.000 sec)
198
+
199
+ MariaDB> select count(*) from crimes where year = 2017 and domestic = true and arrest = true;
200
+ +----------+
201
+ | count(*) |
202
+ +----------+
203
+ | 7148 |
204
+ +----------+
205
+ 1 row in set (0.365 sec)
206
+ ```
207
+
208
+ 数値の等価条件1つと真偽値の等価条件2つで、7000件ヒットするケース。Groongaは0.4秒。
209
+
210
+ ```text
211
+ MariaDB> select mroonga_command('select --table crimes --filter "year == 2017 && domestic == true && arrest == true" --limit 0 --output_columns _key') as response;
212
+ +-------------------------------+
213
+ | response |
214
+ +-------------------------------+
215
+ | [[[7148],[["_key","Int32"]]]] |
216
+ +-------------------------------+
217
+ 1 row in set (0.365 sec)
218
+ ```
219
+
220
+ 数値の等価条件1つと真偽値の等価条件2つで、7000件ヒットするケース。InnoDBは1.6秒。
221
+
222
+ ```text
223
+ MariaDB> select count(*) from crimes2 where year = 2017 and domestic = true and arrest = true;
224
+ +----------+
225
+ | count(*) |
226
+ +----------+
227
+ | 7148 |
228
+ +----------+
229
+ 1 row in set (1.582 sec)
230
+ ```
231
+
232
+ ### 全文検索と通常の検索
233
+
234
+ 数値の等価条件1つと真偽値の等価条件2つと全文検索(300万件くらいヒット)で、4000件ヒットするケース。Mroongaは0.4秒。(このときは常に最適化が効く。)
235
+
236
+ ```text
237
+ MariaDB> select count(*) from crimes where year = 2017 and domestic = true and arrest = true and match(block) against('+ave' in boolean mode);
238
+ +----------+
239
+ | count(*) |
240
+ +----------+
241
+ | 3982 |
242
+ +----------+
243
+ 1 row in set (0.440 sec)
244
+ ```
245
+
246
+ 数値の等価条件1つと真偽値の等価条件2つと全文検索(300万件くらいヒット)で、4000件ヒットするケース。Groongaは0.4秒。
247
+
248
+ ```text
249
+ MariaDB> select mroonga_command('select --table crimes --filter "block @ \'ave\' && year == 2017 && domestic == true && arrest == true" --limit 0 --output_columns _key') as response;
250
+ +-------------------------------+
251
+ | response |
252
+ +-------------------------------+
253
+ | [[[3982],[["_key","Int32"]]]] |
254
+ +-------------------------------+
255
+ 1 row in set (0.436 sec)
256
+ ```
257
+
258
+ 数値の等価条件1つと真偽値の等価条件2つと全文検索(300万件くらいヒット)で、4000件ヒットするケース。InnoDBは18秒。
259
+
260
+ ```text
261
+ MariaDB> select count(*) from crimes2 where year = 2017 and domestic = true and arrest = true and match(block) against('+ave' in boolean mode);
262
+ +----------+
263
+ | count(*) |
264
+ +----------+
265
+ | 3982 |
266
+ +----------+
267
+ 1 row in set (17.697 sec)
268
+ ```
269
+
270
+ 数値の等価条件1つと真偽値の等価条件2つと全文検索(4万件くらいヒット)で、10件ヒットするケース。Mroongaは0.01秒。(このときは常に最適化が効く。)
271
+
272
+ ```text
273
+ MariaDB> select count(*) from crimes where year = 2017 and domestic = true and arrest = true and match(block) against('+milwaukee' in boolean mode);
274
+ +----------+
275
+ | count(*) |
276
+ +----------+
277
+ | 10 |
278
+ +----------+
279
+ 1 row in set (0.010 sec)
280
+ ```
281
+
282
+ 数値の等価条件1つと真偽値の等価条件2つと全文検索(4万件くらいヒット)で、10件ヒットするケース。Groongaは0.01秒。
283
+
284
+ ```text
285
+ MariaDB> select mroonga_command('select --table crimes --filter "block @ \'milwaukee\' && year == 2017 && domestic == true && arrest == true" --limit 0 --output_columns _key') as response;
286
+ +-----------------------------+
287
+ | response |
288
+ +-----------------------------+
289
+ | [[[10],[["_key","Int32"]]]] |
290
+ +-----------------------------+
291
+ 1 row in set (0.010 sec)
292
+ ```
293
+
294
+ 数値の等価条件1つと真偽値の等価条件2つと全文検索(4万件くらいヒット)で、10件ヒットするケース。InnoDBは0.2秒。
295
+
296
+ ```text
297
+ MariaDB> select count(*) from crimes2 where year = 2017 and domestic = true and arrest = true and match(block) against('+milwaukee' in boolean mode);
298
+ +----------+
299
+ | count(*) |
300
+ +----------+
301
+ | 10 |
302
+ +----------+
303
+ 1 row in set (0.197 sec)
304
+ ```
data/theme.rb ADDED
@@ -0,0 +1,5 @@
1
+ @title_slide_title_font_size = @large_font_size
2
+
3
+ @groonga_product = "mroonga"
4
+
5
+ include_theme("groonga")
metadata ADDED
@@ -0,0 +1,90 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rabbit-slide-kou-jpmug-db-study-1
3
+ version: !ruby/object:Gem::Version
4
+ version: 2018.1.30.0
5
+ platform: ruby
6
+ authors:
7
+ - 須藤功平
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-01-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rabbit
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 2.0.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 2.0.2
27
+ - !ruby/object:Gem::Dependency
28
+ name: rabbit-theme-groonga
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: MariaDBには超高速な全文検索を実現するMroongaストレージエンジンがバンドルされていることを知っていますか?Mroongaを使うと日本語だけでなくアジア圏の言語も含むすべての言語をサポートした超高速な全文検索システムを簡単に作ることができます。どれだけ簡単に作ることができるか紹介します。
42
+ email:
43
+ - kou@clear-code.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".rabbit"
49
+ - README.rd
50
+ - Rakefile
51
+ - config.yaml
52
+ - images/chupa-text-web-ui-extract-metadata.png
53
+ - images/chupa-text-web-ui-extract-text-and-screenshot.png
54
+ - images/chupa-text-web-ui-form.png
55
+ - images/column-store.svg
56
+ - images/count-star.svg
57
+ - images/not-access-to-needless-columns.svg
58
+ - images/per-column-processing.svg
59
+ - images/php-document-search-search.png
60
+ - images/php-document-search.png
61
+ - mariadb-mroonga-all-languages-supported-super-fast-full-text-search-system.rab
62
+ - memo.md
63
+ - pdf/jpmug-db-study-1-mariadb-mroonga-all-languages-supported-super-fast-full-text-search-system.pdf
64
+ - theme.rb
65
+ homepage: http://slide.rabbit-shocker.org/authors/kou/jpmug-db-study-1/
66
+ licenses:
67
+ - CC-BY-SA-4.0
68
+ - CC-BY-3.0
69
+ metadata: {}
70
+ post_install_message:
71
+ rdoc_options: []
72
+ require_paths:
73
+ - lib
74
+ required_ruby_version: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ requirements: []
85
+ rubyforge_project:
86
+ rubygems_version: 2.5.2.2
87
+ signing_key:
88
+ specification_version: 4
89
+ summary: MariaDBとMroongaで作る全言語対応超高速全文検索システム
90
+ test_files: []