rabbit-slide-Piro-fast-fulltext-search-in-ruby-groonga 2015.02.12.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2665b7d270ea483a4c254cea718d706fff2283fd
4
+ data.tar.gz: d4fce5102f1d170c9de0d04dd59491b6ecf5c2bc
5
+ SHA512:
6
+ metadata.gz: 0f8b2e5d26ed91ea754ed7f7f9184ee52985fc7833a0f74c5c12c694997c922a8e52d91ce13cc44a2cf06d10191c65b74032c6f84b4947c035e842a6a7eb10e1
7
+ data.tar.gz: 80c7afa54fd5af9ce2838f1c1873318498b65741793b96055039fa1d5d84449bf84dbde341d12d7e2f8d7cadaad3747935ec1dfd6b08f6da7a167d948cf463f8
data/.rabbit ADDED
@@ -0,0 +1 @@
1
+ fast-fulltext-search-in-ruby-groonga-rroonga-droonga.md
@@ -0,0 +1,24 @@
1
+ # Javaいらず!Rubyで高速全文検索 -Groonga, Rroonga, Droonga-
2
+
3
+ [Tokyo Rubyist Meetup](http://trbmeetup.doorkeeper.jp/events/19450)用の発表資料です。
4
+
5
+ ## 作者向け
6
+
7
+ ### 表示
8
+
9
+ rake
10
+
11
+ ### 公開
12
+
13
+ rake publish
14
+
15
+ ## 閲覧者向け
16
+
17
+ ### インストール
18
+
19
+ gem install rabbit-slide-Piro-fast-fulltext-search-in-ruby-groonga
20
+
21
+ ### 表示
22
+
23
+ rabbit rabbit-slide-Piro-fast-fulltext-search-in-ruby-groonga.gem
24
+
@@ -0,0 +1,24 @@
1
+ # Fast fulltext search in Ruby, without Java -Groonga, Rroonga and Droonga-
2
+
3
+ These are the slides for a presentation at [Tokyo Rubyist Meetup](http://trbmeetup.doorkeeper.jp/events/19450).
4
+
5
+ ## For author
6
+
7
+ ### To show these slides
8
+
9
+ rake
10
+
11
+ ### To publish these slides
12
+
13
+ rake publish
14
+
15
+ ## For others
16
+
17
+ ### To install
18
+
19
+ gem install rabbit-slide-Piro-fast-fulltext-search-in-ruby-groonga
20
+
21
+ ### To show
22
+
23
+ rabbit rabbit-slide-Piro-fast-fulltext-search-in-ruby-groonga.gem
24
+
@@ -0,0 +1,17 @@
1
+ require "rabbit/task/slide"
2
+
3
+ # Edit ./config.yaml to customize meta data
4
+
5
+ spec = nil
6
+ Rabbit::Task::Slide.new do |task|
7
+ spec = task.spec
8
+ # task.spec.files += Dir.glob("doc/**/*.*")
9
+ # task.spec.files -= Dir.glob("private/**/*.*")
10
+ task.spec.add_runtime_dependency("rabbit-theme-groonga")
11
+ end
12
+
13
+ desc "Tag #{spec.version}"
14
+ task :tag do
15
+ sh("git", "tag", "-a", spec.version.to_s, "-m", "Publish #{spec.version}")
16
+ sh("git", "push", "--tags")
17
+ end
@@ -0,0 +1,21 @@
1
+ ---
2
+ id: fast-fulltext-search-in-ruby-groonga
3
+ base_name: fast-fulltext-search-in-ruby-groonga
4
+ tags:
5
+ - droonga
6
+ - groonga
7
+ presentation_date: 2015/02/12
8
+ version: 2015.02.12.0
9
+ licenses: []
10
+ slideshare_id: fastfulltextsearchinrubygroonga
11
+ speaker_deck_id:
12
+ ustream_id:
13
+ vimeo_id:
14
+ youtube_id:
15
+ author:
16
+ markup_language: :markdown
17
+ name: 結城洋志
18
+ email: yuki@clear-code.com
19
+ rubygems_user: Piro
20
+ slideshare_user: pirooutsiderreflex
21
+ speaker_deck_user:
@@ -0,0 +1,481 @@
1
+ # trbmeetup
2
+
3
+ subtitle
4
+ : Fast fulltext search in Ruby, without Java
5
+ -Groonga, Rroonga and Droonga-
6
+
7
+ author
8
+ : YUKI Hiroshi
9
+
10
+ institution
11
+ : ClearCode Inc.
12
+
13
+ allotted_time
14
+ : 30m
15
+
16
+ theme
17
+ : groonga
18
+
19
+
20
+
21
+ # Abstract
22
+
23
+ * Fulltext search?
24
+ * Groonga and Rroonga
25
+ - easy fulltext search in Ruby
26
+ * Droonga
27
+ - scalable fulltext search
28
+
29
+ # Introduction
30
+
31
+ What's
32
+ *fulltext search*?
33
+
34
+ # Searching without index
35
+
36
+ ex. `Array#grep`
37
+ ex. `LIKE` operator in SQL
38
+
39
+ ~~~
40
+ SELECT name,location
41
+ FROM Store
42
+ WHERE name LIKE '%Tokyo%';
43
+ ~~~
44
+ {: lang="sql"}
45
+
46
+ * easy, simple, but *slow*
47
+
48
+ # Fulltext search w/ index
49
+
50
+ ![](images/latency.png){:relative_width="60" align="right" relative_margin_right="-10"}
51
+
52
+ * Fast!!
53
+
54
+ # Demonstration
55
+
56
+ Methods
57
+ : * `Array#grep` (not indexed)
58
+ * `GrnMini::Array#select` (indexed)
59
+
60
+ Data
61
+ : * Wikipedia(ja) pages
62
+
63
+ # Demonstration: Result
64
+
65
+ ![](images/demo-latency.png){:relative_height="90"}
66
+
67
+ # Off topic: why fast?
68
+
69
+ ![](images/inversed-index-add-1.png){:relative_width="80"}
70
+
71
+ # Off topic: why fast?
72
+
73
+ ![](images/inversed-index-add-2.png){:relative_width="80"}
74
+
75
+ # Off topic: why fast?
76
+
77
+ ![](images/inversed-index-add-3.png){:relative_width="80"}
78
+
79
+ # Off topic: why fast?
80
+
81
+ ![](images/inversed-index-search-1.png){:relative_width="80"}
82
+
83
+ # Off topic: why fast?
84
+
85
+ ![](images/inversed-index-search-2.png){:relative_width="80"}
86
+
87
+ # Off topic: why fast?
88
+
89
+ ![](images/inversed-index-search-3.png){:relative_width="80"}
90
+
91
+ # Off topic: why fast?
92
+
93
+ ![](images/tokenize-bigram.png){:relative_height="90"}
94
+
95
+ # Off topic: why fast?
96
+
97
+ ![](images/tokenize-mecab.png){:relative_height="90"}
98
+
99
+ # How to introduce it?
100
+
101
+ Major ways
102
+
103
+ * Sunspot
104
+ * elasticsearch-ruby
105
+
106
+ # Sunspot?
107
+
108
+ A client library of
109
+ *Solr*
110
+ for Ruby and Rails
111
+ (ActiveRecord)
112
+
113
+ # Sunspot: Usage
114
+
115
+ ~~~
116
+ class Post < ActiveRecord::Base
117
+ searchable do
118
+ # ...
119
+ end
120
+ end
121
+
122
+ result = Post.search do
123
+ fulltext 'best pizza'
124
+ # ...
125
+ end
126
+ ~~~
127
+ {: lang="ruby"}
128
+
129
+ # elasticsearch-ruby?
130
+
131
+ A client library of *Elasticsearch* for Ruby
132
+
133
+ ~~~
134
+ client = Elasticsearch::Client.new(log: true)
135
+ client.transport.reload_connections!
136
+ client.cluster.health
137
+ client.search(q: "test")
138
+ ~~~
139
+ {: lang="ruby"}
140
+
141
+ # Relations of services
142
+
143
+ ![](images/application-with-index.png){:relative_width="80"}
144
+
145
+ # But...
146
+
147
+ * [Apache Solr](http://lucene.apache.org/solr/): "built on Apache Lucene™."
148
+ * [Elasticsearch](http://www.elasticsearch.org/overview/elasticsearch/): "Built on top of Apache Lucene™"
149
+ * [Apache Lucene](http://lucene.apache.org/): "written entirely *in Java*."
150
+
151
+ # Java!!
152
+
153
+ ![](images/application-with-index-java.png){:relative_width="80"}
154
+
155
+ # In short
156
+
157
+ * They require *Java*.
158
+ * My Ruby product has to be combined with *Java*, just for fulltext search.
159
+
160
+ # Alternative choice
161
+
162
+ *Groonga*
163
+ and
164
+ *Rroonga*
165
+
166
+ # Groonga
167
+
168
+ * Fast fulltext search engine written in *C*
169
+ * Originally designed to search the huge, ever-growing number of comments in "2ch" (like Twitter)
170
+
171
+ # Groonga
172
+
173
+ * Realtime indexing
174
+ * Read/write lock-free
175
+ * Parallel updating and searching, without penalty
176
+ * Returns latest result ASAP
177
+ * No transaction
178
+ * No warranty for data consistency
179
+
180
+ # Relations of services
181
+
182
+ ![](images/application-with-groonga.png){:relative_width="80"}
183
+
184
+ # Groonga's interfaces
185
+
186
+ via command line interface
187
+
188
+ ~~~
189
+ $ groonga="groonga /path/to/database/db"
190
+ $ $groonga table_create --name Entries
191
+ --flags TABLE_PAT_KEY --key_type ShortText
192
+ $ $groonga select --table Entries
193
+ --query "title:@Ruby"
194
+ ~~~
195
+
196
+ # Groonga's interfaces
197
+
198
+ via HTTP
199
+
200
+ ~~~
201
+ $ groonga -d --protocol http --port 10041
202
+ /path/to/database/db
203
+
204
+ $ endpoint="http://groonga:10041"
205
+ $ curl "${endpoint}/d/table_create?name=Entries&
206
+ flags=TABLE_PAT_KEY&key_type=ShortText"
207
+ $ curl "${endpoint}/d/select?table=Entries&
208
+ query=title:@Ruby"
209
+ ~~~
210
+
211
+ # Groonga's interfaces
212
+
213
+ Narrowly-defined "Groonga"
214
+ : * CLI or server
215
+
216
+ libgroonga
217
+ : * In-process library
218
+ * Like a "better SQLite"
219
+
220
+ # Groonga
221
+
222
+ ![](images/groonga.png){:relative_height="90"}
223
+
224
+ # *R*roonga
225
+
226
+ ![](images/groonga-rroonga.png){:relative_height="90"}
227
+
228
+ # *R*roonga
229
+
230
+ * Based on libgroonga
231
+ * Low-level binding of Groonga for *Ruby*
232
+
233
+ # Relations of services
234
+
235
+ ![](images/application-with-groonga-rroonga.png){:relative_width="80"}
236
+
237
+ # Usage: Install
238
+
239
+ ~~~
240
+ % sudo gem install rroonga
241
+ ~~~
242
+
243
+ Groonga (libgroonga) is also installed as a part of the package.
244
+
245
+ # Usage: Prepare
246
+
247
+ ~~~
248
+ require "groonga"
249
+
250
+ Groonga::Database.create(path: "/tmp/bookmark.db")
251
+ # Or
252
+ Groonga::Database.open("/tmp/bookmark.db")
253
+ ~~~
254
+ {: lang="ruby"}
255
+
256
+ # Usage: Schema
257
+
258
+ ~~~
259
+ Groonga::Schema.define do |schema|
260
+ schema.create_table("Items",
261
+ type: :hash,
262
+ key_type: "ShortText") do |table|
263
+ table.text("title")
264
+ end
265
+ schema.create_table("Terms",
266
+ type: :patricia_trie,
267
+ normalizer: "NormalizerAuto",
268
+ default_tokenizer: "TokenBigram") do |table|
269
+ table.index("Items.title")
270
+ end
271
+ end
272
+ ~~~
273
+ {: lang="ruby"}
274
+
275
+ # Usage: Data loading
276
+
277
+ ~~~
278
+ items = Groonga["Items"]
279
+ items.add("http://en.wikipedia.org/wiki/Ruby",
280
+ title: "Wikipedia")
281
+ items.add("http://www.ruby-lang.org/",
282
+ title: "Ruby")
283
+ ~~~
284
+ {: lang="ruby"}
285
+
286
+ # Usage: Fulltext search
287
+
288
+ ~~~
289
+ items = Groonga["Items"]
290
+ ruby_items = items.select do |record|
291
+ record.title =~ "Ruby"
292
+ end
293
+ ~~~
294
+ {: lang="ruby"}
295
+
296
+ # FYI: GrnMini
297
+
298
+ * Lightweight wrapper
299
+ for Rroonga
300
+ * Limited features,
301
+ but easy to use
302
+
303
+ # FYI: GrnMini: Code
304
+
305
+ ~~~
306
+ require "grn_mini"
307
+
308
+ GrnMini::create_or_open("/tmp/bookmarks.db")
309
+
310
+ items = GrnMini::Array.new("Items")
311
+ items << { url: "http://en.wikipedia.org/wiki/Ruby",
312
+ title: "Ruby - Wikipedia" }
313
+ items << { url: "http://www.ruby-lang.org/",
314
+ title: "Ruby Language" }
315
+
316
+ ruby_items = items.select("title:@Ruby")
317
+ ~~~
318
+ {: lang="ruby"}
319
+
320
+ Good first step to try fulltext search in your Ruby product.
321
+
322
+ # For much more load...
323
+
324
+ Groonga
325
+ : works with *single process* on a computer
326
+
327
+ *D*roonga
328
+ : works with *multiple computers* constructing a Droonga cluster
329
+
330
+ # Droonga
331
+
332
+ ![](images/droonga.png){:relative_width="80"}
333
+
334
+ # Droonga
335
+
336
+ * Scalable
337
+ (replication + partitioning)
338
+ * Groonga compatible
339
+ HTTP interface
340
+ * Client library for Ruby
341
+ (`droonga-client`)
342
+
343
+ # Droonga
344
+
345
+ ![](images/droonga-throughput.png){:relative_height="90"}
346
+
347
+ # Usage of Droonga
348
+
349
+ Set up a Droonga node
350
+
351
+ ~~~
352
+ # base="https://raw.githubusercontent.com/droonga"
353
+ # curl ${base}/droonga-engine/master/install.sh | \
354
+ bash
355
+ # curl ${base}/droonga-http-server/master/install.sh | \
356
+ bash
357
+ # droonga-engine-catalog-generate --hosts=node0,node1,node2
358
+ # service droonga-engine start
359
+ # service droonga-http-server start
360
+ ~~~
361
+
362
+ # Usage of Droonga
363
+
364
+ Fulltext search via HTTP
365
+ (compatible with Groonga)
366
+
367
+ ~~~
368
+ $ endpoint="http://node0:10041"
369
+ $ curl "${endpoint}/d/table_create?name=Store&
370
+ flags=TABLE_PAT_KEY&key_type=ShortText"
371
+ ~~~
372
+
373
+
374
+ # More choices
375
+
376
+ * *M*roonga
377
+ * Add-on for *MySQL/MariaDB*
378
+ (Bundled with MariaDB by default)
379
+ * *PG*roonga
380
+ * Add-on for *PostgreSQL*
381
+
382
+ # Relations of services
383
+
384
+ ![](images/mroonga-pgroonga.png){:relative_width="80"}
385
+
386
+
387
+ # SQL w/ fulltext search
388
+
389
+ Mroonga
390
+
391
+ ~~~
392
+ SELECT name,location
393
+ FROM Store
394
+ WHERE MATCH(name)
395
+ AGAINST('+東京' IN BOOLEAN MODE);
396
+ ~~~
397
+ {: lang="sql"}
398
+
399
+
400
+ # SQL w/ fulltext search
401
+
402
+ PGroonga
403
+
404
+ ~~~
405
+ SELECT name,location
406
+ FROM Store WHERE name %% '東京';
407
+
408
+ SELECT name,location
409
+ FROM Store WHERE name @@ '東京 OR 大阪';
410
+
411
+ SELECT name,location
412
+ FROM Store WHERE name LIKE '%東京%';
413
+ /* alias to "name @@ '東京'"*/
414
+ ~~~
415
+ {: lang="sql"}
416
+
417
+
418
+ # Conclusion
419
+
420
+ * *Rroonga* (and *GrnMini*) introduces fast fulltext search into your Ruby product instantly
421
+ * *Droonga* for increasing load
422
+ * *Mroonga* and *PGroonga*
423
+ for existing RDBMS
424
+
425
+ # References
426
+
427
+ Sunspot
428
+ : http://sunspot.github.io/
429
+
430
+ elasticsearch-ruby
431
+ : https://github.com/elasticsearch/elasticsearch-ruby
432
+
433
+ # References
434
+
435
+ Apache Lucene
436
+ : http://lucene.apache.org/
437
+
438
+ Apache Solr
439
+ : http://lucene.apache.org/solr/
440
+
441
+ Elasticsearch
442
+ : http://www.elasticsearch.org/overview/elasticsearch/
443
+
444
+ # References
445
+
446
+ Groonga
447
+ : http://groonga.org/
448
+
449
+ Rroonga
450
+ : http://ranguba.org/
451
+
452
+ GrnMini
453
+ : https://github.com/ongaeshi/grn_mini
454
+
455
+ # References
456
+
457
+ Droonga
458
+ : http://droonga.org/
459
+
460
+ Mroonga
461
+ : http://mroonga.org/
462
+
463
+ PGroonga
464
+ : http://pgroonga.github.io/
465
+
466
+ # References
467
+
468
+ Comparison of PostgreSQL, pg_bigm and PGroonga
469
+ : http://blog.createfield.com/entry/2015/02/03/094940
470
+
471
+ # Advertisement
472
+
473
+ ![](images/syskan.jpg){:relative_width="40" align="right" relative_margin_right="-10"}
474
+
475
+ * Serial comic
476
+ at Nikkei Linux
477
+ * 2015.2.18
478
+ Release
479
+ * ¥1728
480
+ (tax-inclusive)
481
+ * Paper/Kindle
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
metadata ADDED
@@ -0,0 +1,98 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rabbit-slide-Piro-fast-fulltext-search-in-ruby-groonga
3
+ version: !ruby/object:Gem::Version
4
+ version: 2015.02.12.0
5
+ platform: ruby
6
+ authors:
7
+ - 結城洋志
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rabbit
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 2.0.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 2.0.2
27
+ - !ruby/object:Gem::Dependency
28
+ name: rabbit-theme-groonga
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: '[Tokyo Rubyist Meetup](http://trbmeetup.doorkeeper.jp/events/19450)用の発表資料です。'
42
+ email:
43
+ - yuki@clear-code.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .rabbit
49
+ - README.ja.md
50
+ - README.md
51
+ - Rakefile
52
+ - config.yaml
53
+ - fast-fulltext-search-in-ruby-groonga-rroonga-droonga.md
54
+ - images/application-with-groonga-rroonga.png
55
+ - images/application-with-groonga.png
56
+ - images/application-with-index-java.png
57
+ - images/application-with-index.png
58
+ - images/demo-latency.png
59
+ - images/droonga-throughput.png
60
+ - images/droonga.png
61
+ - images/groonga-rroonga.png
62
+ - images/groonga.png
63
+ - images/inversed-index-add-1.png
64
+ - images/inversed-index-add-2.png
65
+ - images/inversed-index-add-3.png
66
+ - images/inversed-index-search-1.png
67
+ - images/inversed-index-search-2.png
68
+ - images/inversed-index-search-3.png
69
+ - images/latency.png
70
+ - images/mroonga-pgroonga.png
71
+ - images/syskan.jpg
72
+ - images/tokenize-bigram.png
73
+ - images/tokenize-mecab.png
74
+ - pdf/fast-fulltext-search-in-ruby-groonga-fast-fulltext-search-in-ruby-groonga.pdf
75
+ homepage: http://slide.rabbit-shocker.org/authors/Piro/fast-fulltext-search-in-ruby-groonga/
76
+ licenses: []
77
+ metadata: {}
78
+ post_install_message:
79
+ rdoc_options: []
80
+ require_paths:
81
+ - lib
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - '>='
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ requirements: []
93
+ rubyforge_project:
94
+ rubygems_version: 2.4.1
95
+ signing_key:
96
+ specification_version: 4
97
+ summary: Javaいらず!Rubyで高速全文検索 -Groonga, Rroonga, Droonga-
98
+ test_files: []