rabbit-slide-Piro-fast-fulltext-search-in-ruby-groonga 2015.02.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2665b7d270ea483a4c254cea718d706fff2283fd
4
+ data.tar.gz: d4fce5102f1d170c9de0d04dd59491b6ecf5c2bc
5
+ SHA512:
6
+ metadata.gz: 0f8b2e5d26ed91ea754ed7f7f9184ee52985fc7833a0f74c5c12c694997c922a8e52d91ce13cc44a2cf06d10191c65b74032c6f84b4947c035e842a6a7eb10e1
7
+ data.tar.gz: 80c7afa54fd5af9ce2838f1c1873318498b65741793b96055039fa1d5d84449bf84dbde341d12d7e2f8d7cadaad3747935ec1dfd6b08f6da7a167d948cf463f8
data/.rabbit ADDED
@@ -0,0 +1 @@
1
+ fast-fulltext-search-in-ruby-groonga-rroonga-droonga.md
@@ -0,0 +1,24 @@
1
+ # Javaいらず!Rubyで高速全文検索 -Groonga, Rroonga, Droonga-
2
+
3
+ [Tokyo Rubyist Meetup](http://trbmeetup.doorkeeper.jp/events/19450)用の発表資料です。
4
+
5
+ ## 作者向け
6
+
7
+ ### 表示
8
+
9
+ rake
10
+
11
+ ### 公開
12
+
13
+ rake publish
14
+
15
+ ## 閲覧者向け
16
+
17
+ ### インストール
18
+
19
+ gem install rabbit-slide-Piro-fast-fulltext-search-in-ruby-groonga-rroonga-droonga
20
+
21
+ ### 表示
22
+
23
+ rabbit rabbit-slide-Piro-fast-fulltext-search-in-ruby-groonga-rroonga-droonga.gem
24
+
@@ -0,0 +1,24 @@
1
+ # Fast fulltext search in Ruby, without Java -Groonga, Rroonga and Droonga-
2
+
3
+ These are the slides for a presentation at [Tokyo Rubyist Meetup](http://trbmeetup.doorkeeper.jp/events/19450).
4
+
5
+ ## For author
6
+
7
+ ### To show these slides
8
+
9
+ rake
10
+
11
+ ### To publish these slides
12
+
13
+ rake publish
14
+
15
+ ## For others
16
+
17
+ ### To install
18
+
19
+ gem install rabbit-slide-Piro-fast-fulltext-search-in-ruby-groonga-rroonga-droonga
20
+
21
+ ### To show
22
+
23
+ rabbit rabbit-slide-Piro-fast-fulltext-search-in-ruby-groonga-rroonga-droonga.gem
24
+
@@ -0,0 +1,17 @@
1
+ require "rabbit/task/slide"
2
+
3
+ # Slide metadata is configured in ./config.yaml; edit that file to customize it.
4
+ # `spec` is declared out here so the block below can hand the task's spec to the `tag` task.
5
+ spec = nil
6
+ Rabbit::Task::Slide.new do |task|
7
+ spec = task.spec
8
+ # task.spec.files += Dir.glob("doc/**/*.*")
9
+ # task.spec.files -= Dir.glob("private/**/*.*")
10
+ task.spec.add_runtime_dependency("rabbit-theme-groonga")
11
+ end
12
+ # `rake tag`: create an annotated git tag named after spec.version, then push all tags.
13
+ desc "Tag #{spec.version}"
14
+ task :tag do
15
+ sh("git", "tag", "-a", spec.version.to_s, "-m", "Publish #{spec.version}")
16
+ sh("git", "push", "--tags")
17
+ end
@@ -0,0 +1,21 @@
1
+ ---
2
+ id: fast-fulltext-search-in-ruby-groonga
3
+ base_name: fast-fulltext-search-in-ruby-groonga
4
+ tags:
5
+ - droonga
6
+ - groonga
7
+ presentation_date: 2015/02/12
8
+ version: 2015.02.12.0
9
+ licenses: []
10
+ slideshare_id: fastfulltextsearchinrubygroonga
11
+ speaker_deck_id:
12
+ ustream_id:
13
+ vimeo_id:
14
+ youtube_id:
15
+ author:
16
+ markup_language: :markdown
17
+ name: 結城洋志
18
+ email: yuki@clear-code.com
19
+ rubygems_user: Piro
20
+ slideshare_user: pirooutsiderreflex
21
+ speaker_deck_user:
@@ -0,0 +1,481 @@
1
+ # trbmeetup
2
+
3
+ subtitle
4
+ : Fast fulltext search in Ruby, without Java
5
+ -Groonga, Rroonga and Droonga-
6
+
7
+ author
8
+ : YUKI Hiroshi
9
+
10
+ institution
11
+ : ClearCode Inc.
12
+
13
+ allotted_time
14
+ : 30m
15
+
16
+ theme
17
+ : groonga
18
+
19
+
20
+
21
+ # Abstract
22
+
23
+ * Fulltext search?
24
+ * Groonga and Rroonga
25
+ - easy fulltext search in Ruby
26
+ * Droonga
27
+ - scalable fulltext search
28
+
29
+ # Introduction
30
+
31
+ What's
32
+ *fulltext search*?
33
+
34
+ # Searching without index
35
+
36
+ ex. `Array#grep`
37
+ ex. `LIKE` operator in SQL
38
+
39
+ ~~~
40
+ SELECT name,location
41
+ FROM Store
42
+ WHERE name LIKE '%Tokyo%';
43
+ ~~~
44
+ {: lang="sql"}
45
+
46
+ * easy, simple, but *slow*
47
+
48
+ # Fulltext search w/ index
49
+
50
+ ![](images/latency.png){:relative_width="60" align="right" relative_margin_right="-10"}
51
+
52
+ * Fast!!
53
+
54
+ # Demonstration
55
+
56
+ Methods
57
+ : * `Array#grep` (not indexed)
58
+ * `GrnMini::Array#select` (indexed)
59
+
60
+ Data
61
+ : * Wikipedia(ja) pages
62
+
63
+ # Demonstration: Result
64
+
65
+ ![](images/demo-latency.png){:relative_height="90"}
66
+
67
+ # Off topic: why fast?
68
+
69
+ ![](images/inversed-index-add-1.png){:relative_width="80"}
70
+
71
+ # Off topic: why fast?
72
+
73
+ ![](images/inversed-index-add-2.png){:relative_width="80"}
74
+
75
+ # Off topic: why fast?
76
+
77
+ ![](images/inversed-index-add-3.png){:relative_width="80"}
78
+
79
+ # Off topic: why fast?
80
+
81
+ ![](images/inversed-index-search-1.png){:relative_width="80"}
82
+
83
+ # Off topic: why fast?
84
+
85
+ ![](images/inversed-index-search-2.png){:relative_width="80"}
86
+
87
+ # Off topic: why fast?
88
+
89
+ ![](images/inversed-index-search-3.png){:relative_width="80"}
90
+
91
+ # Off topic: why fast?
92
+
93
+ ![](images/tokenize-bigram.png){:relative_height="90"}
94
+
95
+ # Off topic: why fast?
96
+
97
+ ![](images/tokenize-mecab.png){:relative_height="90"}
98
+
99
+ # How to introduce?
100
+
101
+ Major ways
102
+
103
+ * Sunspot
104
+ * elasticsearch-ruby
105
+
106
+ # Sunspot?
107
+
108
+ A client library of
109
+ *Solr*
110
+ for Ruby and Rails
111
+ (ActiveRecord)
112
+
113
+ # Sunspot: Usage
114
+
115
+ ~~~
116
+ class Post < ActiveRecord::Base
117
+ searchable do
118
+ # ...
119
+ end
120
+ end
121
+
122
+ result = Post.search do
123
+ fulltext 'best pizza'
124
+ # ...
125
+ end
126
+ ~~~
127
+ {: lang="ruby"}
128
+
129
+ # elasticsearch-ruby?
130
+
131
+ A client library of *Elasticsearch* for Ruby
132
+
133
+ ~~~
134
+ client = Elasticsearch::Client.new(log: true)
135
+ client.transport.reload_connections!
136
+ client.cluster.health
137
+ client.search(q: "test")
138
+ ~~~
139
+ {: lang="ruby"}
140
+
141
+ # Relations of services
142
+
143
+ ![](images/application-with-index.png){:relative_width="80"}
144
+
145
+ # But...
146
+
147
+ * [Apache Solr](http://lucene.apache.org/solr/): "built on Apache Lucene™."
148
+ * [Elasticsearch](http://www.elasticsearch.org/overview/elasticsearch/): "Built on top of Apache Lucene™"
149
+ * [Apache Lucene](http://lucene.apache.org/): "written entirely *in Java*."
150
+
151
+ # Java!!
152
+
153
+ ![](images/application-with-index-java.png){:relative_width="80"}
154
+
155
+ # In short
156
+
157
+ * They require *Java*.
158
+ * My Ruby product has to be combined with *Java*, just for fulltext search.
159
+
160
+ # Alternative choice
161
+
162
+ *Groonga*
163
+ and
164
+ *Rroonga*
165
+
166
+ # Groonga
167
+
168
+ * Fast fulltext search engine written in *C*
169
+ * Originally designed to search increasing huge numbers of comments in "2ch" (like Twitter)
170
+
171
+ # Groonga
172
+
173
+ * Realtime indexing
174
+ * Read/write lock-free
175
+ * Parallel updating and searching, without penalty
176
+ * Returns latest result ASAP
177
+ * No transaction
178
+ * No warranty for data consistency
179
+
180
+ # Relations of services
181
+
182
+ ![](images/application-with-groonga.png){:relative_width="80"}
183
+
184
+ # Groonga's interfaces
185
+
186
+ via command line interface
187
+
188
+ ~~~
189
+ $ groonga="groonga /path/to/database/db"
190
+ $ $groonga table_create --name Entries
191
+ --flags TABLE_PAT_KEY --key_type ShortText
192
+ $ $groonga select --table Entries
193
+ --query "title:@Ruby"
194
+ ~~~
195
+
196
+ # Groonga's interfaces
197
+
198
+ via HTTP
199
+
200
+ ~~~
201
+ $ groonga -d --protocol http --port 10041
202
+ /path/to/database/db
203
+
204
+ $ endpoint="http://groonga:10041"
205
+ $ curl "${endpoint}/d/table_create?name=Entries&
206
+ flags=TABLE_PAT_KEY&key_type=ShortText"
207
+ $ curl "${endpoint}/d/select?table=Entries&
208
+ query=title:@Ruby"
209
+ ~~~
210
+
211
+ # Groonga's interfaces
212
+
213
+ Narrowly-defined "Groonga"
214
+ : * CLI or server
215
+
216
+ libgroonga
217
+ : * In-process library
218
+ * Like a "better SQLite"
219
+
220
+ # Groonga
221
+
222
+ ![](images/groonga.png){:relative_height="90"}
223
+
224
+ # *R*roonga
225
+
226
+ ![](images/groonga-rroonga.png){:relative_height="90"}
227
+
228
+ # *R*roonga
229
+
230
+ * Based on libgroonga
231
+ * Low-level binding of Groonga for *Ruby*
232
+
233
+ # Relations of services
234
+
235
+ ![](images/application-with-groonga-rroonga.png){:relative_width="80"}
236
+
237
+ # Usage: Install
238
+
239
+ ~~~
240
+ % sudo gem install rroonga
241
+ ~~~
242
+
243
+ Groonga (libgroonga) is also installed as a part of the package.
244
+
245
+ # Usage: Prepare
246
+
247
+ ~~~
248
+ require "groonga"
249
+
250
+ Groonga::Database.create(path: "/tmp/bookmark.db")
251
+ # Or
252
+ Groonga::Database.open("/tmp/bookmark.db")
253
+ ~~~
254
+ {: lang="ruby"}
255
+
256
+ # Usage: Schema
257
+
258
+ ~~~
259
+ Groonga::Schema.define do |schema|
260
+ schema.create_table("Items",
261
+ type: :hash,
262
+ key_type: "ShortText") do |table|
263
+ table.text("title")
264
+ end
265
+ schema.create_table("Terms",
266
+ type: :patricia_trie,
267
+ normalizer: "NormalizerAuto",
268
+ default_tokenizer: "TokenBigram") do |table|
269
+ table.index("Items.title")
270
+ end
271
+ end
272
+ ~~~
273
+ {: lang="ruby"}
274
+
275
+ # Usage: Data loading
276
+
277
+ ~~~
278
+ items = Groonga["Items"]
279
+ items.add("http://en.wikipedia.org/wiki/Ruby",
280
+ title: "Wikipedia")
281
+ items.add("http://www.ruby-lang.org/",
282
+ title: "Ruby")
283
+ ~~~
284
+ {: lang="ruby"}
285
+
286
+ # Usage: Fulltext search
287
+
288
+ ~~~
289
+ items = Groonga["Items"]
290
+ ruby_items = items.select do |record|
291
+ record.title =~ "Ruby"
292
+ end
293
+ ~~~
294
+ {: lang="ruby"}
295
+
296
+ # FYI: GrnMini
297
+
298
+ * Lightweight wrapper
299
+ for Rroonga
300
+ * Limited features,
301
+ but easy to use
302
+
303
+ # FYI: GrnMini: Code
304
+
305
+ ~~~
306
+ require "grn_mini"
307
+
308
+ GrnMini::create_or_open("/tmp/bookmarks.db")
309
+
310
+ items = GrnMini::Array.new("Items")
311
+ items << { url: "http://en.wikipedia.org/wiki/Ruby",
312
+ title: "Ruby - Wikipedia" }
313
+ items << { url: "http://www.ruby-lang.org/",
314
+ title: "Ruby Language" }
315
+
316
+ ruby_items = items.select("title:@Ruby")
317
+ ~~~
318
+ {: lang="ruby"}
319
+
320
+ Good first step to try fulltext search in your Ruby product.
321
+
322
+ # For much more load...
323
+
324
+ Groonga
325
+ : works with *single process* on a computer
326
+
327
+ *D*roonga
328
+ : works with *multiple computers* constructing a Droonga cluster
329
+
330
+ # Droonga
331
+
332
+ ![](images/droonga.png){:relative_width="80"}
333
+
334
+ # Droonga
335
+
336
+ * Scalable
337
+ (replication + partitioning)
338
+ * Groonga compatible
339
+ HTTP interface
340
+ * Client library for Ruby
341
+ (`droonga-client`)
342
+
343
+ # Droonga
344
+
345
+ ![](images/droonga-throughput.png){:relative_height="90"}
346
+
347
+ # Usage of Droonga
348
+
349
+ Set up a Droonga node
350
+
351
+ ~~~
352
+ # base="https://raw.githubusercontent.com/droonga"
353
+ # curl ${base}/droonga-engine/master/install.sh | \
354
+ bash
355
+ # curl ${base}/droonga-http-server/master/install.sh | \
356
+ bash
357
+ # droonga-engine-catalog-generate --hosts=node0,node1,node2
358
+ # service droonga-engine start
359
+ # service droonga-http-server start
360
+ ~~~
361
+
362
+ # Usage of Droonga
363
+
364
+ Fulltext search via HTTP
365
+ (compatible with Groonga)
366
+
367
+ ~~~
368
+ $ endpoint="http://node0:10041"
369
+ $ curl "${endpoint}/d/table_create?name=Store&
370
+ flags=TABLE_PAT_KEY&key_type=ShortText"
371
+ ~~~
372
+
373
+
374
+ # More choices
375
+
376
+ * *M*roonga
377
+ * Add-on for *MySQL/MariaDB*
378
+ (Bundled with MariaDB by default)
379
+ * *PG*roonga
380
+ * Add-on for *PostgreSQL*
381
+
382
+ # Relations of services
383
+
384
+ ![](images/mroonga-pgroonga.png){:relative_width="80"}
385
+
386
+
387
+ # SQL w/ fulltext search
388
+
389
+ Mroonga
390
+
391
+ ~~~
392
+ SELECT name,location
393
+ FROM Store
394
+ WHERE MATCH(name)
395
+ AGAINST('+東京' IN BOOLEAN MODE);
396
+ ~~~
397
+ {: lang="sql"}
398
+
399
+
400
+ # SQL w/ fulltext search
401
+
402
+ PGroonga
403
+
404
+ ~~~
405
+ SELECT name,location
406
+ FROM Store WHERE name %% '東京';
407
+
408
+ SELECT name,location
409
+ FROM Store WHERE name @@ '東京 OR 大阪';
410
+
411
+ SELECT name,location
412
+ FROM Store WHERE name LIKE '%東京%';
413
+ /* alias to "name @@ '東京'"*/
414
+ ~~~
415
+ {: lang="sql"}
416
+
417
+
418
+ # Conclusion
419
+
420
+ * *Rroonga* (and *GrnMini*) introduces fast fulltext search into your Ruby product instantly
421
+ * *Droonga* for increasing load
422
+ * *Mroonga* and *PGroonga*
423
+ for existing RDBMS
424
+
425
+ # References
426
+
427
+ Sunspot
428
+ : http://sunspot.github.io/
429
+
430
+ elasticsearch-ruby
431
+ : https://github.com/elasticsearch/elasticsearch-ruby
432
+
433
+ # References
434
+
435
+ Apache Lucene
436
+ : http://lucene.apache.org/
437
+
438
+ Apache Solr
439
+ : http://lucene.apache.org/solr/
440
+
441
+ Elasticsearch
442
+ : http://www.elasticsearch.org/overview/elasticsearch/
443
+
444
+ # References
445
+
446
+ Groonga
447
+ : http://groonga.org/
448
+
449
+ Rroonga
450
+ : http://ranguba.org/
451
+
452
+ GrnMini
453
+ : https://github.com/ongaeshi/grn_mini
454
+
455
+ # References
456
+
457
+ Droonga
458
+ : http://droonga.org/
459
+
460
+ Mroonga
461
+ : http://mroonga.org/
462
+
463
+ PGroonga
464
+ : http://pgroonga.github.io/
465
+
466
+ # References
467
+
468
+ Comparison of PostgreSQL, pg_bigm and PGroonga
469
+ : http://blog.createfield.com/entry/2015/02/03/094940
470
+
471
+ # Advertisement
472
+
473
+ ![](images/syskan.jpg){:relative_width="40" align="right" relative_margin_right="-10"}
474
+
475
+ * Serial comic
476
+ at Nikkei Linux
477
+ * 2015.2.18
478
+ Release
479
+ * ¥1728
480
+ (tax-inclusive)
481
+ * Paper/Kindle
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
metadata ADDED
@@ -0,0 +1,98 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rabbit-slide-Piro-fast-fulltext-search-in-ruby-groonga
3
+ version: !ruby/object:Gem::Version
4
+ version: 2015.02.12.0
5
+ platform: ruby
6
+ authors:
7
+ - 結城洋志
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rabbit
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 2.0.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 2.0.2
27
+ - !ruby/object:Gem::Dependency
28
+ name: rabbit-theme-groonga
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: '[Tokyo Rubyist Meetup](http://trbmeetup.doorkeeper.jp/events/19450)用の発表資料です。'
42
+ email:
43
+ - yuki@clear-code.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .rabbit
49
+ - README.ja.md
50
+ - README.md
51
+ - Rakefile
52
+ - config.yaml
53
+ - fast-fulltext-search-in-ruby-groonga-rroonga-droonga.md
54
+ - images/application-with-groonga-rroonga.png
55
+ - images/application-with-groonga.png
56
+ - images/application-with-index-java.png
57
+ - images/application-with-index.png
58
+ - images/demo-latency.png
59
+ - images/droonga-throughput.png
60
+ - images/droonga.png
61
+ - images/groonga-rroonga.png
62
+ - images/groonga.png
63
+ - images/inversed-index-add-1.png
64
+ - images/inversed-index-add-2.png
65
+ - images/inversed-index-add-3.png
66
+ - images/inversed-index-search-1.png
67
+ - images/inversed-index-search-2.png
68
+ - images/inversed-index-search-3.png
69
+ - images/latency.png
70
+ - images/mroonga-pgroonga.png
71
+ - images/syskan.jpg
72
+ - images/tokenize-bigram.png
73
+ - images/tokenize-mecab.png
74
+ - pdf/fast-fulltext-search-in-ruby-groonga-fast-fulltext-search-in-ruby-groonga.pdf
75
+ homepage: http://slide.rabbit-shocker.org/authors/Piro/fast-fulltext-search-in-ruby-groonga/
76
+ licenses: []
77
+ metadata: {}
78
+ post_install_message:
79
+ rdoc_options: []
80
+ require_paths:
81
+ - lib
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - '>='
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ requirements: []
93
+ rubyforge_project:
94
+ rubygems_version: 2.4.1
95
+ signing_key:
96
+ specification_version: 4
97
+ summary: Javaいらず!Rubyで高速全文検索 -Groonga, Rroonga, Droonga-
98
+ test_files: []