rroonga 4.0.0 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +6 -0
- data/doc/text/news.textile +26 -0
- data/ext/groonga/rb-grn-column.c +42 -2
- data/ext/groonga/rb-grn-index-column.c +57 -159
- data/ext/groonga/rb-grn-object.c +7 -0
- data/ext/groonga/rb-grn-table.c +19 -3
- data/ext/groonga/rb-grn-utils.c +4 -1
- data/ext/groonga/rb-grn-variable-size-column.c +405 -5
- data/ext/groonga/rb-grn.h +18 -1
- data/lib/groonga/dumper.rb +46 -22
- data/lib/groonga/expression-builder.rb +2 -4
- data/lib/groonga/schema.rb +16 -234
- data/rroonga-build.rb +3 -3
- data/rroonga.gemspec +1 -1
- data/test/test-array.rb +6 -5
- data/test/test-column.rb +0 -14
- data/test/test-database.rb +6 -5
- data/test/test-double-array-trie.rb +6 -5
- data/test/test-expression.rb +2 -2
- data/test/test-hash.rb +6 -5
- data/test/test-index-column.rb +163 -33
- data/test/test-patricia-trie.rb +6 -5
- data/test/test-schema-dumper.rb +39 -7
- data/test/test-table-dumper.rb +46 -1
- data/test/test-table-select-weight.rb +12 -0
- data/test/test-variable-size-column.rb +87 -3
- metadata +140 -139
data/rroonga-build.rb
CHANGED
@@ -19,15 +19,15 @@ module RroongaBuild
|
|
19
19
|
module RequiredGroongaVersion
|
20
20
|
MAJOR = 4
|
21
21
|
MINOR = 0
|
22
|
-
MICRO =
|
22
|
+
MICRO = 1
|
23
23
|
VERSION = [MAJOR, MINOR, MICRO]
|
24
|
-
RELEASED_DATE = Time.utc(2014,
|
24
|
+
RELEASED_DATE = Time.utc(2014, 3, 29)
|
25
25
|
end
|
26
26
|
|
27
27
|
module LatestGroongaVersion
|
28
28
|
MAJOR = 4
|
29
29
|
MINOR = 0
|
30
|
-
MICRO =
|
30
|
+
MICRO = 1
|
31
31
|
VERSION = [MAJOR, MINOR, MICRO]
|
32
32
|
end
|
33
33
|
|
data/rroonga.gemspec
CHANGED
@@ -59,7 +59,7 @@ Gem::Specification.new do |s|
|
|
59
59
|
description = clean_white_space.call(entries[entries.index("Description") + 1])
|
60
60
|
s.summary, s.description, = description.split(/\n\n+/, 3)
|
61
61
|
|
62
|
-
s.files = ["README.textile", "AUTHORS", "Rakefile", "Gemfile"]
|
62
|
+
s.files = ["README.textile", "AUTHORS", "Rakefile", "Gemfile", ".yardopts"]
|
63
63
|
s.files += Dir.glob("doc/text/*.textile")
|
64
64
|
s.files += ["#{s.name}.gemspec"]
|
65
65
|
s.files += ["rroonga-build.rb", "extconf.rb"]
|
data/test/test-array.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2009-
|
1
|
+
# Copyright (C) 2009-2014 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -117,11 +117,12 @@ class ArrayTest < Test::Unit::TestCase
|
|
117
117
|
users = Groonga::Array.create(:name => "Users")
|
118
118
|
users.define_column("name", "ShortText")
|
119
119
|
users.define_column("address", "ShortText")
|
120
|
-
|
121
|
-
|
122
|
-
|
120
|
+
large_data = "x" * (2 ** 16)
|
121
|
+
100.times do |i|
|
122
|
+
users.add(:name => "user #{i}" + large_data,
|
123
|
+
:address => "address #{i}" + large_data)
|
123
124
|
end
|
124
|
-
assert_equal(
|
125
|
+
assert_equal(2, users.defrag)
|
125
126
|
end
|
126
127
|
|
127
128
|
def test_rename
|
data/test/test-column.rb
CHANGED
@@ -93,20 +93,6 @@ class ColumnTest < Test::Unit::TestCase
|
|
93
93
|
assert_equal([@bookmarks_content], @bookmarks_index_content.sources)
|
94
94
|
end
|
95
95
|
|
96
|
-
def test_update_index_column
|
97
|
-
groonga = @bookmarks.add
|
98
|
-
groonga["content"] = "<html><body>groonga</body></html>"
|
99
|
-
|
100
|
-
ruby = @bookmarks.add
|
101
|
-
ruby["content"] = "<html><body>ruby</body></html>"
|
102
|
-
|
103
|
-
@bookmarks_index_content[groonga.id] = groonga["content"]
|
104
|
-
@bookmarks_index_content[ruby.id] = ruby["content"]
|
105
|
-
|
106
|
-
assert_content_search([groonga], "groonga")
|
107
|
-
assert_content_search([groonga, ruby], "html")
|
108
|
-
end
|
109
|
-
|
110
96
|
def test_range
|
111
97
|
assert_equal(context[Groonga::Type::SHORT_TEXT], @bookmarks_uri.range)
|
112
98
|
assert_equal(context[Groonga::Type::TEXT], @bookmarks_comment.range)
|
data/test/test-database.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2009-
|
1
|
+
# Copyright (C) 2009-2014 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -146,11 +146,12 @@ class DatabaseTest < Test::Unit::TestCase
|
|
146
146
|
end
|
147
147
|
end
|
148
148
|
users = context["Users"]
|
149
|
-
|
150
|
-
|
151
|
-
|
149
|
+
large_data = "x" * (2 ** 16)
|
150
|
+
100.times do |i|
|
151
|
+
users.add(:name => "user #{i}" + large_data,
|
152
|
+
:address => "address #{i}" + large_data)
|
152
153
|
end
|
153
|
-
assert_equal(
|
154
|
+
assert_equal(2, @database.defrag)
|
154
155
|
end
|
155
156
|
|
156
157
|
def test_tables
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
#
|
3
|
-
# Copyright (C) 2011-
|
3
|
+
# Copyright (C) 2011-2014 Kouhei Sutou <kou@clear-code.com>
|
4
4
|
#
|
5
5
|
# This library is free software; you can redistribute it and/or
|
6
6
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -158,12 +158,13 @@ class DoubleArrayTrieTest < Test::Unit::TestCase
|
|
158
158
|
:key_type => "ShortText")
|
159
159
|
users.define_column("name", "ShortText")
|
160
160
|
users.define_column("address", "ShortText")
|
161
|
-
|
161
|
+
large_data = "x" * (2 ** 16)
|
162
|
+
100.times do |i|
|
162
163
|
users.add("user #{i}",
|
163
|
-
:name => "user #{i}"
|
164
|
-
:address => "address #{i}"
|
164
|
+
:name => "user #{i}" + large_data,
|
165
|
+
:address => "address #{i}" + large_data)
|
165
166
|
end
|
166
|
-
assert_equal(
|
167
|
+
assert_equal(2, users.defrag)
|
167
168
|
end
|
168
169
|
|
169
170
|
def test_rename
|
data/test/test-expression.rb
CHANGED
@@ -37,7 +37,7 @@ class ExpressionTest < Test::Unit::TestCase
|
|
37
37
|
|
38
38
|
def test_get_value
|
39
39
|
users = Groonga::Hash.create(:name => "Users")
|
40
|
-
|
40
|
+
users.define_column("name", "ShortText")
|
41
41
|
|
42
42
|
morita = users.add("morita", :name => "mori daijiro")
|
43
43
|
|
@@ -51,7 +51,7 @@ class ExpressionTest < Test::Unit::TestCase
|
|
51
51
|
|
52
52
|
def test_get_value_with_variable
|
53
53
|
users = Groonga::Hash.create(:name => "Users")
|
54
|
-
|
54
|
+
users.define_column("name", "ShortText")
|
55
55
|
|
56
56
|
morita = users.add("morita", :name => "mori daijiro")
|
57
57
|
gunyara_kun = users.add("gunyara-kun", :name => "Tasuku SUENAGA")
|
data/test/test-hash.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
#
|
3
|
-
# Copyright (C) 2009-
|
3
|
+
# Copyright (C) 2009-2014 Kouhei Sutou <kou@clear-code.com>
|
4
4
|
#
|
5
5
|
# This library is free software; you can redistribute it and/or
|
6
6
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -367,12 +367,13 @@ class HashTest < Test::Unit::TestCase
|
|
367
367
|
:key_type => "ShortText")
|
368
368
|
users.define_column("name", "ShortText")
|
369
369
|
users.define_column("address", "ShortText")
|
370
|
-
|
370
|
+
large_data = "x" * (2 ** 16)
|
371
|
+
100.times do |i|
|
371
372
|
users.add("user #{i}",
|
372
|
-
:name => "user #{i}"
|
373
|
-
:address => "address #{i}"
|
373
|
+
:name => "user #{i}" + large_data,
|
374
|
+
:address => "address #{i}" + large_data)
|
374
375
|
end
|
375
|
-
assert_equal(
|
376
|
+
assert_equal(2, users.defrag)
|
376
377
|
end
|
377
378
|
|
378
379
|
def test_rename
|
data/test/test-index-column.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
#
|
3
|
-
# Copyright (C) 2009-
|
3
|
+
# Copyright (C) 2009-2014 Kouhei Sutou <kou@clear-code.com>
|
4
4
|
#
|
5
5
|
# This library is free software; you can redistribute it and/or
|
6
6
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -55,40 +55,24 @@ class IndexColumnTest < Test::Unit::TestCase
|
|
55
55
|
class CRUDTest < self
|
56
56
|
setup
|
57
57
|
def setup_schema
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
:key_type => "ShortText",
|
63
|
-
:default_tokenizer => "TokenBigram")
|
64
|
-
@index = terms.define_index_column("content", @articles,
|
65
|
-
:with_position => true,
|
66
|
-
:with_section => true)
|
67
|
-
end
|
68
|
-
|
69
|
-
def test_array_set_with_record
|
70
|
-
content = <<-EOC
|
71
|
-
groonga は組み込み型の全文検索エンジンライブラリです。
|
72
|
-
DBMSやスクリプト言語処理系等に組み込むことによって、その
|
73
|
-
全文検索機能を強化することができます。また、リレーショナ
|
74
|
-
ルモデルに基づくデータストア機能を内包しており、groonga
|
75
|
-
単体でも高速なデータストアサーバとして使用することができ
|
76
|
-
ます。
|
77
|
-
|
78
|
-
■全文検索方式
|
79
|
-
転置索引型の全文検索エンジンです。転置索引は圧縮されてファ
|
80
|
-
イルに格納され、検索時のディスク読み出し量を小さく、かつ
|
81
|
-
局所的に抑えるように設計されています。用途に応じて以下の
|
82
|
-
索引タイプを選択できます。
|
83
|
-
EOC
|
84
|
-
|
85
|
-
groonga = @articles.add(:content => content)
|
58
|
+
Groonga::Schema.define do |schema|
|
59
|
+
schema.create_table("Articles") do |table|
|
60
|
+
table.text("content")
|
61
|
+
end
|
86
62
|
|
87
|
-
|
88
|
-
|
63
|
+
schema.create_table("Terms",
|
64
|
+
:type => :hash,
|
65
|
+
:key_type => "ShortText",
|
66
|
+
:default_tokenizer => "TokenBigram") do |table|
|
67
|
+
table.index("Articles.content",
|
68
|
+
:name => "articles_content",
|
69
|
+
:with_position => true,
|
70
|
+
:with_section => true)
|
71
|
+
end
|
89
72
|
end
|
90
|
-
|
91
|
-
|
73
|
+
|
74
|
+
@articles = Groonga["Articles"]
|
75
|
+
@index = Groonga["Terms.articles_content"]
|
92
76
|
end
|
93
77
|
|
94
78
|
def test_add
|
@@ -365,4 +349,150 @@ class IndexColumnTest < Test::Unit::TestCase
|
|
365
349
|
@index.disk_usage)
|
366
350
|
end
|
367
351
|
end
|
352
|
+
|
353
|
+
class SearchTest < self
|
354
|
+
class WeightTest < self
|
355
|
+
def setup
|
356
|
+
super
|
357
|
+
setup_schema
|
358
|
+
end
|
359
|
+
|
360
|
+
def setup_schema
|
361
|
+
Groonga::Schema.define do |schema|
|
362
|
+
schema.create_table("Memos",
|
363
|
+
:type => :hash,
|
364
|
+
:key_type => :short_text) do |table|
|
365
|
+
table.short_text("tags",
|
366
|
+
:type => :vector,
|
367
|
+
:with_weight => true)
|
368
|
+
end
|
369
|
+
|
370
|
+
schema.create_table("Tags",
|
371
|
+
:type => :hash,
|
372
|
+
:key_type => :short_text) do |table|
|
373
|
+
table.index("Memos.tags",
|
374
|
+
:name => "memos_index",
|
375
|
+
:with_weight => true)
|
376
|
+
end
|
377
|
+
end
|
378
|
+
|
379
|
+
@memos = context["Memos"]
|
380
|
+
@index = context["Tags.memos_index"]
|
381
|
+
end
|
382
|
+
|
383
|
+
def search(keyword, options={})
|
384
|
+
@index.search(keyword, options).collect do |record|
|
385
|
+
{
|
386
|
+
:key => record._key,
|
387
|
+
:score => record.score,
|
388
|
+
}
|
389
|
+
end
|
390
|
+
end
|
391
|
+
|
392
|
+
def test_index
|
393
|
+
record = @memos.add("Rroonga is fun!")
|
394
|
+
record.tags = [
|
395
|
+
{
|
396
|
+
:value => "rroonga",
|
397
|
+
:weight => 9,
|
398
|
+
}
|
399
|
+
]
|
400
|
+
|
401
|
+
expected = [
|
402
|
+
{
|
403
|
+
:key => "Rroonga is fun!",
|
404
|
+
:score => 10,
|
405
|
+
}
|
406
|
+
]
|
407
|
+
assert_equal(expected, search("rroonga"))
|
408
|
+
end
|
409
|
+
|
410
|
+
def test_search
|
411
|
+
record = @memos.add("Rroonga is fun!")
|
412
|
+
record.tags = [
|
413
|
+
{
|
414
|
+
:value => "rroonga",
|
415
|
+
}
|
416
|
+
]
|
417
|
+
|
418
|
+
expected = [
|
419
|
+
{
|
420
|
+
:key => "Rroonga is fun!",
|
421
|
+
:score => 5,
|
422
|
+
}
|
423
|
+
]
|
424
|
+
assert_equal(expected, search("rroonga", :weight => 5))
|
425
|
+
end
|
426
|
+
|
427
|
+
def test_index_and_search
|
428
|
+
record = @memos.add("Rroonga is fun!")
|
429
|
+
record.tags = [
|
430
|
+
{
|
431
|
+
:value => "rroonga",
|
432
|
+
:weight => 9,
|
433
|
+
}
|
434
|
+
]
|
435
|
+
|
436
|
+
expected = [
|
437
|
+
{
|
438
|
+
:key => "Rroonga is fun!",
|
439
|
+
:score => 50
|
440
|
+
}
|
441
|
+
]
|
442
|
+
assert_equal(expected, search("rroonga", :weight => 5))
|
443
|
+
end
|
444
|
+
end
|
445
|
+
|
446
|
+
class OperatorTest < self
|
447
|
+
def setup
|
448
|
+
super
|
449
|
+
setup_schema
|
450
|
+
end
|
451
|
+
|
452
|
+
def setup_schema
|
453
|
+
Groonga::Schema.define do |schema|
|
454
|
+
schema.create_table("Memos",
|
455
|
+
:type => :hash,
|
456
|
+
:key_type => :short_text) do |table|
|
457
|
+
table.short_text("tags", :type => :vector)
|
458
|
+
end
|
459
|
+
|
460
|
+
schema.create_table("Tags",
|
461
|
+
:type => :hash,
|
462
|
+
:key_type => :short_text) do |table|
|
463
|
+
table.index("Memos.tags",
|
464
|
+
:name => "memos_index")
|
465
|
+
end
|
466
|
+
end
|
467
|
+
|
468
|
+
@memos = context["Memos"]
|
469
|
+
@index = context["Tags.memos_index"]
|
470
|
+
end
|
471
|
+
|
472
|
+
def test_adjust
|
473
|
+
@memos.add("Rroonga is fun!", :tags => ["rroonga", "groonga"])
|
474
|
+
@memos.add("Groonga is fast!", :tags => ["groonga"])
|
475
|
+
|
476
|
+
result = @index.search("groonga")
|
477
|
+
@index.search("rroonga", :result => result, :operator => :adjust)
|
478
|
+
expected = [
|
479
|
+
{
|
480
|
+
:key => "Rroonga is fun!",
|
481
|
+
:score => 2,
|
482
|
+
},
|
483
|
+
{
|
484
|
+
:key => "Groonga is fast!",
|
485
|
+
:score => 1,
|
486
|
+
}
|
487
|
+
]
|
488
|
+
actual = result.collect do |record|
|
489
|
+
{
|
490
|
+
:key => record._key,
|
491
|
+
:score => record.score,
|
492
|
+
}
|
493
|
+
end
|
494
|
+
assert_equal(expected, actual)
|
495
|
+
end
|
496
|
+
end
|
497
|
+
end
|
368
498
|
end
|
data/test/test-patricia-trie.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
#
|
3
|
-
# Copyright (C) 2009-
|
3
|
+
# Copyright (C) 2009-2014 Kouhei Sutou <kou@clear-code.com>
|
4
4
|
#
|
5
5
|
# This library is free software; you can redistribute it and/or
|
6
6
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -417,12 +417,13 @@ class PatriciaTrieTest < Test::Unit::TestCase
|
|
417
417
|
:key_type => "ShortText")
|
418
418
|
users.define_column("name", "ShortText")
|
419
419
|
users.define_column("address", "ShortText")
|
420
|
-
|
420
|
+
large_data = "x" * (2 ** 16)
|
421
|
+
100.times do |i|
|
421
422
|
users.add("user #{i}",
|
422
|
-
:name => "user #{i}"
|
423
|
-
:address => "address #{i}"
|
423
|
+
:name => "user #{i}" + large_data,
|
424
|
+
:address => "address #{i}" + large_data)
|
424
425
|
end
|
425
|
-
assert_equal(
|
426
|
+
assert_equal(2, users.defrag)
|
426
427
|
end
|
427
428
|
|
428
429
|
def test_rename
|
data/test/test-schema-dumper.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2009-
|
1
|
+
# Copyright (C) 2009-2014 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -96,14 +96,26 @@ class SchemaDumperTest < Test::Unit::TestCase
|
|
96
96
|
schema.create_table("Terms",
|
97
97
|
:type => :patricia_trie,
|
98
98
|
:key_type => "ShortText",
|
99
|
-
:
|
100
|
-
:
|
99
|
+
:default_tokenizer => "TokenBigram",
|
100
|
+
:normalizer => "NormalizerAuto") do |table|
|
101
101
|
table.index("Items", "_key")
|
102
102
|
table.index("Items", "title")
|
103
103
|
end
|
104
104
|
end
|
105
105
|
end
|
106
106
|
|
107
|
+
def define_weight_vector_schema
|
108
|
+
Groonga::Schema.define do |schema|
|
109
|
+
schema.create_table("Memos",
|
110
|
+
:type => :patricia_trie,
|
111
|
+
:key_type => "ShortText") do |table|
|
112
|
+
table.short_text("tags",
|
113
|
+
:type => :vector,
|
114
|
+
:with_weight => true)
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
107
119
|
def define_double_array_trie_schema
|
108
120
|
Groonga::Schema.define do |schema|
|
109
121
|
schema.create_table("Accounts",
|
@@ -208,14 +220,26 @@ end
|
|
208
220
|
create_table("Terms",
|
209
221
|
:type => :patricia_trie,
|
210
222
|
:key_type => "ShortText",
|
211
|
-
:key_normalize => true,
|
212
223
|
:default_tokenizer => "TokenBigram",
|
224
|
+
:normalizer => "NormalizerAuto",
|
213
225
|
:force => true) do |table|
|
214
226
|
end
|
215
227
|
|
216
228
|
change_table("Terms") do |table|
|
217
|
-
table.index("Items", "_key", :name => "Items__key")
|
218
|
-
table.index("Items", "title", :name => "Items_title")
|
229
|
+
table.index("Items", "_key", :name => "Items__key", :with_position => true)
|
230
|
+
table.index("Items", "title", :name => "Items_title", :with_position => true)
|
231
|
+
end
|
232
|
+
SCHEMA
|
233
|
+
end
|
234
|
+
|
235
|
+
def test_weight_vector
|
236
|
+
define_weight_vector_schema
|
237
|
+
assert_equal(<<-SCHEMA, dump)
|
238
|
+
create_table("Memos",
|
239
|
+
:type => :patricia_trie,
|
240
|
+
:key_type => "ShortText",
|
241
|
+
:force => true) do |table|
|
242
|
+
table.short_text("tags", :type => :vector, :with_weight => true)
|
219
243
|
end
|
220
244
|
SCHEMA
|
221
245
|
end
|
@@ -282,13 +306,21 @@ column_create Comments item COLUMN_SCALAR Items
|
|
282
306
|
table_create Items TABLE_HASH_KEY --key_type ShortText
|
283
307
|
column_create Items title COLUMN_SCALAR ShortText
|
284
308
|
|
285
|
-
table_create Terms TABLE_PAT_KEY
|
309
|
+
table_create Terms TABLE_PAT_KEY --key_type ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto
|
286
310
|
|
287
311
|
column_create Terms Items__key COLUMN_INDEX|WITH_POSITION Items _key
|
288
312
|
column_create Terms Items_title COLUMN_INDEX|WITH_POSITION Items title
|
289
313
|
SCHEMA
|
290
314
|
end
|
291
315
|
|
316
|
+
def test_weight_vector
|
317
|
+
define_weight_vector_schema
|
318
|
+
assert_equal(<<-SCHEMA, dump)
|
319
|
+
table_create Memos TABLE_PAT_KEY --key_type ShortText
|
320
|
+
column_create Memos tags COLUMN_VECTOR|WITH_WEIGHT ShortText
|
321
|
+
SCHEMA
|
322
|
+
end
|
323
|
+
|
292
324
|
def test_double_array_trie
|
293
325
|
define_double_array_trie_schema
|
294
326
|
assert_equal(<<-SCHEMA, dump)
|