rroonga 5.1.1 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +1 -1
- data/bin/grndump +9 -0
- data/doc/text/cross-compile.md +72 -0
- data/doc/text/install.md +121 -0
- data/doc/text/news.md +1451 -0
- data/doc/text/release.md +135 -0
- data/doc/text/tutorial.md +402 -0
- data/ext/groonga/rb-grn-column.c +54 -24
- data/ext/groonga/rb-grn-config.c +131 -10
- data/ext/groonga/rb-grn-context.c +0 -10
- data/ext/groonga/rb-grn-encoding.c +0 -2
- data/ext/groonga/rb-grn-expression.c +63 -3
- data/ext/groonga/rb-grn-index.c +34 -0
- data/ext/groonga/rb-grn-object.c +10 -10
- data/ext/groonga/rb-grn-operator.c +7 -0
- data/ext/groonga/rb-grn-plugin.c +39 -0
- data/ext/groonga/rb-grn-utils.c +5 -65
- data/ext/groonga/rb-grn.h +9 -9
- data/ext/groonga/rb-groonga.c +31 -1
- data/lib/groonga/column.rb +13 -0
- data/lib/groonga/dumper.rb +3 -1
- data/lib/groonga/expression-builder.rb +66 -18
- data/lib/groonga/{expression-builder-19.rb → index.rb} +11 -19
- data/lib/groonga/record.rb +1 -1
- data/lib/groonga.rb +1 -0
- data/lib/rroonga.rb +16 -0
- data/rroonga-build.rb +6 -6
- data/rroonga.gemspec +1 -1
- data/test/groonga-test-utils.rb +0 -8
- data/test/test-column.rb +107 -1
- data/test/test-config.rb +18 -0
- data/test/test-database.rb +0 -2
- data/test/test-error-message.rb +20 -0
- data/test/test-expression-builder.rb +172 -29
- data/test/test-plugin.rb +25 -1
- data/test/test-table-dumper.rb +35 -6
- data/test/test-table-select.rb +0 -1
- metadata +62 -53
data/doc/text/release.md
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
# Release
|
2
|
+
|
3
|
+
This document describes how to release Rroonga.
|
4
|
+
|
5
|
+
## 1. Summarize the changes for this release
|
6
|
+
|
7
|
+
### Show the commits since the latest release
|
8
|
+
|
9
|
+
% git log -p --reverse <the latest release version>..HEAD
|
10
|
+
|
11
|
+
For example:
|
12
|
+
|
13
|
+
% git log -p --reverse 4.0.3..HEAD
|
14
|
+
|
15
|
+
Or
|
16
|
+
|
17
|
+
[Commits on GitHub](https://github.com/ranguba/rroonga/commits/master)
|
18
|
+
|
19
|
+
### Extract the commits related to users
|
20
|
+
|
21
|
+
#### Including
|
22
|
+
|
23
|
+
* Changed specifications
|
24
|
+
* Added new features
|
25
|
+
* ...
|
26
|
+
|
27
|
+
#### Excluding
|
28
|
+
|
29
|
+
* Refactorings
|
30
|
+
* Tests
|
31
|
+
* ...
|
32
|
+
|
33
|
+
### Categorize
|
34
|
+
|
35
|
+
* Improvements: new features, improved behavior, ...
|
36
|
+
* Fixes: fixed bugs, ...
|
37
|
+
|
38
|
+
### Thanks
|
39
|
+
|
40
|
+
If a contributor's name appears in a commit log, create a "Thanks" group and add the name to that group.
|
41
|
+
|
42
|
+
### Template for a new release for news.md
|
43
|
+
|
44
|
+
## X.X.X: YYYY-MM-DD {#version-x-x-x}
|
45
|
+
|
46
|
+
### Improvements
|
47
|
+
|
48
|
+
* ...
|
49
|
+
|
50
|
+
### Fixes
|
51
|
+
|
52
|
+
* ...
|
53
|
+
|
54
|
+
### Thanks
|
55
|
+
|
56
|
+
* ...
|
57
|
+
|
58
|
+
## 2. Translate
|
59
|
+
|
60
|
+
### 2-1. Update PO
|
61
|
+
|
62
|
+
Update PO files (e.g.: ja.po) by the following command:
|
63
|
+
|
64
|
+
$ rake reference:po:update
|
65
|
+
|
66
|
+
### 2-2. Edit PO
|
67
|
+
|
68
|
+
Then, edit PO files:
|
69
|
+
|
70
|
+
* Edit msgid and msgstr.
|
71
|
+
* Search fuzzy.
|
72
|
+
* Edit if necessary.
|
73
|
+
* Then, remove fuzzy.
|
74
|
+
|
75
|
+
## 3. Upload gem to RubyGems.org
|
76
|
+
|
77
|
+
### 3-1. For Linux
|
78
|
+
|
79
|
+
$ rake release
|
80
|
+
|
81
|
+
### 3-2. For Windows
|
82
|
+
|
83
|
+
$ rake build:windows
|
84
|
+
$ gem push pkg/rroonga-<the latest release version>-x86-mingw32.gem
|
85
|
+
$ gem push pkg/rroonga-<the latest release version>-x64-mingw32.gem
|
86
|
+
|
87
|
+
### 3-3. Bump version
|
88
|
+
|
89
|
+
$ $EDITOR ext/groonga/rb-grn.h
|
90
|
+
|
91
|
+
## 4. Update ranguba.org
|
92
|
+
|
93
|
+
### 4-1. Update the latest version and release date
|
94
|
+
|
95
|
+
Update version and release date in ranguba.org (index.html, ja/index.html).
|
96
|
+
|
97
|
+
First, clone ranguba.org repository by the following command:
|
98
|
+
|
99
|
+
$ cd ..
|
100
|
+
$ git clone git@github.com:ranguba/ranguba.org.git
|
101
|
+
$ cd ranguba.org
|
102
|
+
|
103
|
+
Or
|
104
|
+
|
105
|
+
$ cd ../ranguba.org
|
106
|
+
$ git pull
|
107
|
+
|
108
|
+
Second, update the latest version and release date:
|
109
|
+
|
110
|
+
$ $EDITOR _config.yml
|
111
|
+
|
112
|
+
### 4-2. Update reference manual
|
113
|
+
|
114
|
+
First, copy references to reference html directory in ranguba.org:
|
115
|
+
|
116
|
+
$ cd ../rroonga
|
117
|
+
$ rake release:references:upload
|
118
|
+
|
119
|
+
Second, commit and push the html directory:
|
120
|
+
|
121
|
+
$ cd ../ranguba.org
|
122
|
+
$ git add .
|
123
|
+
$ git commit
|
124
|
+
$ git push
|
125
|
+
|
126
|
+
## 5. Announce in mailing lists
|
127
|
+
|
128
|
+
* groonga-talk (English)
|
129
|
+
* e.g.: http://sourceforge.net/p/groonga/mailman/message/33144993/
|
130
|
+
* groonga-dev (Japanese)
|
131
|
+
* e.g.: http://sourceforge.jp/projects/groonga/lists/archive/dev/2014-December/003014.html
|
132
|
+
* ruby-talk (English) (optional)
|
133
|
+
* e.g.: http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/428992
|
134
|
+
* ruby-list (Japanese) (optional)
|
135
|
+
* e.g.: http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-list/50025
|
@@ -0,0 +1,402 @@
|
|
1
|
+
# Tutorial
|
2
|
+
|
3
|
+
This page introduces how to use Rroonga by building a simple application.
|
4
|
+
|
5
|
+
## Install
|
6
|
+
|
7
|
+
You can install Rroonga on your computer with RubyGems.
|
8
|
+
|
9
|
+
% sudo gem install rroonga
|
10
|
+
|
11
|
+
## Create Database
|
12
|
+
|
13
|
+
Let's create database for simple bookmark application.
|
14
|
+
Please execute irb with loading Rroonga with this command:
|
15
|
+
|
16
|
+
% irb --simple-prompt -r groonga
|
17
|
+
>>
|
18
|
+
|
19
|
+
Then, try to create database in a file.
|
20
|
+
|
21
|
+
>> Groonga::Database.create(:path => "/tmp/bookmark.db")
|
22
|
+
=> #<Groonga::Database ...>
|
23
|
+
|
24
|
+
From now, the created database is used implicitly.
|
25
|
+
You don't have to be aware of it after you created a database first.
|
26
|
+
|
27
|
+
## Define table
|
28
|
+
|
29
|
+
Groonga supports 4 types of tables.
|
30
|
+
|
31
|
+
Groonga::Hash
|
32
|
+
: Hash table. It manages records via each primary key. It supports
|
33
|
+
very fast exact match search.
|
34
|
+
|
35
|
+
Groonga::PatriciaTrie
|
36
|
+
: Patricia Trie. It supports some search such as predictive search and
|
37
|
+
common prefix search, but it provides slightly slower exact match search
|
38
|
+
than Groonga::Hash. It provides cursor to take records in ascending
|
39
|
+
or descending order.
|
40
|
+
|
41
|
+
Groonga::DoubleArrayTrie
|
42
|
+
: Double Array Trie. It requires large spaces rather than other
|
43
|
+
tables, but it can update key without ID change. It provides exact
|
44
|
+
match search, predictive search and common prefix search and cursor
|
45
|
+
like Groonga::PatriciaTrie.
|
46
|
+
|
47
|
+
Groonga::Array
|
48
|
+
: Array. It doesn't have primary keys. It manages records by ID.
|
49
|
+
|
50
|
+
Now, you use Groonga::Hash and create the table named `Items`. The type
|
51
|
+
of its primary key is String.
|
52
|
+
|
53
|
+
>> Groonga::Schema.create_table("Items", :type => :hash)
|
54
|
+
=> [...]
|
55
|
+
|
56
|
+
You have `Items` table by this code.
|
57
|
+
You can refer to the defined table with Groonga.[] like below:
|
58
|
+
|
59
|
+
>> items = Groonga["Items"]
|
60
|
+
=> #<Groonga::Hash ...>
|
61
|
+
|
62
|
+
You can treat it like Hash.
|
63
|
+
For example, let's type `items.size` to get the number of records in
|
64
|
+
the table.
|
65
|
+
|
66
|
+
>> items.size
|
67
|
+
=> 0
|
68
|
+
|
69
|
+
## Add records
|
70
|
+
|
71
|
+
Let's add records to `Items` table.
|
72
|
+
|
73
|
+
>> items.add("http://en.wikipedia.org/wiki/Ruby")
|
74
|
+
=> #<Groonga::Record ...>
|
75
|
+
>> items.add("http://www.ruby-lang.org/")
|
76
|
+
=> #<Groonga::Record ...>
|
77
|
+
|
78
|
+
Please check the number of records. It increases from 0 to 2.
|
79
|
+
|
80
|
+
>> items.size
|
81
|
+
=> 2
|
82
|
+
|
83
|
+
If you want to get a record by its primary key, type like below:
|
84
|
+
|
85
|
+
>> items["http://en.wikipedia.org/wiki/Ruby"]
|
86
|
+
=> #<Groonga::Record ...>
|
87
|
+
|
88
|
+
## Full text search
|
89
|
+
|
90
|
+
Let's add item's title to full text search.
|
91
|
+
|
92
|
+
First, you add the `Text` type column "`title`" to `Items` table.
|
93
|
+
|
94
|
+
>> Groonga::Schema.change_table("Items") do |table|
|
95
|
+
?> table.text("title")
|
96
|
+
>> end
|
97
|
+
=> [...]
|
98
|
+
|
99
|
+
Defined columns are named as `#{TABLE_NAME}.#{COLUMN_NAME}`.
|
100
|
+
You can refer to them with {Groonga.[]} in the same way as tables.
|
101
|
+
|
102
|
+
>> title_column = Groonga["Items.title"]
|
103
|
+
=> #<Groonga::VariableSizeColumn ...>
|
104
|
+
|
105
|
+
|
106
|
+
Secondly, let's add the table containing terms split from texts.
|
107
|
+
Then you define the `Terms` for it.
|
108
|
+
|
109
|
+
>> Groonga::Schema.create_table("Terms",
|
110
|
+
?> :type => :patricia_trie,
|
111
|
+
?> :normalizer => :NormalizerAuto,
|
112
|
+
?> :default_tokenizer => "TokenBigram")
|
113
|
+
|
114
|
+
You specify `:default_tokenizer => "TokenBigram"` for "Tokenizer" in
|
115
|
+
the above code.
|
116
|
+
"Tokenizer" is the object to split terms from texts. The default value
|
117
|
+
for it is none.
|
118
|
+
Full text search requires a tokenizer, so you specify "Bigram", a type
|
119
|
+
of N-gram.
|
120
|
+
Full text search with N-gram uses terms of N characters split from texts and their
|
121
|
+
position in texts. "N" in N-gram specifies the number of characters in each term.
|
122
|
+
Groonga supports Unigram (N=1), Bigram (N=2) and Trigram (N=3).
|
123
|
+
|
124
|
+
You also specify `:normalizer => :NormalizerAuto` to search texts with
|
125
|
+
ignoring the case.
|
126
|
+
|
127
|
+
Now, the table for terms is ready, so you define the index of
|
128
|
+
`Items.title` column.
|
129
|
+
|
130
|
+
>> Groonga::Schema.change_table("Terms") do |table|
|
131
|
+
?> table.index("Items.title")
|
132
|
+
>> end
|
133
|
+
=> [...]
|
134
|
+
|
135
|
+
This code may look a little strange. The index of `Items` table's
|
136
|
+
column is defined as the column in `Terms`.
|
137
|
+
|
138
|
+
When a record is added to `Items`, groonga adds records associated
|
139
|
+
with each term in it to `Terms` automatically.
|
140
|
+
|
141
|
+
|
142
|
+
`Terms` is a somewhat special table, but you can add some columns to a term
|
143
|
+
table such as `Terms` and manage many attributes of each term. It is
|
144
|
+
very useful to process particular search.
|
145
|
+
|
146
|
+
Now, you finished table definition.
|
147
|
+
Let's put some values to `title` of each record you added before.
|
148
|
+
|
149
|
+
>> items["http://en.wikipedia.org/wiki/Ruby"].title = "Ruby"
|
150
|
+
=> "Ruby"
|
151
|
+
>> items["http://www.ruby-lang.org/"].title = "Ruby Programming Language"
|
152
|
+
"Ruby Programming Language"
|
153
|
+
|
154
|
+
Now, you can do full text search like below:
|
155
|
+
|
156
|
+
>> ruby_items = items.select {|record| record.title =~ "Ruby"}
|
157
|
+
=> #<Groonga::Hash ..., normalizer: (nil)>
|
158
|
+
|
159
|
+
Groonga returns the search result as Groonga::Hash.
|
160
|
+
Keys in this hash table are the matched records of `Items`.
|
161
|
+
|
162
|
+
>> ruby_items.collect {|record| record.key.key}
|
163
|
+
=> ["http://en.wikipedia.org/wiki/Ruby", "http://www.ruby-lang.org/"]
|
164
|
+
|
165
|
+
In above example, you get records in `Items` with `record.key`, and
|
166
|
+
keys of them with `record.key.key`.
|
167
|
+
|
168
|
+
You can access a referenced key in records briefly with `record["_key"]`.
|
169
|
+
|
170
|
+
>> ruby_items.collect {|record| record["_key"]}
|
171
|
+
=> ["http://en.wikipedia.org/wiki/Ruby", "http://www.ruby-lang.org/"]
|
172
|
+
|
173
|
+
## Improve the simple bookmark application
|
174
|
+
|
175
|
+
Let's try to improve this simple application a little. You can create
|
176
|
+
a bookmark application for multiple users, and they can comment on the
|
177
|
+
bookmarks.
|
178
|
+
|
179
|
+
First, you add tables for users and for comments like below:
|
180
|
+
|
181
|
+
![Tables for users and comments](http://qwik.jp/senna/senna2.files/rect4605.png)
|
182
|
+
|
183
|
+
Let's add the table for users, `Users`.
|
184
|
+
|
185
|
+
>> Groonga::Schema.create_table("Users", :type => :hash) do |table|
|
186
|
+
?> table.text("name")
|
187
|
+
>> end
|
188
|
+
=> [...]
|
189
|
+
|
190
|
+
|
191
|
+
Next, let's add the table for comments as `Comments`.
|
192
|
+
|
193
|
+
>> Groonga::Schema.create_table("Comments") do |table|
|
194
|
+
?> table.reference("item")
|
195
|
+
>> table.reference("author", "Users")
|
196
|
+
>> table.text("content")
|
197
|
+
>> table.time("issued")
|
198
|
+
>> end
|
199
|
+
=> [...]
|
200
|
+
|
201
|
+
Then you define the index of `content` column in `Comments` for full
|
202
|
+
text search.
|
203
|
+
|
204
|
+
>> Groonga::Schema.change_table("Terms") do |table|
|
205
|
+
?> table.index("Comments.content")
|
206
|
+
>> end
|
207
|
+
=> [...]
|
208
|
+
|
209
|
+
You finish table definition by above code.
|
210
|
+
|
211
|
+
Secondly, you add some users to `Users`.
|
212
|
+
|
213
|
+
>> users = Groonga["Users"]
|
214
|
+
=> #<Groonga::Hash ...>
|
215
|
+
>> users.add("alice", :name => "Alice")
|
216
|
+
=> #<Groonga::Record ...>
|
217
|
+
>> users.add("bob", :name => "Bob")
|
218
|
+
=> #<Groonga::Record ...>
|
219
|
+
|
220
|
+
Now, let's write the process to bookmark by a user.
|
221
|
+
You assume that the user, `alice`, bookmarks a page including
|
222
|
+
information related to Ruby.
|
223
|
+
|
224
|
+
First, you check if the page has already been added to `Items`.
|
225
|
+
|
226
|
+
>> items.has_key?("http://www.ruby-doc.org/")
|
227
|
+
=> false
|
228
|
+
|
229
|
+
The page hasn't been added, so you add it to `Items`.
|
230
|
+
|
231
|
+
>> items.add("http://www.ruby-doc.org/",
|
232
|
+
?> :title => "Ruby-Doc.org: Documenting the Ruby Language")
|
233
|
+
=> #<Groonga::Record ...>
|
234
|
+
|
235
|
+
Next, you add the record to `Comments`. This record contains this page
|
236
|
+
as its `item` column.
|
237
|
+
|
238
|
+
>> require "time"
|
239
|
+
=> true
|
240
|
+
>> comments = Groonga["Comments"]
|
241
|
+
=> #<Groonga::Array ...>
|
242
|
+
>> comments.add(:item => "http://www.ruby-doc.org/",
|
243
|
+
?> :author => "alice",
|
244
|
+
?> :content => "Ruby documents",
|
245
|
+
?> :issued => Time.parse("2010-11-20T18:01:22+09:00"))
|
246
|
+
=> #<Groonga::Record ...>
|
247
|
+
|
248
|
+
## Define methods for this process
|
249
|
+
|
250
|
+
For convenience, you define methods for the above processes.
|
251
|
+
|
252
|
+
>> @items = items
|
253
|
+
=> #<Groonga::Hash ...>
|
254
|
+
>> @comments = comments
|
255
|
+
=> #<Groonga::Array ...>
|
256
|
+
>> def add_bookmark(url, title, author, content, issued)
|
257
|
+
>> item = @items[url] || @items.add(url, :title => title)
|
258
|
+
>> @comments.add(:item => item,
|
259
|
+
?> :author => author,
|
260
|
+
?> :content => content,
|
261
|
+
?> :issued => issued)
|
262
|
+
>> end
|
263
|
+
=> nil
|
264
|
+
|
265
|
+
You assign `items` and `comments` to each instance variable, so you can
|
266
|
+
use them in `add_bookmark` method.
|
267
|
+
|
268
|
+
`add_bookmark` executes processes like below:
|
269
|
+
|
270
|
+
* Check if the record associated the page exists in `Items` table.
|
271
|
+
* If not, add the record to it.
|
272
|
+
* Add the record to `Comments` table.
|
273
|
+
|
274
|
+
With this method, let's bookmark some pages.
|
275
|
+
|
276
|
+
>> add_bookmark("https://rubygems.org/",
|
277
|
+
?> "RubyGems.org | your community gem host", "alice", "Ruby gems",
|
278
|
+
?> Time.parse("2010-10-07T14:18:28+09:00"))
|
279
|
+
=> #<Groonga::Record ...>
|
280
|
+
>> add_bookmark("http://ranguba.org/",
|
281
|
+
?> "Fulltext search by Ruby with groonga - Ranguba", "bob",
|
282
|
+
?> "Ruby groonga fulltextsearch",
|
283
|
+
?> Time.parse("2010-11-11T12:39:59+09:00"))
|
284
|
+
=> #<Groonga::Record ...>
|
285
|
+
>> add_bookmark("http://www.ruby-doc.org/",
|
286
|
+
?> "ruby-doc", "bob", "ruby documents",
|
287
|
+
?> Time.parse("2010-07-28T20:46:23+09:00"))
|
288
|
+
=> #<Groonga::Record ...>
|
289
|
+
|
290
|
+
## Full text search part 2
|
291
|
+
|
292
|
+
Let's do full text search for added records.
|
293
|
+
|
294
|
+
>> records = comments.select do |record|
|
295
|
+
?> record["content"] =~ "Ruby"
|
296
|
+
>> end
|
297
|
+
=> #<Groonga::Hash ...>
|
298
|
+
>> records.each do |record|
|
299
|
+
?> comment = record
|
300
|
+
>> p [comment.id,
|
301
|
+
?> comment.issued,
|
302
|
+
?> comment.item.title,
|
303
|
+
?> comment.author.name,
|
304
|
+
?> comment.content]
|
305
|
+
>> end
|
306
|
+
[1, 2010-11-20 18:01:22 +0900, "Ruby-Doc.org: Documenting the Ruby Language", "Alice", "Ruby documents"]
|
307
|
+
[2, 2010-10-07 14:18:28 +0900, "RubyGems.org | your community gem host", "Alice", "Ruby gems"]
|
308
|
+
[3, 2010-11-11 12:39:59 +0900, "Fulltext search by Ruby with groonga - Ranguba", "Bob", "Ruby groonga fulltextsearch"]
|
309
|
+
[4, 2010-07-28 20:46:23 +0900, "Ruby-Doc.org: Documenting the Ruby Language", "Bob", "ruby documents"]
|
310
|
+
|
311
|
+
You can access each column with a method of the same name.
|
312
|
+
These methods support access to complex data types.
|
313
|
+
(In a typical RDB, you would have to manage JOINs of the tables `Items`, `Comments`,
|
314
|
+
`Users`.)
|
315
|
+
|
316
|
+
The search is finished by the first statement in this code. The
|
317
|
+
result of this search is an object representing a record set.
|
318
|
+
|
319
|
+
>> records
|
320
|
+
#<Groonga::Hash ..., size: <4>>
|
321
|
+
|
322
|
+
You can arrange this records set before output.
|
323
|
+
For example, sort these records in the descending order by date.
|
324
|
+
|
325
|
+
>> records.sort([{:key => "issued", :order => "descending"}]).each do |record|
|
326
|
+
?> comment = record
|
327
|
+
>> p [comment.id,
|
328
|
+
?> comment.issued,
|
329
|
+
?> comment.item.title,
|
330
|
+
?> comment.author.name,
|
331
|
+
?> comment.content]
|
332
|
+
>> end
|
333
|
+
[1, 2010-11-20 18:01:22 +0900, "Ruby-Doc.org: Documenting the Ruby Language", "Alice", "Ruby documents"]
|
334
|
+
[2, 2010-11-11 12:39:59 +0900, "Fulltext search by Ruby with groonga - Ranguba", "Bob", "Ruby groonga fulltextsearch"]
|
335
|
+
[3, 2010-10-07 14:18:28 +0900, "RubyGems.org | your community gem host", "Alice", "Ruby gems"]
|
336
|
+
[4, 2010-07-28 20:46:23 +0900, "Ruby-Doc.org: Documenting the Ruby Language", "Bob", "ruby documents"]
|
337
|
+
=> [...]
|
338
|
+
|
339
|
+
Let's group the result by each item for easy view.
|
340
|
+
|
341
|
+
>> records.group("item").each do |record|
|
342
|
+
?> item = record.key
|
343
|
+
>> p [record.n_sub_records,
|
344
|
+
?> item.key,
|
345
|
+
?> item.title]
|
346
|
+
>> end
|
347
|
+
[2, "http://www.ruby-doc.org/", "Ruby-Doc.org: Documenting the Ruby Language"]
|
348
|
+
[1, "https://rubygems.org/", "RubyGems.org | your community gem host"]
|
349
|
+
[1, "http://ranguba.org/", "Fulltext search by Ruby with groonga - Ranguba"]
|
350
|
+
=> nil
|
351
|
+
|
352
|
+
`n_sub_records` is the number of records in each group.
|
353
|
+
It is similar to the value of the count() function in a query including "GROUP
|
354
|
+
BY" in SQL.
|
355
|
+
|
356
|
+
## More complex search
|
357
|
+
|
358
|
+
Now, let's try a more useful search.
|
359
|
+
|
360
|
+
You should calculate goodness of fit of search explicitly.
|
361
|
+
|
362
|
+
You can use `Items.title` and `Comments.content` as search targets now.
|
363
|
+
`Items.title` is fairly reliable information taken from each
|
364
|
+
original page. On the other hand, `Comments.content` is less
|
365
|
+
reliable information because this depends on users of bookmark
|
366
|
+
application.
|
367
|
+
|
368
|
+
Then, you search records with this policy:
|
369
|
+
|
370
|
+
* Search item matched `Items.title` or `Comments.content`.
|
371
|
+
* Add 10 times heavier weight to scores of each record matched
|
372
|
+
`Items.title` than ones of `Comments.content`.
|
373
|
+
* If multiple comments of one item match the keyword, use the sum
|
374
|
+
of the scores of those comments as the score of the item.
|
375
|
+
|
376
|
+
On this policy, you try to type below:
|
377
|
+
|
378
|
+
>> ruby_comments = @comments.select {|record| record.content =~ "Ruby"}
|
379
|
+
=> #<Groonga::Hash ..., size: <4>
|
380
|
+
>> ruby_items = @items.select do |record|
|
381
|
+
?> target = record.match_target do |match_record|
|
382
|
+
?> match_record.title * 10
|
383
|
+
>> end
|
384
|
+
>> target =~ "Ruby"
|
385
|
+
>> end
|
386
|
+
#<Groonga::Hash ..., size: <4>>
|
387
|
+
|
388
|
+
You group the results of *ruby_comments* in each item and union
|
389
|
+
*ruby_items* .
|
390
|
+
|
391
|
+
>> ruby_items = ruby_comments.group("item").union!(ruby_items)
|
392
|
+
#<Groonga::Hash ..., size: <5>>
|
393
|
+
>> ruby_items.sort([{:key => "_score", :order => "descending"}]).each do |record|
|
394
|
+
>> p [record.score, record.title]
|
395
|
+
>> end
|
396
|
+
[22, "Ruby-Doc.org: Documenting the Ruby Language"]
|
397
|
+
[11, "Fulltext search by Ruby with groonga - Ranguba"]
|
398
|
+
[10, "Ruby Programming Language"]
|
399
|
+
[10, "Ruby"]
|
400
|
+
[1, "RubyGems.org | your community gem host"]
|
401
|
+
|
402
|
+
Then, you get the result.
|
data/ext/groonga/rb-grn-column.c
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
/* vim: set sts=4 sw=4 ts=8 noet: */
|
3
3
|
/*
|
4
4
|
Copyright (C) 2009-2015 Kouhei Sutou <kou@clear-code.com>
|
5
|
+
Copyright (C) 2016 Masafumi Yokoyama <yokoyama@clear-code.com>
|
5
6
|
|
6
7
|
This library is free software; you can redistribute it and/or
|
7
8
|
modify it under the terms of the GNU Lesser General Public
|
@@ -709,44 +710,72 @@ rb_grn_column_with_weight_p(VALUE self)
|
|
709
710
|
}
|
710
711
|
|
711
712
|
/*
|
712
|
-
* Return indexes on `column
|
713
|
-
*
|
714
|
-
*
|
715
|
-
*
|
716
|
-
*
|
713
|
+
* Return indexes on `column`. If operator is specified, indexes that
|
714
|
+
* can execute the operator are only returned. Otherwise, all indexes
|
715
|
+
* are returned.
|
716
|
+
*
|
717
|
+
* An index here means a pair of an index column and a section.
|
718
|
+
*
|
719
|
+
* @overload find_indexes(options={})
|
720
|
+
* @param options [::Hash] The name and value pairs.
|
721
|
+
* Omitted names are initialized as the default value.
|
722
|
+
* @option options :operator (nil) The operator that should be
|
723
|
+
* executable by indexes. `nil` means all operators.
|
724
|
+
* @return [Array<Groonga::Index>] Target indexes on `column`.
|
725
|
+
*
|
726
|
+
* @since 6.0.0
|
717
727
|
*/
|
718
728
|
static VALUE
|
719
|
-
|
729
|
+
rb_grn_column_find_indexes (int argc, VALUE *argv, VALUE self)
|
720
730
|
{
|
731
|
+
VALUE rb_options;
|
732
|
+
VALUE rb_operator;
|
721
733
|
grn_ctx *context;
|
722
734
|
grn_obj *column;
|
723
735
|
grn_index_datum *index_data = NULL;
|
724
736
|
int i, n_indexes;
|
725
|
-
|
726
|
-
VALUE rb_operator, rb_indexes;
|
737
|
+
VALUE rb_indexes;
|
727
738
|
|
728
|
-
rb_scan_args(argc, argv, "01", &
|
739
|
+
rb_scan_args(argc, argv, "01", &rb_options);
|
740
|
+
rb_grn_scan_options(rb_options,
|
741
|
+
"operator", &rb_operator,
|
742
|
+
NULL);
|
729
743
|
|
730
744
|
rb_grn_column_deconstruct(SELF(self), &column, &context,
|
731
|
-
|
732
|
-
|
745
|
+
NULL, NULL,
|
746
|
+
NULL, NULL, NULL);
|
733
747
|
|
734
|
-
if (
|
748
|
+
if (NIL_P(rb_operator)) {
|
749
|
+
n_indexes = grn_column_get_all_index_data(context, column, NULL, 0);
|
750
|
+
if (n_indexes == 0)
|
751
|
+
return rb_ary_new();
|
752
|
+
|
753
|
+
index_data = xmalloc(sizeof(grn_index_datum) * n_indexes);
|
754
|
+
n_indexes = grn_column_get_all_index_data(context, column,
|
755
|
+
index_data, n_indexes);
|
756
|
+
} else {
|
757
|
+
grn_operator operator;
|
735
758
|
operator = RVAL2GRNOPERATOR(rb_operator);
|
759
|
+
n_indexes = grn_column_find_index_data(context, column, operator,
|
760
|
+
NULL, 0);
|
761
|
+
if (n_indexes == 0)
|
762
|
+
return rb_ary_new();
|
763
|
+
|
764
|
+
index_data = xmalloc(sizeof(grn_index_datum) * n_indexes);
|
765
|
+
n_indexes = grn_column_find_index_data(context, column, operator,
|
766
|
+
index_data, n_indexes);
|
736
767
|
}
|
737
768
|
|
738
|
-
rb_indexes =
|
739
|
-
n_indexes = grn_column_find_index_data(context, column, operator, NULL, 0);
|
740
|
-
if (n_indexes == 0)
|
741
|
-
return rb_indexes;
|
742
|
-
|
743
|
-
index_data = xmalloc(sizeof(grn_index_datum) * n_indexes);
|
744
|
-
n_indexes = grn_column_find_index_data(context, column, operator,
|
745
|
-
index_data, n_indexes);
|
769
|
+
rb_indexes = rb_ary_new_capa(n_indexes);
|
746
770
|
for (i = 0; i < n_indexes; i++) {
|
747
|
-
VALUE
|
748
|
-
|
749
|
-
|
771
|
+
VALUE rb_index_column;
|
772
|
+
VALUE rb_section;
|
773
|
+
rb_index_column = GRNOBJECT2RVAL(Qnil,
|
774
|
+
context,
|
775
|
+
index_data[i].index,
|
776
|
+
GRN_FALSE);
|
777
|
+
rb_section = UINT2NUM(index_data[i].section);
|
778
|
+
rb_ary_push(rb_indexes, rb_grn_index_new(rb_index_column, rb_section));
|
750
779
|
grn_obj_unlink(context, index_data[i].index);
|
751
780
|
}
|
752
781
|
xfree(index_data);
|
@@ -812,7 +841,8 @@ rb_grn_init_column (VALUE mGrn)
|
|
812
841
|
rb_define_method(rb_cGrnColumn, "with_weight?",
|
813
842
|
rb_grn_column_with_weight_p, 0);
|
814
843
|
|
815
|
-
rb_define_method(rb_cGrnColumn, "
|
844
|
+
rb_define_method(rb_cGrnColumn, "find_indexes",
|
845
|
+
rb_grn_column_find_indexes, -1);
|
816
846
|
|
817
847
|
rb_define_method(rb_cGrnColumn, "rename", rb_grn_column_rename, 1);
|
818
848
|
|