groonga 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. data/NEWS.ja.rdoc +11 -0
  2. data/NEWS.rdoc +11 -0
  3. data/README.ja.rdoc +4 -3
  4. data/README.rdoc +4 -3
  5. data/Rakefile +1 -1
  6. data/TUTORIAL.ja.rdoc +168 -44
  7. data/benchmark/common.rb +49 -0
  8. data/benchmark/read-write-small-many-items.rb +156 -0
  9. data/benchmark/write-small-many-items.rb +145 -0
  10. data/example/bookmark.rb +68 -20
  11. data/ext/rb-grn-array-cursor.c +8 -0
  12. data/ext/rb-grn-array.c +40 -11
  13. data/ext/rb-grn-column.c +38 -209
  14. data/ext/rb-grn-context.c +203 -56
  15. data/ext/rb-grn-database.c +119 -5
  16. data/ext/rb-grn-encoding-support.c +64 -0
  17. data/ext/rb-grn-encoding.c +58 -1
  18. data/ext/rb-grn-fix-size-column.c +220 -0
  19. data/ext/rb-grn-hash-cursor.c +8 -0
  20. data/ext/rb-grn-hash.c +244 -2
  21. data/ext/rb-grn-index-column.c +474 -0
  22. data/ext/rb-grn-object.c +143 -265
  23. data/ext/rb-grn-patricia-trie.c +148 -2
  24. data/ext/rb-grn-query.c +5 -3
  25. data/ext/rb-grn-record.c +3 -2
  26. data/ext/rb-grn-snippet.c +5 -3
  27. data/ext/rb-grn-table-cursor-key-support.c +3 -3
  28. data/ext/rb-grn-table-cursor.c +106 -112
  29. data/ext/rb-grn-table-key-support.c +220 -118
  30. data/ext/rb-grn-table.c +336 -80
  31. data/ext/rb-grn-type.c +5 -4
  32. data/ext/rb-grn-utils.c +62 -63
  33. data/ext/rb-grn.h +215 -14
  34. data/ext/rb-groonga.c +7 -16
  35. data/extconf.rb +3 -1
  36. data/html/favicon.ico +0 -0
  37. data/html/favicon.xcf +0 -0
  38. data/html/index.html +1 -7
  39. data/lib/groonga/record.rb +6 -1
  40. data/test/groonga-test-utils.rb +1 -0
  41. data/test/test-array.rb +81 -0
  42. data/test/test-column.rb +22 -12
  43. data/test/test-context.rb +1 -29
  44. data/test/test-database.rb +30 -0
  45. data/test/test-hash.rb +194 -0
  46. data/test/test-index-column.rb +57 -0
  47. data/test/test-patricia-trie.rb +82 -0
  48. data/test/test-record.rb +10 -10
  49. data/test/test-table.rb +37 -130
  50. data/test/test-type.rb +4 -3
  51. metadata +15 -4
  52. data/benchmark/small-many-items.rb +0 -175
data/test/test-table.rb CHANGED
@@ -36,9 +36,8 @@ class TableTest < Test::Unit::TestCase
36
36
  def test_open
37
37
  table_path = @tables_dir + "table"
38
38
  table = Groonga::Hash.create(:name => "bookmarks",
39
- :path => table_path.to_s)
39
+ :path => table_path.to_s)
40
40
  assert_equal("bookmarks", table.name)
41
- table.close
42
41
 
43
42
  called = false
44
43
  Groonga::Table.open(:name => "bookmarks") do |_table|
@@ -113,25 +112,32 @@ class TableTest < Test::Unit::TestCase
113
112
  table_path = @tables_dir + "table"
114
113
  table = Groonga::Hash.create(:name => "bookmarks",
115
114
  :path => table_path.to_s)
116
- column = table.define_column("name", "<text>",
117
- :type => "index",
118
- :compress => "zlib",
119
- :with_section => true,
120
- :with_weight => true,
121
- :with_position => true)
115
+ column = table.define_column("name", "<text>")
122
116
  assert_equal("bookmarks.name", column.name)
123
117
  assert_equal(column, table.column("name"))
124
118
  end
125
119
 
120
+ def test_define_index_column
121
+ bookmarks = Groonga::Hash.create(:name => "<bookmarks>")
122
+ bookmarks.define_column("content", "<text>")
123
+ terms = Groonga::Hash.create(:name => "<terms>")
124
+ terms.default_tokenizer = "<token:bigram>"
125
+ index = terms.define_index_column("content-index", bookmarks,
126
+ :with_section => true,
127
+ :source => "<bookmarks>.content")
128
+ bookmarks.add("google", :content => "Search engine")
129
+ assert_equal(["google"],
130
+ index.search("engine").collect {|record| record.key.key})
131
+ end
132
+
126
133
  def test_add_column
127
134
  bookmarks = Groonga::Hash.create(:name => "bookmarks",
128
135
  :path => (@tables_dir + "bookmarks").to_s)
129
136
 
130
137
  description_column_path = @columns_dir + "description"
131
138
  bookmarks_description =
132
- bookmarks.define_column("description", "<text>",
133
- :type => "index",
134
- :path => description_column_path.to_s)
139
+ bookmarks.define_index_column("description", "<text>",
140
+ :path => description_column_path.to_s)
135
141
 
136
142
  books = Groonga::Hash.create(:name => "books",
137
143
  :path => (@tables_dir + "books").to_s)
@@ -273,94 +279,6 @@ class TableTest < Test::Unit::TestCase
273
279
  assert_not_predicate(bookmarks_path, :exist?)
274
280
  end
275
281
 
276
- def test_array_reference
277
- bookmarks_path = @tables_dir + "bookmarks"
278
- bookmarks = Groonga::Hash.create(:name => "bookmarks",
279
- :path => bookmarks_path.to_s,
280
- :key_type => "<shorttext>")
281
- bookmark = bookmarks.add("http://google.com/")
282
- assert_equal(bookmark, bookmarks["http://google.com/"])
283
- end
284
-
285
- def test_tokenizer
286
- table = Groonga::Hash.create
287
- assert_equal(Groonga::Context.default["<token:bigram>"],
288
- table.default_tokenizer)
289
- table.default_tokenizer = "<token:mecab>"
290
- assert_equal(Groonga::Context.default["<token:mecab>"],
291
- table.default_tokenizer)
292
- end
293
-
294
- def test_inspect_anonymous
295
- path = @tables_dir + "anoymous.groonga"
296
- anonymous_table = Groonga::Hash.create(:path => path.to_s)
297
- assert_equal("#<Groonga::Hash " +
298
- "id: <#{anonymous_table.id}>, " +
299
- "name: (anonymous), " +
300
- "path: <#{path}>, " +
301
- "domain: <nil>, " +
302
- "range: <nil>, " +
303
- "encoding: <#{encoding.inspect}>, " +
304
- "size: <0>>",
305
- anonymous_table.inspect)
306
- end
307
-
308
- def test_inspect_anonymous_temporary
309
- anonymous_table = Groonga::Hash.create
310
- assert_equal("#<Groonga::Hash " +
311
- "id: <#{anonymous_table.id}>, " +
312
- "name: (anonymous), " +
313
- "path: (temporary), " +
314
- "domain: <nil>, " +
315
- "range: <nil>, " +
316
- "encoding: <#{encoding.inspect}>, " +
317
- "size: <0>>",
318
- anonymous_table.inspect)
319
- end
320
-
321
- def test_inspect_named
322
- path = @tables_dir + "named.groonga"
323
- named_table = Groonga::Hash.create(:name => "name", :path => path.to_s)
324
- assert_equal("#<Groonga::Hash " +
325
- "id: <#{named_table.id}>, " +
326
- "name: <name>, " +
327
- "path: <#{path}>, " +
328
- "domain: <nil>, " +
329
- "range: <nil>, " +
330
- "encoding: <#{encoding.inspect}>, " +
331
- "size: <0>>",
332
- named_table.inspect)
333
- end
334
-
335
- def test_inspect_named_temporary
336
- named_table = Groonga::Hash.create(:name => "name")
337
- assert_equal("#<Groonga::Hash " +
338
- "id: <#{named_table.id}>, " +
339
- "name: <name>, " +
340
- "path: (temporary), " +
341
- "domain: <nil>, " +
342
- "range: <nil>, " +
343
- "encoding: <#{encoding.inspect}>, " +
344
- "size: <0>>",
345
- named_table.inspect)
346
- end
347
-
348
- def test_inspect_size
349
- path = @tables_dir + "named.groonga"
350
- contain_table = Groonga::Array.create(:name => "name", :path => path.to_s)
351
- 3.times do
352
- contain_table.add
353
- end
354
- assert_equal("#<Groonga::Array " +
355
- "id: <#{contain_table.id}>, " +
356
- "name: <name>, " +
357
- "path: <#{path}>, " +
358
- "domain: <nil>, " +
359
- "range: <nil>, " +
360
- "size: <3>>",
361
- contain_table.inspect)
362
- end
363
-
364
282
  def test_temporary_add
365
283
  table = Groonga::Hash.create(:key_type => "<shorttext>")
366
284
  assert_equal(0, table.size)
@@ -368,27 +286,6 @@ class TableTest < Test::Unit::TestCase
368
286
  assert_equal(1, table.size)
369
287
  end
370
288
 
371
- def test_search
372
- users = Groonga::Array.create(:name => "<users>")
373
- user_name = users.define_column("name", "<shorttext>")
374
-
375
- bookmarks = Groonga::Hash.create(:name => "<bookmarks>",
376
- :key_type => "<shorttext>")
377
- bookmark_user_id = bookmarks.define_column("user_id", users)
378
-
379
- daijiro = users.add
380
- daijiro["name"] = "daijiro"
381
- gunyarakun = users.add
382
- gunyarakun["name"] = "gunyarakun"
383
-
384
- groonga = bookmarks.add("http://groonga.org/")
385
- groonga["user_id"] = daijiro
386
-
387
- records = bookmarks.search("http://groonga.org/")
388
- assert_equal(["daijiro"],
389
- records.records.collect {|record| record[".user_id.name"]})
390
- end
391
-
392
289
  def test_each
393
290
  users = Groonga::Array.create(:name => "<users>")
394
291
  user_name = users.define_column("name", "<shorttext>")
@@ -414,16 +311,6 @@ class TableTest < Test::Unit::TestCase
414
311
  # assert_equal(0, users.size) # truncate isn't implemented in groonga.
415
312
  end
416
313
 
417
- def test_encoding
418
- assert_equal(Groonga::Encoding.default,
419
- Groonga::Hash.create.encoding)
420
- assert_equal(Groonga::Encoding.default,
421
- Groonga::PatriciaTrie.create.encoding)
422
-
423
- array = Groonga::Array.create
424
- assert_false(array.respond_to?(:encoding))
425
- end
426
-
427
314
  def test_sort
428
315
  bookmarks = Groonga::Array.create(:name => "<bookmarks>")
429
316
  id_column = bookmarks.define_column("id", "<int>")
@@ -444,4 +331,24 @@ class TableTest < Test::Unit::TestCase
444
331
  end
445
332
  assert_equal((180..199).to_a.reverse, results)
446
333
  end
334
+
335
+ def test_sort_without_limit
336
+ bookmarks = Groonga::Array.create(:name => "<bookmarks>")
337
+ id_column = bookmarks.define_column("id", "<int>")
338
+ 100.times do |i|
339
+ bookmark = bookmarks.add
340
+ bookmark["id"] = i + 100
341
+ end
342
+
343
+ results = bookmarks.sort([
344
+ {
345
+ :key => "id",
346
+ :order => :descending,
347
+ },
348
+ ])
349
+ results = results.records(:order => :ascending).collect do |record|
350
+ id_column[record.value.unpack("i")[0]]
351
+ end
352
+ assert_equal((100..199).to_a.reverse, results)
353
+ end
447
354
  end
data/test/test-type.rb CHANGED
@@ -24,9 +24,10 @@ class TypeTest < Test::Unit::TestCase
24
24
  end
25
25
 
26
26
  def test_builtins
27
- assert_equal_type("<int>", Groonga::Type::INT)
28
- assert_equal_type("<uint>", Groonga::Type::UINT)
27
+ assert_equal_type("<int>", Groonga::Type::INT32)
28
+ assert_equal_type("<uint>", Groonga::Type::UINT32)
29
29
  assert_equal_type("<int64>", Groonga::Type::INT64)
30
+ assert_equal_type("<uint64>", Groonga::Type::UINT64)
30
31
  assert_equal_type("<float>", Groonga::Type::FLOAT)
31
32
  assert_equal_type("<time>", Groonga::Type::TIME)
32
33
  assert_equal_type("<shorttext>", Groonga::Type::SHORT_TEXT)
@@ -35,7 +36,7 @@ class TypeTest < Test::Unit::TestCase
35
36
  end
36
37
 
37
38
  def test_inspect
38
- assert_equal("#<Groonga::Type id: <8>, " +
39
+ assert_equal("#<Groonga::Type id: <9>, " +
39
40
  "name: <<longtext>>, " +
40
41
  "path: (temporary), " +
41
42
  "domain: <nil>, " +
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: groonga
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-04-30 00:00:00 +09:00
12
+ date: 2009-06-04 00:00:00 +09:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -39,7 +39,9 @@ files:
39
39
  - README.rdoc
40
40
  - Rakefile
41
41
  - TUTORIAL.ja.rdoc
42
- - benchmark/small-many-items.rb
42
+ - benchmark/common.rb
43
+ - benchmark/read-write-small-many-items.rb
44
+ - benchmark/write-small-many-items.rb
43
45
  - example/bookmark.rb
44
46
  - ext/.gitignore
45
47
  - ext/rb-grn-accessor.c
@@ -48,10 +50,13 @@ files:
48
50
  - ext/rb-grn-column.c
49
51
  - ext/rb-grn-context.c
50
52
  - ext/rb-grn-database.c
53
+ - ext/rb-grn-encoding-support.c
51
54
  - ext/rb-grn-encoding.c
52
55
  - ext/rb-grn-exception.c
56
+ - ext/rb-grn-fix-size-column.c
53
57
  - ext/rb-grn-hash-cursor.c
54
58
  - ext/rb-grn-hash.c
59
+ - ext/rb-grn-index-column.c
55
60
  - ext/rb-grn-logger.c
56
61
  - ext/rb-grn-object.c
57
62
  - ext/rb-grn-patricia-trie-cursor.c
@@ -73,6 +78,8 @@ files:
73
78
  - html/developer.html
74
79
  - html/developer.svg
75
80
  - html/download.svg
81
+ - html/favicon.ico
82
+ - html/favicon.xcf
76
83
  - html/footer.html.erb
77
84
  - html/head.html.erb
78
85
  - html/header.html.erb
@@ -166,11 +173,15 @@ files:
166
173
  - test/.gitignore
167
174
  - test/groonga-test-utils.rb
168
175
  - test/run-test.rb
176
+ - test/test-array.rb
169
177
  - test/test-column.rb
170
178
  - test/test-context.rb
171
179
  - test/test-database.rb
172
180
  - test/test-encoding.rb
173
181
  - test/test-exception.rb
182
+ - test/test-hash.rb
183
+ - test/test-index-column.rb
184
+ - test/test-patricia-trie.rb
174
185
  - test/test-procedure.rb
175
186
  - test/test-query.rb
176
187
  - test/test-record.rb
@@ -205,7 +216,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
205
216
  requirements: []
206
217
 
207
218
  rubyforge_project: groonga
208
- rubygems_version: 1.3.2
219
+ rubygems_version: 1.3.3
209
220
  signing_key:
210
221
  specification_version: 3
211
222
  summary: Ruby bindings for groonga that provides full text search and column store features.
@@ -1,175 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- # This benchmark is based on Tokyo Cabinet's benchmark at
4
- # http://alpha.mixi.co.jp/blog/?p=791
5
- #
6
- # On my environment at 2009/04/29:
7
- # % for x in {0..8}; do ruby benchmark/small-many-items.rb $x; done
8
- # user system total real memory
9
- # Hash 1.070000 0.140000 1.210000 ( 1.462675) 46.234MB
10
- # groonga: Hash: memory 1.380000 0.140000 1.520000 ( 1.804040) 23.531MB
11
- # groonga: Trie: memory 1.780000 0.110000 1.890000 ( 2.136395) 15.520MB
12
- # groonga: Hash: file 1.380000 0.180000 1.560000 ( 1.879252) 23.535MB
13
- # groonga: Trie: file 1.650000 0.160000 1.810000 ( 2.257756) 15.523MB
14
- # TC: Hash: memory 0.680000 0.170000 0.850000 ( 1.038155) 38.246MB
15
- # TC: Tree: memory 0.640000 0.130000 0.770000 ( 1.029011) 30.609MB
16
- # TC: Hash: file 1.150000 2.900000 4.050000 ( 4.908274) 0.164MB
17
- # TC: Tree: file 0.970000 0.210000 1.180000 ( 1.416418) 5.367MB
18
- #
19
- # Ruby: Debian GNU/Linux sid at 2009/04/29:
20
- # ruby 1.8.7 (2008-08-11 patchlevel 72) [x86_64-linux]
21
- # groonga: HEAD at 2009/04/29: c97c3cf78b8f0761ca48ef211caa155135f89487
22
- # Ruby/Groonga: trunk at 2009/04/29: r221
23
- # Tokyo Cabinet: 1.4.17
24
- # Tokyo Cabinet Ruby: 1.23
25
-
26
- require 'benchmark'
27
- require 'tempfile'
28
-
29
- def memory_usage()
30
- status = `cat /proc/#{$$}/status`
31
- lines = status.split("\n")
32
- lines.each do |line|
33
- if line =~ /^VmRSS:/
34
- line.gsub!(/.*:\s*(\d+).*/, '\1')
35
- return line.to_i / 1024.0
36
- end
37
- end
38
- return -1;
39
- end
40
-
41
- n = 500000
42
-
43
- @items = []
44
-
45
- def item(label, &block)
46
- @items << [label, block]
47
- end
48
-
49
- def report(index=0)
50
- width = @items.collect do |label, _|
51
- label.length
52
- end.max
53
-
54
- label, block = @items[index]
55
- if label.nil?
56
- puts "unavailable report ID: #{index}"
57
- puts "available IDs:"
58
- @items.each_with_index do |(label, block), i|
59
- puts "#{i}: #{label}"
60
- end
61
- exit 1
62
- end
63
-
64
- if index.zero?
65
- puts(" " * (width - 1) + Benchmark::Tms::CAPTION.rstrip + "memory".rjust(14))
66
- end
67
- # GC.disable
68
- before = memory_usage
69
- result = Benchmark.measure(&block)
70
- # GC.enable
71
- GC.start
72
- size = memory_usage - before
73
-
74
- formatted_size = "%10.3f" % size
75
- puts "#{label.ljust(width)} #{result.to_s.strip} #{formatted_size}MB"
76
- end
77
-
78
- values = []
79
- n.times do |i|
80
- values << "%08d" % i
81
- end
82
-
83
- item("Hash") do
84
- hash = {}
85
- values.each do |value|
86
- hash[value] = value
87
- end
88
- end
89
-
90
- begin
91
- base_dir = File.expand_path(File.join(File.dirname(__FILE__), ".."))
92
- $LOAD_PATH.unshift(File.join(base_dir, "src"))
93
- $LOAD_PATH.unshift(File.join(base_dir, "src", "lib"))
94
-
95
- require 'groonga'
96
- Groonga::Database.create
97
-
98
- item("groonga: Hash: memory") do
99
- hash = Groonga::Hash.create(:key_type => "<shorttext>",
100
- :value_size => 8)
101
- values.each do |value|
102
- hash[value] = value
103
- end
104
- end
105
-
106
- item("groonga: Trie: memory") do
107
- hash = Groonga::PatriciaTrie.create(:key_type => "<shorttext>",
108
- :value_size => 8)
109
- values.each do |value|
110
- hash[value] = value
111
- end
112
- end
113
-
114
- hash_file = Tempfile.new("groonga-hash")
115
- item("groonga: Hash: file") do
116
- hash = Groonga::Hash.create(:key_type => "<shorttext>",
117
- :value_size => 8,
118
- :path => hash_file.path)
119
- values.each do |value|
120
- hash[value] = value
121
- end
122
- end
123
-
124
- trie_file = Tempfile.new("groonga-trie")
125
- item("groonga: Trie: file") do
126
- hash = Groonga::PatriciaTrie.create(:key_type => "<shorttext>",
127
- :value_size => 8,
128
- :path => trie_file.path)
129
- values.each do |value|
130
- hash[value] = value
131
- end
132
- end
133
- rescue LoadError
134
- end
135
-
136
- begin
137
- require 'tokyocabinet'
138
-
139
- item("TC: Hash: memory") do
140
- db = TokyoCabinet::ADB::new
141
- db.open("*#bnum=#{n}#mode=wct#xmsiz=0")
142
- values.each do |value|
143
- db.put(value, value)
144
- end
145
- end
146
-
147
- item("TC: Tree: memory") do
148
- db = TokyoCabinet::ADB::new
149
- db.open("+#bnum=#{n}#mode=wct#xmsiz=0")
150
- values.each do |value|
151
- db.put(value, value)
152
- end
153
- end
154
-
155
- hash_file = Tempfile.new(["tc-hash", ".tch"])
156
- item("TC: Hash: file") do
157
- db = TokyoCabinet::ADB::new
158
- db.open("#{hash_file.path}#bnum=#{n}#mode=wct#xmsiz=0")
159
- values.each do |value|
160
- db.put(value, value)
161
- end
162
- end
163
-
164
- tree_file = Tempfile.new(["tc-tree", ".tcb"])
165
- item("TC: Tree: file") do
166
- db = TokyoCabinet::ADB::new
167
- db.open("#{tree_file.path}#bnum=#{n}#mode=wct#xmsiz=0")
168
- values.each do |value|
169
- db.put(value, value)
170
- end
171
- end
172
- rescue LoadError
173
- end
174
-
175
- report(Integer(ARGV[0] || 0))