groonga 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. data/NEWS.ja.rdoc +11 -0
  2. data/NEWS.rdoc +11 -0
  3. data/README.ja.rdoc +4 -3
  4. data/README.rdoc +4 -3
  5. data/Rakefile +1 -1
  6. data/TUTORIAL.ja.rdoc +168 -44
  7. data/benchmark/common.rb +49 -0
  8. data/benchmark/read-write-small-many-items.rb +156 -0
  9. data/benchmark/write-small-many-items.rb +145 -0
  10. data/example/bookmark.rb +68 -20
  11. data/ext/rb-grn-array-cursor.c +8 -0
  12. data/ext/rb-grn-array.c +40 -11
  13. data/ext/rb-grn-column.c +38 -209
  14. data/ext/rb-grn-context.c +203 -56
  15. data/ext/rb-grn-database.c +119 -5
  16. data/ext/rb-grn-encoding-support.c +64 -0
  17. data/ext/rb-grn-encoding.c +58 -1
  18. data/ext/rb-grn-fix-size-column.c +220 -0
  19. data/ext/rb-grn-hash-cursor.c +8 -0
  20. data/ext/rb-grn-hash.c +244 -2
  21. data/ext/rb-grn-index-column.c +474 -0
  22. data/ext/rb-grn-object.c +143 -265
  23. data/ext/rb-grn-patricia-trie.c +148 -2
  24. data/ext/rb-grn-query.c +5 -3
  25. data/ext/rb-grn-record.c +3 -2
  26. data/ext/rb-grn-snippet.c +5 -3
  27. data/ext/rb-grn-table-cursor-key-support.c +3 -3
  28. data/ext/rb-grn-table-cursor.c +106 -112
  29. data/ext/rb-grn-table-key-support.c +220 -118
  30. data/ext/rb-grn-table.c +336 -80
  31. data/ext/rb-grn-type.c +5 -4
  32. data/ext/rb-grn-utils.c +62 -63
  33. data/ext/rb-grn.h +215 -14
  34. data/ext/rb-groonga.c +7 -16
  35. data/extconf.rb +3 -1
  36. data/html/favicon.ico +0 -0
  37. data/html/favicon.xcf +0 -0
  38. data/html/index.html +1 -7
  39. data/lib/groonga/record.rb +6 -1
  40. data/test/groonga-test-utils.rb +1 -0
  41. data/test/test-array.rb +81 -0
  42. data/test/test-column.rb +22 -12
  43. data/test/test-context.rb +1 -29
  44. data/test/test-database.rb +30 -0
  45. data/test/test-hash.rb +194 -0
  46. data/test/test-index-column.rb +57 -0
  47. data/test/test-patricia-trie.rb +82 -0
  48. data/test/test-record.rb +10 -10
  49. data/test/test-table.rb +37 -130
  50. data/test/test-type.rb +4 -3
  51. metadata +15 -4
  52. data/benchmark/small-many-items.rb +0 -175
data/test/test-table.rb CHANGED
@@ -36,9 +36,8 @@ class TableTest < Test::Unit::TestCase
36
36
  def test_open
37
37
  table_path = @tables_dir + "table"
38
38
  table = Groonga::Hash.create(:name => "bookmarks",
39
- :path => table_path.to_s)
39
+ :path => table_path.to_s)
40
40
  assert_equal("bookmarks", table.name)
41
- table.close
42
41
 
43
42
  called = false
44
43
  Groonga::Table.open(:name => "bookmarks") do |_table|
@@ -113,25 +112,32 @@ class TableTest < Test::Unit::TestCase
113
112
  table_path = @tables_dir + "table"
114
113
  table = Groonga::Hash.create(:name => "bookmarks",
115
114
  :path => table_path.to_s)
116
- column = table.define_column("name", "<text>",
117
- :type => "index",
118
- :compress => "zlib",
119
- :with_section => true,
120
- :with_weight => true,
121
- :with_position => true)
115
+ column = table.define_column("name", "<text>")
122
116
  assert_equal("bookmarks.name", column.name)
123
117
  assert_equal(column, table.column("name"))
124
118
  end
125
119
 
120
+ def test_define_index_column
121
+ bookmarks = Groonga::Hash.create(:name => "<bookmarks>")
122
+ bookmarks.define_column("content", "<text>")
123
+ terms = Groonga::Hash.create(:name => "<terms>")
124
+ terms.default_tokenizer = "<token:bigram>"
125
+ index = terms.define_index_column("content-index", bookmarks,
126
+ :with_section => true,
127
+ :source => "<bookmarks>.content")
128
+ bookmarks.add("google", :content => "Search engine")
129
+ assert_equal(["google"],
130
+ index.search("engine").collect {|record| record.key.key})
131
+ end
132
+
126
133
  def test_add_column
127
134
  bookmarks = Groonga::Hash.create(:name => "bookmarks",
128
135
  :path => (@tables_dir + "bookmarks").to_s)
129
136
 
130
137
  description_column_path = @columns_dir + "description"
131
138
  bookmarks_description =
132
- bookmarks.define_column("description", "<text>",
133
- :type => "index",
134
- :path => description_column_path.to_s)
139
+ bookmarks.define_index_column("description", "<text>",
140
+ :path => description_column_path.to_s)
135
141
 
136
142
  books = Groonga::Hash.create(:name => "books",
137
143
  :path => (@tables_dir + "books").to_s)
@@ -273,94 +279,6 @@ class TableTest < Test::Unit::TestCase
273
279
  assert_not_predicate(bookmarks_path, :exist?)
274
280
  end
275
281
 
276
- def test_array_reference
277
- bookmarks_path = @tables_dir + "bookmarks"
278
- bookmarks = Groonga::Hash.create(:name => "bookmarks",
279
- :path => bookmarks_path.to_s,
280
- :key_type => "<shorttext>")
281
- bookmark = bookmarks.add("http://google.com/")
282
- assert_equal(bookmark, bookmarks["http://google.com/"])
283
- end
284
-
285
- def test_tokenizer
286
- table = Groonga::Hash.create
287
- assert_equal(Groonga::Context.default["<token:bigram>"],
288
- table.default_tokenizer)
289
- table.default_tokenizer = "<token:mecab>"
290
- assert_equal(Groonga::Context.default["<token:mecab>"],
291
- table.default_tokenizer)
292
- end
293
-
294
- def test_inspect_anonymous
295
- path = @tables_dir + "anoymous.groonga"
296
- anonymous_table = Groonga::Hash.create(:path => path.to_s)
297
- assert_equal("#<Groonga::Hash " +
298
- "id: <#{anonymous_table.id}>, " +
299
- "name: (anonymous), " +
300
- "path: <#{path}>, " +
301
- "domain: <nil>, " +
302
- "range: <nil>, " +
303
- "encoding: <#{encoding.inspect}>, " +
304
- "size: <0>>",
305
- anonymous_table.inspect)
306
- end
307
-
308
- def test_inspect_anonymous_temporary
309
- anonymous_table = Groonga::Hash.create
310
- assert_equal("#<Groonga::Hash " +
311
- "id: <#{anonymous_table.id}>, " +
312
- "name: (anonymous), " +
313
- "path: (temporary), " +
314
- "domain: <nil>, " +
315
- "range: <nil>, " +
316
- "encoding: <#{encoding.inspect}>, " +
317
- "size: <0>>",
318
- anonymous_table.inspect)
319
- end
320
-
321
- def test_inspect_named
322
- path = @tables_dir + "named.groonga"
323
- named_table = Groonga::Hash.create(:name => "name", :path => path.to_s)
324
- assert_equal("#<Groonga::Hash " +
325
- "id: <#{named_table.id}>, " +
326
- "name: <name>, " +
327
- "path: <#{path}>, " +
328
- "domain: <nil>, " +
329
- "range: <nil>, " +
330
- "encoding: <#{encoding.inspect}>, " +
331
- "size: <0>>",
332
- named_table.inspect)
333
- end
334
-
335
- def test_inspect_named_temporary
336
- named_table = Groonga::Hash.create(:name => "name")
337
- assert_equal("#<Groonga::Hash " +
338
- "id: <#{named_table.id}>, " +
339
- "name: <name>, " +
340
- "path: (temporary), " +
341
- "domain: <nil>, " +
342
- "range: <nil>, " +
343
- "encoding: <#{encoding.inspect}>, " +
344
- "size: <0>>",
345
- named_table.inspect)
346
- end
347
-
348
- def test_inspect_size
349
- path = @tables_dir + "named.groonga"
350
- contain_table = Groonga::Array.create(:name => "name", :path => path.to_s)
351
- 3.times do
352
- contain_table.add
353
- end
354
- assert_equal("#<Groonga::Array " +
355
- "id: <#{contain_table.id}>, " +
356
- "name: <name>, " +
357
- "path: <#{path}>, " +
358
- "domain: <nil>, " +
359
- "range: <nil>, " +
360
- "size: <3>>",
361
- contain_table.inspect)
362
- end
363
-
364
282
  def test_temporary_add
365
283
  table = Groonga::Hash.create(:key_type => "<shorttext>")
366
284
  assert_equal(0, table.size)
@@ -368,27 +286,6 @@ class TableTest < Test::Unit::TestCase
368
286
  assert_equal(1, table.size)
369
287
  end
370
288
 
371
- def test_search
372
- users = Groonga::Array.create(:name => "<users>")
373
- user_name = users.define_column("name", "<shorttext>")
374
-
375
- bookmarks = Groonga::Hash.create(:name => "<bookmarks>",
376
- :key_type => "<shorttext>")
377
- bookmark_user_id = bookmarks.define_column("user_id", users)
378
-
379
- daijiro = users.add
380
- daijiro["name"] = "daijiro"
381
- gunyarakun = users.add
382
- gunyarakun["name"] = "gunyarakun"
383
-
384
- groonga = bookmarks.add("http://groonga.org/")
385
- groonga["user_id"] = daijiro
386
-
387
- records = bookmarks.search("http://groonga.org/")
388
- assert_equal(["daijiro"],
389
- records.records.collect {|record| record[".user_id.name"]})
390
- end
391
-
392
289
  def test_each
393
290
  users = Groonga::Array.create(:name => "<users>")
394
291
  user_name = users.define_column("name", "<shorttext>")
@@ -414,16 +311,6 @@ class TableTest < Test::Unit::TestCase
414
311
  # assert_equal(0, users.size) # truncate isn't implemented in groonga.
415
312
  end
416
313
 
417
- def test_encoding
418
- assert_equal(Groonga::Encoding.default,
419
- Groonga::Hash.create.encoding)
420
- assert_equal(Groonga::Encoding.default,
421
- Groonga::PatriciaTrie.create.encoding)
422
-
423
- array = Groonga::Array.create
424
- assert_false(array.respond_to?(:encoding))
425
- end
426
-
427
314
  def test_sort
428
315
  bookmarks = Groonga::Array.create(:name => "<bookmarks>")
429
316
  id_column = bookmarks.define_column("id", "<int>")
@@ -444,4 +331,24 @@ class TableTest < Test::Unit::TestCase
444
331
  end
445
332
  assert_equal((180..199).to_a.reverse, results)
446
333
  end
334
+
335
+ def test_sort_without_limit
336
+ bookmarks = Groonga::Array.create(:name => "<bookmarks>")
337
+ id_column = bookmarks.define_column("id", "<int>")
338
+ 100.times do |i|
339
+ bookmark = bookmarks.add
340
+ bookmark["id"] = i + 100
341
+ end
342
+
343
+ results = bookmarks.sort([
344
+ {
345
+ :key => "id",
346
+ :order => :descending,
347
+ },
348
+ ])
349
+ results = results.records(:order => :ascending).collect do |record|
350
+ id_column[record.value.unpack("i")[0]]
351
+ end
352
+ assert_equal((100..199).to_a.reverse, results)
353
+ end
447
354
  end
data/test/test-type.rb CHANGED
@@ -24,9 +24,10 @@ class TypeTest < Test::Unit::TestCase
24
24
  end
25
25
 
26
26
  def test_builtins
27
- assert_equal_type("<int>", Groonga::Type::INT)
28
- assert_equal_type("<uint>", Groonga::Type::UINT)
27
+ assert_equal_type("<int>", Groonga::Type::INT32)
28
+ assert_equal_type("<uint>", Groonga::Type::UINT32)
29
29
  assert_equal_type("<int64>", Groonga::Type::INT64)
30
+ assert_equal_type("<uint64>", Groonga::Type::UINT64)
30
31
  assert_equal_type("<float>", Groonga::Type::FLOAT)
31
32
  assert_equal_type("<time>", Groonga::Type::TIME)
32
33
  assert_equal_type("<shorttext>", Groonga::Type::SHORT_TEXT)
@@ -35,7 +36,7 @@ class TypeTest < Test::Unit::TestCase
35
36
  end
36
37
 
37
38
  def test_inspect
38
- assert_equal("#<Groonga::Type id: <8>, " +
39
+ assert_equal("#<Groonga::Type id: <9>, " +
39
40
  "name: <<longtext>>, " +
40
41
  "path: (temporary), " +
41
42
  "domain: <nil>, " +
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: groonga
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-04-30 00:00:00 +09:00
12
+ date: 2009-06-04 00:00:00 +09:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -39,7 +39,9 @@ files:
39
39
  - README.rdoc
40
40
  - Rakefile
41
41
  - TUTORIAL.ja.rdoc
42
- - benchmark/small-many-items.rb
42
+ - benchmark/common.rb
43
+ - benchmark/read-write-small-many-items.rb
44
+ - benchmark/write-small-many-items.rb
43
45
  - example/bookmark.rb
44
46
  - ext/.gitignore
45
47
  - ext/rb-grn-accessor.c
@@ -48,10 +50,13 @@ files:
48
50
  - ext/rb-grn-column.c
49
51
  - ext/rb-grn-context.c
50
52
  - ext/rb-grn-database.c
53
+ - ext/rb-grn-encoding-support.c
51
54
  - ext/rb-grn-encoding.c
52
55
  - ext/rb-grn-exception.c
56
+ - ext/rb-grn-fix-size-column.c
53
57
  - ext/rb-grn-hash-cursor.c
54
58
  - ext/rb-grn-hash.c
59
+ - ext/rb-grn-index-column.c
55
60
  - ext/rb-grn-logger.c
56
61
  - ext/rb-grn-object.c
57
62
  - ext/rb-grn-patricia-trie-cursor.c
@@ -73,6 +78,8 @@ files:
73
78
  - html/developer.html
74
79
  - html/developer.svg
75
80
  - html/download.svg
81
+ - html/favicon.ico
82
+ - html/favicon.xcf
76
83
  - html/footer.html.erb
77
84
  - html/head.html.erb
78
85
  - html/header.html.erb
@@ -166,11 +173,15 @@ files:
166
173
  - test/.gitignore
167
174
  - test/groonga-test-utils.rb
168
175
  - test/run-test.rb
176
+ - test/test-array.rb
169
177
  - test/test-column.rb
170
178
  - test/test-context.rb
171
179
  - test/test-database.rb
172
180
  - test/test-encoding.rb
173
181
  - test/test-exception.rb
182
+ - test/test-hash.rb
183
+ - test/test-index-column.rb
184
+ - test/test-patricia-trie.rb
174
185
  - test/test-procedure.rb
175
186
  - test/test-query.rb
176
187
  - test/test-record.rb
@@ -205,7 +216,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
205
216
  requirements: []
206
217
 
207
218
  rubyforge_project: groonga
208
- rubygems_version: 1.3.2
219
+ rubygems_version: 1.3.3
209
220
  signing_key:
210
221
  specification_version: 3
211
222
  summary: Ruby bindings for groonga that provides full text search and column store features.
@@ -1,175 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- # This benchmark is based on Tokyo Cabinet's benchmark at
4
- # http://alpha.mixi.co.jp/blog/?p=791
5
- #
6
- # On my environment at 2009/04/29:
7
- # % for x in {0..8}; do ruby benchmark/small-many-items.rb $x; done
8
- # user system total real memory
9
- # Hash 1.070000 0.140000 1.210000 ( 1.462675) 46.234MB
10
- # groonga: Hash: memory 1.380000 0.140000 1.520000 ( 1.804040) 23.531MB
11
- # groonga: Trie: memory 1.780000 0.110000 1.890000 ( 2.136395) 15.520MB
12
- # groonga: Hash: file 1.380000 0.180000 1.560000 ( 1.879252) 23.535MB
13
- # groonga: Trie: file 1.650000 0.160000 1.810000 ( 2.257756) 15.523MB
14
- # TC: Hash: memory 0.680000 0.170000 0.850000 ( 1.038155) 38.246MB
15
- # TC: Tree: memory 0.640000 0.130000 0.770000 ( 1.029011) 30.609MB
16
- # TC: Hash: file 1.150000 2.900000 4.050000 ( 4.908274) 0.164MB
17
- # TC: Tree: file 0.970000 0.210000 1.180000 ( 1.416418) 5.367MB
18
- #
19
- # Ruby: Debian GNU/Linux sid at 2009/04/29:
20
- # ruby 1.8.7 (2008-08-11 patchlevel 72) [x86_64-linux]
21
- # groonga: HEAD at 2009/04/29: c97c3cf78b8f0761ca48ef211caa155135f89487
22
- # Ruby/Groonga: trunk at 2009/04/29: r221
23
- # Tokyo Cabinet: 1.4.17
24
- # Tokyo Cabinet Ruby: 1.23
25
-
26
- require 'benchmark'
27
- require 'tempfile'
28
-
29
- def memory_usage()
30
- status = `cat /proc/#{$$}/status`
31
- lines = status.split("\n")
32
- lines.each do |line|
33
- if line =~ /^VmRSS:/
34
- line.gsub!(/.*:\s*(\d+).*/, '\1')
35
- return line.to_i / 1024.0
36
- end
37
- end
38
- return -1;
39
- end
40
-
41
- n = 500000
42
-
43
- @items = []
44
-
45
- def item(label, &block)
46
- @items << [label, block]
47
- end
48
-
49
- def report(index=0)
50
- width = @items.collect do |label, _|
51
- label.length
52
- end.max
53
-
54
- label, block = @items[index]
55
- if label.nil?
56
- puts "unavailable report ID: #{index}"
57
- puts "available IDs:"
58
- @items.each_with_index do |(label, block), i|
59
- puts "#{i}: #{label}"
60
- end
61
- exit 1
62
- end
63
-
64
- if index.zero?
65
- puts(" " * (width - 1) + Benchmark::Tms::CAPTION.rstrip + "memory".rjust(14))
66
- end
67
- # GC.disable
68
- before = memory_usage
69
- result = Benchmark.measure(&block)
70
- # GC.enable
71
- GC.start
72
- size = memory_usage - before
73
-
74
- formatted_size = "%10.3f" % size
75
- puts "#{label.ljust(width)} #{result.to_s.strip} #{formatted_size}MB"
76
- end
77
-
78
- values = []
79
- n.times do |i|
80
- values << "%08d" % i
81
- end
82
-
83
- item("Hash") do
84
- hash = {}
85
- values.each do |value|
86
- hash[value] = value
87
- end
88
- end
89
-
90
- begin
91
- base_dir = File.expand_path(File.join(File.dirname(__FILE__), ".."))
92
- $LOAD_PATH.unshift(File.join(base_dir, "src"))
93
- $LOAD_PATH.unshift(File.join(base_dir, "src", "lib"))
94
-
95
- require 'groonga'
96
- Groonga::Database.create
97
-
98
- item("groonga: Hash: memory") do
99
- hash = Groonga::Hash.create(:key_type => "<shorttext>",
100
- :value_size => 8)
101
- values.each do |value|
102
- hash[value] = value
103
- end
104
- end
105
-
106
- item("groonga: Trie: memory") do
107
- hash = Groonga::PatriciaTrie.create(:key_type => "<shorttext>",
108
- :value_size => 8)
109
- values.each do |value|
110
- hash[value] = value
111
- end
112
- end
113
-
114
- hash_file = Tempfile.new("groonga-hash")
115
- item("groonga: Hash: file") do
116
- hash = Groonga::Hash.create(:key_type => "<shorttext>",
117
- :value_size => 8,
118
- :path => hash_file.path)
119
- values.each do |value|
120
- hash[value] = value
121
- end
122
- end
123
-
124
- trie_file = Tempfile.new("groonga-trie")
125
- item("groonga: Trie: file") do
126
- hash = Groonga::PatriciaTrie.create(:key_type => "<shorttext>",
127
- :value_size => 8,
128
- :path => trie_file.path)
129
- values.each do |value|
130
- hash[value] = value
131
- end
132
- end
133
- rescue LoadError
134
- end
135
-
136
- begin
137
- require 'tokyocabinet'
138
-
139
- item("TC: Hash: memory") do
140
- db = TokyoCabinet::ADB::new
141
- db.open("*#bnum=#{n}#mode=wct#xmsiz=0")
142
- values.each do |value|
143
- db.put(value, value)
144
- end
145
- end
146
-
147
- item("TC: Tree: memory") do
148
- db = TokyoCabinet::ADB::new
149
- db.open("+#bnum=#{n}#mode=wct#xmsiz=0")
150
- values.each do |value|
151
- db.put(value, value)
152
- end
153
- end
154
-
155
- hash_file = Tempfile.new(["tc-hash", ".tch"])
156
- item("TC: Hash: file") do
157
- db = TokyoCabinet::ADB::new
158
- db.open("#{hash_file.path}#bnum=#{n}#mode=wct#xmsiz=0")
159
- values.each do |value|
160
- db.put(value, value)
161
- end
162
- end
163
-
164
- tree_file = Tempfile.new(["tc-tree", ".tcb"])
165
- item("TC: Tree: file") do
166
- db = TokyoCabinet::ADB::new
167
- db.open("#{tree_file.path}#bnum=#{n}#mode=wct#xmsiz=0")
168
- values.each do |value|
169
- db.put(value, value)
170
- end
171
- end
172
- rescue LoadError
173
- end
174
-
175
- report(Integer(ARGV[0] || 0))