rroonga 1.0.8 → 1.0.9
Sign up to get free protection for your applications and to get access to all the features.
- data/NEWS.ja.rdoc +47 -0
- data/NEWS.rdoc +48 -0
- data/README.ja.rdoc +1 -0
- data/README.rdoc +1 -0
- data/Rakefile +32 -13
- data/benchmark/create-wikipedia-database.rb +212 -0
- data/benchmark/repeat-load.rb +213 -0
- data/benchmark/select.rb +1052 -0
- data/ext/groonga/mkmf.log +99 -0
- data/ext/groonga/rb-grn-column.c +57 -6
- data/ext/groonga/rb-grn-context.c +15 -9
- data/ext/groonga/rb-grn-expression.c +7 -7
- data/ext/groonga/{rb-grn-operation.c → rb-grn-operator.c} +89 -87
- data/ext/groonga/rb-grn-patricia-trie.c +5 -5
- data/ext/groonga/rb-grn-query.c +4 -4
- data/ext/groonga/rb-grn-table.c +16 -19
- data/ext/groonga/rb-grn.h +3 -3
- data/ext/groonga/rb-groonga.c +1 -1
- data/html/index.html +4 -4
- data/lib/groonga/context.rb +34 -0
- data/lib/groonga/expression-builder.rb +34 -2
- data/lib/groonga/record.rb +8 -6
- data/lib/groonga/schema.rb +40 -4
- data/rroonga-build.rb +2 -2
- data/test-unit/Rakefile +5 -18
- data/test-unit/html/classic.html +15 -0
- data/test-unit/html/index.html +13 -235
- data/test-unit/html/index.html.ja +15 -258
- data/test-unit/lib/test/unit.rb +1 -6
- data/test-unit/lib/test/unit/assertions.rb +11 -115
- data/test-unit/lib/test/unit/autorunner.rb +2 -5
- data/test-unit/lib/test/unit/collector/load.rb +1 -1
- data/test-unit/lib/test/unit/color-scheme.rb +2 -6
- data/test-unit/lib/test/unit/diff.rb +1 -17
- data/test-unit/lib/test/unit/testcase.rb +0 -7
- data/test-unit/lib/test/unit/testresult.rb +2 -34
- data/test-unit/lib/test/unit/ui/console/testrunner.rb +45 -9
- data/test-unit/lib/test/unit/ui/tap/testrunner.rb +12 -2
- data/test-unit/lib/test/unit/ui/testrunner.rb +0 -25
- data/test-unit/lib/test/unit/util/backtracefilter.rb +0 -1
- data/test-unit/lib/test/unit/version.rb +1 -1
- data/test-unit/test/test-color-scheme.rb +2 -4
- data/test-unit/test/test_assertions.rb +5 -51
- data/test/test-column.rb +31 -1
- data/test/test-context-select.rb +45 -14
- data/test/test-context.rb +36 -0
- data/test/test-database.rb +13 -0
- data/test/test-expression-builder.rb +32 -5
- data/test/test-record.rb +34 -1
- data/test/test-schema.rb +52 -2
- data/test/test-table-select-weight.rb +20 -1
- data/test/test-table.rb +58 -0
- metadata +13 -41
- data/test-unit-notify/Rakefile +0 -47
- data/test-unit-notify/lib/test/unit/notify.rb +0 -104
- data/test-unit/COPYING +0 -56
- data/test-unit/GPL +0 -340
- data/test-unit/PSFL +0 -271
- data/test-unit/html/bar.svg +0 -153
- data/test-unit/html/developer.svg +0 -469
- data/test-unit/html/favicon.ico +0 -0
- data/test-unit/html/favicon.svg +0 -82
- data/test-unit/html/heading-mark.svg +0 -393
- data/test-unit/html/install.svg +0 -636
- data/test-unit/html/logo.svg +0 -483
- data/test-unit/html/test-unit.css +0 -339
- data/test-unit/html/tutorial.svg +0 -559
- data/test-unit/lib/test/unit/util/output.rb +0 -31
- data/test-unit/test/ui/test_tap.rb +0 -33
- data/test-unit/test/util/test-output.rb +0 -11
data/NEWS.ja.rdoc
CHANGED
@@ -1,5 +1,52 @@
|
|
1
1
|
= お知らせ
|
2
2
|
|
3
|
+
== 1.0.9: 2011-01-29
|
4
|
+
|
5
|
+
=== 改良
|
6
|
+
|
7
|
+
* Windows上でのgem作成に対応。
|
8
|
+
[ongaeshiさんがパッチ提供]
|
9
|
+
* Groonga::Schemaでテーブル・カラムを削除した時に作成したディレクトリを
|
10
|
+
削除するようにした。
|
11
|
+
* Groonga::Context#create_databaseを追加。
|
12
|
+
* Groonga::Context#open_databaseを追加。
|
13
|
+
* Groonga::Column#indexesを追加。
|
14
|
+
* Groonga::Table#select内でのインデックスカラムにマッチする記法をサポート。
|
15
|
+
table.select do |record|
|
16
|
+
record.match("query") do |match_record|
|
17
|
+
(match_record.index("Terms.title") * 1000) |
|
18
|
+
(match_record.index("Terms.description") * 100)
|
19
|
+
match_record.content
|
20
|
+
end
|
21
|
+
end
|
22
|
+
* Groonga::Table#select内での前方一致検索に対応。
|
23
|
+
table.select do |record|
|
24
|
+
record.name.prefix_search("groo")
|
25
|
+
end
|
26
|
+
* Groonga::Table#select内での後方一致検索に対応。
|
27
|
+
table.select do |record|
|
28
|
+
record.name.suffix_search("nga")
|
29
|
+
end
|
30
|
+
* :default_tokenizerのスキーマダンプに対応。
|
31
|
+
* :key_normalizeのスキーマダンプに対応。
|
32
|
+
* Groonga::Table#have_column?が擬似カラムに対応。
|
33
|
+
* Groonga::Record#have_column?が擬似カラムに対応。
|
34
|
+
|
35
|
+
=== 変更
|
36
|
+
|
37
|
+
* Groonga::OperationをGroonga::Operatorに変更。
|
38
|
+
(しばらくはGroonga::Operationも使えるが、非推奨。)
|
39
|
+
|
40
|
+
=== 修正
|
41
|
+
|
42
|
+
* 複数のGroonga::Contextを利用した場合に
|
43
|
+
Groonga::Table#selectでクラッシュする問題を修正。
|
44
|
+
* 例外発生時にクラッシュする問題を修正。
|
45
|
+
|
46
|
+
=== 感謝
|
47
|
+
|
48
|
+
* ongaeshiさん
|
49
|
+
|
3
50
|
== 1.0.8: 2010-12-25
|
4
51
|
|
5
52
|
=== 改良
|
data/NEWS.rdoc
CHANGED
@@ -1,5 +1,53 @@
|
|
1
1
|
= NEWS
|
2
2
|
|
3
|
+
== 1.0.9: 2011-01-29
|
4
|
+
|
5
|
+
=== Improvements
|
6
|
+
|
7
|
+
* Supported gem creation on Windows.
|
8
|
+
[Patch by ongaeshi]
|
9
|
+
* Groonga::Schema now removes the directory it created
|
10
|
+
when a table or column is removed.
|
11
|
+
* Added Groonga::Context#create_database.
|
12
|
+
* Added Groonga::Context#open_database.
|
13
|
+
* Added Groonga::Column#indexes.
|
14
|
+
* Supported a notation for specifying index column as match target in
|
15
|
+
Groonga::Table#select:
|
16
|
+
table.select do |record|
|
17
|
+
record.match("query") do |match_record|
|
18
|
+
(match_record.index("Terms.title") * 1000) |
|
19
|
+
(match_record.index("Terms.description") * 100)
|
20
|
+
match_record.content
|
21
|
+
end
|
22
|
+
end
|
23
|
+
* Supported prefix search in Groonga::Table#select:
|
24
|
+
table.select do |record|
|
25
|
+
record.name.prefix_search("groo")
|
26
|
+
end
|
27
|
+
* Supported suffix search in Groonga::Table#select:
|
28
|
+
table.select do |record|
|
29
|
+
record.name.suffix_search("nga")
|
30
|
+
end
|
31
|
+
* Supported :default_tokenizer schema dump.
|
32
|
+
* Supported :key_normalize schema dump.
|
33
|
+
* Supported pseudo columns by Groonga::Table#have_column?.
|
34
|
+
* Supported pseudo columns by Groonga::Record#have_column?.
|
35
|
+
|
36
|
+
=== Changes
|
37
|
+
|
38
|
+
* Renamed Groonga::Operation to Groonga::Operator.
|
39
|
+
(Groonga::Operation is deprecated but still usable.)
|
40
|
+
|
41
|
+
=== Fixes
|
42
|
+
|
43
|
+
* Fixed a crash bug when a non-default Groonga::Context is used in
|
44
|
+
Groonga::Table#select.
|
45
|
+
* Fixed a crash bug when an exception occurs.
|
46
|
+
|
47
|
+
=== Thanks
|
48
|
+
|
49
|
+
* ongaeshi
|
50
|
+
|
3
51
|
== 1.0.8: 2010-12-25
|
4
52
|
|
5
53
|
=== Improvements
|
data/README.ja.rdoc
CHANGED
data/README.rdoc
CHANGED
data/Rakefile
CHANGED
@@ -144,28 +144,41 @@ ObjectSpace.each_object(Rake::RDocTask) do |rdoc_task|
|
|
144
144
|
rdoc_task.rdoc_files += Dir.glob("**/*.rdoc")
|
145
145
|
end
|
146
146
|
|
147
|
+
def windows?(platform=nil)
|
148
|
+
platform ||= RUBY_PLATFORM
|
149
|
+
platform =~ /mswin(?!ce)|mingw|cygwin|bccwin/
|
150
|
+
end
|
151
|
+
|
152
|
+
def collect_binary_files(binary_dir)
|
153
|
+
binary_files = []
|
154
|
+
Find.find(binary_dir) do |name|
|
155
|
+
next unless File.file?(name)
|
156
|
+
next if /\.zip\z/i =~ name
|
157
|
+
binary_files << name
|
158
|
+
end
|
159
|
+
binary_files
|
160
|
+
end
|
161
|
+
|
147
162
|
relative_vendor_dir = "vendor"
|
148
163
|
relative_binary_dir = File.join("vendor", "local")
|
149
164
|
vendor_dir = File.join(base_dir, relative_vendor_dir)
|
150
165
|
binary_dir = File.join(base_dir, relative_binary_dir)
|
166
|
+
|
151
167
|
Rake::ExtensionTask.new("groonga", project.spec) do |ext|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
binary_files << name
|
168
|
+
if windows?
|
169
|
+
ext.gem_spec.files += collect_binary_files(relative_binary_dir)
|
170
|
+
else
|
171
|
+
ext.cross_compile = true
|
172
|
+
ext.cross_compiling do |spec|
|
173
|
+
if windows?(spec.platform.to_s)
|
174
|
+
spec.files += collect_binary_files(relative_binary_dir)
|
160
175
|
end
|
161
|
-
spec.files += binary_files
|
162
176
|
end
|
163
177
|
end
|
164
178
|
end
|
165
179
|
|
166
180
|
task :publish_docs => [:prepare_docs_for_publishing]
|
167
181
|
|
168
|
-
|
169
182
|
include ERB::Util
|
170
183
|
|
171
184
|
def apply_template(file, head, header, footer)
|
@@ -236,8 +249,14 @@ project.spec.executables.clear
|
|
236
249
|
task(:release).prerequisites.reject! {|name| name == "clean"}
|
237
250
|
|
238
251
|
namespace :win32 do
|
252
|
+
if ENV["GROONGA64"] == "yes"
|
253
|
+
host = "amd64-mingw32msvc"
|
254
|
+
else
|
255
|
+
host = "i586-mingw32msvc"
|
256
|
+
end
|
257
|
+
|
239
258
|
desc "Build MeCab and groonga and install them into vendor/local/."
|
240
|
-
task(:build => :build_groonga)
|
259
|
+
task(:build => [:build_mecab, :build_mecab_dict, :build_groonga])
|
241
260
|
|
242
261
|
desc "Build MeCab and install it into vendor/local/."
|
243
262
|
task(:build_mecab) do
|
@@ -260,7 +279,7 @@ namespace :win32 do
|
|
260
279
|
Dir.chdir(File.join(tmp_dir, mecab_base)) do
|
261
280
|
sh("./configure",
|
262
281
|
"--prefix=#{binary_dir}",
|
263
|
-
"--host
|
282
|
+
"--host=#{host}") or exit(false)
|
264
283
|
sh("env", "GREP_OPTIONS=--text", "nice", "make", "-j8") or exit(false)
|
265
284
|
sh("env", "GREP_OPTIONS=--text", "make", "install") or exit(false)
|
266
285
|
|
@@ -321,7 +340,7 @@ namespace :win32 do
|
|
321
340
|
mecab_config = File.join(binary_dir, "bin", "mecab-config")
|
322
341
|
args = ["./configure",
|
323
342
|
"--prefix=#{binary_dir}",
|
324
|
-
"--host
|
343
|
+
"--host=#{host}",
|
325
344
|
"--without-cutter",
|
326
345
|
"--disable-benchmark"]
|
327
346
|
if File.exist?(mecab_config)
|
@@ -0,0 +1,212 @@
|
|
1
|
+
# Wikipedia data: http://download.wikimedia.org/jawiki/latest/jawiki-latest-pages-articles.xml.bz2
|
2
|
+
|
3
|
+
require 'time'
|
4
|
+
require 'fileutils'
|
5
|
+
require 'groonga'
|
6
|
+
|
7
|
+
require 'nokogiri'
|
8
|
+
|
9
|
+
class WikipediaExtractor
|
10
|
+
def initialize(listener)
|
11
|
+
@listener = listener
|
12
|
+
end
|
13
|
+
|
14
|
+
def extract(input)
|
15
|
+
extractor = Extractor.new(@listener)
|
16
|
+
parser = Nokogiri::XML::SAX::Parser.new(extractor)
|
17
|
+
parser.parse(input)
|
18
|
+
end
|
19
|
+
|
20
|
+
class Extractor
|
21
|
+
def initialize(listener)
|
22
|
+
@listener = listener
|
23
|
+
@name_stack = []
|
24
|
+
@text_stack = []
|
25
|
+
@contributor_stack = []
|
26
|
+
@page = {}
|
27
|
+
end
|
28
|
+
|
29
|
+
def start_document
|
30
|
+
end
|
31
|
+
|
32
|
+
def end_document
|
33
|
+
end
|
34
|
+
|
35
|
+
def start_element_namespace(name, attrs=[], prefix=nil, uri=nil, ns=[])
|
36
|
+
@name_stack << name
|
37
|
+
@text_stack << ""
|
38
|
+
case @name_stack.join(".")
|
39
|
+
when "mediawiki.page"
|
40
|
+
@page = {}
|
41
|
+
when "mediawiki.page.revision.contributor"
|
42
|
+
@contributor_stack << {}
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def end_element_namespace(name, prefix=nil, uri=nil)
|
47
|
+
case @name_stack.join(".")
|
48
|
+
when "mediawiki.page"
|
49
|
+
@listener.page(@page)
|
50
|
+
when "mediawiki.page.title"
|
51
|
+
title = @text_stack.last
|
52
|
+
@page[:title] = @listener.title(title) || title
|
53
|
+
when "mediawiki.page.revision.timestamp"
|
54
|
+
timestamp = Time.parse(@text_stack.last)
|
55
|
+
@page[:timestamp] = @listener.timestamp(timestamp) || timestamp
|
56
|
+
when "mediawiki.page.revision.contributor"
|
57
|
+
contributor = @contributor_stack.pop
|
58
|
+
@page[:contributor] = @listener.contributor(contributor) || contributor
|
59
|
+
when "mediawiki.page.revision.contributor.id"
|
60
|
+
@contributor_stack.last[:id] = Integer(@text_stack.last)
|
61
|
+
when "mediawiki.page.revision.contributor.username"
|
62
|
+
@contributor_stack.last[:name] = @text_stack.last
|
63
|
+
when "mediawiki.page.revision.text"
|
64
|
+
content = @text_stack.last
|
65
|
+
@page[:content] = @listener.content(content) || content
|
66
|
+
end
|
67
|
+
@name_stack.pop
|
68
|
+
@text_stack.pop
|
69
|
+
end
|
70
|
+
|
71
|
+
def characters(string)
|
72
|
+
elements_without_interested_text = [
|
73
|
+
"mediawiki", "siteinfo", "case",
|
74
|
+
"namespaces", "revisions",
|
75
|
+
"contributor",
|
76
|
+
]
|
77
|
+
return if elements_without_interested_text.include?(@name_stack.last)
|
78
|
+
@text_stack.last << string
|
79
|
+
end
|
80
|
+
|
81
|
+
def xmldecl(*arguments, &block)
|
82
|
+
end
|
83
|
+
|
84
|
+
def comment(string)
|
85
|
+
end
|
86
|
+
|
87
|
+
def warning(string)
|
88
|
+
end
|
89
|
+
|
90
|
+
def error(string)
|
91
|
+
end
|
92
|
+
|
93
|
+
def cdata_block(string)
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
class WikipediaImporter
|
99
|
+
def initialize(groonga_loader)
|
100
|
+
@groonga_loader = groonga_loader
|
101
|
+
end
|
102
|
+
|
103
|
+
def title(title)
|
104
|
+
end
|
105
|
+
|
106
|
+
def timestamp(timestamp)
|
107
|
+
end
|
108
|
+
|
109
|
+
def contributor(contributor)
|
110
|
+
end
|
111
|
+
|
112
|
+
def content(content)
|
113
|
+
end
|
114
|
+
|
115
|
+
def page(page)
|
116
|
+
@groonga_loader.load(page)
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
module TimeDrilldownable
|
121
|
+
def define_time_columns(table)
|
122
|
+
table.int32("year")
|
123
|
+
table.int32("month")
|
124
|
+
table.short_text("date")
|
125
|
+
table.int32("wday")
|
126
|
+
table.int32("hour")
|
127
|
+
end
|
128
|
+
|
129
|
+
def add_time(table, key, time)
|
130
|
+
table[key]["year"] = time.year
|
131
|
+
table[key]["month"] = time.month
|
132
|
+
table[key]["date"] = time.strftime("%m/%d")
|
133
|
+
table[key]["wday"] = time.wday
|
134
|
+
table[key]["hour"] = time.hour
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
class GroongaLoader
|
139
|
+
include TimeDrilldownable
|
140
|
+
|
141
|
+
def initialize
|
142
|
+
FileUtils.rm_rf("/tmp/wikipedia-db")
|
143
|
+
FileUtils.mkdir_p("/tmp/wikipedia-db")
|
144
|
+
@context = Groonga::Context.new
|
145
|
+
@context.create_database("/tmp/wikipedia-db/db")
|
146
|
+
|
147
|
+
Groonga::Schema.define(:context => @context) do |schema|
|
148
|
+
schema.create_table("Users", :type => :hash, :key_type => "Int64") do |table|
|
149
|
+
table.short_text("name")
|
150
|
+
end
|
151
|
+
|
152
|
+
schema.create_table("Documents", :type => :patricia_trie, :key_type => "ShortText") do |table|
|
153
|
+
table.long_text("content")
|
154
|
+
table.time("timestamp")
|
155
|
+
define_time_columns(table)
|
156
|
+
table.reference("last_contributor", "Users")
|
157
|
+
table.column("links", "Documents", :type => :vector)
|
158
|
+
end
|
159
|
+
|
160
|
+
schema.create_table("Terms", :type => :hash, :default_tokenizer => "TokenBigram") do |table|
|
161
|
+
table.index("Documents._key")
|
162
|
+
table.index("Documents.content")
|
163
|
+
end
|
164
|
+
end
|
165
|
+
@documents = @context["Documents"]
|
166
|
+
@users = @context["Users"]
|
167
|
+
@terms = @context["Terms"]
|
168
|
+
end
|
169
|
+
|
170
|
+
LOCK_TIMEOUT_SECONDS = 10
|
171
|
+
def lock
|
172
|
+
@context.database.lock(:timeout => LOCK_TIMEOUT_SECONDS * 1000) do
|
173
|
+
yield
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
def load(page)
|
178
|
+
lock do
|
179
|
+
do_load(page)
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
def do_load(page)
|
184
|
+
content = page.delete(:content)
|
185
|
+
timestamp = page.delete(:timestamp)
|
186
|
+
title = page.delete(:title)
|
187
|
+
contributor = page.delete(:contributor)
|
188
|
+
|
189
|
+
puts "loading: #{title}"
|
190
|
+
@documents.add(title, :content => content, :timestamp => timestamp)
|
191
|
+
load_links(title, content)
|
192
|
+
add_time(@documents, title, timestamp)
|
193
|
+
|
194
|
+
if not contributor.empty?
|
195
|
+
@documents.add(title, :last_contributor => contributor[:id])
|
196
|
+
@users[contributor[:id]][:name] = contributor[:name]
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
def load_links(title, content)
|
201
|
+
links = content.scan(/\[\[.*?\]\]/)
|
202
|
+
links = links.collect do |link|
|
203
|
+
link.sub(/\A\[\[/, '').sub(/\]\]\z/, '').sub(/\|[^\|]+\z/, '')
|
204
|
+
end
|
205
|
+
@documents.add(title, :links => links)
|
206
|
+
end
|
207
|
+
end
|
208
|
+
|
209
|
+
if __FILE__ == $0
|
210
|
+
extractor = WikipediaExtractor.new(WikipediaImporter.new(GroongaLoader.new))
|
211
|
+
extractor.extract(ARGF)
|
212
|
+
end
|
@@ -0,0 +1,213 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'shellwords'
|
3
|
+
|
4
|
+
require 'groonga'
|
5
|
+
|
6
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
7
|
+
require 'create-wikipedia-database'
|
8
|
+
|
9
|
+
class SampleRecords
|
10
|
+
def initialize(record_count)
|
11
|
+
@record_count = record_count
|
12
|
+
@current_count = 0
|
13
|
+
@records = []
|
14
|
+
|
15
|
+
initialize_sample_records
|
16
|
+
end
|
17
|
+
|
18
|
+
def initialize_sample_records
|
19
|
+
extractor = WikipediaExtractor.new(WikipediaImporter.new(self))
|
20
|
+
#@records = @record_count.times.collect do
|
21
|
+
# create_random_item
|
22
|
+
#end
|
23
|
+
catch(:stop_extract) do
|
24
|
+
extractor.extract(ARGF)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def load(page)
|
29
|
+
@current_count += 1
|
30
|
+
record = {
|
31
|
+
:_key => page[:title],
|
32
|
+
:content => page[:content],
|
33
|
+
}
|
34
|
+
#pp record
|
35
|
+
@records << record
|
36
|
+
throw :stop_extract if @current_count == @record_count
|
37
|
+
end
|
38
|
+
|
39
|
+
def values(count=nil)
|
40
|
+
count ||= @record_count
|
41
|
+
|
42
|
+
if count == 1
|
43
|
+
[first_record]
|
44
|
+
else
|
45
|
+
@records[0, count - 1] + [first_record]
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def first_record
|
50
|
+
@records.first
|
51
|
+
end
|
52
|
+
|
53
|
+
def each(*arguments, &block)
|
54
|
+
values.each(*arguments, &block)
|
55
|
+
end
|
56
|
+
|
57
|
+
def n_records(count)
|
58
|
+
values(count)
|
59
|
+
end
|
60
|
+
|
61
|
+
def create_random_item
|
62
|
+
{"_key" => "ryoqun"}
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
class RepeatLoadRunner
|
67
|
+
DATABASE_DIRECTORY = "/tmp/repeat-overwrite"
|
68
|
+
|
69
|
+
def initialize(sample_records, options=nil)
|
70
|
+
@options = options || {}
|
71
|
+
@context = Groonga::Context.new(:encoding => :none)
|
72
|
+
@sample_records = sample_records
|
73
|
+
end
|
74
|
+
|
75
|
+
DEFAULT_REPEAT_COUNT = 1
|
76
|
+
DEFAULT_RECORD_COUNT = 1
|
77
|
+
def repeat_count
|
78
|
+
@options[:repeat_count] || DEFAULT_REPEAT_COUNT
|
79
|
+
end
|
80
|
+
|
81
|
+
def record_count
|
82
|
+
@options[:record_count] || DEFAULT_RECORD_COUNT
|
83
|
+
end
|
84
|
+
|
85
|
+
def with_index?
|
86
|
+
@options[:with_index]
|
87
|
+
end
|
88
|
+
|
89
|
+
def database_directory
|
90
|
+
DATABASE_DIRECTORY
|
91
|
+
end
|
92
|
+
|
93
|
+
def database_path
|
94
|
+
"#{database_directory}/db"
|
95
|
+
end
|
96
|
+
|
97
|
+
def setup_database
|
98
|
+
FileUtils.rm_rf(database_directory)
|
99
|
+
FileUtils.mkdir_p(database_directory)
|
100
|
+
|
101
|
+
@context.create_database(database_path)
|
102
|
+
Groonga::Schema.define(:context => @context) do |schema|
|
103
|
+
schema.create_table("Contents", :type => :hash, :key_type => "ShortText") do |table|
|
104
|
+
table.long_text("content")
|
105
|
+
end
|
106
|
+
|
107
|
+
if with_index?
|
108
|
+
schema.create_table("Terms", :type => :hash, :default_tokenizer => "TokenBigram") do |table|
|
109
|
+
table.index("Contents._key")
|
110
|
+
table.index("Contents.content")
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
def run
|
117
|
+
setup_database
|
118
|
+
|
119
|
+
before_load
|
120
|
+
add_record
|
121
|
+
after_load
|
122
|
+
end
|
123
|
+
|
124
|
+
private
|
125
|
+
def add_record
|
126
|
+
puts "loading..."
|
127
|
+
repeat_count.times do |count|
|
128
|
+
add_record_via_load_command
|
129
|
+
if repeat_count != 1 and count.zero?
|
130
|
+
after_first_load
|
131
|
+
end
|
132
|
+
end
|
133
|
+
puts "... (#{repeat_count} times repeated)"
|
134
|
+
puts
|
135
|
+
end
|
136
|
+
|
137
|
+
def add_record_via_load_command
|
138
|
+
puts "iteration: loading #{record_count} records"
|
139
|
+
count = 0
|
140
|
+
@sample_records.n_records(record_count).each do |record|
|
141
|
+
mangle_record(record)
|
142
|
+
|
143
|
+
#pp record
|
144
|
+
command = "load --table Contents --input_type json --values '#{JSON.generate(record).gsub(/\'/, '').gsub(/\"/, '\\\"')}'"
|
145
|
+
#puts command
|
146
|
+
@context.send(command)
|
147
|
+
if record_count != 1 and count.zero?
|
148
|
+
after_first_load
|
149
|
+
end
|
150
|
+
count += 1
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
def mangle_record(record)
|
155
|
+
record[:content] = record[:content][0, 400]
|
156
|
+
record[:_key] = :FIXED_KEY
|
157
|
+
end
|
158
|
+
|
159
|
+
def before_load
|
160
|
+
puts "before load:"
|
161
|
+
measure_database_size
|
162
|
+
puts
|
163
|
+
end
|
164
|
+
|
165
|
+
def after_first_load
|
166
|
+
puts "after first load:"
|
167
|
+
measure_database_size
|
168
|
+
puts
|
169
|
+
end
|
170
|
+
|
171
|
+
def after_load
|
172
|
+
puts "after load:"
|
173
|
+
measure_database_size
|
174
|
+
puts
|
175
|
+
end
|
176
|
+
|
177
|
+
def measure_database_size
|
178
|
+
#measure_apparent_size
|
179
|
+
measure_actual_size
|
180
|
+
end
|
181
|
+
|
182
|
+
def measure_apparent_size
|
183
|
+
puts "apparent disk usage:"
|
184
|
+
puts execute_du("--apparent-size")
|
185
|
+
end
|
186
|
+
|
187
|
+
def measure_actual_size
|
188
|
+
puts "actual disk usage:"
|
189
|
+
puts execute_du
|
190
|
+
end
|
191
|
+
|
192
|
+
def execute_du(options=nil)
|
193
|
+
`sync`
|
194
|
+
`find #{database_directory} -type f -print0 | xargs -0 du --human-readable #{options.to_s} | sort -k 2 | uniq`
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
198
|
+
sample_records = SampleRecords.new(1000)
|
199
|
+
|
200
|
+
#puts "load one record, repeat one time"
|
201
|
+
#RepeatLoadRunner.new(sample_records).run
|
202
|
+
|
203
|
+
#puts "load one record, repeat 100 time"
|
204
|
+
#RepeatLoadRunner.new(sample_records, :repeat_count => 100).run
|
205
|
+
|
206
|
+
#puts "load one record, repeat 100 time with index column defined"
|
207
|
+
#RepeatLoadRunner.new(sample_records, :repeat_count => 100, :with_index => true).run
|
208
|
+
|
209
|
+
puts "load 100 records, repeat 1 time"
|
210
|
+
RepeatLoadRunner.new(sample_records, :record_count => 100).run
|
211
|
+
|
212
|
+
puts "load 100 records, repeat 1 time with index column defined"
|
213
|
+
RepeatLoadRunner.new(sample_records, :record_count => 100, :with_index => true).run
|