rroonga 1.0.8 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/NEWS.ja.rdoc +47 -0
- data/NEWS.rdoc +48 -0
- data/README.ja.rdoc +1 -0
- data/README.rdoc +1 -0
- data/Rakefile +32 -13
- data/benchmark/create-wikipedia-database.rb +212 -0
- data/benchmark/repeat-load.rb +213 -0
- data/benchmark/select.rb +1052 -0
- data/ext/groonga/mkmf.log +99 -0
- data/ext/groonga/rb-grn-column.c +57 -6
- data/ext/groonga/rb-grn-context.c +15 -9
- data/ext/groonga/rb-grn-expression.c +7 -7
- data/ext/groonga/{rb-grn-operation.c → rb-grn-operator.c} +89 -87
- data/ext/groonga/rb-grn-patricia-trie.c +5 -5
- data/ext/groonga/rb-grn-query.c +4 -4
- data/ext/groonga/rb-grn-table.c +16 -19
- data/ext/groonga/rb-grn.h +3 -3
- data/ext/groonga/rb-groonga.c +1 -1
- data/html/index.html +4 -4
- data/lib/groonga/context.rb +34 -0
- data/lib/groonga/expression-builder.rb +34 -2
- data/lib/groonga/record.rb +8 -6
- data/lib/groonga/schema.rb +40 -4
- data/rroonga-build.rb +2 -2
- data/test-unit/Rakefile +5 -18
- data/test-unit/html/classic.html +15 -0
- data/test-unit/html/index.html +13 -235
- data/test-unit/html/index.html.ja +15 -258
- data/test-unit/lib/test/unit.rb +1 -6
- data/test-unit/lib/test/unit/assertions.rb +11 -115
- data/test-unit/lib/test/unit/autorunner.rb +2 -5
- data/test-unit/lib/test/unit/collector/load.rb +1 -1
- data/test-unit/lib/test/unit/color-scheme.rb +2 -6
- data/test-unit/lib/test/unit/diff.rb +1 -17
- data/test-unit/lib/test/unit/testcase.rb +0 -7
- data/test-unit/lib/test/unit/testresult.rb +2 -34
- data/test-unit/lib/test/unit/ui/console/testrunner.rb +45 -9
- data/test-unit/lib/test/unit/ui/tap/testrunner.rb +12 -2
- data/test-unit/lib/test/unit/ui/testrunner.rb +0 -25
- data/test-unit/lib/test/unit/util/backtracefilter.rb +0 -1
- data/test-unit/lib/test/unit/version.rb +1 -1
- data/test-unit/test/test-color-scheme.rb +2 -4
- data/test-unit/test/test_assertions.rb +5 -51
- data/test/test-column.rb +31 -1
- data/test/test-context-select.rb +45 -14
- data/test/test-context.rb +36 -0
- data/test/test-database.rb +13 -0
- data/test/test-expression-builder.rb +32 -5
- data/test/test-record.rb +34 -1
- data/test/test-schema.rb +52 -2
- data/test/test-table-select-weight.rb +20 -1
- data/test/test-table.rb +58 -0
- metadata +13 -41
- data/test-unit-notify/Rakefile +0 -47
- data/test-unit-notify/lib/test/unit/notify.rb +0 -104
- data/test-unit/COPYING +0 -56
- data/test-unit/GPL +0 -340
- data/test-unit/PSFL +0 -271
- data/test-unit/html/bar.svg +0 -153
- data/test-unit/html/developer.svg +0 -469
- data/test-unit/html/favicon.ico +0 -0
- data/test-unit/html/favicon.svg +0 -82
- data/test-unit/html/heading-mark.svg +0 -393
- data/test-unit/html/install.svg +0 -636
- data/test-unit/html/logo.svg +0 -483
- data/test-unit/html/test-unit.css +0 -339
- data/test-unit/html/tutorial.svg +0 -559
- data/test-unit/lib/test/unit/util/output.rb +0 -31
- data/test-unit/test/ui/test_tap.rb +0 -33
- data/test-unit/test/util/test-output.rb +0 -11
data/NEWS.ja.rdoc
CHANGED
@@ -1,5 +1,52 @@
|
|
1
1
|
= お知らせ
|
2
2
|
|
3
|
+
== 1.0.9: 2011-01-29
|
4
|
+
|
5
|
+
=== 改良
|
6
|
+
|
7
|
+
* Windows上でのgem作成に対応。
|
8
|
+
[ongaeshiさんがパッチ提供]
|
9
|
+
* Groonga::Schemaでテーブル・カラムを削除した時に作成したディレクトリを
|
10
|
+
削除するようにした。
|
11
|
+
* Groonga::Context#create_databaseを追加。
|
12
|
+
* Groonga::Context#open_databaseを追加。
|
13
|
+
* Groonga::Column#indexesを追加。
|
14
|
+
* Groonga::Table#select内でのインデックスカラムにマッチする記法をサポート。
|
15
|
+
table.select do |record|
|
16
|
+
record.match("query") do |match_record|
|
17
|
+
(match_record.index("Terms.title") * 1000) |
|
18
|
+
(match_record.index("Terms.description") * 100) |
|
19
|
+
match_record.content
|
20
|
+
end
|
21
|
+
end
|
22
|
+
* Groonga::Table#select内での前方一致検索に対応。
|
23
|
+
table.select do |record|
|
24
|
+
record.name.prefix_search("groo")
|
25
|
+
end
|
26
|
+
* Groonga::Table#select内での後方一致検索に対応。
|
27
|
+
table.select do |record|
|
28
|
+
record.name.suffix_search("nga")
|
29
|
+
end
|
30
|
+
* :default_tokenizerのスキーマダンプに対応。
|
31
|
+
* :key_normalizeのスキーマダンプに対応。
|
32
|
+
* Groonga::Table#have_column?が擬似カラムに対応。
|
33
|
+
* Groonga::Record#have_column?が擬似カラムに対応。
|
34
|
+
|
35
|
+
=== 変更
|
36
|
+
|
37
|
+
* Groonga::OperationをGroonga::Operatorに変更。
|
38
|
+
(しばらくはGroonga::Operationも使えるが、非推奨。)
|
39
|
+
|
40
|
+
=== 修正
|
41
|
+
|
42
|
+
* 複数のGroonga::Contextを利用した場合に
|
43
|
+
Groonga::Table#selectでクラッシュする問題を修正。
|
44
|
+
* 例外発生時にクラッシュする問題を修正。
|
45
|
+
|
46
|
+
=== 感謝
|
47
|
+
|
48
|
+
* ongaeshiさん
|
49
|
+
|
3
50
|
== 1.0.8: 2010-12-25
|
4
51
|
|
5
52
|
=== 改良
|
data/NEWS.rdoc
CHANGED
@@ -1,5 +1,53 @@
|
|
1
1
|
= NEWS
|
2
2
|
|
3
|
+
== 1.0.9: 2011-01-29
|
4
|
+
|
5
|
+
=== Improvements
|
6
|
+
|
7
|
+
* Supported gem creation on Windows.
|
8
|
+
[Patch by ongaeshi]
|
9
|
+
* Supported removal of the directory generated by Groonga::Schema
|
10
|
+
when table or column is removed.
|
11
|
+
* Added Groonga::Context#create_database.
|
12
|
+
* Added Groonga::Context#open_database.
|
13
|
+
* Added Groonga::Column#indexes.
|
14
|
+
* Supported a notation for specifying an index column as the match target in
|
15
|
+
Groonga::Table#select:
|
16
|
+
table.select do |record|
|
17
|
+
record.match("query") do |match_record|
|
18
|
+
(match_record.index("Terms.title") * 1000) |
|
19
|
+
(match_record.index("Terms.description") * 100) |
|
20
|
+
match_record.content
|
21
|
+
end
|
22
|
+
end
|
23
|
+
* Supported prefix search in Groonga::Table#select:
|
24
|
+
table.select do |record|
|
25
|
+
record.name.prefix_search("groo")
|
26
|
+
end
|
27
|
+
* Supported suffix search in Groonga::Table#select:
|
28
|
+
table.select do |record|
|
29
|
+
record.name.suffix_search("nga")
|
30
|
+
end
|
31
|
+
* Supported :default_tokenizer schema dump.
|
32
|
+
* Supported :key_normalize schema dump.
|
33
|
+
* Supported pseudo columns by Groonga::Table#have_column?.
|
34
|
+
* Supported pseudo columns by Groonga::Record#have_column?.
|
35
|
+
|
36
|
+
=== Changes
|
37
|
+
|
38
|
+
* Renamed Groonga::Operation to Groonga::Operator.
|
39
|
+
(Groonga::Operation is deprecated but still usable.)
|
40
|
+
|
41
|
+
=== Fixes
|
42
|
+
|
43
|
+
* Fixed a crash bug when a non-default Groonga::Context is used in
|
44
|
+
Groonga::Table#select.
|
45
|
+
* Fixed a crash bug when an exception occurs.
|
46
|
+
|
47
|
+
=== Thanks
|
48
|
+
|
49
|
+
* ongaeshi
|
50
|
+
|
3
51
|
== 1.0.8: 2010-12-25
|
4
52
|
|
5
53
|
=== Improvements
|
data/README.ja.rdoc
CHANGED
data/README.rdoc
CHANGED
data/Rakefile
CHANGED
@@ -144,28 +144,41 @@ ObjectSpace.each_object(Rake::RDocTask) do |rdoc_task|
|
|
144
144
|
rdoc_task.rdoc_files += Dir.glob("**/*.rdoc")
|
145
145
|
end
|
146
146
|
|
147
|
+
def windows?(platform=nil)
|
148
|
+
platform ||= RUBY_PLATFORM
|
149
|
+
platform =~ /mswin(?!ce)|mingw|cygwin|bccwin/
|
150
|
+
end
|
151
|
+
|
152
|
+
def collect_binary_files(binary_dir)
|
153
|
+
binary_files = []
|
154
|
+
Find.find(binary_dir) do |name|
|
155
|
+
next unless File.file?(name)
|
156
|
+
next if /\.zip\z/i =~ name
|
157
|
+
binary_files << name
|
158
|
+
end
|
159
|
+
binary_files
|
160
|
+
end
|
161
|
+
|
147
162
|
relative_vendor_dir = "vendor"
|
148
163
|
relative_binary_dir = File.join("vendor", "local")
|
149
164
|
vendor_dir = File.join(base_dir, relative_vendor_dir)
|
150
165
|
binary_dir = File.join(base_dir, relative_binary_dir)
|
166
|
+
|
151
167
|
Rake::ExtensionTask.new("groonga", project.spec) do |ext|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
binary_files << name
|
168
|
+
if windows?
|
169
|
+
ext.gem_spec.files += collect_binary_files(relative_binary_dir)
|
170
|
+
else
|
171
|
+
ext.cross_compile = true
|
172
|
+
ext.cross_compiling do |spec|
|
173
|
+
if windows?(spec.platform.to_s)
|
174
|
+
spec.files += collect_binary_files(relative_binary_dir)
|
160
175
|
end
|
161
|
-
spec.files += binary_files
|
162
176
|
end
|
163
177
|
end
|
164
178
|
end
|
165
179
|
|
166
180
|
task :publish_docs => [:prepare_docs_for_publishing]
|
167
181
|
|
168
|
-
|
169
182
|
include ERB::Util
|
170
183
|
|
171
184
|
def apply_template(file, head, header, footer)
|
@@ -236,8 +249,14 @@ project.spec.executables.clear
|
|
236
249
|
task(:release).prerequisites.reject! {|name| name == "clean"}
|
237
250
|
|
238
251
|
namespace :win32 do
|
252
|
+
if ENV["GROONGA64"] == "yes"
|
253
|
+
host = "amd64-mingw32msvc"
|
254
|
+
else
|
255
|
+
host = "i586-mingw32msvc"
|
256
|
+
end
|
257
|
+
|
239
258
|
desc "Build MeCab and groonga and install them into vendor/local/."
|
240
|
-
task(:build => :build_groonga)
|
259
|
+
task(:build => [:build_mecab, :build_mecab_dict, :build_groonga])
|
241
260
|
|
242
261
|
desc "Build MeCab and install it into vendor/local/."
|
243
262
|
task(:build_mecab) do
|
@@ -260,7 +279,7 @@ namespace :win32 do
|
|
260
279
|
Dir.chdir(File.join(tmp_dir, mecab_base)) do
|
261
280
|
sh("./configure",
|
262
281
|
"--prefix=#{binary_dir}",
|
263
|
-
"--host
|
282
|
+
"--host=#{host}") or exit(false)
|
264
283
|
sh("env", "GREP_OPTIONS=--text", "nice", "make", "-j8") or exit(false)
|
265
284
|
sh("env", "GREP_OPTIONS=--text", "make", "install") or exit(false)
|
266
285
|
|
@@ -321,7 +340,7 @@ namespace :win32 do
|
|
321
340
|
mecab_config = File.join(binary_dir, "bin", "mecab-config")
|
322
341
|
args = ["./configure",
|
323
342
|
"--prefix=#{binary_dir}",
|
324
|
-
"--host
|
343
|
+
"--host=#{host}",
|
325
344
|
"--without-cutter",
|
326
345
|
"--disable-benchmark"]
|
327
346
|
if File.exist?(mecab_config)
|
@@ -0,0 +1,212 @@
|
|
1
|
+
# Wikipedia data: http://download.wikimedia.org/jawiki/latest/jawiki-latest-pages-articles.xml.bz2
|
2
|
+
|
3
|
+
require 'time'
|
4
|
+
require 'fileutils'
|
5
|
+
require 'groonga'
|
6
|
+
|
7
|
+
require 'nokogiri'
|
8
|
+
|
9
|
+
class WikipediaExtractor
|
10
|
+
def initialize(listener)
|
11
|
+
@listener = listener
|
12
|
+
end
|
13
|
+
|
14
|
+
def extract(input)
|
15
|
+
extractor = Extractor.new(@listener)
|
16
|
+
parser = Nokogiri::XML::SAX::Parser.new(extractor)
|
17
|
+
parser.parse(input)
|
18
|
+
end
|
19
|
+
|
20
|
+
class Extractor
|
21
|
+
def initialize(listener)
|
22
|
+
@listener = listener
|
23
|
+
@name_stack = []
|
24
|
+
@text_stack = []
|
25
|
+
@contributor_stack = []
|
26
|
+
@page = {}
|
27
|
+
end
|
28
|
+
|
29
|
+
def start_document
|
30
|
+
end
|
31
|
+
|
32
|
+
def end_document
|
33
|
+
end
|
34
|
+
|
35
|
+
def start_element_namespace(name, attrs=[], prefix=nil, uri=nil, ns=[])
|
36
|
+
@name_stack << name
|
37
|
+
@text_stack << ""
|
38
|
+
case @name_stack.join(".")
|
39
|
+
when "mediawiki.page"
|
40
|
+
@page = {}
|
41
|
+
when "mediawiki.page.revision.contributor"
|
42
|
+
@contributor_stack << {}
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def end_element_namespace(name, prefix=nil, uri=nil)
|
47
|
+
case @name_stack.join(".")
|
48
|
+
when "mediawiki.page"
|
49
|
+
@listener.page(@page)
|
50
|
+
when "mediawiki.page.title"
|
51
|
+
title = @text_stack.last
|
52
|
+
@page[:title] = @listener.title(title) || title
|
53
|
+
when "mediawiki.page.revision.timestamp"
|
54
|
+
timestamp = Time.parse(@text_stack.last)
|
55
|
+
@page[:timestamp] = @listener.timestamp(timestamp) || timestamp
|
56
|
+
when "mediawiki.page.revision.contributor"
|
57
|
+
contributor = @contributor_stack.pop
|
58
|
+
@page[:contributor] = @listener.contributor(contributor) || contributor
|
59
|
+
when "mediawiki.page.revision.contributor.id"
|
60
|
+
@contributor_stack.last[:id] = Integer(@text_stack.last)
|
61
|
+
when "mediawiki.page.revision.contributor.username"
|
62
|
+
@contributor_stack.last[:name] = @text_stack.last
|
63
|
+
when "mediawiki.page.revision.text"
|
64
|
+
content = @text_stack.last
|
65
|
+
@page[:content] = @listener.content(content) || content
|
66
|
+
end
|
67
|
+
@name_stack.pop
|
68
|
+
@text_stack.pop
|
69
|
+
end
|
70
|
+
|
71
|
+
def characters(string)
|
72
|
+
elements_without_interested_text = [
|
73
|
+
"mediawiki", "siteinfo", "case",
|
74
|
+
"namespaces", "revisions",
|
75
|
+
"contributor",
|
76
|
+
]
|
77
|
+
return if elements_without_interested_text.include?(@name_stack.last)
|
78
|
+
@text_stack.last << string
|
79
|
+
end
|
80
|
+
|
81
|
+
def xmldecl(*arguments, &block)
|
82
|
+
end
|
83
|
+
|
84
|
+
def comment(string)
|
85
|
+
end
|
86
|
+
|
87
|
+
def warning(string)
|
88
|
+
end
|
89
|
+
|
90
|
+
def error(string)
|
91
|
+
end
|
92
|
+
|
93
|
+
def cdata_block(string)
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
class WikipediaImporter
|
99
|
+
def initialize(groonga_loader)
|
100
|
+
@groonga_loader = groonga_loader
|
101
|
+
end
|
102
|
+
|
103
|
+
def title(title)
|
104
|
+
end
|
105
|
+
|
106
|
+
def timestamp(timestamp)
|
107
|
+
end
|
108
|
+
|
109
|
+
def contributor(contributor)
|
110
|
+
end
|
111
|
+
|
112
|
+
def content(content)
|
113
|
+
end
|
114
|
+
|
115
|
+
def page(page)
|
116
|
+
@groonga_loader.load(page)
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
module TimeDrilldownable
|
121
|
+
def define_time_columns(table)
|
122
|
+
table.int32("year")
|
123
|
+
table.int32("month")
|
124
|
+
table.short_text("date")
|
125
|
+
table.int32("wday")
|
126
|
+
table.int32("hour")
|
127
|
+
end
|
128
|
+
|
129
|
+
def add_time(table, key, time)
|
130
|
+
table[key]["year"] = time.year
|
131
|
+
table[key]["month"] = time.month
|
132
|
+
table[key]["date"] = time.strftime("%m/%d")
|
133
|
+
table[key]["wday"] = time.wday
|
134
|
+
table[key]["hour"] = time.hour
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
class GroongaLoader
|
139
|
+
include TimeDrilldownable
|
140
|
+
|
141
|
+
def initialize
|
142
|
+
FileUtils.rm_rf("/tmp/wikipedia-db")
|
143
|
+
FileUtils.mkdir_p("/tmp/wikipedia-db")
|
144
|
+
@context = Groonga::Context.new
|
145
|
+
@context.create_database("/tmp/wikipedia-db/db")
|
146
|
+
|
147
|
+
Groonga::Schema.define(:context => @context) do |schema|
|
148
|
+
schema.create_table("Users", :type => :hash, :key_type => "Int64") do |table|
|
149
|
+
table.short_text("name")
|
150
|
+
end
|
151
|
+
|
152
|
+
schema.create_table("Documents", :type => :patricia_trie, :key_type => "ShortText") do |table|
|
153
|
+
table.long_text("content")
|
154
|
+
table.time("timestamp")
|
155
|
+
define_time_columns(table)
|
156
|
+
table.reference("last_contributor", "Users")
|
157
|
+
table.column("links", "Documents", :type => :vector)
|
158
|
+
end
|
159
|
+
|
160
|
+
schema.create_table("Terms", :type => :hash, :default_tokenizer => "TokenBigram") do |table|
|
161
|
+
table.index("Documents._key")
|
162
|
+
table.index("Documents.content")
|
163
|
+
end
|
164
|
+
end
|
165
|
+
@documents = @context["Documents"]
|
166
|
+
@users = @context["Users"]
|
167
|
+
@terms = @context["Terms"]
|
168
|
+
end
|
169
|
+
|
170
|
+
LOCK_TIMEOUT_SECONDS = 10
|
171
|
+
def lock
|
172
|
+
@context.database.lock(:timeout => LOCK_TIMEOUT_SECONDS * 1000) do
|
173
|
+
yield
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
def load(page)
|
178
|
+
lock do
|
179
|
+
do_load(page)
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
def do_load(page)
|
184
|
+
content = page.delete(:content)
|
185
|
+
timestamp = page.delete(:timestamp)
|
186
|
+
title = page.delete(:title)
|
187
|
+
contributor = page.delete(:contributor)
|
188
|
+
|
189
|
+
puts "loading: #{title}"
|
190
|
+
@documents.add(title, :content => content, :timestamp => timestamp)
|
191
|
+
load_links(title, content)
|
192
|
+
add_time(@documents, title, timestamp)
|
193
|
+
|
194
|
+
if not contributor.empty?
|
195
|
+
@documents.add(title, :last_contributor => contributor[:id])
|
196
|
+
@users[contributor[:id]][:name] = contributor[:name]
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
def load_links(title, content)
|
201
|
+
links = content.scan(/\[\[.*?\]\]/)
|
202
|
+
links = links.collect do |link|
|
203
|
+
link.sub(/\A\[\[/, '').sub(/\]\]\z/, '').sub(/\|[^\|]+\z/, '')
|
204
|
+
end
|
205
|
+
@documents.add(title, :links => links)
|
206
|
+
end
|
207
|
+
end
|
208
|
+
|
209
|
+
if __FILE__ == $0
|
210
|
+
extractor = WikipediaExtractor.new(WikipediaImporter.new(GroongaLoader.new))
|
211
|
+
extractor.extract(ARGF)
|
212
|
+
end
|
@@ -0,0 +1,213 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'shellwords'
|
3
|
+
|
4
|
+
require 'groonga'
|
5
|
+
|
6
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
7
|
+
require 'create-wikipedia-database'
|
8
|
+
|
9
|
+
class SampleRecords
|
10
|
+
def initialize(record_count)
|
11
|
+
@record_count = record_count
|
12
|
+
@current_count = 0
|
13
|
+
@records = []
|
14
|
+
|
15
|
+
initialize_sample_records
|
16
|
+
end
|
17
|
+
|
18
|
+
def initialize_sample_records
|
19
|
+
extractor = WikipediaExtractor.new(WikipediaImporter.new(self))
|
20
|
+
#@records = @record_count.times.collect do
|
21
|
+
# create_random_item
|
22
|
+
#end
|
23
|
+
catch(:stop_extract) do
|
24
|
+
extractor.extract(ARGF)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def load(page)
|
29
|
+
@current_count += 1
|
30
|
+
record = {
|
31
|
+
:_key => page[:title],
|
32
|
+
:content => page[:content],
|
33
|
+
}
|
34
|
+
#pp record
|
35
|
+
@records << record
|
36
|
+
throw :stop_extract if @current_count == @record_count
|
37
|
+
end
|
38
|
+
|
39
|
+
def values(count=nil)
|
40
|
+
count ||= @record_count
|
41
|
+
|
42
|
+
if count == 1
|
43
|
+
[first_record]
|
44
|
+
else
|
45
|
+
@records[0, count - 1] + [first_record]
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def first_record
|
50
|
+
@records.first
|
51
|
+
end
|
52
|
+
|
53
|
+
def each(*arguments, &block)
|
54
|
+
values.each(*arguments, &block)
|
55
|
+
end
|
56
|
+
|
57
|
+
def n_records(count)
|
58
|
+
values(count)
|
59
|
+
end
|
60
|
+
|
61
|
+
def create_random_item
|
62
|
+
{"_key" => "ryoqun"}
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
class RepeatLoadRunner
|
67
|
+
DATABASE_DIRECTORY = "/tmp/repeat-overwrite"
|
68
|
+
|
69
|
+
def initialize(sample_records, options=nil)
|
70
|
+
@options = options || {}
|
71
|
+
@context = Groonga::Context.new(:encoding => :none)
|
72
|
+
@sample_records = sample_records
|
73
|
+
end
|
74
|
+
|
75
|
+
DEFAULT_REPEAT_COUNT = 1
|
76
|
+
DEFAULT_RECORD_COUNT = 1
|
77
|
+
def repeat_count
|
78
|
+
@options[:repeat_count] || DEFAULT_REPEAT_COUNT
|
79
|
+
end
|
80
|
+
|
81
|
+
def record_count
|
82
|
+
@options[:record_count] || DEFAULT_RECORD_COUNT
|
83
|
+
end
|
84
|
+
|
85
|
+
def with_index?
|
86
|
+
@options[:with_index]
|
87
|
+
end
|
88
|
+
|
89
|
+
def database_directory
|
90
|
+
DATABASE_DIRECTORY
|
91
|
+
end
|
92
|
+
|
93
|
+
def database_path
|
94
|
+
"#{database_directory}/db"
|
95
|
+
end
|
96
|
+
|
97
|
+
def setup_database
|
98
|
+
FileUtils.rm_rf(database_directory)
|
99
|
+
FileUtils.mkdir_p(database_directory)
|
100
|
+
|
101
|
+
@context.create_database(database_path)
|
102
|
+
Groonga::Schema.define(:context => @context) do |schema|
|
103
|
+
schema.create_table("Contents", :type => :hash, :key_type => "ShortText") do |table|
|
104
|
+
table.long_text("content")
|
105
|
+
end
|
106
|
+
|
107
|
+
if with_index?
|
108
|
+
schema.create_table("Terms", :type => :hash, :default_tokenizer => "TokenBigram") do |table|
|
109
|
+
table.index("Contents._key")
|
110
|
+
table.index("Contents.content")
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
def run
|
117
|
+
setup_database
|
118
|
+
|
119
|
+
before_load
|
120
|
+
add_record
|
121
|
+
after_load
|
122
|
+
end
|
123
|
+
|
124
|
+
private
|
125
|
+
def add_record
|
126
|
+
puts "loading..."
|
127
|
+
repeat_count.times do |count|
|
128
|
+
add_record_via_load_command
|
129
|
+
if repeat_count != 1 and count.zero?
|
130
|
+
after_first_load
|
131
|
+
end
|
132
|
+
end
|
133
|
+
puts "... (#{repeat_count} times repeated)"
|
134
|
+
puts
|
135
|
+
end
|
136
|
+
|
137
|
+
def add_record_via_load_command
|
138
|
+
puts "iteration: loading #{record_count} records"
|
139
|
+
count = 0
|
140
|
+
@sample_records.n_records(record_count).each do |record|
|
141
|
+
mangle_record(record)
|
142
|
+
|
143
|
+
#pp record
|
144
|
+
command = "load --table Contents --input_type json --values '#{JSON.generate(record).gsub(/\'/, '').gsub(/\"/, '\\\"')}'"
|
145
|
+
#puts command
|
146
|
+
@context.send(command)
|
147
|
+
if record_count != 1 and count.zero?
|
148
|
+
after_first_load
|
149
|
+
end
|
150
|
+
count += 1
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
def mangle_record(record)
|
155
|
+
record[:content] = record[:content][0, 400]
|
156
|
+
record[:_key] = :FIXED_KEY
|
157
|
+
end
|
158
|
+
|
159
|
+
def before_load
|
160
|
+
puts "before load:"
|
161
|
+
measure_database_size
|
162
|
+
puts
|
163
|
+
end
|
164
|
+
|
165
|
+
def after_first_load
|
166
|
+
puts "after first load:"
|
167
|
+
measure_database_size
|
168
|
+
puts
|
169
|
+
end
|
170
|
+
|
171
|
+
def after_load
|
172
|
+
puts "after load:"
|
173
|
+
measure_database_size
|
174
|
+
puts
|
175
|
+
end
|
176
|
+
|
177
|
+
def measure_database_size
|
178
|
+
#measure_apparent_size
|
179
|
+
measure_actual_size
|
180
|
+
end
|
181
|
+
|
182
|
+
def measure_apparent_size
|
183
|
+
puts "apparent disk usage:"
|
184
|
+
puts execute_du("--apparent-size")
|
185
|
+
end
|
186
|
+
|
187
|
+
def measure_actual_size
|
188
|
+
puts "actual disk usage:"
|
189
|
+
puts execute_du
|
190
|
+
end
|
191
|
+
|
192
|
+
def execute_du(options=nil)
|
193
|
+
`sync`
|
194
|
+
`find #{database_directory} -type f -print0 | xargs -0 du --human-readable #{options.to_s} | sort -k 2 | uniq`
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
198
|
+
sample_records = SampleRecords.new(1000)
|
199
|
+
|
200
|
+
#puts "load one record, repeat one time"
|
201
|
+
#RepeatLoadRunner.new(sample_records).run
|
202
|
+
|
203
|
+
#puts "load one record, repeat 100 time"
|
204
|
+
#RepeatLoadRunner.new(sample_records, :repeat_count => 100).run
|
205
|
+
|
206
|
+
#puts "load one record, repeat 100 time with index column defined"
|
207
|
+
#RepeatLoadRunner.new(sample_records, :repeat_count => 100, :with_index => true).run
|
208
|
+
|
209
|
+
puts "load 100 records, repeat 1 time"
|
210
|
+
RepeatLoadRunner.new(sample_records, :record_count => 100).run
|
211
|
+
|
212
|
+
puts "load 100 records, repeat 1 time with index column defined"
|
213
|
+
RepeatLoadRunner.new(sample_records, :record_count => 100, :with_index => true).run
|