tx 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.txt +1 -0
- data/ext/Makefile +163 -0
- data/ext/depend +6 -0
- data/ext/extconf.rb +15 -0
- data/ext/ssv.cpp +355 -0
- data/ext/ssv.hpp +93 -0
- data/ext/swig.patch +192 -0
- data/ext/tx.cpp +442 -0
- data/ext/tx.hpp +62 -0
- data/ext/tx_swig.cpp +164 -0
- data/ext/tx_swig.h +93 -0
- data/ext/tx_swig.i +17 -0
- data/ext/tx_swig_wrap.cxx +9884 -0
- data/lib/i386-msvcrt/tx_core.so +0 -0
- data/lib/tx.rb +219 -0
- data/test/test_tx.rb +169 -0
- metadata +86 -0
Binary file
|
data/lib/tx.rb
ADDED
@@ -0,0 +1,219 @@
|
|
1
|
+
require "tx_core"
|
2
|
+
require "forwardable"
|
3
|
+
|
4
|
+
|
5
|
+
module Tx #:nodoc: all

  # Helpers shared by Index and Map: generation of boundary-checked wrapper
  # methods, plus small encoding utilities that differ between Ruby 1.8 and
  # Ruby 1.9 or later.
  module Util

    module_function

    # Defines wrapper methods which perform boundary checking of pos and len.
    #
    # For every name in +methods+ an instance method of that name is defined.
    # The generated method accepts (str, pos, len, *opt), normalizes pos and
    # len, forwards the call to the same-named method of @unsafe and passes
    # the result through add_encoding:
    #
    # * a negative pos raises ArgumentError;
    # * a missing pos becomes 0, a pos beyond the end of str is clamped to
    #   the byte length of str;
    # * a missing, negative or too large len becomes "up to the end of str".
    def def_wrapper_methods(*methods)
      methods.each() do |name|
        define_method(name) do |*args|
          (str, pos, len, *opt) = args
          raise(ArgumentError, "argument pos is negative") if pos && pos < 0
          str_len = bytesize(str)
          pos ||= 0
          pos = str_len if pos > str_len
          remaining = str_len - pos
          len = remaining if !len || len < 0 || len > remaining
          add_encoding(@unsafe.__send__(name, str, pos, len, *opt))
        end
      end
    end

    if RUBY_VERSION >= "1.9.0"

      # Encoding used when the caller does not specify one:
      # Encoding.default_internal, or UTF-8 when that is not set.
      def default_encoding
        return Encoding.default_internal || Encoding::UTF_8
      end

      # Tags +obj+ in place with @encoding and returns it. Strings are
      # force_encoded, Arrays are processed element by element, any other
      # object is returned untouched.
      def add_encoding(obj)
        case obj
        when Array
          obj.each(){ |e| add_encoding(e) }
        when String
          obj.force_encoding(@encoding)
        end
        return obj
      end

      # Returns a copy of +str+ tagged as ASCII-8BIT so that it can be sliced
      # by byte offsets.
      def to_binary(str)
        return str.dup().force_encoding(Encoding::ASCII_8BIT)
      end

      # Length of +str+ in bytes.
      def bytesize(str)
        return str.bytesize
      end

    else

      # Fallbacks for Rubies older than 1.9: no encoding is attached and
      # String#length is used as the size.

      def default_encoding
        return nil
      end

      def add_encoding(obj)
        return obj
      end

      def to_binary(str)
        return str
      end

      def bytesize(str)
        return str.length
      end

    end

  end

  # Wrapper of UnsafeIndex. Boundary checking of pos/len and some methods are added.
  class Index

    extend(Forwardable)
    extend(Util)
    include(Util)
    include(Enumerable)

    class << self
      alias open new
    end

    # +arg+ is either an UnsafeIndex, which is wrapped as is, or a value that
    # is handed to UnsafeIndex#open. Raises IOError when that open call
    # reports failure. +encoding+ falls back to default_encoding when omitted.
    def initialize(arg, encoding = nil)
      if arg.is_a?(UnsafeIndex)
        @unsafe = arg
      else
        @unsafe = UnsafeIndex.new()
        if !@unsafe.open(arg)
          raise(IOError, "failed to open #{arg}")
        end
      end
      @encoding = encoding || default_encoding()
    end

    attr_reader(:encoding)
    def_delegators(:@unsafe, :num_keys, :result_log, :error_log)
    def_wrapper_methods(:longest_prefix, :include, :search_prefixes, :search_expansions)
    alias common_prefix_search search_prefixes
    alias predictive_search search_expansions
    alias include? include
    alias size num_keys

    def inspect()
      return "\#<%p:0x%x>" % [self.class, self.object_id]
    end

    # All keys, obtained as the expansions of the empty string.
    def to_a()
      return search_expansions("")
    end

    # Iterates over the keys returned by to_a.
    def each(&block)
      to_a().each(&block)
    end

    # Scans +str+ from left to right. At each byte position longest_prefix is
    # consulted; a non-negative result is treated as a match of that many
    # bytes. Each match is reported as (matched substring, byte position).
    #
    # With a block the matches are yielded and +str+ is returned; without a
    # block an Array of [substring, position] pairs is returned.
    def scan(str, &block)
      bstr = to_binary(str)
      result = []
      # The byte length does not change during the scan, so compute it once.
      total = bytesize(str)
      pos = 0
      while pos < total
        plen = longest_prefix(str, pos)
        if plen >= 0
          args = [add_encoding(bstr[pos, plen]), pos]
          block ? yield(*args) : result.push(args)
        end
        # Always advance by at least one byte so the loop terminates.
        pos += plen > 0 ? plen : 1
      end
      return block ? str : result
    end

    # Returns a copy of +str+ in which every match found by scan is replaced
    # with the value the block returns for (match, byte position). Text
    # between matches is copied unchanged.
    def gsub(str, &block)
      bstr = to_binary(str)
      # Start from String.new rather than a "" literal: the buffer is mutated
      # below, and mutating a string literal is deprecated in recent Rubies.
      result = add_encoding(String.new)
      prev_pos = 0
      scan(str) do |match, pos|
        result << add_encoding(bstr[prev_pos...pos])
        result << yield(match, pos)
        prev_pos = pos + bytesize(match)
      end
      result << add_encoding(bstr[prev_pos..-1])
      return result
    end

  end

  # Wrapper of UnsafeMap. Boundary checking of pos/len and some methods are added.
  class Map

    extend(Forwardable)
    extend(Util)
    include(Util)
    include(Enumerable)

    class << self
      alias open new
    end

    # Opens the map through UnsafeMap#open with +file_pefix+ and wraps its key
    # and value indexes in Index objects sharing the same encoding. Raises
    # IOError when the open call reports failure.
    def initialize(file_pefix, encoding = nil)
      @unsafe = UnsafeMap.new()
      if !@unsafe.open(file_pefix)
        raise(IOError, "failed to open #{file_pefix}.key, #{file_pefix}.val or #{file_pefix}.map")
      end
      @encoding = encoding || default_encoding()
      @key_index = Index.new(@unsafe.key_index, @encoding)
      @value_index = Index.new(@unsafe.value_index, @encoding)
    end

    attr_reader(:key_index, :value_index, :encoding)
    def_wrapper_methods(:has_key, :lookup)
    alias has_key? has_key

    def inspect()
      return "\#<%p:0x%x>" % [self.class, self.object_id]
    end

    # Value stored for the given key (or key slice), or nil when has_key
    # reports that the key is absent.
    def [](str, pos = 0, len = -1)
      return has_key(str, pos, len) ? lookup(str, pos, len) : nil
    end

    # Number of keys in the map.
    def size
      return self.keys.size
    end

    # All keys, obtained as the expansions of the empty string in the key index.
    def keys
      return @key_index.search_expansions("")
    end

    # Values looked up for every key, in the order returned by keys.
    def values
      return self.keys.map(){ |k| lookup(k) }
    end

    def each_key(&block)
      return self.keys.each(&block)
    end

    def each_value(&block)
      return self.values.each(&block)
    end

    # Yields one [key, value] Array per entry.
    def each(&block)
      each_key(){ |k| yield([k, lookup(k)]) }
    end

    # Yields key and value as two separate block arguments per entry.
    def each_pair(&block)
      each_key(){ |k| yield(k, lookup(k)) }
    end

    # Like Index#scan on the key index, but each match is reported as
    # (key, byte position, value). With a block the matches are yielded and
    # +str+ is returned; without one an Array of triples is returned.
    def scan(str, &block)
      result = []
      @key_index.scan(str) do |key, pos|
        args = [key, pos, lookup(key)]
        block ? yield(*args) : result.push(args)
      end
      return block ? str : result
    end

  end

end
|
data/test/test_tx.rb
ADDED
@@ -0,0 +1,169 @@
|
|
1
|
+
# -*- encoding: UTF-8 -*-
|
2
|
+
|
3
|
+
$KCODE = "u"
|
4
|
+
$LOAD_PATH.unshift("./lib", "./ext")
|
5
|
+
require "test/unit"
|
6
|
+
require "enumerator"
|
7
|
+
require "tempfile"
|
8
|
+
require "tx"
|
9
|
+
|
10
|
+
TEST_ENCODING = RUBY_VERSION >= "1.9.0" ? Encoding::UTF_8 : nil
|
11
|
+
|
12
|
+
# Tests Tx::Index against a small index of ASCII keys.
class TC_TxIndex < Test::Unit::TestCase

  # Builds an index of six ASCII keys in a temporary file and opens it.
  def setup
    @builder = Tx::Builder.new()
    @builder.add_all(%w(foo ho hog hoga hoge hogeshi))
    @tempfile = Tempfile.new("tx_test")
    @builder.build(@tempfile.path)
    @index = Tx::Index.open(@tempfile.path, TEST_ENCODING)
  end

  # Closes and removes the temporary index file after each test instead of
  # leaving it around until the process exits.
  def teardown
    @tempfile.close!() if @tempfile
  end

  # Key count, prefix/expansion lookups (with and without pos/len) and
  # enumeration of all keys.
  def test_basic
    assert_equal(6, @index.num_keys)
    assert_equal(4, @index.longest_prefix("hogeshaa"))
    assert_equal(6, @index.longest_prefix("hogeshaa", 0, -1, true))
    assert(@index.include("hoge"))
    assert(!@index.include("hogera"))
    assert_equal(%w(ho hog), @index.search_prefixes("hog"))
    assert_equal(%w(ho hog), @index.search_prefixes("aahog", 2, 5))
    assert_equal(%w(hog hoga hoge hogeshi), @index.search_expansions("hog").sort())
    assert_equal(%w(hog hoga hoge hogeshi), @index.search_expansions("aahogeshi", 2, 3).sort())
    assert_equal(%w(foo ho hog hoga hoge hogeshi), @index.to_a().sort())
    assert_equal(%w(foo ho hog hoga hoge hogeshi), @index.enum_for(:each).to_a().sort())
  end

  # scan must report the same matches in its Array form and its block form.
  def test_scan
    str = "hohogefugahogaboke"
    expected = [["ho", 0], ["hoge", 2], ["hoga", 10]]
    assert_equal(expected, @index.scan(str))
    result = []
    @index.scan(str) do |s, i|
      result.push([s, i])
    end
    assert_equal(expected, result)
  end

  # gsub replaces each matched key with the block's return value.
  def test_gsub
    result = @index.gsub("hohogefugahogaboke"){ |s, i| s.upcase }
    assert_equal("HOHOGEfugaHOGAboke", result)
  end

  # Opening a file that does not exist raises IOError.
  def test_open
    assert_raise(IOError) do
      Tx::Index.new("noexist.index")
    end
  end

  # Neither building nor opening the index should log an error.
  def test_no_error_log
    assert_equal("", @builder.error_log)
    assert_equal("", @index.error_log)
  end

end
|
64
|
+
|
65
|
+
# Tests Tx::Index against keys made of multi-byte (UTF-8) characters.
class TC_TxIndexMultiByte < Test::Unit::TestCase

  # Builds an index of five hiragana keys in a temporary file and opens it.
  def setup
    @builder = Tx::Builder.new()
    @builder.add_all(%w(ふー ほ ほが ほげ ほげし))
    @tempfile = Tempfile.new("tx_test")
    @builder.build(@tempfile.path)
    @index = Tx::Index.open(@tempfile.path, TEST_ENCODING)
  end

  # Closes and removes the temporary index file after each test instead of
  # leaving it around until the process exits.
  def teardown
    @tempfile.close!() if @tempfile
  end

  # On Ruby 1.9+ every string returned by the index carries the requested encoding.
  def test_encoding
    if RUBY_VERSION >= "1.9.0"
      assert_equal(TEST_ENCODING, @index.search_prefixes("ほが")[0].encoding)
      assert_equal(TEST_ENCODING, @index.search_expansions("ほが")[0].encoding)
      assert_equal(TEST_ENCODING, @index.to_a()[0].encoding)
    end
  end

  # Positions reported by scan are byte offsets (each character here is 3 bytes).
  def test_scan
    str = "ほほげふがほがぼけ"
    expected = [["ほ", 0], ["ほげ", 3], ["ほが", 15]]
    assert_equal(expected, @index.scan(str))
    result = []
    @index.scan(str) do |s, i|
      result.push([s, i])
    end
    assert_equal(expected, result)
  end

  # gsub replaces each matched key with the block's return value.
  def test_gsub
    result = @index.gsub("ほほげふがほがぼけ") do |s, i|
      s.gsub(/ほ/, "ホ").gsub(/が/, "ガ").gsub(/げ/, "ゲ")
    end
    assert_equal("ホホゲふがホガぼけ", result)
  end

end
|
102
|
+
|
103
|
+
# Tests Tx::Map against a small map of ASCII keys and values.
class TC_TxMap < Test::Unit::TestCase

  # Builds a three-entry map using a temporary file path as prefix and opens it.
  def setup
    @builder = Tx::MapBuilder.new()
    @builder.add("ho", "foo")
    @builder.add_all(["hoge", "bar", "hogeshi", "foobar"])
    @tempfile = Tempfile.new("tx_test")
    @builder.build(@tempfile.path)
    @map = Tx::Map.open(@tempfile.path, TEST_ENCODING)
  end

  # Removes the temporary file and, best-effort, the files derived from its
  # path. Tx::Map reports opening "<prefix>.key", "<prefix>.val" and
  # "<prefix>.map", so the builder presumably writes those; Tempfile does not
  # know about them and would leave them behind.
  def teardown
    return unless @tempfile
    prefix = @tempfile.path
    @tempfile.close!()
    %w(key val map).each do |ext|
      path = "#{prefix}.#{ext}"
      begin
        File.delete(path) if File.exist?(path)
      rescue SystemCallError
        # Cleanup only: a file that cannot be removed must not fail the test.
      end
    end
  end

  # Key lookup, missing keys, the underlying indexes and the enumeration methods.
  def test_basic
    assert(@map.has_key("hoge"))
    assert_equal("bar", @map.lookup("hoge"))
    assert_equal("bar", @map["hoge"])
    assert(!@map.has_key("foo"))
    assert_equal("", @map.lookup("foo"))
    assert_equal(nil, @map["foo"])
    assert(@map.key_index.include("hoge"))
    assert(@map.value_index.include("foo"))
    assert_equal(%w(ho hoge hogeshi), @map.keys.sort())
    assert_equal(%w(bar foo foobar), @map.values.sort())
    assert_equal(%w(ho hoge hogeshi), @map.enum_for(:each_key).sort())
    assert_equal(%w(bar foo foobar), @map.enum_for(:each_value).sort())
    assert_equal(
      [["ho", "foo"], ["hoge", "bar"], ["hogeshi", "foobar"]],
      @map.enum_for(:each).sort())
    assert_equal(
      [["ho", "foo"], ["hoge", "bar"], ["hogeshi", "foobar"]],
      @map.enum_for(:each_pair).sort())
  end

  # scan must report the same (key, position, value) triples in both forms.
  def test_scan
    str = "hogehogahoyo"
    expected = [["hoge", 0, "bar"], ["ho", 4, "foo"], ["ho", 8, "foo"]]
    assert_equal(expected, @map.scan(str))
    result = []
    @map.scan(str) do |k, i, v|
      result.push([k, i, v])
    end
    assert_equal(expected, result)
  end

end
|
147
|
+
|
148
|
+
# Tests Tx::Map against keys and values made of multi-byte (UTF-8) characters.
class TC_TxMapMultiByte < Test::Unit::TestCase

  # Builds a three-entry hiragana map using a temporary file path as prefix and opens it.
  def setup
    @builder = Tx::MapBuilder.new()
    @builder.add("ほ", "ふー")
    @builder.add_all(["ほげ", "ばー", "ほげし", "ふーばー"])
    @tempfile = Tempfile.new("tx_test")
    @builder.build(@tempfile.path)
    @map = Tx::Map.open(@tempfile.path, TEST_ENCODING)
  end

  # Removes the temporary file and, best-effort, the files derived from its
  # path. Tx::Map reports opening "<prefix>.key", "<prefix>.val" and
  # "<prefix>.map", so the builder presumably writes those; Tempfile does not
  # know about them and would leave them behind.
  def teardown
    return unless @tempfile
    prefix = @tempfile.path
    @tempfile.close!()
    %w(key val map).each do |ext|
      path = "#{prefix}.#{ext}"
      begin
        File.delete(path) if File.exist?(path)
      rescue SystemCallError
        # Cleanup only: a file that cannot be removed must not fail the test.
      end
    end
  end

  # On Ruby 1.9+ the map, its indexes and every returned string carry the
  # requested encoding.
  def test_encoding
    if RUBY_VERSION >= "1.9.0"
      assert_equal(TEST_ENCODING, @map.lookup("ほげ").encoding)
      assert_equal(TEST_ENCODING, @map.key_index.encoding)
      assert_equal(TEST_ENCODING, @map.value_index.encoding)
      assert_equal(TEST_ENCODING, @map.keys[0].encoding)
      assert_equal(TEST_ENCODING, @map.values[0].encoding)
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tx
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 21
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 5
|
10
|
+
version: 0.0.5
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Hiroshi Ichikawa
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-09-19 00:00:00 +09:00
|
19
|
+
default_executable:
|
20
|
+
dependencies: []
|
21
|
+
|
22
|
+
description: Ruby 1.8/1.9 binding of Tx, a library for a compact trie data structure
|
23
|
+
email: gimite+txruby@gmail.com
|
24
|
+
executables: []
|
25
|
+
|
26
|
+
extensions:
|
27
|
+
- ext/extconf.rb
|
28
|
+
extra_rdoc_files:
|
29
|
+
- README.txt
|
30
|
+
files:
|
31
|
+
- README.txt
|
32
|
+
- lib/tx.rb
|
33
|
+
- lib/i386-msvcrt/tx_core.so
|
34
|
+
- ext/depend
|
35
|
+
- ext/tx_swig.h
|
36
|
+
- ext/tx.cpp
|
37
|
+
- ext/tx_swig.i
|
38
|
+
- ext/swig.patch
|
39
|
+
- ext/Makefile
|
40
|
+
- ext/tx_swig_wrap.cxx
|
41
|
+
- ext/tx.hpp
|
42
|
+
- ext/tx_swig.cpp
|
43
|
+
- ext/extconf.rb
|
44
|
+
- ext/ssv.cpp
|
45
|
+
- ext/ssv.hpp
|
46
|
+
- test/test_tx.rb
|
47
|
+
has_rdoc: true
|
48
|
+
homepage: http://gimite.net/en/index.php?tx-ruby
|
49
|
+
licenses: []
|
50
|
+
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options:
|
53
|
+
- --quiet
|
54
|
+
- --title
|
55
|
+
- tx-ruby Reference
|
56
|
+
- --main
|
57
|
+
- README.txt
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
hash: 3
|
66
|
+
segments:
|
67
|
+
- 0
|
68
|
+
version: "0"
|
69
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
70
|
+
none: false
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
hash: 3
|
75
|
+
segments:
|
76
|
+
- 0
|
77
|
+
version: "0"
|
78
|
+
requirements: []
|
79
|
+
|
80
|
+
rubyforge_project:
|
81
|
+
rubygems_version: 1.3.7
|
82
|
+
signing_key:
|
83
|
+
specification_version: 3
|
84
|
+
summary: Ruby 1.8/1.9 binding of Tx, a library for a compact trie data structure
|
85
|
+
test_files: []
|
86
|
+
|