tx 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.txt +1 -0
- data/ext/Makefile +163 -0
- data/ext/depend +6 -0
- data/ext/extconf.rb +15 -0
- data/ext/ssv.cpp +355 -0
- data/ext/ssv.hpp +93 -0
- data/ext/swig.patch +192 -0
- data/ext/tx.cpp +442 -0
- data/ext/tx.hpp +62 -0
- data/ext/tx_swig.cpp +164 -0
- data/ext/tx_swig.h +93 -0
- data/ext/tx_swig.i +17 -0
- data/ext/tx_swig_wrap.cxx +9884 -0
- data/lib/i386-msvcrt/tx_core.so +0 -0
- data/lib/tx.rb +219 -0
- data/test/test_tx.rb +169 -0
- metadata +86 -0
Binary file
|
data/lib/tx.rb
ADDED
@@ -0,0 +1,219 @@
|
|
1
|
+
require "tx_core"
|
2
|
+
require "forwardable"
|
3
|
+
|
4
|
+
|
5
|
+
module Tx #:nodoc: all

  # Helpers shared by Index and Map. The module is both extended (for
  # def_wrapper_methods) and included (for the encoding/byte helpers) by
  # those classes. The helpers read the @unsafe and @encoding instance
  # variables of the object they are mixed into.
  module Util

    module_function

    # Defines wrapper methods which perform boundary checking of pos and len.
    #
    # For each given name an instance method name(str, pos = 0, len = nil, *opt)
    # is defined. It raises ArgumentError for a negative pos, clamps pos to the
    # byte size of str, replaces a missing, negative or too large len by the
    # number of bytes remaining after pos, forwards the call to the method of
    # the same name on @unsafe, and tags the result with add_encoding.
    def def_wrapper_methods(*methods)
      methods.each do |name|
        define_method(name) do |*args|
          (str, pos, len, *opt) = args
          raise(ArgumentError, "argument pos is negative") if pos && pos < 0
          str_len = bytesize(str)
          pos ||= 0
          pos = str_len if pos > str_len
          len = str_len - pos if !len || len < 0 || len > str_len - pos
          add_encoding(@unsafe.__send__(name, str, pos, len, *opt))
        end
      end
    end

    if RUBY_VERSION >= "1.9.0"

      # Encoding used when the caller does not specify one.
      def default_encoding
        return Encoding.default_internal || Encoding::UTF_8
      end

      # Tags obj in place with @encoding: a String directly, an Array element
      # by element (recursively). Any other object is left untouched.
      # Always returns obj itself.
      def add_encoding(obj)
        case obj
        when Array
          obj.each { |e| add_encoding(e) }
        when String
          obj.force_encoding(@encoding)
        end
        return obj
      end

      # Returns a binary (ASCII-8BIT) copy of str so that it can be sliced
      # by byte offsets. The argument itself is not modified.
      def to_binary(str)
        return str.dup.force_encoding(Encoding::ASCII_8BIT)
      end

      # Length of str in bytes.
      def bytesize(str)
        return str.bytesize
      end

    else

      # Strings carry no encoding on this Ruby version, so the helpers below
      # are pass-throughs.
      def default_encoding
        return nil
      end

      def add_encoding(obj)
        return obj
      end

      def to_binary(str)
        return str
      end

      # String#length is used as the byte length on this Ruby version.
      def bytesize(str)
        return str.length
      end

    end

  end

  # Wrapper of UnsafeIndex. Boundary checking of pos/len and some methods are added.
  class Index

    extend(Forwardable)
    extend(Util)
    include(Util)
    include(Enumerable)

    class << self
      alias open new
    end

    # arg is either an existing UnsafeIndex, which is wrapped as is, or a
    # value handed to UnsafeIndex#open (the tests pass a file path).
    # Raises IOError when UnsafeIndex#open reports failure.
    # encoding defaults to default_encoding.
    def initialize(arg, encoding = nil)
      if arg.is_a?(UnsafeIndex)
        @unsafe = arg
      else
        @unsafe = UnsafeIndex.new
        if !@unsafe.open(arg)
          raise(IOError, "failed to open #{arg}")
        end
      end
      @encoding = encoding || default_encoding
    end

    attr_reader(:encoding)
    def_delegators(:@unsafe, :num_keys, :result_log, :error_log)
    def_wrapper_methods(:longest_prefix, :include, :search_prefixes, :search_expansions)
    alias common_prefix_search search_prefixes
    alias predictive_search search_expansions
    alias include? include
    alias size num_keys

    def inspect
      return "\#<%p:0x%x>" % [self.class, self.object_id]
    end

    # All keys of the index, obtained as the expansions of the empty string.
    def to_a
      return search_expansions("")
    end

    # Calls the block once for each key.
    def each(&block)
      to_a.each(&block)
    end

    # Walks str from left to right looking for keys of the index.
    #
    # At each byte position the result of longest_prefix is taken as the
    # length of the match; a negative length is treated as "no match". After
    # a match the walk continues behind it, otherwise it advances by one byte.
    #
    # With a block, yields (match, byte_position) for every match and returns
    # str. Without a block, returns an Array of [match, byte_position] pairs.
    def scan(str, &block)
      bstr = to_binary(str)
      # The byte length does not change during the walk, so compute it once.
      str_len = bytesize(str)
      result = []
      pos = 0
      while pos < str_len
        plen = longest_prefix(str, pos)
        if plen >= 0
          args = [add_encoding(bstr[pos, plen]), pos]
          block ? yield(*args) : result.push(args)
        end
        # Always advance by at least one byte so that the loop terminates.
        pos += plen > 0 ? plen : 1
      end
      return block ? str : result
    end

    # Returns a copy of str in which every match found by scan is replaced by
    # the value the block returns for (match, byte_position). Text between
    # matches is copied unchanged.
    def gsub(str, &block)
      bstr = to_binary(str)
      # dup the literal so the buffer can be appended to even when string
      # literals are frozen.
      result = add_encoding("".dup)
      prev_pos = 0
      scan(str) do |match, pos|
        result << add_encoding(bstr[prev_pos...pos])
        result << yield(match, pos)
        prev_pos = pos + bytesize(match)
      end
      result << add_encoding(bstr[prev_pos..-1])
      return result
    end

  end

  # Wrapper of UnsafeMap. Boundary checking of pos/len and some methods are added.
  class Map

    extend(Forwardable)
    extend(Util)
    include(Util)
    include(Enumerable)

    class << self
      alias open new
    end

    # Opens the map identified by file_prefix through UnsafeMap#open.
    # Raises IOError when UnsafeMap#open reports failure.
    # encoding defaults to default_encoding and is also used for the key and
    # value indexes.
    def initialize(file_prefix, encoding = nil)
      @unsafe = UnsafeMap.new
      if !@unsafe.open(file_prefix)
        raise(IOError, "failed to open #{file_prefix}.key, #{file_prefix}.val or #{file_prefix}.map")
      end
      @encoding = encoding || default_encoding
      @key_index = Index.new(@unsafe.key_index, @encoding)
      @value_index = Index.new(@unsafe.value_index, @encoding)
    end

    attr_reader(:key_index, :value_index, :encoding)
    def_wrapper_methods(:has_key, :lookup)
    alias has_key? has_key

    def inspect
      return "\#<%p:0x%x>" % [self.class, self.object_id]
    end

    # Value stored for the key str[pos, len] (byte offsets), or nil when the
    # map has no such key.
    def [](str, pos = 0, len = -1)
      return has_key(str, pos, len) ? lookup(str, pos, len) : nil
    end

    # Number of keys in the map.
    def size
      return self.keys.size
    end

    # All keys, obtained as the expansions of the empty string in the key index.
    def keys
      return @key_index.search_expansions("")
    end

    # The value of every key, in the order of keys.
    def values
      return self.keys.map { |k| lookup(k) }
    end

    def each_key(&block)
      return self.keys.each(&block)
    end

    def each_value(&block)
      return self.values.each(&block)
    end

    # Yields one [key, value] Array per entry.
    def each(&block)
      each_key { |k| yield([k, lookup(k)]) }
    end

    # Yields key and value as two separate block arguments per entry.
    def each_pair(&block)
      each_key { |k| yield(k, lookup(k)) }
    end

    # Like Index#scan on the key index, with the looked-up value appended.
    #
    # With a block, yields (key, byte_position, value) for every match and
    # returns str. Without a block, returns an Array of
    # [key, byte_position, value] triples.
    def scan(str, &block)
      result = []
      @key_index.scan(str) do |key, pos|
        args = [key, pos, lookup(key)]
        block ? yield(*args) : result.push(args)
      end
      return block ? str : result
    end

  end

end
|
data/test/test_tx.rb
ADDED
@@ -0,0 +1,169 @@
|
|
1
|
+
# -*- encoding: UTF-8 -*-
|
2
|
+
|
3
|
+
# $KCODE only has an effect on Ruby 1.8; newer versions rely on the
# "encoding" magic comment instead, so it is set on 1.8 only.
$KCODE = "u" if RUBY_VERSION < "1.9.0"
# Prefer the library and extension from the working directory.
$LOAD_PATH.unshift("./lib", "./ext")
require "test/unit"
require "enumerator"
require "tempfile"
require "tx"

# Encoding passed to Tx::Index.open / Tx::Map.open in the tests below;
# nil on Ruby versions without the Encoding class.
TEST_ENCODING = RUBY_VERSION >= "1.9.0" ? Encoding::UTF_8 : nil
|
11
|
+
|
12
|
+
# Tests Tx::Index against an index of ASCII keys built in a temporary file.
class TC_TxIndex < Test::Unit::TestCase

  # Builds an index of six ASCII keys into a temporary file and opens it.
  def setup
    @builder = Tx::Builder.new()
    @builder.add_all(%w(foo ho hog hoga hoge hogeshi))
    @tempfile = Tempfile.new("tx_test")
    @builder.build(@tempfile.path)
    @index = Tx::Index.open(@tempfile.path, TEST_ENCODING)
  end

  # Closes and removes the temporary file instead of leaving it to the
  # garbage collector.
  def teardown
    @tempfile.close! if @tempfile
  end

  def test_basic
    assert_equal(6, @index.num_keys)
    assert_equal(4, @index.longest_prefix("hogeshaa"))
    assert_equal(6, @index.longest_prefix("hogeshaa", 0, -1, true))
    assert(@index.include("hoge"))
    assert(!@index.include("hogera"))
    assert_equal(%w(ho hog), @index.search_prefixes("hog"))
    assert_equal(%w(ho hog), @index.search_prefixes("aahog", 2, 5))
    assert_equal(%w(hog hoga hoge hogeshi), @index.search_expansions("hog").sort())
    assert_equal(%w(hog hoga hoge hogeshi), @index.search_expansions("aahogeshi", 2, 3).sort())
    assert_equal(%w(foo ho hog hoga hoge hogeshi), @index.to_a().sort())
    assert_equal(%w(foo ho hog hoga hoge hogeshi), @index.enum_for(:each).to_a().sort())
  end

  # scan must report the same [match, position] pairs with and without a block.
  def test_scan
    str = "hohogefugahogaboke"
    expected = [["ho", 0], ["hoge", 2], ["hoga", 10]]
    assert_equal(expected, @index.scan(str))
    result = []
    @index.scan(str) do |s, i|
      result.push([s, i])
    end
    assert_equal(expected, result)
  end

  def test_gsub
    result = @index.gsub("hohogefugahogaboke"){ |s, i| s.upcase }
    assert_equal("HOHOGEfugaHOGAboke", result)
  end

  def test_open
    assert_raise(IOError) do
      Tx::Index.new("noexist.index")
    end
  end

  def test_no_error_log
    assert_equal("", @builder.error_log)
    assert_equal("", @index.error_log)
  end

end
|
64
|
+
|
65
|
+
# Tests Tx::Index with multi-byte (UTF-8) keys; positions are byte offsets.
class TC_TxIndexMultiByte < Test::Unit::TestCase

  # Builds an index of five multi-byte keys into a temporary file and opens it.
  def setup
    @builder = Tx::Builder.new()
    @builder.add_all(%w(ふー ほ ほが ほげ ほげし))
    @tempfile = Tempfile.new("tx_test")
    @builder.build(@tempfile.path)
    @index = Tx::Index.open(@tempfile.path, TEST_ENCODING)
  end

  # Closes and removes the temporary file instead of leaving it to the
  # garbage collector.
  def teardown
    @tempfile.close! if @tempfile
  end

  # Returned strings must carry the encoding the index was opened with.
  def test_encoding
    if RUBY_VERSION >= "1.9.0"
      assert_equal(TEST_ENCODING, @index.search_prefixes("ほが")[0].encoding)
      assert_equal(TEST_ENCODING, @index.search_expansions("ほが")[0].encoding)
      assert_equal(TEST_ENCODING, @index.to_a()[0].encoding)
    end
  end

  def test_scan
    str = "ほほげふがほがぼけ"
    expected = [["ほ", 0], ["ほげ", 3], ["ほが", 15]]
    assert_equal(expected, @index.scan(str))
    result = []
    @index.scan(str) do |s, i|
      result.push([s, i])
    end
    assert_equal(expected, result)
  end

  def test_gsub
    result = @index.gsub("ほほげふがほがぼけ") do |s, i|
      s.gsub(/ほ/, "ホ").gsub(/が/, "ガ").gsub(/げ/, "ゲ")
    end
    assert_equal("ホホゲふがホガぼけ", result)
  end

end
|
102
|
+
|
103
|
+
# Tests Tx::Map against a map of ASCII keys and values.
class TC_TxMap < Test::Unit::TestCase

  # Builds a three-entry map using the path of a temporary file as the file
  # prefix and opens it.
  def setup
    @builder = Tx::MapBuilder.new()
    @builder.add("ho", "foo")
    @builder.add_all(["hoge", "bar", "hogeshi", "foobar"])
    @tempfile = Tempfile.new("tx_test")
    @builder.build(@tempfile.path)
    @map = Tx::Map.open(@tempfile.path, TEST_ENCODING)
  end

  # Closes and removes the temporary file instead of leaving it to the
  # garbage collector.
  # NOTE(review): the builder presumably also writes <prefix>.key/.val/.map
  # files, which are not removed here -- confirm and clean up if so.
  def teardown
    @tempfile.close! if @tempfile
  end

  def test_basic
    assert(@map.has_key("hoge"))
    assert_equal("bar", @map.lookup("hoge"))
    assert_equal("bar", @map["hoge"])
    assert(!@map.has_key("foo"))
    assert_equal("", @map.lookup("foo"))
    assert_equal(nil, @map["foo"])
    assert(@map.key_index.include("hoge"))
    assert(@map.value_index.include("foo"))
    assert_equal(%w(ho hoge hogeshi), @map.keys.sort())
    assert_equal(%w(bar foo foobar), @map.values.sort())
    assert_equal(%w(ho hoge hogeshi), @map.enum_for(:each_key).sort())
    assert_equal(%w(bar foo foobar), @map.enum_for(:each_value).sort())
    assert_equal(
      [["ho", "foo"], ["hoge", "bar"], ["hogeshi", "foobar"]],
      @map.enum_for(:each).sort())
    assert_equal(
      [["ho", "foo"], ["hoge", "bar"], ["hogeshi", "foobar"]],
      @map.enum_for(:each_pair).sort())
  end

  # scan must report the same [key, position, value] triples with and
  # without a block.
  def test_scan
    str = "hogehogahoyo"
    expected = [["hoge", 0, "bar"], ["ho", 4, "foo"], ["ho", 8, "foo"]]
    assert_equal(expected, @map.scan(str))
    result = []
    @map.scan(str) do |k, i, v|
      result.push([k, i, v])
    end
    assert_equal(expected, result)
  end

end
|
147
|
+
|
148
|
+
# Tests that Tx::Map tags multi-byte (UTF-8) keys and values with the
# requested encoding.
class TC_TxMapMultiByte < Test::Unit::TestCase

  # Builds a three-entry multi-byte map using the path of a temporary file
  # as the file prefix and opens it.
  def setup
    @builder = Tx::MapBuilder.new()
    @builder.add("ほ", "ふー")
    @builder.add_all(["ほげ", "ばー", "ほげし", "ふーばー"])
    @tempfile = Tempfile.new("tx_test")
    @builder.build(@tempfile.path)
    @map = Tx::Map.open(@tempfile.path, TEST_ENCODING)
  end

  # Closes and removes the temporary file instead of leaving it to the
  # garbage collector.
  # NOTE(review): the builder presumably also writes <prefix>.key/.val/.map
  # files, which are not removed here -- confirm and clean up if so.
  def teardown
    @tempfile.close! if @tempfile
  end

  def test_encoding
    if RUBY_VERSION >= "1.9.0"
      assert_equal(TEST_ENCODING, @map.lookup("ほげ").encoding)
      assert_equal(TEST_ENCODING, @map.key_index.encoding)
      assert_equal(TEST_ENCODING, @map.value_index.encoding)
      assert_equal(TEST_ENCODING, @map.keys[0].encoding)
      assert_equal(TEST_ENCODING, @map.values[0].encoding)
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tx
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 21
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 5
|
10
|
+
version: 0.0.5
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Hiroshi Ichikawa
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-09-19 00:00:00 +09:00
|
19
|
+
default_executable:
|
20
|
+
dependencies: []
|
21
|
+
|
22
|
+
description: Ruby 1.8/1.9 binding of Tx, a library for a compact trie data structure
|
23
|
+
email: gimite+txruby@gmail.com
|
24
|
+
executables: []
|
25
|
+
|
26
|
+
extensions:
|
27
|
+
- ext/extconf.rb
|
28
|
+
extra_rdoc_files:
|
29
|
+
- README.txt
|
30
|
+
files:
|
31
|
+
- README.txt
|
32
|
+
- lib/tx.rb
|
33
|
+
- lib/i386-msvcrt/tx_core.so
|
34
|
+
- ext/depend
|
35
|
+
- ext/tx_swig.h
|
36
|
+
- ext/tx.cpp
|
37
|
+
- ext/tx_swig.i
|
38
|
+
- ext/swig.patch
|
39
|
+
- ext/Makefile
|
40
|
+
- ext/tx_swig_wrap.cxx
|
41
|
+
- ext/tx.hpp
|
42
|
+
- ext/tx_swig.cpp
|
43
|
+
- ext/extconf.rb
|
44
|
+
- ext/ssv.cpp
|
45
|
+
- ext/ssv.hpp
|
46
|
+
- test/test_tx.rb
|
47
|
+
has_rdoc: true
|
48
|
+
homepage: http://gimite.net/en/index.php?tx-ruby
|
49
|
+
licenses: []
|
50
|
+
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options:
|
53
|
+
- --quiet
|
54
|
+
- --title
|
55
|
+
- tx-ruby Reference
|
56
|
+
- --main
|
57
|
+
- README.txt
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
hash: 3
|
66
|
+
segments:
|
67
|
+
- 0
|
68
|
+
version: "0"
|
69
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
70
|
+
none: false
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
hash: 3
|
75
|
+
segments:
|
76
|
+
- 0
|
77
|
+
version: "0"
|
78
|
+
requirements: []
|
79
|
+
|
80
|
+
rubyforge_project:
|
81
|
+
rubygems_version: 1.3.7
|
82
|
+
signing_key:
|
83
|
+
specification_version: 3
|
84
|
+
summary: Ruby 1.8/1.9 binding of Tx, a library for a compact trie data structure
|
85
|
+
test_files: []
|
86
|
+
|