tx 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,219 @@
1
+ require "tx_core"
2
+ require "forwardable"
3
+
4
+
5
+ module Tx #:nodoc: all
6
+
7
# Helpers shared by the wrapper classes: a generator for boundary-checked
# wrapper methods plus a small string-encoding compatibility layer.
#
# The instance-level helpers read @unsafe (the wrapped object) and @encoding
# (the encoding applied to returned strings) from the object they run on.
module Util

  module_function

  # Defines wrapper methods which perform boundary checking of pos and len.
  #
  # Each generated method takes (str, pos, len, *opt), where pos and len are
  # optional:
  # * pos defaults to 0 and is clamped to the byte size of str;
  # * len is replaced by "everything from pos to the end" when it is missing,
  #   negative, or larger than the number of bytes remaining after pos.
  # The call is then forwarded to the method of the same name on @unsafe and
  # the result is passed through add_encoding.
  #
  # Raises ArgumentError when pos is negative.
  def def_wrapper_methods(*methods)
    methods.each do |name|
      define_method(name) do |*args|
        (str, pos, len, *opt) = args
        raise(ArgumentError, "argument pos is negative") if pos && pos < 0
        str_len = bytesize(str)
        pos ||= 0
        pos = str_len if pos > str_len
        len = str_len - pos if !len || len < 0 || len > str_len - pos
        add_encoding(@unsafe.__send__(name, str, pos, len, *opt))
      end
    end
  end

  # Detect encoding-aware strings by capability instead of comparing
  # RUBY_VERSION strings, which would be a lexicographic comparison.
  if String.method_defined?(:force_encoding)

    # Encoding used when the caller does not supply one:
    # Encoding.default_internal if set, UTF-8 otherwise.
    def default_encoding
      Encoding.default_internal || Encoding::UTF_8
    end

    # Tags obj in place with @encoding and returns obj. Strings are
    # re-tagged directly, Arrays are walked recursively, and any other
    # object is returned untouched.
    def add_encoding(obj)
      case obj
      when Array
        obj.each { |e| add_encoding(e) }
      when String
        obj.force_encoding(@encoding)
      end
      obj
    end

    # Returns a copy of str tagged as ASCII-8BIT so that slicing works on
    # bytes. The argument itself is not modified.
    def to_binary(str)
      str.dup.force_encoding(Encoding::ASCII_8BIT)
    end

    # Returns the length of str in bytes.
    def bytesize(str)
      str.bytesize
    end

  else

    # Strings carry no encoding here, so there is no default to report.
    def default_encoding
      nil
    end

    # No-op: returns obj unchanged.
    def add_encoding(obj)
      obj
    end

    # No-op: returns str itself.
    def to_binary(str)
      str
    end

    # Returns str.length, used as the byte count on this branch.
    def bytesize(str)
      str.length
    end

  end

end
71
+
72
# Wrapper of UnsafeIndex. Boundary checking of pos/len and some methods are added.
#
# Strings returned by the wrapped methods are tagged with #encoding.
class Index

  extend Forwardable
  extend Util
  include Util
  include Enumerable

  class << self
    alias open new
  end

  # arg is either an existing UnsafeIndex, which is wrapped as is, or a value
  # handed to UnsafeIndex#open. Raises IOError when that open call fails.
  # encoding falls back to default_encoding when omitted.
  def initialize(arg, encoding = nil)
    if arg.is_a?(UnsafeIndex)
      @unsafe = arg
    else
      @unsafe = UnsafeIndex.new
      raise(IOError, "failed to open #{arg}") unless @unsafe.open(arg)
    end
    @encoding = encoding || default_encoding
  end

  attr_reader :encoding
  def_delegators :@unsafe, :num_keys, :result_log, :error_log
  def_wrapper_methods :longest_prefix, :include, :search_prefixes, :search_expansions
  alias common_prefix_search search_prefixes
  alias predictive_search search_expansions
  alias include? include
  alias size num_keys

  # Short representation: class plus object id.
  def inspect
    format("\#<%p:0x%x>", self.class, object_id)
  end

  # Every key, obtained as the expansions of the empty string.
  def to_a
    search_expansions("")
  end

  # Iterates over the result of #to_a.
  def each(&block)
    to_a.each(&block)
  end

  # Walks str from left to right, asking longest_prefix for a match at each
  # byte offset. For every non-negative result it yields (match, offset) to
  # the block, or collects [match, offset] pairs when no block is given.
  # After a match of positive length the walk resumes right behind it,
  # otherwise it advances by one byte.
  #
  # Returns str when a block is given, the collected pairs otherwise.
  def scan(str, &block)
    bytes = to_binary(str)
    found = []
    offset = 0
    while offset < bytesize(str)
      match_len = longest_prefix(str, offset)
      if match_len >= 0
        hit = [add_encoding(bytes[offset, match_len]), offset]
        if block
          yield(*hit)
        else
          found.push(hit)
        end
      end
      offset += (match_len > 0 ? match_len : 1)
    end
    block ? str : found
  end

  # Builds a new string in which every match reported by #scan is replaced
  # by the block's return value for (match, offset); the text between
  # matches is copied over unchanged.
  def gsub(str, &block)
    bytes = to_binary(str)
    output = add_encoding("")
    cursor = 0
    scan(str) do |match, pos|
      output << add_encoding(bytes[cursor...pos])
      output << yield(match, pos)
      cursor = pos + bytesize(match)
    end
    output << add_encoding(bytes[cursor..-1])
    output
  end

end
145
+
146
# Wrapper of UnsafeMap. Boundary checking of pos/len and some methods are added.
#
# Keys and values are exposed through two Index wrappers (#key_index and
# #value_index) that share this map's encoding.
class Map

  extend(Forwardable)
  extend(Util)
  include(Util)
  include(Enumerable)

  class << self
    alias open new
  end

  # Opens the map identified by file_prefix via UnsafeMap#open.
  # encoding falls back to default_encoding when omitted.
  #
  # Raises IOError when the underlying open call fails.
  def initialize(file_prefix, encoding = nil)
    @unsafe = UnsafeMap.new()
    if !@unsafe.open(file_prefix)
      raise(IOError, "failed to open #{file_prefix}.key, #{file_prefix}.val or #{file_prefix}.map")
    end
    @encoding = encoding || default_encoding()
    @key_index = Index.new(@unsafe.key_index, @encoding)
    @value_index = Index.new(@unsafe.value_index, @encoding)
  end

  attr_reader(:key_index, :value_index, :encoding)
  def_wrapper_methods(:has_key, :lookup)
  alias has_key? has_key

  # Short representation: class plus object id.
  def inspect()
    return "\#<%p:0x%x>" % [self.class, self.object_id]
  end

  # Returns the value stored for the given key, or nil when has_key reports
  # that the key is absent. pos/len select a substring of str as the key.
  def [](str, pos = 0, len = -1)
    return has_key(str, pos, len) ? lookup(str, pos, len) : nil
  end

  # Number of keys in the map.
  def size
    # Was `self.keys.sizse`, which raised NoMethodError on every call.
    return self.keys.size
  end

  # All keys, obtained as the expansions of the empty string in the key index.
  def keys
    return @key_index.search_expansions("")
  end

  # The value looked up for each key, in the order #keys returns them.
  def values
    return self.keys.map(){ |k| lookup(k) }
  end

  # Iterates over #keys.
  def each_key(&block)
    return self.keys.each(&block)
  end

  # Iterates over #values.
  def each_value(&block)
    return self.values.each(&block)
  end

  # Yields one [key, value] array per key.
  def each(&block)
    each_key(){ |k| yield([k, lookup(k)]) }
  end

  # Yields key and value as two separate arguments per key.
  def each_pair(&block)
    each_key(){ |k| yield(k, lookup(k)) }
  end

  # Scans str with the key index and, for every key found, yields
  # (key, offset, value) to the block, or collects [key, offset, value]
  # triples when no block is given.
  #
  # Returns str when a block is given, the collected triples otherwise.
  def scan(str, &block)
    result = []
    @key_index.scan(str) do |key, pos|
      args = [key, pos, lookup(key)]
      block ? yield(*args) : result.push(args)
    end
    return block ? str : result
  end

end
218
+
219
+ end
@@ -0,0 +1,169 @@
1
+ # -*- encoding: UTF-8 -*-
2
+
3
+ $KCODE = "u"
4
+ $LOAD_PATH.unshift("./lib", "./ext")
5
+ require "test/unit"
6
+ require "enumerator"
7
+ require "tempfile"
8
+ require "tx"
9
+
10
+ TEST_ENCODING = RUBY_VERSION >= "1.9.0" ? Encoding::UTF_8 : nil
11
+
12
# Tests Tx::Index against a small set of ASCII keys.
class TC_TxIndex < Test::Unit::TestCase

  # Builds an index of six keys into a temporary file and opens it.
  def setup
    @builder = Tx::Builder.new
    @builder.add_all(%w[foo ho hog hoga hoge hogeshi])
    @tempfile = Tempfile.new("tx_test")
    @builder.build(@tempfile.path)
    @index = Tx::Index.open(@tempfile.path, TEST_ENCODING)
  end

  # Lookup, prefix search, expansion search and enumeration, including calls
  # that pass explicit pos/len arguments.
  def test_basic
    all_keys = %w[foo ho hog hoga hoge hogeshi]
    hog_prefixes = %w[ho hog]
    hog_expansions = %w[hog hoga hoge hogeshi]

    assert_equal(6, @index.num_keys)
    assert_equal(4, @index.longest_prefix("hogeshaa"))
    assert_equal(6, @index.longest_prefix("hogeshaa", 0, -1, true))
    assert(@index.include("hoge"))
    assert(!@index.include("hogera"))
    assert_equal(hog_prefixes, @index.search_prefixes("hog"))
    assert_equal(hog_prefixes, @index.search_prefixes("aahog", 2, 5))
    assert_equal(hog_expansions, @index.search_expansions("hog").sort)
    assert_equal(hog_expansions, @index.search_expansions("aahogeshi", 2, 3).sort)
    assert_equal(all_keys, @index.to_a.sort)
    assert_equal(all_keys, @index.enum_for(:each).to_a.sort)
  end

  # scan must report the same matches with and without a block.
  def test_scan
    text = "hohogefugahogaboke"
    expected = [["ho", 0], ["hoge", 2], ["hoga", 10]]
    assert_equal(expected, @index.scan(text))

    yielded = []
    @index.scan(text) { |match, offset| yielded.push([match, offset]) }
    assert_equal(expected, yielded)
  end

  # gsub replaces every match with the block's result.
  def test_gsub
    replaced = @index.gsub("hohogefugahogaboke") do |match, _offset|
      match.upcase
    end
    assert_equal("HOHOGEfugaHOGAboke", replaced)
  end

  # Opening a missing file raises IOError.
  def test_open
    assert_raise(IOError) { Tx::Index.new("noexist.index") }
  end

  # Neither the builder nor the index should have logged an error.
  def test_no_error_log
    assert_equal("", @builder.error_log)
    assert_equal("", @index.error_log)
  end

end
64
+
65
# Tests Tx::Index against multi-byte (Japanese) keys.
class TC_TxIndexMultiByte < Test::Unit::TestCase

  # Builds an index of five multi-byte keys into a temporary file and opens it.
  def setup
    @builder = Tx::Builder.new
    @builder.add_all(%w[ふー ほ ほが ほげ ほげし])
    @tempfile = Tempfile.new("tx_test")
    @builder.build(@tempfile.path)
    @index = Tx::Index.open(@tempfile.path, TEST_ENCODING)
  end

  # Returned strings carry the encoding the index was opened with
  # (only checked on Ruby 1.9 and later).
  def test_encoding
    return unless RUBY_VERSION >= "1.9.0"

    assert_equal(TEST_ENCODING, @index.search_prefixes("ほが")[0].encoding)
    assert_equal(TEST_ENCODING, @index.search_expansions("ほが")[0].encoding)
    assert_equal(TEST_ENCODING, @index.to_a[0].encoding)
  end

  # The expected offsets are byte offsets: 0, 3 and 15.
  def test_scan
    text = "ほほげふがほがぼけ"
    expected = [["ほ", 0], ["ほげ", 3], ["ほが", 15]]
    assert_equal(expected, @index.scan(text))

    yielded = []
    @index.scan(text) { |match, offset| yielded.push([match, offset]) }
    assert_equal(expected, yielded)
  end

  # gsub replaces matches and leaves the rest of the multi-byte text unchanged.
  def test_gsub
    replaced = @index.gsub("ほほげふがほがぼけ") { |match, _offset|
      match.gsub(/ほ/, "ホ").gsub(/が/, "ガ").gsub(/げ/, "ゲ")
    }
    assert_equal("ホホゲふがホガぼけ", replaced)
  end

end
102
+
103
# Tests Tx::Map against a small set of ASCII key/value pairs.
class TC_TxMap < Test::Unit::TestCase

  # Builds a three-entry map into a temporary file and opens it. One entry is
  # added with add, the other two with add_all as a flat key/value list.
  def setup
    @builder = Tx::MapBuilder.new
    @builder.add("ho", "foo")
    @builder.add_all(["hoge", "bar", "hogeshi", "foobar"])
    @tempfile = Tempfile.new("tx_test")
    @builder.build(@tempfile.path)
    @map = Tx::Map.open(@tempfile.path, TEST_ENCODING)
  end

  # Key lookup, the [] accessor, both underlying indexes and the iterators.
  def test_basic
    sorted_keys = %w[ho hoge hogeshi]
    sorted_values = %w[bar foo foobar]
    sorted_pairs = [["ho", "foo"], ["hoge", "bar"], ["hogeshi", "foobar"]]

    assert(@map.has_key("hoge"))
    assert_equal("bar", @map.lookup("hoge"))
    assert_equal("bar", @map["hoge"])
    assert(!@map.has_key("foo"))
    assert_equal("", @map.lookup("foo"))
    assert_equal(nil, @map["foo"])
    assert(@map.key_index.include("hoge"))
    assert(@map.value_index.include("foo"))
    assert_equal(sorted_keys, @map.keys.sort)
    assert_equal(sorted_values, @map.values.sort)
    assert_equal(sorted_keys, @map.enum_for(:each_key).sort)
    assert_equal(sorted_values, @map.enum_for(:each_value).sort)
    assert_equal(sorted_pairs, @map.enum_for(:each).sort)
    assert_equal(sorted_pairs, @map.enum_for(:each_pair).sort)
  end

  # scan must report the same (key, offset, value) triples with and without
  # a block.
  def test_scan
    text = "hogehogahoyo"
    expected = [["hoge", 0, "bar"], ["ho", 4, "foo"], ["ho", 8, "foo"]]
    assert_equal(expected, @map.scan(text))

    yielded = []
    @map.scan(text) { |key, offset, value| yielded.push([key, offset, value]) }
    assert_equal(expected, yielded)
  end

end
147
+
148
# Tests Tx::Map against multi-byte (Japanese) keys and values.
class TC_TxMapMultiByte < Test::Unit::TestCase

  # Builds a three-entry multi-byte map into a temporary file and opens it.
  def setup
    @builder = Tx::MapBuilder.new
    @builder.add("ほ", "ふー")
    @builder.add_all(["ほげ", "ばー", "ほげし", "ふーばー"])
    @tempfile = Tempfile.new("tx_test")
    @builder.build(@tempfile.path)
    @map = Tx::Map.open(@tempfile.path, TEST_ENCODING)
  end

  # Looked-up values, both indexes, keys and values all report the encoding
  # the map was opened with (only checked on Ruby 1.9 and later).
  def test_encoding
    return unless RUBY_VERSION >= "1.9.0"

    assert_equal(TEST_ENCODING, @map.lookup("ほげ").encoding)
    assert_equal(TEST_ENCODING, @map.key_index.encoding)
    assert_equal(TEST_ENCODING, @map.value_index.encoding)
    assert_equal(TEST_ENCODING, @map.keys[0].encoding)
    assert_equal(TEST_ENCODING, @map.values[0].encoding)
  end

end
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tx
3
+ version: !ruby/object:Gem::Version
4
+ hash: 21
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 5
10
+ version: 0.0.5
11
+ platform: ruby
12
+ authors:
13
+ - Hiroshi Ichikawa
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-09-19 00:00:00 +09:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: Ruby 1.8/1.9 binding of Tx, a library for a compact trie data structure
23
+ email: gimite+txruby@gmail.com
24
+ executables: []
25
+
26
+ extensions:
27
+ - ext/extconf.rb
28
+ extra_rdoc_files:
29
+ - README.txt
30
+ files:
31
+ - README.txt
32
+ - lib/tx.rb
33
+ - lib/i386-msvcrt/tx_core.so
34
+ - ext/depend
35
+ - ext/tx_swig.h
36
+ - ext/tx.cpp
37
+ - ext/tx_swig.i
38
+ - ext/swig.patch
39
+ - ext/Makefile
40
+ - ext/tx_swig_wrap.cxx
41
+ - ext/tx.hpp
42
+ - ext/tx_swig.cpp
43
+ - ext/extconf.rb
44
+ - ext/ssv.cpp
45
+ - ext/ssv.hpp
46
+ - test/test_tx.rb
47
+ has_rdoc: true
48
+ homepage: http://gimite.net/en/index.php?tx-ruby
49
+ licenses: []
50
+
51
+ post_install_message:
52
+ rdoc_options:
53
+ - --quiet
54
+ - --title
55
+ - tx-ruby Reference
56
+ - --main
57
+ - README.txt
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ hash: 3
66
+ segments:
67
+ - 0
68
+ version: "0"
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ none: false
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ hash: 3
75
+ segments:
76
+ - 0
77
+ version: "0"
78
+ requirements: []
79
+
80
+ rubyforge_project:
81
+ rubygems_version: 1.3.7
82
+ signing_key:
83
+ specification_version: 3
84
+ summary: Ruby 1.8/1.9 binding of Tx, a library for a compact trie data structure
85
+ test_files: []
86
+