tx 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,219 @@
1
+ require "tx_core"
2
+ require "forwardable"
3
+
4
+
5
module Tx #:nodoc: all

  # Helpers shared by Index and Map. The module is both extended (for the
  # class-level macro def_wrapper_methods) and included (for the
  # instance-level encoding/byte helpers) by those classes.
  module Util

    module_function

    # Defines wrapper methods which perform boundary checking of pos and len.
    #
    # For every name in +methods+ an instance method of the same name is
    # defined. Each generated method accepts (str, pos, len, *opt) and:
    # * raises ArgumentError when pos is negative,
    # * treats a missing pos as 0 and clamps pos to the byte size of str,
    # * replaces a missing, negative or too large len with the number of
    #   bytes remaining after pos,
    # * forwards the call to the same-named method of @unsafe and passes the
    #   result through add_encoding.
    def def_wrapper_methods(*methods)
      methods.each() do |name|
        define_method(name) do |*args|
          (str, pos, len, *opt) = args
          raise(ArgumentError, "argument pos is negative") if pos && pos < 0
          str_len = bytesize(str)
          pos ||= 0
          pos = str_len if pos > str_len
          len = str_len - pos if !len || len < 0 || len > str_len - pos
          add_encoding(@unsafe.__send__(name, str, pos, len, *opt))
        end
      end
    end

    if RUBY_VERSION >= "1.9.0"

      # Encoding used when the caller does not specify one:
      # Encoding.default_internal if set, UTF-8 otherwise.
      def default_encoding
        return Encoding.default_internal || Encoding::UTF_8
      end

      # Tags +obj+ in place with @encoding when it is a String; Arrays are
      # walked recursively. Any other object is left alone. Returns +obj+.
      def add_encoding(obj)
        case obj
          when Array
            obj.each(){ |e| add_encoding(e) }
          when String
            obj.force_encoding(@encoding)
        end
        return obj
      end

      # Returns a copy of +str+ tagged as ASCII-8BIT so that slicing it
      # works on byte offsets. The argument itself is not modified.
      def to_binary(str)
        return str.dup().force_encoding(Encoding::ASCII_8BIT)
      end

      # Length of +str+ in bytes.
      def bytesize(str)
        return str.bytesize
      end

    else

      # Ruby 1.8 branch: no encoding is used, so this is always nil.
      def default_encoding
        return nil
      end

      # Ruby 1.8 branch: returns +obj+ unchanged.
      def add_encoding(obj)
        return obj
      end

      # Ruby 1.8 branch: returns +str+ itself (no copy is made).
      def to_binary(str)
        return str
      end

      # Ruby 1.8 branch: String#length is used as the byte length.
      def bytesize(str)
        return str.length
      end

    end

  end

  # Wrapper of UnsafeIndex. Boundary checking of pos/len and some methods are added.
  class Index

    extend(Forwardable)
    extend(Util)
    include(Util)
    include(Enumerable)

    class << self
      alias open new
    end

    # +arg+ is either an already opened UnsafeIndex, which is wrapped as is,
    # or a value handed to UnsafeIndex#open. +encoding+ is applied to the
    # strings returned by the wrapper methods and defaults to
    # default_encoding().
    #
    # Raises IOError when UnsafeIndex#open returns a false value.
    def initialize(arg, encoding = nil)
      if arg.is_a?(UnsafeIndex)
        @unsafe = arg
      else
        @unsafe = UnsafeIndex.new()
        if !@unsafe.open(arg)
          raise(IOError, "failed to open #{arg}")
        end
      end
      @encoding = encoding || default_encoding()
    end

    attr_reader(:encoding)
    def_delegators(:@unsafe, :num_keys, :result_log, :error_log)
    def_wrapper_methods(:longest_prefix, :include, :search_prefixes, :search_expansions)
    alias common_prefix_search search_prefixes
    alias predictive_search search_expansions
    alias include? include
    alias size num_keys

    # Short representation showing only the class and object id.
    def inspect()
      return "\#<%p:0x%x>" % [self.class, self.object_id]
    end

    # Result of search_expansions for the empty string.
    def to_a()
      return search_expansions("")
    end

    # Iterates over the elements of to_a().
    def each(&block)
      to_a().each(&block)
    end

    # Walks +str+ from left to right by byte position and asks
    # longest_prefix for a match at each position. A non-negative result is
    # treated as a match of that many bytes.
    #
    # With a block, yields (matched_string, byte_position) per match and
    # returns +str+. Without a block, returns an Array of
    # [matched_string, byte_position] pairs.
    def scan(str, &block)
      bstr = to_binary(str)
      str_len = bytesize(str)
      result = []
      pos = 0
      while pos < str_len
        plen = longest_prefix(str, pos)
        if plen >= 0
          args = [add_encoding(bstr[pos, plen]), pos]
          block ? yield(*args) : result.push(args)
        end
        # Always advance by at least one byte so the loop terminates even
        # when nothing (or a zero-length prefix) matched here.
        pos += plen > 0 ? plen : 1
      end
      return block ? str : result
    end

    # Returns a new string in which every match reported by scan is
    # replaced with the value of the block, which receives
    # (matched_string, byte_position). Text between matches is copied over
    # unchanged.
    def gsub(str, &block)
      bstr = to_binary(str)
      # String.new rather than a "" literal: the buffer is mutated below,
      # which must keep working when string literals are frozen.
      result = add_encoding(String.new())
      prev_pos = 0
      scan(str) do |match, pos|
        result << add_encoding(bstr[prev_pos...pos])
        result << yield(match, pos)
        prev_pos = pos + bytesize(match)
      end
      result << add_encoding(bstr[prev_pos..-1])
      return result
    end

  end

  # Wrapper of UnsafeMap. Boundary checking of pos/len and some methods are added.
  class Map

    extend(Forwardable)
    extend(Util)
    include(Util)
    include(Enumerable)

    class << self
      alias open new
    end

    # Opens the map via UnsafeMap#open(file_prefix) and wraps its key and
    # value indexes in Index objects sharing this map's encoding.
    # +encoding+ defaults to default_encoding().
    #
    # Raises IOError when UnsafeMap#open returns a false value.
    def initialize(file_prefix, encoding = nil)
      @unsafe = UnsafeMap.new()
      if !@unsafe.open(file_prefix)
        raise(IOError, "failed to open #{file_prefix}.key, #{file_prefix}.val or #{file_prefix}.map")
      end
      @encoding = encoding || default_encoding()
      @key_index = Index.new(@unsafe.key_index, @encoding)
      @value_index = Index.new(@unsafe.value_index, @encoding)
    end

    attr_reader(:key_index, :value_index, :encoding)
    def_wrapper_methods(:has_key, :lookup)
    alias has_key? has_key

    # Short representation showing only the class and object id.
    def inspect()
      return "\#<%p:0x%x>" % [self.class, self.object_id]
    end

    # Value stored for the key given by str/pos/len, or nil when has_key
    # reports that the key is absent.
    def [](str, pos = 0, len = -1)
      return has_key(str, pos, len) ? lookup(str, pos, len) : nil
    end

    # Number of elements returned by keys.
    def size
      return self.keys.size
    end

    # All keys, obtained by expanding the empty string in the key index.
    def keys
      return @key_index.search_expansions("")
    end

    # The looked-up value of every key, in the order of keys.
    def values
      return self.keys.map(){ |k| lookup(k) }
    end

    # Calls the block once per key.
    def each_key(&block)
      return self.keys.each(&block)
    end

    # Calls the block once per value.
    def each_value(&block)
      return self.values.each(&block)
    end

    # Yields one [key, value] Array per key.
    def each(&block)
      each_key(){ |k| yield([k, lookup(k)]) }
    end

    # Yields key and value as two separate block arguments per key.
    def each_pair(&block)
      each_key(){ |k| yield(k, lookup(k)) }
    end

    # Like Index#scan on the key index, with the looked-up value appended
    # to each match.
    #
    # With a block, yields (key, byte_position, value) and returns +str+.
    # Without a block, returns an Array of [key, byte_position, value].
    def scan(str, &block)
      result = []
      @key_index.scan(str) do |key, pos|
        args = [key, pos, lookup(key)]
        block ? yield(*args) : result.push(args)
      end
      return block ? str : result
    end

  end

end
@@ -0,0 +1,169 @@
1
+ # -*- encoding: UTF-8 -*-
2
+
3
# $KCODE is only assigned on Ruby 1.8; newer interpreters ignore it and
# print a warning, so the assignment is skipped there.
$KCODE = "u" if RUBY_VERSION < "1.9.0"
$LOAD_PATH.unshift("./lib", "./ext")
require "test/unit"
require "enumerator"
require "tempfile"
require "tx"

# Encoding handed to Tx::Index / Tx::Map by the tests below. The Encoding
# constant is only referenced on Ruby 1.9+; on 1.8 nil is passed instead.
TEST_ENCODING = RUBY_VERSION >= "1.9.0" ? Encoding::UTF_8 : nil
11
+
12
# Tests for Tx::Index using a small set of ASCII keys.
class TC_TxIndex < Test::Unit::TestCase

  # Builds an index of six keys into a temp file and opens it.
  def setup
    @builder = Tx::Builder.new
    @builder.add_all(["foo", "ho", "hog", "hoga", "hoge", "hogeshi"])
    @tempfile = Tempfile.new("tx_test")
    @builder.build(@tempfile.path)
    @index = Tx::Index.open(@tempfile.path, TEST_ENCODING)
  end

  # Covers key count, prefix/expansion searches and enumeration.
  def test_basic
    every_key = ["foo", "ho", "hog", "hoga", "hoge", "hogeshi"]
    hog_prefixes = ["ho", "hog"]
    hog_expansions = ["hog", "hoga", "hoge", "hogeshi"]

    assert_equal(6, @index.num_keys)
    assert_equal(4, @index.longest_prefix("hogeshaa"))
    assert_equal(6, @index.longest_prefix("hogeshaa", 0, -1, true))
    assert(@index.include("hoge"))
    assert(!@index.include("hogera"))
    assert_equal(hog_prefixes, @index.search_prefixes("hog"))
    assert_equal(hog_prefixes, @index.search_prefixes("aahog", 2, 5))
    assert_equal(hog_expansions, @index.search_expansions("hog").sort)
    assert_equal(hog_expansions, @index.search_expansions("aahogeshi", 2, 3).sort)
    assert_equal(every_key, @index.to_a.sort)
    assert_equal(every_key, @index.enum_for(:each).to_a.sort)
  end

  # scan must report the same matches with and without a block.
  def test_scan
    text = "hohogefugahogaboke"
    matches = [["ho", 0], ["hoge", 2], ["hoga", 10]]
    assert_equal(matches, @index.scan(text))

    yielded = []
    @index.scan(text) { |found, offset| yielded.push([found, offset]) }
    assert_equal(matches, yielded)
  end

  # gsub replaces each match with the block's return value.
  def test_gsub
    replaced = @index.gsub("hohogefugahogaboke") do |found, _offset|
      found.upcase
    end
    assert_equal("HOHOGEfugaHOGAboke", replaced)
  end

  # Opening a missing index file raises IOError.
  def test_open
    assert_raise(IOError) { Tx::Index.new("noexist.index") }
  end

  # Neither the builder nor the index should have logged an error.
  def test_no_error_log
    [@builder, @index].each do |subject|
      assert_equal("", subject.error_log)
    end
  end

end
64
+
65
# Tests for Tx::Index with multi-byte (Japanese) keys.
class TC_TxIndexMultiByte < Test::Unit::TestCase

  # Builds an index of five multi-byte keys into a temp file and opens it.
  def setup
    @builder = Tx::Builder.new
    @builder.add_all(["ふー", "ほ", "ほが", "ほげ", "ほげし"])
    @tempfile = Tempfile.new("tx_test")
    @builder.build(@tempfile.path)
    @index = Tx::Index.open(@tempfile.path, TEST_ENCODING)
  end

  # Returned strings carry the encoding given to the index (Ruby 1.9+ only).
  def test_encoding
    return unless RUBY_VERSION >= "1.9.0"

    assert_equal(TEST_ENCODING, @index.search_prefixes("ほが")[0].encoding)
    assert_equal(TEST_ENCODING, @index.search_expansions("ほが")[0].encoding)
    assert_equal(TEST_ENCODING, @index.to_a[0].encoding)
  end

  # Expected positions are byte offsets, not character offsets.
  def test_scan
    text = "ほほげふがほがぼけ"
    matches = [["ほ", 0], ["ほげ", 3], ["ほが", 15]]
    assert_equal(matches, @index.scan(text))

    yielded = []
    @index.scan(text) { |found, offset| yielded.push([found, offset]) }
    assert_equal(matches, yielded)
  end

  # gsub must keep multi-byte text intact around the replaced matches.
  def test_gsub
    replaced = @index.gsub("ほほげふがほがぼけ") do |found, _offset|
      found.gsub(/ほ/, "ホ").gsub(/が/, "ガ").gsub(/げ/, "ゲ")
    end
    assert_equal("ホホゲふがホガぼけ", replaced)
  end

end
102
+
103
# Tests for Tx::Map using ASCII keys and values.
class TC_TxMap < Test::Unit::TestCase

  # Builds a three-entry map into temp files and opens it.
  def setup
    @builder = Tx::MapBuilder.new
    @builder.add("ho", "foo")
    @builder.add_all(["hoge", "bar", "hogeshi", "foobar"])
    @tempfile = Tempfile.new("tx_test")
    @builder.build(@tempfile.path)
    @map = Tx::Map.open(@tempfile.path, TEST_ENCODING)
  end

  # Covers lookups, the underlying indexes and every enumeration method.
  def test_basic
    sorted_keys = ["ho", "hoge", "hogeshi"]
    sorted_values = ["bar", "foo", "foobar"]
    sorted_pairs = [["ho", "foo"], ["hoge", "bar"], ["hogeshi", "foobar"]]

    assert(@map.has_key("hoge"))
    assert_equal("bar", @map.lookup("hoge"))
    assert_equal("bar", @map["hoge"])
    assert(!@map.has_key("foo"))
    assert_equal("", @map.lookup("foo"))
    assert_equal(nil, @map["foo"])
    assert(@map.key_index.include("hoge"))
    assert(@map.value_index.include("foo"))
    assert_equal(sorted_keys, @map.keys.sort)
    assert_equal(sorted_values, @map.values.sort)
    assert_equal(sorted_keys, @map.enum_for(:each_key).sort)
    assert_equal(sorted_values, @map.enum_for(:each_value).sort)
    assert_equal(sorted_pairs, @map.enum_for(:each).sort)
    assert_equal(sorted_pairs, @map.enum_for(:each_pair).sort)
  end

  # scan must report the same triples with and without a block.
  def test_scan
    text = "hogehogahoyo"
    matches = [["hoge", 0, "bar"], ["ho", 4, "foo"], ["ho", 8, "foo"]]
    assert_equal(matches, @map.scan(text))

    yielded = []
    @map.scan(text) { |key, offset, value| yielded.push([key, offset, value]) }
    assert_equal(matches, yielded)
  end

end
147
+
148
# Tests for Tx::Map with multi-byte (Japanese) keys and values.
class TC_TxMapMultiByte < Test::Unit::TestCase

  # Builds a three-entry multi-byte map into temp files and opens it.
  def setup
    @builder = Tx::MapBuilder.new
    @builder.add("ほ", "ふー")
    @builder.add_all(["ほげ", "ばー", "ほげし", "ふーばー"])
    @tempfile = Tempfile.new("tx_test")
    @builder.build(@tempfile.path)
    @map = Tx::Map.open(@tempfile.path, TEST_ENCODING)
  end

  # The map, its indexes and its strings all carry the requested encoding
  # (Ruby 1.9+ only).
  def test_encoding
    return unless RUBY_VERSION >= "1.9.0"

    assert_equal(TEST_ENCODING, @map.lookup("ほげ").encoding)
    assert_equal(TEST_ENCODING, @map.key_index.encoding)
    assert_equal(TEST_ENCODING, @map.value_index.encoding)
    assert_equal(TEST_ENCODING, @map.keys[0].encoding)
    assert_equal(TEST_ENCODING, @map.values[0].encoding)
  end

end
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tx
3
+ version: !ruby/object:Gem::Version
4
+ hash: 21
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 5
10
+ version: 0.0.5
11
+ platform: ruby
12
+ authors:
13
+ - Hiroshi Ichikawa
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-09-19 00:00:00 +09:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: Ruby 1.8/1.9 binding of Tx, a library for a compact trie data structure
23
+ email: gimite+txruby@gmail.com
24
+ executables: []
25
+
26
+ extensions:
27
+ - ext/extconf.rb
28
+ extra_rdoc_files:
29
+ - README.txt
30
+ files:
31
+ - README.txt
32
+ - lib/tx.rb
33
+ - lib/i386-msvcrt/tx_core.so
34
+ - ext/depend
35
+ - ext/tx_swig.h
36
+ - ext/tx.cpp
37
+ - ext/tx_swig.i
38
+ - ext/swig.patch
39
+ - ext/Makefile
40
+ - ext/tx_swig_wrap.cxx
41
+ - ext/tx.hpp
42
+ - ext/tx_swig.cpp
43
+ - ext/extconf.rb
44
+ - ext/ssv.cpp
45
+ - ext/ssv.hpp
46
+ - test/test_tx.rb
47
+ has_rdoc: true
48
+ homepage: http://gimite.net/en/index.php?tx-ruby
49
+ licenses: []
50
+
51
+ post_install_message:
52
+ rdoc_options:
53
+ - --quiet
54
+ - --title
55
+ - tx-ruby Reference
56
+ - --main
57
+ - README.txt
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ hash: 3
66
+ segments:
67
+ - 0
68
+ version: "0"
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ none: false
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ hash: 3
75
+ segments:
76
+ - 0
77
+ version: "0"
78
+ requirements: []
79
+
80
+ rubyforge_project:
81
+ rubygems_version: 1.3.7
82
+ signing_key:
83
+ specification_version: 3
84
+ summary: Ruby 1.8/1.9 binding of Tx, a library for a compact trie data structure
85
+ test_files: []
86
+