distributed-trie 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+
data/COPYING ADDED
@@ -0,0 +1,33 @@
1
+ Copyright and condition of use of main portion of the source:
2
+ -----------------------------------------------------------------------------
3
+
4
+ Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions
8
+ are met:
9
+
10
+ 1. Redistributions of source code must retain the above copyright
11
+ notice, this list of conditions and the following disclaimer.
12
+
13
+ 2. Redistributions in binary form must reproduce the above copyright
14
+ notice, this list of conditions and the following disclaimer in the
15
+ documentation and/or other materials provided with the distribution.
16
+
17
+ 3. Neither the name of the authors nor the names of its contributors
18
+ may be used to endorse or promote products derived from this
19
+ software without specific prior written permission.
20
+
21
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27
+ TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
+
33
+ -----------------------------------------------------------------------------
@@ -0,0 +1,59 @@
1
+ # What is distributed-trie
2
+
3
+ * distributed-trie is a trie library built on top of a key-value store.
4
+ * It is scalable.
5
+ * It supports Tokyo Cabinet / memcached / gdbm / pure hash / Redis / DynamoDB / SimpleDB
6
+
7
+ ## Why I developed it
8
+ * I need a trie library for Sekka ( a Japanese input method ).
9
+ * I need a trie library which is written in pure Ruby.
10
+ * I need a trie library which can scale out.
11
+
12
+ ## Installing
13
+
14
+ gem install distributed-trie
15
+
16
+ ## Features
17
+ * Add keyword to trie.
18
+ * Delete keyword from trie. ( not implemented yet )
19
+ * commonPrefixSearch by keyword.
20
+ * fuzzySearch by Jaro-Winkler distance.
21
+ * search with user-defined-function.
22
+
23
+ ## Architecture
24
+ * The distributed-trie gem only manages the trie data structure.
25
+ * You should manage the application data that corresponds to each trie key yourself.
26
+
27
+ ![Figure]( http://pix.am/kYLz.png )
28
+
29
+
30
+ ## Sample code
31
+
32
+ require 'distributedtrie'
33
+ require 'distributedtrie/kvs/tokyocabinet'
34
+ kvsTc = DistributedTrie::KvsTc.new( '/tmp/distributed-trie.tch' )
35
+ trie = DistributedTrie::Trie.new( kvsTc, "Sample::" )
36
+ trie.addKey!( "apple" )
37
+ trie.addKey!( "application" )
38
+ trie.addKey!( "orange" )
39
+ trie.commit!
40
+ result = trie.commonPrefixSearch( "app" )
41
+ print result
42
+ # => [ "apple", "application" ]
43
+ result = trie.fuzzySearch( "app", 0.80 )
44
+ print result
45
+ # => [[0.9066666666666667, "apple"], [0.8236914600550963, "application"]]
46
+
47
+ ## Requires
48
+ - Ruby 1.9.1 or higher
49
+ - JRuby 1.6.6 or higher
50
+ - fuzzy-string-match gem
51
+
52
+ ## Author
53
+ - Copyright (C) Kiyoka Nishiyama <kiyoka@sumibi.org>
54
+
55
+ ## See also
56
+ - <http://github.com/kiyoka/distributed-trie>
57
+
58
+ ## License
59
+ - BSD License
@@ -0,0 +1,72 @@
1
+ # -*- mode: ruby; -*-
2
+ # Rakefile for Distributed-Trie
3
+ # Release Engineering
4
+ # 1. edit the VERSION.yml file
5
+ # 2. rake
6
+ # 3. rake gemspec && rake build
7
+ # to generate distributed-trie-x.x.x.gem
8
+ # 4. install distributed-trie-x.x.x.gem to clean environment and test
9
+ # 5. rake release
10
+ # 6. gem push pkg/distributed-trie-x.x.x.gem ( needs gem version 1.3.6 or higher. Please "gem update --system" to update )
11
+
12
+ require 'rake'
13
+ begin # packaging tasks are optional: they are only defined when jeweler2 can be loaded
14
+ require 'jeweler2'
15
+ Jeweler::Tasks.new do |gemspec| # gem metadata; presumably backs the gemspec/build/release steps listed in the header
16
+ gemspec.name = "distributed-trie"
17
+ gemspec.summary = "distributed-trie is a trie library on key-value store."
18
+ gemspec.description = "distributed-trie is a trie library on key-value store."
19
+ gemspec.email = "kiyoka@sumibi.org"
20
+ gemspec.homepage = "http://github.com/kiyoka/distributed-trie"
21
+ gemspec.authors = ["Kiyoka Nishiyama"]
22
+ gemspec.files = FileList[ # files shipped inside the gem
23
+ 'Rakefile',
24
+ '.gemtest',
25
+ 'VERSION.yml',
26
+ 'README.md',
27
+ 'COPYING',
28
+ 'lib/**/*.rb',
29
+ 'lib/*.png',
30
+ 'test/*'
31
+ ].to_a
32
+ gemspec.add_development_dependency "rake"
33
+ gemspec.add_development_dependency "rspec"
34
+ gemspec.add_dependency( "fuzzy-string-match", ">= 0.9.3" ) # the only runtime dependency declared here
35
+ end
36
+ rescue LoadError # jeweler2 is not installed: print a hint and carry on so the tasks below still load
37
+ puts 'Jeweler2 not available. If you want to build a gemfile, please install with "sudo gem install jeweler2"'
38
+ end
39
+
40
+ task :default => [:test] do # `rake` with no arguments runs :test; the task body itself is empty
41
+ end
42
+ # Runs the internal, usecase and bigdata spec files in turn, each timed and reported through the CustomFormatter loaded from test/rspec_formatter_for_emacs.rb.
43
+ task :test do
44
+ sh "time ruby -I ./lib `which rspec` -b ./test/internal_spec.rb -r ./test/rspec_formatter_for_emacs.rb -f CustomFormatter"
45
+ sh "time ruby -I ./lib `which rspec` -b ./test/usecase_spec.rb -r ./test/rspec_formatter_for_emacs.rb -f CustomFormatter"
46
+ sh "time ruby -I ./lib `which rspec` -b ./test/bigdata_spec.rb -r ./test/rspec_formatter_for_emacs.rb -f CustomFormatter"
47
+ end
48
+ # Word list file ( looked up under ./data/ ) that the bench* tasks pass to benchmark/bench.rb.
49
+ DATA="aspell.dump.txt"
50
+ #DATA="wlist_match1.txt"
51
+ # Invokes the benchmark script in "setup" mode with the selected word list.
52
+ task :bench_setup do
53
+ sh "ruby -I ./lib ./benchmark/bench.rb setup ./data/#{DATA}"
54
+ end
55
+ # Invokes the benchmark script in "main" mode with the selected word list.
56
+ task :bench do
57
+ # wlist_match1 word list download: http://www.keithv.com/software/wlist/wlist_match1.zip
58
+ sh "ruby -I ./lib ./benchmark/bench.rb main ./data/#{DATA}"
59
+ end
60
+ # Invokes the benchmark script in "random" mode with the selected word list.
61
+ task :bench_random do
62
+ sh "ruby -I ./lib ./benchmark/bench.rb random ./data/#{DATA}"
63
+ end
64
+ # Writes the output of `tchmgr list -pv` for /tmp/distributed-trie.tch to /tmp/distributed-trie.txt.
65
+ task :dumptc do
66
+ sh "bash -c 'tchmgr list -pv /tmp/distributed-trie.tch > /tmp/distributed-trie.txt'"
67
+ end
68
+ # Regenerates ./data/aspell.dump.txt from aspell's English master dictionary dump.
69
+ task :data do
70
+ sh "aspell -l en dump master > ./data/aspell.dump.txt"
71
+ end
72
+
@@ -0,0 +1,4 @@
1
+ ---
2
+ :major: 0
3
+ :minor: 8
4
+ :patch: 0
@@ -0,0 +1,38 @@
1
+ #
2
+ # Distributed Trie
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ require 'distributedtrie/kvsif'
36
+ require 'distributedtrie/trie'
37
+
38
+
@@ -0,0 +1,55 @@
1
+ #
2
+ # Distributed Trie / KvsIFs
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ require 'distributedtrie/kvsif'
36
+ module DistributedTrie
37
+
38
+ # pure hash implementation
39
+ class KvsBase < KvsIf
40
+ def put!( key, value, timeout = 0 )
41
+ @db[ key.force_encoding("ASCII-8BIT") ] = value.force_encoding("ASCII-8BIT")
42
+ end
43
+
44
+ def get( key, fallback = false )
45
+ val = @db[ key ]
46
+ if val
47
+ val.force_encoding("UTF-8")
48
+ else
49
+ fallback
50
+ end
51
+ end
52
+
53
+ def enabled?() true end
54
+ end
55
+ end
@@ -0,0 +1,45 @@
1
+ #
2
+ # Distributed Trie / KvsDbm
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ require 'distributedtrie/kvs/base'
36
+ require 'dbm'
37
+ module DistributedTrie
38
+
39
+ # DBM-backed store: only the constructor is defined here; put!, get and enabled? are inherited from KvsBase.
40
+ class KvsDbm < KvsBase
41
+ def initialize( dbFilename )
42
+ @db = DBM.new( dbFilename ) # the DBM handle becomes the Hash-like @db used by the inherited put!/get
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,81 @@
1
+ #
2
+ # Distributed Trie / KvsDydb
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ require 'distributedtrie/kvs/base'
36
+ module DistributedTrie
37
+
38
+ # AWS DynamoDB implementation
39
+ begin
40
+ require 'aws-sdk'
41
+ class KvsDydb < KvsBase
42
+ def initialize( tableName )
43
+ printf( "Amazon DynamoDB access_key_id: %s\n", ENV['AMAZON_ACCESS_KEY_ID'])
44
+ printf( "Amazon DynamoDB secret_access_key: %s\n", ENV['AMAZON_SECRET_ACCESS_KEY'])
45
+ @tableName = tableName
46
+ @db = AWS::DynamoDB.new(
47
+ :access_key_id => ENV['AMAZON_ACCESS_KEY_ID'],
48
+ :secret_access_key => ENV['AMAZON_SECRET_ACCESS_KEY'],
49
+ :dynamo_db_endpoint => 'dynamodb.ap-northeast-1.amazonaws.com',
50
+ :use_ssl => false )
51
+ @table = @db.tables[ @tableName ]
52
+ @table.hash_key = [ :key, :string ]
53
+ end
54
+ def put!( key, value, timeout = 0 )
55
+ item = @table.items.create( 'key' => key, 'val' => value.force_encoding("ASCII-8BIT"))
56
+ puts "DynamoDB put: " + key + " , " + value
57
+ end
58
+ def get( key, fallback = false )
59
+ item = @table.items[key]
60
+ val = nil
61
+ item.attributes.each { |name,value |
62
+ val = value if name == "val"
63
+ }
64
+ if val
65
+ puts "DynamoDB get: " + key + "," + val
66
+ val.force_encoding("UTF-8")
67
+ else
68
+ fallback
69
+ end
70
+ end
71
+ def enabled?() true end
72
+
73
+ attr_reader :db
74
+ end
75
+ rescue LoadError
76
+ class KvsDydb < KvsBase
77
+ def initialize( tableName ) end
78
+ def enabled?() false end
79
+ end
80
+ end
81
+ end
@@ -0,0 +1,52 @@
1
+ #
2
+ # Distributed Trie / KvsMemcache
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ require 'distributedtrie/kvs/base'
36
+ require 'memcache'
37
+ module DistributedTrie
38
+
39
+ # Memcache client implementation
40
+ class KvsMemcache < KvsBase
41
+ def initialize( host = "localhost", port = "1978" )
42
+ @db = MemCache.new(
43
+ host + ":" + port,
44
+ :connect_timeout => 1000.0,
45
+ :timeout => 1000.0 )
46
+ end
47
+
48
+ def put!( key, value, timeout = 0 )
49
+ @db.set( key.force_encoding("ASCII-8BIT"), value.force_encoding("ASCII-8BIT"), timeout )
50
+ end
51
+ end
52
+ end