distributed-trie 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1 @@
1
+
data/COPYING ADDED
@@ -0,0 +1,33 @@
1
+ Copyright and condition of use of main portion of the source:
2
+ -----------------------------------------------------------------------------
3
+
4
+ Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions
8
+ are met:
9
+
10
+ 1. Redistributions of source code must retain the above copyright
11
+ notice, this list of conditions and the following disclaimer.
12
+
13
+ 2. Redistributions in binary form must reproduce the above copyright
14
+ notice, this list of conditions and the following disclaimer in the
15
+ documentation and/or other materials provided with the distribution.
16
+
17
+ 3. Neither the name of the authors nor the names of its contributors
18
+ may be used to endorse or promote products derived from this
19
+ software without specific prior written permission.
20
+
21
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27
+ TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
+
33
+ -----------------------------------------------------------------------------
@@ -0,0 +1,59 @@
1
+ # What is distributed-trie
2
+
3
+ * distributed-trie is a trie library on key-value store.
4
+ * It is scalable.
5
+ * It supports Tokyo Cabinet / memcached / gdbm / pure hash / Redis / DynamoDB / SimpleDB
6
+
7
+ ## Why I developed it
8
+ * I need a trie library for Sekka ( a Japanese input method ).
9
+ * I need a trie library which is written in pure Ruby.
10
+ * I need a trie library which can scale out.
11
+
12
+ ## Installing
13
+
14
+ gem install distributed-trie
15
+
16
+ ## Features
17
+ * Add keyword to trie.
18
+ * Delete keyword from trie. ( not implemented yet )
19
+ * commonPrefixSearch by keyword.
20
+ * fuzzySearch by Jaro-Winkler distance.
21
+ * search with user-defined-function.
22
+
23
+ ## Architecture
24
+ * The distributed-trie gem only manages the trie data structure.
25
+ * You should manage your application data which corresponds to trie key.
26
+
27
+ ![Figure]( http://pix.am/kYLz.png )
28
+
29
+
30
+ ## Sample code
31
+
32
+ require 'distributedtrie'
33
+ require 'distributedtrie/kvs/tokyocabinet'
34
+ kvsTc = DistributedTrie::KvsTc.new( '/tmp/distributed-trie.tch' )
35
+ trie = DistributedTrie::Trie.new( kvsTc, "Sample::" )
36
+ trie.addKey!( "apple" )
37
+ trie.addKey!( "application" )
38
+ trie.addKey!( "orange" )
39
+ trie.commit!
40
+ result = trie.commonPrefixSearch( "app" )
41
+ print result
42
+ # => [ "apple", "application" ]
43
+ result = trie.fuzzySearch( "app", 0.80 )
44
+ print result
45
+ # => [[0.9066666666666667, "apple"], [0.8236914600550963, "application"]]
46
+
47
+ ## Requires
48
+ - Ruby 1.9.1 or higher
49
+ - JRuby 1.6.6 or higher
50
+ - fuzzy-string-match gem
51
+
52
+ ## Author
53
+ - Copyright (C) Kiyoka Nishiyama <kiyoka@sumibi.org>
54
+
55
+ ## See also
56
+ - <http://github.com/kiyoka/distributed-trie>
57
+
58
+ ## License
59
+ - BSD License
@@ -0,0 +1,72 @@
1
+ # -*- mode: ruby; -*-
2
+ # Rakefile for Distributed-Trie
3
+ # Release Engineering
4
+ # 1. edit the VERSION.yml file
5
+ # 2. rake
6
+ # 3. rake gemspec && rake build
7
+ # to generate distributed-trie-x.x.x.gem
8
+ # 4. install distributed-trie-x.x.x.gem to clean environment and test
9
+ # 5. rake release
10
+ # 6. gem push pkg/distributed-trie-x.x.x.gem ( needs gem version 1.3.6 or higher. Run "gem update --system" to update )
11
+
12
+ require 'rake'
13
# Register the gem packaging tasks through Jeweler.
# Jeweler is optional: when 'jeweler2' cannot be loaded, a hint is printed
# and the rest of this Rakefile is still evaluated.
begin
  require 'jeweler2'

  Jeweler::Tasks.new do |spec|
    # Files shipped inside the gem package.
    packaged_files = FileList[
      'Rakefile',
      '.gemtest',
      'VERSION.yml',
      'README.md',
      'COPYING',
      'lib/**/*.rb',
      'lib/*.png',
      'test/*'
    ]

    spec.name        = "distributed-trie"
    spec.summary     = "distributed-trie is a trie library on key-value store."
    spec.description = "distributed-trie is a trie library on key-value store."
    spec.email       = "kiyoka@sumibi.org"
    spec.homepage    = "http://github.com/kiyoka/distributed-trie"
    spec.authors     = ["Kiyoka Nishiyama"]
    spec.files       = packaged_files.to_a

    spec.add_development_dependency "rake"
    spec.add_development_dependency "rspec"
    spec.add_dependency( "fuzzy-string-match", ">= 0.9.3" )
  end
rescue LoadError
  puts 'Jeweler2 not available. If you want to build a gemfile, please install with "sudo gem install jeweler2"'
end
39
+
40
# Running `rake` without arguments runs the :test task first.
task :default => [:test] do
end

# Run each spec file under ./test in turn (timed with the shell's `time`),
# using the Emacs-friendly custom formatter. `sh` aborts on the first failing
# command, so later spec files are skipped after a failure.
task :test do
  %w[internal usecase bigdata].each do |suite|
    sh "time ruby -I ./lib `which rspec` -b ./test/#{suite}_spec.rb -r ./test/rspec_formatter_for_emacs.rb -f CustomFormatter"
  end
end

# Word list file (looked up under ./data) handed to the benchmark script.
DATA = "aspell.dump.txt"
#DATA = "wlist_match1.txt"   # URL http://www.keithv.com/software/wlist/wlist_match1.zip

# One rake task per benchmark sub-command: the value is passed to
# ./benchmark/bench.rb as its first argument, followed by the data file.
{
  :bench_setup  => "setup",
  :bench        => "main",
  :bench_random => "random"
}.each do |task_name, mode|
  task task_name do
    sh "ruby -I ./lib ./benchmark/bench.rb #{mode} ./data/#{DATA}"
  end
end

# Write the output of `tchmgr list -pv` for /tmp/distributed-trie.tch
# to /tmp/distributed-trie.txt.
task :dumptc do
  sh "bash -c 'tchmgr list -pv /tmp/distributed-trie.tch > /tmp/distributed-trie.txt'"
end

# Generate ./data/aspell.dump.txt from aspell's English master dictionary.
task :data do
  sh "aspell -l en dump master > ./data/aspell.dump.txt"
end
72
+
@@ -0,0 +1,4 @@
1
+ ---
2
+ :major: 0
3
+ :minor: 8
4
+ :patch: 0
@@ -0,0 +1,38 @@
1
+ #
2
+ # Distributed Trie
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ require 'distributedtrie/kvsif'
36
+ require 'distributedtrie/trie'
37
+
38
+
@@ -0,0 +1,55 @@
1
+ #
2
+ # Distributed Trie / KvsBase
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ require 'distributedtrie/kvsif'
36
module DistributedTrie

  # Shared put!/get behaviour for the key-value store back ends
  # (the original comment calls this the "pure hash implementation").
  #
  # This class never assigns @db itself; a subclass such as KvsDbm sets it in
  # its constructor. @db only has to respond to [] and []=.
  class KvsBase < KvsIf
    # Store +value+ under +key+.
    #
    # Both strings are written as ASCII-8BIT (binary) copies, so the caller's
    # strings keep their encoding and frozen strings are accepted.
    #
    # key::     String key
    # value::   String value
    # timeout:: accepted for interface compatibility; not used here
    #
    # Returns the binary copy of +value+ that was stored.
    def put!( key, value, timeout = 0 )
      @db[ binary_copy( key ) ] = binary_copy( value )
    end

    # Fetch the value stored under +key+.
    #
    # The key is normalised to binary exactly as put! does. Without this, a
    # Hash-backed store cannot find a non-ASCII key through a UTF-8 string,
    # because Ruby treats strings with the same bytes but different encodings
    # as different hash keys unless they are pure ASCII.
    #
    # Returns a UTF-8 tagged copy of the stored value, or +fallback+ when the
    # store yields nil/false for the key.
    def get( key, fallback = false )
      val = @db[ binary_copy( key ) ]
      return fallback unless val
      # dup: do not re-tag the object that may still live inside the store.
      val.dup.force_encoding("UTF-8")
    end

    # This back end is always usable.
    def enabled?() true end

    private

    # Return a copy of +str+ tagged as ASCII-8BIT, leaving +str+ untouched.
    def binary_copy( str )
      str.dup.force_encoding("ASCII-8BIT")
    end
  end
end
@@ -0,0 +1,45 @@
1
+ #
2
+ # Distributed Trie / KvsDbm
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ require 'distributedtrie/kvs/base'
36
+ require 'dbm'
37
module DistributedTrie

  # Key-value store back end that keeps its data in a DBM database file.
  # put!/get/enabled? are inherited from KvsBase and operate on @db.
  class KvsDbm < KvsBase
    # dbFilename:: file name handed to DBM.new
    def initialize(dbFilename)
      @db = DBM.new(dbFilename)
    end
  end
end
@@ -0,0 +1,81 @@
1
+ #
2
+ # Distributed Trie / KvsDydb
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ require 'distributedtrie/kvs/base'
36
module DistributedTrie

  # AWS DynamoDB implementation.
  #
  # If the 'aws-sdk' gem cannot be loaded, a stand-in KvsDydb is defined
  # instead whose enabled? returns false, so callers can test availability.
  begin
    require 'aws-sdk'
    class KvsDydb < KvsBase
      # Connect to DynamoDB and select the table used as the store.
      #
      # Credentials are read from the AMAZON_ACCESS_KEY_ID and
      # AMAZON_SECRET_ACCESS_KEY environment variables. The endpoint is fixed
      # to the ap-northeast-1 region.
      #
      # tableName:: name of the DynamoDB table; its hash key is declared as
      #             the string attribute :key
      def initialize( tableName )
        access_key_id     = ENV['AMAZON_ACCESS_KEY_ID']
        secret_access_key = ENV['AMAZON_SECRET_ACCESS_KEY']
        printf( "Amazon DynamoDB access_key_id: %s\n", access_key_id )
        # Never echo the secret itself (it would end up in terminals and
        # logs); only report whether it is configured.
        printf( "Amazon DynamoDB secret_access_key: %s\n", secret_access_key ? "(set)" : "(not set)" )
        @tableName = tableName
        # NOTE(review): :use_ssl => false sends requests without TLS. Kept
        # as-is to preserve behaviour; consider enabling SSL.
        @db = AWS::DynamoDB.new(
                                :access_key_id      => access_key_id,
                                :secret_access_key  => secret_access_key,
                                :dynamo_db_endpoint => 'dynamodb.ap-northeast-1.amazonaws.com',
                                :use_ssl            => false )
        @table = @db.tables[ @tableName ]
        @table.hash_key = [ :key, :string ]
      end

      # Create an item with attributes 'key' and 'val' and log the operation.
      #
      # The value is stored as an ASCII-8BIT copy. The caller's string is left
      # untouched: re-tagging it in place made the log line below raise
      # Encoding::CompatibilityError when both key and value held non-ASCII
      # characters.
      #
      # timeout:: accepted for interface compatibility; not used here
      def put!( key, value, timeout = 0 )
        @table.items.create( 'key' => key, 'val' => value.dup.force_encoding("ASCII-8BIT") )
        puts "DynamoDB put: #{key} , #{value}"
      end

      # Read the 'val' attribute of the item stored under +key+.
      #
      # Returns the value tagged as UTF-8, or +fallback+ when the item has no
      # (truthy) 'val' attribute.
      def get( key, fallback = false )
        val = nil
        @table.items[key].attributes.each { |name, value|
          val = value if name == "val"
        }
        return fallback unless val

        puts "DynamoDB get: #{key},#{val}"
        val.force_encoding("UTF-8")
      end

      def enabled?() true end

      attr_reader :db
    end
  rescue LoadError
    # aws-sdk is not installed: provide a disabled placeholder.
    class KvsDydb < KvsBase
      def initialize( tableName ) end
      def enabled?() false end
    end
  end
end
@@ -0,0 +1,52 @@
1
+ #
2
+ # Distributed Trie / KvsMemcache
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ require 'distributedtrie/kvs/base'
36
+ require 'memcache'
37
module DistributedTrie

  # Memcache client implementation.
  # get and enabled? are inherited from KvsBase and operate on @db.
  class KvsMemcache < KvsBase
    # host:: server host name
    # port:: server port; a String or an Integer (interpolation is used so an
    #        Integer port no longer raises TypeError)
    def initialize( host = "localhost", port = "1978" )
      @db = MemCache.new(
                         "#{host}:#{port}",
                         :connect_timeout => 1000.0,
                         :timeout         => 1000.0 )
    end

    # Store +value+ under +key+, passing +timeout+ through to MemCache#set.
    #
    # Key and value are sent as ASCII-8BIT copies, so the caller's strings
    # keep their encoding and frozen strings are accepted.
    def put!( key, value, timeout = 0 )
      binary_key   = key.dup.force_encoding("ASCII-8BIT")
      binary_value = value.dup.force_encoding("ASCII-8BIT")
      @db.set( binary_key, binary_value, timeout )
    end
  end
end