fuzzzy 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,129 @@
1
+ require 'pp'
2
+ module Fuzzzy
3
+   module Server
4
+     # Grape-based JSON API (versioned under the 'v1' path prefix) that exposes
5
+     # server info, fuzzy search and index management endpoints.
6
+     class HTTP < Grape::API
7
+       # Raised by check_context! when a required parameter is missing.
8
+       class ParamsError < StandardError
9
+       end
10
+
11
+       format :json
12
+       default_format :json
13
+       error_format :json
14
+       version 'v1', :using => :path
15
+
16
+       # Turn every rescued error into a JSON body carrying its class name
17
+       # and message.
18
+       rescue_from :all do |e|
19
+         rack_response({:error => e.class.name, :message => e.message}.to_json)
20
+       end
21
+
22
+       helpers do
23
+         include Index
24
+
25
+         # Runs a search described by the request params.
26
+         # Requires :index_name, :index_method and :query; :distance, when
27
+         # given, is coerced to an Integer before the searcher is called.
28
+         def search
29
+           context = search_context
30
+           check_context!(:query, context)
31
+           context[:distance] = context[:distance].to_i if context[:distance]
32
+           # _searcher is not defined here; presumably provided by Index.
33
+           _searcher(context[:index_method]).search(context)
34
+         end
35
+
36
+         # Validates a context hash. Call as check_context!(*keys, context):
37
+         # the LAST argument is the hash, the preceding ones are extra keys
38
+         # required in addition to :index_name and :index_method.
39
+         # Raises ParamsError for the first key whose value is nil.
40
+         def check_context!(*keys)
41
+           context = keys.pop
42
+           ([:index_name, :index_method] + keys).each do |key|
43
+             raise ParamsError, "Parameter :#{key} not found" if context[key].nil?
44
+           end
45
+         end
46
+
47
+         # Builds a context hash from the request params, copying
48
+         # :index_name, :index_method and the given keys when they are set
49
+         # (nil/false values are left out).
50
+         def construct_context(*keys)
51
+           context = {}
52
+           ([:index_name, :index_method] + keys).each do |key|
53
+             context[key] = params[key] if params[key]
54
+           end
55
+           context
56
+         end
57
+
58
+         # Context for create/delete index requests.
59
+         def index_context
60
+           construct_context(:id, :dictionary_string)
61
+         end
62
+
63
+         # Context for search requests.
64
+         def search_context
65
+           construct_context(:query, :distance, :sort_by, :with_cache)
66
+         end
67
+       end
68
+
69
+       namespace :info do
70
+         # SECURITY: the credentials used to be hard-coded. They can now be
71
+         # overridden through the environment; the old values remain the
72
+         # defaults so existing deployments keep working. Set both
73
+         # variables in any non-development setup.
74
+         http_basic do |u, p|
75
+           u == ENV.fetch('FUZZZY_ADMIN_USER', 'admin') &&
76
+             p == ENV.fetch('FUZZZY_ADMIN_PASSWORD', 'password')
77
+         end
78
+
79
+         # Runtime information; stopwords are included only on request.
80
+         get do
81
+           info = {
82
+             :ruby => RUBY_VERSION,
83
+             :environment => Fuzzzy.env,
84
+             :redis => Fuzzzy.redis.client.id,
85
+             :root_dir => Fuzzzy.root.to_s
86
+           }
87
+           info[:stopwords] = Fuzzzy.stopwords if params[:show_stopwords]
88
+           info
89
+         end
90
+
91
+         # Redis memory usage plus the contents of the counter hash.
92
+         get 'indexes' do
93
+           {
94
+             :redis_size => Fuzzzy.redis.info['used_memory_human'],
95
+             :indexes => Fuzzzy.redis.hgetall(Fuzzzy::Redis.counter_key)
96
+           }
97
+         end
98
+       end
99
+
100
+       resource :indexes do
101
+         # Search via query string, e.g.
102
+         # curl 'http://HOST/v1/indexes?index_name=city:name&index_method=ngram&query=search%20string'
103
+         get do
104
+           search
105
+         end
106
+
107
+         # Same search, with the parameters sent in a POST request.
108
+         post '/search' do
109
+           search
110
+         end
111
+
112
+         # Add a dictionary string to an index; requires :id and
113
+         # :dictionary_string.
114
+         post do
115
+           context = index_context
116
+           check_context!(:id, :dictionary_string, context)
117
+           _indexer(context[:index_method]).create_index(context)
118
+         end
119
+
120
+         # Remove an entry from an index; requires :id.
121
+         delete do
122
+           context = index_context
123
+           check_context!(:id, context)
124
+           _indexer(context[:index_method]).delete_index(context)
125
+         end
126
+       end
127
+     end
128
+   end
129
+ end
@@ -0,0 +1,3 @@
1
+ module Fuzzzy
2
+   VERSION = '0.0.1' # version string of the fuzzzy package
3
+ end
@@ -0,0 +1,7 @@
1
+ development: # settings for the development environment
2
+   host: localhost
3
+   database: moneybot_dev # NOTE(review): presumably the MongoDB database used by the Mongoid models - confirm
4
+
5
+ test: # settings for the test environment
6
+   host: localhost
7
+   database: moneybot_test
@@ -0,0 +1,8 @@
1
+ class City # Mongoid document that mixes in Fuzzzy::Mongoid::Index but declares no fuzzzy index
2
+   include Mongoid::Document
3
+   include Fuzzzy::Mongoid::Index
4
+
5
+   field :name, :type => String
6
+   field :country, :type => String
7
+
8
+ end
@@ -0,0 +1,9 @@
1
+ class IndexedCity # same fields as City, plus a fuzzzy index declaration on :name
2
+   include Mongoid::Document
3
+   include Fuzzzy::Mongoid::Index
4
+
5
+   field :name, :type => String
6
+   field :country, :type => String
7
+
8
+   define_fuzzzy_index :name # presumably provided by Fuzzzy::Mongoid::Index (not shown here) - confirm
9
+ end
@@ -0,0 +1,142 @@
1
+ require 'spec_helper'
2
+
3
+ describe Fuzzzy::Ngram::Indexer do
4
+   let(:indexer){Fuzzzy::Ngram::Indexer.new}
5
+   let(:context){{ # indexing context shared by the examples; dictionary_string and id are overridable lets
6
+     :index_name => 'city:name',
7
+     :method => :ngram,
8
+     :dictionary_string => dictionary_string,
9
+     :id => id
10
+   }}
11
+   let(:dictionary_string){'moscow'}
12
+   let(:id){'12345'}
13
+   let(:counter_key){Fuzzzy::Redis.counter_key}
14
+
15
+   before do # delete every Redis key so each example starts from an empty store
16
+     keys = Fuzzzy.redis.keys("*")
17
+     Fuzzzy.redis.del(*keys) if keys.length > 0 # del is only called when there is something to delete
18
+   end
19
+
20
+   describe '#ngrams' do # how a string is split into ngrams
21
+     specify do # a 2-character input comes back as a single ngram
22
+       indexer.with_context(context) do
23
+         indexer.ngrams('mo').should == ['mo']
24
+       end
25
+     end
26
+     specify do # a 3-character input is exactly one ngram
27
+       indexer.with_context(context) do
28
+         indexer.ngrams('mos').should == ['mos']
29
+       end
30
+     end
31
+     specify do # no argument: result matches the trigrams of the context's dictionary_string ('moscow')
32
+       indexer.with_context(context) do
33
+         indexer.ngrams.should == ['mos', 'osc', 'sco', 'cow']
34
+       end
35
+     end
36
+   end
37
+
38
+   describe '#create_index' do # Redis keys and values expected after indexing
39
+     let(:keys){Fuzzzy.redis.keys}
40
+     let(:dictionary_keys){["fuzzzy:city:name:dictionary:#{id}"]}
41
+
42
+     before do # index the default context ('moscow' under id '12345')
43
+       indexer.create_index(context)
44
+     end
45
+
46
+     specify{keys.size.should == 6} # 4 ngram keys + 1 dictionary key + the counter key
47
+     specify do # ngram keys end with the ngram and its position in the word
48
+       keys.should =~ [
49
+         'fuzzzy:city:name:ngram_i:mos:0',
50
+         'fuzzzy:city:name:ngram_i:osc:1',
51
+         'fuzzzy:city:name:ngram_i:sco:2',
52
+         'fuzzzy:city:name:ngram_i:cow:3',
53
+       ] + dictionary_keys + [counter_key]
54
+     end
55
+     specify do # the dictionary key holds the indexed string
56
+       Fuzzzy.redis.mget(*dictionary_keys).should == ['moscow']
57
+     end
58
+     specify do # the union of all ngram sets contains only the indexed id
59
+       Fuzzzy.redis.sunion(*(keys - dictionary_keys - [counter_key])).should == [id]
60
+     end
61
+     specify{Fuzzzy.redis.hgetall(counter_key).should == {'city:name' => '1'}} # counter hash maps index name to '1'
62
+
63
+     context 'with empty string' do # nothing at all is written, not even the counter
64
+       let(:dictionary_string){''}
65
+
66
+       specify{keys.size.should == 0}
67
+     end
68
+
69
+     context 'with nulled string' do # nil is treated like the empty string: no keys
70
+       let(:dictionary_string){nil}
71
+
72
+       specify{keys.size.should == 0}
73
+     end
74
+
75
+     context 'with multiple calls' do # a second word ('Mostyn') is indexed into the same index
76
+       let(:another_id){'11111'}
77
+       let(:dictionary_keys){[
78
+         "fuzzzy:city:name:dictionary:#{id}",
79
+         "fuzzzy:city:name:dictionary:#{another_id}"
80
+       ]}
81
+
82
+       before do # runs after the outer before, so both entries exist
83
+         indexer.create_index(context.merge(
84
+           :dictionary_string => 'Mostyn',
85
+           :id => another_id
86
+         ))
87
+       end
88
+
89
+       specify{keys.size.should == 10} # 7 distinct ngram keys ('mos:0' is shared) + 2 dictionary keys + counter
90
+       specify do
91
+         keys.should =~ [
92
+           'fuzzzy:city:name:ngram_i:mos:0',
93
+           'fuzzzy:city:name:ngram_i:osc:1',
94
+           'fuzzzy:city:name:ngram_i:sco:2',
95
+           'fuzzzy:city:name:ngram_i:cow:3',
96
+           'fuzzzy:city:name:ngram_i:ost:1',
97
+           'fuzzzy:city:name:ngram_i:sty:2',
98
+           'fuzzzy:city:name:ngram_i:tyn:3',
99
+         ] + dictionary_keys + [counter_key]
100
+       end
101
+       specify do # 'Mostyn' is expected back lower-cased
102
+         Fuzzzy.redis.mget(*dictionary_keys).should == [dictionary_string, 'mostyn']
103
+       end
104
+       specify do
105
+         Fuzzzy.redis.sunion(*(keys - dictionary_keys - [counter_key])).should =~ [id, another_id]
106
+       end
107
+       specify{Fuzzzy.redis.hgetall(counter_key).should == {'city:name' => '2'}} # counter now reads '2'
108
+     end
109
+   end
110
+
111
+   describe '#delete_index' do # state expected after one of two indexed entries is deleted
112
+     let(:keys){Fuzzzy.redis.keys}
113
+     let(:another_id){'11111'}
114
+     let(:dictionary_keys){["fuzzzy:city:name:dictionary:#{another_id}"]}
115
+
116
+     before do # index 'moscow' and 'Mostyn', then delete the 'moscow' entry
117
+       indexer.create_index(context)
118
+       indexer.create_index(context.merge(
119
+         :dictionary_string => 'Mostyn',
120
+         :id => another_id
121
+       ))
122
+       indexer.delete_index(context)
123
+     end
124
+
125
+     specify{keys.size.should == 6} # 4 ngram keys of 'mostyn' + its dictionary key + the counter key
126
+     specify do # the shared 'mos:0' key survives; keys used only by 'moscow' are gone
127
+       keys.should =~ [
128
+         'fuzzzy:city:name:ngram_i:mos:0',
129
+         'fuzzzy:city:name:ngram_i:ost:1',
130
+         'fuzzzy:city:name:ngram_i:sty:2',
131
+         'fuzzzy:city:name:ngram_i:tyn:3',
132
+       ] + dictionary_keys + [counter_key]
133
+     end
134
+     specify do
135
+       Fuzzzy.redis.mget(*dictionary_keys).should == ['mostyn']
136
+     end
137
+     specify do # only the remaining id is left in the ngram sets
138
+       Fuzzzy.redis.sunion(*(keys - dictionary_keys - [counter_key])).should =~ [another_id]
139
+     end
140
+     specify{Fuzzzy.redis.hgetall(counter_key).should == {'city:name' => '1'}} # counter is back to '1'
141
+   end
142
+ end
@@ -0,0 +1,194 @@
1
+ require 'spec_helper'
2
+
3
+ describe Fuzzzy::Ngram::Searcher do
4
+   let(:indexer){Fuzzzy::Ngram::Indexer.new}
5
+   let(:index_context){{ # base context shared by indexing and searching
6
+     :index_name => 'city:name',
7
+     :method => :ngram
8
+   }}
9
+   let(:searcher){Fuzzzy::Ngram::Searcher.new}
10
+   let(:context){index_context.merge(:query => query_string, :distance => 1)} # query_string is defined by the nested groups
11
+
12
+   before do # delete every Redis key so each example starts from an empty store
13
+     keys = Fuzzzy.redis.keys("*")
14
+     Fuzzzy.redis.del(*keys) if keys.length > 0
15
+   end
16
+
17
+   describe '#segment_points' do # values yielded for the ngram at a given index; samples match max(0, index - distance)..(index + distance)
18
+     context 'when distance = 0' do
19
+       let(:result){[]} # collects every value yielded by segment_points
20
+       let(:sample){[ # expected values, one row per ngram index of 'moscow'
21
+         [0], # mos
22
+         [1], # osc
23
+         [2], # sco
24
+         [3], # cow
25
+       ]}
26
+       before do
27
+         searcher.with_context(:distance => 0, :query => 'moscow') do
28
+           searcher.segment_points(index) do |i|
29
+             result << i
30
+           end
31
+         end
32
+       end
33
+
34
+       (0...4).each do |idx| # one generated context per ngram index
35
+         context "and index = #{idx}" do
36
+           let(:index){idx}
37
+           specify{result.should == sample[index]}
38
+         end
39
+       end
40
+     end
41
+
42
+     context 'when distance = 1' do
43
+       let(:result){[]}
44
+       let(:sample){[
45
+         [0, 1], # mos
46
+         [0, 1, 2], # osc
47
+         [1, 2, 3], # sco
48
+         [2, 3, 4], # cow
49
+       ]}
50
+       before do
51
+         searcher.with_context(:distance => 1, :query => 'moscow') do
52
+           searcher.segment_points(index) do |i|
53
+             result << i
54
+           end
55
+         end
56
+       end
57
+
58
+       (0...4).each do |idx|
59
+         context "and index = #{idx}" do
60
+           let(:index){idx}
61
+           specify{result.should == sample[index]}
62
+         end
63
+       end
64
+     end
65
+
66
+     context 'when distance = 3' do
67
+       let(:result){[]}
68
+       let(:sample){[
69
+         [0, 1, 2, 3], # mos
70
+         [0, 1, 2, 3, 4], # osc
71
+         [0, 1, 2, 3, 4, 5], # sco
72
+         [0, 1, 2, 3, 4, 5, 6] # cow
73
+       ]}
74
+       before do
75
+         searcher.with_context(:distance => 3, :query => 'moscow') do
76
+           searcher.segment_points(index) do |i|
77
+             result << i
78
+           end
79
+         end
80
+       end
81
+
82
+       (0...4).each do |idx|
83
+         context "and index = #{idx}" do
84
+           let(:index){idx}
85
+           specify{result.should == sample[index]}
86
+         end
87
+       end
88
+     end
89
+
90
+     context 'when distance = 3 and long word' do # 'levenshtein' has 9 trigrams, so the lower bound moves above 0
91
+       let(:result){[]}
92
+       let(:sample){[
93
+         [0, 1, 2, 3], # lev
94
+         [0, 1, 2, 3, 4], # eve
95
+         [0, 1, 2, 3, 4, 5], # ven
96
+         [0, 1, 2, 3, 4, 5, 6], # ens
97
+         [1, 2, 3, 4, 5, 6, 7], # nsh
98
+         [2, 3, 4, 5, 6, 7, 8], # sht
99
+         [3, 4, 5, 6, 7, 8, 9], # hte
100
+         [4, 5, 6, 7, 8, 9, 10], # tei
101
+         [5, 6, 7, 8, 9, 10, 11] # ein
102
+       ]}
103
+       before do
104
+         searcher.with_context(:distance => 3, :query => 'levenshtein') do
105
+           searcher.segment_points(index) do |i|
106
+             result << i
107
+           end
108
+         end
109
+       end
110
+
111
+       (0...9).each do |idx|
112
+         context "and index = #{idx}" do
113
+           let(:index){idx}
114
+           specify{result.should == sample[index]}
115
+         end
116
+       end
117
+     end
118
+   end
119
+
120
+   describe '#index_keys' do # keys probed for a query: each ngram at every position within the distance
121
+     let(:query_string){'mascow'}
122
+     specify do # distance is 1 (from the shared context), so each ngram is looked up at index - 1, index and index + 1, never below 0
123
+       searcher.with_context(context) do
124
+         searcher.index_keys.should =~ [
125
+           searcher.index_key('mas', 0),
126
+           searcher.index_key('mas', 1),
127
+           searcher.index_key('asc', 0),
128
+           searcher.index_key('asc', 1),
129
+           searcher.index_key('asc', 2),
130
+           searcher.index_key('sco', 1),
131
+           searcher.index_key('sco', 2),
132
+           searcher.index_key('sco', 3),
133
+           searcher.index_key('cow', 2),
134
+           searcher.index_key('cow', 3),
135
+           searcher.index_key('cow', 4)
136
+         ]
137
+       end
138
+     end
139
+   end
140
+
141
+ describe '#search' do
142
+ context 'single word - #1' do
143
+ before do
144
+ indexer.create_index(index_context.merge(
145
+ :dictionary_string => dictionary_string,
146
+ :id => id
147
+ ))
148
+ end
149
+
150
+ let(:query_string){'mascow'}
151
+ let(:dictionary_string){'moscow'}
152
+ let(:id){'12345'}
153
+
154
+ specify{searcher.search(context).should == [id]}
155
+ end
156
+
157
+ context 'single word - #2' do
158
+ before do
159
+ indexer.create_index(index_context.merge(
160
+ :dictionary_string => dictionary_string,
161
+ :id => id
162
+ ))
163
+ end
164
+
165
+ let(:query_string){'jenergija'}
166
+ let(:dictionary_string){'energiya'}
167
+ let(:id){'12345'}
168
+
169
+ specify{searcher.search(context.merge(
170
+ :distance => 2
171
+ )).should == [id]}
172
+ end
173
+
174
+ context 'single word - #2' do
175
+ before do
176
+ indexer.create_index(index_context.merge(
177
+ :dictionary_string => dictionary_string,
178
+ :id => id
179
+ ))
180
+ end
181
+
182
+ let(:query_string){'rhus'}
183
+ let(:dictionary_string){'Aarhus'}
184
+ let(:id){'12345'}
185
+
186
+ specify{searcher.search(context.merge(
187
+ :distance => 2
188
+ )).should == [id]}
189
+ end
190
+
191
+ context 'many words' do
192
+ end
193
+ end
194
+ end