fuzzzy 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,99 @@
1
+ require 'pp'
2
module Fuzzzy
  module Server
    # Grape-based JSON API, versioned by path ('v1'), for creating, deleting
    # and searching indexes, plus a basic-auth protected :info namespace.
    class HTTP < Grape::API
      # Raised by check_context! when a required request parameter is absent.
      class ParamsError < StandardError
      end

      format :json
      default_format :json
      error_format :json
      version 'v1', :using => :path

      # Any exception raised while serving a request is rendered as a JSON
      # object carrying the exception class name and its message.
      rescue_from :all do |e|
        rack_response({:error => e.class.name, :message => e.message}.to_json)
      end

      helpers do
        include Index

        # Runs a search described by the request parameters.
        # Requires :index_name, :index_method and :query. When :distance is
        # present it is coerced with to_i before the searcher sees it.
        # NOTE(review): _searcher is presumably provided by Index - confirm.
        def search
          context = search_context
          check_context!(:query, context)
          context[:distance] = context[:distance].to_i if context[:distance]
          _searcher(context[:index_method]).search(context)
        end

        # Verifies that the context holds every required key.
        # The LAST argument is the context hash; the preceding arguments are
        # extra required keys on top of :index_name and :index_method.
        # Raises ParamsError naming the first key whose value is nil.
        def check_context!(*keys)
          context = keys.pop
          ([:index_name, :index_method] + keys).each do |key|
            raise ParamsError, "Parameter :#{key} not found" if context[key].nil?
          end
        end

        # Builds a context hash from the request params, copying
        # :index_name, :index_method and the given keys when they are truthy.
        def construct_context(*keys)
          ([:index_name, :index_method] + keys).each_with_object({}) do |key, context|
            context[key] = params[key] if params[key]
          end
        end

        # Context used by the create/delete index endpoints.
        def index_context
          construct_context(:id, :dictionary_string)
        end

        # Context used by the search endpoints.
        def search_context
          construct_context(:query, :distance, :sort_by, :with_cache)
        end
      end

      namespace :info do
        # Credentials default to the historical admin/password pair so that
        # existing deployments keep working; set FUZZZY_ADMIN_USER and
        # FUZZZY_ADMIN_PASSWORD to override them outside of development.
        http_basic do |user, password|
          user == ENV.fetch('FUZZZY_ADMIN_USER', 'admin') &&
            password == ENV.fetch('FUZZZY_ADMIN_PASSWORD', 'password')
        end

        # General runtime information; stopwords are included only when the
        # show_stopwords parameter is supplied.
        get do
          info = {
            :ruby => RUBY_VERSION,
            :environment => Fuzzzy.env,
            :redis => Fuzzzy.redis.client.id,
            :root_dir => Fuzzzy.root.to_s
          }
          info[:stopwords] = Fuzzzy.stopwords if params[:show_stopwords]
          info
        end

        # Redis memory usage and the contents of the index counter hash.
        get 'indexes' do
          {
            :redis_size => Fuzzzy.redis.info['used_memory_human'],
            :indexes => Fuzzzy.redis.hgetall(Fuzzzy::Redis.counter_key)
          }
        end
      end

      resource :indexes do
        # curl /v1/indexes?index_name=city:name&index_method=ngram&query=search%20string
        get do
          search
        end

        # Same search as GET /indexes, reachable via POST.
        post '/search' do
          search
        end

        # Creates an index entry; requires :id and :dictionary_string.
        # NOTE(review): _indexer is presumably provided by Index - confirm.
        post do
          context = index_context
          check_context!(:id, :dictionary_string, context)
          _indexer(context[:index_method]).create_index(context)
        end

        # Deletes an index entry; requires :id.
        delete do
          context = index_context
          check_context!(:id, context)
          _indexer(context[:index_method]).delete_index(context)
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Fuzzzy
  # Release version of the library. Frozen so the shared constant string
  # cannot be mutated in place by a caller.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,7 @@
1
development:
  host: localhost
  database: moneybot_dev

test:
  host: localhost
  database: moneybot_test
@@ -0,0 +1,8 @@
1
# Mongoid document with two String fields. It mixes in
# Fuzzzy::Mongoid::Index but does not declare any fuzzy index.
class City
  include Mongoid::Document
  include Fuzzzy::Mongoid::Index

  # Both attributes share the same type, so declare them in one pass.
  [:name, :country].each do |attribute|
    field attribute, :type => String
  end
end
@@ -0,0 +1,9 @@
1
# Mongoid document with two String fields and a fuzzy index declared
# on :name.
class IndexedCity
  include Mongoid::Document
  include Fuzzzy::Mongoid::Index

  # Both attributes share the same type, so declare them in one pass.
  [:name, :country].each do |attribute|
    field attribute, :type => String
  end

  define_fuzzzy_index :name
end
@@ -0,0 +1,142 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the n-gram indexer: n-gram splitting, and the Redis keys that
# exist after #create_index and #delete_index.
describe Fuzzzy::Ngram::Indexer do
  let(:indexer) { Fuzzzy::Ngram::Indexer.new }
  let(:id) { '12345' }
  let(:dictionary_string) { 'moscow' }
  let(:counter_key) { Fuzzzy::Redis.counter_key }
  let(:context) do
    {
      :index_name => 'city:name',
      :method => :ngram,
      :dictionary_string => dictionary_string,
      :id => id
    }
  end

  # Remove every key currently in Redis before each example.
  before do
    stale_keys = Fuzzzy.redis.keys("*")
    Fuzzzy.redis.del(*stale_keys) unless stale_keys.empty?
  end

  describe '#ngrams' do
    it do
      indexer.with_context(context) do
        indexer.ngrams('mo').should == ['mo']
      end
    end

    it do
      indexer.with_context(context) do
        indexer.ngrams('mos').should == ['mos']
      end
    end

    # Without an argument the n-grams come from the context set above.
    it do
      indexer.with_context(context) do
        indexer.ngrams.should == ['mos', 'osc', 'sco', 'cow']
      end
    end
  end

  describe '#create_index' do
    let(:keys) { Fuzzzy.redis.keys }
    let(:dictionary_keys) { ["fuzzzy:city:name:dictionary:#{id}"] }

    before { indexer.create_index(context) }

    it { keys.size.should == 6 }

    it do
      expected_keys = [
        'fuzzzy:city:name:ngram_i:mos:0',
        'fuzzzy:city:name:ngram_i:osc:1',
        'fuzzzy:city:name:ngram_i:sco:2',
        'fuzzzy:city:name:ngram_i:cow:3',
      ] + dictionary_keys + [counter_key]
      keys.should =~ expected_keys
    end

    it do
      Fuzzzy.redis.mget(*dictionary_keys).should == ['moscow']
    end

    it do
      ngram_keys = keys - dictionary_keys - [counter_key]
      Fuzzzy.redis.sunion(*ngram_keys).should == [id]
    end

    it { Fuzzzy.redis.hgetall(counter_key).should == {'city:name' => '1'} }

    context 'with empty string' do
      let(:dictionary_string) { '' }

      it { keys.size.should == 0 }
    end

    context 'with nulled string' do
      let(:dictionary_string) { nil }

      it { keys.size.should == 0 }
    end

    context 'with multiple calls' do
      let(:another_id) { '11111' }
      let(:dictionary_keys) do
        [
          "fuzzzy:city:name:dictionary:#{id}",
          "fuzzzy:city:name:dictionary:#{another_id}"
        ]
      end

      # Runs after the outer before, so a second entry is added on top of
      # the first one.
      before do
        second_entry = context.merge(
          :dictionary_string => 'Mostyn',
          :id => another_id
        )
        indexer.create_index(second_entry)
      end

      it { keys.size.should == 10 }

      it do
        expected_keys = [
          'fuzzzy:city:name:ngram_i:mos:0',
          'fuzzzy:city:name:ngram_i:osc:1',
          'fuzzzy:city:name:ngram_i:sco:2',
          'fuzzzy:city:name:ngram_i:cow:3',
          'fuzzzy:city:name:ngram_i:ost:1',
          'fuzzzy:city:name:ngram_i:sty:2',
          'fuzzzy:city:name:ngram_i:tyn:3',
        ] + dictionary_keys + [counter_key]
        keys.should =~ expected_keys
      end

      it do
        Fuzzzy.redis.mget(*dictionary_keys).should == [dictionary_string, 'mostyn']
      end

      it do
        ngram_keys = keys - dictionary_keys - [counter_key]
        Fuzzzy.redis.sunion(*ngram_keys).should =~ [id, another_id]
      end

      it { Fuzzzy.redis.hgetall(counter_key).should == {'city:name' => '2'} }
    end
  end

  describe '#delete_index' do
    let(:keys) { Fuzzzy.redis.keys }
    let(:another_id) { '11111' }
    let(:dictionary_keys) { ["fuzzzy:city:name:dictionary:#{another_id}"] }

    # Index two entries, then delete the first one again.
    before do
      second_entry = context.merge(
        :dictionary_string => 'Mostyn',
        :id => another_id
      )
      indexer.create_index(context)
      indexer.create_index(second_entry)
      indexer.delete_index(context)
    end

    it { keys.size.should == 6 }

    it do
      expected_keys = [
        'fuzzzy:city:name:ngram_i:mos:0',
        'fuzzzy:city:name:ngram_i:ost:1',
        'fuzzzy:city:name:ngram_i:sty:2',
        'fuzzzy:city:name:ngram_i:tyn:3',
      ] + dictionary_keys + [counter_key]
      keys.should =~ expected_keys
    end

    it do
      Fuzzzy.redis.mget(*dictionary_keys).should == ['mostyn']
    end

    it do
      ngram_keys = keys - dictionary_keys - [counter_key]
      Fuzzzy.redis.sunion(*ngram_keys).should =~ [another_id]
    end

    it { Fuzzzy.redis.hgetall(counter_key).should == {'city:name' => '1'} }
  end
end
@@ -0,0 +1,194 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the n-gram searcher: the positions yielded by #segment_points,
# the Redis keys produced by #index_keys, and end-to-end #search lookups.
describe Fuzzzy::Ngram::Searcher do
  let(:indexer){Fuzzzy::Ngram::Indexer.new}
  let(:index_context){{
    :index_name => 'city:name',
    :method => :ngram
  }}
  let(:searcher){Fuzzzy::Ngram::Searcher.new}
  let(:context){index_context.merge(:query => query_string, :distance => 1)}

  # Remove every key currently in Redis before each example.
  before do
    keys = Fuzzzy.redis.keys("*")
    Fuzzzy.redis.del(*keys) if keys.length > 0
  end

  describe '#segment_points' do
    # One row per scenario: description, distance, query and, for every
    # n-gram index of the query, the positions segment_points must yield.
    [
      ['when distance = 0', 0, 'moscow', [
        [0], # mos
        [1], # osc
        [2], # sco
        [3]  # cow
      ]],
      ['when distance = 1', 1, 'moscow', [
        [0, 1],    # mos
        [0, 1, 2], # osc
        [1, 2, 3], # sco
        [2, 3, 4]  # cow
      ]],
      ['when distance = 3', 3, 'moscow', [
        [0, 1, 2, 3],         # mos
        [0, 1, 2, 3, 4],      # osc
        [0, 1, 2, 3, 4, 5],   # sco
        [0, 1, 2, 3, 4, 5, 6] # cow
      ]],
      ['when distance = 3 and long word', 3, 'levenshtein', [
        [0, 1, 2, 3],           # lev
        [0, 1, 2, 3, 4],        # eve
        [0, 1, 2, 3, 4, 5],     # ven
        [0, 1, 2, 3, 4, 5, 6],  # ens
        [1, 2, 3, 4, 5, 6, 7],  # nsh
        [2, 3, 4, 5, 6, 7, 8],  # sht
        [3, 4, 5, 6, 7, 8, 9],  # hte
        [4, 5, 6, 7, 8, 9, 10], # tei
        [5, 6, 7, 8, 9, 10, 11] # ein
      ]]
    ].each do |description, distance, query, samples|
      context description do
        let(:result){[]}

        # Collect every position yielded for the n-gram at `index`.
        before do
          searcher.with_context(:distance => distance, :query => query) do
            searcher.segment_points(index) do |i|
              result << i
            end
          end
        end

        samples.each_with_index do |expected, idx|
          context "and index = #{idx}" do
            let(:index){idx}
            specify{result.should == expected}
          end
        end
      end
    end
  end

  describe '#index_keys' do
    let(:query_string){'mascow'}
    specify do
      searcher.with_context(context) do
        searcher.index_keys.should =~ [
          searcher.index_key('mas', 0),
          searcher.index_key('mas', 1),
          searcher.index_key('asc', 0),
          searcher.index_key('asc', 1),
          searcher.index_key('asc', 2),
          searcher.index_key('sco', 1),
          searcher.index_key('sco', 2),
          searcher.index_key('sco', 3),
          searcher.index_key('cow', 2),
          searcher.index_key('cow', 3),
          searcher.index_key('cow', 4)
        ]
      end
    end
  end

  describe '#search' do
    let(:id){'12345'}

    # Every scenario below indexes a single dictionary entry first; each
    # context supplies its own dictionary_string and query_string.
    before do
      indexer.create_index(index_context.merge(
        :dictionary_string => dictionary_string,
        :id => id
      ))
    end

    context 'single word - #1' do
      let(:query_string){'mascow'}
      let(:dictionary_string){'moscow'}

      specify{searcher.search(context).should == [id]}
    end

    context 'single word - #2' do
      let(:query_string){'jenergija'}
      let(:dictionary_string){'energiya'}

      specify{searcher.search(context.merge(
        :distance => 2
      )).should == [id]}
    end

    # Previously also labelled '#2', which made the two groups
    # indistinguishable in the spec output.
    context 'single word - #3' do
      let(:query_string){'rhus'}
      let(:dictionary_string){'Aarhus'}

      specify{searcher.search(context.merge(
        :distance => 2
      )).should == [id]}
    end

    # TODO: no examples have been written for multi-word queries yet.
    context 'many words' do
    end
  end
end