fuzzy_matcher 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +29 -0
- data/Rakefile +2 -0
- data/lib/fuzzy_matcher/adapter.rb +151 -0
- data/lib/fuzzy_matcher/indexer.rb +54 -0
- data/lib/fuzzy_matcher/searcher.rb +28 -0
- data/lib/fuzzy_matcher/version.rb +3 -0
- data/lib/fuzzy_matcher.rb +7 -0
- metadata +76 -0
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Kirill Zonov
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# FuzzyMatcher
|
2
|
+
|
3
|
+
TODO: Write a gem description
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'fuzzy_matcher'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install fuzzy_matcher
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
TODO: Write usage instructions here
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,151 @@
|
|
1
|
+
require "pg"
|
2
|
+
require "mysql2"
|
3
|
+
|
4
|
+
module FuzzyMatcher
  # Wraps a PostgreSQL (pg) or MySQL (mysql2) connection and issues the SQL
  # needed to build and query the "<table_name>_indexed" table.
  #
  # The index table holds a copy of every id/value row of the source table
  # plus one integer column per pivot ("u0", "u1", ...) that stores the
  # distance between the row's value and that pivot.
  class Adapter
    attr_reader :type, :connection, :table_name

    AVAILABLE_DBS = ['pg', 'mysql'].freeze

    # db_type     - 'pg' or 'mysql'; anything else raises RuntimeError.
    # db_name     - database to connect to.
    # db_user     - database user.
    # db_password - password for db_user.
    # table_name  - source table; it is read through its "id" and "value"
    #               columns.
    def initialize(db_type, db_name, db_user, db_password, table_name = 'library')
      @type = db_type
      @table_name = table_name
      @connection = make_connection(db_name, db_user, db_password)
    end

    # Runs a raw SQL string through the driver and returns the driver's own
    # result object (unparsed).
    def send_query(query)
      connection.send(query_method, query)
    end

    # Selects rows of the index table matching the given SQL condition and
    # returns their "value" column as an Array.
    #
    # NOTE(review): +conditions+ is spliced into the statement verbatim, so
    # the caller is responsible for escaping any literals it contains.
    def send_find_query(conditions)
      parse(send_query("select * from #{@table_name}_indexed where #{conditions}"))
    end

    # Drops and recreates the index table with +height+ pivot columns, then
    # copies id/value of every source row into it.
    def create_index_table(height)
      case @type
      when 'pg'
        create_table_pg(height)
      when 'mysql'
        create_table_mysql(height)
      end
      fill_index_table
    end

    # Returns the unparsed driver result of selecting +columns+ from the
    # index table.
    def select_all(columns)
      send_query "select #{columns.to_s} from #{@table_name}_indexed"
    end

    # Converts a driver result into an Array of column values.
    #
    # values    - driver result object.
    # known_key - mysql only: true reads the "value" column of each row,
    #             false reads the first column of each row.
    # value     - pg only: name of the column to read.
    def parse(values, known_key = true, value = 'value')
      case @type
      when 'pg'
        pg_parse_values(values, value)
      when 'mysql'
        mysql_parse_values(values, known_key)
      end
    end

    # For every pivot in +level_values+ (pivot i maps to column "u<i>"),
    # computes the distance to every entry of +values+ and stores it in the
    # index table. Issues two queries per (pivot, value) pair.
    def build_fqa(level_values, values, distance_function)
      level_values.each_with_index do |pivot, position|
        column = "u#{position}"
        values.each do |current|
          update(current, column, calculate_distance(distance_function, pivot, current))
        end
      end
    end

    # Asks the database to evaluate distance_function(level_value, value)
    # and returns the first value of the parsed result.
    #
    # Both string arguments are escaped before being placed in the SQL
    # literal, so values containing quotes no longer break the statement.
    def calculate_distance(distance_function, level_value, value)
      query_string = "select #{distance_function}('#{escape(level_value)}','#{escape(value)}')"
      result = parse(send_query(query_string), false, distance_function)
      result.is_a?(Array) ? result[0] : result
    end

    private

    # Name of the driver method that executes a SQL string.
    def query_method
      case @type
      when 'pg' then :exec
      when 'mysql' then :query
      end
    end

    # Escapes +text+ for use inside a single-quoted SQL string literal,
    # using the escaping routine of the active driver.
    def escape(text)
      case @type
      when 'pg' then connection.escape_string(text.to_s)
      when 'mysql' then connection.escape(text.to_s)
      end
    end

    # Writes +dist+ into +column+ for the index row whose value is +value+.
    def update(value, column, dist)
      send_query "update #{@table_name}_indexed set #{column} = #{dist} where value = '#{escape(value)}'"
    end

    def pg_connection(db_name, db_user, db_password)
      PG.connect(host: 'localhost', user: db_user, password: db_password, dbname: db_name)
    end

    def mysql_connection(db_name, db_user, db_password)
      Mysql2::Client.new(username: db_user, password: db_password, database: db_name)
    end

    # Opens the connection for @type; raises RuntimeError for an
    # unsupported database type.
    def make_connection(db_name, db_user, db_password)
      case @type
      when 'pg'
        pg_connection(db_name, db_user, db_password)
      when 'mysql'
        mysql_connection(db_name, db_user, db_password)
      else
        raise "Current available pg and mysql databases"
      end
    end

    def create_table_pg(height)
      index_columns = take_index_columns(height)
      query_string = "CREATE TABLE #{@table_name}_indexed
        (
          id integer NOT NULL DEFAULT 0,
          value character(20),
          #{index_columns}
          CONSTRAINT #{@table_name}_indexed_pkey PRIMARY KEY (id )
        )
        WITH (
          OIDS=FALSE
        );"
      drop_and_create(query_string)
    end

    def create_table_mysql(height)
      index_columns = take_index_columns(height)
      query_string = "CREATE TABLE `#{@table_name}_indexed` (
        `id` int(11) NOT NULL AUTO_INCREMENT,
        `value` varchar(45) DEFAULT NULL,
        #{index_columns}
        PRIMARY KEY (`id`),
        UNIQUE KEY `id_UNIQUE` (`id`),
        UNIQUE KEY `value_UNIQUE` (`value`)
        ) ENGINE=InnoDB AUTO_INCREMENT=19 DEFAULT CHARSET=koi8r"
      drop_and_create(query_string)
    end

    # Column definitions for the pivot distances: "u0 integer,u1 integer,".
    # The trailing comma is intentional; the CREATE TABLE templates continue
    # with a key/constraint clause right after it.
    def take_index_columns(height)
      (0...height).map { |h| "u#{h} integer," }.join
    end

    def drop_and_create(create_table_string)
      send_query "drop table if exists #{@table_name}_indexed;"
      send_query create_table_string
    end

    # Reads column +key+ from a pg result and strips trailing whitespace
    # from each value (the index table declares value as character(20)).
    def pg_parse_values(result, key = "value")
      result.field_values(key).map { |v| v.rstrip }
    end

    # Reads column +key+ from each mysql row, or the first column of each
    # row when the column name is not known in advance.
    def mysql_parse_values(result, known_key, key = "value")
      result.map { |row| known_key ? row[key] : row.values[0] }
    end

    def fill_index_table
      send_query "insert into #{@table_name}_indexed (id, value) select id, value from #{@table_name}"
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module FuzzyMatcher
  # Builds the pivot index: picks random pivot values from the source table,
  # (re)creates the index table and fills in the distance from every value
  # to every pivot.
  class Indexer
    class << self
      # connection        - adapter object; must respond to type, table_name,
      #                     send_query, parse, create_index_table, select_all
      #                     and build_fqa.
      # distance_function - name of the SQL distance function to use.
      # height            - number of pivots (and index columns) to create.
      #
      # Returns the Array of pivot values. It holds at most +height+ entries
      # and fewer when the source table has fewer rows than +height+.
      def index!(connection, distance_function, height)
        level_values = select_level_values(connection, height)
        connection.create_index_table(height)
        index_values(connection, level_values, distance_function)
        level_values
      end

      private

      # Picks up to +height+ random rows of the source table in a single
      # query, so one row cannot be chosen as a pivot twice, and an empty
      # table simply yields an empty pivot list.
      def select_level_values(conn, height)
        return [] unless height > 0

        conn.parse(conn.send_query(query_for_select_levels(conn, height)))
      end

      # MySQL and PostgreSQL have different random functions.
      def rand_func(db_type)
        case db_type
        when "pg" then "random()"
        when "mysql" then "rand()"
        end
      end

      # SQL selecting +limit+ values of the source table in random order.
      def query_for_select_levels(connection, limit = 1)
        "select value from #{connection.table_name} order by #{rand_func(connection.type)} limit #{limit.to_i}"
      end

      # Reads every value of the freshly filled index table and stores its
      # distance to each pivot.
      def index_values(connection, level_values, distance_function)
        values = connection.parse(connection.select_all(:value))
        connection.build_fqa(level_values, values, distance_function)
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module FuzzyMatcher
  # Finds indexed values whose distance to a search term is within a given
  # accuracy, using the stored pivot distances to pre-filter candidates.
  class Searcher
    class << self
      # level_values      - pivot values; pivot i corresponds to index
      #                     column "u<i>".
      # conn              - adapter object; must respond to type, connection,
      #                     send_find_query and calculate_distance.
      # distance_function - name of the SQL distance function.
      # height            - unused; kept for interface compatibility.
      # accuracy          - maximum allowed distance (inclusive).
      # aim               - the search term.
      #
      # Returns the candidate values whose distance to +aim+ is <= accuracy.
      def find(level_values, conn, distance_function, height, accuracy, aim)
        conditions = condition_string(level_values, conn, distance_function, accuracy, aim)
        candidates = conn.send_find_query(conditions)
        clarify_result(conn, distance_function, accuracy, aim, candidates)
      end

      private

      # Drops every candidate whose real distance to +aim+ exceeds +accuracy+.
      def clarify_result(conn, distance_function, accuracy, aim, result)
        result.reject do |candidate|
          conn.calculate_distance(distance_function, aim, candidate).to_i > accuracy
        end
      end

      # Builds the SQL pre-filter over the pivot columns.
      #
      # By the triangle inequality a row within +accuracy+ of +aim+ satisfies
      # |d(pivot, aim) - d(pivot, row)| <= accuracy for every pivot, so the
      # comparison has to be inclusive; a strict "<" would discard valid
      # matches (and could never match with accuracy 0).
      def condition_string(level_values, conn, distance_function, accuracy, aim)
        quoted_aim = quote(conn, aim)
        conditions = level_values.each_with_index.map do |pivot, i|
          "abs(#{distance_function}(#{quote(conn, pivot)},#{quoted_aim}) - u#{i})<=#{accuracy}"
        end
        # Without pivots there is nothing to pre-filter on; emit a condition
        # that is always true so the resulting statement stays valid SQL.
        conditions.empty? ? "1=1" : conditions.join(" and ")
      end

      # Returns +text+ as an escaped, single-quoted SQL string literal,
      # using the escaping routine of the adapter's underlying driver.
      def quote(conn, text)
        driver = conn.connection
        escaped =
          case conn.type
          when 'pg' then driver.escape_string(text.to_s)
          else driver.escape(text.to_s)
          end
        "'#{escaped}'"
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fuzzy_matcher
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.3
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Kirill Zonov
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-05-03 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: pg
|
16
|
+
requirement: &2160266960 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *2160266960
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: mysql2
|
27
|
+
requirement: &2160282900 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *2160282900
|
36
|
+
description: fuzzy matcher
|
37
|
+
email:
|
38
|
+
- graffzon@gmail.com
|
39
|
+
executables: []
|
40
|
+
extensions: []
|
41
|
+
extra_rdoc_files: []
|
42
|
+
files:
|
43
|
+
- lib/fuzzy_matcher/adapter.rb
|
44
|
+
- lib/fuzzy_matcher/indexer.rb
|
45
|
+
- lib/fuzzy_matcher/searcher.rb
|
46
|
+
- lib/fuzzy_matcher/version.rb
|
47
|
+
- lib/fuzzy_matcher.rb
|
48
|
+
- Gemfile
|
49
|
+
- LICENSE
|
50
|
+
- Rakefile
|
51
|
+
- README.md
|
52
|
+
homepage: ''
|
53
|
+
licenses: []
|
54
|
+
post_install_message:
|
55
|
+
rdoc_options: []
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
none: false
|
60
|
+
requirements:
|
61
|
+
- - ! '>='
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
requirements: []
|
71
|
+
rubyforge_project:
|
72
|
+
rubygems_version: 1.8.6
|
73
|
+
signing_key:
|
74
|
+
specification_version: 3
|
75
|
+
summary: smth..
|
76
|
+
test_files: []
|