fuzzy_matcher 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fuzzy_matcher.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Kirill Zonov
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # FuzzyMatcher
2
+
3
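+ FuzzyMatcher builds a pivot-based distance index over a table of strings stored in PostgreSQL or MySQL, and uses that index to find the values that lie within a given distance of a search term. Distances are computed by a SQL distance function available in your database (for example, `levenshtein`).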
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'fuzzy_matcher'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install fuzzy_matcher
18
+
19
+ ## Usage
20
+
21
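+ Open a connection with `FuzzyMatcher::Adapter.new(db_type, db_name, db_user, db_password, table_name)` (where `db_type` is `'pg'` or `'mysql'`), build the index with `level_values = FuzzyMatcher::Indexer.index!(adapter, distance_function, height)`, and then search with `FuzzyMatcher::Searcher.find(level_values, adapter, distance_function, height, accuracy, aim)`.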
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
@@ -0,0 +1,151 @@
1
+ require "pg"
2
+ require "mysql2"
3
+
4
module FuzzyMatcher
  # Database adapter used by the indexer and the searcher.
  #
  # It opens a connection to either PostgreSQL ('pg') or MySQL ('mysql'),
  # sends raw SQL through it, and normalises the driver-specific result
  # objects into plain Ruby arrays. The index lives in a table named
  # "<table_name>_indexed" that holds the columns id, value and one
  # integer column u0..u(height-1) per level value.
  class Adapter
    attr_reader :type, :connection, :table_name

    # Database types understood by this adapter.
    AVAILABLE_DBS = ['pg', 'mysql'].freeze

    # @param db_type [String] 'pg' or 'mysql'
    # @param db_name [String] database name
    # @param db_user [String] database user
    # @param db_password [String] database password
    # @param table_name [String] source table; it must expose the columns
    #   id and value (they are copied by #create_index_table)
    # @raise [RuntimeError] when db_type is neither 'pg' nor 'mysql'
    def initialize(db_type, db_name, db_user, db_password, table_name = 'library')
      @type = db_type
      @table_name = table_name
      @connection = make_connection(db_name, db_user, db_password)
    end

    # Sends a raw SQL string through the driver and returns the driver's
    # own result object (unparsed).
    def send_query(query)
      connection.send(query_method, query)
    end

    # Selects rows from the index table that satisfy +conditions+ and
    # returns their "value" column as an array.
    #
    # NOTE: +conditions+ is inserted into the statement verbatim; the
    # caller is responsible for escaping any literals it contains.
    def send_find_query(conditions)
      parse(send_query("select * from #{@table_name}_indexed where #{conditions}"))
    end

    # Drops and recreates the index table with +height+ distance columns,
    # then copies id and value of every row of the source table into it.
    def create_index_table(height)
      case @type
      when 'pg'    then create_table_pg(height)
      when 'mysql' then create_table_mysql(height)
      end
      fill_index_table
    end

    # Selects +columns+ from every row of the index table and returns the
    # unparsed driver result.
    def select_all(columns)
      send_query "select #{columns} from #{@table_name}_indexed"
    end

    # Converts a driver result into an array of values.
    #
    # @param values the driver result object
    # @param known_key [Boolean] MySQL only: when true read the "value"
    #   column, otherwise read the first column of each row
    # @param value [String] PostgreSQL only: name of the column to read
    def parse(values, known_key = true, value = 'value')
      case @type
      when 'pg'    then pg_parse_values(values, value)
      when 'mysql' then mysql_parse_values(values, known_key)
      end
    end

    # Fills the index: for the level value at position i, stores in
    # column "u<i>" of every row the distance between that level value
    # and the row's value. Issues two statements per (level, value) pair.
    def build_fqa(level_values, values, distance_function)
      level_values.each_with_index do |level_value, position|
        column = "u#{position}"
        values.each do |value|
          distance = calculate_distance(distance_function, level_value, value)
          update(value, column, distance)
        end
      end
    end

    # Asks the database to evaluate distance_function(level_value, value).
    # Both arguments are escaped, so they may contain quote characters.
    #
    # @return the first value of the parsed result (a String under
    #   PostgreSQL; whatever the driver yields under MySQL)
    def calculate_distance(distance_function, level_value, value)
      query_string = "select #{distance_function}(#{quote(level_value)},#{quote(value)})"
      result = parse(send_query(query_string), false, distance_function)
      result.is_a?(Array) ? result.first : result
    end

    private

    # Name of the driver method that executes a SQL string.
    def query_method
      case @type
      when 'pg'    then :exec
      when 'mysql' then :query
      end
    end

    # Renders +value+ as a single-quoted SQL string literal, escaped by
    # the driver connection so embedded quotes cannot terminate it.
    def quote(value)
      text = value.to_s
      escaped =
        case @type
        when 'pg'    then connection.escape_string(text)
        when 'mysql' then connection.escape(text)
        else text.gsub("'", "''")
        end
      "'#{escaped}'"
    end

    # Stores +dist+ in +column+ for the index row whose value is +value+.
    def update(value, column, dist)
      send_query "update #{@table_name}_indexed set #{column} = #{dist} where value = #{quote(value)}"
    end

    def pg_connection(db_name, db_user, db_password)
      PG.connect(host: 'localhost', user: db_user, password: db_password, dbname: db_name)
    end

    def mysql_connection(db_name, db_user, db_password)
      Mysql2::Client.new(username: db_user, password: db_password, database: db_name)
    end

    # Opens the driver connection matching @type.
    def make_connection(db_name, db_user, db_password)
      case @type
      when 'pg'
        pg_connection(db_name, db_user, db_password)
      when 'mysql'
        mysql_connection(db_name, db_user, db_password)
      else
        raise "Current available pg and mysql databases"
      end
    end

    def create_table_pg(height)
      index_columns = take_index_columns(height)
      query_string = "CREATE TABLE #{@table_name}_indexed
        (
          id integer NOT NULL DEFAULT 0,
          value character(20),
          #{index_columns}
          CONSTRAINT #{@table_name}_indexed_pkey PRIMARY KEY (id )
        )
        WITH (
          OIDS=FALSE
        );"
      drop_and_create(query_string)
    end

    def create_table_mysql(height)
      index_columns = take_index_columns(height)
      query_string = "CREATE TABLE `#{@table_name}_indexed` (
        `id` int(11) NOT NULL AUTO_INCREMENT,
        `value` varchar(45) DEFAULT NULL,
        #{index_columns}
        PRIMARY KEY (`id`),
        UNIQUE KEY `id_UNIQUE` (`id`),
        UNIQUE KEY `value_UNIQUE` (`value`)
      ) ENGINE=InnoDB AUTO_INCREMENT=19 DEFAULT CHARSET=koi8r"
      drop_and_create(query_string)
    end

    # Column definitions "u0 integer,u1 integer,..."; every entry keeps
    # its trailing comma because a key clause always follows in the
    # CREATE TABLE statements.
    def take_index_columns(height)
      (0...height).map { |level| "u#{level} integer," }.join
    end

    def drop_and_create(create_table_string)
      send_query "drop table if exists #{@table_name}_indexed;"
      send_query create_table_string
    end

    # PostgreSQL stores value as character(20), so the strings come back
    # padded on the right; the padding is stripped here.
    def pg_parse_values(result, key = "value")
      result.field_values(key).map(&:rstrip)
    end

    def mysql_parse_values(result, known_key, key = "value")
      result.map { |row| known_key ? row[key] : row.values.first }
    end

    # Copies id and value of every source row into the index table.
    def fill_index_table
      send_query "insert into #{@table_name}_indexed (id, value) select id, value from #{@table_name}"
    end
  end
end
@@ -0,0 +1,54 @@
1
module FuzzyMatcher
  # Builds the distance index used by the searcher.
  class Indexer
    class << self
      # Picks +height+ random level values from the source table,
      # recreates the index table and fills it with the distance between
      # every level value and every indexed value.
      #
      # @param connection an adapter responding to type, table_name,
      #   send_query, create_index_table, select_all, parse and build_fqa
      # @param distance_function [String] name of the SQL distance function
      # @param height [Integer] number of level values to draw
      # @return [Array<String>] the chosen level values; the same value may
      #   appear more than once, and the array is shorter than +height+
      #   (possibly empty) when a draw yields no usable value
      def index!(connection, distance_function, height)
        level_values = select_level_values(connection, height)
        connection.create_index_table(height)
        index_values(connection, level_values, distance_function)
        level_values
      end

      private

      # Runs the random-row query +height+ times. Draws that return no
      # row (empty table) or a NULL value are dropped instead of raising.
      def select_level_values(conn, height)
        drawn = Array.new(height) do
          parse_result(conn.type, conn.send_query(query_for_select_levels(conn)))
        end
        drawn.compact
      end

      # MySQL and PostgreSQL have different
      # random functions.
      def rand_func(db_type)
        case db_type
        when "pg" then "random()"
        when "mysql" then "rand()"
        end
      end

      def query_for_select_levels(connection)
        "select value from #{connection.table_name} order by #{rand_func(connection.type)} limit 1"
      end

      # Extracts the "value" column of the first row of a driver result,
      # or nil when there is no row or the value is NULL.
      def parse_result(type, result)
        case type
        when "pg"
          # rstrip because the string may come
          # back padded, as "word "
          value = result.field_values("value").first
          value && value.rstrip
        when "mysql"
          row = result.first
          row && row["value"]
        end
      end

      # Reads every value from the freshly created index table and
      # stores its distance to each level value.
      def index_values(connection, level_values, distance_function)
        unparsed_result = connection.select_all(:value)
        values = connection.parse(unparsed_result)
        connection.build_fqa(level_values, values, distance_function)
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module FuzzyMatcher
  # Looks up indexed values that lie within a given distance of a term.
  class Searcher
    class << self
      # Finds the indexed values whose distance to +aim+ is at most
      # +accuracy+.
      #
      # The index table is first narrowed with one condition per level
      # value, then every remaining candidate is verified by computing
      # its real distance to +aim+.
      #
      # @param level_values [Array<String>] level values the index was
      #   built with, in column order (u0, u1, ...)
      # @param conn an adapter responding to send_find_query and
      #   calculate_distance
      # @param distance_function [String] name of the SQL distance function
      # @param height [Integer] accepted for interface compatibility; not
      #   used by the search itself
      # @param accuracy [Integer] largest accepted distance (inclusive)
      # @param aim [String] the search term
      # @return [Array] matching values
      def find(level_values, conn, distance_function, height, accuracy, aim)
        conditions = condition_string(level_values, distance_function, accuracy, aim)
        result = conn.send_find_query(conditions)
        clarify_result(conn, distance_function, accuracy, aim, result)
      end

      private

      # Keeps only the candidates whose real distance to +aim+ does not
      # exceed +accuracy+.
      def clarify_result(conn, distance_function, accuracy, aim, result)
        result.reject do |candidate|
          conn.calculate_distance(distance_function, aim, candidate).to_i > accuracy
        end
      end

      # Builds the WHERE clause of the candidate query.
      #
      # The bound is inclusive ("<=") so that it agrees with
      # clarify_result, which accepts a distance equal to +accuracy+;
      # a strict "<" would drop such matches and make accuracy 0
      # unsatisfiable. With no level values nothing can be pruned, so a
      # condition that is always true is returned.
      def condition_string(level_values, distance_function, accuracy, aim)
        quoted_aim = sql_literal(aim)
        conditions = level_values.each_with_index.map do |level_value, position|
          "abs(#{distance_function}(#{sql_literal(level_value)},#{quoted_aim}) - u#{position})<=#{accuracy}"
        end
        conditions.empty? ? "1=1" : conditions.join(" and ")
      end

      # Single-quoted SQL string literal with embedded single quotes
      # doubled. NOTE(review): backslashes are left untouched; MySQL in
      # its default mode also treats them as escapes - confirm whether
      # inputs can contain them.
      def sql_literal(text)
        "'#{text.to_s.gsub("'", "''")}'"
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module FuzzyMatcher
  # Gem version string. Frozen so it cannot be mutated in place at runtime.
  VERSION = "0.0.3".freeze
end
@@ -0,0 +1,7 @@
1
+ require "fuzzy_matcher/version"
2
+
3
+ module FuzzyMatcher
4
+ require "fuzzy_matcher/adapter"
5
+ require "fuzzy_matcher/indexer"
6
+ require "fuzzy_matcher/searcher"
7
+ end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fuzzy_matcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Kirill Zonov
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-05-03 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: pg
16
+ requirement: &2160266960 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *2160266960
25
+ - !ruby/object:Gem::Dependency
26
+ name: mysql2
27
+ requirement: &2160282900 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *2160282900
36
+ description: fuzzy matcher
37
+ email:
38
+ - graffzon@gmail.com
39
+ executables: []
40
+ extensions: []
41
+ extra_rdoc_files: []
42
+ files:
43
+ - lib/fuzzy_matcher/adapter.rb
44
+ - lib/fuzzy_matcher/indexer.rb
45
+ - lib/fuzzy_matcher/searcher.rb
46
+ - lib/fuzzy_matcher/version.rb
47
+ - lib/fuzzy_matcher.rb
48
+ - Gemfile
49
+ - LICENSE
50
+ - Rakefile
51
+ - README.md
52
+ homepage: ''
53
+ licenses: []
54
+ post_install_message:
55
+ rdoc_options: []
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ requirements: []
71
+ rubyforge_project:
72
+ rubygems_version: 1.8.6
73
+ signing_key:
74
+ specification_version: 3
75
+ summary: smth..
76
+ test_files: []