fuzzy_matcher 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fuzzy_matcher.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Kirill Zonov
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # FuzzyMatcher
2
+
3
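+ FuzzyMatcher builds a distance index over the `value` column of a PostgreSQL or MySQL table and uses it to run approximate (fuzzy) string searches with a distance function provided by the database.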
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'fuzzy_matcher'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install fuzzy_matcher
18
+
19
+ ## Usage
20
+
21
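+ Create a `FuzzyMatcher::Adapter.new(db_type, db_name, db_user, db_password, table_name)` (`db_type` is `'pg'` or `'mysql'`), build the index with `FuzzyMatcher::Indexer.index!(adapter, distance_function, height)`, and keep the level values it returns. Then search with `FuzzyMatcher::Searcher.find(level_values, adapter, distance_function, height, accuracy, aim)`.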
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
@@ -0,0 +1,151 @@
1
+ require "pg"
2
+ require "mysql2"
3
+
4
module FuzzyMatcher
  # Wraps a PostgreSQL (`pg`) or MySQL (`mysql2`) connection and issues the
  # SQL needed to build and query the "<table_name>_indexed" table.
  class Adapter
    attr_reader :type, :connection, :table_name

    # Database type identifiers accepted by the constructor.
    AVAILABLE_DBS = ['pg', 'mysql'].freeze

    # Opens a connection of the requested type.
    #
    # db_type     - 'pg' or 'mysql'.
    # db_name     - database name passed to the driver.
    # db_user     - user name passed to the driver.
    # db_password - password passed to the driver.
    # table_name  - source table; the index lives in "<table_name>_indexed".
    #
    # Raises RuntimeError when db_type is neither 'pg' nor 'mysql'.
    def initialize(db_type, db_name, db_user, db_password, table_name = 'library')
      @type = db_type
      @table_name = table_name
      @connection = make_connection(db_name, db_user, db_password)
    end

    # Runs a raw SQL string through the driver (`exec` for pg, `query` for
    # mysql) and returns the driver's result object unchanged.
    def send_query(query)
      connection.public_send(query_method, query)
    end

    # Selects rows of the index table matching the given SQL condition
    # fragment and returns their parsed "value" column.
    def send_find_query(conditions)
      parse(send_query("select * from #{@table_name}_indexed where #{conditions}"))
    end

    # Drops and recreates the index table with `height` distance columns
    # (u0 .. u<height-1>), then copies id/value from the source table.
    def create_index_table(height)
      case @type
      when 'pg'
        create_table_pg(height)
      when 'mysql'
        create_table_mysql(height)
      end
      fill_index_table
    end

    # Selects the given column expression from every row of the index table
    # and returns the raw driver result.
    def select_all(columns)
      send_query "select #{columns} from #{@table_name}_indexed"
    end

    # Converts a driver result into a plain Array of column values.
    #
    # values    - driver result object.
    # known_key - mysql only: when false the first column of each row is
    #             taken instead of looking the column up by name.
    # value     - name of the column to extract.
    def parse(values, known_key = true, value = 'value')
      case @type
      when 'pg'
        pg_parse_values(values, value)
      when 'mysql'
        # Forward the requested column name so both drivers honour it.
        mysql_parse_values(values, known_key, value)
      end
    end

    # For every level value (pivot) and every indexed value, computes the
    # distance in the database and stores it in column u<level index>.
    def build_fqa(level_values, values, distance_function)
      level_values.each_with_index do |lv, id|
        column = "u#{id}"
        values.each do |v|
          update(v, column, calculate_distance(distance_function, lv, v))
        end
      end
    end

    # Asks the database to evaluate distance_function(level_value, value)
    # and returns the first value of the parsed result.
    #
    # distance_function is interpolated as an SQL identifier and must come
    # from trusted code; the two string arguments are escaped.
    def calculate_distance(distance_function, level_value, value)
      query_string = "select #{distance_function}(#{quote(level_value)},#{quote(value)})"
      result = parse(send_query(query_string), false, distance_function)
      result.is_a?(Array) ? result[0] : result
    end

    private

    # Name of the driver method that executes a raw SQL string.
    def query_method
      case @type
      when 'pg' then :exec
      when 'mysql' then :query
      end
    end

    # Returns text as a single-quoted SQL string literal, escaped by the
    # underlying driver so embedded quotes cannot terminate the literal.
    def quote(text)
      escaped =
        case @type
        when 'pg' then connection.escape_string(text.to_s)
        when 'mysql' then connection.escape(text.to_s)
        end
      "'#{escaped}'"
    end

    # Stores one computed distance for the row whose value equals `value`.
    def update(value, column, dist)
      send_query "update #{@table_name}_indexed set #{column} = #{dist} where value = #{quote(value)}"
    end

    def pg_connection(db_name, db_user, db_password)
      PG.connect(host: 'localhost', user: db_user, password: db_password, dbname: db_name)
    end

    def mysql_connection(db_name, db_user, db_password)
      Mysql2::Client.new(username: db_user, password: db_password, database: db_name)
    end

    # Dispatches to the driver-specific connection factory.
    def make_connection(db_name, db_user, db_password)
      case @type
      when 'pg'
        pg_connection(db_name, db_user, db_password)
      when 'mysql'
        mysql_connection(db_name, db_user, db_password)
      else
        raise "Current available pg and mysql databases"
      end
    end

    def create_table_pg(height)
      index_columns = take_index_columns(height)
      query_string = "CREATE TABLE #{@table_name}_indexed
        (
          id integer NOT NULL DEFAULT 0,
          value character(20),
          #{index_columns}
          CONSTRAINT #{@table_name}_indexed_pkey PRIMARY KEY (id )
        )
        WITH (
          OIDS=FALSE
        );"
      drop_and_create(query_string)
    end

    def create_table_mysql(height)
      index_columns = take_index_columns(height)
      query_string = "CREATE TABLE `#{@table_name}_indexed` (
        `id` int(11) NOT NULL AUTO_INCREMENT,
        `value` varchar(45) DEFAULT NULL,
        #{index_columns}
        PRIMARY KEY (`id`),
        UNIQUE KEY `id_UNIQUE` (`id`),
        UNIQUE KEY `value_UNIQUE` (`value`)
        ) ENGINE=InnoDB AUTO_INCREMENT=19 DEFAULT CHARSET=koi8r"
      drop_and_create(query_string)
    end

    # Column definitions "u0 integer,u1 integer,..." for the CREATE TABLE
    # statements; each entry keeps its trailing comma because a key or
    # constraint clause always follows.
    def take_index_columns(height)
      (0...height).map { |h| "u#{h} integer," }.join
    end

    def drop_and_create(create_table_string)
      send_query "drop table if exists #{@table_name}_indexed;"
      send_query create_table_string
    end

    # pg: values of column `key` with trailing whitespace removed (the
    # index table declares value as character(20)).
    def pg_parse_values(result, key = "value")
      result.field_values(key).collect { |v| v.rstrip }
    end

    # mysql: column `key` of every row, or the first column of every row
    # when known_key is false.
    def mysql_parse_values(result, known_key, key = "value")
      result.collect { |row| known_key ? row[key] : row.values[0] }
    end

    # Copies id and value of every source row into the index table.
    def fill_index_table
      send_query "insert into #{@table_name}_indexed (id, value) select id, value from #{@table_name}"
    end
  end
end
@@ -0,0 +1,54 @@
1
module FuzzyMatcher
  # Builds the "<table_name>_indexed" table: picks random level values
  # (pivots) from the source table and stores, for every indexed value,
  # its distance to each pivot.
  class Indexer
    class << self
      # Builds the index and returns the level values that were used.
      #
      # connection        - adapter object responding to type, table_name,
      #                     send_query, parse, create_index_table,
      #                     select_all and build_fqa.
      # distance_function - name of the SQL distance function to call.
      # height            - number of level values / index columns.
      #
      # Returns an Array of `height` level values; the same array must be
      # used when searching the index.
      # Raises RuntimeError when height is positive and the source table
      # yields no row to pick a level value from.
      def index!(connection, distance_function, height)
        # Level values are read from the source table before the index
        # table is (re)created.
        level_values = select_level_values(connection, height)
        connection.create_index_table(height)
        index_values(connection, level_values, distance_function)
        level_values
      end

      private

      # Picks one random value per level, each with its own query.
      def select_level_values(conn, height)
        query = query_for_select_levels(conn)
        height.times.map { pick_level_value(conn, query) }
      end

      # Runs the random-row query and returns the selected value, parsed by
      # the adapter so driver differences are handled in one place.
      def pick_level_value(conn, query)
        picked = conn.parse(conn.send_query(query))
        if picked.empty?
          raise "Cannot pick a level value: table #{conn.table_name} returned no rows"
        end
        picked.first
      end

      # MySQL and PostgreSQL name their random functions differently.
      def rand_func(db_type)
        case db_type
        when "pg" then "random()"
        when "mysql" then "rand()"
        end
      end

      def query_for_select_levels(connection)
        "select value from #{connection.table_name} order by #{rand_func(connection.type)} limit 1"
      end

      # Reads every value from the index table and fills in its distance
      # to each level value.
      def index_values(connection, level_values, distance_function)
        values = connection.parse(connection.select_all(:value))
        connection.build_fqa(level_values, values, distance_function)
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module FuzzyMatcher
  # Searches the index table for values whose distance to a target string
  # does not exceed a given accuracy.
  class Searcher
    class << self
      # Finds indexed values within `accuracy` of `aim`.
      #
      # level_values      - Array of level values returned when the index
      #                     was built; position i corresponds to column u<i>.
      # conn              - adapter object responding to type, connection,
      #                     send_find_query and calculate_distance.
      # distance_function - name of the SQL distance function; interpolated
      #                     as an identifier, so it must be trusted.
      # height            - accepted for interface compatibility; not used.
      # accuracy          - maximum allowed distance (inclusive).
      # aim               - string to search for.
      #
      # Returns an Array of matching values.
      def find(level_values, conn, distance_function, height, accuracy, aim)
        conditions = condition_string(conn, level_values, distance_function, accuracy, aim)
        candidates = conn.send_find_query(conditions)
        clarify_result(conn, distance_function, accuracy, aim, candidates)
      end

      private

      # Drops candidates whose real distance to aim exceeds accuracy; the
      # index conditions only pre-filter and may let false positives in.
      def clarify_result(conn, distance_function, accuracy, aim, result)
        result.reject do |candidate|
          conn.calculate_distance(distance_function, aim, candidate).to_i > accuracy
        end
      end

      # Builds the SQL pre-filter: for every level value the stored distance
      # u<i> must differ from distance(level value, aim) by at most accuracy.
      # The bound is inclusive so it matches clarify_result, which keeps
      # candidates whose distance equals accuracy.
      def condition_string(conn, level_values, distance_function, accuracy, aim)
        # Without level values there is nothing to pre-filter on; every row
        # becomes a candidate and clarify_result does the filtering.
        return "1 = 1" if level_values.empty?

        quoted_aim = quote(conn, aim)
        level_values.each_with_index.map do |lv, i|
          "abs(#{distance_function}(#{quote(conn, lv)},#{quoted_aim}) - u#{i}) <= #{accuracy}"
        end.join(" and ")
      end

      # Returns text as a single-quoted SQL string literal escaped by the
      # adapter's underlying driver connection.
      def quote(conn, text)
        raw = conn.connection
        escaped =
          if conn.type == "pg"
            raw.escape_string(text.to_s)
          else
            raw.escape(text.to_s)
          end
        "'#{escaped}'"
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module FuzzyMatcher
  # Version of the gem; frozen so the shared constant cannot be mutated
  # in place.
  VERSION = "0.0.3".freeze
end
@@ -0,0 +1,7 @@
1
+ require "fuzzy_matcher/version"
2
+
3
+ module FuzzyMatcher
4
+ require "fuzzy_matcher/adapter"
5
+ require "fuzzy_matcher/indexer"
6
+ require "fuzzy_matcher/searcher"
7
+ end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fuzzy_matcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Kirill Zonov
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-05-03 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: pg
16
+ requirement: &2160266960 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *2160266960
25
+ - !ruby/object:Gem::Dependency
26
+ name: mysql2
27
+ requirement: &2160282900 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *2160282900
36
+ description: fuzzy matcher
37
+ email:
38
+ - graffzon@gmail.com
39
+ executables: []
40
+ extensions: []
41
+ extra_rdoc_files: []
42
+ files:
43
+ - lib/fuzzy_matcher/adapter.rb
44
+ - lib/fuzzy_matcher/indexer.rb
45
+ - lib/fuzzy_matcher/searcher.rb
46
+ - lib/fuzzy_matcher/version.rb
47
+ - lib/fuzzy_matcher.rb
48
+ - Gemfile
49
+ - LICENSE
50
+ - Rakefile
51
+ - README.md
52
+ homepage: ''
53
+ licenses: []
54
+ post_install_message:
55
+ rdoc_options: []
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ requirements: []
71
+ rubyforge_project:
72
+ rubygems_version: 1.8.6
73
+ signing_key:
74
+ specification_version: 3
75
+ summary: smth..
76
+ test_files: []