ximate 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +7 -0
- data/Gemfile +4 -0
- data/README.rdoc +38 -0
- data/Rakefile +2 -0
- data/ext/bitap_fuzzy_search.c +88 -0
- data/ext/extconf.rb +32 -0
- data/ext/fuzzy_search.c +96 -0
- data/init.rb +1 -0
- data/lib/ximate.rb +5 -0
- data/lib/ximate/activerecord/relation.rb +22 -0
- data/lib/ximate/search.rb +79 -0
- data/lib/ximate/version.rb +3 -0
- data/ximate.gemspec +23 -0
- metadata +81 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
= Ximate
|
2
|
+
|
3
|
+
ApproXIMATE fuzzy search for Ruby on Rails activerecord models.
|
4
|
+
|
5
|
+
== Requirements
|
6
|
+
|
7
|
+
* Rails >= 3.0.0
|
8
|
+
|
9
|
+
|
10
|
+
== Installation
|
11
|
+
|
12
|
+
gem install ximate
|
13
|
+
|
14
|
+
== Usage
|
15
|
+
|
16
|
+
In your model, put something like this:
|
17
|
+
|
18
|
+
class Post < ActiveRecord::Base
|
19
|
+
define_index(:en) do
|
20
|
+
add_text title
|
21
|
+
add_text keywords.join(' ')
|
22
|
+
add_text body(:en)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
Then you can perform a search:
|
27
|
+
|
28
|
+
Post.asearch('Economy').where(:public => true).limit(5)
|
29
|
+
|
30
|
+
== Questions or problems?
|
31
|
+
|
32
|
+
If you have any issues with Ximate please add an {issue on
|
33
|
+
GitHub}[https://github.com/pioz/ximate/issues] or fork the project and
|
34
|
+
send a pull request.
|
35
|
+
|
36
|
+
== Copyright
|
37
|
+
|
38
|
+
Copyright (c) 2010 Enrico Pilotto. MIT license.
|
data/Rakefile
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
#include <stdlib.h>
|
2
|
+
#include <string.h>
|
3
|
+
#include <limits.h>
|
4
|
+
#include <ruby.h>
|
5
|
+
|
6
|
+
#include <stdio.h>
|
7
|
+
|
8
|
+
/*
 * Return a freshly malloc'ed, lower-cased copy of at most the first 31
 * bytes of `s`.  The caller owns the result and must free () it.
 * Returns NULL when the allocation fails.
 */
char *
downcase2 (const char *s)
{
  size_t i, size = strlen (s);
  char *down_s;

  /* Keep at most 31 bytes of the input. */
  if (size > 31) size = 31;

  down_s = malloc (size + 1);
  if (down_s == NULL)
    return NULL;

  for (i = 0; i < size; i++)
    /* tolower () is only defined for unsigned char values and EOF, so
       bytes >= 0x80 must not be passed as a (possibly negative) char. */
    down_s[i] = (char) tolower ((unsigned char) s[i]);
  down_s[size] = '\0';
  return down_s;
}
|
19
|
+
|
20
|
+
|
21
|
+
static VALUE
|
22
|
+
bitap_fuzzy_search (VALUE self, VALUE text, VALUE pattern, VALUE errors_percent)
|
23
|
+
{
|
24
|
+
char *p = downcase2 (StringValuePtr (pattern));
|
25
|
+
if (p[0] == '\0') return Qnil;
|
26
|
+
char *t = downcase2 (StringValuePtr (text));
|
27
|
+
int n = strlen (t);
|
28
|
+
int m = strlen (p);
|
29
|
+
if (abs (n - m) > 2) return Qnil;
|
30
|
+
const char *result = NULL;
|
31
|
+
unsigned long *R;
|
32
|
+
unsigned long bitmasks[CHAR_MAX + 1];
|
33
|
+
int i, d;
|
34
|
+
|
35
|
+
int errors = (FIX2INT (errors_percent) * m) / 100;
|
36
|
+
if (errors == 0) errors = 1;
|
37
|
+
|
38
|
+
/* Initialize the bit array R */
|
39
|
+
R = malloc ((errors + 1) * sizeof (*R));
|
40
|
+
for (i = 0; i <= errors; ++i)
|
41
|
+
R[i] = ~1;
|
42
|
+
|
43
|
+
/* Initialize the pattern bitmasks */
|
44
|
+
for (i = 0; i <= CHAR_MAX; ++i)
|
45
|
+
bitmasks[i] = ~0;
|
46
|
+
for (i = 0; i < m; ++i)
|
47
|
+
bitmasks[p[i]] &= ~(1UL << i);
|
48
|
+
|
49
|
+
for (i = 0; t[i] != '\0'; ++i)
|
50
|
+
{
|
51
|
+
/* Update the bit arrays */
|
52
|
+
unsigned long old_Rd1 = R[0];
|
53
|
+
|
54
|
+
R[0] |= bitmasks[t[i]];
|
55
|
+
R[0] <<= 1;
|
56
|
+
|
57
|
+
for (d = 1; d <= errors; ++d)
|
58
|
+
{
|
59
|
+
unsigned long tmp = R[d];
|
60
|
+
/* Substitution is all we care about */
|
61
|
+
R[d] = (old_Rd1 & (R[d] | bitmasks[t[i]])) << 1;
|
62
|
+
old_Rd1 = tmp;
|
63
|
+
}
|
64
|
+
|
65
|
+
if (0 == (R[errors] & (1UL << m)) && (i - m + 1) == 0)
|
66
|
+
{
|
67
|
+
result = (t + i - m) + 1;
|
68
|
+
break;
|
69
|
+
}
|
70
|
+
}
|
71
|
+
|
72
|
+
free (R);
|
73
|
+
free (p);
|
74
|
+
free (t);
|
75
|
+
|
76
|
+
if (result)
|
77
|
+
return rb_str_new2 (result);
|
78
|
+
return Qnil;
|
79
|
+
}
|
80
|
+
|
81
|
+
|
82
|
+
void
|
83
|
+
Init_bitap_fuzzy_search ()
|
84
|
+
{
|
85
|
+
/* Define Bitap fuzzy search class */
|
86
|
+
VALUE fuzzy = rb_define_class ("Fuzzy", rb_cObject);
|
87
|
+
rb_define_singleton_method (fuzzy, "search", bitap_fuzzy_search, 3);
|
88
|
+
}
|
data/ext/extconf.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# Build configuration for the fuzzy_search C extension.

ENV['RC_ARCHS'] = '' if RUBY_PLATFORM =~ /darwin/

require 'mkmf'

# RbConfig is the supported name; the old top-level Config alias is
# deprecated and no longer defined by recent Ruby versions.
LIBDIR     = RbConfig::CONFIG['libdir']
INCLUDEDIR = RbConfig::CONFIG['includedir']

HEADER_DIRS = [
  # First search /opt/local for macports
  '/opt/local/include',
  # Then search /usr/local for people that installed from source
  '/usr/local/include',
  # Check the ruby install locations
  INCLUDEDIR,
  # Finally fall back to /usr
  '/usr/include',
]

LIB_DIRS = [
  # First search /opt/local for macports
  '/opt/local/lib',
  # Then search /usr/local for people that installed from source
  '/usr/local/lib',
  # Check the ruby install locations
  LIBDIR,
  # Finally fall back to /usr
  '/usr/lib',
]

dir_config('fuzzy_search', HEADER_DIRS, LIB_DIRS)

create_makefile('fuzzy_search/fuzzy_search')
|
data/ext/fuzzy_search.c
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
#include <stdlib.h>
|
2
|
+
#include <string.h>
|
3
|
+
#include <limits.h>
|
4
|
+
#include <ruby.h>
|
5
|
+
|
6
|
+
#include <stdio.h>
|
7
|
+
|
8
|
+
/*
 * Return a freshly malloc'ed, lower-cased copy of at most the first 31
 * bytes of `s`.  The caller owns the result and must free () it.
 * Returns NULL when the allocation fails.
 */
char *
downcase (const char *s)
{
  size_t i, size = strlen (s);
  char *down_s;

  /* Keep at most 31 bytes of the input. */
  if (size > 31) size = 31;

  down_s = malloc (size + 1);
  if (down_s == NULL)
    return NULL;

  for (i = 0; i < size; i++)
    /* tolower () is only defined for unsigned char values and EOF, so
       bytes >= 0x80 must not be passed as a (possibly negative) char. */
    down_s[i] = (char) tolower ((unsigned char) s[i]);
  down_s[size] = '\0';
  return down_s;
}
|
19
|
+
|
20
|
+
/* Return the smallest of the three integers x, y and z. */
int
minimum (int x, int y, int z)
{
  int smallest = (y < x) ? y : x;

  return (z < smallest) ? z : smallest;
}
|
28
|
+
|
29
|
+
/* Return the larger of the two integers x and y. */
int
maximun (int x, int y)
{
  if (x < y)
    return y;
  return x;
}
|
34
|
+
|
35
|
+
/*
 * Levenshtein (edit) distance between the NUL-terminated strings `s` and
 * `t`: the minimum number of single-byte insertions, deletions and
 * substitutions that turn one into the other.
 *
 * Only two rows of the dynamic-programming matrix are kept; each row has
 * strlen (s) + 1 cells.  Returns INT_MAX when the rows cannot be
 * allocated, so callers comparing against an error budget see "no match".
 */
int
levenshtein_distance (const char *s, const char *t)
{
  size_t n = strlen (s);
  size_t m = strlen (t);
  size_t i, j;
  int distance;
  int *prev = malloc ((n + 1) * sizeof *prev);
  int *curr = malloc ((n + 1) * sizeof *curr);
  int *tmp = NULL;

  if (prev == NULL || curr == NULL)
    {
      free (prev);
      free (curr);
      return INT_MAX;
    }

  /* Row 0: turning the empty prefix of t into s[0..j) costs j insertions. */
  for (j = 0; j <= n; ++j) prev[j] = (int) j;

  for (i = 1; i <= m; i++)
    {
      curr[0] = (int) i;
      for (j = 1; j <= n; j++)
        {
          /* Columns walk over s and rows over t, so s is indexed by j and
             t by i; the other way round reads past the shorter string. */
          if (s[j-1] != t[i-1])
            {
              int best = prev[j-1];
              if (curr[j-1] < best) best = curr[j-1];
              if (prev[j] < best) best = prev[j];
              curr[j] = best + 1;
            }
          else
            curr[j] = prev[j-1];
        }
      /* The finished row becomes `prev`; the old one is fully overwritten
         on the next pass, so it does not need clearing. */
      tmp = prev;
      prev = curr;
      curr = tmp;
    }
  distance = prev[n];

  free (prev);
  free (curr);

  return distance;
}
|
75
|
+
|
76
|
+
static VALUE
|
77
|
+
fuzzy_equal (VALUE self, VALUE text, VALUE pattern, VALUE errors_percent)
|
78
|
+
{
|
79
|
+
const char *t = StringValuePtr (text);
|
80
|
+
const char *p = StringValuePtr (pattern);
|
81
|
+
int errors = (errors_percent * maximun (strlen (t), strlen (p))) / 100;
|
82
|
+
int distance = levenshtein_distance (t, p);
|
83
|
+
// printf ("Allowed errors: %d - Levenshtein's distance: %d\n", errors, distance);
|
84
|
+
if (distance <= errors)
|
85
|
+
return Qtrue;
|
86
|
+
return Qfalse;
|
87
|
+
}
|
88
|
+
|
89
|
+
|
90
|
+
void
|
91
|
+
Init_fuzzy_search ()
|
92
|
+
{
|
93
|
+
/* Define Bitap fuzzy search class */
|
94
|
+
VALUE fuzzy = rb_define_class ("Fuzzy", rb_cObject);
|
95
|
+
rb_define_singleton_method (fuzzy, "equal", fuzzy_equal, 3);
|
96
|
+
}
|
data/init.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'ximate'
|
data/lib/ximate.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
module ActiveRecord

  # Reopens Relation so a relation can carry search ranks and return its
  # records ordered by them.
  class Relation
    # Hash of record id => rank. When it is empty, to_a is left untouched.
    attr_accessor :ranks

    alias_method :orig_to_a, :to_a
    alias_method :orig_initialize, :initialize

    # Starts every relation with an empty rank table, then runs the
    # original initializer.
    def initialize(klass, table)
      @ranks = {}
      orig_initialize(klass, table)
    end

    # Loads the records through the original to_a and, when ranks are
    # present, sorts them by descending rank.
    def to_a
      if @ranks.empty?
        orig_to_a
      else
        orig_to_a.sort { |left, right| @ranks[right.id] <=> @ranks[left.id] }
      end
    end
  end

end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
module Ximate

  # In-memory index: DATA[locale][table][word] => { record id => occurrences }.
  DATA = {}
  OPTIONS = {:order_by_rank => true, :error_percent => 20}

  def self.included(base)
    base.extend(Search)
  end


  module Search
    # Declares which text of a model is indexed for +locale+ and indexes
    # every existing record. The block is instance_eval'ed on each record
    # and is expected to call add_text.
    def define_index(locale = I18n.default_locale, &block)
      table = self.to_s.underscore.pluralize.to_sym
      DATA[locale.to_sym] ||= {}
      DATA[locale.to_sym][table] ||= {}

      extend ClassMethods
      include InstanceMethods

      # Keep the block: the after_save callback calls update_index without
      # arguments and needs it to rebuild the record's entries.
      ximate_index_blocks[locale.to_sym] = block if block

      after_save :update_index

      self.to_s.classify.constantize.all.each do |p|
        p.update_index(locale, &block)
      end

    end
  end


  module ClassMethods

    # Index blocks registered through define_index, keyed by locale symbol.
    def ximate_index_blocks
      @ximate_index_blocks ||= {}
    end

    # Returns a relation with the records whose indexed words fuzzily
    # match +pattern+ for the current I18n locale. A record's rank is the
    # summed occurrence count of its matching words; the ranks are attached
    # to the relation when OPTIONS[:order_by_rank] is set.
    def asearch(pattern)
      table = self.to_s.underscore.pluralize.to_sym
      locale = I18n.locale.to_sym
      needle = pattern.downcase
      matches = {}
      DATA[locale] ||= {}
      DATA[locale][table] ||= {}
      DATA[locale][table].each do |word, ids|
        if Fuzzy.equal(word, needle, OPTIONS[:error_percent])
          ids.each {|id, rank| matches[id] = matches[id].to_i + rank}
        end
      end
      return where('1 = 0') if matches.empty?
      rel = scoped
      rel.ranks = matches if OPTIONS[:order_by_rank]
      rel.where("#{table}.id IN (#{matches.keys.join(',')})")
    end
  end


  module InstanceMethods

    # Collects the words of +text+: tags of the form <...> and common
    # punctuation are replaced by spaces, the rest is split on whitespace
    # and downcased.
    def add_text(text)
      @words ||= []
      @words += text.to_s.gsub(/<[^>]*>/i, ' ').gsub(/[\.,'":;!\?\(\)]/, ' ').split(' ').map{|word| word.downcase}
    end

    # Rebuilds this record's index entries.
    #
    # With a block: indexes under +locale+ (default I18n.default_locale)
    # using that block. Without a block: uses the block registered by
    # define_index for +locale+, or - when no locale is given either, as
    # in the after_save callback - reindexes every registered locale.
    def update_index(locale = nil, &block)
      if block.nil?
        registered = self.class.ximate_index_blocks
        if locale.nil?
          registered.each do |registered_locale, registered_block|
            update_index(registered_locale, &registered_block)
          end
          return true
        end
        block = registered[locale.to_sym]
        return true if block.nil?
      end

      locale = (locale || I18n.default_locale).to_sym
      table = self.class.to_s.underscore.pluralize.to_sym
      remove_index(locale)
      instance_eval(&block)
      index = ((DATA[locale] ||= {})[table] ||= {})
      @words.each do |word|
        ids = (index[word] ||= {})
        ids[self.id] ||= 0
        ids[self.id] += 1
      end
    end

    # Drops this record from every word of the +locale+ index, removes
    # words that no longer reference any record, and resets @words.
    def remove_index(locale)
      table = self.class.to_s.underscore.pluralize.to_sym
      @words = []
      index = DATA[locale.to_sym] && DATA[locale.to_sym][table]
      return if index.nil?
      # delete_if instead of deleting from the hash inside each.
      index.delete_if do |word, ids|
        ids.delete(self.id)
        ids.empty?
      end
    end

  end

end
|
data/ximate.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for ximate; the version comes from lib/ximate/version.
$LOAD_PATH.push(File.expand_path('../lib', __FILE__))
require 'ximate/version'

Gem::Specification.new do |spec|
  spec.name        = 'ximate'
  spec.version     = Ximate::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ['Enrico Pilotto']
  spec.email       = ['enrico@megiston.it']
  spec.homepage    = 'https://github.com/pioz/ximate'
  spec.summary     = 'Approximate fuzzy search for Ruby on Rails'
  spec.description = 'Approximate fuzzy search for Ruby on Rails activerecord models.'
  spec.license     = 'MIT'

  spec.rubyforge_project = 'ximate'

  # File lists are taken from what git tracks.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.extensions    = %w[ext/extconf.rb]
  spec.require_paths = %w[lib ext]
end
|
metadata
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ximate
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Enrico Pilotto
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-05-05 00:00:00 +02:00
|
19
|
+
default_executable:
|
20
|
+
dependencies: []
|
21
|
+
|
22
|
+
description: Approximate fuzzy search for Ruby on Rails activerecord models.
|
23
|
+
email:
|
24
|
+
- enrico@megiston.it
|
25
|
+
executables: []
|
26
|
+
|
27
|
+
extensions:
|
28
|
+
- ext/extconf.rb
|
29
|
+
extra_rdoc_files: []
|
30
|
+
|
31
|
+
files:
|
32
|
+
- .gitignore
|
33
|
+
- Gemfile
|
34
|
+
- README.rdoc
|
35
|
+
- Rakefile
|
36
|
+
- ext/bitap_fuzzy_search.c
|
37
|
+
- ext/extconf.rb
|
38
|
+
- ext/fuzzy_search.c
|
39
|
+
- init.rb
|
40
|
+
- lib/ximate.rb
|
41
|
+
- lib/ximate/activerecord/relation.rb
|
42
|
+
- lib/ximate/search.rb
|
43
|
+
- lib/ximate/version.rb
|
44
|
+
- ximate.gemspec
|
45
|
+
has_rdoc: true
|
46
|
+
homepage: https://github.com/pioz/ximate
|
47
|
+
licenses:
|
48
|
+
- MIT
|
49
|
+
post_install_message:
|
50
|
+
rdoc_options: []
|
51
|
+
|
52
|
+
require_paths:
|
53
|
+
- lib
|
54
|
+
- ext
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
hash: 3
|
61
|
+
segments:
|
62
|
+
- 0
|
63
|
+
version: "0"
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
hash: 3
|
70
|
+
segments:
|
71
|
+
- 0
|
72
|
+
version: "0"
|
73
|
+
requirements: []
|
74
|
+
|
75
|
+
rubyforge_project: ximate
|
76
|
+
rubygems_version: 1.4.2
|
77
|
+
signing_key:
|
78
|
+
specification_version: 3
|
79
|
+
summary: Approximate fuzzy search for Ruby on Rails
|
80
|
+
test_files: []
|
81
|
+
|