ximate 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +7 -0
- data/Gemfile +4 -0
- data/README.rdoc +38 -0
- data/Rakefile +2 -0
- data/ext/bitap_fuzzy_search.c +88 -0
- data/ext/extconf.rb +32 -0
- data/ext/fuzzy_search.c +96 -0
- data/init.rb +1 -0
- data/lib/ximate.rb +5 -0
- data/lib/ximate/activerecord/relation.rb +22 -0
- data/lib/ximate/search.rb +79 -0
- data/lib/ximate/version.rb +3 -0
- data/ximate.gemspec +23 -0
- metadata +81 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
= Ximate
|
2
|
+
|
3
|
+
ApproXIMATE fuzzy search for Ruby on Rails activerecord models.
|
4
|
+
|
5
|
+
== Requirements
|
6
|
+
|
7
|
+
* Rails >= 3.0.0
|
8
|
+
|
9
|
+
|
10
|
+
== Installation
|
11
|
+
|
12
|
+
gem install ximate
|
13
|
+
|
14
|
+
== Usage
|
15
|
+
|
16
|
+
In your model, put something like this:
|
17
|
+
|
18
|
+
class Post < ActiveRecord::Base
|
19
|
+
define_index(:en) do
|
20
|
+
add_text title
|
21
|
+
add_text keywords.join(' ')
|
22
|
+
add_text body(:en)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
Then you can perform a search
|
27
|
+
|
28
|
+
Post.asearch('Economy').where(:public => true).limit(5)
|
29
|
+
|
30
|
+
== Questions or problems?
|
31
|
+
|
32
|
+
If you have any issues with ximate, please add an {issue on
|
33
|
+
GitHub}[https://github.com/pioz/ximate/issues] or fork the project and
|
34
|
+
send a pull request.
|
35
|
+
|
36
|
+
== Copyright
|
37
|
+
|
38
|
+
Copyright (c) 2010 Enrico Pilotto. MIT license.
|
data/Rakefile
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <ruby.h>

#include <stdio.h>
|
7
|
+
|
8
|
+
/*
 * Return a newly allocated, lower-cased copy of at most the first 31 bytes
 * of S (longer input is silently truncated).
 *
 * The 31-byte cap presumably keeps the pattern small enough for the
 * `1UL << m` bit masks used by bitap_fuzzy_search -- confirm before changing.
 *
 * Returns NULL when S is NULL or when the allocation fails.  The caller owns
 * the returned buffer and must release it with free().
 * tolower() is declared in <ctype.h>.
 */
char *
downcase2 (const char *s)
{
  size_t i, size;
  char *down_s;

  if (s == NULL)
    return NULL;

  size = strlen (s);
  if (size > 31)
    size = 31;

  down_s = malloc (size + 1);
  if (down_s == NULL)
    return NULL;

  /* tolower() is only defined for EOF and values representable as
     unsigned char, so bytes >= 0x80 must not be passed as a negative char. */
  for (i = 0; i < size; i++)
    down_s[i] = (char) tolower ((unsigned char) s[i]);
  down_s[size] = '\0';

  return down_s;
}
|
19
|
+
|
20
|
+
|
21
|
+
static VALUE
|
22
|
+
bitap_fuzzy_search (VALUE self, VALUE text, VALUE pattern, VALUE errors_percent)
|
23
|
+
{
|
24
|
+
char *p = downcase2 (StringValuePtr (pattern));
|
25
|
+
if (p[0] == '\0') return Qnil;
|
26
|
+
char *t = downcase2 (StringValuePtr (text));
|
27
|
+
int n = strlen (t);
|
28
|
+
int m = strlen (p);
|
29
|
+
if (abs (n - m) > 2) return Qnil;
|
30
|
+
const char *result = NULL;
|
31
|
+
unsigned long *R;
|
32
|
+
unsigned long bitmasks[CHAR_MAX + 1];
|
33
|
+
int i, d;
|
34
|
+
|
35
|
+
int errors = (FIX2INT (errors_percent) * m) / 100;
|
36
|
+
if (errors == 0) errors = 1;
|
37
|
+
|
38
|
+
/* Initialize the bit array R */
|
39
|
+
R = malloc ((errors + 1) * sizeof (*R));
|
40
|
+
for (i = 0; i <= errors; ++i)
|
41
|
+
R[i] = ~1;
|
42
|
+
|
43
|
+
/* Initialize the pattern bitmasks */
|
44
|
+
for (i = 0; i <= CHAR_MAX; ++i)
|
45
|
+
bitmasks[i] = ~0;
|
46
|
+
for (i = 0; i < m; ++i)
|
47
|
+
bitmasks[p[i]] &= ~(1UL << i);
|
48
|
+
|
49
|
+
for (i = 0; t[i] != '\0'; ++i)
|
50
|
+
{
|
51
|
+
/* Update the bit arrays */
|
52
|
+
unsigned long old_Rd1 = R[0];
|
53
|
+
|
54
|
+
R[0] |= bitmasks[t[i]];
|
55
|
+
R[0] <<= 1;
|
56
|
+
|
57
|
+
for (d = 1; d <= errors; ++d)
|
58
|
+
{
|
59
|
+
unsigned long tmp = R[d];
|
60
|
+
/* Substitution is all we care about */
|
61
|
+
R[d] = (old_Rd1 & (R[d] | bitmasks[t[i]])) << 1;
|
62
|
+
old_Rd1 = tmp;
|
63
|
+
}
|
64
|
+
|
65
|
+
if (0 == (R[errors] & (1UL << m)) && (i - m + 1) == 0)
|
66
|
+
{
|
67
|
+
result = (t + i - m) + 1;
|
68
|
+
break;
|
69
|
+
}
|
70
|
+
}
|
71
|
+
|
72
|
+
free (R);
|
73
|
+
free (p);
|
74
|
+
free (t);
|
75
|
+
|
76
|
+
if (result)
|
77
|
+
return rb_str_new2 (result);
|
78
|
+
return Qnil;
|
79
|
+
}
|
80
|
+
|
81
|
+
|
82
|
+
void
|
83
|
+
Init_bitap_fuzzy_search ()
|
84
|
+
{
|
85
|
+
/* Define Bitap fuzzy search class */
|
86
|
+
VALUE fuzzy = rb_define_class ("Fuzzy", rb_cObject);
|
87
|
+
rb_define_singleton_method (fuzzy, "search", bitap_fuzzy_search, 3);
|
88
|
+
}
|
data/ext/extconf.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# mkmf build script: generates the Makefile for the fuzzy_search extension.

# NOTE(review): clearing RC_ARCHS on darwin presumably prevents a
# multi-architecture build -- confirm this is still required.
ENV['RC_ARCHS'] = '' if RUBY_PLATFORM =~ /darwin/

require 'mkmf'
require 'rbconfig'

# RbConfig is the supported name; the old top-level Config alias was
# deprecated and later removed from Ruby.
LIBDIR     = RbConfig::CONFIG['libdir']
INCLUDEDIR = RbConfig::CONFIG['includedir']

HEADER_DIRS = [
  # First search /opt/local for macports
  '/opt/local/include',
  # Then search /usr/local for people that installed from source
  '/usr/local/include',
  # Check the ruby install locations
  INCLUDEDIR,
  # Finally fall back to /usr
  '/usr/include',
]

LIB_DIRS = [
  # First search /opt/local for macports
  '/opt/local/lib',
  # Then search /usr/local for people that installed from source
  '/usr/local/lib',
  # Check the ruby install locations
  LIBDIR,
  # Finally fall back to /usr
  '/usr/lib',
]

dir_config('fuzzy_search', HEADER_DIRS, LIB_DIRS)

create_makefile('fuzzy_search/fuzzy_search')
|
data/ext/fuzzy_search.c
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
#include <stdlib.h>
|
2
|
+
#include <string.h>
|
3
|
+
#include <limits.h>
|
4
|
+
#include <ruby.h>
|
5
|
+
|
6
|
+
#include <stdio.h>
|
7
|
+
|
8
|
+
/*
 * Return a newly allocated, lower-cased copy of at most the first 31 bytes
 * of S (longer input is silently truncated).
 *
 * Returns NULL when S is NULL or when the allocation fails.  The caller owns
 * the returned buffer and must release it with free().
 * tolower() is declared in <ctype.h>.
 */
char *
downcase (const char *s)
{
  size_t i, size;
  char *down_s;

  if (s == NULL)
    return NULL;

  size = strlen (s);
  if (size > 31)
    size = 31;

  down_s = malloc (size + 1);
  if (down_s == NULL)
    return NULL;

  /* tolower() is only defined for EOF and values representable as
     unsigned char, so bytes >= 0x80 must not be passed as a negative char. */
  for (i = 0; i < size; i++)
    down_s[i] = (char) tolower ((unsigned char) s[i]);
  down_s[size] = '\0';

  return down_s;
}
|
19
|
+
|
20
|
+
/* Return the smallest of the three integers X, Y and Z. */
int
minimum (int x, int y, int z)
{
  int smaller_xy = (y < x) ? y : x;

  return (z < smaller_xy) ? z : smaller_xy;
}
|
28
|
+
|
29
|
+
/* Return the larger of the two integers X and Y. */
int
maximun (int x, int y)
{
  if (x < y)
    return y;
  return x;
}
|
34
|
+
|
35
|
+
/*
 * Return the Levenshtein (edit) distance between the NUL-terminated strings
 * S and T: the minimum number of single-byte insertions, deletions and
 * substitutions needed to turn one into the other.
 *
 * Only two rows of the dynamic-programming matrix are kept.  Rows follow the
 * bytes of T and columns the bytes of S, so both rows hold strlen(S) + 1
 * entries.
 *
 * Returns INT_MAX when the working rows cannot be allocated, so that callers
 * comparing the result against an error budget treat the failure as
 * "no match".
 */
int
levenshtein_distance (const char *s, const char *t)
{
  /* Declarations */
  int n = (int) strlen (s);
  int m = (int) strlen (t);
  int i, j, distance;
  int *prev, *curr, *tmp;

  /* Init matrix */
  prev = malloc ((n + 1) * sizeof (*prev));
  curr = malloc ((n + 1) * sizeof (*curr));
  if (prev == NULL || curr == NULL)
    {
      free (prev);
      free (curr);
      return INT_MAX;
    }
  for (j = 0; j <= n; ++j)
    prev[j] = j;

  /* Start */
  for (i = 1; i <= m; i++)
    {
      curr[0] = i;
      for (j = 1; j <= n; j++)
        {
          /* i walks T (rows) and j walks S (columns); indexing them the
             other way round reads past the shorter string. */
          if (t[i-1] == s[j-1])
            curr[j] = prev[j-1];
          else
            {
              int best = prev[j-1];            /* substitution */
              if (curr[j-1] < best)
                best = curr[j-1];              /* insertion */
              if (prev[j] < best)
                best = prev[j];                /* deletion */
              curr[j] = best + 1;
            }
        }
      /* The finished row becomes the previous one; the old previous row is
         reused and fully overwritten on the next pass. */
      tmp = prev;
      prev = curr;
      curr = tmp;
    }
  distance = prev[n];

  free (prev);
  free (curr);

  return distance;
}
|
75
|
+
|
76
|
+
static VALUE
|
77
|
+
fuzzy_equal (VALUE self, VALUE text, VALUE pattern, VALUE errors_percent)
|
78
|
+
{
|
79
|
+
const char *t = StringValuePtr (text);
|
80
|
+
const char *p = StringValuePtr (pattern);
|
81
|
+
int errors = (errors_percent * maximun (strlen (t), strlen (p))) / 100;
|
82
|
+
int distance = levenshtein_distance (t, p);
|
83
|
+
// printf ("Allowed errors: %d - Levenshtein's distance: %d\n", errors, distance);
|
84
|
+
if (distance <= errors)
|
85
|
+
return Qtrue;
|
86
|
+
return Qfalse;
|
87
|
+
}
|
88
|
+
|
89
|
+
|
90
|
+
void
|
91
|
+
Init_fuzzy_search ()
|
92
|
+
{
|
93
|
+
/* Define Bitap fuzzy search class */
|
94
|
+
VALUE fuzzy = rb_define_class ("Fuzzy", rb_cObject);
|
95
|
+
rb_define_singleton_method (fuzzy, "equal", fuzzy_equal, 3);
|
96
|
+
}
|
data/init.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'ximate'
|
data/lib/ximate.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
module ActiveRecord

  # Reopens Relation so that a relation can carry per-record ranks and
  # return its records ordered by them.
  class Relation
    # Hash of record id => rank; empty when no ranking was requested.
    attr_accessor :ranks

    alias_method :orig_to_a, :to_a
    alias_method :orig_initialize, :initialize

    # Starts every relation with an empty rank table, then runs the original
    # constructor.
    def initialize(klass, table)
      @ranks = {}
      orig_initialize(klass, table)
    end

    # Returns the records unchanged when there are no ranks; otherwise
    # returns them sorted by rank, highest first.
    def to_a
      if @ranks.empty?
        orig_to_a
      else
        orig_to_a.sort { |left, right| @ranks[right.id] <=> @ranks[left.id] }
      end
    end
  end

end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# In-memory approximate (fuzzy) full-text index for ActiveRecord models.
#
# A model that includes Ximate gains +define_index+; calling it indexes every
# existing record and keeps the index up to date after each save.
module Ximate

  # locale => table => word => { record id => number of occurrences }
  DATA = {}
  OPTIONS = {:order_by_rank => true, :error_percent => 20}

  def self.included(base)
    base.extend(Search)
  end


  module Search
    # Declares what gets indexed for +locale+.  The block is evaluated in the
    # context of each record and is expected to call +add_text+.
    def define_index(locale = I18n.default_locale, &block)
      table = self.to_s.underscore.pluralize.to_sym
      DATA[locale.to_sym] ||= {}
      DATA[locale.to_sym][table] ||= {}

      extend ClassMethods
      include InstanceMethods

      # Keep the definition so the after_save callback, which is invoked
      # without a block, can rebuild the record's entries later.
      ximate_index_definitions[locale.to_sym] = block

      after_save :update_index

      all.each do |p|
        p.update_index(locale, &block)
      end

    end
  end


  module ClassMethods

    # Index definitions registered through define_index, keyed by locale.
    def ximate_index_definitions
      @ximate_index_definitions ||= {}
    end

    # Returns a relation restricted to the records whose indexed words
    # fuzzily match +pattern+ in the current I18n locale.  When
    # OPTIONS[:order_by_rank] is set the relation also carries the summed
    # occurrence counts as ranks.
    def asearch(pattern)
      table = self.to_s.underscore.pluralize.to_sym
      needle = pattern.downcase
      matches = {}
      DATA[I18n.locale] ||= {}
      DATA[I18n.locale][table] ||= {}
      DATA[I18n.locale][table].each do |word, ids|
        if Fuzzy.equal(word, needle, OPTIONS[:error_percent])
          ids.each {|id, rank| matches[id] = matches[id].to_i + rank}
        end
      end
      return where('1 = 0') if matches.empty?
      rel = scoped
      rel.ranks = matches if OPTIONS[:order_by_rank]
      # Hash conditions let ActiveRecord quote the values and use the model's
      # real table name instead of one guessed from the class name.
      rel.where(:id => matches.keys)
    end
  end


  module InstanceMethods

    # Splits +text+ into lower-cased words (tags and common punctuation are
    # stripped first) and queues them for indexing.
    def add_text(text)
      @words ||= []
      @words += text.to_s.gsub(/<[^>]*>/i, ' ').gsub(/[\.,'":;!\?\(\)]/, ' ').split(' ').map{|word| word.downcase}
    end

    # Rebuilds this record's index entries.
    #
    # * with a block: uses the block for +locale+ (default locale when nil);
    # * with only a locale: uses the definition registered for that locale;
    # * with no arguments (the after_save case): rebuilds every registered
    #   locale.
    def update_index(locale = nil, &block)
      definitions = self.class.ximate_index_definitions
      if block
        ximate_reindex(locale || I18n.default_locale, block)
      elsif locale
        definition = definitions[locale.to_sym]
        ximate_reindex(locale, definition) if definition
      else
        definitions.each do |defined_locale, definition|
          ximate_reindex(defined_locale, definition)
        end
      end
      self
    end

    # Removes every index entry of this record for +locale+ and resets the
    # queued words.
    def remove_index(locale)
      table = self.class.to_s.underscore.pluralize.to_sym
      @words = []
      DATA[locale.to_sym] ||= {}
      # delete_if drops emptied words without mutating the hash from inside
      # a plain each.
      (DATA[locale.to_sym][table] ||= {}).delete_if do |word, ids|
        ids.delete(self.id)
        ids.empty?
      end
    end

    # Replaces this record's entries for +locale+ with the words collected by
    # evaluating +definition+ in the record's context.
    def ximate_reindex(locale, definition)
      table = self.class.to_s.underscore.pluralize.to_sym
      remove_index(locale)
      instance_eval(&definition)
      @words.each do |word|
        ids = (DATA[locale.to_sym][table][word] ||= {})
        ids[self.id] ||= 0
        ids[self.id] += 1
      end
    end
    private :ximate_reindex

  end

end
|
data/ximate.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path('../lib', __FILE__)
|
3
|
+
require 'ximate/version'
|
4
|
+
|
5
|
+
# Gem specification for ximate; the file lists are taken from git.
Gem::Specification.new do |spec|
  # Identity
  spec.name        = 'ximate'
  spec.version     = Ximate::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ['Enrico Pilotto']
  spec.email       = ['enrico@megiston.it']
  spec.homepage    = 'https://github.com/pioz/ximate'
  spec.summary     = %q{Approximate fuzzy search for Ruby on Rails}
  spec.description = %q{Approximate fuzzy search for Ruby on Rails activerecord models.}
  spec.license     = 'MIT'

  spec.rubyforge_project = 'ximate'

  # Packaged files
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.extensions    = ['ext/extconf.rb']
  spec.require_paths = ['lib', 'ext']
end
|
metadata
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ximate
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Enrico Pilotto
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-05-05 00:00:00 +02:00
|
19
|
+
default_executable:
|
20
|
+
dependencies: []
|
21
|
+
|
22
|
+
description: Approximate fuzzy search for Ruby on Rails activerecord models.
|
23
|
+
email:
|
24
|
+
- enrico@megiston.it
|
25
|
+
executables: []
|
26
|
+
|
27
|
+
extensions:
|
28
|
+
- ext/extconf.rb
|
29
|
+
extra_rdoc_files: []
|
30
|
+
|
31
|
+
files:
|
32
|
+
- .gitignore
|
33
|
+
- Gemfile
|
34
|
+
- README.rdoc
|
35
|
+
- Rakefile
|
36
|
+
- ext/bitap_fuzzy_search.c
|
37
|
+
- ext/extconf.rb
|
38
|
+
- ext/fuzzy_search.c
|
39
|
+
- init.rb
|
40
|
+
- lib/ximate.rb
|
41
|
+
- lib/ximate/activerecord/relation.rb
|
42
|
+
- lib/ximate/search.rb
|
43
|
+
- lib/ximate/version.rb
|
44
|
+
- ximate.gemspec
|
45
|
+
has_rdoc: true
|
46
|
+
homepage: https://github.com/pioz/ximate
|
47
|
+
licenses:
|
48
|
+
- MIT
|
49
|
+
post_install_message:
|
50
|
+
rdoc_options: []
|
51
|
+
|
52
|
+
require_paths:
|
53
|
+
- lib
|
54
|
+
- ext
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
hash: 3
|
61
|
+
segments:
|
62
|
+
- 0
|
63
|
+
version: "0"
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
hash: 3
|
70
|
+
segments:
|
71
|
+
- 0
|
72
|
+
version: "0"
|
73
|
+
requirements: []
|
74
|
+
|
75
|
+
rubyforge_project: ximate
|
76
|
+
rubygems_version: 1.4.2
|
77
|
+
signing_key:
|
78
|
+
specification_version: 3
|
79
|
+
summary: Approximate fuzzy search for Ruby on Rails
|
80
|
+
test_files: []
|
81
|
+
|