ximate 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ ext/*.bundle
5
+ ext/*.o
6
+ ext/Makefile
7
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in ximate.gemspec
gemspec
@@ -0,0 +1,38 @@
1
+ = Ximate
2
+
3
+ ApproXIMATE fuzzy search for Ruby on Rails ActiveRecord models.
4
+
5
+ == Requirements
6
+
7
+ * Rails >= 3.0.0
8
+
9
+
10
+ == Installation
11
+
12
+ gem install ximate
13
+
14
+ == Usage
15
+
16
+ In your model, put something like this:
17
+
18
+ class Post < ActiveRecord::Base
19
+ define_index(:en) do
20
+ add_text title
21
+ add_text keywords.join(' ')
22
+ add_text body(:en)
23
+ end
24
+ end
25
+
26
+ Then you can perform a search
27
+
28
+ Post.asearch('Economy').where(:public => true).limit(5)
29
+
30
+ == Questions or problems?
31
+
32
+ If you have any issues with ximate, please add an {issue on
33
+ GitHub}[https://github.com/pioz/ximate/issues] or fork the project and
34
+ send a pull request.
35
+
36
+ == Copyright
37
+
38
+ Copyright (c) 2010 Enrico Pilotto. MIT license.
@@ -0,0 +1,2 @@
1
# Load Bundler, then let its gem helper register the gem Rake tasks.
require 'bundler'

Bundler::GemHelper.install_tasks
@@ -0,0 +1,88 @@
1
+ #include <stdlib.h>
2
+ #include <string.h>
3
+ #include <limits.h>
4
+ #include <ruby.h>
5
+
6
+ #include <stdio.h>
7
+
8
/*
 * Return a newly allocated, lower-cased copy of S, truncated to at most
 * 31 bytes (plus the terminating NUL).
 *
 * Only the ASCII letters 'A'..'Z' are folded; every other byte, including
 * the bytes of multi-byte encoded characters, is copied unchanged.  This
 * keeps the result independent of the process locale and avoids handing a
 * possibly negative `char' to a <ctype.h> function.
 *
 * The caller owns the returned buffer and must free() it.  Returns NULL
 * when the allocation fails.
 *
 * NOTE(review): the 31-byte cap presumably keeps the pattern bits used by
 * the bitap search inside a 32-bit unsigned long -- confirm before raising.
 */
char *
downcase2 (const char *s)
{
  size_t i;
  size_t size = strlen (s);
  char *down_s;

  if (size > 31)
    size = 31;

  down_s = malloc (size + 1);
  if (down_s == NULL)
    return NULL;

  for (i = 0; i < size; i++)
    {
      char c = s[i];
      if (c >= 'A' && c <= 'Z')
        c = (char) (c - 'A' + 'a');
      down_s[i] = c;
    }
  down_s[size] = '\0';
  return down_s;
}
19
+
20
+
21
+ static VALUE
22
+ bitap_fuzzy_search (VALUE self, VALUE text, VALUE pattern, VALUE errors_percent)
23
+ {
24
+ char *p = downcase2 (StringValuePtr (pattern));
25
+ if (p[0] == '\0') return Qnil;
26
+ char *t = downcase2 (StringValuePtr (text));
27
+ int n = strlen (t);
28
+ int m = strlen (p);
29
+ if (abs (n - m) > 2) return Qnil;
30
+ const char *result = NULL;
31
+ unsigned long *R;
32
+ unsigned long bitmasks[CHAR_MAX + 1];
33
+ int i, d;
34
+
35
+ int errors = (FIX2INT (errors_percent) * m) / 100;
36
+ if (errors == 0) errors = 1;
37
+
38
+ /* Initialize the bit array R */
39
+ R = malloc ((errors + 1) * sizeof (*R));
40
+ for (i = 0; i <= errors; ++i)
41
+ R[i] = ~1;
42
+
43
+ /* Initialize the pattern bitmasks */
44
+ for (i = 0; i <= CHAR_MAX; ++i)
45
+ bitmasks[i] = ~0;
46
+ for (i = 0; i < m; ++i)
47
+ bitmasks[p[i]] &= ~(1UL << i);
48
+
49
+ for (i = 0; t[i] != '\0'; ++i)
50
+ {
51
+ /* Update the bit arrays */
52
+ unsigned long old_Rd1 = R[0];
53
+
54
+ R[0] |= bitmasks[t[i]];
55
+ R[0] <<= 1;
56
+
57
+ for (d = 1; d <= errors; ++d)
58
+ {
59
+ unsigned long tmp = R[d];
60
+ /* Substitution is all we care about */
61
+ R[d] = (old_Rd1 & (R[d] | bitmasks[t[i]])) << 1;
62
+ old_Rd1 = tmp;
63
+ }
64
+
65
+ if (0 == (R[errors] & (1UL << m)) && (i - m + 1) == 0)
66
+ {
67
+ result = (t + i - m) + 1;
68
+ break;
69
+ }
70
+ }
71
+
72
+ free (R);
73
+ free (p);
74
+ free (t);
75
+
76
+ if (result)
77
+ return rb_str_new2 (result);
78
+ return Qnil;
79
+ }
80
+
81
+
82
+ void
83
+ Init_bitap_fuzzy_search ()
84
+ {
85
+ /* Define Bitap fuzzy search class */
86
+ VALUE fuzzy = rb_define_class ("Fuzzy", rb_cObject);
87
+ rb_define_singleton_method (fuzzy, "search", bitap_fuzzy_search, 3);
88
+ }
@@ -0,0 +1,32 @@
1
# Build configuration for the fuzzy_search C extension.

ENV['RC_ARCHS'] = '' if RUBY_PLATFORM =~ /darwin/

require 'mkmf'

# RbConfig replaces the deprecated top-level Config constant, which newer
# Rubies no longer define.
LIBDIR = RbConfig::CONFIG['libdir']
INCLUDEDIR = RbConfig::CONFIG['includedir']

HEADER_DIRS = [
  # First search /opt/local for macports
  '/opt/local/include',
  # Then search /usr/local for people that installed from source
  '/usr/local/include',
  # Check the ruby install locations
  INCLUDEDIR,
  # Finally fall back to /usr
  '/usr/include',
]

LIB_DIRS = [
  # First search /opt/local for macports
  '/opt/local/lib',
  # Then search /usr/local for people that installed from source
  '/usr/local/lib',
  # Check the ruby install locations
  LIBDIR,
  # Finally fall back to /usr
  '/usr/lib',
]

dir_config('fuzzy_search', HEADER_DIRS, LIB_DIRS)

create_makefile('fuzzy_search/fuzzy_search')
@@ -0,0 +1,96 @@
1
+ #include <stdlib.h>
2
+ #include <string.h>
3
+ #include <limits.h>
4
+ #include <ruby.h>
5
+
6
+ #include <stdio.h>
7
+
8
/*
 * Return a newly allocated, lower-cased copy of S, truncated to at most
 * 31 bytes (plus the terminating NUL).
 *
 * Only the ASCII letters 'A'..'Z' are folded; every other byte, including
 * the bytes of multi-byte encoded characters, is copied unchanged.  This
 * keeps the result independent of the process locale and avoids handing a
 * possibly negative `char' to a <ctype.h> function.
 *
 * The caller owns the returned buffer and must free() it.  Returns NULL
 * when the allocation fails.
 */
char *
downcase (const char *s)
{
  size_t i;
  size_t size = strlen (s);
  char *down_s;

  if (size > 31)
    size = 31;

  down_s = malloc (size + 1);
  if (down_s == NULL)
    return NULL;

  for (i = 0; i < size; i++)
    {
      char c = s[i];
      if (c >= 'A' && c <= 'Z')
        c = (char) (c - 'A' + 'a');
      down_s[i] = c;
    }
  down_s[size] = '\0';
  return down_s;
}
19
+
20
/* Return the smallest of the three integers X, Y and Z. */
int
minimum (int x, int y, int z)
{
  int smaller_xy = (y < x) ? y : x;

  return (z < smaller_xy) ? z : smaller_xy;
}
28
+
29
/* Return the larger of X and Y (X when they are equal). */
int
maximun (int x, int y)
{
  if (x < y)
    return y;
  return x;
}
34
+
35
/*
 * Compute the Levenshtein (edit) distance between the NUL-terminated
 * strings S and T: the minimum number of single-character insertions,
 * deletions and substitutions that turn one into the other.
 *
 * Two rows of strlen(S) + 1 ints are kept instead of the full matrix.
 * Rows iterate over T and columns over S, so T is indexed by the row
 * counter and S by the column counter.
 *
 * Returns the distance, or INT_MAX when the working rows cannot be
 * allocated, so that an allocation failure is never taken for a match.
 */
int
levenshtein_distance (const char *s, const char *t)
{
  size_t n = strlen (s);        /* number of columns */
  size_t m = strlen (t);        /* number of rows */
  size_t i, j;
  int distance;
  int *prev = malloc ((n + 1) * sizeof (*prev));
  int *curr = malloc ((n + 1) * sizeof (*curr));
  int *tmp;

  if (prev == NULL || curr == NULL)
    {
      free (prev);
      free (curr);
      return INT_MAX;
    }

  /* Row 0: turning the empty prefix of T into S[0..j) costs j edits. */
  for (j = 0; j <= n; ++j)
    prev[j] = (int) j;

  for (i = 1; i <= m; i++)
    {
      curr[0] = (int) i;
      for (j = 1; j <= n; j++)
        {
          if (t[i - 1] == s[j - 1])
            curr[j] = prev[j - 1];
          else
            {
              /* Cheapest of substitution and the two insert/delete moves. */
              int best = prev[j - 1];
              if (prev[j] < best)
                best = prev[j];
              if (curr[j - 1] < best)
                best = curr[j - 1];
              curr[j] = best + 1;
            }
        }
      /* The finished row becomes `prev'; the old row is fully overwritten
         on the next pass, so it needs no clearing. */
      tmp = prev;
      prev = curr;
      curr = tmp;
    }
  distance = prev[n];

  free (prev);
  free (curr);

  return distance;
}
75
+
76
+ static VALUE
77
+ fuzzy_equal (VALUE self, VALUE text, VALUE pattern, VALUE errors_percent)
78
+ {
79
+ const char *t = StringValuePtr (text);
80
+ const char *p = StringValuePtr (pattern);
81
+ int errors = (errors_percent * maximun (strlen (t), strlen (p))) / 100;
82
+ int distance = levenshtein_distance (t, p);
83
+ // printf ("Allowed errors: %d - Levenshtein's distance: %d\n", errors, distance);
84
+ if (distance <= errors)
85
+ return Qtrue;
86
+ return Qfalse;
87
+ }
88
+
89
+
90
+ void
91
+ Init_fuzzy_search ()
92
+ {
93
+ /* Define Bitap fuzzy search class */
94
+ VALUE fuzzy = rb_define_class ("Fuzzy", rb_cObject);
95
+ rb_define_singleton_method (fuzzy, "equal", fuzzy_equal, 3);
96
+ }
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ require 'ximate'
@@ -0,0 +1,5 @@
1
# Load the search module, the ActiveRecord::Relation patch and the compiled
# extension, all resolved relative to this file, in that order.
base_dir = File.dirname(__FILE__)
['ximate/search', 'ximate/activerecord/relation', '../ext/fuzzy_search'].each do |relative_path|
  require File.join(base_dir, relative_path)
end

# Mix Ximate into every ActiveRecord model.
ActiveRecord::Base.send(:include, Ximate)
@@ -0,0 +1,22 @@
1
module ActiveRecord

  # Reopens Relation so that a relation can carry search ranks and return
  # its records ordered by them.
  class Relation
    # Hash of record id => rank; empty when no ranking applies.
    attr_accessor :ranks

    # Keep the stock implementations reachable under orig_* names.
    alias_method :orig_to_a, :to_a
    alias_method :orig_initialize, :initialize

    # Starts every relation with an empty rank table, then defers to the
    # original constructor.
    def initialize(klass, table)
      @ranks = {}
      orig_initialize(klass, table)
    end

    # Returns the loaded records. When ranks are present, the records are
    # sorted by descending rank.
    def to_a
      unranked = @ranks.empty?
      records = orig_to_a
      if unranked
        records
      else
        records.sort { |a, b| @ranks[b.id] <=> @ranks[a.id] }
      end
    end
  end

end
@@ -0,0 +1,79 @@
1
# In-memory fuzzy search index for ActiveRecord models.
module Ximate

  # Index storage: DATA[locale][table][word] is a hash of record id => number
  # of occurrences of that word in the record.
  DATA = {}
  # :order_by_rank  - attach the ranks to the relation returned by asearch.
  # :error_percent  - error percentage passed to Fuzzy.equal.
  OPTIONS = {:order_by_rank => true, :error_percent => 20}

  def self.included(base)
    base.extend(Search)
  end


  module Search
    # Declares the searchable text of the model for +locale+. The block is
    # evaluated in the context of each record and is expected to call
    # add_text. All existing records are indexed immediately and every
    # record is re-indexed after it is saved.
    def define_index(locale = I18n.default_locale, &block)
      table = self.to_s.underscore.pluralize.to_sym
      DATA[locale.to_sym] ||= {}
      DATA[locale.to_sym][table] ||= {}

      extend ClassMethods
      include InstanceMethods

      # The callback has to carry the locale and the definition block:
      # update_index cannot rebuild a record's words without them.
      after_save { |record| record.update_index(locale, &block) }

      all.each do |record|
        record.update_index(locale, &block)
      end

    end
  end


  module ClassMethods

    # Returns a relation restricted to the records whose indexed words
    # fuzzily match +pattern+ in the current I18n locale. A record's rank is
    # the sum of the occurrence counts of its matching words. When nothing
    # matches, a relation that selects no rows is returned.
    def asearch(pattern)
      table = self.to_s.underscore.pluralize.to_sym
      locale = I18n.locale.to_sym
      needle = pattern.downcase
      matches = {}
      DATA[locale] ||= {}
      DATA[locale][table] ||= {}
      DATA[locale][table].each do |word, ids|
        if Fuzzy.equal(word, needle, OPTIONS[:error_percent])
          ids.each {|id, rank| matches[id] = matches[id].to_i + rank}
        end
      end
      return where('1 = 0') if matches.empty?
      rel = scoped
      rel.ranks = matches if OPTIONS[:order_by_rank]
      rel.where("#{table}.id IN (#{matches.keys.join(',')})")
    end
  end


  module InstanceMethods

    # Collects the words of +text+ for indexing: markup tags and common
    # punctuation are replaced by spaces, the rest is split on whitespace
    # and downcased.
    def add_text(text)
      @words ||= []
      @words += text.to_s.gsub(/<[^>]*>/i, ' ').gsub(/[\.,'":;!\?\(\)]/, ' ').split(' ').map{|word| word.downcase}
    end

    # Rebuilds this record's index entries for +locale+ by evaluating the
    # index definition block against the record.
    def update_index(locale = I18n.default_locale, &block)
      # Fail before touching the index, so that a call without the block
      # does not leave the record with its entries removed.
      raise ArgumentError, 'update_index requires the index definition block' unless block
      table = self.class.to_s.underscore.pluralize.to_sym
      remove_index(locale)
      instance_eval(&block)
      @words.each do |word|
        ids = (DATA[locale.to_sym][table][word] ||= {})
        ids[self.id] ||= 0
        ids[self.id] += 1
      end
    end

    # Drops every index entry of this record for +locale+, discarding words
    # that no longer point to any record, and resets the collected words.
    def remove_index(locale)
      table = self.class.to_s.underscore.pluralize.to_sym
      @words = []
      # delete_if prunes entries without mutating the hash from inside each.
      DATA[locale.to_sym][table].delete_if do |word, ids|
        ids.delete(self.id)
        ids.empty?
      end
    end

  end

end
@@ -0,0 +1,3 @@
1
module Ximate
  # Release number of the gem; ximate.gemspec reads it as the gem version.
  VERSION = '0.0.1'
end
@@ -0,0 +1,23 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for ximate. The version comes from lib/ximate/version.rb
# and the file lists are taken from the git index.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.push(lib_dir)
require 'ximate/version'

Gem::Specification.new do |spec|
  # Identity
  spec.name        = 'ximate'
  spec.version     = Ximate::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ['Enrico Pilotto']
  spec.email       = ['enrico@megiston.it']
  spec.homepage    = 'https://github.com/pioz/ximate'
  spec.summary     = %q{Approximate fuzzy search for Ruby on Rails}
  spec.description = %q{Approximate fuzzy search for Ruby on Rails activerecord models.}
  spec.license     = 'MIT'

  spec.rubyforge_project = 'ximate'

  # Contents
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.extensions    = ['ext/extconf.rb']
  spec.require_paths = ['lib', 'ext']
end
metadata ADDED
@@ -0,0 +1,81 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ximate
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Enrico Pilotto
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-05-05 00:00:00 +02:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: Approximate fuzzy search for Ruby on Rails activerecord models.
23
+ email:
24
+ - enrico@megiston.it
25
+ executables: []
26
+
27
+ extensions:
28
+ - ext/extconf.rb
29
+ extra_rdoc_files: []
30
+
31
+ files:
32
+ - .gitignore
33
+ - Gemfile
34
+ - README.rdoc
35
+ - Rakefile
36
+ - ext/bitap_fuzzy_search.c
37
+ - ext/extconf.rb
38
+ - ext/fuzzy_search.c
39
+ - init.rb
40
+ - lib/ximate.rb
41
+ - lib/ximate/activerecord/relation.rb
42
+ - lib/ximate/search.rb
43
+ - lib/ximate/version.rb
44
+ - ximate.gemspec
45
+ has_rdoc: true
46
+ homepage: https://github.com/pioz/ximate
47
+ licenses:
48
+ - MIT
49
+ post_install_message:
50
+ rdoc_options: []
51
+
52
+ require_paths:
53
+ - lib
54
+ - ext
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ hash: 3
61
+ segments:
62
+ - 0
63
+ version: "0"
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ hash: 3
70
+ segments:
71
+ - 0
72
+ version: "0"
73
+ requirements: []
74
+
75
+ rubyforge_project: ximate
76
+ rubygems_version: 1.4.2
77
+ signing_key:
78
+ specification_version: 3
79
+ summary: Approximate fuzzy search for Ruby on Rails
80
+ test_files: []
81
+