ximate 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ ext/*.bundle
5
+ ext/*.o
6
+ ext/Makefile
7
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in ximate.gemspec
gemspec
@@ -0,0 +1,38 @@
1
+ = Ximate
2
+
3
+ ApproXIMATE fuzzy search for Ruby on Rails activerecord models.
4
+
5
+ == Requirements
6
+
7
+ * Rails >= 3.0.0
8
+
9
+
10
+ == Installation
11
+
12
+ gem install ximate
13
+
14
+ == Usage
15
+
16
+ In your model, put something like this:
17
+
18
+ class Post < ActiveRecord::Base
19
+ define_index(:en) do
20
+ add_text title
21
+ add_text keywords.join(' ')
22
+ add_text body(:en)
23
+ end
24
+ end
25
+
26
+ Then you can perform a search:
27
+
28
+ Post.asearch('Economy').where(:public => true).limit(5)
29
+
30
+ == Questions or problems?
31
+
32
+ If you have any issues with ximate, please add an {issue on
33
+ GitHub}[https://github.com/pioz/ximate/issues] or fork the project and
34
+ send a pull request.
35
+
36
+ == Copyright
37
+
38
+ Copyright (c) 2010 Enrico Pilotto. MIT license.
@@ -0,0 +1,2 @@
1
# Load Bundler and let its gem helper register the Rake tasks for this gem.
require "bundler"
Bundler::GemHelper.install_tasks()
@@ -0,0 +1,88 @@
1
+ #include <stdlib.h>
2
+ #include <string.h>
3
+ #include <limits.h>
4
+ #include <ruby.h>
5
+
6
+ #include <stdio.h>
7
+
8
/*
 * Return a newly allocated, lower-cased copy of at most the first 31
 * bytes of S.  The caller owns the result and must free() it.
 *
 * Only the ASCII letters 'A'..'Z' are folded; every other byte is copied
 * unchanged, so the result does not depend on the process locale and
 * bytes >= 0x80 (e.g. UTF-8 sequences) are never altered.
 *
 * The 31-byte cap presumably keeps the pattern within one machine word
 * for the bitap matcher in this file -- confirm before changing it.
 *
 * Returns NULL if memory cannot be allocated.
 */
char *
downcase2 (const char *s)
{
  enum { MAX_WORD_LEN = 31 };
  size_t i;
  size_t size = strlen (s);
  char *down_s;

  if (size > (size_t) MAX_WORD_LEN)
    size = (size_t) MAX_WORD_LEN;

  down_s = malloc (size + 1);
  if (down_s == NULL)
    return NULL;

  for (i = 0; i < size; i++)
    {
      char c = s[i];
      down_s[i] = (c >= 'A' && c <= 'Z') ? (char) (c - 'A' + 'a') : c;
    }
  down_s[size] = '\0';
  return down_s;
}
19
+
20
+
21
+ static VALUE
22
+ bitap_fuzzy_search (VALUE self, VALUE text, VALUE pattern, VALUE errors_percent)
23
+ {
24
+ char *p = downcase2 (StringValuePtr (pattern));
25
+ if (p[0] == '\0') return Qnil;
26
+ char *t = downcase2 (StringValuePtr (text));
27
+ int n = strlen (t);
28
+ int m = strlen (p);
29
+ if (abs (n - m) > 2) return Qnil;
30
+ const char *result = NULL;
31
+ unsigned long *R;
32
+ unsigned long bitmasks[CHAR_MAX + 1];
33
+ int i, d;
34
+
35
+ int errors = (FIX2INT (errors_percent) * m) / 100;
36
+ if (errors == 0) errors = 1;
37
+
38
+ /* Initialize the bit array R */
39
+ R = malloc ((errors + 1) * sizeof (*R));
40
+ for (i = 0; i <= errors; ++i)
41
+ R[i] = ~1;
42
+
43
+ /* Initialize the pattern bitmasks */
44
+ for (i = 0; i <= CHAR_MAX; ++i)
45
+ bitmasks[i] = ~0;
46
+ for (i = 0; i < m; ++i)
47
+ bitmasks[p[i]] &= ~(1UL << i);
48
+
49
+ for (i = 0; t[i] != '\0'; ++i)
50
+ {
51
+ /* Update the bit arrays */
52
+ unsigned long old_Rd1 = R[0];
53
+
54
+ R[0] |= bitmasks[t[i]];
55
+ R[0] <<= 1;
56
+
57
+ for (d = 1; d <= errors; ++d)
58
+ {
59
+ unsigned long tmp = R[d];
60
+ /* Substitution is all we care about */
61
+ R[d] = (old_Rd1 & (R[d] | bitmasks[t[i]])) << 1;
62
+ old_Rd1 = tmp;
63
+ }
64
+
65
+ if (0 == (R[errors] & (1UL << m)) && (i - m + 1) == 0)
66
+ {
67
+ result = (t + i - m) + 1;
68
+ break;
69
+ }
70
+ }
71
+
72
+ free (R);
73
+ free (p);
74
+ free (t);
75
+
76
+ if (result)
77
+ return rb_str_new2 (result);
78
+ return Qnil;
79
+ }
80
+
81
+
82
+ void
83
+ Init_bitap_fuzzy_search ()
84
+ {
85
+ /* Define Bitap fuzzy search class */
86
+ VALUE fuzzy = rb_define_class ("Fuzzy", rb_cObject);
87
+ rb_define_singleton_method (fuzzy, "search", bitap_fuzzy_search, 3);
88
+ }
@@ -0,0 +1,32 @@
1
# Build configuration for the fuzzy_search C extension: generates the
# Makefile that compiles the C sources in this directory.

# NOTE(review): clearing RC_ARCHS on macOS presumably avoids building for
# several architectures at once -- confirm it is still needed.
ENV['RC_ARCHS'] = '' if RUBY_PLATFORM =~ /darwin/

require 'mkmf'
require 'rbconfig'

# RbConfig replaces the old top-level Config constant, which has been
# removed from Ruby.
LIBDIR     = RbConfig::CONFIG['libdir']
INCLUDEDIR = RbConfig::CONFIG['includedir']

HEADER_DIRS = [
  # First search /opt/local for macports
  '/opt/local/include',
  # Then search /usr/local for people that installed from source
  '/usr/local/include',
  # Check the ruby install locations
  INCLUDEDIR,
  # Finally fall back to /usr
  '/usr/include',
]

LIB_DIRS = [
  # First search /opt/local for macports
  '/opt/local/lib',
  # Then search /usr/local for people that installed from source
  '/usr/local/lib',
  # Check the ruby install locations
  LIBDIR,
  # Finally fall back to /usr
  '/usr/lib',
]

dir_config('fuzzy_search', HEADER_DIRS, LIB_DIRS)

create_makefile('fuzzy_search/fuzzy_search')
@@ -0,0 +1,96 @@
1
+ #include <stdlib.h>
2
+ #include <string.h>
3
+ #include <limits.h>
4
+ #include <ruby.h>
5
+
6
+ #include <stdio.h>
7
+
8
+ char *
9
+ downcase (const char *s)
10
+ {
11
+ int i, size = strlen (s);
12
+ if (size > 31) size = 31;
13
+ char *down_s = malloc (size + 1);
14
+ for (i = 0; i < size; i++)
15
+ down_s[i] = tolower (s[i]);
16
+ down_s[size] = '\0';
17
+ return down_s;
18
+ }
19
+
20
/* Return the smallest of the three integers X, Y and Z. */
int
minimum (int x, int y, int z)
{
  int smaller_xy = (y < x) ? y : x;

  return (z < smaller_xy) ? z : smaller_xy;
}
28
+
29
/* Return the larger of the two integers X and Y. */
int
maximun (int x, int y)
{
  if (x < y)
    return y;
  return x;
}
34
+
35
/*
 * Return the Levenshtein (edit) distance between the NUL-terminated
 * strings S and T: the minimum number of single-byte insertions,
 * deletions and substitutions that turn one into the other.
 *
 * Uses two rolling rows of strlen(S) + 1 cells.  Returns INT_MAX when
 * the rows cannot be allocated, so that a caller comparing the result
 * against an error budget treats the failure as "no match".
 */
int
levenshtein_distance (const char *s, const char *t)
{
  size_t n = strlen (s);
  size_t m = strlen (t);
  size_t i, j;
  int distance;
  int *prev, *curr, *tmp;

  prev = malloc ((n + 1) * sizeof *prev);
  curr = malloc ((n + 1) * sizeof *curr);
  if (prev == NULL || curr == NULL)
    {
      free (prev);
      free (curr);
      return INT_MAX;
    }

  /* Row 0: distance from each prefix of S to the empty string. */
  for (j = 0; j <= n; ++j)
    prev[j] = (int) j;

  /* Row i covers the first i bytes of T, column j the first j bytes of
     S, so the cell compares t[i-1] with s[j-1]. */
  for (i = 1; i <= m; i++)
    {
      curr[0] = (int) i;
      for (j = 1; j <= n; j++)
        {
          if (t[i-1] == s[j-1])
            curr[j] = prev[j-1];
          else
            {
              int best = prev[j-1];                      /* substitution */
              if (curr[j-1] < best) best = curr[j-1];    /* insertion or deletion */
              if (prev[j] < best) best = prev[j];        /* insertion or deletion */
              curr[j] = best + 1;
            }
        }
      /* The finished row becomes the previous one; every cell of the
         other row is overwritten on the next pass. */
      tmp = prev;
      prev = curr;
      curr = tmp;
    }
  distance = prev[n];

  free (prev);
  free (curr);

  return distance;
}
75
+
76
+ static VALUE
77
+ fuzzy_equal (VALUE self, VALUE text, VALUE pattern, VALUE errors_percent)
78
+ {
79
+ const char *t = StringValuePtr (text);
80
+ const char *p = StringValuePtr (pattern);
81
+ int errors = (errors_percent * maximun (strlen (t), strlen (p))) / 100;
82
+ int distance = levenshtein_distance (t, p);
83
+ // printf ("Allowed errors: %d - Levenshtein's distance: %d\n", errors, distance);
84
+ if (distance <= errors)
85
+ return Qtrue;
86
+ return Qfalse;
87
+ }
88
+
89
+
90
+ void
91
+ Init_fuzzy_search ()
92
+ {
93
+ /* Define Bitap fuzzy search class */
94
+ VALUE fuzzy = rb_define_class ("Fuzzy", rb_cObject);
95
+ rb_define_singleton_method (fuzzy, "equal", fuzzy_equal, 3);
96
+ }
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ require 'ximate'
@@ -0,0 +1,5 @@
1
# Load the search module, the ActiveRecord::Relation patch and the
# compiled fuzzy_search extension, all relative to this file's directory.
base_dir = File.dirname(__FILE__)
['ximate/search', 'ximate/activerecord/relation', '../ext/fuzzy_search'].each do |relative_path|
  require File.join(base_dir, relative_path)
end

# Make the Ximate class-level API available on every ActiveRecord model.
ActiveRecord::Base.send(:include, Ximate)
@@ -0,0 +1,22 @@
1
module ActiveRecord

  # Patch for ActiveRecord::Relation: a relation may carry a hash of
  # record id => rank, in which case its loaded records are returned
  # ordered by descending rank.
  class Relation
    # Hash of record id => rank; empty when no rank ordering is requested.
    attr_accessor :ranks

    # Alias the originals only once. If this file is loaded a second time
    # the aliases would otherwise point at the patched methods below and
    # every call would recurse forever.
    unless method_defined?(:orig_to_a)
      alias_method :orig_to_a, :to_a
    end
    unless method_defined?(:orig_initialize) || private_method_defined?(:orig_initialize)
      alias_method :orig_initialize, :initialize
    end

    def initialize(klass, table)
      @ranks = {}
      orig_initialize(klass, table)
    end

    # Returns the records as the original to_a does, sorted by descending
    # rank when ranks are present. A record without a rank entry counts
    # as rank 0 instead of breaking the comparison.
    def to_a
      return orig_to_a if @ranks.nil? || @ranks.empty?
      orig_to_a.sort do |x, y|
        @ranks[y.id].to_i <=> @ranks[x.id].to_i
      end
    end
  end

end
@@ -0,0 +1,79 @@
1
# In-memory word index and approximate search for ActiveRecord models.
module Ximate

  # Word index: DATA[locale][table][word] = {record id => occurrences}.
  DATA = {}
  OPTIONS = {:order_by_rank => true, :error_percent => 20}
  # Index definitions given to define_index: INDEX_BLOCKS[table][locale].
  # Kept so the after_save callback, which receives no block, can rebuild
  # a record's entries.
  INDEX_BLOCKS = {}

  def self.included(base)
    base.extend(Search)
  end


  module Search
    # Declares the indexed text of a model for +locale+. The block is
    # instance-evaluated on each record and should call add_text.
    # All existing records are indexed immediately, and each record is
    # re-indexed after it is saved.
    def define_index(locale = I18n.default_locale, &block)
      table = self.to_s.underscore.pluralize.to_sym
      locale = locale.to_sym
      DATA[locale] ||= {}
      DATA[locale][table] ||= {}
      (INDEX_BLOCKS[table] ||= {})[locale] = block

      extend ClassMethods
      include InstanceMethods

      after_save :update_index

      all.each do |record|
        record.update_index(locale, &block)
      end

    end
  end


  module ClassMethods

    # Returns a relation of the records whose indexed words (for the
    # current I18n.locale) approximately equal +pattern+, as decided by
    # Fuzzy.equal with OPTIONS[:error_percent]. A record's rank is the sum
    # of the occurrence counts of its matching words; it is attached to
    # the relation when OPTIONS[:order_by_rank] is set.
    def asearch(pattern)
      table = self.to_s.underscore.pluralize.to_sym
      index = ((DATA[I18n.locale.to_sym] ||= {})[table] ||= {})
      needle = pattern.to_s.downcase
      matches = {}
      index.each do |word, ids|
        next unless Fuzzy.equal(word, needle, OPTIONS[:error_percent])
        ids.each {|id, rank| matches[id] = matches[id].to_i + rank}
      end
      return where('1 = 0') if matches.empty?
      rel = scoped
      rel.ranks = matches if OPTIONS[:order_by_rank]
      # Hash condition: ActiveRecord supplies the real table name and
      # quotes the values instead of interpolating them into SQL.
      rel.where(:id => matches.keys)
    end
  end


  module InstanceMethods

    # Strips HTML tags and common punctuation from +text+, splits it on
    # whitespace and queues the down-cased words for indexing.
    def add_text(text)
      @words ||= []
      plain = text.to_s.gsub(/<[^>]*>/i, ' ').gsub(/[\.,'":;!\?\(\)]/, ' ')
      @words += plain.split(' ').map{|word| word.downcase}
    end

    # Rebuilds this record's index entries.
    #
    # With a block: indexes for +locale+ (I18n.default_locale when nil)
    # using that block. Without a block (e.g. as the after_save callback):
    # replays the definitions registered by define_index for this model,
    # for +locale+ only when given, otherwise for every registered locale.
    def update_index(locale = nil, &block)
      table = self.class.to_s.underscore.pluralize.to_sym
      definitions = {}
      if block
        definitions[(locale || I18n.default_locale).to_sym] = block
      else
        registered = INDEX_BLOCKS[table] || {}
        if locale
          saved = registered[locale.to_sym]
          definitions[locale.to_sym] = saved if saved
        else
          definitions = registered
        end
      end

      definitions.each do |index_locale, definition|
        remove_index(index_locale)
        instance_eval(&definition)
        index = ((DATA[index_locale] ||= {})[table] ||= {})
        @words.each do |word|
          ids = (index[word] ||= {})
          ids[self.id] = ids[self.id].to_i + 1
        end
      end
    end

    # Removes this record from the +locale+ index, dropping words that no
    # longer reference any record, and clears the queued words.
    def remove_index(locale)
      table = self.class.to_s.underscore.pluralize.to_sym
      @words = []
      index = ((DATA[locale.to_sym] ||= {})[table] ||= {})
      index.delete_if do |word, ids|
        ids.delete(self.id)
        ids.empty?
      end
    end

  end

end
@@ -0,0 +1,3 @@
1
module Ximate
  # Release number of the gem; ximate.gemspec reads it for the gem version.
  VERSION = '0.0.1'
end
@@ -0,0 +1,23 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for ximate. The file lists come from `git ls-files`,
# so the gem must be built from inside a git checkout.
lib_dir = File.expand_path('../lib', __FILE__)
$:.push lib_dir
require 'ximate/version'

Gem::Specification.new do |spec|
  # Identity
  spec.name     = 'ximate'
  spec.version  = Ximate::VERSION
  spec.platform = Gem::Platform::RUBY
  spec.license  = 'MIT'

  # Author and project information
  spec.authors     = ['Enrico Pilotto']
  spec.email       = ['enrico@megiston.it']
  spec.homepage    = 'https://github.com/pioz/ximate'
  spec.summary     = 'Approximate fuzzy search for Ruby on Rails'
  spec.description = 'Approximate fuzzy search for Ruby on Rails activerecord models.'

  spec.rubyforge_project = 'ximate'

  # Packaged files
  tracked_files     = `git ls-files`.split("\n")
  tracked_tests     = `git ls-files -- {test,spec,features}/*`.split("\n")
  tracked_bin_files = `git ls-files -- bin/*`.split("\n")

  spec.files       = tracked_files
  spec.test_files  = tracked_tests
  spec.executables = tracked_bin_files.map { |path| File.basename(path) }

  # Native extension and load paths
  spec.extensions    = ['ext/extconf.rb']
  spec.require_paths = ['lib', 'ext']
end
metadata ADDED
@@ -0,0 +1,81 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ximate
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Enrico Pilotto
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-05-05 00:00:00 +02:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: Approximate fuzzy search for Ruby on Rails activerecord models.
23
+ email:
24
+ - enrico@megiston.it
25
+ executables: []
26
+
27
+ extensions:
28
+ - ext/extconf.rb
29
+ extra_rdoc_files: []
30
+
31
+ files:
32
+ - .gitignore
33
+ - Gemfile
34
+ - README.rdoc
35
+ - Rakefile
36
+ - ext/bitap_fuzzy_search.c
37
+ - ext/extconf.rb
38
+ - ext/fuzzy_search.c
39
+ - init.rb
40
+ - lib/ximate.rb
41
+ - lib/ximate/activerecord/relation.rb
42
+ - lib/ximate/search.rb
43
+ - lib/ximate/version.rb
44
+ - ximate.gemspec
45
+ has_rdoc: true
46
+ homepage: https://github.com/pioz/ximate
47
+ licenses:
48
+ - MIT
49
+ post_install_message:
50
+ rdoc_options: []
51
+
52
+ require_paths:
53
+ - lib
54
+ - ext
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ hash: 3
61
+ segments:
62
+ - 0
63
+ version: "0"
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ hash: 3
70
+ segments:
71
+ - 0
72
+ version: "0"
73
+ requirements: []
74
+
75
+ rubyforge_project: ximate
76
+ rubygems_version: 1.4.2
77
+ signing_key:
78
+ specification_version: 3
79
+ summary: Approximate fuzzy search for Ruby on Rails
80
+ test_files: []
81
+