ngramdistance-ffi 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ README.markdown.html
2
+ *.rbc
3
+ pkg
4
+ .*.sw?
@@ -0,0 +1,3 @@
1
+ 1.0.0
2
+ -----
3
+ * started
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
# Dependencies for developing and running ngramdistance-ffi.
#
# The HTTPS endpoint is spelled out because the `:rubygems` symbol shorthand
# is deprecated by Bundler (it resolves to the plain-HTTP index).
source 'https://rubygems.org'

gem 'ffi'

group :test do
  # The specs use RSpec 1 syntax (`require 'spec'`, `be_close`), hence the pin.
  gem 'rspec', '1.3.1'
  gem 'jeweler'
end
data/README.markdown ADDED
@@ -0,0 +1,21 @@
1
+ ngramdistance-ffi
2
+ ===============
3
+
4
+ Converted to FFI by Bali for Ruby portability.
5
+
6
+ This gem was originally based on ngram distance.
7
+
8
+ Tested on:
9
+
10
+ * MRI 1.9.2
11
+
12
+ Known Issues
13
+ ============
14
+ * The C extension uses `char*` strings, and so Unicode strings will give incorrect distances.
15
+
16
+ Including in Gemfile
17
+ ====================
18
+
19
+ gem 'ngramdistance-ffi', :require => 'ngramdistance'
20
+
21
+ - end -
data/Rakefile ADDED
@@ -0,0 +1,19 @@
1
# Gem packaging tasks. Jeweler is optional: when it cannot be loaded, an
# install hint is printed instead of aborting rake.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |gemspec|
    # No stray whitespace here: the name must match the published gem name.
    gemspec.name = "ngramdistance-ffi"
    gemspec.summary = "An FFI version of the ngramdistance gem."
    gemspec.description = "Provides a fast, cross-Ruby implementation of the ngramdistance distance algorithm."
    # Contact details are kept in sync with the generated
    # ngramdistance-ffi.gemspec, which is produced from this block.
    gemspec.email = "mailbali@gmail.com"
    gemspec.homepage = "https://github.com/pecbali/ngramdistance-ffi"
    gemspec.authors = ["Bali"]
    gemspec.add_dependency "ffi"
    gemspec.add_development_dependency "rspec"
    gemspec.add_development_dependency "jeweler"
  end

  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler not available. Install it with: gem install jeweler"
end
19
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.0
@@ -0,0 +1,3 @@
1
+ *.bundle
2
+ *.o
3
+ Makefile
@@ -0,0 +1,2 @@
1
# Build configuration: writes the Makefile for the `ngramdistance` target.
require 'mkmf'

create_makefile 'ngramdistance'
@@ -0,0 +1,140 @@
1
+ #include <string.h>
2
+ #include <stdlib.h>
3
+ #include <stdio.h>
4
+ #ifdef NGRAM_CASE_INSENSITIVE
5
+ #include <ctype.h>
6
+ #define eq(x, y) (tolower(x) == tolower(y))
7
+ #else
8
+ #define eq(x, y) ((x) == (y))
9
+ #endif
10
+
11
+ #ifndef max
12
+ #define max( a, b ) ( ((a) > (b)) ? (a) : (b) )
13
+ #endif
14
+
15
+ #ifndef min
16
+ #define min( a, b ) ( ((a) < (b)) ? (a) : (b) )
17
+ #endif
18
+
19
+
20
/*
 * Return a newly allocated, NUL-terminated copy of the `len` bytes of `str`
 * that start at offset `begin`. The caller owns the result and must free() it.
 *
 * Returns NULL when `str` is NULL or empty, when the requested range does not
 * lie entirely inside the string, or when the allocation fails.
 */
char* substring(const char* str, size_t begin, size_t len)
{
  size_t total;
  char* copy;

  if (str == NULL)
    return NULL;

  total = strlen(str);  /* measured once instead of once per comparison */

  /* `len > total - begin` is the overflow-safe form of `begin + len > total`. */
  if (total == 0 || begin > total || len > total - begin)
    return NULL;

  /* malloc + memcpy rather than strndup(), which is not part of ISO C. */
  copy = malloc(len + 1);
  if (copy == NULL)
    return NULL;

  memcpy(copy, str + begin, len);
  copy[len] = '\0';
  return copy;
}
27
+
28
+
29
/* Compare two bytes, honouring the NGRAM_CASE_INSENSITIVE build switch. */
static int ngram_char_eq(char a, char b)
{
#ifdef NGRAM_CASE_INSENSITIVE
  /* tolower() is only defined for unsigned char values (and EOF). */
  return tolower((unsigned char) a) == tolower((unsigned char) b);
#else
  return a == b;
#endif
}

/* Smallest of three floats. */
static float ngram_min3(float a, float b, float c)
{
  float m = (a < b) ? a : b;
  return (m < c) ? m : c;
}

/*
 * N-gram similarity of two NUL-terminated byte strings.
 *
 * source, target: strings to compare; a NULL pointer is treated as "".
 * n:              n-gram size, must be >= 1.
 *
 * Returns:
 *   - 1.0f when both strings are empty, 0.0f when exactly one is empty;
 *   - when either string is shorter than n, the number of positions whose
 *     bytes match divided by the longer length;
 *   - otherwise 1 - cost / longer length, where cost is an edit distance
 *     whose substitution cost is the fraction of differing bytes between
 *     the n-grams ending at the two positions.
 *   - 0.0f when n < 1 or when memory cannot be allocated.
 */
float ngramdistance (const char *source, const char *target, int n) {
  size_t sl, tl, longest, width, i, j, k;
  char *sa = NULL;    /* source with n-1 leading NUL padding bytes */
  char *t_j = NULL;   /* n-gram of target ending at position j */
  float *p = NULL;    /* previous row of the cost table */
  float *d = NULL;    /* row currently being filled */
  float *swap;
  float result = 0.0f;

  /* n == 0 would divide by zero below; negative n would corrupt the sizes. */
  if (n < 1)
    return 0.0f;
  if (source == NULL)
    source = "";
  if (target == NULL)
    target = "";

  sl = strlen(source);
  tl = strlen(target);
  if (sl == 0 || tl == 0)
    return (sl == tl) ? 1.0f : 0.0f;

  longest = (sl > tl) ? sl : tl;
  width = (size_t) n;

  /* Too short to form a single n-gram: fall back to positional matching. */
  if (sl < width || tl < width) {
    size_t shortest = (sl < tl) ? sl : tl;
    size_t matches = 0;
    for (i = 0; i < shortest; i++) {
      if (ngram_char_eq(source[i], target[i]))
        matches++;
    }
    return (float) matches / (float) longest;
  }

  sa = calloc(sl + width, sizeof(char));
  t_j = calloc(width + 1, sizeof(char));
  p = calloc(sl + 1, sizeof(float));
  d = calloc(sl + 1, sizeof(float));
  if (sa == NULL || t_j == NULL || p == NULL || d == NULL)
    goto cleanup;

  /* calloc already zeroed the n-1 padding bytes in front of the copy. */
  memcpy(sa + (width - 1), source, sl);

  for (i = 0; i <= sl; i++)
    p[i] = (float) i;

  for (j = 1; j <= tl; j++) {
    /* Build the n-gram ending at target[j-1], left-padded with NULs while
     * fewer than n bytes of target are available. The buffer is reused for
     * every row instead of being reallocated. */
    if (j < width) {
      size_t pad = width - j;
      memset(t_j, 0, pad);
      memcpy(t_j + pad, target, j);
    }
    else {
      memcpy(t_j, target + (j - width), width);
    }

    d[0] = (float) j;
    for (i = 1; i <= sl; i++) {
      int cost = 0;
      int tn = n;   /* number of positions that count towards the ratio */
      float ec;

      for (k = 0; k < width; k++) {
        char sc = sa[i - 1 + k];
        if (!ngram_char_eq(sc, t_j[k]))
          cost++;
        else if (sc == 0)   /* padding matched padding: discount it */
          tn--;
      }
      /* tn >= 1: the last byte of the source n-gram is never padding. */
      ec = (float) cost / (float) tn;

      /* cell to the left + 1, cell above + 1, diagonal + n-gram cost */
      d[i] = ngram_min3(d[i - 1] + 1.0f, p[i] + 1.0f, p[i - 1] + ec);
    }

    /* The finished row becomes the previous row of the next iteration. */
    swap = p;
    p = d;
    d = swap;
  }

  /* After the final swap, p holds the most recently computed row. */
  result = 1.0f - (p[sl] / (float) longest);

cleanup:
  free(p);
  free(d);
  free(t_j);
  free(sa);
  return result;
}
128
+
129
+
130
#ifdef TEST
/*
 * Command-line harness, compiled only when TEST is defined:
 *   <program> <source> <target>
 * Prints the score of the two arguments using n-grams of size 3.
 */
#include <stdio.h>
#include "ngramdistance.h"

int main (int argc, char **argv) {
  float distance;
  if (argc < 3) {
    /* Say why nothing was computed instead of exiting silently. */
    fprintf(stderr, "usage: %s <source> <target>\n",
            argc > 0 ? argv[0] : "ngramdistance");
    return -1;
  }
  distance = ngramdistance(argv[1], argv[2], 3);
  printf("%s vs %s: %f\n", argv[1], argv[2], distance);
  return 0;
}
#endif
@@ -0,0 +1 @@
1
#ifndef NGRAMDISTANCE_H
#define NGRAMDISTANCE_H

/*
 * N-gram similarity of two NUL-terminated strings using n-grams of size n.
 * Declared as float so the prototype matches the definition in
 * ngramdistance.c and the :float return type used by the FFI binding.
 */
float ngramdistance(const char *source, const char *target, int n);

#endif /* NGRAMDISTANCE_H */
@@ -0,0 +1,12 @@
1
+ require 'ffi'
2
+
3
# FFI binding to the native routine built under ext/ngramdistance.
module NGramDistance
  extend FFI::Library

  # The compiled library's file extension is not known up front, so every
  # candidate path is handed to FFI as a single array of alternatives.
  base_path = File.join(File.dirname(__FILE__), "..", "ext", "ngramdistance", "ngramdistance")
  suffixes = ['.bundle', '.so', '.dylib', '']
  ffi_lib(suffixes.map { |suffix| base_path + suffix })

  # Exposes the C function `ngramdistance` as NGramDistance.distance;
  # it takes two strings and an int and returns a float.
  attach_function :distance, :ngramdistance, [:string, :string, :int], :float
end
@@ -0,0 +1,64 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
Gem::Specification.new do |s|
  s.name = "ngramdistance-ffi"
  s.version = "1.0.0"

  if s.respond_to?(:required_rubygems_version=)
    s.required_rubygems_version = Gem::Requirement.new(">= 0")
  end
  s.authors = ["Bali"]
  s.date = "2012-06-18"
  s.description = "Provides a fast, cross-Ruby implementation of the ngramdistance distance algorithm."
  s.email = "mailbali@gmail.com"
  s.extensions = ["ext/ngramdistance/extconf.rb"]
  s.extra_rdoc_files = ["README.markdown"]
  s.files = %w[
    .gitignore
    CHANGELOG.markdown
    Gemfile
    README.markdown
    Rakefile
    VERSION
    ext/ngramdistance/.gitignore
    ext/ngramdistance/extconf.rb
    ext/ngramdistance/ngramdistance.c
    ext/ngramdistance/ngramdistance.h
    ngramdistance-ffi.gemspec
    lib/ngramdistance.rb
    spec/ngramdistance_spec.rb
    spec/spec_helper.rb
  ]
  s.homepage = "https://github.com/pecbali/ngramdistance-ffi"
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = "1.3.7"
  s.summary = "An FFI version of the ngramdistance gem."
  s.test_files = %w[
    spec/ngramdistance_spec.rb
    spec/spec_helper.rb
  ]

  any_version = [">= 0"]
  # RubyGems too old to distinguish runtime from development dependencies
  # gets all three gems as plain dependencies.
  add_all_as_plain = lambda do
    %w[ffi rspec jeweler].each { |gem_name| s.add_dependency(gem_name, any_version) }
  end

  if s.respond_to?(:specification_version)
    # Unused, but kept as emitted by the generator.
    current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
    s.specification_version = 3

    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')
      s.add_runtime_dependency("ffi", any_version)
      s.add_development_dependency("rspec", any_version)
      s.add_development_dependency("jeweler", any_version)
    else
      add_all_as_plain.call
    end
  else
    add_all_as_plain.call
  end
end
64
+
@@ -0,0 +1,19 @@
1
+ require 'spec_helper'
2
+
3
# Checks NGramDistance.distance against expected scores for n = 3.
describe NGramDistance do
  # Each fixture is [first word, second word, expected score].
  fixtures = [
    ["university", "univearsitty", 0.750000],
    ["university", "university", 1.0],
    ["hello", "jello", 0.633333],
    ["hello", "heloll", 0.666667],
    ["hello", "saint", 0.0000],
    ["hello", "", 0.0000]
  ]

  fixtures.each do |w1, w2, d|
    it "should calculate a distance of #{d} between #{w1} and #{w2}" do
      # Both argument orders are checked against the same expected score.
      NGramDistance.distance(w1, w2, 3).should be_close(d, 0.05)
      NGramDistance.distance(w2, w1, 3).should be_close(d, 0.05)
    end
  end
end
@@ -0,0 +1,3 @@
1
# Shared spec setup: loads RSpec 1.x (`spec`) and the library under test.
require 'spec'

# The gem ships lib/ngramdistance.rb; there is no lib/levenshtein to load.
require File.dirname(__FILE__) + "/../lib/ngramdistance"
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ngramdistance-ffi
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Bali
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-06-18 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: ffi
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: jeweler
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ description: Provides a fast, cross-Ruby implementation of the ngramdistance distance
63
+ algorithm.
64
+ email: mailbali@gmail.com
65
+ executables: []
66
+ extensions:
67
+ - ext/ngramdistance/extconf.rb
68
+ extra_rdoc_files:
69
+ - README.markdown
70
+ files:
71
+ - .gitignore
72
+ - CHANGELOG.markdown
73
+ - Gemfile
74
+ - README.markdown
75
+ - Rakefile
76
+ - VERSION
77
+ - ext/ngramdistance/.gitignore
78
+ - ext/ngramdistance/extconf.rb
79
+ - ext/ngramdistance/ngramdistance.c
80
+ - ext/ngramdistance/ngramdistance.h
81
+ - ngramdistance-ffi.gemspec
82
+ - lib/ngramdistance.rb
83
+ - spec/ngramdistance_spec.rb
84
+ - spec/spec_helper.rb
85
+ homepage: https://github.com/pecbali/ngramdistance-ffi
86
+ licenses: []
87
+ post_install_message:
88
+ rdoc_options:
89
+ - --charset=UTF-8
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ! '>='
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ required_rubygems_version: !ruby/object:Gem::Requirement
99
+ none: false
100
+ requirements:
101
+ - - ! '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ requirements: []
105
+ rubyforge_project:
106
+ rubygems_version: 1.8.24
107
+ signing_key:
108
+ specification_version: 3
109
+ summary: An FFI version of the ngramdistance gem.
110
+ test_files:
111
+ - spec/ngramdistance_spec.rb
112
+ - spec/spec_helper.rb