ngramdistance-ffi 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ README.markdown.html
2
+ *.rbc
3
+ pkg
4
+ .*.sw?
@@ -0,0 +1,3 @@
1
+ 1.0.0
2
+ -----
3
+ * started
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ source :rubygems
2
+
3
+ gem 'ffi'
4
+
5
+ group :test do
6
+ gem 'rspec', '1.3.1'
7
+ gem 'jeweler'
8
+ end
data/README.markdown ADDED
@@ -0,0 +1,21 @@
1
+ ngramdistance-ffi
2
+ ===============
3
+
4
+ Converted to FFI by Bali for Ruby portability.
5
+
6
+ This gem was originally based on ngram distance.
7
+
8
+ Tested on:
9
+
10
+ * MRI 1.9.2
11
+
12
+ Known Issues
13
+ ============
14
+ * The C extension uses `char*` strings, and so Unicode strings will give incorrect distances.
15
+
16
+ Including in Gemfile
17
+ ====================
18
+
19
+ gem 'ngramdistance-ffi', :require => 'ngramdistance'
20
+
21
+ - end -
data/Rakefile ADDED
@@ -0,0 +1,19 @@
1
+ begin
2
+ require 'jeweler'
3
+ Jeweler::Tasks.new do |gemspec|
4
+ gemspec.name = " ngramdistance-ffi"
5
+ gemspec.summary = "An FFI version of the ngramdistance gem."
6
+ gemspec.description = "Provides a fast, cross-Ruby implementation of the ngramdistance distance algorithm."
7
+ gemspec.email = "dbalatero@gmail.com"
8
+ gemspec.homepage = "http://github.com/dbalatero/ ngramdistance-ffi"
9
+ gemspec.authors = ["Bali"]
10
+ gemspec.add_dependency "ffi"
11
+ gemspec.add_development_dependency "rspec"
12
+ gemspec.add_development_dependency "jeweler"
13
+ end
14
+
15
+ Jeweler::GemcutterTasks.new
16
+ rescue LoadError
17
+ puts "Jeweler not available. Install it with: gem install jeweler"
18
+ end
19
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.0
@@ -0,0 +1,3 @@
1
+ *.bundle
2
+ *.o
3
+ Makefile
@@ -0,0 +1,2 @@
1
+ require 'mkmf'
2
+ create_makefile('ngramdistance')
@@ -0,0 +1,140 @@
1
+ #include <string.h>
2
+ #include <stdlib.h>
3
+ #include <stdio.h>
4
+ #ifdef NGRAM_CASE_INSENSITIVE
5
+ #include <ctype.h>
6
+ #define eq(x, y) (tolower(x) == tolower(y))
7
+ #else
8
+ #define eq(x, y) ((x) == (y))
9
+ #endif
10
+
11
+ #ifndef max
12
+ #define max( a, b ) ( ((a) > (b)) ? (a) : (b) )
13
+ #endif
14
+
15
+ #ifndef min
16
+ #define min( a, b ) ( ((a) < (b)) ? (a) : (b) )
17
+ #endif
18
+
19
+ /* Returns a newly allocated copy of len chars of str starting at begin (caller frees), or 0 when str is null/empty or the range does not fit. */
20
+ char* substring(const char* str, size_t begin, size_t len)
21
+ {
22
+   size_t total = str ? strlen(str) : 0; /* measure once instead of up to three strlen() passes */
23
+   if (total == 0 || begin > total || len > total - begin) /* subtraction form cannot wrap, unlike begin+len */
24
+     return 0;
25
+   return strndup(str + begin, len);
26
+ }
27
+ /* Similarity of source and target using n-grams of size n: 1 when they match exactly, lower as they differ. */
28
+ /* Returns 0 for a null string or n <= 0. Characters are compared one char at a time via eq(). */
29
+ float ngramdistance (const char *source, const char *target,int n) {
30
+   if (source == 0 || target == 0 || n <= 0) return 0; // strlen(NULL) would crash; n <= 0 gives 0/0 costs and bogus calloc sizes
31
+   int sl = strlen(source);
32
+   int tl = strlen(target);
33
+   if (sl == 0 || tl == 0) {
34
+     if (sl == tl) {
35
+       return 1;
36
+     }
37
+     else {
38
+       return 0;
39
+     }
40
+   }
41
+   int cost = 0;
42
+   if (sl < n || tl < n) { // an input is shorter than one n-gram: score the share of positions that match
43
+     int ni = min(sl,tl);
44
+     int i=0;
45
+     for (i=0;i<ni;i++) {
46
+       if (eq(source[i],target[i])) {
47
+         cost++;
48
+       }
49
+     }
50
+     return (float) cost/(float)max(sl, tl);
51
+   }
52
+   int char_len = sl+n-1;
53
+   int float_arr_len = sl+1;
54
+   char* sa = calloc(char_len+1,sizeof(char)); // source with n-1 leading NULs
55
+   float* p = calloc( float_arr_len+1 , sizeof( float)); //'previous' cost array, horizontally
56
+   float* d = calloc( float_arr_len+1 , sizeof( float)); // cost array, horizontally
57
+   float* _d; //placeholder to assist in swapping p and d
58
+   char* t_j = calloc(n+1,sizeof(char)); // jth n-gram of t
59
+   if (sa == 0 || p == 0 || d == 0 || t_j == 0) { // allocation failed: release what we got instead of dereferencing NULL
60
+     free(sa); free(p); free(d); free(t_j);
61
+     return 0;
62
+   }
63
+
64
+   //construct sa with prefix
65
+   int i=0; // iterates through source
66
+   for (i=0;i<char_len;i++) {
67
+     if (i < n-1) {
68
+       sa[i]=0; //add prefix
69
+     }
70
+     else {
71
+       sa[i] = source[i-n+1];
72
+     }
73
+   }
74
+
75
+   int j=0; // iterates through target
76
+   // seed the 'previous' row with 0..sl
77
+   for (i = 0; i<=sl; i++) {
78
+     p[i] = i;
79
+   }
80
+
81
+   for (j = 1; j<=tl; j++) {
82
+     //construct t_j n-gram
83
+     if (j < n) {
84
+       int ti=0;
85
+       for (ti=0;ti<n-j;ti++) {
86
+         t_j[ti]=0; //add prefix
87
+       }
88
+       for (ti=n-j;ti<n;ti++) {
89
+         t_j[ti]=target[ti-(n-j)];
90
+       }
91
+     }
92
+     else {
93
+       // refill the existing n+1 byte buffer; avoids a free + allocation (and a possible NULL) per column
94
+       memcpy(t_j, target + (j-n), n);
95
+     }
96
+     d[0] = j;
97
+     for (i=1; i<=sl; i++) {
98
+       cost = 0;
99
+       int tn=n;
100
+       //compare sa to t_j
101
+       int ni=0;
102
+       for (ni=0;ni<n;ni++) {
103
+         if (!eq(sa[i-1+ni] , t_j[ni])) {
104
+           cost++;
105
+         }
106
+         else if (eq(sa[i-1+ni], 0)) { //discount matches on prefix
107
+           tn--;
108
+         }
109
+       }
110
+       float ec = (float) cost/(float)tn;
111
+       // minimum of cell to the left+1, to the top+1, diagonally left and up +cost
112
+       d[i] = min(min(d[i-1]+1, p[i]+1), p[i-1]+ec);
113
+     }
114
+     // copy current distance counts to 'previous row' distance counts
115
+     _d = p;
116
+     p = d;
117
+     d = _d;
118
+   }
119
+   // our last action in the above loop was to switch d and p, so p now
120
+   // actually has the most recent cost counts
121
+   float val = p[sl];
122
+   free(p);
123
+   free(d);
124
+   free(t_j);
125
+   free(sa);
126
+   return 1.0f - ((float) val / (float)max(tl, sl));
127
+ }
128
+
129
+
130
+ #ifdef TEST
131
+ #include <stdio.h>
132
+ #include "ngramdistance.h"
133
+
134
+ int main (int argc, char **argv) { /* TEST-only driver: prints the n = 3 score for argv[1] and argv[2] */
135
+   if (argc < 3) return -1; /* both strings are required */
136
+   float distance = ngramdistance(argv[1], argv[2],3);
137
+   printf("%s vs %s: %f\n", argv[1], argv[2],distance);
138
+   return 0;
139
+ }
140
+ #endif
@@ -0,0 +1 @@
1
+ float ngramdistance(const char *source, const char *target, int n); /* returns float, matching the definition in ngramdistance.c */
@@ -0,0 +1,12 @@
1
+ require 'ffi'
2
+
3
+ module NGramDistance
4
+ extend FFI::Library
5
+
6
+ # Try loading in order.
7
+ library = File.dirname(__FILE__) + "/../ext/ngramdistance/ngramdistance"
8
+ candidates = ['.bundle', '.so', '.dylib', ''].map { |ext| library + ext }
9
+ ffi_lib(candidates)
10
+
11
+ attach_function :distance, :ngramdistance, [:string, :string, :int], :float
12
+ end
@@ -0,0 +1,64 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{ngramdistance-ffi}
8
+ s.version = "1.0.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Bali"]
12
+ s.date = %q{2012-06-18}
13
+ s.description = %q{Provides a fast, cross-Ruby implementation of the ngramdistance distance algorithm.}
14
+ s.email = %q{mailbali@gmail.com}
15
+ s.extensions = ["ext/ngramdistance/extconf.rb"]
16
+ s.extra_rdoc_files = [
17
+ "README.markdown"
18
+ ]
19
+ s.files = [
20
+ ".gitignore",
21
+ "CHANGELOG.markdown",
22
+ "Gemfile",
23
+ "README.markdown",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "ext/ngramdistance/.gitignore",
27
+ "ext/ngramdistance/extconf.rb",
28
+ "ext/ngramdistance/ngramdistance.c",
29
+ "ext/ngramdistance/ngramdistance.h",
30
+ "ngramdistance-ffi.gemspec",
31
+ "lib/ngramdistance.rb",
32
+ "spec/ngramdistance_spec.rb",
33
+ "spec/spec_helper.rb"
34
+ ]
35
+ s.homepage = %q{https://github.com/pecbali/ngramdistance-ffi}
36
+ s.rdoc_options = ["--charset=UTF-8"]
37
+ s.require_paths = ["lib"]
38
+ s.rubygems_version = %q{1.3.7}
39
+ s.summary = %q{An FFI version of the ngramdistance gem.}
40
+ s.test_files = [
41
+ "spec/ngramdistance_spec.rb",
42
+ "spec/spec_helper.rb"
43
+ ]
44
+
45
+ if s.respond_to? :specification_version then
46
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
47
+ s.specification_version = 3
48
+
49
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
50
+ s.add_runtime_dependency(%q<ffi>, [">= 0"])
51
+ s.add_development_dependency(%q<rspec>, [">= 0"])
52
+ s.add_development_dependency(%q<jeweler>, [">= 0"])
53
+ else
54
+ s.add_dependency(%q<ffi>, [">= 0"])
55
+ s.add_dependency(%q<rspec>, [">= 0"])
56
+ s.add_dependency(%q<jeweler>, [">= 0"])
57
+ end
58
+ else
59
+ s.add_dependency(%q<ffi>, [">= 0"])
60
+ s.add_dependency(%q<rspec>, [">= 0"])
61
+ s.add_dependency(%q<jeweler>, [">= 0"])
62
+ end
63
+ end
64
+
@@ -0,0 +1,19 @@
1
+ require 'spec_helper'
2
+
3
+ describe NGramDistance do
4
+ fixtures = [
5
+ ["university", "univearsitty", 0.750000,
6
+ ["university", "university", 1.0],
7
+ ["hello", "jello",0.633333],
8
+ ["hello", "heloll", 0.666667],
9
+ ["hello", "saint", 0.0000],
10
+ ["hello", "", 0.0000]
11
+ ]
12
+
13
+ fixtures.each do |w1, w2, d|
14
+ it "should calculate a distance of #{d} between #{w1} and #{w2}" do
15
+ NGramDistance.distance(w1, w2,3).should be_close( d,0.05)
16
+ NGramDistance.distance(w2, w1,3).should be_close d,0.05)
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,3 @@
1
+ require 'spec'
2
+
3
+ require File.dirname(__FILE__) + "/../lib/levenshtein"
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ngramdistance-ffi
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Bali
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-06-18 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: ffi
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: jeweler
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ description: Provides a fast, cross-Ruby implementation of the ngramdistance distance
63
+ algorithm.
64
+ email: mailbali@gmail.com
65
+ executables: []
66
+ extensions:
67
+ - ext/ngramdistance/extconf.rb
68
+ extra_rdoc_files:
69
+ - README.markdown
70
+ files:
71
+ - .gitignore
72
+ - CHANGELOG.markdown
73
+ - Gemfile
74
+ - README.markdown
75
+ - Rakefile
76
+ - VERSION
77
+ - ext/ngramdistance/.gitignore
78
+ - ext/ngramdistance/extconf.rb
79
+ - ext/ngramdistance/ngramdistance.c
80
+ - ext/ngramdistance/ngramdistance.h
81
+ - ngramdistance-ffi.gemspec
82
+ - lib/ngramdistance.rb
83
+ - spec/ngramdistance_spec.rb
84
+ - spec/spec_helper.rb
85
+ homepage: https://github.com/pecbali/ngramdistance-ffi
86
+ licenses: []
87
+ post_install_message:
88
+ rdoc_options:
89
+ - --charset=UTF-8
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ! '>='
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ required_rubygems_version: !ruby/object:Gem::Requirement
99
+ none: false
100
+ requirements:
101
+ - - ! '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ requirements: []
105
+ rubyforge_project:
106
+ rubygems_version: 1.8.24
107
+ signing_key:
108
+ specification_version: 3
109
+ summary: An FFI version of the ngramdistance gem.
110
+ test_files:
111
+ - spec/ngramdistance_spec.rb
112
+ - spec/spec_helper.rb