tardistance 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ext/tardistance/extconf.rb +3 -0
- data/ext/tardistance/tardistance.c +103 -0
- data/lib/tardistance.rb +12 -0
- metadata +50 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: ca244b08fa65deb122012d7b221e524b0e40c9cd
|
|
4
|
+
data.tar.gz: 3f263ccd9557f0b206892d10b30e52d8b78885e4
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 7a293d72090747165a3ffaa3f953501ec0b489abea27b1a33e95da488c21a04a45e27fbd1ac24dc90ff479c9a5c475c20d0ed3738f207b636400fccab64c38ec
|
|
7
|
+
data.tar.gz: 24c2be5411aed95846ced230403f9f156a2e573b2dd165438c6fbe3aee4bc48a27644f426524f58156bb4ed71059fa66c7dbc34a508b4de36d4dd690c9d625e8
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
#include <ruby.h>
|
|
2
|
+
#include <extconf.h>
|
|
3
|
+
|
|
4
|
+
char **getWordsArray(VALUE rarray);
|
|
5
|
+
char *getString(void);
|
|
6
|
+
double findDistance(char*,char*,int,int);
|
|
7
|
+
int min(int,int,int);
|
|
8
|
+
int findMin(int,int);
|
|
9
|
+
|
|
10
|
+
/*
 * distance(str, rarray, cutoff) -> Array or String
 *
 * Returns a new Ruby array holding a copy of every element of `rarray` whose
 * score from findDistance() against `str` is less than or equal to `cutoff`.
 *
 *   self    - receiver; unused.
 *   str     - search string; must be a String of 1..100 bytes.
 *   rarray  - candidate strings; must be an Array of 1..1000 entries. Each
 *             entry is converted with StringValue(), which raises TypeError
 *             for values that cannot be converted to a String.
 *   cutoff  - numeric threshold; nil or a negative value selects 0.5.
 *
 * When `str` or `rarray` is of the wrong type or outside the size limits the
 * function returns the string "Error:null value passed" instead of an array.
 */
VALUE distance(VALUE self,VALUE str,VALUE rarray,VALUE cutoff) {
    enum { MAX_WORDS = 1000, MAX_SEARCH_LEN = 100 };
    double cutoffScore = -1.0;
    long arrayLen;
    long stringLength;
    long i;
    char *searchWord;
    VALUE result;

    if(!NIL_P(cutoff)){
        cutoffScore = NUM2DBL(cutoff);
    }
    if(cutoffScore < 0.0){
        cutoffScore = 0.5;
    }

    /* Check the types before touching RARRAY_LEN / RSTRING_LEN: those macros
       read the object's internals and must not be applied to nil or to an
       object of another class. */
    if(!RB_TYPE_P(rarray, T_ARRAY) || !RB_TYPE_P(str, T_STRING)){
        return rb_str_new2("Error:null value passed");
    }

    arrayLen = RARRAY_LEN(rarray);
    stringLength = RSTRING_LEN(str);
    if(arrayLen <= 0 || arrayLen > MAX_WORDS ||
       stringLength <= 0 || stringLength > MAX_SEARCH_LEN){
        return rb_str_new2("Error:null value passed");
    }

    searchWord = RSTRING_PTR(str);
    result = rb_ary_new();

    for(i = 0; i < arrayLen; i++){
        VALUE element = rb_ary_entry(rarray,i);
        StringValue(element);

        if(findDistance(RSTRING_PTR(element),searchWord,
                        (int)RSTRING_LEN(element),(int)stringLength) <= cutoffScore){
            /* Copy by explicit length so a NUL byte inside the element does
               not cut the result short. */
            rb_ary_push(result,rb_str_new(RSTRING_PTR(element),RSTRING_LEN(element)));
        }
        /* Keep the (possibly converted) element alive while its buffer is
           still being read. */
        RB_GC_GUARD(element);
    }

    RB_GC_GUARD(str);
    return result;
}
|
|
47
|
+
|
|
48
|
+
/*
 * Levenshtein (edit) distance between dataStr and searchWord, divided by
 * searchWordLen.
 *
 *   dataStr        - first string; only the first dataLen bytes are read.
 *   searchWord     - second string; only the first searchWordLen bytes are read.
 *   dataLen        - number of bytes in dataStr (any non-negative value).
 *   searchWordLen  - number of bytes in searchWord, 1..100.
 *
 * Returns 0.0 for identical strings; larger values mean more edits relative
 * to the length of searchWord. For arguments that cannot be compared (null
 * pointer, negative dataLen, searchWordLen outside 1..100) a very large score
 * (1.0e300) is returned so that the pair fails any practical cutoff.
 *
 * Only two rows of the edit-distance table are kept, so stack use does not
 * depend on dataLen.
 */
double findDistance(char* dataStr,char* searchWord,int dataLen,int searchWordLen){
    enum { MAX_SEARCH_LEN = 100 };
    const double unsupportedScore = 1.0e300;
    int rowA[MAX_SEARCH_LEN + 1];
    int rowB[MAX_SEARCH_LEN + 1];
    int *prev = rowA;   /* distances for the first k-1 bytes of dataStr */
    int *cur = rowB;    /* distances for the first k bytes of dataStr   */
    int *swap;
    int k;
    int l;

    if (!dataStr || !searchWord || dataLen < 0 ||
        searchWordLen <= 0 || searchWordLen > MAX_SEARCH_LEN){
        return unsupportedScore;
    }

    /* Turning an empty prefix into the first l bytes of searchWord costs l. */
    for (l = 0; l <= searchWordLen; l++){
        prev[l] = l;
    }

    for (k = 1; k <= dataLen; k++){
        /* Turning k bytes of dataStr into an empty string costs k. */
        cur[0] = k;
        for (l = 1; l <= searchWordLen; l++){
            /* Table cell (k,l) describes prefixes of length k and l, so the
               bytes to compare are at index k-1 and l-1. */
            if (dataStr[k-1] == searchWord[l-1]){
                cur[l] = prev[l-1];
            }
            else{
                int best = prev[l-1];       /* Replace */
                if (prev[l] < best){        /* Remove  */
                    best = prev[l];
                }
                if (cur[l-1] < best){       /* Insert  */
                    best = cur[l-1];
                }
                cur[l] = 1 + best;
            }
        }
        swap = prev;
        prev = cur;
        cur = swap;
    }

    /* After the final swap `prev` holds the row for the whole of dataStr. */
    return ((double)prev[searchWordLen]/searchWordLen);
}
|
|
79
|
+
|
|
80
|
+
/*
 * Returns the smallest of the three integers x, y and z.
 */
int min(int x,int y,int z){
    int smallest = x;
    if (y < smallest){
        smallest = y;
    }
    /* z must take part in the comparison as well. */
    if (z < smallest){
        smallest = z;
    }
    return smallest;
}
|
|
83
|
+
/*
 * Returns the smaller of r and s (s when they are equal).
 */
int findMin(int r,int s){
    return (r < s) ? r : s;
}
|
|
91
|
+
|
|
92
|
+
void Init_tardistance()
|
|
93
|
+
{
|
|
94
|
+
VALUE mod = rb_define_module("PercentSearch");
|
|
95
|
+
rb_define_global_function("distance",distance,3);
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
/*
 * Prints "Hello World" followed by a newline and exits with status 0.
 * NOTE(review): nothing else in this file calls it; it looks like leftover
 * scaffolding - confirm before relying on it.
 */
int main(void)
{
    fputs("Hello World\n", stdout);
    return 0;
}
|
data/lib/tardistance.rb
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
|
|
2
|
+
class Tardistance
  require_relative 'tardistance/tardistance'
  include PercentSearch

  # Filters +search_array+ by calling +distance+ with +search_string+ and
  # +percent_diff+, and returns whatever +distance+ returns.
  #
  # search_string - must be a String.
  # search_array  - must be an Array.
  # percent_diff  - must be a Numeric.
  #
  # Raises RuntimeError when any argument has the wrong type.
  def self.percent_change(search_string,search_array,percent_diff)
    arguments_ok = search_array.kind_of?(Array) &&
                   search_string.is_a?(String) &&
                   percent_diff.is_a?(Numeric)
    raise "Invalid Parameters please pass percent_change(string,array,decimal)" unless arguments_ok

    distance(search_string,search_array,percent_diff)
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: tardistance
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Tarash Agarwal
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2018-07-13 00:00:00.000000000 Z
|
|
12
|
+
dependencies: []
|
|
13
|
+
description: Filter strings on basis of percentage of changes to be made in the string
|
|
14
|
+
in terms of levenshtein distance by string length
|
|
15
|
+
email: tarash.agarwal@gmail.com
|
|
16
|
+
executables: []
|
|
17
|
+
extensions:
|
|
18
|
+
- ext/tardistance/extconf.rb
|
|
19
|
+
extra_rdoc_files: []
|
|
20
|
+
files:
|
|
21
|
+
- ext/tardistance/extconf.rb
|
|
22
|
+
- ext/tardistance/tardistance.c
|
|
23
|
+
- lib/tardistance.rb
|
|
24
|
+
homepage: https://github.com/tarashagarwal/tardistance.git
|
|
25
|
+
licenses:
|
|
26
|
+
- MIT
|
|
27
|
+
metadata:
|
|
28
|
+
authors_blog: https://tarashagarwal.github.io/
|
|
29
|
+
levenshtein distance algorithm: https://en.wikipedia.org/wiki/Levenshtein_distance
|
|
30
|
+
post_install_message:
|
|
31
|
+
rdoc_options: []
|
|
32
|
+
require_paths:
|
|
33
|
+
- lib
|
|
34
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
35
|
+
requirements:
|
|
36
|
+
- - ">="
|
|
37
|
+
- !ruby/object:Gem::Version
|
|
38
|
+
version: '0'
|
|
39
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
40
|
+
requirements:
|
|
41
|
+
- - ">="
|
|
42
|
+
- !ruby/object:Gem::Version
|
|
43
|
+
version: '0'
|
|
44
|
+
requirements: []
|
|
45
|
+
rubyforge_project:
|
|
46
|
+
rubygems_version: 2.6.13
|
|
47
|
+
signing_key:
|
|
48
|
+
specification_version: 4
|
|
49
|
+
summary: A fast method to filter strings
|
|
50
|
+
test_files: []
|