similarityTextCoefficients 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4deed5d254b448b0f72730d1d7259e3f6fa44b41
4
+ data.tar.gz: 526d8ee307d479d0a93121d76430b53b16be8cfc
5
+ SHA512:
6
+ metadata.gz: eb7649ab9ba6cd423555dc12f108d7c29d08b66fd60b0df83f1dda4448cd5a762b319fc895cce9cd791e943e1364e829bef5e0e668095057e734bf08afaf0e2b
7
+ data.tar.gz: 1daafe71aaa86253a61fdd09a3bd428fec4c8f9cdd510b25db6e88bc5dfd06cfcbdf5d4c8bcef326a3114823ba70136bf64a6e9267406b988e2f8bf125b8380a
@@ -0,0 +1,98 @@
1
+ require "set"
2
+
3
# Word-level text similarity measures: Dice's coefficient computed over
# character bigrams, and the Jaccard (Tanimoto) index computed over words.
class SimilarityTextCoefficients
  # Returns the highest Dice coefficient found between any word of +a+ and
  # any word of +b+.
  #
  # Both strings are split on whitespace. Each word is turned into its set of
  # character bigrams and every word of +a+ is compared with every word of
  # +b+ using:
  #
  #   dice = 2 * |bigrams_a & bigrams_b| / (|bigrams_a| + |bigrams_b|)
  #
  # a - String whose words are compared.
  # b - String whose words are compared.
  #
  # Returns a Float between 0.0 and 1.0. Returns 0.0 when either string
  # contains no words.
  def self.dice_coefficient(a, b)
    # Build each word's bigram set once instead of once per word pair.
    words_a = a.split(" ").map { |word| [word, word.each_char.each_cons(2).to_set] }
    words_b = b.split(" ").map { |word| [word, word.each_char.each_cons(2).to_set] }

    best = 0.0
    words_a.each do |word_a, grams_a|
      words_b.each do |word_b, grams_b|
        total = grams_a.size + grams_b.size
        score =
          if total.zero?
            # Two single-character words have no bigrams at all. Dividing by
            # zero here would yield NaN and poison the running maximum, so
            # fall back to plain equality of the two words.
            word_a == word_b ? 1.0 : 0.0
          else
            (grams_a & grams_b).size * 2.0 / total
          end
        best = score if score > best
      end
    end

    best
  end

  # Returns the Jaccard index of the word sets of +a+ and +b+.
  #
  #   jaccard = intersection / ((number_a + number_b) - intersection)
  #
  # where
  #   number_a     - number of distinct words in a
  #   number_b     - number of distinct words in b
  #   intersection - number of distinct words present in both
  #
  # Repeated words are counted once, so the result never exceeds 1.0.
  #
  # a - String of whitespace-separated words.
  # b - String of whitespace-separated words.
  #
  # Returns a Float between 0.0 and 1.0. Returns 0.0 when neither string
  # contains any word (the union is empty, so the ratio is undefined).
  def self.createJaccardIndex(a, b)
    words_a = a.split(" ").uniq
    words_b = b.split(" ").uniq

    # Array#& already yields each common word only once.
    intersection = (words_a & words_b).size
    union = words_a.size + words_b.size - intersection
    return 0.0 if union.zero?

    intersection.to_f / union
  end
end
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: similarityTextCoefficients
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Gilberto Flores
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-04-27 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Enter two strings and it compares their similarity and gives a score
14
+ between 0 and 1, where 1 is the highest similarity
15
+ email: gilbertofp16@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/similarityTextCoefficients.rb
21
+ homepage: http://rubygems.org/gems/SimilarityTextCoefficients
22
+ licenses:
23
+ - MIT
24
+ metadata: {}
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 2.4.6
42
+ signing_key:
43
+ specification_version: 4
44
+ summary: For text comparisons it implements Dice's coefficient and Jaccard's or Tanimoto
45
+ index
46
+ test_files: []
47
+ has_rdoc: