kevintyll-ofac 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +6 -1
- data/README.rdoc +15 -1
- data/VERSION.yml +1 -1
- data/lib/ofac/models/ofac.rb +15 -1
- data/lib/ofac/ofac_match.rb +10 -1
- data/test/ofac_test.rb +22 -6
- metadata +2 -2
data/History.txt
CHANGED
@@ -6,4 +6,9 @@
|
|
6
6
|
== 1.0.0 2009-05-11
|
7
7
|
|
8
8
|
* 1 major enhancement:
|
9
|
-
* Initail release
|
9
|
+
* Initial release
|
10
|
+
|
11
|
+
== 1.1.0 2009-05-12
|
12
|
+
|
13
|
+
* 1 minor enhancement:
|
14
|
+
* Modified the match algorithm to reduce the score if there is no address or city match when that data is in the database.
|
data/README.rdoc
CHANGED
@@ -52,14 +52,28 @@ and Tyll will find a match, and there were 2 elements in the searched name, the
|
|
52
52
|
for <tt>:name</tt>. So since the weight for <tt>:name</tt> is 60, then we will add 60 to the score.
|
53
53
|
|
54
54
|
If you are trying to match the name Kevin Tyll, and there is a record for Teel, Kevin in the database, then an exact match
|
55
|
-
will be found for Kevin, and a sounds like match will be made for Tyll. Since there were 2 elements in
|
55
|
+
will be found for Kevin, and a sounds like match will be made for Tyll. Since there were 2 elements in the searched name,
|
56
56
|
and the weight for <tt>:name</tt> is 60, then each element is worth 30. Since Kevin was an exact match, it will add 30, and
|
57
57
|
since Tyll was a sounds like match, it will add 30 * .75. So the <tt>:name</tt> portion of the search will be worth 53.
|
58
58
|
|
59
|
+
If data is in the database for city and/or address, and you pass data in for these elements, the score will be reduced by 10%
|
60
|
+
of the weight if there is no match or sounds like match. So if you get a match on name, you've already got a score of 60. So
|
61
|
+
if you don't pass in an address or city, or if you do, but there is no city or address info in the database, then your final score
|
62
|
+
will be 60. But if you do pass in a city, say Tampa, and the city in the Database is New York, then we will deduct 10% of the
|
63
|
+
weight (30 * .1) = 3 from the score since 30 is the weight for <tt>:city</tt>. So the final score will be 57.
|
64
|
+
|
65
|
+
If you were searching for New York, and the database had New Delhi, then there would be a match on New, but not on York.
|
66
|
+
Since there were 2 elements in the searched city, each hit is worth 15. So the match on New would add 15, but the non-match
|
67
|
+
on York would subtract (15 * .1) = 1.5 from the score. So the score would be (60 + 15 - 1.5) = 73.5, which rounds to 74.
|
68
|
+
|
69
|
+
Only <tt>:city</tt> and <tt>:address</tt> subtract from the score; no match on name simply returns 0.
|
70
|
+
|
59
71
|
Matches for name are made for both the name and any aliases in the OFAC database.
|
60
72
|
|
61
73
|
Matches for <tt>:city</tt> and <tt>:address</tt> will only be added to the score if there is first a match on <tt>:name</tt>.
|
62
74
|
|
75
|
+
We consider a score of 60 to be reasonable as a hit.
|
76
|
+
|
63
77
|
== SYNOPSIS:
|
64
78
|
Accepts a hash with the identity's demographic information
|
65
79
|
|
data/VERSION.yml
CHANGED
data/lib/ofac/models/ofac.rb
CHANGED
@@ -55,13 +55,27 @@ class Ofac
|
|
55
55
|
# for <tt>:name</tt>. So since the weight for <tt>:name</tt> is 60, then we will add 60 to the score.
|
56
56
|
#
|
57
57
|
# If you are trying to match the name Kevin Tyll, and there is a record for Teel, Kevin in the database, then an exact match
|
58
|
-
# will be found for Kevin, and a sounds like match will be made for Tyll. Since there were 2 elements in
|
58
|
+
# will be found for Kevin, and a sounds like match will be made for Tyll. Since there were 2 elements in the searched name,
|
59
59
|
# and the weight for <tt>:name</tt> is 60, then each element is worth 30. Since Kevin was an exact match, it will add 30, and
|
60
60
|
# since Tyll was a sounds like match, it will add 30 * .75. So the <tt>:name</tt> portion of the search will be worth 53.
|
61
61
|
#
|
62
|
+
# If data is in the database for city and/or address, and you pass data in for these elements, the score will be reduced by 10%
|
63
|
+
# of the weight if there is no match or sounds like match. So if you get a match on name, you've already got a score of 60. So
|
64
|
+
# if you don't pass in an address or city, or if you do, but there is no city or address info in the database, then your final score
|
65
|
+
# will be 60. But if you do pass in a city, say Tampa, and the city in the Database is New York, then we will deduct 10% of the
|
66
|
+
# weight (30 * .1) = 3 from the score since 30 is the weight for <tt>:city</tt>. So the final score will be 57.
|
67
|
+
#
|
68
|
+
# If you were searching for New York, and the database had New Delhi, then there would be a match on New, but not on York.
|
69
|
+
# Since there were 2 elements in the searched city, each hit is worth 15. So the match on New would add 15, but the non-match
|
70
|
+
# on York would subtract (15 * .1) = 1.5 from the score. So the score would be (60 + 15 - 1.5) = 73.5, which rounds to 74.
|
71
|
+
#
|
72
|
+
# Only <tt>:city</tt> and <tt>:address</tt> subtract from the score; no match on name simply returns 0.
|
73
|
+
#
|
62
74
|
# Matches for name are made for both the name and any aliases in the OFAC database.
|
63
75
|
#
|
64
76
|
# Matches for <tt>:city</tt> and <tt>:address</tt> will only be added to the score if there is first a match on <tt>:name</tt>.
|
77
|
+
#
|
78
|
+
# We consider a score of 60 to be reasonable as a hit.
|
65
79
|
def score
|
66
80
|
@score || calculate_score
|
67
81
|
end
|
data/lib/ofac/ofac_match.rb
CHANGED
@@ -100,9 +100,10 @@ class OfacMatch
|
|
100
100
|
|
101
101
|
value = 0
|
102
102
|
partial_weight = 1/token_array.length.to_f
|
103
|
+
|
103
104
|
token_array.each do |partial_token|
|
104
105
|
#first see if we get an exact match of the partial
|
105
|
-
if match_array.include?(partial_token)
|
106
|
+
if success = match_array.include?(partial_token)
|
106
107
|
value += partial_weight
|
107
108
|
else
|
108
109
|
#otherwise, see if the partial sounds like any part of the OFAC record
|
@@ -110,10 +111,18 @@ class OfacMatch
|
|
110
111
|
if partial_match.ofac_sounds_like(partial_token,false)
|
111
112
|
#give partial value for every part of token that is matched.
|
112
113
|
value += partial_weight * 0.75
|
114
|
+
success = true
|
113
115
|
break
|
114
116
|
end
|
115
117
|
end
|
116
118
|
end
|
119
|
+
unless success
|
120
|
+
#if this is for :address or :city
|
121
|
+
#and there is no match at all, subtract 10% of the weight from :name score
|
122
|
+
unless field == :name
|
123
|
+
value -= partial_weight * 0.1
|
124
|
+
end
|
125
|
+
end
|
117
126
|
end
|
118
127
|
end
|
119
128
|
end
|
data/test/ofac_test.rb
CHANGED
@@ -7,7 +7,7 @@ class OfacTest < Test::Unit::TestCase
|
|
7
7
|
setup_ofac_sdn_table
|
8
8
|
OfacSdnLoader.load_current_sdn_file #this method is mocked to load test files instead of the live files from the web.
|
9
9
|
end
|
10
|
-
|
10
|
+
|
11
11
|
should "give a score of 0 if no name is given" do
|
12
12
|
assert_equal 0, Ofac.new({:address => '123 somewhere'}).score
|
13
13
|
end
|
@@ -20,11 +20,27 @@ class OfacTest < Test::Unit::TestCase
|
|
20
20
|
assert_equal 0, Ofac.new({:name => 'Kevin', :address => '123 somewhere ln', :city => 'Clearwater'}).score
|
21
21
|
end
|
22
22
|
|
23
|
-
should "give a score of 60 if there is a name match" do
|
23
|
+
should "give a score of 60 if there is a name match and deduct scores for non matches on address and city" do
|
24
24
|
assert_equal 60, Ofac.new({:name => 'Oscar Hernandez'}).score
|
25
|
-
|
26
|
-
|
27
|
-
|
25
|
+
end
|
26
|
+
|
27
|
+
should "deduct scores for non matches on address and city if data is in the database" do
|
28
|
+
#if there is data for address or city in the database, and that info is passed in, then 10%
|
29
|
+
#of the weight will be deducted if there is no match or sounds like match
|
30
|
+
|
31
|
+
#only name matches
|
32
|
+
assert_equal 56, Ofac.new({:name => 'Oscar Hernandez', :city => 'no match', :address => 'no match'}).score
|
33
|
+
#only name matches
|
34
|
+
assert_equal 56, Ofac.new({:name => 'Oscar Hernandez', :city => 'Las Vegas', :address => 'no match'}).score
|
35
|
+
#name and city match
|
36
|
+
assert_equal 89, Ofac.new({:name => 'Oscar Hernandez', :city => 'Clearwater', :address => 'no match'}).score
|
37
|
+
#city is a partial match - Clearwater matches, but not Bay
|
38
|
+
#score = 60 for name + 15 for Clearwater - (15 * .1) for Bay = 73.5
|
39
|
+
assert_equal 74, Ofac.new({:name => 'Oscar Hernandez', :city => 'Clearwater Bay'}).score
|
40
|
+
end
|
41
|
+
|
42
|
+
should "not deduct from score if no data for city or address is in the database" do
|
43
|
+
assert_equal 60, Ofac.new({:name => 'Luis Lopez', :city => 'no match', :address => 'no match'}).score
|
28
44
|
end
|
29
45
|
|
30
46
|
should "give a score of 60 if there is a name match on alternate identity name" do
|
@@ -38,7 +54,7 @@ class OfacTest < Test::Unit::TestCase
|
|
38
54
|
end
|
39
55
|
|
40
56
|
should "give a score of 90 if there is a name and city match" do
|
41
|
-
assert_equal 90, Ofac.new({:name => 'Oscar Hernandez', :city => 'Clearwater'
|
57
|
+
assert_equal 90, Ofac.new({:name => 'Oscar Hernandez', :city => 'Clearwater'}).score
|
42
58
|
end
|
43
59
|
|
44
60
|
should "give a score of 100 if there is a name and city and address match" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kevintyll-ofac
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin Tyll
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-05-
|
12
|
+
date: 2009-05-12 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|