ofac 1.1.11
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +6 -0
- data/History.txt +72 -0
- data/LICENSE +20 -0
- data/PostInstall.txt +11 -0
- data/README.rdoc +123 -0
- data/Rakefile +60 -0
- data/VERSION.yml +4 -0
- data/generators/ofac_migration/ofac_migration_generator.rb +12 -0
- data/generators/ofac_migration/templates/migration.rb +31 -0
- data/lib/ofac.rb +9 -0
- data/lib/ofac/models/ofac.rb +177 -0
- data/lib/ofac/models/ofac_sdn.rb +5 -0
- data/lib/ofac/models/ofac_sdn_loader.rb +305 -0
- data/lib/ofac/ofac_match.rb +139 -0
- data/lib/ofac/ruby_string_extensions.rb +22 -0
- data/lib/tasks/ofac.rake +8 -0
- data/ofac.gemspec +104 -0
- data/test/files/test_address_data_load.pip +10 -0
- data/test/files/test_alt_data_load.pip +10 -0
- data/test/files/test_sdn_data_load.pip +9 -0
- data/test/files/valid_flattened_file.csv +19 -0
- data/test/mocks/test/ofac_sdn_loader.rb +20 -0
- data/test/ofac_sdn_loader_test.rb +40 -0
- data/test/ofac_test.rb +138 -0
- data/test/test_helper.rb +49 -0
- metadata +119 -0
data/.document
ADDED
data/.gitignore
ADDED
data/History.txt
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
== 0.1.0 2009-05-7
|
2
|
+
|
3
|
+
* 1 major enhancement:
|
4
|
+
* Table creation and data load task complete
|
5
|
+
|
6
|
+
== 1.0.0 2009-05-11
|
7
|
+
|
8
|
+
* 1 major enhancement:
|
9
|
+
* Initial release
|
10
|
+
|
11
|
+
== 1.1.0 2009-05-12
|
12
|
+
|
13
|
+
* 1 minor enhancement:
|
14
|
+
* Modified the match algorithm to reduce the score if there is not an address or city match if the data is in the database.
|
15
|
+
|
16
|
+
== 1.1.2 2009-05-13
|
17
|
+
|
18
|
+
* 2 minor changes:
|
19
|
+
* Changed the sql in the initial search to do a like instead of a soundex. For short names, the soundex returned almost the entire table
|
20
|
+
making the process take too long.
|
21
|
+
* Also changed the sql to only return individuals, also for the sake of performance.
|
22
|
+
|
23
|
+
== 1.1.3 2009-05-15
|
24
|
+
|
25
|
+
* 1 bug fix:
|
26
|
+
* fixed a bug that threw an error if a space was passed in for the name.
|
27
|
+
|
28
|
+
== 1.1.4 2009-06-02
|
29
|
+
|
30
|
+
* 1 minor enhancement:
|
31
|
+
* Improved performance by ignoring initial in names when searching the database for possible hits.
|
32
|
+
|
33
|
+
== 1.1.5 2009-06-03
|
34
|
+
|
35
|
+
* 1 bug fix:
|
36
|
+
* fixed a bug that threw an error if only single character initials are passed in for the name.
|
37
|
+
|
38
|
+
== 1.1.6 2009-06-29
|
39
|
+
|
40
|
+
* 1 minor enhancement:
|
41
|
+
* Allow passing of first and last name separately...to improve performance.
|
42
|
+
|
43
|
+
== 1.1.7 2009-06-30
|
44
|
+
|
45
|
+
* 1 bug fix:
|
46
|
+
* fixed error when passing a nil value into the new :first_name or :last_name hash values when initializing the Ofac object.
|
47
|
+
|
48
|
+
== 1.1.8 2009-06-30
|
49
|
+
|
50
|
+
* 1 bug fix:
|
51
|
+
* Refactored the select on OfacSdn to use the AR connection instead of building sql and using the raw connection. Fixes a bug
|
52
|
+
introduced in 1.1.6 where quotes in the name raised an error.
|
53
|
+
|
54
|
+
== 1.1.9 2009-07-24
|
55
|
+
|
56
|
+
* 1 minor enhancement:
|
57
|
+
* Added a method, db_hit?, for when you're more concerned with speed than accuracy. db_hit? will return true if there is an exact name match
|
58
|
+
in the ofac_sdn database. This method ignores address and city and does not produce a score.
|
59
|
+
Usage: Ofac.new({:name => 'Oscar Hernandez', :city => 'Clearwater', :address => '123 somewhere ln'}).db_hit?
|
60
|
+
|
61
|
+
== 1.1.10 2009-07-28
|
62
|
+
|
63
|
+
* 1 minor enhancement:
|
64
|
+
* Modified the select in OfacSdn to use the city to pull records from the database. This is another compromise to improve performance,
|
65
|
+
but still get a score. The db_hit? method is still faster, but this will still calculate a score for accuracy.
|
66
|
+
|
67
|
+
== 1.1.11 2009-07-29
|
68
|
+
|
69
|
+
* 1 minor enhancement:
|
70
|
+
* Removed the changes from 1.1.10. 1.1.10 produced a score of 0 when a higher score was more accurate.
|
71
|
+
Changed the scoring algorithm to not include partial matches on sounds like. This code has little impact on score, and
|
72
|
+
has a very high overhead on performance.
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Kevin Tyll
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/PostInstall.txt
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
For more information on ofac, see http://kevintyll.github.com/ofac/
|
2
|
+
|
3
|
+
* To create the necessary db migration, from the command line, run:
|
4
|
+
script/generate ofac_migration
|
5
|
+
* Require the gem in your environment.rb file in the Rails::Initializer block:
|
6
|
+
config.gem 'kevintyll-ofac', :lib => 'ofac'
|
7
|
+
* To load your table with the current OFAC data, from the command line, run:
|
8
|
+
rake ofac:update_data
|
9
|
+
|
10
|
+
* The OFAC data is not updated with any regularity, but you can sign up for email notifications when the data changes at
|
11
|
+
http://www.treas.gov/offices/enforcement/ofac/sdn/index.shtml.
|
data/README.rdoc
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
= ofac
|
2
|
+
|
3
|
+
* http://kevintyll.github.com/ofac
|
4
|
+
* http://www.drexel-labs.com
|
5
|
+
|
6
|
+
* http://www.treas.gov/offices/enforcement/ofac/sdn/index.shtml
|
7
|
+
|
8
|
+
== DESCRIPTION:
|
9
|
+
|
10
|
+
ofac is a ruby gem that tries to find a match of a person's name and address against the
|
11
|
+
Office of Foreign Assets Control's Specially Designated Nationals list...the so called
|
12
|
+
terrorist watch list.
|
13
|
+
|
14
|
+
This gem, like the ssn_validator gem, started as a need for the company I work for, Clarity Services Inc.
|
15
|
+
We decided once again to create a gem out of it and share it with the community. Much
|
16
|
+
thanks goes to the management at Clarity Services Inc. for allowing this code to be open sourced. Thanks
|
17
|
+
also to Larry Berland at Clarity Services Inc. The matching logic in the ofac_match.rb file was derived from
|
18
|
+
his work.
|
19
|
+
|
20
|
+
== FEATURES:
|
21
|
+
|
22
|
+
Creates a score, 1 - 100, based on how well the name, address and city match the data on the SDN list. Since
|
23
|
+
we have to match on strings, the likelihood of an exact match is virtually nil. So we've created an
|
24
|
+
algorithm that creates a score. The better the match, the higher the score. A score of 100 would be
|
25
|
+
a perfect match.
|
26
|
+
|
27
|
+
The score is calculated by adding up the weightings of each part that is matched. So
|
28
|
+
if only name is matched, then the max score is the weight for <tt>:name</tt> which is 60
|
29
|
+
|
30
|
+
It's possible to get partial matches, which will add partial weight to the score. If there
|
31
|
+
is not a match on the element as it is passed in, then each word element gets broken down
|
32
|
+
and matches are tried on each partial element. The weighting is distributed equally for
|
33
|
+
each partial that is matched.
|
34
|
+
|
35
|
+
If exact matches are not made, then a sounds like match is attempted. Any match made by sounds like
|
36
|
+
is given 75% of its weight to the score.
|
37
|
+
Example:
|
38
|
+
|
39
|
+
If you are trying to match the name Kevin Tyll and there is a record for Smith, Kevin in the database, then
|
40
|
+
we will try to match both Kevin and Tyll separately, with each element Smith and Kevin. Since only Kevin
|
41
|
+
will find a match, and there were 2 elements in the searched name, the score will be added by half the weighting
|
42
|
+
for <tt>:name</tt>. So since the weight for <tt>:name</tt> is 60, then we will add 30 to the score.
|
43
|
+
|
44
|
+
If you are trying to match the name Kevin Gregory Tyll and there is a record for Tyll, Kevin in the database, then
|
45
|
+
we will try to match Kevin and Gregory and Tyll separately, with each element Tyll and Kevin. Since both Kevin
|
46
|
+
and Tyll will find a match, and there were 3 elements in the searched name, the score will be added by 2/3 the weighting
|
47
|
+
for <tt>:name</tt>. So since the weight for <tt>:name</tt> is 60, then we will add 40 to the score.
|
48
|
+
|
49
|
+
If you are trying to match the name Kevin Tyll and there is a record for Kevin Gregory Tyll in the database, then
|
50
|
+
we will try to match Kevin and Tyll separately, with each element Tyll and Kevin and Gregory. Since both Kevin
|
51
|
+
and Tyll will find a match, and there were 2 elements in the searched name, the score will be added by 2/2 the weighting
|
52
|
+
for <tt>:name</tt>. So since the weight for <tt>:name</tt> is 60, then we will add 60 to the score.
|
53
|
+
|
54
|
+
If you are trying to match the name Kevin Tyll, and there is a record for Teel, Kevin in the database, then an exact match
|
55
|
+
will be found for Kevin, and a sounds like match will be made for Tyll. Since there were 2 elements in the searched name,
|
56
|
+
and the weight for <tt>:name</tt> is 60, then each element is worth 30. Since Kevin was an exact match, it will add 30, and
|
57
|
+
since Tyll was a sounds like match, it will add 30 * .75. So the <tt>:name</tt> portion of the search will be worth 53.
|
58
|
+
|
59
|
+
If data is in the database for city and/or address, and you pass data in for these elements, the score will be reduced by 10%
|
60
|
+
of the weight if there is no match or sounds like match. So if you get a match on name, you've already got a score of 60. So
|
61
|
+
if you don't pass in an address or city, or if you do, but there is no city or address info in the database, then your final score
|
62
|
+
will be 60. But if you do pass in a city, say Tampa, and the city in the Database is New York, then we will deduct 10% of the
|
63
|
+
weight (30 * .1) = 3 from the score since 30 is the weight for <tt>:city</tt>. So the final score will be 57.
|
64
|
+
|
65
|
+
If we're searching for New York, and the database had New Deli, then there would be a match on New, but not on Deli.
|
66
|
+
Since there were 2 elements in the searched city, each hit is worth 15. So the match on New would add 15, but the non-match
|
67
|
+
on York would subtract (15 * .1) = 1.5 from the score. So the score would be (60 + 15 - 1.5) = 74, due to rounding.
|
68
|
+
|
69
|
+
Only <tt>:city</tt> and <tt>:address</tt> subtract from the score, No match on name simply returns 0.
|
70
|
+
|
71
|
+
Matches for name are made for both the name and any aliases in the OFAC database.
|
72
|
+
|
73
|
+
Matches for <tt>:city</tt> and <tt>:address</tt> will only be added to the score if there is first a match on <tt>:name</tt>.
|
74
|
+
|
75
|
+
We consider a score of 60 to be reasonable as a hit.
|
76
|
+
|
77
|
+
== SYNOPSIS:
|
78
|
+
Accepts a hash with the identity's demographic information
|
79
|
+
|
80
|
+
Ofac.new({:name => 'Oscar Hernandez', :city => 'Clearwater', :address => '123 somewhere ln'})
|
81
|
+
|
82
|
+
<tt>:name</tt> is required to get a score. If <tt>:name</tt> is missing, an error will not be thrown, but a score of 0 will be returned.
|
83
|
+
|
84
|
+
The more information provided, the higher the score could be. A score of 100 would mean all fields
|
85
|
+
were passed in, and all fields were 100% matches. If only the name is passed in without an address,
|
86
|
+
it will be impossible to get a score of 100, even if the name matches perfectly.
|
87
|
+
|
88
|
+
Acceptable hash keys and their weighting in score calculation:
|
89
|
+
|
90
|
+
* <tt>:name</tt> (weighting = 60%) (required) This can be a person, business, or marine vessel
|
91
|
+
* <tt>:address</tt> (weighting = 10%)
|
92
|
+
* <tt>:city</tt> (weighting = 30%)
|
93
|
+
|
94
|
+
* Instantiate the object with the identity's name, street address, and city.
|
95
|
+
ofac = Ofac.new(:name => 'Kevin Tyll', :city => 'Clearwater', :address => '123 Somewhere Ln.')
|
96
|
+
|
97
|
+
* Then get the score
|
98
|
+
ofac.score => return the score 1 - 100
|
99
|
+
|
100
|
+
* You can also get the list of all the partial matches with the score of each record.
|
101
|
+
ofac.possible_hits => returns an array of hashes.
|
102
|
+
|
103
|
+
== REQUIREMENTS:
|
104
|
+
|
105
|
+
* Rails 2.0.0 or greater
|
106
|
+
|
107
|
+
== INSTALL:
|
108
|
+
|
109
|
+
* To install the gem:
|
110
|
+
sudo gem install kevintyll-ofac
|
111
|
+
* To create the necessary db migration, from the command line, run:
|
112
|
+
script/generate ofac_migration
|
113
|
+
* Require the gem in your environment.rb file in the Rails::Initializer block:
|
114
|
+
config.gem 'kevintyll-ofac', :lib => 'ofac'
|
115
|
+
* To load your table with the current OFAC data, from the command line, run:
|
116
|
+
rake ofac:update_data
|
117
|
+
|
118
|
+
* The OFAC data is not updated with any regularity, but you can sign up for email notifications when the data changes at
|
119
|
+
http://www.treas.gov/offices/enforcement/ofac/sdn/index.shtml.
|
120
|
+
|
121
|
+
== Copyright
|
122
|
+
|
123
|
+
Copyright (c) 2009 Kevin Tyll. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'rubygems'
require 'rake'
# YAML is used by the RDoc task below to read VERSION.yml; require it
# explicitly instead of relying on another library having loaded it.
require 'yaml'

# Gem packaging tasks. Jeweler is optional: when it is not installed we only
# print a hint, so the remaining tasks (test, rdoc) stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "ofac"
    gem.summary = %Q{Attempts to find a hit on the Office of Foreign Assets Control's Specially Designated Nationals list.}
    gem.description = %Q{Attempts to find a hit on the Office of Foreign Assets Control's Specially Designated Nationals list.}
    gem.email = "kevintyll@gmail.com"
    gem.homepage = "http://github.com/kevintyll/ofac"
    gem.authors = ["Kevin Tyll"]
    # The post-install message is the verbatim content of PostInstall.txt.
    gem.post_install_message = File.read("PostInstall.txt")
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end

  Jeweler::GemcutterTasks.new

rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end

# `rake test` runs every test/**/*_test.rb file with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# Coverage task. When rcov is missing, a stub :rcov task is defined that
# aborts with install instructions.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end


task :default => :test

# RDoc task. The title carries the version from VERSION.yml when that file
# exists, and an empty version string otherwise.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  if File.exist?('VERSION.yml')
    config = YAML.load(File.read('VERSION.yml'))
    version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
  else
    version = ""
  end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "ofac #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
60
|
+
|
data/VERSION.yml
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Migration that creates (and drops) the ofac_sdns table used to store the
# OFAC Specially Designated Nationals data.
class CreateOfacSdnTable < ActiveRecord::Migration

  # Creates ofac_sdns and indexes it on sdn_type. Columns are declared in
  # the same order as before; consecutive columns of the same type are
  # grouped into a single call.
  def self.up
    create_table :ofac_sdns do |t|
      t.text :name
      t.string :sdn_type, :program, :title,
               :vessel_call_sign, :vessel_type, :vessel_tonnage,
               :gross_registered_tonnage, :vessel_flag, :vessel_owner
      t.text :remarks, :address
      t.string :city, :country, :address_remarks, :alternate_identity_type
      t.text :alternate_identity_name
      t.string :alternate_identity_remarks
      t.timestamps
    end
    add_index :ofac_sdns, :sdn_type
  end

  # Reverts the migration by dropping the table.
  def self.down
    drop_table :ofac_sdns
  end
end
|
data/lib/ofac.rb
ADDED
@@ -0,0 +1,177 @@
|
|
1
|
+
# Scores an identity (name, address, city) against the records stored in the
# OfacSdn table, delegating the fuzzy matching itself to OfacMatch.
class Ofac

  # Columns fetched for every candidate record, shared by #db_hit? and #score.
  SDN_SELECT = 'name, alternate_identity_name, address, city'.freeze

  # Accepts a hash with the identity's demographic information
  #
  #   Ofac.new({:name => 'Oscar Hernandez', :city => 'Clearwater', :address => '123 somewhere ln'})
  #
  # <tt>:name</tt> is required to get a score. If <tt>:name</tt> is missing, an error will not be thrown, but a score of 0 will be returned.
  # Passing +nil+ instead of a hash is treated the same as passing an empty hash.
  #
  # You can pass a string in for the full name:
  #   Ofac.new(:name => 'Victor De La Garza')
  #
  # Or you can specify the last and first names
  #   Ofac.new(:name => {:first_name => 'Victor', :last_name => 'De La Garza'})
  #
  # The first method will build a larger list of names for ruby to parse through and is more likely to find similar names.
  # The second method is quicker.
  #
  # The more information provided, the higher the score could be. A score of 100 would mean all fields
  # were passed in, and all fields were 100% matches. If only the name is passed in without an address,
  # it will be impossible to get a score of 100, even if the name matches perfectly.
  #
  # Acceptable hash keys and their weighting in score calculation:
  #
  # * <tt>:name</tt> (weighting = 60%) (required) This can be a person, business, or marine vessel
  # * <tt>:address</tt> (weighting = 10%)
  # * <tt>:city</tt> (weighting = 30%)
  def initialize(identity)
    @identity = identity || {}
  end

  # Creates a score, 0 - 100, based on how well the name and address match the data on the
  # SDN (Specially Designated Nationals) list. The score is computed once and then cached.
  #
  # The score is calculated by adding up the weightings of each part that is matched. So
  # if only name is matched, then the max score is the weight for <tt>:name</tt> which is 60
  #
  # It's possible to get partial matches, which will add partial weight to the score. If there
  # is not a match on the element as it is passed in, then each word element gets broken down
  # and matches are tried on each partial element. The weighting is distributed equally for
  # each partial that is matched.
  #
  # If exact matches are not made, then a sounds like match is attempted. Any match made by sounds like
  # is given 75% of its weight to the score.
  #
  # Example:
  #
  # If you are trying to match the name Kevin Tyll and there is a record for Smith, Kevin in the database, then
  # we will try to match both Kevin and Tyll separately, with each element Smith and Kevin. Since only Kevin
  # will find a match, and there were 2 elements in the searched name, the score will be added by half the weighting
  # for <tt>:name</tt>. So since the weight for <tt>:name</tt> is 60, then we will add 30 to the score.
  #
  # If you are trying to match the name Kevin Gregory Tyll and there is a record for Tyll, Kevin in the database, then
  # we will try to match Kevin and Gregory and Tyll separately, with each element Tyll and Kevin. Since both Kevin
  # and Tyll will find a match, and there were 3 elements in the searched name, the score will be added by 2/3 the weighting
  # for <tt>:name</tt>. So since the weight for <tt>:name</tt> is 60, then we will add 40 to the score.
  #
  # If you are trying to match the name Kevin Tyll and there is a record for Kevin Gregory Tyll in the database, then
  # we will try to match Kevin and Tyll separately, with each element Tyll and Kevin and Gregory. Since both Kevin
  # and Tyll will find a match, and there were 2 elements in the searched name, the score will be added by 2/2 the weighting
  # for <tt>:name</tt>. So since the weight for <tt>:name</tt> is 60, then we will add 60 to the score.
  #
  # If you are trying to match the name Kevin Tyll, and there is a record for Teel, Kevin in the database, then an exact match
  # will be found for Kevin, and a sounds like match will be made for Tyll. Since there were 2 elements in the searched name,
  # and the weight for <tt>:name</tt> is 60, then each element is worth 30. Since Kevin was an exact match, it will add 30, and
  # since Tyll was a sounds like match, it will add 30 * .75. So the <tt>:name</tt> portion of the search will be worth 53.
  #
  # If data is in the database for city and/or address, and you pass data in for these elements, the score will be reduced by 10%
  # of the weight if there is no match or sounds like match. So if you get a match on name, you've already got a score of 60. So
  # if you don't pass in an address or city, or if you do, but there is no city or address info in the database, then your final score
  # will be 60. But if you do pass in a city, say Tampa, and the city in the Database is New York, then we will deduct 10% of the
  # weight (30 * .1) = 3 from the score since 30 is the weight for <tt>:city</tt>. So the final score will be 57.
  #
  # If we're searching for New York, and the database had New Deli, then there would be a match on New, but not on Deli.
  # Since there were 2 elements in the searched city, each hit is worth 15. So the match on New would add 15, but the non-match
  # on York would subtract (15 * .1) = 1.5 from the score. So the score would be (60 + 15 - 1.5) = 74, due to rounding.
  #
  # Only <tt>:city</tt> and <tt>:address</tt> subtract from the score. No match on name simply returns 0.
  #
  # Matches for name are made for both the name and any aliases in the OFAC database.
  #
  # Matches for <tt>:city</tt> and <tt>:address</tt> will only be added to the score if there is first a match on <tt>:name</tt>.
  #
  # We consider a score of 60 to be reasonable as a hit.
  def score
    @score || calculate_score
  end

  # Quick database-only check that skips the ruby matching algorithm.
  #
  # Returns true when at least one 'individual' record has a name, or an
  # alternate_identity_name, that contains every usable part of the given
  # name. <tt>:address</tt> and <tt>:city</tt> are not consulted and no score
  # is produced. Returns false when the name is blank or consists only of
  # single-character parts (initials).
  #
  #   Ofac.new({:name => 'Oscar Hernandez', :city => 'Clearwater', :address => '123 somewhere ln'}).db_hit?
  def db_hit?
    name_parts = searchable_name_parts
    return false if name_parts.empty?

    patterns = like_patterns(name_parts)
    # Build the SQL fragment from plain strings. Interpolating an Array into
    # the fragment only yields valid SQL under Ruby 1.8's Array#to_s.
    name_clause = like_clause('name', name_parts.size, ' and ')
    alt_name_clause = like_clause('alternate_identity_name', name_parts.size, ' and ')
    conditions = ["(#{name_clause}) or (#{alt_name_clause})"] + patterns + patterns

    !find_individuals(conditions).empty?
  end

  # Returns an array of hashes of records in the OFAC data that found partial matches with that record's score.
  # Returns an empty array when no search could be run (for example a blank name).
  #
  #   Ofac.new({:name => 'Oscar Hernandez', :city => 'Clearwater', :address => '123 somewhere ln'}).possible_hits
  # returns
  #   [{:address=>"123 Somewhere Ln", :score=>100, :name=>"HERNANDEZ, Oscar|GUAMATUR, S.A.", :city=>"Clearwater"}, {:address=>"123 Somewhere Ln", :score=>100, :name=>"HERNANDEZ, Oscar|Alternate Name", :city=>"Clearwater"}]
  #
  def possible_hits
    @possible_hits || retrieve_possible_hits
  end

  private

  # Triggers the score calculation, which populates @possible_hits as a side
  # effect, and falls back to an empty array when no search was run.
  def retrieve_possible_hits
    score
    @possible_hits ||= []
  end

  # Runs the liberal database query (any name part may match either the name
  # or the alternate name), hands the candidates to OfacMatch and caches the
  # resulting score. The score is 0 when there is nothing to search for.
  def calculate_score
    name_parts = searchable_name_parts
    result = nil

    unless name_parts.empty?
      # first get a list from the database of possible matches by name
      # this query is pretty liberal, we just want to get a list of possible
      # matches from the database that we can run through our ruby matching algorithm
      patterns = like_patterns(name_parts)
      clause = [like_clause('name', name_parts.size, ' or '),
                like_clause('alternate_identity_name', name_parts.size, ' or ')].join(' or ')
      conditions = [clause] + patterns + patterns

      candidates = find_individuals(conditions).map do |sdn|
        {:name => "#{sdn['name']}|#{sdn['alternate_identity_name']}", :city => sdn['city'], :address => sdn['address']}
      end

      match = OfacMatch.new({:name => {:weight => 60, :token => name_parts.join(', ')},
                             :address => {:weight => 10, :token => @identity[:address]},
                             :city => {:weight => 30, :token => @identity[:city]}})

      result = match.score(candidates)
      @possible_hits = match.possible_hits
    end

    @score = result || 0
  end

  # Name parts worth sending to the database: empty when the name is blank,
  # otherwise the parts from #process_name minus anything shorter than two
  # characters (initials, empty fragments).
  def searchable_name_parts
    return [] if @identity[:name].to_s.blank?
    process_name.reject { |part| part.to_s.strip.size < 2 }
  end

  # One "%part%" LIKE pattern per name part.
  def like_patterns(name_parts)
    name_parts.map { |part| "%#{part}%" }
  end

  # "<column> like ?" repeated +count+ times and joined with +joiner+.
  def like_clause(column, count, joiner)
    (["#{column} like ?"] * count).join(joiner)
  end

  # Fetches the 'individual' records that satisfy the given conditions array.
  def find_individuals(conditions)
    OfacSdn.find_all_by_sdn_type('individual', :select => SDN_SELECT, :conditions => conditions)
  end

  # Splits the identity's name into parts, last name first.
  # You can pass in a full name, or specify the first and last name:
  # a Hash yields [last_name, first_name] without nils; anything else is
  # split on non-word characters and reversed.
  def process_name
    name = @identity[:name]
    if name.kind_of?(Hash)
      [name[:last_name], name[:first_name]].compact
    else
      name.to_s.split(/\W/).reverse
    end
  end

end
|