ofac 1.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +6 -0
- data/History.txt +72 -0
- data/LICENSE +20 -0
- data/PostInstall.txt +11 -0
- data/README.rdoc +123 -0
- data/Rakefile +60 -0
- data/VERSION.yml +4 -0
- data/generators/ofac_migration/ofac_migration_generator.rb +12 -0
- data/generators/ofac_migration/templates/migration.rb +31 -0
- data/lib/ofac.rb +9 -0
- data/lib/ofac/models/ofac.rb +177 -0
- data/lib/ofac/models/ofac_sdn.rb +5 -0
- data/lib/ofac/models/ofac_sdn_loader.rb +305 -0
- data/lib/ofac/ofac_match.rb +139 -0
- data/lib/ofac/ruby_string_extensions.rb +22 -0
- data/lib/tasks/ofac.rake +8 -0
- data/ofac.gemspec +104 -0
- data/test/files/test_address_data_load.pip +10 -0
- data/test/files/test_alt_data_load.pip +10 -0
- data/test/files/test_sdn_data_load.pip +9 -0
- data/test/files/valid_flattened_file.csv +19 -0
- data/test/mocks/test/ofac_sdn_loader.rb +20 -0
- data/test/ofac_sdn_loader_test.rb +40 -0
- data/test/ofac_test.rb +138 -0
- data/test/test_helper.rb +49 -0
- metadata +119 -0
data/.document
ADDED
data/.gitignore
ADDED
data/History.txt
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
== 0.1.0 2009-05-7
|
2
|
+
|
3
|
+
* 1 major enhancement:
|
4
|
+
* Table creation and data load task complete
|
5
|
+
|
6
|
+
== 1.0.0 2009-05-11
|
7
|
+
|
8
|
+
* 1 major enhancement:
|
9
|
+
* Initial release
|
10
|
+
|
11
|
+
== 1.1.0 2009-05-12
|
12
|
+
|
13
|
+
* 1 minor enhancement:
|
14
|
+
* Modified the match algorithm to reduce the score if there is not an address or city match when the data is in the database.
|
15
|
+
|
16
|
+
== 1.1.2 2009-05-13
|
17
|
+
|
18
|
+
* 2 minor changes:
|
19
|
+
* Changed the sql in the initial search to do a like instead of a soundex. For short names, the soundex returned almost the entire table
|
20
|
+
making the process take too long.
|
21
|
+
* Also changed the sql to only return individuals, also for the sake of performance.
|
22
|
+
|
23
|
+
== 1.1.3 2009-05-15
|
24
|
+
|
25
|
+
* 1 bug fix:
|
26
|
+
* fixed a bug that threw an error if a space was passed in for the name.
|
27
|
+
|
28
|
+
== 1.1.4 2009-06-02
|
29
|
+
|
30
|
+
* 1 minor enhancement:
|
31
|
+
* Improved performance by ignoring initial in names when searching the database for possible hits.
|
32
|
+
|
33
|
+
== 1.1.5 2009-06-03
|
34
|
+
|
35
|
+
* 1 bug fix:
|
36
|
+
* fixed a bug that threw an error if only single character initials are passed in for the name.
|
37
|
+
|
38
|
+
== 1.1.6 2009-06-29
|
39
|
+
|
40
|
+
* 1 minor enhancement:
|
41
|
+
* Allow passing of first and last name separately...to improve performance.
|
42
|
+
|
43
|
+
== 1.1.7 2009-06-30
|
44
|
+
|
45
|
+
* 1 bug fix:
|
46
|
+
* fixed error when passing a nil value into the new :first_name or :last_name hash values when initializing the Ofac object.
|
47
|
+
|
48
|
+
== 1.1.8 2009-06-30
|
49
|
+
|
50
|
+
* 1 bug fix:
|
51
|
+
* Refactored the select on OfacSdn to use the AR connection instead of building sql and using the raw connection. Fixes a bug
|
52
|
+
introduced in 1.1.6 where quotes in the name raised an error.
|
53
|
+
|
54
|
+
== 1.1.9 2009-07-24
|
55
|
+
|
56
|
+
* 1 minor enhancement:
|
57
|
+
* Added a method, db_hit?, for when you're more concerned with speed than accuracy. db_hit? will return true if there is an exact name match
|
58
|
+
in the ofac_sdn database. This method ignores address and city and does not produce a score.
|
59
|
+
Usage: Ofac.new({:name => 'Oscar Hernandez', :city => 'Clearwater', :address => '123 somewhere ln'}).db_hit?
|
60
|
+
|
61
|
+
== 1.1.10 2009-07-28
|
62
|
+
|
63
|
+
* 1 minor enhancement:
|
64
|
+
* Modified the select in OfacSdn to use the city to pull records from the database. This is another compromise to improve performance,
|
65
|
+
but still get a score. The db_hit? method is still faster, but this will still calculate a score for accuracy.
|
66
|
+
|
67
|
+
== 1.1.11 2009-07-29
|
68
|
+
|
69
|
+
* 1 minor enhancement:
|
70
|
+
* Removed the changes from 1.1.10. 1.1.10 produced a score of 0 when a higher score was more accurate.
|
71
|
+
Changed the scoring algorithm to not include partial matches on sounds like. This code has little impact on score, and
|
72
|
+
has a very high overhead on performance.
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Kevin Tyll
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/PostInstall.txt
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
For more information on ofac, see http://kevintyll.github.com/ofac/
|
2
|
+
|
3
|
+
* To create the necessary db migration, from the command line, run:
|
4
|
+
script/generate ofac_migration
|
5
|
+
* Require the gem in your environment.rb file in the Rails::Initializer block:
|
6
|
+
config.gem 'kevintyll-ofac', :lib => 'ofac'
|
7
|
+
* To load your table with the current OFAC data, from the command line, run:
|
8
|
+
rake ofac:update_data
|
9
|
+
|
10
|
+
* The OFAC data is not updated with any regularity, but you can sign up for email notifications when the data changes at
|
11
|
+
http://www.treas.gov/offices/enforcement/ofac/sdn/index.shtml.
|
data/README.rdoc
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
= ofac
|
2
|
+
|
3
|
+
* http://kevintyll.github.com/ofac
|
4
|
+
* http://www.drexel-labs.com
|
5
|
+
|
6
|
+
* http://www.treas.gov/offices/enforcement/ofac/sdn/index.shtml
|
7
|
+
|
8
|
+
== DESCRIPTION:
|
9
|
+
|
10
|
+
ofac is a ruby gem that tries to find a match of a person's name and address against the
|
11
|
+
Office of Foreign Assets Control's Specially Designated Nationals list...the so called
|
12
|
+
terrorist watch list.
|
13
|
+
|
14
|
+
This gem, like the ssn_validator gem, started as a need for the company I work for, Clarity Services Inc.
|
15
|
+
We decided once again to create a gem out of it and share it with the community. Much
|
16
|
+
thanks goes to the management at Clarity Services Inc. for allowing this code to be open sourced. Thanks
|
17
|
+
also to Larry Berland at Clarity Services Inc. The matching logic in the ofac_match.rb file was derived from
|
18
|
+
his work.
|
19
|
+
|
20
|
+
== FEATURES:
|
21
|
+
|
22
|
+
Creates a score, 1 - 100, based on how well the name, address and city match the data on the SDN list. Since
|
23
|
+
we have to match on strings, the likelihood of an exact match is virtually nil. So we've created an
|
24
|
+
algorithm that creates a score. The better the match, the higher the score. A score of 100 would be
|
25
|
+
a perfect match.
|
26
|
+
|
27
|
+
The score is calculated by adding up the weightings of each part that is matched. So
|
28
|
+
if only name is matched, then the max score is the weight for <tt>:name</tt> which is 60
|
29
|
+
|
30
|
+
It's possible to get partial matches, which will add partial weight to the score. If there
|
31
|
+
is not a match on the element as it is passed in, then each word element gets broken down
|
32
|
+
and matches are tried on each partial element. The weighting is distributed equally for
|
33
|
+
each partial that is matched.
|
34
|
+
|
35
|
+
If exact matches are not made, then a sounds like match is attempted. Any match made by sounds like
|
36
|
+
is given 75% of its weight to the score.
|
37
|
+
Example:
|
38
|
+
|
39
|
+
If you are trying to match the name Kevin Tyll and there is a record for Smith, Kevin in the database, then
|
40
|
+
we will try to match both Kevin and Tyll separately, with each element Smith and Kevin. Since only Kevin
|
41
|
+
will find a match, and there were 2 elements in the searched name, the score will be added by half the weighting
|
42
|
+
for <tt>:name</tt>. So since the weight for <tt>:name</tt> is 60, then we will add 30 to the score.
|
43
|
+
|
44
|
+
If you are trying to match the name Kevin Gregory Tyll and there is a record for Tyll, Kevin in the database, then
|
45
|
+
we will try to match Kevin and Gregory and Tyll separately, with each element Tyll and Kevin. Since both Kevin
|
46
|
+
and Tyll will find a match, and there were 3 elements in the searched name, the score will be added by 2/3 the weighting
|
47
|
+
for <tt>:name</tt>. So since the weight for <tt>:name</tt> is 60, then we will add 40 to the score.
|
48
|
+
|
49
|
+
If you are trying to match the name Kevin Tyll and there is a record for Kevin Gregory Tyll in the database, then
|
50
|
+
we will try to match Kevin and Tyll separately, with each element Tyll and Kevin and Gregory. Since both Kevin
|
51
|
+
and Tyll will find a match, and there were 2 elements in the searched name, the score will be added by 2/2 the weighting
|
52
|
+
for <tt>:name</tt>. So since the weight for <tt>:name</tt> is 60, then we will add 60 to the score.
|
53
|
+
|
54
|
+
If you are trying to match the name Kevin Tyll, and there is a record for Teel, Kevin in the database, then an exact match
|
55
|
+
will be found for Kevin, and a sounds like match will be made for Tyll. Since there were 2 elements in the searched name,
|
56
|
+
and the weight for <tt>:name</tt> is 60, then each element is worth 30. Since Kevin was an exact match, it will add 30, and
|
57
|
+
since Tyll was a sounds like match, it will add 30 * .75. So the <tt>:name</tt> portion of the search will be worth 53.
|
58
|
+
|
59
|
+
If data is in the database for city and/or address, and you pass data in for these elements, the score will be reduced by 10%
|
60
|
+
of the weight if there is no match or sounds like match. So if you get a match on name, you've already got a score of 60. So
|
61
|
+
if you don't pass in an address or city, or if you do, but there is no city or address info in the database, then your final score
|
62
|
+
will be 60. But if you do pass in a city, say Tampa, and the city in the Database is New York, then we will deduct 10% of the
|
63
|
+
weight (30 * .1) = 3 from the score since 30 is the weight for <tt>:city</tt>. So the final score will be 57.
|
64
|
+
|
65
|
+
If we were searching for New York, and the database had New Delhi, then there would be a match on New, but not on Delhi.
|
66
|
+
Since there were 2 elements in the searched city, each hit is worth 15. So the match on New would add 15, but the non-match
|
67
|
+
on York would subtract (15 * .1) = 1.5 from the score. So the score would be (60 + 15 - 1.5) = 74, due to rounding.
|
68
|
+
|
69
|
+
Only <tt>:city</tt> and <tt>:address</tt> subtract from the score. No match on name simply returns 0.
|
70
|
+
|
71
|
+
Matches for name are made for both the name and any aliases in the OFAC database.
|
72
|
+
|
73
|
+
Matches for <tt>:city</tt> and <tt>:address</tt> will only be added to the score if there is first a match on <tt>:name</tt>.
|
74
|
+
|
75
|
+
We consider a score of 60 to be reasonable as a hit.
|
76
|
+
|
77
|
+
== SYNOPSIS:
|
78
|
+
Accepts a hash with the identity's demographic information
|
79
|
+
|
80
|
+
Ofac.new({:name => 'Oscar Hernandez', :city => 'Clearwater', :address => '123 somewhere ln'})
|
81
|
+
|
82
|
+
<tt>:name</tt> is required to get a score. If <tt>:name</tt> is missing, an error will not be thrown, but a score of 0 will be returned.
|
83
|
+
|
84
|
+
The more information provided, the higher the score could be. A score of 100 would mean all fields
|
85
|
+
were passed in, and all fields were 100% matches. If only the name is passed in without an address,
|
86
|
+
it will be impossible to get a score of 100, even if the name matches perfectly.
|
87
|
+
|
88
|
+
Acceptable hash keys and their weighting in score calculation:
|
89
|
+
|
90
|
+
* <tt>:name</tt> (weighting = 60%) (required) This can be a person, business, or marine vessel
|
91
|
+
* <tt>:address</tt> (weighting = 10%)
|
92
|
+
* <tt>:city</tt> (weighting = 30%)
|
93
|
+
|
94
|
+
* Instantiate the object with the identity's name, street address, and city.
|
95
|
+
ofac = Ofac.new(:name => 'Kevin Tyll', :city => 'Clearwater', :address => '123 Somewhere Ln.')
|
96
|
+
|
97
|
+
* Then get the score
|
98
|
+
ofac.score => return the score 1 - 100
|
99
|
+
|
100
|
+
* You can also get the list of all the partial matches with the score of each record.
|
101
|
+
ofac.possible_hits => returns an array of hashes.
|
102
|
+
|
103
|
+
== REQUIREMENTS:
|
104
|
+
|
105
|
+
* Rails 2.0.0 or greater
|
106
|
+
|
107
|
+
== INSTALL:
|
108
|
+
|
109
|
+
* To install the gem:
|
110
|
+
sudo gem install kevintyll-ofac
|
111
|
+
* To create the necessary db migration, from the command line, run:
|
112
|
+
script/generate ofac_migration
|
113
|
+
* Require the gem in your environment.rb file in the Rails::Initializer block:
|
114
|
+
config.gem 'kevintyll-ofac', :lib => 'ofac'
|
115
|
+
* To load your table with the current OFAC data, from the command line, run:
|
116
|
+
rake ofac:update_data
|
117
|
+
|
118
|
+
* The OFAC data is not updated with any regularity, but you can sign up for email notifications when the data changes at
|
119
|
+
http://www.treas.gov/offices/enforcement/ofac/sdn/index.shtml.
|
120
|
+
|
121
|
+
== Copyright
|
122
|
+
|
123
|
+
Copyright (c) 2009 Kevin Tyll. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# Rakefile for the ofac gem: defines packaging (Jeweler), test, coverage
# (rcov) and rdoc tasks. Optional tooling is loaded defensively so that a
# missing development gem never prevents the remaining tasks from loading.
require 'rubygems'
require 'rake'
# YAML is used below to read VERSION.yml; require it explicitly instead of
# relying on another library having loaded it as a side effect.
require 'yaml'

# Packaging / release tasks. Only defined when Jeweler is installed.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "ofac"
    gem.summary = %Q{Attempts to find a hit on the Office of Foreign Assets Control's Specially Designated Nationals list.}
    gem.description = %Q{Attempts to find a hit on the Office of Foreign Assets Control's Specially Designated Nationals list.}
    gem.email = "kevintyll@gmail.com"
    gem.homepage = "http://github.com/kevintyll/ofac"
    gem.authors = ["Kevin Tyll"]
    gem.post_install_message = File.readlines("PostInstall.txt").join("")
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end

  Jeweler::GemcutterTasks.new

rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end

# Unit tests: runs every test/**/*_test.rb with lib and test on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# Coverage: real task when rcov is installed, otherwise a stub that explains
# how to install it.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end


task :default => :test

# Documentation: prefer the task class shipped with rdoc ('rdoc/task') and
# fall back to the legacy 'rake/rdoctask' on old installations. If neither
# can be loaded, define a stub task rather than failing the whole Rakefile.
rdoc_task_class =
  begin
    require 'rdoc/task'
    RDoc::Task
  rescue LoadError
    begin
      require 'rake/rdoctask'
      Rake::RDocTask
    rescue LoadError
      nil
    end
  end

if rdoc_task_class
  rdoc_task_class.new do |rdoc|
    # Build "major.minor.patch" from VERSION.yml when present; the file is
    # expected to use symbol keys (:major, :minor, :patch).
    if File.exist?('VERSION.yml')
      config = YAML.load(File.read('VERSION.yml'))
      version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
    else
      version = ""
    end

    rdoc.rdoc_dir = 'rdoc'
    rdoc.title = "ofac #{version}"
    rdoc.rdoc_files.include('README*')
    rdoc.rdoc_files.include('lib/**/*.rb')
  end
else
  task :rdoc do
    abort "RDoc is not available. In order to run rdoc, you must: sudo gem install rdoc"
  end
end
|
60
|
+
|
data/VERSION.yml
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Migration template that creates the +ofac_sdns+ table: one flattened row
# per SDN entry, carrying the SDN attributes, an address and an alternate
# identity, plus the standard timestamp columns.
class CreateOfacSdnTable < ActiveRecord::Migration

  class << self
    # Creates the table and an index on +sdn_type+.
    # Columns are declared in exactly the same order as before; consecutive
    # columns of the same type are simply grouped into a single call.
    def up
      create_table :ofac_sdns do |t|
        t.text   :name
        t.string :sdn_type, :program, :title,
                 :vessel_call_sign, :vessel_type, :vessel_tonnage,
                 :gross_registered_tonnage, :vessel_flag, :vessel_owner
        t.text   :remarks, :address
        t.string :city, :country, :address_remarks, :alternate_identity_type
        t.text   :alternate_identity_name
        t.string :alternate_identity_remarks
        t.timestamps
      end
      add_index :ofac_sdns, :sdn_type
    end

    # Drops the table (and with it the index).
    def down
      drop_table :ofac_sdns
    end
  end

end
|
data/lib/ofac.rb
ADDED
@@ -0,0 +1,177 @@
|
|
1
|
+
# Scores how closely an identity (name, address, city) matches records in the
# ofac_sdns table (the OFAC Specially Designated Nationals list).
class Ofac

  # Columns fetched for every candidate record.
  SDN_SELECT = 'name, alternate_identity_name, address, city'

  # Accepts a hash with the identity's demographic information
  #
  #   Ofac.new({:name => 'Oscar Hernandez', :city => 'Clearwater', :address => '123 somewhere ln'})
  #
  # <tt>:name</tt> is required to get a score.  If <tt>:name</tt> is missing, an error will not be thrown, but a score of 0 will be returned.
  #
  # You can pass a string in for the full name:
  #   Ofac.new(:name => 'Victor De La Garza')
  #
  # Or you can specify the last and first names
  #   Ofac.new(:name => {:first_name => 'Victor', :last_name => 'De La Garza'})
  #
  # The first method will build a larger list of names for ruby to parse through and more likely to find similar names.
  # The second method is quicker.
  #
  # The more information provided, the higher the score could be.  A score of 100 would mean all fields
  # were passed in, and all fields were 100% matches.  If only the name is passed in without an address,
  # it will be impossible to get a score of 100, even if the name matches perfectly.
  #
  # Acceptable hash keys and their weighting in score calculation:
  #
  # * <tt>:name</tt> (weighting = 60%) (required) This can be a person, business, or marine vessel
  # * <tt>:address</tt> (weighting = 10%)
  # * <tt>:city</tt> (weighting = 30%)
  def initialize(identity)
    @identity = identity
  end

  # Creates a score, 1 - 100, based on how well the name and address match the data on the
  # SDN (Specially Designated Nationals) list.
  #
  # The score is calculated by adding up the weightings of each part that is matched. So
  # if only name is matched, then the max score is the weight for <tt>:name</tt> which is 60
  #
  # It's possible to get partial matches, which will add partial weight to the score.  If there
  # is not a match on the element as it is passed in, then each word element gets broken down
  # and matches are tried on each partial element.  The weighting is distributed equally for
  # each partial that is matched.
  #
  # If exact matches are not made, then a sounds like match is attempted.  Any match made by sounds like
  # is given 75% of its weight to the score.
  #
  # Example:
  #
  # If you are trying to match the name Kevin Tyll and there is a record for Smith, Kevin in the database, then
  # we will try to match both Kevin and Tyll separately, with each element Smith and Kevin.  Since only Kevin
  # will find a match, and there were 2 elements in the searched name, the score will be added by half the weighting
  # for <tt>:name</tt>.  So since the weight for <tt>:name</tt> is 60, then we will add 30 to the score.
  #
  # If you are trying to match the name Kevin Gregory Tyll and there is a record for Tyll, Kevin in the database, then
  # we will try to match Kevin and Gregory and Tyll separately, with each element Tyll and Kevin.  Since both Kevin
  # and Tyll will find a match, and there were 3 elements in the searched name, the score will be added by 2/3 the weighting
  # for <tt>:name</tt>.  So since the weight for <tt>:name</tt> is 60, then we will add 40 to the score.
  #
  # If you are trying to match the name Kevin Tyll and there is a record for Kevin Gregory Tyll in the database, then
  # we will try to match Kevin and Tyll separately, with each element Tyll and Kevin and Gregory.  Since both Kevin
  # and Tyll will find a match, and there were 2 elements in the searched name, the score will be added by 2/2 the weighting
  # for <tt>:name</tt>.  So since the weight for <tt>:name</tt> is 60, then we will add 60 to the score.
  #
  # If you are trying to match the name Kevin Tyll, and there is a record for Teel, Kevin in the database, then an exact match
  # will be found for Kevin, and a sounds like match will be made for Tyll.  Since there were 2 elements in the searched name,
  # and the weight for <tt>:name</tt> is 60, then each element is worth 30.  Since Kevin was an exact match, it will add 30, and
  # since Tyll was a sounds like match, it will add 30 * .75.  So the <tt>:name</tt> portion of the search will be worth 53.
  #
  # If data is in the database for city and/or address, and you pass data in for these elements, the score will be reduced by 10%
  # of the weight if there is no match or sounds like match.  So if you get a match on name, you've already got a score of 60.  So
  # if you don't pass in an address or city, or if you do, but there is no city or address info in the database, then your final score
  # will be 60.  But if you do pass in a city, say Tampa, and the city in the Database is New York, then we will deduct 10% of the
  # weight (30 * .1) = 3 from the score since 30 is the weight for <tt>:city</tt>.  So the final score will be 57.
  #
  # If we were searching for New York, and the database had New Delhi, then there would be a match on New, but not on Delhi.
  # Since there were 2 elements in the searched city, each hit is worth 15.  So the match on New would add 15, but the non-match
  # on York would subtract (15 * .1) = 1.5 from the score.  So the score would be (60 + 15 - 1.5) = 74, due to rounding.
  #
  # Only <tt>:city</tt> and <tt>:address</tt> subtract from the score. No match on name simply returns 0.
  #
  # Matches for name are made for both the name and any aliases in the OFAC database.
  #
  # Matches for <tt>:city</tt> and <tt>:address</tt> will only be added to the score if there is first a match on <tt>:name</tt>.
  #
  # We consider a score of 60 to be reasonable as a hit.
  #
  # The result is memoized after the first call.
  def score
    @score || calculate_score
  end

  # Fast check that skips scoring.  Returns true when at least one
  # 'individual' record contains every usable name part (substring match)
  # in its name, or every usable name part in its alternate identity name.
  # Address and city are ignored.
  #
  # Returns false - instead of raising - when the name is blank or has no
  # parts of at least two characters.
  def db_hit?
    name_parts = searchable_name_parts
    return false if name_parts.empty?

    # Join to plain strings before interpolating: interpolating an Array
    # relies on Array#to_s, which is +inspect+ on Ruby 1.9+ and would
    # produce invalid SQL.
    name_sql     = like_clauses('name', name_parts).join(' and ')
    alt_name_sql = like_clauses('alternate_identity_name', name_parts).join(' and ')
    patterns     = like_patterns(name_parts)

    # One bind value per placeholder: name patterns first, then the same
    # patterns again for the alternate name clauses.
    conditions = ["(#{name_sql}) or (#{alt_name_sql})"] + patterns + patterns

    !find_individuals(conditions).empty?
  end

  # Returns an array of hashes of records in the OFAC data that found partial matches with that record's score.
  # Returns nil when no search could be run (blank or unusable name).
  #
  #   Ofac.new({:name => 'Oscar Hernandez', :city => 'Clearwater', :address => '123 somewhere ln'}).possible_hits
  # returns
  #   [{:address=>"123 Somewhere Ln", :score=>100, :name=>"HERNANDEZ, Oscar|GUAMATUR, S.A.", :city=>"Clearwater"}, {:address=>"123 Somewhere Ln", :score=>100, :name=>"HERNANDEZ, Oscar|Alternate Name", :city=>"Clearwater"}]
  #
  def possible_hits
    @possible_hits || retrieve_possible_hits
  end

  private

  # Forces the score calculation, which populates @possible_hits as a side effect.
  def retrieve_possible_hits
    score
    @possible_hits
  end

  # Queries the database for candidate records, runs them through OfacMatch
  # and caches the resulting score (0 when no search could be run).
  def calculate_score
    name_parts = searchable_name_parts
    matched_score = nil

    unless name_parts.empty?
      # First get a list from the database of possible matches by name.
      # This query is pretty liberal (any part in either column); we just
      # want a list of candidates to run through the ruby matching algorithm.
      clauses    = like_clauses('name', name_parts) + like_clauses('alternate_identity_name', name_parts)
      patterns   = like_patterns(name_parts)
      conditions = [clauses.join(' or ')] + patterns + patterns

      candidates = find_individuals(conditions).collect do |sdn|
        {:name    => "#{sdn['name']}|#{sdn['alternate_identity_name']}",
         :city    => sdn['city'],
         :address => sdn['address']}
      end

      match = OfacMatch.new({:name    => {:weight => 60, :token => name_parts.join(', ')},
                             :address => {:weight => 10, :token => @identity[:address]},
                             :city    => {:weight => 30, :token => @identity[:city]}})

      matched_score = match.score(candidates)
      @possible_hits = match.possible_hits
    end

    @score = matched_score || 0
    return @score
  end

  # Name parts usable in a database search: empty when the name is blank,
  # otherwise the processed name without parts shorter than two characters
  # (initials are ignored).
  def searchable_name_parts
    return [] if @identity[:name].to_s.strip.empty?
    process_name.reject { |part| part.to_s.strip.size < 2 }
  end

  # One "<column> like ?" fragment per name part.
  def like_clauses(column, name_parts)
    name_parts.collect { "#{column} like ?" }
  end

  # One "%part%" bind value per name part.
  def like_patterns(name_parts)
    name_parts.collect { |part| "%#{part}%" }
  end

  # Fetches 'individual' SDN records matching the given ActiveRecord conditions.
  def find_individuals(conditions)
    OfacSdn.find_all_by_sdn_type('individual', :select => SDN_SELECT, :conditions => conditions)
  end

  # Splits the name into parts.
  # You can pass in a full name, or specify the first and last name.
  # A hash yields [last_name, first_name] without nils; a string is split
  # on non-word characters and reversed.
  def process_name
    name = @identity[:name]
    if name.kind_of?(Hash)
      [name[:last_name], name[:first_name]].compact
    else
      name.gsub(/\W/, '|').split('|').reverse
    end
  end

end
|