ofac 1.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ require 'activerecord'
2
+
3
# ActiveRecord model for OFAC SDN records.
#
# The class adds nothing of its own; all behaviour is inherited from
# ActiveRecord::Base.
class OfacSdn < ActiveRecord::Base
end
@@ -0,0 +1,305 @@
1
require 'net/http'
require 'tempfile'
require 'activerecord'
require 'active_record/connection_adapters/mysql_adapter'
4
+
5
# Downloads the three pipe-delimited OFAC files (sdn, address, alt), flattens
# them into one denormalized record per sdn/address/alternate-identity
# combination, and reloads the OfacSdn table with the result.
#
# All methods are class methods. The helpers below +load_current_sdn_file+
# were declared after a bare +private+ in earlier versions, which has no
# effect on <tt>def self.</tt> methods, so they have always been publicly
# callable and remain so.
class OfacSdnLoader

  # Location of the pipe-delimited OFAC files.
  SDN_DOWNLOAD_URL = 'http://www.treas.gov/offices/enforcement/ofac/sdn/delimit/'.freeze

  # Record keys written to the Mysql import file, in column order. The file
  # also gets an empty leading id column and two trailing timestamp columns.
  MYSQL_IMPORT_FIELDS = [
    :name, :sdn_type, :program, :title, :vessel_call_sign, :vessel_type,
    :vessel_tonnage, :gross_registered_tonnage, :vessel_flag, :vessel_owner,
    :remarks, :address, :city, :country, :address_remarks,
    :alternate_identity_type, :alternate_identity_name,
    :alternate_identity_remarks
  ].freeze

  # Format used for the created_at / updated_at columns of the import file
  # (the same text ActiveSupport produces for Time#to_s(:db)).
  DB_TIME_FORMAT = '%Y-%m-%d %H:%M:%S'.freeze

  #Loads the most recent file from http://www.treas.gov/offices/enforcement/ofac/sdn/delimit/index.shtml
  #
  # Downloads the sdn, address and alt files, then replaces the contents of
  # the OfacSdn table: through a Mysql bulk import when the connection is a
  # MysqlAdapter, otherwise one OfacSdn.create per record.
  #
  # Raises a RuntimeError when one of the downloads does not return an HTTP
  # success status, before any existing data is deleted. Returns nil.
  def self.load_current_sdn_file
    puts "Reloading OFAC sdn data"
    puts "Downloading OFAC data from http://www.treas.gov/offices/enforcement/ofac/sdn"
    sdn = address = alt = csv_file = nil
    begin
      #get the 3 data files
      sdn     = download_to_tempfile('sdn.pip')
      address = download_to_tempfile('add.pip')
      alt     = download_to_tempfile('alt.pip')

      if OfacSdn.connection.kind_of?(ActiveRecord::ConnectionAdapters::MysqlAdapter)
        puts "Converting file to csv format for Mysql import. This could take several minutes."

        csv_file = convert_to_flattened_csv(sdn, address, alt)

        bulk_mysql_update(csv_file)
      else
        active_record_file_load(sdn, address, alt)
      end
    ensure
      # close every temp file that was opened, even when the load failed
      [sdn, address, alt, csv_file].each { |file| file.close if file }
    end
    nil
  end

  # Downloads +file_name+ from SDN_DOWNLOAD_URL into a Tempfile and returns
  # the Tempfile rewound to its start.
  #
  # Raises a RuntimeError unless the response is an HTTP success, so that an
  # error page is never parsed as OFAC data.
  def self.download_to_tempfile(file_name)
    response = Net::HTTP.get_response(URI.parse(SDN_DOWNLOAD_URL + file_name))
    unless response.kind_of?(Net::HTTPSuccess)
      raise "Unable to download #{file_name} from OFAC: #{response.code} #{response.message}"
    end

    file = Tempfile.new('sdn')
    file.write(response.body)
    file.rewind
    file
  end

  #convert the file's null value (-0-) to an empty string
  #and remove line endings and " chars.
  #
  # Returns a new string; +line+ itself is left untouched, so frozen strings
  # are accepted.
  def self.clean_file_string(line)
    line.gsub(/-0-(\s)?/, '').gsub(/\r?\n/, '').delete('"')
  end

  #split the line into an array of its pipe-delimited values.
  # Returns nil when +line+ is nil. Trailing empty values are dropped by
  # String#split, so reading a missing column yields nil.
  def self.convert_line_to_array(line)
    clean_file_string(line).split('|') unless line.nil?
  end

  #return 2 arrays of the records matching the sdn primary key:
  #1 array of address records and one array of alt records
  #
  # Reads forward through @address and @alt, which must have been set up by
  # each_flattened_record. Rows are consumed only while their id equals
  # +sdn_id+, so this relies on the address and alt files listing ids in the
  # same order as the sdn file.
  def self.foreign_key_records(sdn_id)
    #the first element in each row is the primary/foreign key
    #we are denormalizing the data
    address_records = []
    while @current_address_hash && @current_address_hash[:id] == sdn_id
      address_records << @current_address_hash
      @current_address_hash = address_text_to_hash(@address.gets)
    end

    alt_records = []
    while @current_alt_hash && @current_alt_hash[:id] == sdn_id
      alt_records << @current_alt_hash
      @current_alt_hash = alt_text_to_hash(@alt.gets)
    end

    return address_records, alt_records
  end

  # Converts one line of the sdn file into a hash of sdn attributes.
  # Returns nil when +line+ is nil.
  def self.sdn_text_to_hash(line)
    return nil if line.nil?

    values = convert_line_to_array(line)
    {:id => values[0],
     :name => values[1],
     :sdn_type => values[2],
     :program => values[3],
     :title => values[4],
     :vessel_call_sign => values[5],
     :vessel_type => values[6],
     :vessel_tonnage => values[7],
     :gross_registered_tonnage => values[8],
     :vessel_flag => values[9],
     :vessel_owner => values[10],
     :remarks => values[11]
    }
  end

  # Converts one line of the address file into a hash of address attributes.
  # The second column of the line is not used. Returns nil when +line+ is nil.
  def self.address_text_to_hash(line)
    return nil if line.nil?

    values = convert_line_to_array(line)
    {:id => values[0],
     :address => values[2],
     :city => values[3],
     :country => values[4],
     :address_remarks => values[5]
    }
  end

  # Converts one line of the alt file into a hash of alternate identity
  # attributes. The second column of the line is not used. Returns nil when
  # +line+ is nil.
  def self.alt_text_to_hash(line)
    return nil if line.nil?

    values = convert_line_to_array(line)
    {:id => values[0],
     :alternate_identity_type => values[2],
     :alternate_identity_name => values[3],
     :alternate_identity_remarks => values[4]
    }
  end

  # Builds one line of the Mysql import file from +record_hash+: every value
  # enclosed in backticks, values separated by |, terminated by a newline.
  #
  # The first column is left empty so that mysql generates the id; the last
  # two columns are created_at and updated_at, which share one timestamp.
  def self.convert_hash_to_mysql_import_string(record_hash)
    timestamp = Time.now.strftime(DB_TIME_FORMAT)
    values = [''] + MYSQL_IMPORT_FIELDS.map { |field| record_hash[field] } + [timestamp, timestamp]
    values.map { |value| "`#{value}`" }.join('|') + "\n"
  end

  # Walks +sdn_file+ line by line and yields one merged attribute hash for
  # every combination of the sdn record with its address and alt records.
  # When an sdn has no address (or no alt) records, a hash of nil values is
  # merged in instead, so each sdn is yielded at least once.
  #
  # Lines without an id (for example blank lines) are skipped. A progress
  # message is printed every +progress_interval+ lines.
  def self.each_flattened_record(sdn_file, address_file, alt_file, progress_interval)
    @address = address_file
    @alt = alt_file

    #get the first line from the address and alt files
    @current_address_hash = address_text_to_hash(@address.gets)
    @current_alt_hash = alt_text_to_hash(@alt.gets)

    #placeholders used when an sdn has no address or alt records
    blank_address = address_text_to_hash("|||||")
    blank_alt = alt_text_to_hash("||||")

    sdn_file.each_with_index do |line, i|
      sdn_attributes = sdn_text_to_hash(line)

      unless sdn_attributes[:id].to_s.empty?
        #get the foreign key records for this sdn
        address_records, alt_records = foreign_key_records(sdn_attributes[:id])
        address_records = [blank_address] if address_records.empty?
        alt_records = [blank_alt] if alt_records.empty?

        address_records.each do |address|
          alt_records.each do |alt|
            yield sdn_attributes.merge(address).merge(alt)
          end
        end
      end

      puts "#{i} records processed." if (i % progress_interval == 0) && (i > 0)
    end
  end

  # Flattens the three OFAC files into a temp file in the format expected by
  # bulk_mysql_update and returns that Tempfile (still open).
  def self.convert_to_flattened_csv(sdn_file, address_file, alt_file)
    csv_file = Tempfile.new("ofac") # create temp file for converted csv format.
    start = Time.now

    each_flattened_record(sdn_file, address_file, alt_file, 1000) do |record|
      # syswrite is unbuffered, so the data is in the file as soon as the
      # call returns and can be read through csv_file.path without a flush
      csv_file.syswrite(convert_hash_to_mysql_import_string(record))
    end

    puts "File conversion ran for #{(Time.now - start) / 60} minutes."
    return csv_file
  end

  # Deletes every OfacSdn row and recreates the table contents with one
  # OfacSdn.create call per flattened record.
  def self.active_record_file_load(sdn_file, address_file, alt_file)
    #OFAC data is a complete list, so we have to dump and load
    OfacSdn.delete_all

    each_flattened_record(sdn_file, address_file, alt_file, 5000) do |attributes|
      # the OFAC id is only used to join the three files; it is not stored
      attributes.delete(:id)
      OfacSdn.create(attributes)
    end
  end

  # Deletes every OfacSdn row and bulk loads +csv_file+ (as produced by
  # convert_to_flattened_csv) using LOAD DATA LOCAL INFILE with fields
  # terminated by '|', enclosed by "`" and lines terminated by a newline.
  # This is a much faster way of loading large amounts of data into mysql. For information on the LOAD DATA command
  # see http://dev.mysql.com/doc/refman/5.1/en/load-data.html
  def self.bulk_mysql_update(csv_file)
    puts "Deleting all records in ofac_sdn..."

    #OFAC data is a complete list, so we have to dump and load
    OfacSdn.delete_all

    puts "Importing into Mysql..."

    # load into the same table that delete_all just emptied
    mysql_command = <<-TEXT
      LOAD DATA LOCAL INFILE '#{csv_file.path}' REPLACE INTO TABLE #{OfacSdn.table_name} FIELDS TERMINATED BY '|' ENCLOSED BY "`" LINES TERMINATED BY '\n';
    TEXT

    OfacSdn.connection.execute(mysql_command)
    puts "Mysql import complete."
  end

end
@@ -0,0 +1,139 @@
1
# Scores how closely a set of tokens (name, city, address) matches one or
# more candidate records.
class OfacMatch

  # Records passed to #score that produced a score greater than 0, each
  # merged with its <tt>:score</tt>.
  attr_reader :possible_hits

  # Fields that contribute to the score, in evaluation order. :name has to
  # come first because the other fields only count after a name match.
  SCORED_FIELDS = [:name, :city, :address].freeze

  #Initialize a Match object with a record hash of fields you want to match on.
  #Each key in the hash, also has a data hash value for the weight, token, and type.
  #
  #  match = Ofac::Match.new({:name => {:weight => 10, :token => 'Kevin Tyll'},
  #       :city => {:weight => 40, :token => 'Clearwater', },
  #       :address => {:weight => 40, :token => '1234 Park St.', },
  #       :zip => {:weight => 10, :token => '33759', :type => :number}})
  #
  # data hash keys:
  #   * <tt>data[:weight]</tt> - value to apply to the score if there is a match (Default is 100/number of keys in the record hash)
  #   * <tt>data[:token]</tt> - string to match
  #   * <tt>data[:match]</tt> - set from records hash
  #   * <tt>data[:score]</tt> - output field
  #   * <tt>data[:type]</tt> - the type of match that should be performed (valid values are +:sound+ | +:number+) (Default is +:sound+)
  #
  # Only the :name, :city and :address keys are evaluated when scoring.
  # The hashes passed in are copied, so the caller's data is never modified.
  def initialize(stats={})
    @possible_hits = []

    # copy the nested data hashes too; a plain dup of +stats+ would still
    # share them with the caller
    @stats = {}
    stats.each { |field, data| @stats[field] = data.dup }

    weight = 100
    weight = 100 / @stats.length if @stats.length > 0
    @stats.each_value do |data|
      data[:weight] ||= weight
      data[:match]  ||= ''
      data[:type]   ||= :sound
      data[:score]  ||= 0
      data[:token]  = data[:token].to_s.upcase
    end
  end

  # match_records is an array of hashes.
  #
  # The hash keys should match the record hash keys set when initialized;
  # keys that were not set when initialized are ignored.
  #
  # score will return the highest score of all the records that
  # are sent in match_records (0 when there are none). Records with a
  # score greater than 0 are added to possible_hits.
  def score(match_records)
    score_results = []
    (match_records || []).each do |match|
      # start every record from a clean slate so that a field missing from
      # this record is not compared against the previous record's value
      @stats.each_value { |data| data[:match] = '' }

      #place the match_records information
      #into our @stats hash
      match.each do |key, value|
        data = @stats[key.to_sym]
        data[:match] = value.to_s.upcase if data
      end

      record_score = calculate_record
      score_results.push(record_score)
      @possible_hits << match.merge(:score => record_score) if record_score > 0
    end
    @possible_hits.uniq!
    score_results.max || 0 #take max score
  end

  private

  # calculate the score for the record currently loaded into @stats,
  # comparing the token to the match value of each scored field and
  # storing the field's score back into its data hash.
  #
  # Returns the rounded sum of the field scores. Scoring stops as soon as
  # :name is absent or scores 0, so city and address never count on their
  # own. A missing :city or :address is skipped.
  def calculate_record
    score = 0
    unless @stats.nil?
      #need to make sure we check the name first, since city and address don't
      #get added to the score unless there is a name match
      SCORED_FIELDS.each do |field|
        data = @stats[field]
        if data.nil?
          break if field == :name
          next
        end

        data[:score] = data[:weight] * field_match_value(field, data)
        score += data[:score]
        break if field == :name && data[:score] == 0
      end
    end
    score.round
  end

  # Returns the fraction of the field's weight earned by comparing
  # data[:token] with data[:match]:
  #
  # * 0 when the token or the match is blank
  # * for type :number, 1 on equality and 0 otherwise
  # * 1 when one of the |-separated match values equals the token
  # * 0.75 when the token sounds like one of the |-separated match values
  # * otherwise the word-by-word result of partial_match_value
  def field_match_value(field, data)
    token = data[:token]
    match = data[:match]

    #can't be sure of a match if there is nothing to match with or against
    return 0 if token.blank? || match.blank?
    return (token == match ? 1 : 0) if data[:type] == :number

    candidates = match.split('|')

    #first see if there is an exact match
    return 1 if candidates.include?(token)

    #do a sounds like with the data as given to see if we get a match
    #if match on sounds_like, only give .75 of the weight.
    return 0.75 if candidates.any? { |candidate| token.ofac_sounds_like(candidate, false) }

    #if no match, then break the data down and see if we can find matches on the
    #individual words
    partial_match_value(field, token, match)
  end

  # Compares +token+ and +match+ word by word (split on non-word
  # characters). Every token word found among the match words earns an
  # equal share of 1. For fields other than :name, every token word that is
  # not found subtracts 10% of that share, so the result can be negative.
  def partial_match_value(field, token, match)
    token_words = token.gsub(/\W/, '|').split('|')
    token_words.delete('')

    match_words = match.gsub(/\W/, '|').split('|')
    match_words.delete('')

    partial_weight = 1 / token_words.length.to_f
    value = 0

    token_words.each do |word|
      if match_words.include?(word)
        value += partial_weight
      elsif field != :name
        value -= partial_weight * 0.1
      end
    end
    value
  end

end
139
+