@civiq/entity-resolution 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +135 -0
- package/data/bioguide-fec-mapping.json +4186 -0
- package/data/sec-sic-data.json +10427 -0
- package/dist/__tests__/company-entity-resolver.test.d.ts +6 -0
- package/dist/__tests__/company-entity-resolver.test.d.ts.map +1 -0
- package/dist/__tests__/company-entity-resolver.test.js +267 -0
- package/dist/__tests__/company-entity-resolver.test.js.map +1 -0
- package/dist/__tests__/entity-resolution.test.d.ts +2 -0
- package/dist/__tests__/entity-resolution.test.d.ts.map +1 -0
- package/dist/__tests__/entity-resolution.test.js +296 -0
- package/dist/__tests__/entity-resolution.test.js.map +1 -0
- package/dist/__tests__/fec-entity-resolution.test.d.ts +2 -0
- package/dist/__tests__/fec-entity-resolution.test.d.ts.map +1 -0
- package/dist/__tests__/fec-entity-resolution.test.js +49 -0
- package/dist/__tests__/fec-entity-resolution.test.js.map +1 -0
- package/dist/bioguide-fec-mapping.d.ts +27 -0
- package/dist/bioguide-fec-mapping.d.ts.map +1 -0
- package/dist/bioguide-fec-mapping.js +57 -0
- package/dist/bioguide-fec-mapping.js.map +1 -0
- package/dist/cache.d.ts +18 -0
- package/dist/cache.d.ts.map +1 -0
- package/dist/cache.js +18 -0
- package/dist/cache.js.map +1 -0
- package/dist/committee-agency-map.d.ts +58 -0
- package/dist/committee-agency-map.d.ts.map +1 -0
- package/dist/committee-agency-map.js +658 -0
- package/dist/committee-agency-map.js.map +1 -0
- package/dist/committee-alias-table.d.ts +11 -0
- package/dist/committee-alias-table.d.ts.map +1 -0
- package/dist/committee-alias-table.js +191 -0
- package/dist/committee-alias-table.js.map +1 -0
- package/dist/company-alias-table.d.ts +36 -0
- package/dist/company-alias-table.d.ts.map +1 -0
- package/dist/company-alias-table.js +1307 -0
- package/dist/company-alias-table.js.map +1 -0
- package/dist/company-entity-resolver.d.ts +94 -0
- package/dist/company-entity-resolver.d.ts.map +1 -0
- package/dist/company-entity-resolver.js +282 -0
- package/dist/company-entity-resolver.js.map +1 -0
- package/dist/configure.d.ts +15 -0
- package/dist/configure.d.ts.map +1 -0
- package/dist/configure.js +19 -0
- package/dist/configure.js.map +1 -0
- package/dist/fec-entity-resolution.d.ts +88 -0
- package/dist/fec-entity-resolution.d.ts.map +1 -0
- package/dist/fec-entity-resolution.js +407 -0
- package/dist/fec-entity-resolution.js.map +1 -0
- package/dist/index.d.ts +30 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +33 -0
- package/dist/index.js.map +1 -0
- package/dist/industry-taxonomy.d.ts +90 -0
- package/dist/industry-taxonomy.d.ts.map +1 -0
- package/dist/industry-taxonomy.js +1026 -0
- package/dist/industry-taxonomy.js.map +1 -0
- package/dist/lda-issue-policy-map.d.ts +13 -0
- package/dist/lda-issue-policy-map.d.ts.map +1 -0
- package/dist/lda-issue-policy-map.js +193 -0
- package/dist/lda-issue-policy-map.js.map +1 -0
- package/dist/lobbying-committee-resolver.d.ts +23 -0
- package/dist/lobbying-committee-resolver.d.ts.map +1 -0
- package/dist/lobbying-committee-resolver.js +158 -0
- package/dist/lobbying-committee-resolver.js.map +1 -0
- package/dist/logger.d.ts +20 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +20 -0
- package/dist/logger.js.map +1 -0
- package/dist/sic-sector-map.d.ts +32 -0
- package/dist/sic-sector-map.d.ts.map +1 -0
- package/dist/sic-sector-map.js +109 -0
- package/dist/sic-sector-map.js.map +1 -0
- package/dist/ticker-industry-resolver.d.ts +22 -0
- package/dist/ticker-industry-resolver.d.ts.map +1 -0
- package/dist/ticker-industry-resolver.js +254 -0
- package/dist/ticker-industry-resolver.js.map +1 -0
- package/dist/types.d.ts +30 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +6 -0
- package/dist/types.js.map +1 -0
- package/package.json +51 -0
|
@@ -0,0 +1,1026 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) 2019-2025 Mark Sandford
|
|
3
|
+
* Licensed under the MIT License. See LICENSE and NOTICE files.
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Industry Taxonomy & Categorization System
|
|
7
|
+
*
|
|
8
|
+
* Categorizes FEC contributor employers and occupations into standardized industry sectors.
|
|
9
|
+
* Based on OpenSecrets' 13-sector classification model with keyword-based categorization.
|
|
10
|
+
*
|
|
11
|
+
* Sectors (inspired by OpenSecrets):
|
|
12
|
+
* 1. Agribusiness
|
|
13
|
+
* 2. Communications/Electronics
|
|
14
|
+
* 3. Construction
|
|
15
|
+
* 4. Defense
|
|
16
|
+
* 5. Energy/Natural Resources
|
|
17
|
+
* 6. Finance/Insurance/Real Estate
|
|
18
|
+
* 7. Health
|
|
19
|
+
* 8. Lawyers & Lobbyists
|
|
20
|
+
* 9. Transportation
|
|
21
|
+
* 10. Misc Business
|
|
22
|
+
* 11. Labor
|
|
23
|
+
* 12. Ideology/Single-Issue
|
|
24
|
+
* 13. Other/Unknown
|
|
25
|
+
*/
|
|
26
|
+
import { getLogger } from './logger';
|
|
27
|
+
/**
 * Industry sector enumeration.
 *
 * Maps each sector constant to its human-readable label. The labels are the
 * values carried in the `sector` field of categorization results.
 */
export var IndustrySector;
IndustrySector = Object.assign(IndustrySector || {}, {
    AGRIBUSINESS: 'Agribusiness',
    COMMUNICATIONS_ELECTRONICS: 'Communications/Electronics',
    CONSTRUCTION: 'Construction',
    DEFENSE: 'Defense',
    ENERGY_NATURAL_RESOURCES: 'Energy/Natural Resources',
    FINANCE_INSURANCE_REAL_ESTATE: 'Finance/Insurance/Real Estate',
    HEALTH: 'Health',
    LAWYERS_LOBBYISTS: 'Lawyers & Lobbyists',
    TRANSPORTATION: 'Transportation',
    MISC_BUSINESS: 'Misc Business',
    LABOR: 'Labor',
    IDEOLOGY_SINGLE_ISSUE: 'Ideology/Single-Issue',
    OTHER: 'Other',
});
|
|
46
|
+
/**
 * Industry taxonomy database.
 *
 * Keyword table used to classify employers and occupations. Rows are declared
 * grouped by sector and then flattened into a single list of
 * `{ sector, category, keywords, occupationKeywords? }` objects, preserving
 * declaration order. `occupationKeywords` is present only on categories that
 * define it.
 *
 * Order is significant: categorizeContribution walks this list from top to
 * bottom using substring tests and returns on the first keyword it finds.
 */
const INDUSTRY_CATEGORIES = [
    {
        sector: IndustrySector.AGRIBUSINESS,
        categories: [
            {
                category: 'Crop Production',
                keywords: ['farm', 'farming', 'agriculture', 'crop', 'grain', 'wheat', 'corn', 'soybean'],
                occupationKeywords: ['farmer', 'agricultural', 'agronomist'],
            },
            {
                category: 'Livestock',
                keywords: ['ranch', 'cattle', 'dairy', 'livestock', 'poultry', 'beef', 'pork'],
                occupationKeywords: ['rancher', 'veterinarian'],
            },
            {
                category: 'Food Processing',
                keywords: ['food processing', 'food service', 'restaurant', 'catering', 'grocery'],
                occupationKeywords: ['chef', 'cook', 'food service'],
            },
        ],
    },
    {
        sector: IndustrySector.COMMUNICATIONS_ELECTRONICS,
        categories: [
            {
                category: 'Telecommunications',
                keywords: ['telecom', 'verizon', 'at&t', 'comcast', 'spectrum', 'phone', 'wireless', 'cellular'],
                occupationKeywords: ['telecommunications'],
            },
            {
                category: 'Internet/Tech',
                keywords: [
                    'google', 'amazon', 'microsoft', 'apple', 'facebook', 'meta', 'twitter',
                    'software', 'tech', 'technology', 'internet', 'web', 'digital',
                ],
                occupationKeywords: ['software engineer', 'developer', 'programmer', 'data scientist', 'tech'],
            },
            {
                category: 'TV/Movies/Music',
                keywords: [
                    'television', 'movie', 'film', 'music', 'entertainment',
                    'media', 'broadcasting', 'netflix', 'disney',
                ],
                occupationKeywords: ['actor', 'producer', 'musician', 'artist'],
            },
            {
                category: 'Electronics Manufacturing',
                keywords: ['electronics', 'semiconductor', 'chip', 'circuit', 'intel', 'nvidia', 'amd'],
                occupationKeywords: ['electrical engineer', 'electronics'],
            },
        ],
    },
    {
        sector: IndustrySector.CONSTRUCTION,
        categories: [
            {
                category: 'General Contractors',
                keywords: ['construction', 'contractor', 'builder', 'building', 'developer', 'remodeling'],
                occupationKeywords: ['contractor', 'construction', 'builder'],
            },
            {
                category: 'Home Builders',
                keywords: ['home builder', 'homebuilder', 'residential construction', 'housing'],
            },
            {
                category: 'Construction Services',
                keywords: ['plumbing', 'electrical', 'hvac', 'roofing', 'carpentry'],
                occupationKeywords: ['plumber', 'electrician', 'carpenter', 'roofer'],
            },
        ],
    },
    {
        sector: IndustrySector.DEFENSE,
        categories: [
            {
                category: 'Defense Aerospace',
                keywords: [
                    'boeing', 'lockheed', 'raytheon', 'northrop grumman',
                    'general dynamics', 'defense', 'aerospace', 'military',
                ],
                occupationKeywords: ['defense', 'military'],
            },
            {
                category: 'Defense Electronics',
                keywords: ['defense electronics', 'missile', 'weapons systems'],
            },
        ],
    },
    {
        sector: IndustrySector.ENERGY_NATURAL_RESOURCES,
        categories: [
            {
                category: 'Oil & Gas',
                keywords: ['oil', 'gas', 'petroleum', 'exxon', 'chevron', 'shell', 'bp', 'energy', 'drilling'],
                occupationKeywords: ['petroleum engineer', 'oil', 'gas'],
            },
            {
                category: 'Electric Utilities',
                keywords: ['electric', 'utility', 'power', 'electricity', 'grid'],
                occupationKeywords: ['power plant', 'utility'],
            },
            {
                category: 'Renewable Energy',
                keywords: ['solar', 'wind', 'renewable', 'clean energy', 'green energy'],
            },
            {
                category: 'Mining',
                keywords: ['mining', 'coal', 'mineral', 'extraction'],
                occupationKeywords: ['miner', 'mining'],
            },
        ],
    },
    {
        sector: IndustrySector.FINANCE_INSURANCE_REAL_ESTATE,
        categories: [
            {
                category: 'Commercial Banks',
                keywords: [
                    'bank', 'banking', 'chase', 'wells fargo',
                    'bank of america', 'citibank', 'financial services',
                ],
                occupationKeywords: ['banker', 'banking'],
            },
            {
                category: 'Insurance',
                keywords: [
                    'insurance', 'allstate', 'state farm', 'geico',
                    'progressive', 'life insurance', 'health insurance',
                ],
                occupationKeywords: ['insurance', 'actuary'],
            },
            {
                category: 'Real Estate',
                keywords: ['real estate', 'realty', 'property management', 'realtor'],
                occupationKeywords: ['realtor', 'real estate agent', 'property manager'],
            },
            {
                category: 'Securities & Investment',
                keywords: [
                    'investment', 'securities', 'hedge fund', 'private equity',
                    'venture capital', 'asset management', 'goldman sachs', 'morgan stanley',
                ],
                occupationKeywords: ['financial advisor', 'investment', 'trader', 'analyst'],
            },
            {
                category: 'Accounting',
                keywords: ['accounting', 'accountant', 'cpa', 'kpmg', 'deloitte', 'pwc', 'ernst & young'],
                occupationKeywords: ['accountant', 'cpa', 'auditor'],
            },
        ],
    },
    {
        sector: IndustrySector.HEALTH,
        categories: [
            {
                category: 'Health Professionals',
                keywords: ['hospital', 'clinic', 'medical center', 'health system'],
                occupationKeywords: [
                    'physician', 'doctor', 'nurse', 'surgeon',
                    'medical', 'healthcare', 'dentist',
                ],
            },
            {
                category: 'Pharmaceuticals',
                keywords: ['pharmaceutical', 'pharma', 'pfizer', 'merck', 'johnson & johnson', 'drug'],
                occupationKeywords: ['pharmacist', 'pharmaceutical'],
            },
            {
                category: 'Health Insurance',
                keywords: ['health insurance', 'uhc', 'aetna', 'cigna', 'anthem', 'humana'],
            },
            {
                category: 'Medical Devices',
                keywords: ['medical device', 'medtronic', 'abbott'],
            },
        ],
    },
    {
        sector: IndustrySector.LAWYERS_LOBBYISTS,
        categories: [
            {
                category: 'Law Firms',
                keywords: ['law firm', 'legal', 'attorney', 'esquire'],
                occupationKeywords: ['attorney', 'lawyer', 'legal', 'counsel', 'paralegal'],
            },
            {
                category: 'Lobbyists',
                keywords: ['lobbying', 'government relations', 'public affairs'],
                occupationKeywords: ['lobbyist', 'government relations'],
            },
        ],
    },
    {
        sector: IndustrySector.TRANSPORTATION,
        categories: [
            {
                category: 'Air Transport',
                keywords: ['airline', 'aviation', 'american airlines', 'delta', 'united airlines'],
                occupationKeywords: ['pilot', 'flight attendant', 'aviation'],
            },
            {
                category: 'Automotive',
                keywords: ['automotive', 'auto', 'ford', 'gm', 'toyota', 'honda', 'car', 'vehicle'],
                occupationKeywords: ['automotive', 'mechanic'],
            },
            {
                category: 'Railroads',
                keywords: ['railroad', 'rail', 'amtrak', 'freight rail'],
            },
            {
                category: 'Trucking',
                keywords: ['trucking', 'freight', 'logistics', 'shipping', 'ups', 'fedex'],
                occupationKeywords: ['truck driver', 'driver'],
            },
        ],
    },
    {
        sector: IndustrySector.MISC_BUSINESS,
        categories: [
            {
                category: 'Retail',
                keywords: ['retail', 'walmart', 'target', 'costco', 'store', 'shop', 'grocery'],
                occupationKeywords: ['retail', 'sales', 'cashier', 'merchandis'],
            },
            {
                category: 'Manufacturing',
                keywords: ['manufacturing', 'factory', 'production', 'industrial'],
                occupationKeywords: ['engineer', 'manufacturing'],
            },
            {
                category: 'Business Services',
                keywords: [
                    'consulting', 'consultant', 'business services',
                    'management', 'staffing', 'recruiting',
                ],
                occupationKeywords: [
                    'consultant', 'business analyst', 'executive',
                    'ceo', 'cfo', 'coo', 'cto',
                    'president', 'vice president', 'owner', 'partner',
                    'director', 'manager', 'founder', 'entrepreneur',
                    'business owner', 'principal', 'managing director',
                ],
            },
            {
                category: 'Chemical',
                keywords: ['chemical', 'dow', 'dupont'],
                occupationKeywords: ['chemical engineer', 'chemist'],
            },
            {
                category: 'Lodging/Tourism',
                keywords: ['hotel', 'resort', 'hospitality', 'tourism', 'marriott', 'hilton'],
                occupationKeywords: ['hotel', 'hospitality'],
            },
        ],
    },
    {
        sector: IndustrySector.LABOR,
        categories: [
            {
                category: 'Labor Unions',
                keywords: [
                    'union', 'afl-cio', 'teamsters', 'seiu',
                    'uaw', 'afscme', 'laborers', 'steelworkers',
                ],
                occupationKeywords: ['union'],
            },
        ],
    },
    {
        sector: IndustrySector.IDEOLOGY_SINGLE_ISSUE,
        categories: [
            {
                category: 'Non-Profit/Advocacy',
                keywords: [
                    'non-profit', 'nonprofit', 'foundation', 'charity',
                    'advocacy', 'association', 'organization',
                ],
                occupationKeywords: ['nonprofit', 'advocacy'],
            },
            {
                category: 'Education',
                keywords: ['school', 'university', 'college', 'education', 'academic'],
                occupationKeywords: ['teacher', 'professor', 'educator', 'principal'],
            },
            {
                category: 'Religious Organizations',
                keywords: ['church', 'religious', 'ministry', 'faith'],
                occupationKeywords: ['minister', 'pastor', 'clergy'],
            },
        ],
    },
    {
        sector: IndustrySector.OTHER,
        categories: [
            {
                category: 'Government',
                keywords: ['government', 'federal', 'state', 'city', 'county', 'municipal', 'public sector'],
                occupationKeywords: ['government', 'public'],
            },
            {
                category: 'Retired',
                keywords: ['retired', 'retirement', 'retiree'],
                occupationKeywords: ['retired', 'retiree'],
            },
            {
                category: 'Not Employed',
                keywords: ['not employed', 'unemployed', 'homemaker', 'home maker'],
                occupationKeywords: ['not employed', 'homemaker', 'home maker', 'student', 'unemployed'],
            },
        ],
    },
].flatMap(({ sector, categories }) => categories.map((entry) => ({ sector, ...entry })));
|
|
435
|
+
/**
 * Employer values that carry no industry signal.
 *
 * Entries are lowercase; categorizeContribution lowercases and trims the
 * employer before testing membership, and the comparison is an exact match on
 * the whole value. When the employer is one of these, employer keyword matching
 * is skipped and classification falls through to the occupation.
 */
const NON_INFORMATIVE_EMPLOYERS = new Set([
    // Blank or "not applicable" answers
    'none', 'n/a', 'na', 'not employed', 'not applicable',
    // Information-requested placeholders and refusals
    'information requested', 'information requested per best efforts', 'info requested', 'refused',
    // Self-employment variants
    'self', 'self-employed', 'self employed', 'selfemployed', 'independent',
    // Generic non-answers
    'private', 'personal', 'individual',
]);
|
|
458
|
+
/**
 * Categorize a contribution based on employer and occupation.
 *
 * Resolution order:
 *   1. Employer keywords (confidence 'high'). Skipped when the employer is
 *      blank or is a placeholder listed in NON_INFORMATIVE_EMPLOYERS.
 *   2. Occupation keywords (confidence 'medium').
 *   3. Placeholder employer with an occupation that matched nothing:
 *      Misc Business / 'Business Services' (confidence 'low').
 *   4. Anything else: Other / 'Other/Unknown' (confidence 'low').
 *
 * Keyword tests are case-insensitive substring checks. INDUSTRY_CATEGORIES is
 * scanned in declaration order and the first keyword found wins.
 *
 * NOTE(review): substring matching produces mid-word hits (the Automotive
 * keywords 'ford' and 'car' are contained in "stanford" and "healthcare"), and
 * generic keywords declared early are reached before more specific ones
 * declared later ('farm' before 'state farm'). The matching rule is left
 * unchanged here because altering it would reclassify existing data.
 *
 * @param {string|null|undefined} employer - Employer as reported. Non-string
 *   values are treated as missing instead of raising a TypeError.
 * @param {string|null|undefined} occupation - Occupation as reported. Non-string
 *   values are treated as missing.
 * @returns {{sector: string, category: string, confidence: ('high'|'medium'|'low'),
 *   matchedKeyword: (string|undefined), matchSource: ('employer'|'occupation'|'inferred')}}
 *   `matchedKeyword` is omitted when nothing matched.
 */
export function categorizeContribution(employer, occupation) {
    if (!employer && !occupation) {
        return {
            sector: IndustrySector.OTHER,
            category: 'Unknown',
            confidence: 'low',
            matchSource: 'inferred',
        };
    }
    const normalize = (value) => (typeof value === 'string' ? value.toLowerCase().trim() : '');
    const employerLower = normalize(employer);
    const occupationLower = normalize(occupation);
    // Placeholders such as "NONE" or "SELF-EMPLOYED" say nothing about the industry.
    const isPlaceholderEmployer = NON_INFORMATIVE_EMPLOYERS.has(employerLower);
    // First category (in table order) owning a keyword contained in `text`.
    // `field` selects which keyword list to test; categories lacking it are skipped.
    const firstKeywordHit = (text, field) => {
        for (const industry of INDUSTRY_CATEGORIES) {
            const keyword = industry[field]?.find((candidate) => text.includes(candidate.toLowerCase()));
            if (keyword !== undefined) {
                return { industry, keyword };
            }
        }
        return null;
    };
    // Employer match first (higher confidence), only when the employer is informative.
    if (employerLower !== '' && !isPlaceholderEmployer) {
        const hit = firstKeywordHit(employerLower, 'keywords');
        if (hit) {
            return {
                sector: hit.industry.sector,
                category: hit.industry.category,
                confidence: 'high',
                matchedKeyword: hit.keyword,
                matchSource: 'employer',
            };
        }
    }
    // Occupation match (medium confidence).
    if (occupationLower) {
        const hit = firstKeywordHit(occupationLower, 'occupationKeywords');
        if (hit) {
            return {
                sector: hit.industry.sector,
                category: hit.industry.category,
                confidence: 'medium',
                matchedKeyword: hit.keyword,
                matchSource: 'occupation',
            };
        }
    }
    // Self-employed / placeholder employer with an occupation no category claimed.
    // Homemaker, student, unemployed and "not employed" occupations never reach
    // this point: the 'Not Employed' row of INDUSTRY_CATEGORIES lists them as
    // occupation keywords, so the occupation scan above has already returned.
    if (isPlaceholderEmployer && occupationLower) {
        return {
            sector: IndustrySector.MISC_BUSINESS,
            category: 'Business Services',
            confidence: 'low',
            matchedKeyword: 'self-employed',
            matchSource: 'inferred',
        };
    }
    // No match found.
    return {
        sector: IndustrySector.OTHER,
        category: 'Other/Unknown',
        confidence: 'low',
        matchSource: 'inferred',
    };
}
|
|
546
|
+
/**
 * Aggregate contributions by industry sector.
 *
 * Each contribution is classified by passing its `contributor_employer`,
 * `contributor_occupation` and `contributor_name` to
 * categorizeContributionSmart (defined outside this excerpt), then its
 * `contribution_receipt_amount` is added to the matching sector and category.
 *
 * Amounts are coerced with Number(); a missing or non-numeric amount counts as
 * 0 so that one malformed record cannot turn every total and percentage into
 * NaN. Such records are still included in the contribution counts.
 *
 * @param {Iterable<Object>} contributions - Contribution records exposing the
 *   fields named above. The debug log line also reads `contributions.length`.
 * @returns {Array<{sector: string, totalAmount: number, contributionCount: number,
 *   percentage: number, categories: Map<string, {amount: number, count: number}>}>}
 *   One entry per sector seen, sorted by `totalAmount` descending. `percentage`
 *   is the sector's share of the grand total (0 when the total is not positive).
 */
export function aggregateByIndustrySector(contributions) {
    const sectorMap = new Map();
    let totalContributions = 0;
    for (const contrib of contributions) {
        const categorization = categorizeContributionSmart(contrib.contributor_employer, contrib.contributor_occupation, contrib.contributor_name);
        // Guard against undefined/NaN/string amounts poisoning the running sums.
        const parsedAmount = Number(contrib.contribution_receipt_amount);
        const amount = Number.isFinite(parsedAmount) ? parsedAmount : 0;
        totalContributions += amount;
        const existing = sectorMap.get(categorization.sector) || {
            totalAmount: 0,
            contributionCount: 0,
            categories: new Map(),
        };
        existing.totalAmount += amount;
        existing.contributionCount++;
        const categoryData = existing.categories.get(categorization.category) || {
            amount: 0,
            count: 0,
        };
        categoryData.amount += amount;
        categoryData.count++;
        existing.categories.set(categorization.category, categoryData);
        sectorMap.set(categorization.sector, existing);
    }
    const result = Array.from(sectorMap.entries()).map(([sector, data]) => ({
        sector,
        totalAmount: data.totalAmount,
        contributionCount: data.contributionCount,
        percentage: totalContributions > 0 ? (data.totalAmount / totalContributions) * 100 : 0,
        categories: data.categories,
    }));
    getLogger().debug(`[Industry Taxonomy] Categorized ${contributions.length} contributions into ${result.length} sectors`);
    return result.sort((a, b) => b.totalAmount - a.totalAmount);
}
|
|
584
|
+
/**
|
|
585
|
+
* PAC/Committee Name Keywords for Industry Classification
|
|
586
|
+
* Maps committee names to industry sectors (OpenSecrets-style)
|
|
587
|
+
*/
|
|
588
|
+
const PAC_NAME_KEYWORDS = [
|
|
589
|
+
// HEALTH
|
|
590
|
+
{
|
|
591
|
+
sector: IndustrySector.HEALTH,
|
|
592
|
+
category: 'Health Professionals',
|
|
593
|
+
keywords: [
|
|
594
|
+
'medical',
|
|
595
|
+
'physician',
|
|
596
|
+
'doctor',
|
|
597
|
+
'nurse',
|
|
598
|
+
'dental',
|
|
599
|
+
'hospital',
|
|
600
|
+
'health',
|
|
601
|
+
'healthcare',
|
|
602
|
+
'ama ',
|
|
603
|
+
'american medical',
|
|
604
|
+
],
|
|
605
|
+
},
|
|
606
|
+
{
|
|
607
|
+
sector: IndustrySector.HEALTH,
|
|
608
|
+
category: 'Pharmaceuticals',
|
|
609
|
+
keywords: ['pharma', 'pfizer', 'merck', 'johnson', 'lilly', 'abbvie', 'bristol', 'novartis'],
|
|
610
|
+
},
|
|
611
|
+
{
|
|
612
|
+
sector: IndustrySector.HEALTH,
|
|
613
|
+
category: 'Health Insurance',
|
|
614
|
+
keywords: ['blue cross', 'aetna', 'cigna', 'humana', 'anthem', 'kaiser', 'united health'],
|
|
615
|
+
},
|
|
616
|
+
// FINANCE
|
|
617
|
+
{
|
|
618
|
+
sector: IndustrySector.FINANCE_INSURANCE_REAL_ESTATE,
|
|
619
|
+
category: 'Commercial Banks',
|
|
620
|
+
keywords: [
|
|
621
|
+
'bank',
|
|
622
|
+
'banker',
|
|
623
|
+
'chase',
|
|
624
|
+
'wells fargo',
|
|
625
|
+
'citibank',
|
|
626
|
+
'jpmorgan',
|
|
627
|
+
'credit union',
|
|
628
|
+
'financial',
|
|
629
|
+
],
|
|
630
|
+
},
|
|
631
|
+
{
|
|
632
|
+
sector: IndustrySector.FINANCE_INSURANCE_REAL_ESTATE,
|
|
633
|
+
category: 'Securities & Investment',
|
|
634
|
+
keywords: [
|
|
635
|
+
'goldman',
|
|
636
|
+
'morgan stanley',
|
|
637
|
+
'investment',
|
|
638
|
+
'securities',
|
|
639
|
+
'hedge',
|
|
640
|
+
'capital',
|
|
641
|
+
'blackrock',
|
|
642
|
+
'fidelity',
|
|
643
|
+
],
|
|
644
|
+
},
|
|
645
|
+
{
|
|
646
|
+
sector: IndustrySector.FINANCE_INSURANCE_REAL_ESTATE,
|
|
647
|
+
category: 'Insurance',
|
|
648
|
+
keywords: ['insurance', 'allstate', 'state farm', 'progressive', 'geico', 'mutual', 'life ins'],
|
|
649
|
+
},
|
|
650
|
+
{
|
|
651
|
+
sector: IndustrySector.FINANCE_INSURANCE_REAL_ESTATE,
|
|
652
|
+
category: 'Real Estate',
|
|
653
|
+
keywords: ['realtor', 'real estate', 'realty', 'homebuilder', 'mortgage', 'property'],
|
|
654
|
+
},
|
|
655
|
+
// LABOR
|
|
656
|
+
{
|
|
657
|
+
sector: IndustrySector.LABOR,
|
|
658
|
+
category: 'Labor Unions',
|
|
659
|
+
keywords: [
|
|
660
|
+
'union',
|
|
661
|
+
'teamster',
|
|
662
|
+
'seiu',
|
|
663
|
+
'afscme',
|
|
664
|
+
'afl-cio',
|
|
665
|
+
'uaw',
|
|
666
|
+
'steelworker',
|
|
667
|
+
'laborer',
|
|
668
|
+
'ibew',
|
|
669
|
+
'ufcw',
|
|
670
|
+
'carpenters',
|
|
671
|
+
'plumbers',
|
|
672
|
+
'pipefitters',
|
|
673
|
+
'electrical workers',
|
|
674
|
+
'teachers',
|
|
675
|
+
'firefighter',
|
|
676
|
+
'police',
|
|
677
|
+
'working families',
|
|
678
|
+
],
|
|
679
|
+
},
|
|
680
|
+
// LAWYERS
|
|
681
|
+
{
|
|
682
|
+
sector: IndustrySector.LAWYERS_LOBBYISTS,
|
|
683
|
+
category: 'Lawyers/Law Firms',
|
|
684
|
+
keywords: [
|
|
685
|
+
'law',
|
|
686
|
+
'lawyer',
|
|
687
|
+
'attorney',
|
|
688
|
+
'legal',
|
|
689
|
+
'trial',
|
|
690
|
+
'justice',
|
|
691
|
+
'tort',
|
|
692
|
+
'litigation',
|
|
693
|
+
'bar association',
|
|
694
|
+
],
|
|
695
|
+
},
|
|
696
|
+
// ENERGY
|
|
697
|
+
{
|
|
698
|
+
sector: IndustrySector.ENERGY_NATURAL_RESOURCES,
|
|
699
|
+
category: 'Oil & Gas',
|
|
700
|
+
keywords: [
|
|
701
|
+
'oil',
|
|
702
|
+
'gas',
|
|
703
|
+
'petroleum',
|
|
704
|
+
'exxon',
|
|
705
|
+
'chevron',
|
|
706
|
+
'shell',
|
|
707
|
+
'bp ',
|
|
708
|
+
'conocophillips',
|
|
709
|
+
'energy',
|
|
710
|
+
'drilling',
|
|
711
|
+
'pipeline',
|
|
712
|
+
],
|
|
713
|
+
},
|
|
714
|
+
{
|
|
715
|
+
sector: IndustrySector.ENERGY_NATURAL_RESOURCES,
|
|
716
|
+
category: 'Electric Utilities',
|
|
717
|
+
keywords: ['electric', 'utility', 'power', 'grid', 'edison', 'duke energy', 'exelon'],
|
|
718
|
+
},
|
|
719
|
+
// DEFENSE
|
|
720
|
+
{
|
|
721
|
+
sector: IndustrySector.DEFENSE,
|
|
722
|
+
category: 'Defense Aerospace',
|
|
723
|
+
keywords: [
|
|
724
|
+
'defense',
|
|
725
|
+
'lockheed',
|
|
726
|
+
'raytheon',
|
|
727
|
+
'boeing',
|
|
728
|
+
'northrop',
|
|
729
|
+
'general dynamics',
|
|
730
|
+
'bae ',
|
|
731
|
+
'l3harris',
|
|
732
|
+
'aerospace',
|
|
733
|
+
'military',
|
|
734
|
+
],
|
|
735
|
+
},
|
|
736
|
+
// COMMUNICATIONS/TECH
|
|
737
|
+
{
|
|
738
|
+
sector: IndustrySector.COMMUNICATIONS_ELECTRONICS,
|
|
739
|
+
category: 'Internet/Tech',
|
|
740
|
+
keywords: [
|
|
741
|
+
'google',
|
|
742
|
+
'microsoft',
|
|
743
|
+
'apple',
|
|
744
|
+
'amazon',
|
|
745
|
+
'meta',
|
|
746
|
+
'facebook',
|
|
747
|
+
'tech',
|
|
748
|
+
'software',
|
|
749
|
+
'computer',
|
|
750
|
+
'internet',
|
|
751
|
+
'oracle',
|
|
752
|
+
'intel',
|
|
753
|
+
'cisco',
|
|
754
|
+
'ibm',
|
|
755
|
+
],
|
|
756
|
+
},
|
|
757
|
+
{
|
|
758
|
+
sector: IndustrySector.COMMUNICATIONS_ELECTRONICS,
|
|
759
|
+
category: 'Telecommunications',
|
|
760
|
+
keywords: ['telecom', 'verizon', 'at&t', 'comcast', 'charter', 't-mobile', 'wireless'],
|
|
761
|
+
},
|
|
762
|
+
{
|
|
763
|
+
sector: IndustrySector.COMMUNICATIONS_ELECTRONICS,
|
|
764
|
+
category: 'TV/Movies/Music',
|
|
765
|
+
keywords: [
|
|
766
|
+
'entertainment',
|
|
767
|
+
'movie',
|
|
768
|
+
'film',
|
|
769
|
+
'television',
|
|
770
|
+
'broadcast',
|
|
771
|
+
'media',
|
|
772
|
+
'disney',
|
|
773
|
+
'warner',
|
|
774
|
+
'fox',
|
|
775
|
+
'screen actors',
|
|
776
|
+
],
|
|
777
|
+
},
|
|
778
|
+
// TRANSPORTATION
|
|
779
|
+
{
|
|
780
|
+
sector: IndustrySector.TRANSPORTATION,
|
|
781
|
+
category: 'Air Transport',
|
|
782
|
+
keywords: ['airline', 'aviation', 'pilot', 'air transport', 'delta', 'united', 'american air'],
|
|
783
|
+
},
|
|
784
|
+
{
|
|
785
|
+
sector: IndustrySector.TRANSPORTATION,
|
|
786
|
+
category: 'Automotive',
|
|
787
|
+
keywords: [
|
|
788
|
+
'auto',
|
|
789
|
+
'automobile',
|
|
790
|
+
'car dealer',
|
|
791
|
+
'ford',
|
|
792
|
+
'gm ',
|
|
793
|
+
'general motors',
|
|
794
|
+
'toyota',
|
|
795
|
+
'honda',
|
|
796
|
+
],
|
|
797
|
+
},
|
|
798
|
+
{
|
|
799
|
+
sector: IndustrySector.TRANSPORTATION,
|
|
800
|
+
category: 'Trucking',
|
|
801
|
+
keywords: ['trucking', 'freight', 'logistics', 'ups', 'fedex', 'shipping'],
|
|
802
|
+
},
|
|
803
|
+
{
|
|
804
|
+
sector: IndustrySector.TRANSPORTATION,
|
|
805
|
+
category: 'Railroads',
|
|
806
|
+
keywords: ['railroad', 'rail', 'amtrak', 'bnsf', 'union pacific', 'csx'],
|
|
807
|
+
},
|
|
808
|
+
// AGRIBUSINESS
|
|
809
|
+
{
|
|
810
|
+
sector: IndustrySector.AGRIBUSINESS,
|
|
811
|
+
category: 'Crop Production',
|
|
812
|
+
keywords: ['farm', 'farmer', 'agriculture', 'crop', 'grain', 'corn', 'soybean', 'cotton'],
|
|
813
|
+
},
|
|
814
|
+
{
|
|
815
|
+
sector: IndustrySector.AGRIBUSINESS,
|
|
816
|
+
category: 'Food Processing',
|
|
817
|
+
keywords: [
|
|
818
|
+
'food',
|
|
819
|
+
'restaurant',
|
|
820
|
+
'grocery',
|
|
821
|
+
'beverage',
|
|
822
|
+
'coca-cola',
|
|
823
|
+
'pepsi',
|
|
824
|
+
'tyson',
|
|
825
|
+
'cargill',
|
|
826
|
+
],
|
|
827
|
+
},
|
|
828
|
+
// CONSTRUCTION
|
|
829
|
+
{
|
|
830
|
+
sector: IndustrySector.CONSTRUCTION,
|
|
831
|
+
category: 'General Contractors',
|
|
832
|
+
keywords: ['construction', 'contractor', 'builder', 'building trade', 'cement', 'steel'],
|
|
833
|
+
},
|
|
834
|
+
{
|
|
835
|
+
sector: IndustrySector.CONSTRUCTION,
|
|
836
|
+
category: 'Home Builders',
|
|
837
|
+
keywords: ['home builder', 'homebuilder', 'residential', 'housing'],
|
|
838
|
+
},
|
|
839
|
+
// MISC BUSINESS
|
|
840
|
+
{
|
|
841
|
+
sector: IndustrySector.MISC_BUSINESS,
|
|
842
|
+
category: 'Retail',
|
|
843
|
+
keywords: ['retail', 'walmart', 'target', 'store', 'merchant', 'shop'],
|
|
844
|
+
},
|
|
845
|
+
{
|
|
846
|
+
sector: IndustrySector.MISC_BUSINESS,
|
|
847
|
+
category: 'Manufacturing',
|
|
848
|
+
keywords: ['manufacturing', 'manufacturer', 'industrial', 'factory'],
|
|
849
|
+
},
|
|
850
|
+
{
|
|
851
|
+
sector: IndustrySector.MISC_BUSINESS,
|
|
852
|
+
category: 'Business Services',
|
|
853
|
+
keywords: ['business', 'chamber of commerce', 'nfib', 'small business'],
|
|
854
|
+
},
|
|
855
|
+
// IDEOLOGY
|
|
856
|
+
{
|
|
857
|
+
sector: IndustrySector.IDEOLOGY_SINGLE_ISSUE,
|
|
858
|
+
category: 'Pro-Israel',
|
|
859
|
+
keywords: ['israel', 'aipac', 'jewish', 'zionist'],
|
|
860
|
+
},
|
|
861
|
+
{
|
|
862
|
+
sector: IndustrySector.IDEOLOGY_SINGLE_ISSUE,
|
|
863
|
+
category: 'Gun Rights',
|
|
864
|
+
keywords: ['rifle', 'nra', 'gun', 'firearm', 'second amendment', '2nd amendment'],
|
|
865
|
+
},
|
|
866
|
+
{
|
|
867
|
+
sector: IndustrySector.IDEOLOGY_SINGLE_ISSUE,
|
|
868
|
+
category: 'Pro-Choice',
|
|
869
|
+
keywords: ['planned parenthood', 'naral', 'emily', 'pro-choice', 'reproductive'],
|
|
870
|
+
},
|
|
871
|
+
{
|
|
872
|
+
sector: IndustrySector.IDEOLOGY_SINGLE_ISSUE,
|
|
873
|
+
category: 'Environment',
|
|
874
|
+
keywords: ['environment', 'sierra', 'conservation', 'climate', 'green', 'lcv'],
|
|
875
|
+
},
|
|
876
|
+
{
|
|
877
|
+
sector: IndustrySector.IDEOLOGY_SINGLE_ISSUE,
|
|
878
|
+
category: 'Human Rights',
|
|
879
|
+
keywords: ['human rights', 'civil rights', 'aclu', 'naacp', 'equality'],
|
|
880
|
+
},
|
|
881
|
+
{
|
|
882
|
+
sector: IndustrySector.IDEOLOGY_SINGLE_ISSUE,
|
|
883
|
+
category: 'Education',
|
|
884
|
+
keywords: ['education', 'teacher', 'school', 'university', 'college', 'nea ', 'aft '],
|
|
885
|
+
},
|
|
886
|
+
];
|
|
887
|
+
/**
 * Categorize a PAC/committee by its name.
 * Used for committee-to-committee transfers and PAC contributions.
 *
 * Rules are tried in order and the first hit wins:
 *   1. party committees (Democratic first, then Republican),
 *   2. joint fundraising / victory funds,
 *   3. the industry keyword lists in PAC_NAME_KEYWORDS,
 *   4. a generic "pac" / "committee" fallback.
 * All matching is case-insensitive substring matching.
 *
 * @param {string} committeeName - Committee or PAC name to classify.
 * @returns {object} An object with `sector`, `category`, `confidence`
 *   ('high' | 'medium' | 'low') and `matchSource`; `matchedKeyword` is
 *   present only when a party, joint-fundraising or industry rule matched.
 */
export function categorizePACByName(committeeName) {
    // A missing name cannot be classified; neither can a non-string value,
    // which would otherwise throw on toLowerCase() below.
    if (!committeeName || typeof committeeName !== 'string') {
        return {
            sector: IndustrySector.OTHER,
            category: 'Unknown PAC',
            confidence: 'low',
            matchSource: 'inferred',
        };
    }
    const nameLower = committeeName.toLowerCase();
    const nameHasAny = (fragments) => fragments.some((fragment) => nameLower.includes(fragment));
    // Check for political party committees first
    if (nameHasAny(['democratic', 'dccc', 'dscc', 'dnc'])) {
        return {
            sector: IndustrySector.OTHER,
            category: 'Democratic Party',
            confidence: 'high',
            matchedKeyword: 'democratic',
            matchSource: 'employer',
        };
    }
    if (nameHasAny(['republican', 'nrcc', 'nrsc', 'rnc'])) {
        return {
            sector: IndustrySector.OTHER,
            category: 'Republican Party',
            confidence: 'high',
            matchedKeyword: 'republican',
            matchSource: 'employer',
        };
    }
    // Check for joint fundraising/victory funds (pass-through)
    if (nameHasAny([
        'victory',
        'joint',
        'senate 20',
        'house 20',
        'blue senate',
        'red senate',
        'making history',
        'north stars',
    ])) {
        return {
            sector: IndustrySector.OTHER,
            category: 'Joint Fundraising',
            confidence: 'medium',
            matchedKeyword: 'joint-fund',
            matchSource: 'employer',
        };
    }
    // Check industry-specific PAC names.
    // Some keywords end in a space so they only match a whole trailing token
    // ('gm ', 'bp ', 'nea '). Searching the name with one space appended lets
    // those keywords also match when the token is the last word of the name;
    // keywords without a trailing space are unaffected by the padding.
    const paddedName = `${nameLower} `;
    for (const pacCategory of PAC_NAME_KEYWORDS) {
        const keyword = pacCategory.keywords.find((candidate) => paddedName.includes(candidate.toLowerCase()));
        if (keyword !== undefined) {
            return {
                sector: pacCategory.sector,
                category: pacCategory.category,
                confidence: 'high',
                matchedKeyword: keyword,
                matchSource: 'employer',
            };
        }
    }
    // Generic PAC fallback: looks like a committee, but no rule identified it.
    if (nameHasAny(['pac', 'committee'])) {
        return {
            sector: IndustrySector.OTHER,
            category: 'Unclassified PAC',
            confidence: 'low',
            matchSource: 'inferred',
        };
    }
    return {
        sector: IndustrySector.OTHER,
        category: 'Unknown',
        confidence: 'low',
        matchSource: 'inferred',
    };
}
|
|
973
|
+
/**
 * Smart categorization: the employer/occupation lookup is authoritative
 * whenever it produces something usable; the contributor name (useful for
 * PACs) is consulted only when that lookup comes back empty-handed.
 *
 * @param {string} employer - Contributor employer, passed to categorizeContribution.
 * @param {string} occupation - Contributor occupation, passed to categorizeContribution.
 * @param {string} contributorName - Contributor name, passed to categorizePACByName as a fallback.
 * @returns {object} The employer/occupation result, or the name-based result
 *   when the former is a low-confidence unknown and the latter is not.
 */
export function categorizeContributionSmart(employer, occupation, contributorName) {
    const byEmployer = categorizeContribution(employer, occupation);
    const employerIsPlaceholder = byEmployer.category === 'Other/Unknown' || byEmployer.category === 'Unknown';
    // Anything other than a low-confidence placeholder is good enough to keep.
    if (byEmployer.confidence !== 'low' || !employerIsPlaceholder) {
        return byEmployer;
    }
    // Without a contributor name there is nothing left to try.
    if (!contributorName) {
        return byEmployer;
    }
    const byName = categorizePACByName(contributorName);
    const nameIsUninformative = byName.confidence === 'low' && byName.category === 'Unknown';
    return nameIsUninformative ? byEmployer : byName;
}
|
|
993
|
+
/**
 * Get top categories across all sectors.
 *
 * Each contribution is categorized with categorizeContributionSmart, grouped
 * by sector + category, and the groups are returned largest-total first.
 *
 * @param {Iterable<object>} contributions - Records read via their
 *   `contributor_employer`, `contributor_occupation`, `contributor_name` and
 *   `contribution_receipt_amount` fields. `null`/`undefined` yields `[]`.
 * @param {number} [limit=10] - Maximum number of groups to return.
 * @returns {Array<{sector: *, category: string, totalAmount: number, contributionCount: number, percentage: number}>}
 *   Groups sorted by `totalAmount` descending; `percentage` is the group's
 *   share of the summed amounts (0 when that sum is not positive).
 */
export function getTopCategories(contributions, limit = 10) {
    if (contributions == null) {
        return [];
    }
    const groups = new Map();
    let totalContributions = 0;
    for (const contrib of contributions) {
        // Use smart categorization that tries employer/occupation first, then contributor name
        const { sector, category } = categorizeContributionSmart(contrib.contributor_employer, contrib.contributor_occupation, contrib.contributor_name);
        // Coerce so numeric strings add instead of concatenating, and treat a
        // missing or non-numeric amount as 0 so one bad record cannot turn
        // every total and percentage into NaN.
        const parsedAmount = Number(contrib.contribution_receipt_amount);
        const amount = Number.isFinite(parsedAmount) ? parsedAmount : 0;
        totalContributions += amount;
        const key = `${sector}:${category}`;
        let group = groups.get(key);
        if (group === undefined) {
            // Keep the category on the entry itself: re-deriving it by splitting
            // the key on ':' breaks as soon as a sector or category contains ':'.
            group = { sector, category, totalAmount: 0, contributionCount: 0 };
            groups.set(key, group);
        }
        group.totalAmount += amount;
        group.contributionCount += 1;
    }
    return Array.from(groups.values())
        .map((group) => ({
            sector: group.sector,
            category: group.category,
            totalAmount: group.totalAmount,
            contributionCount: group.contributionCount,
            percentage: totalContributions > 0 ? (group.totalAmount / totalContributions) * 100 : 0,
        }))
        .sort((a, b) => b.totalAmount - a.totalAmount)
        .slice(0, limit);
}
|
|
1026
|
+
//# sourceMappingURL=industry-taxonomy.js.map
|