@civiq/entity-resolution 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +135 -0
  3. package/data/bioguide-fec-mapping.json +4186 -0
  4. package/data/sec-sic-data.json +10427 -0
  5. package/dist/__tests__/company-entity-resolver.test.d.ts +6 -0
  6. package/dist/__tests__/company-entity-resolver.test.d.ts.map +1 -0
  7. package/dist/__tests__/company-entity-resolver.test.js +267 -0
  8. package/dist/__tests__/company-entity-resolver.test.js.map +1 -0
  9. package/dist/__tests__/entity-resolution.test.d.ts +2 -0
  10. package/dist/__tests__/entity-resolution.test.d.ts.map +1 -0
  11. package/dist/__tests__/entity-resolution.test.js +296 -0
  12. package/dist/__tests__/entity-resolution.test.js.map +1 -0
  13. package/dist/__tests__/fec-entity-resolution.test.d.ts +2 -0
  14. package/dist/__tests__/fec-entity-resolution.test.d.ts.map +1 -0
  15. package/dist/__tests__/fec-entity-resolution.test.js +49 -0
  16. package/dist/__tests__/fec-entity-resolution.test.js.map +1 -0
  17. package/dist/bioguide-fec-mapping.d.ts +27 -0
  18. package/dist/bioguide-fec-mapping.d.ts.map +1 -0
  19. package/dist/bioguide-fec-mapping.js +57 -0
  20. package/dist/bioguide-fec-mapping.js.map +1 -0
  21. package/dist/cache.d.ts +18 -0
  22. package/dist/cache.d.ts.map +1 -0
  23. package/dist/cache.js +18 -0
  24. package/dist/cache.js.map +1 -0
  25. package/dist/committee-agency-map.d.ts +58 -0
  26. package/dist/committee-agency-map.d.ts.map +1 -0
  27. package/dist/committee-agency-map.js +658 -0
  28. package/dist/committee-agency-map.js.map +1 -0
  29. package/dist/committee-alias-table.d.ts +11 -0
  30. package/dist/committee-alias-table.d.ts.map +1 -0
  31. package/dist/committee-alias-table.js +191 -0
  32. package/dist/committee-alias-table.js.map +1 -0
  33. package/dist/company-alias-table.d.ts +36 -0
  34. package/dist/company-alias-table.d.ts.map +1 -0
  35. package/dist/company-alias-table.js +1307 -0
  36. package/dist/company-alias-table.js.map +1 -0
  37. package/dist/company-entity-resolver.d.ts +94 -0
  38. package/dist/company-entity-resolver.d.ts.map +1 -0
  39. package/dist/company-entity-resolver.js +282 -0
  40. package/dist/company-entity-resolver.js.map +1 -0
  41. package/dist/configure.d.ts +15 -0
  42. package/dist/configure.d.ts.map +1 -0
  43. package/dist/configure.js +19 -0
  44. package/dist/configure.js.map +1 -0
  45. package/dist/fec-entity-resolution.d.ts +88 -0
  46. package/dist/fec-entity-resolution.d.ts.map +1 -0
  47. package/dist/fec-entity-resolution.js +407 -0
  48. package/dist/fec-entity-resolution.js.map +1 -0
  49. package/dist/index.d.ts +30 -0
  50. package/dist/index.d.ts.map +1 -0
  51. package/dist/index.js +33 -0
  52. package/dist/index.js.map +1 -0
  53. package/dist/industry-taxonomy.d.ts +90 -0
  54. package/dist/industry-taxonomy.d.ts.map +1 -0
  55. package/dist/industry-taxonomy.js +1026 -0
  56. package/dist/industry-taxonomy.js.map +1 -0
  57. package/dist/lda-issue-policy-map.d.ts +13 -0
  58. package/dist/lda-issue-policy-map.d.ts.map +1 -0
  59. package/dist/lda-issue-policy-map.js +193 -0
  60. package/dist/lda-issue-policy-map.js.map +1 -0
  61. package/dist/lobbying-committee-resolver.d.ts +23 -0
  62. package/dist/lobbying-committee-resolver.d.ts.map +1 -0
  63. package/dist/lobbying-committee-resolver.js +158 -0
  64. package/dist/lobbying-committee-resolver.js.map +1 -0
  65. package/dist/logger.d.ts +20 -0
  66. package/dist/logger.d.ts.map +1 -0
  67. package/dist/logger.js +20 -0
  68. package/dist/logger.js.map +1 -0
  69. package/dist/sic-sector-map.d.ts +32 -0
  70. package/dist/sic-sector-map.d.ts.map +1 -0
  71. package/dist/sic-sector-map.js +109 -0
  72. package/dist/sic-sector-map.js.map +1 -0
  73. package/dist/ticker-industry-resolver.d.ts +22 -0
  74. package/dist/ticker-industry-resolver.d.ts.map +1 -0
  75. package/dist/ticker-industry-resolver.js +254 -0
  76. package/dist/ticker-industry-resolver.js.map +1 -0
  77. package/dist/types.d.ts +30 -0
  78. package/dist/types.d.ts.map +1 -0
  79. package/dist/types.js +6 -0
  80. package/dist/types.js.map +1 -0
  81. package/package.json +51 -0
@@ -0,0 +1,1026 @@
1
+ /**
2
+ * Copyright (c) 2019-2025 Mark Sandford
3
+ * Licensed under the MIT License. See LICENSE and NOTICE files.
4
+ */
5
+ /**
6
+ * Industry Taxonomy & Categorization System
7
+ *
8
+ * Categorizes FEC contributor employers and occupations into standardized industry sectors.
9
+ * Based on OpenSecrets' 13-sector classification model with keyword-based categorization.
10
+ *
11
+ * Sectors (inspired by OpenSecrets):
12
+ * 1. Agribusiness
13
+ * 2. Communications/Electronics
14
+ * 3. Construction
15
+ * 4. Defense
16
+ * 5. Energy/Natural Resources
17
+ * 6. Finance/Insurance/Real Estate
18
+ * 7. Health
19
+ * 8. Lawyers & Lobbyists
20
+ * 9. Transportation
21
+ * 10. Misc Business
22
+ * 11. Labor
23
+ * 12. Ideology/Single-Issue
24
+ * 13. Other/Unknown
25
+ */
26
+ import { getLogger } from './logger';
/**
 * Industry sector enumeration.
 *
 * Every member is a string whose value is the human-readable sector label
 * (for example `IndustrySector.HEALTH === 'Health'`). Members are assigned in
 * the order listed below.
 */
export var IndustrySector;
(function (sectors) {
    // [member name, display label] pairs, in declaration order.
    const sectorLabels = [
        ['AGRIBUSINESS', 'Agribusiness'],
        ['COMMUNICATIONS_ELECTRONICS', 'Communications/Electronics'],
        ['CONSTRUCTION', 'Construction'],
        ['DEFENSE', 'Defense'],
        ['ENERGY_NATURAL_RESOURCES', 'Energy/Natural Resources'],
        ['FINANCE_INSURANCE_REAL_ESTATE', 'Finance/Insurance/Real Estate'],
        ['HEALTH', 'Health'],
        ['LAWYERS_LOBBYISTS', 'Lawyers & Lobbyists'],
        ['TRANSPORTATION', 'Transportation'],
        ['MISC_BUSINESS', 'Misc Business'],
        ['LABOR', 'Labor'],
        ['IDEOLOGY_SINGLE_ISSUE', 'Ideology/Single-Issue'],
        ['OTHER', 'Other'],
    ];
    for (const [memberName, label] of sectorLabels) {
        sectors[memberName] = label;
    }
})(IndustrySector || (IndustrySector = {}));
/**
 * Build one taxonomy entry.
 *
 * `occupationKeywords` is only attached when supplied, so entries without
 * occupation keywords do not carry the property at all.
 */
function defineIndustryCategory(sector, category, keywords, occupationKeywords) {
    const entry = { sector, category, keywords };
    if (occupationKeywords !== undefined) {
        entry.occupationKeywords = occupationKeywords;
    }
    return entry;
}
/**
 * Industry taxonomy database.
 *
 * Each entry names a sector, a category within it, the employer `keywords`
 * that select it and (optionally) the `occupationKeywords` that select it.
 * Entry order is significant to consumers that scan the table front to back.
 */
const INDUSTRY_CATEGORIES = [
    // AGRIBUSINESS
    defineIndustryCategory(IndustrySector.AGRIBUSINESS, 'Crop Production',
        ['farm', 'farming', 'agriculture', 'crop', 'grain', 'wheat', 'corn', 'soybean'],
        ['farmer', 'agricultural', 'agronomist']),
    defineIndustryCategory(IndustrySector.AGRIBUSINESS, 'Livestock',
        ['ranch', 'cattle', 'dairy', 'livestock', 'poultry', 'beef', 'pork'],
        ['rancher', 'veterinarian']),
    defineIndustryCategory(IndustrySector.AGRIBUSINESS, 'Food Processing',
        ['food processing', 'food service', 'restaurant', 'catering', 'grocery'],
        ['chef', 'cook', 'food service']),
    // COMMUNICATIONS/ELECTRONICS
    defineIndustryCategory(IndustrySector.COMMUNICATIONS_ELECTRONICS, 'Telecommunications',
        ['telecom', 'verizon', 'at&t', 'comcast', 'spectrum', 'phone', 'wireless', 'cellular'],
        ['telecommunications']),
    defineIndustryCategory(IndustrySector.COMMUNICATIONS_ELECTRONICS, 'Internet/Tech',
        [
            'google', 'amazon', 'microsoft', 'apple', 'facebook', 'meta', 'twitter',
            'software', 'tech', 'technology', 'internet', 'web', 'digital',
        ],
        ['software engineer', 'developer', 'programmer', 'data scientist', 'tech']),
    defineIndustryCategory(IndustrySector.COMMUNICATIONS_ELECTRONICS, 'TV/Movies/Music',
        [
            'television', 'movie', 'film', 'music', 'entertainment', 'media',
            'broadcasting', 'netflix', 'disney',
        ],
        ['actor', 'producer', 'musician', 'artist']),
    defineIndustryCategory(IndustrySector.COMMUNICATIONS_ELECTRONICS, 'Electronics Manufacturing',
        ['electronics', 'semiconductor', 'chip', 'circuit', 'intel', 'nvidia', 'amd'],
        ['electrical engineer', 'electronics']),
    // CONSTRUCTION
    defineIndustryCategory(IndustrySector.CONSTRUCTION, 'General Contractors',
        ['construction', 'contractor', 'builder', 'building', 'developer', 'remodeling'],
        ['contractor', 'construction', 'builder']),
    defineIndustryCategory(IndustrySector.CONSTRUCTION, 'Home Builders',
        ['home builder', 'homebuilder', 'residential construction', 'housing']),
    defineIndustryCategory(IndustrySector.CONSTRUCTION, 'Construction Services',
        ['plumbing', 'electrical', 'hvac', 'roofing', 'carpentry'],
        ['plumber', 'electrician', 'carpenter', 'roofer']),
    // DEFENSE
    defineIndustryCategory(IndustrySector.DEFENSE, 'Defense Aerospace',
        [
            'boeing', 'lockheed', 'raytheon', 'northrop grumman', 'general dynamics',
            'defense', 'aerospace', 'military',
        ],
        ['defense', 'military']),
    defineIndustryCategory(IndustrySector.DEFENSE, 'Defense Electronics',
        ['defense electronics', 'missile', 'weapons systems']),
    // ENERGY/NATURAL RESOURCES
    defineIndustryCategory(IndustrySector.ENERGY_NATURAL_RESOURCES, 'Oil & Gas',
        ['oil', 'gas', 'petroleum', 'exxon', 'chevron', 'shell', 'bp', 'energy', 'drilling'],
        ['petroleum engineer', 'oil', 'gas']),
    defineIndustryCategory(IndustrySector.ENERGY_NATURAL_RESOURCES, 'Electric Utilities',
        ['electric', 'utility', 'power', 'electricity', 'grid'],
        ['power plant', 'utility']),
    defineIndustryCategory(IndustrySector.ENERGY_NATURAL_RESOURCES, 'Renewable Energy',
        ['solar', 'wind', 'renewable', 'clean energy', 'green energy']),
    defineIndustryCategory(IndustrySector.ENERGY_NATURAL_RESOURCES, 'Mining',
        ['mining', 'coal', 'mineral', 'extraction'],
        ['miner', 'mining']),
    // FINANCE/INSURANCE/REAL ESTATE
    defineIndustryCategory(IndustrySector.FINANCE_INSURANCE_REAL_ESTATE, 'Commercial Banks',
        [
            'bank', 'banking', 'chase', 'wells fargo', 'bank of america', 'citibank',
            'financial services',
        ],
        ['banker', 'banking']),
    defineIndustryCategory(IndustrySector.FINANCE_INSURANCE_REAL_ESTATE, 'Insurance',
        [
            'insurance', 'allstate', 'state farm', 'geico', 'progressive',
            'life insurance', 'health insurance',
        ],
        ['insurance', 'actuary']),
    defineIndustryCategory(IndustrySector.FINANCE_INSURANCE_REAL_ESTATE, 'Real Estate',
        ['real estate', 'realty', 'property management', 'realtor'],
        ['realtor', 'real estate agent', 'property manager']),
    defineIndustryCategory(IndustrySector.FINANCE_INSURANCE_REAL_ESTATE, 'Securities & Investment',
        [
            'investment', 'securities', 'hedge fund', 'private equity', 'venture capital',
            'asset management', 'goldman sachs', 'morgan stanley',
        ],
        ['financial advisor', 'investment', 'trader', 'analyst']),
    defineIndustryCategory(IndustrySector.FINANCE_INSURANCE_REAL_ESTATE, 'Accounting',
        ['accounting', 'accountant', 'cpa', 'kpmg', 'deloitte', 'pwc', 'ernst & young'],
        ['accountant', 'cpa', 'auditor']),
    // HEALTH
    defineIndustryCategory(IndustrySector.HEALTH, 'Health Professionals',
        ['hospital', 'clinic', 'medical center', 'health system'],
        ['physician', 'doctor', 'nurse', 'surgeon', 'medical', 'healthcare', 'dentist']),
    defineIndustryCategory(IndustrySector.HEALTH, 'Pharmaceuticals',
        ['pharmaceutical', 'pharma', 'pfizer', 'merck', 'johnson & johnson', 'drug'],
        ['pharmacist', 'pharmaceutical']),
    defineIndustryCategory(IndustrySector.HEALTH, 'Health Insurance',
        ['health insurance', 'uhc', 'aetna', 'cigna', 'anthem', 'humana']),
    defineIndustryCategory(IndustrySector.HEALTH, 'Medical Devices',
        ['medical device', 'medtronic', 'abbott']),
    // LAWYERS & LOBBYISTS
    defineIndustryCategory(IndustrySector.LAWYERS_LOBBYISTS, 'Law Firms',
        ['law firm', 'legal', 'attorney', 'esquire'],
        ['attorney', 'lawyer', 'legal', 'counsel', 'paralegal']),
    defineIndustryCategory(IndustrySector.LAWYERS_LOBBYISTS, 'Lobbyists',
        ['lobbying', 'government relations', 'public affairs'],
        ['lobbyist', 'government relations']),
    // TRANSPORTATION
    defineIndustryCategory(IndustrySector.TRANSPORTATION, 'Air Transport',
        ['airline', 'aviation', 'american airlines', 'delta', 'united airlines'],
        ['pilot', 'flight attendant', 'aviation']),
    defineIndustryCategory(IndustrySector.TRANSPORTATION, 'Automotive',
        ['automotive', 'auto', 'ford', 'gm', 'toyota', 'honda', 'car', 'vehicle'],
        ['automotive', 'mechanic']),
    defineIndustryCategory(IndustrySector.TRANSPORTATION, 'Railroads',
        ['railroad', 'rail', 'amtrak', 'freight rail']),
    defineIndustryCategory(IndustrySector.TRANSPORTATION, 'Trucking',
        ['trucking', 'freight', 'logistics', 'shipping', 'ups', 'fedex'],
        ['truck driver', 'driver']),
    // MISC BUSINESS
    defineIndustryCategory(IndustrySector.MISC_BUSINESS, 'Retail',
        ['retail', 'walmart', 'target', 'costco', 'store', 'shop', 'grocery'],
        ['retail', 'sales', 'cashier', 'merchandis']),
    defineIndustryCategory(IndustrySector.MISC_BUSINESS, 'Manufacturing',
        ['manufacturing', 'factory', 'production', 'industrial'],
        ['engineer', 'manufacturing']),
    defineIndustryCategory(IndustrySector.MISC_BUSINESS, 'Business Services',
        ['consulting', 'consultant', 'business services', 'management', 'staffing', 'recruiting'],
        [
            'consultant', 'business analyst', 'executive', 'ceo', 'cfo', 'coo', 'cto',
            'president', 'vice president', 'owner', 'partner', 'director', 'manager',
            'founder', 'entrepreneur', 'business owner', 'principal', 'managing director',
        ]),
    defineIndustryCategory(IndustrySector.MISC_BUSINESS, 'Chemical',
        ['chemical', 'dow', 'dupont'],
        ['chemical engineer', 'chemist']),
    defineIndustryCategory(IndustrySector.MISC_BUSINESS, 'Lodging/Tourism',
        ['hotel', 'resort', 'hospitality', 'tourism', 'marriott', 'hilton'],
        ['hotel', 'hospitality']),
    // LABOR
    defineIndustryCategory(IndustrySector.LABOR, 'Labor Unions',
        ['union', 'afl-cio', 'teamsters', 'seiu', 'uaw', 'afscme', 'laborers', 'steelworkers'],
        ['union']),
    // IDEOLOGY/SINGLE-ISSUE
    defineIndustryCategory(IndustrySector.IDEOLOGY_SINGLE_ISSUE, 'Non-Profit/Advocacy',
        [
            'non-profit', 'nonprofit', 'foundation', 'charity', 'advocacy',
            'association', 'organization',
        ],
        ['nonprofit', 'advocacy']),
    defineIndustryCategory(IndustrySector.IDEOLOGY_SINGLE_ISSUE, 'Education',
        ['school', 'university', 'college', 'education', 'academic'],
        ['teacher', 'professor', 'educator', 'principal']),
    defineIndustryCategory(IndustrySector.IDEOLOGY_SINGLE_ISSUE, 'Religious Organizations',
        ['church', 'religious', 'ministry', 'faith'],
        ['minister', 'pastor', 'clergy']),
    // OTHER
    defineIndustryCategory(IndustrySector.OTHER, 'Government',
        ['government', 'federal', 'state', 'city', 'county', 'municipal', 'public sector'],
        ['government', 'public']),
    defineIndustryCategory(IndustrySector.OTHER, 'Retired',
        ['retired', 'retirement', 'retiree'],
        ['retired', 'retiree']),
    defineIndustryCategory(IndustrySector.OTHER, 'Not Employed',
        ['not employed', 'unemployed', 'homemaker', 'home maker'],
        ['not employed', 'homemaker', 'home maker', 'student', 'unemployed']),
];
/**
 * Employer strings that carry no industry signal.
 *
 * Values are stored lower-case; callers are expected to lower-case and trim
 * the employer before testing membership. When the employer is one of these,
 * classification should rely on the occupation instead.
 */
const NON_INFORMATIVE_EMPLOYERS = new Set([
    // Explicit "no employer" markers
    'none', 'n/a', 'na', 'not employed', 'not applicable',
    // Filer did not obtain / contributor did not give the information
    'information requested', 'information requested per best efforts', 'info requested', 'refused',
    // Self-employment spellings
    'self', 'self-employed', 'self employed', 'selfemployed',
    // Other generic placeholders
    'independent', 'private', 'personal', 'individual',
]);
/**
 * Report whether `keyword` occurs in `text` starting at the beginning of a word.
 *
 * A plain substring test produces false positives such as 'ford' inside
 * "stanford" or 'car' inside "healthcare". Requiring the character before the
 * match to be a non-letter/non-digit (or the start of the text) removes those
 * while still allowing prefix keywords such as 'merchandis' or 'pharma'.
 */
function industryKeywordStartsWord(text, keyword) {
    let at = text.indexOf(keyword);
    while (at !== -1) {
        if (at === 0 || !/[\p{L}\p{N}]/u.test(text[at - 1])) {
            return true;
        }
        at = text.indexOf(keyword, at + 1);
    }
    return false;
}
/**
 * Find the most specific taxonomy keyword present in `text`.
 *
 * Scans every entry of INDUSTRY_CATEGORIES and keeps the longest keyword that
 * matches at a word start; on equal length the earliest table entry wins.
 * Preferring the longest keyword lets explicit multi-word entries
 * ('state farm', 'chemical engineer') beat the generic single words
 * ('farm', 'engineer') that appear earlier in the table.
 *
 * @param {string} text - Lower-cased, trimmed text to search.
 * @param {'keywords'|'occupationKeywords'} field - Which keyword list to use.
 * @returns {{industry: object, keyword: string}|null} Best match, or null.
 */
function findMostSpecificIndustry(text, field) {
    let best = null;
    let bestLength = 0;
    for (const industry of INDUSTRY_CATEGORIES) {
        for (const keyword of industry[field] ?? []) {
            const needle = keyword.toLowerCase();
            // Strictly longer only: keeps table order as the tie-breaker and
            // skips the word-start scan for keywords that cannot win.
            if (needle.length > bestLength && industryKeywordStartsWord(text, needle)) {
                best = { industry, keyword };
                bestLength = needle.length;
            }
        }
    }
    return best;
}
/**
 * Categorize a contribution based on employer and occupation.
 *
 * Resolution order:
 *  1. Employer keywords (confidence 'high'), skipped when the employer is one
 *     of the NON_INFORMATIVE_EMPLOYERS placeholders.
 *  2. Occupation keywords (confidence 'medium').
 *  3. Placeholder employer with an unmatched occupation: 'Not Employed' for
 *     homemaker/unemployed/student text, otherwise a low-confidence
 *     'Business Services' guess.
 *  4. 'Other/Unknown'.
 *
 * Non-string or blank inputs are treated as missing; when both are missing the
 * result is the low-confidence 'Unknown' category.
 *
 * @param {string|null|undefined} employer - Contributor employer as reported.
 * @param {string|null|undefined} occupation - Contributor occupation as reported.
 * @returns {{sector: string, category: string, confidence: string,
 *   matchedKeyword?: string, matchSource: string}} Categorization result.
 */
export function categorizeContribution(employer, occupation) {
    const employerLower = typeof employer === 'string' ? employer.toLowerCase().trim() : '';
    const occupationLower = typeof occupation === 'string' ? occupation.toLowerCase().trim() : '';
    if (!employerLower && !occupationLower) {
        return {
            sector: IndustrySector.OTHER,
            category: 'Unknown',
            confidence: 'low',
            matchSource: 'inferred',
        };
    }
    // Placeholders like "NONE" or "SELF-EMPLOYED" say nothing about industry.
    const isPlaceholderEmployer = NON_INFORMATIVE_EMPLOYERS.has(employerLower);
    // Employer match first (higher confidence) — only if employer is informative
    if (employerLower && !isPlaceholderEmployer) {
        const employerHit = findMostSpecificIndustry(employerLower, 'keywords');
        if (employerHit) {
            return {
                sector: employerHit.industry.sector,
                category: employerHit.industry.category,
                confidence: 'high',
                matchedKeyword: employerHit.keyword,
                matchSource: 'employer',
            };
        }
    }
    // Occupation match (medium confidence)
    if (occupationLower) {
        const occupationHit = findMostSpecificIndustry(occupationLower, 'occupationKeywords');
        if (occupationHit) {
            return {
                sector: occupationHit.industry.sector,
                category: occupationHit.industry.category,
                confidence: 'medium',
                matchedKeyword: occupationHit.keyword,
                matchSource: 'occupation',
            };
        }
    }
    // Self-employed / independent with an occupation the table did not classify
    if (isPlaceholderEmployer && occupationLower) {
        const notEmployedMarkers = ['homemaker', 'home maker', 'not employed', 'unemployed'];
        if (notEmployedMarkers.some((marker) => occupationLower.includes(marker))) {
            return {
                sector: IndustrySector.OTHER,
                category: 'Not Employed',
                confidence: 'medium',
                matchedKeyword: occupationLower,
                matchSource: 'occupation',
            };
        }
        if (occupationLower.includes('student')) {
            return {
                sector: IndustrySector.OTHER,
                category: 'Not Employed',
                confidence: 'medium',
                matchedKeyword: 'student',
                matchSource: 'occupation',
            };
        }
        // Self-employed with an unclassified occupation
        return {
            sector: IndustrySector.MISC_BUSINESS,
            category: 'Business Services',
            confidence: 'low',
            matchedKeyword: 'self-employed',
            matchSource: 'inferred',
        };
    }
    // No match found
    return {
        sector: IndustrySector.OTHER,
        category: 'Other/Unknown',
        confidence: 'low',
        matchSource: 'inferred',
    };
}
/**
 * Aggregate contributions by industry sector.
 *
 * Each contribution is classified with categorizeContributionSmart (employer,
 * occupation, contributor name) and its amount is added to the matching sector
 * and category buckets.
 *
 * Amounts are coerced with Number(); a missing or non-numeric
 * `contribution_receipt_amount` contributes 0 to the totals (the contribution
 * is still counted) so that one bad record cannot turn every total and
 * percentage into NaN, and string amounts add instead of concatenating.
 *
 * @param {Array<object>} contributions - Records with `contributor_employer`,
 *   `contributor_occupation`, `contributor_name` and
 *   `contribution_receipt_amount` fields.
 * @returns {Array<{sector: string, totalAmount: number, contributionCount: number,
 *   percentage: number, categories: Map<string, {amount: number, count: number}>}>}
 *   One row per sector, sorted by `totalAmount` descending. `percentage` is the
 *   sector's share of the summed amounts, or 0 when that sum is not positive.
 */
export function aggregateByIndustrySector(contributions) {
    const sectorMap = new Map();
    let totalContributions = 0;
    for (const contrib of contributions) {
        const { sector, category } = categorizeContributionSmart(contrib.contributor_employer, contrib.contributor_occupation, contrib.contributor_name);
        const parsedAmount = Number(contrib.contribution_receipt_amount);
        const amount = Number.isFinite(parsedAmount) ? parsedAmount : 0;
        totalContributions += amount;
        let sectorData = sectorMap.get(sector);
        if (!sectorData) {
            sectorData = { totalAmount: 0, contributionCount: 0, categories: new Map() };
            sectorMap.set(sector, sectorData);
        }
        sectorData.totalAmount += amount;
        sectorData.contributionCount++;
        let categoryData = sectorData.categories.get(category);
        if (!categoryData) {
            categoryData = { amount: 0, count: 0 };
            sectorData.categories.set(category, categoryData);
        }
        categoryData.amount += amount;
        categoryData.count++;
    }
    const result = Array.from(sectorMap, ([sector, data]) => ({
        sector,
        totalAmount: data.totalAmount,
        contributionCount: data.contributionCount,
        percentage: totalContributions > 0 ? (data.totalAmount / totalContributions) * 100 : 0,
        categories: data.categories,
    }));
    getLogger().debug(`[Industry Taxonomy] Categorized ${contributions.length} contributions into ${result.length} sectors`);
    return result.sort((a, b) => b.totalAmount - a.totalAmount);
}
/**
 * PAC/committee name keywords for industry classification.
 *
 * Each entry maps a list of lower-case name fragments to a sector and
 * category. Some fragments deliberately end in a space ('ama ', 'bp ', 'gm ').
 * Entry order is significant to consumers that scan the table front to back.
 */
const PAC_NAME_KEYWORDS = [
    // HEALTH
    [IndustrySector.HEALTH, 'Health Professionals', [
        'medical', 'physician', 'doctor', 'nurse', 'dental', 'hospital', 'health',
        'healthcare', 'ama ', 'american medical',
    ]],
    [IndustrySector.HEALTH, 'Pharmaceuticals',
        ['pharma', 'pfizer', 'merck', 'johnson', 'lilly', 'abbvie', 'bristol', 'novartis']],
    [IndustrySector.HEALTH, 'Health Insurance',
        ['blue cross', 'aetna', 'cigna', 'humana', 'anthem', 'kaiser', 'united health']],
    // FINANCE
    [IndustrySector.FINANCE_INSURANCE_REAL_ESTATE, 'Commercial Banks', [
        'bank', 'banker', 'chase', 'wells fargo', 'citibank', 'jpmorgan',
        'credit union', 'financial',
    ]],
    [IndustrySector.FINANCE_INSURANCE_REAL_ESTATE, 'Securities & Investment', [
        'goldman', 'morgan stanley', 'investment', 'securities', 'hedge', 'capital',
        'blackrock', 'fidelity',
    ]],
    [IndustrySector.FINANCE_INSURANCE_REAL_ESTATE, 'Insurance',
        ['insurance', 'allstate', 'state farm', 'progressive', 'geico', 'mutual', 'life ins']],
    [IndustrySector.FINANCE_INSURANCE_REAL_ESTATE, 'Real Estate',
        ['realtor', 'real estate', 'realty', 'homebuilder', 'mortgage', 'property']],
    // LABOR
    [IndustrySector.LABOR, 'Labor Unions', [
        'union', 'teamster', 'seiu', 'afscme', 'afl-cio', 'uaw', 'steelworker',
        'laborer', 'ibew', 'ufcw', 'carpenters', 'plumbers', 'pipefitters',
        'electrical workers', 'teachers', 'firefighter', 'police', 'working families',
    ]],
    // LAWYERS
    [IndustrySector.LAWYERS_LOBBYISTS, 'Lawyers/Law Firms', [
        'law', 'lawyer', 'attorney', 'legal', 'trial', 'justice', 'tort',
        'litigation', 'bar association',
    ]],
    // ENERGY
    [IndustrySector.ENERGY_NATURAL_RESOURCES, 'Oil & Gas', [
        'oil', 'gas', 'petroleum', 'exxon', 'chevron', 'shell', 'bp ',
        'conocophillips', 'energy', 'drilling', 'pipeline',
    ]],
    [IndustrySector.ENERGY_NATURAL_RESOURCES, 'Electric Utilities',
        ['electric', 'utility', 'power', 'grid', 'edison', 'duke energy', 'exelon']],
    // DEFENSE
    [IndustrySector.DEFENSE, 'Defense Aerospace', [
        'defense', 'lockheed', 'raytheon', 'boeing', 'northrop', 'general dynamics',
        'bae ', 'l3harris', 'aerospace', 'military',
    ]],
    // COMMUNICATIONS/TECH
    [IndustrySector.COMMUNICATIONS_ELECTRONICS, 'Internet/Tech', [
        'google', 'microsoft', 'apple', 'amazon', 'meta', 'facebook', 'tech',
        'software', 'computer', 'internet', 'oracle', 'intel', 'cisco', 'ibm',
    ]],
    [IndustrySector.COMMUNICATIONS_ELECTRONICS, 'Telecommunications',
        ['telecom', 'verizon', 'at&t', 'comcast', 'charter', 't-mobile', 'wireless']],
    [IndustrySector.COMMUNICATIONS_ELECTRONICS, 'TV/Movies/Music', [
        'entertainment', 'movie', 'film', 'television', 'broadcast', 'media',
        'disney', 'warner', 'fox', 'screen actors',
    ]],
    // TRANSPORTATION
    [IndustrySector.TRANSPORTATION, 'Air Transport',
        ['airline', 'aviation', 'pilot', 'air transport', 'delta', 'united', 'american air']],
    [IndustrySector.TRANSPORTATION, 'Automotive', [
        'auto', 'automobile', 'car dealer', 'ford', 'gm ', 'general motors',
        'toyota', 'honda',
    ]],
    [IndustrySector.TRANSPORTATION, 'Trucking',
        ['trucking', 'freight', 'logistics', 'ups', 'fedex', 'shipping']],
    [IndustrySector.TRANSPORTATION, 'Railroads',
        ['railroad', 'rail', 'amtrak', 'bnsf', 'union pacific', 'csx']],
    // AGRIBUSINESS
    [IndustrySector.AGRIBUSINESS, 'Crop Production',
        ['farm', 'farmer', 'agriculture', 'crop', 'grain', 'corn', 'soybean', 'cotton']],
    [IndustrySector.AGRIBUSINESS, 'Food Processing', [
        'food', 'restaurant', 'grocery', 'beverage', 'coca-cola', 'pepsi',
        'tyson', 'cargill',
    ]],
    // CONSTRUCTION
    [IndustrySector.CONSTRUCTION, 'General Contractors',
        ['construction', 'contractor', 'builder', 'building trade', 'cement', 'steel']],
    [IndustrySector.CONSTRUCTION, 'Home Builders',
        ['home builder', 'homebuilder', 'residential', 'housing']],
    // MISC BUSINESS
    [IndustrySector.MISC_BUSINESS, 'Retail',
        ['retail', 'walmart', 'target', 'store', 'merchant', 'shop']],
    [IndustrySector.MISC_BUSINESS, 'Manufacturing',
        ['manufacturing', 'manufacturer', 'industrial', 'factory']],
    [IndustrySector.MISC_BUSINESS, 'Business Services',
        ['business', 'chamber of commerce', 'nfib', 'small business']],
    // IDEOLOGY
    [IndustrySector.IDEOLOGY_SINGLE_ISSUE, 'Pro-Israel',
        ['israel', 'aipac', 'jewish', 'zionist']],
    [IndustrySector.IDEOLOGY_SINGLE_ISSUE, 'Gun Rights',
        ['rifle', 'nra', 'gun', 'firearm', 'second amendment', '2nd amendment']],
    [IndustrySector.IDEOLOGY_SINGLE_ISSUE, 'Pro-Choice',
        ['planned parenthood', 'naral', 'emily', 'pro-choice', 'reproductive']],
    [IndustrySector.IDEOLOGY_SINGLE_ISSUE, 'Environment',
        ['environment', 'sierra', 'conservation', 'climate', 'green', 'lcv']],
    [IndustrySector.IDEOLOGY_SINGLE_ISSUE, 'Human Rights',
        ['human rights', 'civil rights', 'aclu', 'naacp', 'equality']],
    [IndustrySector.IDEOLOGY_SINGLE_ISSUE, 'Education',
        ['education', 'teacher', 'school', 'university', 'college', 'nea ', 'aft ']],
].map(([sector, category, keywords]) => ({ sector, category, keywords }));
/**
 * Report whether `keyword` occurs in `text` starting at the beginning of a word.
 *
 * Guards against mid-word hits such as 'law' inside "delaware", 'trial' inside
 * "industrial" or 'ama ' inside "alabama power".
 */
function pacKeywordStartsWord(text, keyword) {
    let at = text.indexOf(keyword);
    while (at !== -1) {
        if (at === 0 || !/[\p{L}\p{N}]/u.test(text[at - 1])) {
            return true;
        }
        at = text.indexOf(keyword, at + 1);
    }
    return false;
}
/**
 * Find the most specific PAC_NAME_KEYWORDS entry for a lower-cased name.
 *
 * Keeps the longest keyword that matches at a word start (earliest table entry
 * wins ties), so 'union pacific' beats the earlier, generic 'union'.
 *
 * @param {string} nameLower - Lower-cased committee name.
 * @returns {{pacCategory: object, keyword: string}|null} Best match, or null.
 */
function findMostSpecificPacCategory(nameLower) {
    // Several keywords end in a space to mark an acronym boundary ('nea ',
    // 'gm '); padding lets them match when the acronym ends the name.
    const haystack = `${nameLower} `;
    let best = null;
    let bestLength = 0;
    for (const pacCategory of PAC_NAME_KEYWORDS) {
        for (const keyword of pacCategory.keywords) {
            const needle = keyword.toLowerCase();
            if (needle.length > bestLength && pacKeywordStartsWord(haystack, needle)) {
                best = { pacCategory, keyword };
                bestLength = needle.length;
            }
        }
    }
    return best;
}
/**
 * Categorize a PAC/committee by its name.
 * Used for committee-to-committee transfers and PAC contributions.
 *
 * Checks, in order: party committees, joint-fundraising/victory funds,
 * industry keywords from PAC_NAME_KEYWORDS, then a generic "pac"/"committee"
 * fallback. Missing, blank or non-string names yield 'Unknown PAC'.
 *
 * Note: keyword-derived results report `matchSource: 'employer'`; that value
 * is part of the existing result contract and is kept as-is.
 *
 * @param {string|null|undefined} committeeName - Committee name as reported.
 * @returns {{sector: string, category: string, confidence: string,
 *   matchedKeyword?: string, matchSource: string}} Categorization result.
 */
export function categorizePACByName(committeeName) {
    if (typeof committeeName !== 'string' || committeeName.trim() === '') {
        return {
            sector: IndustrySector.OTHER,
            category: 'Unknown PAC',
            confidence: 'low',
            matchSource: 'inferred',
        };
    }
    const nameLower = committeeName.toLowerCase();
    const mentionsAny = (markers) => markers.some((marker) => nameLower.includes(marker));
    // Check for political party committees first
    if (mentionsAny(['democratic', 'dccc', 'dscc', 'dnc'])) {
        return {
            sector: IndustrySector.OTHER,
            category: 'Democratic Party',
            confidence: 'high',
            matchedKeyword: 'democratic',
            matchSource: 'employer',
        };
    }
    if (mentionsAny(['republican', 'nrcc', 'nrsc', 'rnc'])) {
        return {
            sector: IndustrySector.OTHER,
            category: 'Republican Party',
            confidence: 'high',
            matchedKeyword: 'republican',
            matchSource: 'employer',
        };
    }
    // Check for joint fundraising/victory funds (pass-through)
    const jointFundMarkers = [
        'victory',
        'joint',
        'senate 20',
        'house 20',
        'blue senate',
        'red senate',
        'making history',
        'north stars',
    ];
    if (mentionsAny(jointFundMarkers)) {
        return {
            sector: IndustrySector.OTHER,
            category: 'Joint Fundraising',
            confidence: 'medium',
            matchedKeyword: 'joint-fund',
            matchSource: 'employer',
        };
    }
    // Check industry-specific PAC names
    const industryHit = findMostSpecificPacCategory(nameLower);
    if (industryHit) {
        return {
            sector: industryHit.pacCategory.sector,
            category: industryHit.pacCategory.category,
            confidence: 'high',
            matchedKeyword: industryHit.keyword,
            matchSource: 'employer',
        };
    }
    // Generic PAC fallback
    if (mentionsAny(['pac', 'committee'])) {
        return {
            sector: IndustrySector.OTHER,
            category: 'Unclassified PAC',
            confidence: 'low',
            matchSource: 'inferred',
        };
    }
    return {
        sector: IndustrySector.OTHER,
        category: 'Unknown',
        confidence: 'low',
        matchSource: 'inferred',
    };
}
973
/**
 * Smart categorization: the employer/occupation classifier is consulted first,
 * and the contributor name (useful for PACs) is only used when that first
 * answer is inconclusive.
 *
 * An employer/occupation result is inconclusive when its confidence is 'low'
 * AND its category is one of the placeholders 'Other/Unknown' / 'Unknown'.
 * A name-based result is used unless it is itself low-confidence 'Unknown'.
 *
 * @param employer - Passed through to categorizeContribution.
 * @param occupation - Passed through to categorizeContribution.
 * @param contributorName - Optional name handed to categorizePACByName.
 * @returns The name-based result when it is usable, otherwise the
 *   employer/occupation result.
 */
export function categorizeContributionSmart(employer, occupation, contributorName) {
    const byEmployer = categorizeContribution(employer, occupation);
    const isPlaceholderCategory = byEmployer.category === 'Other/Unknown' || byEmployer.category === 'Unknown';
    const employerInconclusive = byEmployer.confidence === 'low' && isPlaceholderCategory;
    // A usable employer/occupation match wins outright.
    if (!employerInconclusive) {
        return byEmployer;
    }
    // Without a name there is nothing further to try.
    if (!contributorName) {
        return byEmployer;
    }
    const byName = categorizePACByName(contributorName);
    const nameInconclusive = byName.confidence === 'low' && byName.category === 'Unknown';
    return nameInconclusive ? byEmployer : byName;
}
993
/**
 * Get top categories across all sectors.
 *
 * Each contribution is classified with categorizeContributionSmart (employer /
 * occupation first, then contributor name) and its amount is added to the
 * bucket for that (sector, category) pair.
 *
 * @param contributions - Iterable of contribution records. The fields read are
 *   contributor_employer, contributor_occupation, contributor_name and
 *   contribution_receipt_amount.
 * @param {number} [limit=10] - Maximum number of rows to return.
 * @returns {Array<{sector: *, category: string, totalAmount: number, contributionCount: number, percentage: number}>}
 *   Buckets sorted by totalAmount, largest first. `percentage` is the bucket's
 *   share of the summed amounts (0 when that sum is not positive).
 */
export function getTopCategories(contributions, limit = 10) {
    const buckets = new Map();
    let totalContributions = 0;
    for (const contrib of contributions) {
        // Use smart categorization that tries employer/occupation first, then contributor name
        const { sector, category } = categorizeContributionSmart(contrib.contributor_employer, contrib.contributor_occupation, contrib.contributor_name);
        // A missing or non-numeric amount counts as 0 so one bad record cannot
        // turn every total and percentage into NaN.
        const parsedAmount = Number(contrib.contribution_receipt_amount);
        const amount = Number.isFinite(parsedAmount) ? parsedAmount : 0;
        totalContributions += amount;
        // JSON-encode the pair so a ':' (or any other character) in a sector or
        // category name can neither merge two buckets nor truncate the category.
        const key = JSON.stringify([sector, category]);
        let bucket = buckets.get(key);
        if (!bucket) {
            bucket = { sector, category, totalAmount: 0, contributionCount: 0 };
            buckets.set(key, bucket);
        }
        bucket.totalAmount += amount;
        bucket.contributionCount++;
    }
    return Array.from(buckets.values(), (bucket) => ({
        sector: bucket.sector,
        category: bucket.category,
        totalAmount: bucket.totalAmount,
        contributionCount: bucket.contributionCount,
        percentage: totalContributions > 0 ? (bucket.totalAmount / totalContributions) * 100 : 0,
    }))
        .sort((a, b) => b.totalAmount - a.totalAmount)
        .slice(0, limit);
}
1026
+ //# sourceMappingURL=industry-taxonomy.js.map