corp-extractor 0.9.0__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {corp_extractor-0.9.0.dist-info → corp_extractor-0.9.4.dist-info}/METADATA +72 -11
  2. {corp_extractor-0.9.0.dist-info → corp_extractor-0.9.4.dist-info}/RECORD +34 -27
  3. statement_extractor/cli.py +1317 -101
  4. statement_extractor/database/embeddings.py +45 -0
  5. statement_extractor/database/hub.py +86 -136
  6. statement_extractor/database/importers/__init__.py +10 -2
  7. statement_extractor/database/importers/companies_house.py +16 -2
  8. statement_extractor/database/importers/companies_house_officers.py +431 -0
  9. statement_extractor/database/importers/gleif.py +23 -0
  10. statement_extractor/database/importers/import_utils.py +264 -0
  11. statement_extractor/database/importers/sec_edgar.py +17 -0
  12. statement_extractor/database/importers/sec_form4.py +512 -0
  13. statement_extractor/database/importers/wikidata.py +151 -43
  14. statement_extractor/database/importers/wikidata_dump.py +2282 -0
  15. statement_extractor/database/importers/wikidata_people.py +867 -325
  16. statement_extractor/database/migrate_v2.py +852 -0
  17. statement_extractor/database/models.py +155 -7
  18. statement_extractor/database/schema_v2.py +409 -0
  19. statement_extractor/database/seed_data.py +359 -0
  20. statement_extractor/database/store.py +3449 -233
  21. statement_extractor/document/deduplicator.py +10 -12
  22. statement_extractor/extractor.py +1 -1
  23. statement_extractor/models/__init__.py +3 -2
  24. statement_extractor/models/statement.py +15 -17
  25. statement_extractor/models.py +1 -1
  26. statement_extractor/pipeline/context.py +5 -5
  27. statement_extractor/pipeline/orchestrator.py +12 -12
  28. statement_extractor/plugins/base.py +17 -17
  29. statement_extractor/plugins/extractors/gliner2.py +28 -28
  30. statement_extractor/plugins/qualifiers/embedding_company.py +7 -5
  31. statement_extractor/plugins/qualifiers/person.py +120 -53
  32. statement_extractor/plugins/splitters/t5_gemma.py +35 -39
  33. {corp_extractor-0.9.0.dist-info → corp_extractor-0.9.4.dist-info}/WHEEL +0 -0
  34. {corp_extractor-0.9.0.dist-info → corp_extractor-0.9.4.dist-info}/entry_points.txt +0 -0
@@ -48,10 +48,12 @@ WIKIDATA_SPARQL_URL = "https://query.wikidata.org/sparql"
48
48
  # Simpler SPARQL query - directly query for companies with LEI codes (fastest, most reliable)
49
49
  # Avoids property path wildcards (wdt:P279*) which timeout on Wikidata
50
50
  LEI_COMPANY_QUERY = """
51
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
51
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
52
52
  ?company wdt:P1278 ?lei.
53
53
  OPTIONAL { ?company wdt:P249 ?ticker. }
54
54
  OPTIONAL { ?company wdt:P17 ?country. }
55
+ OPTIONAL { ?company wdt:P571 ?inception. }
56
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
55
57
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
56
58
  }
57
59
  LIMIT %d
@@ -60,10 +62,12 @@ OFFSET %d
60
62
 
61
63
  # Query for companies with a stock exchange listing (P414); the ticker (P249) is optional
62
64
  TICKER_COMPANY_QUERY = """
63
- SELECT ?company ?companyLabel ?ticker ?exchange ?exchangeLabel ?country ?countryLabel WHERE {
65
+ SELECT ?company ?companyLabel ?ticker ?exchange ?exchangeLabel ?country ?countryLabel ?inception ?dissolution WHERE {
64
66
  ?company wdt:P414 ?exchange.
65
67
  OPTIONAL { ?company wdt:P249 ?ticker. }
66
68
  OPTIONAL { ?company wdt:P17 ?country. }
69
+ OPTIONAL { ?company wdt:P571 ?inception. }
70
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
67
71
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
68
72
  }
69
73
  LIMIT %d
@@ -72,11 +76,13 @@ OFFSET %d
72
76
 
73
77
  # Query for direct instances of public company (Q891723) - no subclass traversal
74
78
  PUBLIC_COMPANY_QUERY = """
75
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
79
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
76
80
  ?company wdt:P31 wd:Q891723.
77
81
  OPTIONAL { ?company wdt:P1278 ?lei. }
78
82
  OPTIONAL { ?company wdt:P249 ?ticker. }
79
83
  OPTIONAL { ?company wdt:P17 ?country. }
84
+ OPTIONAL { ?company wdt:P571 ?inception. }
85
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
80
86
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
81
87
  }
82
88
  LIMIT %d
@@ -85,11 +91,13 @@ OFFSET %d
85
91
 
86
92
  # Query for direct instances of business enterprise (Q4830453) - no subclass traversal
87
93
  BUSINESS_QUERY = """
88
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
94
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
89
95
  ?company wdt:P31 wd:Q4830453.
90
96
  OPTIONAL { ?company wdt:P1278 ?lei. }
91
97
  OPTIONAL { ?company wdt:P249 ?ticker. }
92
98
  OPTIONAL { ?company wdt:P17 ?country. }
99
+ OPTIONAL { ?company wdt:P571 ?inception. }
100
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
93
101
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
94
102
  }
95
103
  LIMIT %d
@@ -98,11 +106,13 @@ OFFSET %d
98
106
 
99
107
  # Query for direct instances of organization (Q43229) - includes NGOs, gov agencies, etc.
100
108
  ORGANIZATION_QUERY = """
101
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
109
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
102
110
  ?company wdt:P31 wd:Q43229.
103
111
  OPTIONAL { ?company wdt:P1278 ?lei. }
104
112
  OPTIONAL { ?company wdt:P249 ?ticker. }
105
113
  OPTIONAL { ?company wdt:P17 ?country. }
114
+ OPTIONAL { ?company wdt:P571 ?inception. }
115
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
106
116
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
107
117
  }
108
118
  LIMIT %d
@@ -111,11 +121,13 @@ OFFSET %d
111
121
 
112
122
  # Query for non-profit organizations (Q163740)
113
123
  NONPROFIT_QUERY = """
114
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
124
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
115
125
  ?company wdt:P31 wd:Q163740.
116
126
  OPTIONAL { ?company wdt:P1278 ?lei. }
117
127
  OPTIONAL { ?company wdt:P249 ?ticker. }
118
128
  OPTIONAL { ?company wdt:P17 ?country. }
129
+ OPTIONAL { ?company wdt:P571 ?inception. }
130
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
119
131
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
120
132
  }
121
133
  LIMIT %d
@@ -124,11 +136,13 @@ OFFSET %d
124
136
 
125
137
  # Query for government agencies (Q327333)
126
138
  GOV_AGENCY_QUERY = """
127
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
139
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
128
140
  ?company wdt:P31 wd:Q327333.
129
141
  OPTIONAL { ?company wdt:P1278 ?lei. }
130
142
  OPTIONAL { ?company wdt:P249 ?ticker. }
131
143
  OPTIONAL { ?company wdt:P17 ?country. }
144
+ OPTIONAL { ?company wdt:P571 ?inception. }
145
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
132
146
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
133
147
  }
134
148
  LIMIT %d
@@ -137,11 +151,13 @@ OFFSET %d
137
151
 
138
152
  # Query for enterprises (Q6881511) - broader than business enterprise
139
153
  ENTERPRISE_QUERY = """
140
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
154
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
141
155
  ?company wdt:P31 wd:Q6881511.
142
156
  OPTIONAL { ?company wdt:P1278 ?lei. }
143
157
  OPTIONAL { ?company wdt:P249 ?ticker. }
144
158
  OPTIONAL { ?company wdt:P17 ?country. }
159
+ OPTIONAL { ?company wdt:P571 ?inception. }
160
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
145
161
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
146
162
  }
147
163
  LIMIT %d
@@ -150,11 +166,13 @@ OFFSET %d
150
166
 
151
167
  # Query for corporations (Q167037)
152
168
  CORPORATION_QUERY = """
153
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
169
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
154
170
  ?company wdt:P31 wd:Q167037.
155
171
  OPTIONAL { ?company wdt:P1278 ?lei. }
156
172
  OPTIONAL { ?company wdt:P249 ?ticker. }
157
173
  OPTIONAL { ?company wdt:P17 ?country. }
174
+ OPTIONAL { ?company wdt:P571 ?inception. }
175
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
158
176
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
159
177
  }
160
178
  LIMIT %d
@@ -163,11 +181,13 @@ OFFSET %d
163
181
 
164
182
  # Query for subsidiaries (Q658255)
165
183
  SUBSIDIARY_QUERY = """
166
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
184
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
167
185
  ?company wdt:P31 wd:Q658255.
168
186
  OPTIONAL { ?company wdt:P1278 ?lei. }
169
187
  OPTIONAL { ?company wdt:P249 ?ticker. }
170
188
  OPTIONAL { ?company wdt:P17 ?country. }
189
+ OPTIONAL { ?company wdt:P571 ?inception. }
190
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
171
191
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
172
192
  }
173
193
  LIMIT %d
@@ -176,11 +196,13 @@ OFFSET %d
176
196
 
177
197
  # Query for banks (Q22687)
178
198
  BANK_QUERY = """
179
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
199
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
180
200
  ?company wdt:P31 wd:Q22687.
181
201
  OPTIONAL { ?company wdt:P1278 ?lei. }
182
202
  OPTIONAL { ?company wdt:P249 ?ticker. }
183
203
  OPTIONAL { ?company wdt:P17 ?country. }
204
+ OPTIONAL { ?company wdt:P571 ?inception. }
205
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
184
206
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
185
207
  }
186
208
  LIMIT %d
@@ -189,11 +211,13 @@ OFFSET %d
189
211
 
190
212
  # Query for insurance companies (Q1145276)
191
213
  INSURANCE_QUERY = """
192
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
214
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
193
215
  ?company wdt:P31 wd:Q1145276.
194
216
  OPTIONAL { ?company wdt:P1278 ?lei. }
195
217
  OPTIONAL { ?company wdt:P249 ?ticker. }
196
218
  OPTIONAL { ?company wdt:P17 ?country. }
219
+ OPTIONAL { ?company wdt:P571 ?inception. }
220
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
197
221
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
198
222
  }
199
223
  LIMIT %d
@@ -202,11 +226,13 @@ OFFSET %d
202
226
 
203
227
  # Query for airlines (Q46970)
204
228
  AIRLINE_QUERY = """
205
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
229
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
206
230
  ?company wdt:P31 wd:Q46970.
207
231
  OPTIONAL { ?company wdt:P1278 ?lei. }
208
232
  OPTIONAL { ?company wdt:P249 ?ticker. }
209
233
  OPTIONAL { ?company wdt:P17 ?country. }
234
+ OPTIONAL { ?company wdt:P571 ?inception. }
235
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
210
236
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
211
237
  }
212
238
  LIMIT %d
@@ -215,11 +241,13 @@ OFFSET %d
215
241
 
216
242
  # Query for law firms (Q613142)
217
243
  LAW_FIRM_QUERY = """
218
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
244
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
219
245
  ?company wdt:P31 wd:Q613142.
220
246
  OPTIONAL { ?company wdt:P1278 ?lei. }
221
247
  OPTIONAL { ?company wdt:P249 ?ticker. }
222
248
  OPTIONAL { ?company wdt:P17 ?country. }
249
+ OPTIONAL { ?company wdt:P571 ?inception. }
250
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
223
251
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
224
252
  }
225
253
  LIMIT %d
@@ -228,11 +256,13 @@ OFFSET %d
228
256
 
229
257
  # Query for educational institutions (Q2385804)
230
258
  EDUCATIONAL_QUERY = """
231
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
259
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
232
260
  ?company wdt:P31 wd:Q2385804.
233
261
  OPTIONAL { ?company wdt:P1278 ?lei. }
234
262
  OPTIONAL { ?company wdt:P249 ?ticker. }
235
263
  OPTIONAL { ?company wdt:P17 ?country. }
264
+ OPTIONAL { ?company wdt:P571 ?inception. }
265
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
236
266
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
237
267
  }
238
268
  LIMIT %d
@@ -241,11 +271,13 @@ OFFSET %d
241
271
 
242
272
  # Query for universities (Q3918)
243
273
  UNIVERSITY_QUERY = """
244
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
274
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
245
275
  ?company wdt:P31 wd:Q3918.
246
276
  OPTIONAL { ?company wdt:P1278 ?lei. }
247
277
  OPTIONAL { ?company wdt:P249 ?ticker. }
248
278
  OPTIONAL { ?company wdt:P17 ?country. }
279
+ OPTIONAL { ?company wdt:P571 ?inception. }
280
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
249
281
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
250
282
  }
251
283
  LIMIT %d
@@ -254,11 +286,13 @@ OFFSET %d
254
286
 
255
287
  # Query for research institutes (Q31855)
256
288
  RESEARCH_INSTITUTE_QUERY = """
257
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
289
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
258
290
  ?company wdt:P31 wd:Q31855.
259
291
  OPTIONAL { ?company wdt:P1278 ?lei. }
260
292
  OPTIONAL { ?company wdt:P249 ?ticker. }
261
293
  OPTIONAL { ?company wdt:P17 ?country. }
294
+ OPTIONAL { ?company wdt:P571 ?inception. }
295
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
262
296
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
263
297
  }
264
298
  LIMIT %d
@@ -267,11 +301,13 @@ OFFSET %d
267
301
 
268
302
  # Query for political parties (Q7278)
269
303
  POLITICAL_PARTY_QUERY = """
270
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
304
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
271
305
  ?company wdt:P31 wd:Q7278.
272
306
  OPTIONAL { ?company wdt:P1278 ?lei. }
273
307
  OPTIONAL { ?company wdt:P249 ?ticker. }
274
308
  OPTIONAL { ?company wdt:P17 ?country. }
309
+ OPTIONAL { ?company wdt:P571 ?inception. }
310
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
275
311
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
276
312
  }
277
313
  LIMIT %d
@@ -280,11 +316,13 @@ OFFSET %d
280
316
 
281
317
  # Query for trade unions (Q178790)
282
318
  TRADE_UNION_QUERY = """
283
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
319
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
284
320
  ?company wdt:P31 wd:Q178790.
285
321
  OPTIONAL { ?company wdt:P1278 ?lei. }
286
322
  OPTIONAL { ?company wdt:P249 ?ticker. }
287
323
  OPTIONAL { ?company wdt:P17 ?country. }
324
+ OPTIONAL { ?company wdt:P571 ?inception. }
325
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
288
326
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
289
327
  }
290
328
  LIMIT %d
@@ -293,11 +331,13 @@ OFFSET %d
293
331
 
294
332
  # Query for NGOs (Q79913)
295
333
  NGO_QUERY = """
296
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
334
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
297
335
  ?company wdt:P31 wd:Q79913.
298
336
  OPTIONAL { ?company wdt:P1278 ?lei. }
299
337
  OPTIONAL { ?company wdt:P249 ?ticker. }
300
338
  OPTIONAL { ?company wdt:P17 ?country. }
339
+ OPTIONAL { ?company wdt:P571 ?inception. }
340
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
301
341
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
302
342
  }
303
343
  LIMIT %d
@@ -306,11 +346,13 @@ OFFSET %d
306
346
 
307
347
  # Query for foundations (Q157031)
308
348
  FOUNDATION_QUERY = """
309
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
349
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
310
350
  ?company wdt:P31 wd:Q157031.
311
351
  OPTIONAL { ?company wdt:P1278 ?lei. }
312
352
  OPTIONAL { ?company wdt:P249 ?ticker. }
313
353
  OPTIONAL { ?company wdt:P17 ?country. }
354
+ OPTIONAL { ?company wdt:P571 ?inception. }
355
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
314
356
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
315
357
  }
316
358
  LIMIT %d
@@ -319,11 +361,13 @@ OFFSET %d
319
361
 
320
362
  # Query for international organizations (Q484652)
321
363
  INTL_ORG_QUERY = """
322
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
364
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
323
365
  ?company wdt:P31 wd:Q484652.
324
366
  OPTIONAL { ?company wdt:P1278 ?lei. }
325
367
  OPTIONAL { ?company wdt:P249 ?ticker. }
326
368
  OPTIONAL { ?company wdt:P17 ?country. }
369
+ OPTIONAL { ?company wdt:P571 ?inception. }
370
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
327
371
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
328
372
  }
329
373
  LIMIT %d
@@ -332,11 +376,13 @@ OFFSET %d
332
376
 
333
377
  # Query for sports teams/clubs (Q476028)
334
378
  SPORTS_CLUB_QUERY = """
335
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
379
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
336
380
  ?company wdt:P31 wd:Q476028.
337
381
  OPTIONAL { ?company wdt:P1278 ?lei. }
338
382
  OPTIONAL { ?company wdt:P249 ?ticker. }
339
383
  OPTIONAL { ?company wdt:P17 ?country. }
384
+ OPTIONAL { ?company wdt:P571 ?inception. }
385
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
340
386
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
341
387
  }
342
388
  LIMIT %d
@@ -345,11 +391,13 @@ OFFSET %d
345
391
 
346
392
  # Query for hospitals (Q16917)
347
393
  HOSPITAL_QUERY = """
348
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
394
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
349
395
  ?company wdt:P31 wd:Q16917.
350
396
  OPTIONAL { ?company wdt:P1278 ?lei. }
351
397
  OPTIONAL { ?company wdt:P249 ?ticker. }
352
398
  OPTIONAL { ?company wdt:P17 ?country. }
399
+ OPTIONAL { ?company wdt:P571 ?inception. }
400
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
353
401
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
354
402
  }
355
403
  LIMIT %d
@@ -358,11 +406,13 @@ OFFSET %d
358
406
 
359
407
  # Query for record labels (Q18127)
360
408
  RECORD_LABEL_QUERY = """
361
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
409
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
362
410
  ?company wdt:P31 wd:Q18127.
363
411
  OPTIONAL { ?company wdt:P1278 ?lei. }
364
412
  OPTIONAL { ?company wdt:P249 ?ticker. }
365
413
  OPTIONAL { ?company wdt:P17 ?country. }
414
+ OPTIONAL { ?company wdt:P571 ?inception. }
415
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
366
416
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
367
417
  }
368
418
  LIMIT %d
@@ -371,11 +421,13 @@ OFFSET %d
371
421
 
372
422
  # Query for film studios (Q1366047)
373
423
  FILM_STUDIO_QUERY = """
374
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
424
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
375
425
  ?company wdt:P31 wd:Q1366047.
376
426
  OPTIONAL { ?company wdt:P1278 ?lei. }
377
427
  OPTIONAL { ?company wdt:P249 ?ticker. }
378
428
  OPTIONAL { ?company wdt:P17 ?country. }
429
+ OPTIONAL { ?company wdt:P571 ?inception. }
430
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
379
431
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
380
432
  }
381
433
  LIMIT %d
@@ -384,11 +436,13 @@ OFFSET %d
384
436
 
385
437
  # Query for video game companies (Q1137109)
386
438
  VIDEO_GAME_COMPANY_QUERY = """
387
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
439
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
388
440
  ?company wdt:P31 wd:Q1137109.
389
441
  OPTIONAL { ?company wdt:P1278 ?lei. }
390
442
  OPTIONAL { ?company wdt:P249 ?ticker. }
391
443
  OPTIONAL { ?company wdt:P17 ?country. }
444
+ OPTIONAL { ?company wdt:P571 ?inception. }
445
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
392
446
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
393
447
  }
394
448
  LIMIT %d
@@ -397,11 +451,13 @@ OFFSET %d
397
451
 
398
452
  # Query for pharmaceutical companies (Q507619)
399
453
  PHARMA_QUERY = """
400
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
454
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
401
455
  ?company wdt:P31 wd:Q507619.
402
456
  OPTIONAL { ?company wdt:P1278 ?lei. }
403
457
  OPTIONAL { ?company wdt:P249 ?ticker. }
404
458
  OPTIONAL { ?company wdt:P17 ?country. }
459
+ OPTIONAL { ?company wdt:P571 ?inception. }
460
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
405
461
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
406
462
  }
407
463
  LIMIT %d
@@ -410,11 +466,13 @@ OFFSET %d
410
466
 
411
467
  # Query for tech companies (Q2979960)
412
468
  TECH_COMPANY_QUERY = """
413
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
469
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
414
470
  ?company wdt:P31 wd:Q2979960.
415
471
  OPTIONAL { ?company wdt:P1278 ?lei. }
416
472
  OPTIONAL { ?company wdt:P249 ?ticker. }
417
473
  OPTIONAL { ?company wdt:P17 ?country. }
474
+ OPTIONAL { ?company wdt:P571 ?inception. }
475
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
418
476
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
419
477
  }
420
478
  LIMIT %d
@@ -423,11 +481,13 @@ OFFSET %d
423
481
 
424
482
  # Query for retailers (Q1631111)
425
483
  RETAILER_QUERY = """
426
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
484
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
427
485
  ?company wdt:P31 wd:Q1631111.
428
486
  OPTIONAL { ?company wdt:P1278 ?lei. }
429
487
  OPTIONAL { ?company wdt:P249 ?ticker. }
430
488
  OPTIONAL { ?company wdt:P17 ?country. }
489
+ OPTIONAL { ?company wdt:P571 ?inception. }
490
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
431
491
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
432
492
  }
433
493
  LIMIT %d
@@ -436,11 +496,13 @@ OFFSET %d
436
496
 
437
497
  # Query for manufacturers (Q187652)
438
498
  MANUFACTURER_QUERY = """
439
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
499
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
440
500
  ?company wdt:P31 wd:Q187652.
441
501
  OPTIONAL { ?company wdt:P1278 ?lei. }
442
502
  OPTIONAL { ?company wdt:P249 ?ticker. }
443
503
  OPTIONAL { ?company wdt:P17 ?country. }
504
+ OPTIONAL { ?company wdt:P571 ?inception. }
505
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
444
506
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
445
507
  }
446
508
  LIMIT %d
@@ -449,11 +511,13 @@ OFFSET %d
449
511
 
450
512
  # Query for conglomerates (Q206652)
451
513
  CONGLOMERATE_QUERY = """
452
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
514
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
453
515
  ?company wdt:P31 wd:Q206652.
454
516
  OPTIONAL { ?company wdt:P1278 ?lei. }
455
517
  OPTIONAL { ?company wdt:P249 ?ticker. }
456
518
  OPTIONAL { ?company wdt:P17 ?country. }
519
+ OPTIONAL { ?company wdt:P571 ?inception. }
520
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
457
521
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
458
522
  }
459
523
  LIMIT %d
@@ -462,11 +526,13 @@ OFFSET %d
462
526
 
463
527
  # Query for investment companies (Q380649)
464
528
  INVESTMENT_COMPANY_QUERY = """
465
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
529
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
466
530
  ?company wdt:P31 wd:Q380649.
467
531
  OPTIONAL { ?company wdt:P1278 ?lei. }
468
532
  OPTIONAL { ?company wdt:P249 ?ticker. }
469
533
  OPTIONAL { ?company wdt:P17 ?country. }
534
+ OPTIONAL { ?company wdt:P571 ?inception. }
535
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
470
536
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
471
537
  }
472
538
  LIMIT %d
@@ -475,11 +541,13 @@ OFFSET %d
475
541
 
476
542
  # Property-based query: entities with a CEO (P169) - likely companies
477
543
  HAS_CEO_QUERY = """
478
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
544
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
479
545
  ?company wdt:P169 ?ceo.
480
546
  OPTIONAL { ?company wdt:P1278 ?lei. }
481
547
  OPTIONAL { ?company wdt:P249 ?ticker. }
482
548
  OPTIONAL { ?company wdt:P17 ?country. }
549
+ OPTIONAL { ?company wdt:P571 ?inception. }
550
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
483
551
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
484
552
  }
485
553
  LIMIT %d
@@ -488,11 +556,13 @@ OFFSET %d
488
556
 
489
557
  # Property-based query: entities with subsidiaries (P355) - parent companies
490
558
  HAS_SUBSIDIARIES_QUERY = """
491
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
559
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
492
560
  ?company wdt:P355 ?subsidiary.
493
561
  OPTIONAL { ?company wdt:P1278 ?lei. }
494
562
  OPTIONAL { ?company wdt:P249 ?ticker. }
495
563
  OPTIONAL { ?company wdt:P17 ?country. }
564
+ OPTIONAL { ?company wdt:P571 ?inception. }
565
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
496
566
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
497
567
  }
498
568
  LIMIT %d
@@ -501,11 +571,13 @@ OFFSET %d
501
571
 
502
572
  # Property-based query: entities owned by another entity (P127) - subsidiaries/companies
503
573
  OWNED_BY_QUERY = """
504
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
574
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
505
575
  ?company wdt:P127 ?owner.
506
576
  OPTIONAL { ?company wdt:P1278 ?lei. }
507
577
  OPTIONAL { ?company wdt:P249 ?ticker. }
508
578
  OPTIONAL { ?company wdt:P17 ?country. }
579
+ OPTIONAL { ?company wdt:P571 ?inception. }
580
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
509
581
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
510
582
  }
511
583
  LIMIT %d
@@ -514,11 +586,13 @@ OFFSET %d
514
586
 
515
587
  # Property-based query: entities with legal form (P1454) - structured companies
516
588
  HAS_LEGAL_FORM_QUERY = """
517
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
589
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
518
590
  ?company wdt:P1454 ?legalForm.
519
591
  OPTIONAL { ?company wdt:P1278 ?lei. }
520
592
  OPTIONAL { ?company wdt:P249 ?ticker. }
521
593
  OPTIONAL { ?company wdt:P17 ?country. }
594
+ OPTIONAL { ?company wdt:P571 ?inception. }
595
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
522
596
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
523
597
  }
524
598
  LIMIT %d
@@ -527,11 +601,13 @@ OFFSET %d
527
601
 
528
602
  # Property-based query: entities with employees count (P1128) - organizations
529
603
  HAS_EMPLOYEES_QUERY = """
530
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
604
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
531
605
  ?company wdt:P1128 ?employees.
532
606
  OPTIONAL { ?company wdt:P1278 ?lei. }
533
607
  OPTIONAL { ?company wdt:P249 ?ticker. }
534
608
  OPTIONAL { ?company wdt:P17 ?country. }
609
+ OPTIONAL { ?company wdt:P571 ?inception. }
610
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
535
611
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
536
612
  }
537
613
  LIMIT %d
@@ -540,11 +616,13 @@ OFFSET %d
540
616
 
541
617
  # Property-based query: entities with revenue (P2139) - companies
542
618
  HAS_REVENUE_QUERY = """
543
- SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
619
+ SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
544
620
  ?company wdt:P2139 ?revenue.
545
621
  OPTIONAL { ?company wdt:P1278 ?lei. }
546
622
  OPTIONAL { ?company wdt:P249 ?ticker. }
547
623
  OPTIONAL { ?company wdt:P17 ?country. }
624
+ OPTIONAL { ?company wdt:P571 ?inception. }
625
+ OPTIONAL { ?company wdt:P576 ?dissolution. }
548
626
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
549
627
  }
550
628
  LIMIT %d
@@ -877,6 +955,27 @@ class WikidataImporter:
877
955
 
878
956
  logger.info(f"Completed all query types: {total_count} total records")
879
957
 
958
+ @staticmethod
959
+ def _parse_wikidata_date(date_str: Optional[str]) -> Optional[str]:
960
+ """
961
+ Parse a Wikidata date string into ISO format (YYYY-MM-DD).
962
+
963
+ Wikidata returns dates like "2020-01-15T00:00:00Z" or just "2020".
964
+ Returns None if the date cannot be parsed.
965
+ """
966
+ if not date_str:
967
+ return None
968
+ # Handle ISO datetime format (e.g., "2020-01-15T00:00:00Z")
969
+ if "T" in date_str:
970
+ return date_str.split("T")[0]
971
+ # Handle year-only format (e.g., "2020")
972
+ if len(date_str) == 4 and date_str.isdigit():
973
+ return f"{date_str}-01-01"
974
+ # Fallback: truncate any other string of 4+ characters to its first 10 (format is not validated)
975
+ if len(date_str) >= 4:
976
+ return date_str[:10] # Take first 10 chars (YYYY-MM-DD)
977
+ return None
978
+
880
979
  def _execute_sparql(self, query: str) -> dict[str, Any]:
881
980
  """Execute a SPARQL query against Wikidata."""
882
981
  params = urllib.parse.urlencode({
@@ -924,10 +1023,15 @@ class WikidataImporter:
924
1023
  ticker = binding.get("ticker", {}).get("value")
925
1024
  exchange_label = binding.get("exchangeLabel", {}).get("value")
926
1025
  country_label = binding.get("countryLabel", {}).get("value")
927
- inception = binding.get("inception", {}).get("value")
1026
+ inception_raw = binding.get("inception", {}).get("value")
1027
+ dissolution_raw = binding.get("dissolution", {}).get("value")
1028
+
1029
+ # Parse dates (Wikidata returns ISO datetime, extract date part)
1030
+ from_date = WikidataImporter._parse_wikidata_date(inception_raw)
1031
+ to_date = WikidataImporter._parse_wikidata_date(dissolution_raw)
928
1032
 
929
1033
  # Build record data
930
- record_data = {
1034
+ record_data: dict[str, Any] = {
931
1035
  "wikidata_id": wikidata_id,
932
1036
  "label": label,
933
1037
  }
@@ -939,8 +1043,10 @@ class WikidataImporter:
939
1043
  record_data["exchange"] = exchange_label
940
1044
  if country_label:
941
1045
  record_data["country"] = country_label
942
- if inception:
943
- record_data["inception"] = inception
1046
+ if from_date:
1047
+ record_data["inception"] = from_date
1048
+ if to_date:
1049
+ record_data["dissolution"] = to_date
944
1050
 
945
1051
  return CompanyRecord(
946
1052
  name=label.strip(),
@@ -948,6 +1054,8 @@ class WikidataImporter:
948
1054
  source_id=wikidata_id,
949
1055
  region=country_label or "",
950
1056
  entity_type=entity_type,
1057
+ from_date=from_date,
1058
+ to_date=to_date,
951
1059
  record=record_data,
952
1060
  )
953
1061