corp-extractor 0.9.0__py3-none-any.whl → 0.9.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {corp_extractor-0.9.0.dist-info → corp_extractor-0.9.4.dist-info}/METADATA +72 -11
- {corp_extractor-0.9.0.dist-info → corp_extractor-0.9.4.dist-info}/RECORD +34 -27
- statement_extractor/cli.py +1317 -101
- statement_extractor/database/embeddings.py +45 -0
- statement_extractor/database/hub.py +86 -136
- statement_extractor/database/importers/__init__.py +10 -2
- statement_extractor/database/importers/companies_house.py +16 -2
- statement_extractor/database/importers/companies_house_officers.py +431 -0
- statement_extractor/database/importers/gleif.py +23 -0
- statement_extractor/database/importers/import_utils.py +264 -0
- statement_extractor/database/importers/sec_edgar.py +17 -0
- statement_extractor/database/importers/sec_form4.py +512 -0
- statement_extractor/database/importers/wikidata.py +151 -43
- statement_extractor/database/importers/wikidata_dump.py +2282 -0
- statement_extractor/database/importers/wikidata_people.py +867 -325
- statement_extractor/database/migrate_v2.py +852 -0
- statement_extractor/database/models.py +155 -7
- statement_extractor/database/schema_v2.py +409 -0
- statement_extractor/database/seed_data.py +359 -0
- statement_extractor/database/store.py +3449 -233
- statement_extractor/document/deduplicator.py +10 -12
- statement_extractor/extractor.py +1 -1
- statement_extractor/models/__init__.py +3 -2
- statement_extractor/models/statement.py +15 -17
- statement_extractor/models.py +1 -1
- statement_extractor/pipeline/context.py +5 -5
- statement_extractor/pipeline/orchestrator.py +12 -12
- statement_extractor/plugins/base.py +17 -17
- statement_extractor/plugins/extractors/gliner2.py +28 -28
- statement_extractor/plugins/qualifiers/embedding_company.py +7 -5
- statement_extractor/plugins/qualifiers/person.py +120 -53
- statement_extractor/plugins/splitters/t5_gemma.py +35 -39
- {corp_extractor-0.9.0.dist-info → corp_extractor-0.9.4.dist-info}/WHEEL +0 -0
- {corp_extractor-0.9.0.dist-info → corp_extractor-0.9.4.dist-info}/entry_points.txt +0 -0
|
@@ -48,10 +48,12 @@ WIKIDATA_SPARQL_URL = "https://query.wikidata.org/sparql"
|
|
|
48
48
|
# Simpler SPARQL query - directly query for companies with LEI codes (fastest, most reliable)
|
|
49
49
|
# Avoids property path wildcards (wdt:P279*) which timeout on Wikidata
|
|
50
50
|
LEI_COMPANY_QUERY = """
|
|
51
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
51
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
52
52
|
?company wdt:P1278 ?lei.
|
|
53
53
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
54
54
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
55
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
56
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
55
57
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
56
58
|
}
|
|
57
59
|
LIMIT %d
|
|
@@ -60,10 +62,12 @@ OFFSET %d
|
|
|
60
62
|
|
|
61
63
|
# Query for companies with stock exchange listing (has ticker)
|
|
62
64
|
TICKER_COMPANY_QUERY = """
|
|
63
|
-
SELECT ?company ?companyLabel ?ticker ?exchange ?exchangeLabel ?country ?countryLabel WHERE {
|
|
65
|
+
SELECT ?company ?companyLabel ?ticker ?exchange ?exchangeLabel ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
64
66
|
?company wdt:P414 ?exchange.
|
|
65
67
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
66
68
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
69
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
70
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
67
71
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
68
72
|
}
|
|
69
73
|
LIMIT %d
|
|
@@ -72,11 +76,13 @@ OFFSET %d
|
|
|
72
76
|
|
|
73
77
|
# Query for direct instances of public company (Q891723) - no subclass traversal
|
|
74
78
|
PUBLIC_COMPANY_QUERY = """
|
|
75
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
79
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
76
80
|
?company wdt:P31 wd:Q891723.
|
|
77
81
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
78
82
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
79
83
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
84
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
85
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
80
86
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
81
87
|
}
|
|
82
88
|
LIMIT %d
|
|
@@ -85,11 +91,13 @@ OFFSET %d
|
|
|
85
91
|
|
|
86
92
|
# Query for direct instances of business enterprise (Q4830453) - no subclass traversal
|
|
87
93
|
BUSINESS_QUERY = """
|
|
88
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
94
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
89
95
|
?company wdt:P31 wd:Q4830453.
|
|
90
96
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
91
97
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
92
98
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
99
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
100
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
93
101
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
94
102
|
}
|
|
95
103
|
LIMIT %d
|
|
@@ -98,11 +106,13 @@ OFFSET %d
|
|
|
98
106
|
|
|
99
107
|
# Query for direct instances of organization (Q43229) - includes NGOs, gov agencies, etc.
|
|
100
108
|
ORGANIZATION_QUERY = """
|
|
101
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
109
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
102
110
|
?company wdt:P31 wd:Q43229.
|
|
103
111
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
104
112
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
105
113
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
114
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
115
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
106
116
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
107
117
|
}
|
|
108
118
|
LIMIT %d
|
|
@@ -111,11 +121,13 @@ OFFSET %d
|
|
|
111
121
|
|
|
112
122
|
# Query for non-profit organizations (Q163740)
|
|
113
123
|
NONPROFIT_QUERY = """
|
|
114
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
124
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
115
125
|
?company wdt:P31 wd:Q163740.
|
|
116
126
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
117
127
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
118
128
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
129
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
130
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
119
131
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
120
132
|
}
|
|
121
133
|
LIMIT %d
|
|
@@ -124,11 +136,13 @@ OFFSET %d
|
|
|
124
136
|
|
|
125
137
|
# Query for government agencies (Q327333)
|
|
126
138
|
GOV_AGENCY_QUERY = """
|
|
127
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
139
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
128
140
|
?company wdt:P31 wd:Q327333.
|
|
129
141
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
130
142
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
131
143
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
144
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
145
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
132
146
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
133
147
|
}
|
|
134
148
|
LIMIT %d
|
|
@@ -137,11 +151,13 @@ OFFSET %d
|
|
|
137
151
|
|
|
138
152
|
# Query for enterprises (Q6881511) - broader than business enterprise
|
|
139
153
|
ENTERPRISE_QUERY = """
|
|
140
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
154
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
141
155
|
?company wdt:P31 wd:Q6881511.
|
|
142
156
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
143
157
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
144
158
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
159
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
160
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
145
161
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
146
162
|
}
|
|
147
163
|
LIMIT %d
|
|
@@ -150,11 +166,13 @@ OFFSET %d
|
|
|
150
166
|
|
|
151
167
|
# Query for corporations (Q167037)
|
|
152
168
|
CORPORATION_QUERY = """
|
|
153
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
169
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
154
170
|
?company wdt:P31 wd:Q167037.
|
|
155
171
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
156
172
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
157
173
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
174
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
175
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
158
176
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
159
177
|
}
|
|
160
178
|
LIMIT %d
|
|
@@ -163,11 +181,13 @@ OFFSET %d
|
|
|
163
181
|
|
|
164
182
|
# Query for subsidiaries (Q658255)
|
|
165
183
|
SUBSIDIARY_QUERY = """
|
|
166
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
184
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
167
185
|
?company wdt:P31 wd:Q658255.
|
|
168
186
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
169
187
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
170
188
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
189
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
190
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
171
191
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
172
192
|
}
|
|
173
193
|
LIMIT %d
|
|
@@ -176,11 +196,13 @@ OFFSET %d
|
|
|
176
196
|
|
|
177
197
|
# Query for banks (Q22687)
|
|
178
198
|
BANK_QUERY = """
|
|
179
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
199
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
180
200
|
?company wdt:P31 wd:Q22687.
|
|
181
201
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
182
202
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
183
203
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
204
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
205
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
184
206
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
185
207
|
}
|
|
186
208
|
LIMIT %d
|
|
@@ -189,11 +211,13 @@ OFFSET %d
|
|
|
189
211
|
|
|
190
212
|
# Query for insurance companies (Q1145276)
|
|
191
213
|
INSURANCE_QUERY = """
|
|
192
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
214
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
193
215
|
?company wdt:P31 wd:Q1145276.
|
|
194
216
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
195
217
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
196
218
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
219
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
220
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
197
221
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
198
222
|
}
|
|
199
223
|
LIMIT %d
|
|
@@ -202,11 +226,13 @@ OFFSET %d
|
|
|
202
226
|
|
|
203
227
|
# Query for airlines (Q46970)
|
|
204
228
|
AIRLINE_QUERY = """
|
|
205
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
229
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
206
230
|
?company wdt:P31 wd:Q46970.
|
|
207
231
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
208
232
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
209
233
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
234
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
235
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
210
236
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
211
237
|
}
|
|
212
238
|
LIMIT %d
|
|
@@ -215,11 +241,13 @@ OFFSET %d
|
|
|
215
241
|
|
|
216
242
|
# Query for law firms (Q613142)
|
|
217
243
|
LAW_FIRM_QUERY = """
|
|
218
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
244
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
219
245
|
?company wdt:P31 wd:Q613142.
|
|
220
246
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
221
247
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
222
248
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
249
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
250
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
223
251
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
224
252
|
}
|
|
225
253
|
LIMIT %d
|
|
@@ -228,11 +256,13 @@ OFFSET %d
|
|
|
228
256
|
|
|
229
257
|
# Query for educational institutions (Q2385804)
|
|
230
258
|
EDUCATIONAL_QUERY = """
|
|
231
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
259
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
232
260
|
?company wdt:P31 wd:Q2385804.
|
|
233
261
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
234
262
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
235
263
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
264
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
265
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
236
266
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
237
267
|
}
|
|
238
268
|
LIMIT %d
|
|
@@ -241,11 +271,13 @@ OFFSET %d
|
|
|
241
271
|
|
|
242
272
|
# Query for universities (Q3918)
|
|
243
273
|
UNIVERSITY_QUERY = """
|
|
244
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
274
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
245
275
|
?company wdt:P31 wd:Q3918.
|
|
246
276
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
247
277
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
248
278
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
279
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
280
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
249
281
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
250
282
|
}
|
|
251
283
|
LIMIT %d
|
|
@@ -254,11 +286,13 @@ OFFSET %d
|
|
|
254
286
|
|
|
255
287
|
# Query for research institutes (Q31855)
|
|
256
288
|
RESEARCH_INSTITUTE_QUERY = """
|
|
257
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
289
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
258
290
|
?company wdt:P31 wd:Q31855.
|
|
259
291
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
260
292
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
261
293
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
294
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
295
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
262
296
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
263
297
|
}
|
|
264
298
|
LIMIT %d
|
|
@@ -267,11 +301,13 @@ OFFSET %d
|
|
|
267
301
|
|
|
268
302
|
# Query for political parties (Q7278)
|
|
269
303
|
POLITICAL_PARTY_QUERY = """
|
|
270
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
304
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
271
305
|
?company wdt:P31 wd:Q7278.
|
|
272
306
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
273
307
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
274
308
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
309
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
310
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
275
311
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
276
312
|
}
|
|
277
313
|
LIMIT %d
|
|
@@ -280,11 +316,13 @@ OFFSET %d
|
|
|
280
316
|
|
|
281
317
|
# Query for trade unions (Q178790)
|
|
282
318
|
TRADE_UNION_QUERY = """
|
|
283
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
319
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
284
320
|
?company wdt:P31 wd:Q178790.
|
|
285
321
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
286
322
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
287
323
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
324
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
325
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
288
326
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
289
327
|
}
|
|
290
328
|
LIMIT %d
|
|
@@ -293,11 +331,13 @@ OFFSET %d
|
|
|
293
331
|
|
|
294
332
|
# Query for NGOs (Q79913)
|
|
295
333
|
NGO_QUERY = """
|
|
296
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
334
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
297
335
|
?company wdt:P31 wd:Q79913.
|
|
298
336
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
299
337
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
300
338
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
339
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
340
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
301
341
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
302
342
|
}
|
|
303
343
|
LIMIT %d
|
|
@@ -306,11 +346,13 @@ OFFSET %d
|
|
|
306
346
|
|
|
307
347
|
# Query for foundations (Q157031)
|
|
308
348
|
FOUNDATION_QUERY = """
|
|
309
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
349
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
310
350
|
?company wdt:P31 wd:Q157031.
|
|
311
351
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
312
352
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
313
353
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
354
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
355
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
314
356
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
315
357
|
}
|
|
316
358
|
LIMIT %d
|
|
@@ -319,11 +361,13 @@ OFFSET %d
|
|
|
319
361
|
|
|
320
362
|
# Query for international organizations (Q484652)
|
|
321
363
|
INTL_ORG_QUERY = """
|
|
322
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
364
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
323
365
|
?company wdt:P31 wd:Q484652.
|
|
324
366
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
325
367
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
326
368
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
369
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
370
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
327
371
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
328
372
|
}
|
|
329
373
|
LIMIT %d
|
|
@@ -332,11 +376,13 @@ OFFSET %d
|
|
|
332
376
|
|
|
333
377
|
# Query for sports teams/clubs (Q476028)
|
|
334
378
|
SPORTS_CLUB_QUERY = """
|
|
335
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
379
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
336
380
|
?company wdt:P31 wd:Q476028.
|
|
337
381
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
338
382
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
339
383
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
384
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
385
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
340
386
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
341
387
|
}
|
|
342
388
|
LIMIT %d
|
|
@@ -345,11 +391,13 @@ OFFSET %d
|
|
|
345
391
|
|
|
346
392
|
# Query for hospitals (Q16917)
|
|
347
393
|
HOSPITAL_QUERY = """
|
|
348
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
394
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
349
395
|
?company wdt:P31 wd:Q16917.
|
|
350
396
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
351
397
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
352
398
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
399
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
400
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
353
401
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
354
402
|
}
|
|
355
403
|
LIMIT %d
|
|
@@ -358,11 +406,13 @@ OFFSET %d
|
|
|
358
406
|
|
|
359
407
|
# Query for record labels (Q18127)
|
|
360
408
|
RECORD_LABEL_QUERY = """
|
|
361
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
409
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
362
410
|
?company wdt:P31 wd:Q18127.
|
|
363
411
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
364
412
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
365
413
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
414
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
415
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
366
416
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
367
417
|
}
|
|
368
418
|
LIMIT %d
|
|
@@ -371,11 +421,13 @@ OFFSET %d
|
|
|
371
421
|
|
|
372
422
|
# Query for film studios (Q1366047)
|
|
373
423
|
FILM_STUDIO_QUERY = """
|
|
374
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
424
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
375
425
|
?company wdt:P31 wd:Q1366047.
|
|
376
426
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
377
427
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
378
428
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
429
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
430
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
379
431
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
380
432
|
}
|
|
381
433
|
LIMIT %d
|
|
@@ -384,11 +436,13 @@ OFFSET %d
|
|
|
384
436
|
|
|
385
437
|
# Query for video game companies (Q1137109)
|
|
386
438
|
VIDEO_GAME_COMPANY_QUERY = """
|
|
387
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
439
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
388
440
|
?company wdt:P31 wd:Q1137109.
|
|
389
441
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
390
442
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
391
443
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
444
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
445
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
392
446
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
393
447
|
}
|
|
394
448
|
LIMIT %d
|
|
@@ -397,11 +451,13 @@ OFFSET %d
|
|
|
397
451
|
|
|
398
452
|
# Query for pharmaceutical companies (Q507619)
|
|
399
453
|
PHARMA_QUERY = """
|
|
400
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
454
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
401
455
|
?company wdt:P31 wd:Q507619.
|
|
402
456
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
403
457
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
404
458
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
459
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
460
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
405
461
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
406
462
|
}
|
|
407
463
|
LIMIT %d
|
|
@@ -410,11 +466,13 @@ OFFSET %d
|
|
|
410
466
|
|
|
411
467
|
# Query for tech companies (Q2979960)
|
|
412
468
|
TECH_COMPANY_QUERY = """
|
|
413
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
469
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
414
470
|
?company wdt:P31 wd:Q2979960.
|
|
415
471
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
416
472
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
417
473
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
474
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
475
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
418
476
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
419
477
|
}
|
|
420
478
|
LIMIT %d
|
|
@@ -423,11 +481,13 @@ OFFSET %d
|
|
|
423
481
|
|
|
424
482
|
# Query for retailers (Q1631111)
|
|
425
483
|
RETAILER_QUERY = """
|
|
426
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
484
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
427
485
|
?company wdt:P31 wd:Q1631111.
|
|
428
486
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
429
487
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
430
488
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
489
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
490
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
431
491
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
432
492
|
}
|
|
433
493
|
LIMIT %d
|
|
@@ -436,11 +496,13 @@ OFFSET %d
|
|
|
436
496
|
|
|
437
497
|
# Query for manufacturers (Q187652)
|
|
438
498
|
MANUFACTURER_QUERY = """
|
|
439
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
499
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
440
500
|
?company wdt:P31 wd:Q187652.
|
|
441
501
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
442
502
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
443
503
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
504
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
505
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
444
506
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
445
507
|
}
|
|
446
508
|
LIMIT %d
|
|
@@ -449,11 +511,13 @@ OFFSET %d
|
|
|
449
511
|
|
|
450
512
|
# Query for conglomerates (Q206652)
|
|
451
513
|
CONGLOMERATE_QUERY = """
|
|
452
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
514
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
453
515
|
?company wdt:P31 wd:Q206652.
|
|
454
516
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
455
517
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
456
518
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
519
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
520
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
457
521
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
458
522
|
}
|
|
459
523
|
LIMIT %d
|
|
@@ -462,11 +526,13 @@ OFFSET %d
|
|
|
462
526
|
|
|
463
527
|
# Query for investment companies (Q380649)
|
|
464
528
|
INVESTMENT_COMPANY_QUERY = """
|
|
465
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
529
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
466
530
|
?company wdt:P31 wd:Q380649.
|
|
467
531
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
468
532
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
469
533
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
534
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
535
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
470
536
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
471
537
|
}
|
|
472
538
|
LIMIT %d
|
|
@@ -475,11 +541,13 @@ OFFSET %d
|
|
|
475
541
|
|
|
476
542
|
# Property-based query: entities with a CEO (P169) - likely companies
|
|
477
543
|
HAS_CEO_QUERY = """
|
|
478
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
544
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
479
545
|
?company wdt:P169 ?ceo.
|
|
480
546
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
481
547
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
482
548
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
549
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
550
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
483
551
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
484
552
|
}
|
|
485
553
|
LIMIT %d
|
|
@@ -488,11 +556,13 @@ OFFSET %d
|
|
|
488
556
|
|
|
489
557
|
# Property-based query: entities with subsidiaries (P355) - parent companies
|
|
490
558
|
HAS_SUBSIDIARIES_QUERY = """
|
|
491
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
559
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
492
560
|
?company wdt:P355 ?subsidiary.
|
|
493
561
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
494
562
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
495
563
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
564
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
565
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
496
566
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
497
567
|
}
|
|
498
568
|
LIMIT %d
|
|
@@ -501,11 +571,13 @@ OFFSET %d
|
|
|
501
571
|
|
|
502
572
|
# Property-based query: entities owned by another entity (P127) - subsidiaries/companies
|
|
503
573
|
OWNED_BY_QUERY = """
|
|
504
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
574
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
505
575
|
?company wdt:P127 ?owner.
|
|
506
576
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
507
577
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
508
578
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
579
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
580
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
509
581
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
510
582
|
}
|
|
511
583
|
LIMIT %d
|
|
@@ -514,11 +586,13 @@ OFFSET %d
|
|
|
514
586
|
|
|
515
587
|
# Property-based query: entities with legal form (P1454) - structured companies
|
|
516
588
|
HAS_LEGAL_FORM_QUERY = """
|
|
517
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
589
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
518
590
|
?company wdt:P1454 ?legalForm.
|
|
519
591
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
520
592
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
521
593
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
594
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
595
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
522
596
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
523
597
|
}
|
|
524
598
|
LIMIT %d
|
|
@@ -527,11 +601,13 @@ OFFSET %d
|
|
|
527
601
|
|
|
528
602
|
# Property-based query: entities with employees count (P1128) - organizations
|
|
529
603
|
HAS_EMPLOYEES_QUERY = """
|
|
530
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
604
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
531
605
|
?company wdt:P1128 ?employees.
|
|
532
606
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
533
607
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
534
608
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
609
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
610
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
535
611
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
536
612
|
}
|
|
537
613
|
LIMIT %d
|
|
@@ -540,11 +616,13 @@ OFFSET %d
|
|
|
540
616
|
|
|
541
617
|
# Property-based query: entities with revenue (P2139) - companies
|
|
542
618
|
HAS_REVENUE_QUERY = """
|
|
543
|
-
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel WHERE {
|
|
619
|
+
SELECT ?company ?companyLabel ?lei ?ticker ?country ?countryLabel ?inception ?dissolution WHERE {
|
|
544
620
|
?company wdt:P2139 ?revenue.
|
|
545
621
|
OPTIONAL { ?company wdt:P1278 ?lei. }
|
|
546
622
|
OPTIONAL { ?company wdt:P249 ?ticker. }
|
|
547
623
|
OPTIONAL { ?company wdt:P17 ?country. }
|
|
624
|
+
OPTIONAL { ?company wdt:P571 ?inception. }
|
|
625
|
+
OPTIONAL { ?company wdt:P576 ?dissolution. }
|
|
548
626
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
|
|
549
627
|
}
|
|
550
628
|
LIMIT %d
|
|
@@ -877,6 +955,27 @@ class WikidataImporter:
|
|
|
877
955
|
|
|
878
956
|
logger.info(f"Completed all query types: {total_count} total records")
|
|
879
957
|
|
|
958
|
+
@staticmethod
def _parse_wikidata_date(date_str: Optional[str]) -> Optional[str]:
    """
    Parse a Wikidata date string into ISO format (YYYY-MM-DD).

    Accepted inputs are ISO datetimes ("2020-01-15T00:00:00Z"), plain
    dates ("2020-01-15"), year-month values ("2020-01") and year-only
    values ("2020"). Missing month/day components default to "01". A
    leading sign on the year (e.g. "-0500-01-01T00:00:00Z") is kept.

    Args:
        date_str: Raw date value, or None.

    Returns:
        The date portion as a string, or None when the input is empty or
        does not have the shape of a date (for example a URI or other
        placeholder value received in place of a real date).
    """
    import re  # local import: keeps this helper usable on its own

    if not date_str:
        return None

    # Drop the time component of an ISO datetime, if there is one.
    date_part = date_str.split("T", 1)[0].strip()

    # Validate the shape rather than slicing blindly: previously any string
    # of four or more characters was truncated to ten characters and
    # returned as if it were a date.
    match = re.fullmatch(
        r"([+-]?[0-9]{4,})(?:-([0-9]{2})(?:-([0-9]{2}))?)?",
        date_part,
    )
    if match is None:
        return None

    year, month, day = match.groups()
    # Pad partial dates so the result is always YYYY-MM-DD.
    return f"{year}-{month or '01'}-{day or '01'}"
|
|
978
|
+
|
|
880
979
|
def _execute_sparql(self, query: str) -> dict[str, Any]:
|
|
881
980
|
"""Execute a SPARQL query against Wikidata."""
|
|
882
981
|
params = urllib.parse.urlencode({
|
|
@@ -924,10 +1023,15 @@ class WikidataImporter:
|
|
|
924
1023
|
ticker = binding.get("ticker", {}).get("value")
|
|
925
1024
|
exchange_label = binding.get("exchangeLabel", {}).get("value")
|
|
926
1025
|
country_label = binding.get("countryLabel", {}).get("value")
|
|
927
|
-
|
|
1026
|
+
inception_raw = binding.get("inception", {}).get("value")
|
|
1027
|
+
dissolution_raw = binding.get("dissolution", {}).get("value")
|
|
1028
|
+
|
|
1029
|
+
# Parse dates (Wikidata returns ISO datetime, extract date part)
|
|
1030
|
+
from_date = WikidataImporter._parse_wikidata_date(inception_raw)
|
|
1031
|
+
to_date = WikidataImporter._parse_wikidata_date(dissolution_raw)
|
|
928
1032
|
|
|
929
1033
|
# Build record data
|
|
930
|
-
record_data = {
|
|
1034
|
+
record_data: dict[str, Any] = {
|
|
931
1035
|
"wikidata_id": wikidata_id,
|
|
932
1036
|
"label": label,
|
|
933
1037
|
}
|
|
@@ -939,8 +1043,10 @@ class WikidataImporter:
|
|
|
939
1043
|
record_data["exchange"] = exchange_label
|
|
940
1044
|
if country_label:
|
|
941
1045
|
record_data["country"] = country_label
|
|
942
|
-
if
|
|
943
|
-
record_data["inception"] =
|
|
1046
|
+
if from_date:
|
|
1047
|
+
record_data["inception"] = from_date
|
|
1048
|
+
if to_date:
|
|
1049
|
+
record_data["dissolution"] = to_date
|
|
944
1050
|
|
|
945
1051
|
return CompanyRecord(
|
|
946
1052
|
name=label.strip(),
|
|
@@ -948,6 +1054,8 @@ class WikidataImporter:
|
|
|
948
1054
|
source_id=wikidata_id,
|
|
949
1055
|
region=country_label or "",
|
|
950
1056
|
entity_type=entity_type,
|
|
1057
|
+
from_date=from_date,
|
|
1058
|
+
to_date=to_date,
|
|
951
1059
|
record=record_data,
|
|
952
1060
|
)
|
|
953
1061
|
|