corp-extractor 0.4.0-py3-none-any.whl → 0.9.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. {corp_extractor-0.4.0.dist-info → corp_extractor-0.9.0.dist-info}/METADATA +348 -64
  2. corp_extractor-0.9.0.dist-info/RECORD +76 -0
  3. statement_extractor/__init__.py +10 -1
  4. statement_extractor/cli.py +1663 -17
  5. statement_extractor/data/default_predicates.json +368 -0
  6. statement_extractor/data/statement_taxonomy.json +6972 -0
  7. statement_extractor/database/__init__.py +52 -0
  8. statement_extractor/database/embeddings.py +186 -0
  9. statement_extractor/database/hub.py +520 -0
  10. statement_extractor/database/importers/__init__.py +24 -0
  11. statement_extractor/database/importers/companies_house.py +545 -0
  12. statement_extractor/database/importers/gleif.py +538 -0
  13. statement_extractor/database/importers/sec_edgar.py +375 -0
  14. statement_extractor/database/importers/wikidata.py +1012 -0
  15. statement_extractor/database/importers/wikidata_people.py +632 -0
  16. statement_extractor/database/models.py +230 -0
  17. statement_extractor/database/resolver.py +245 -0
  18. statement_extractor/database/store.py +1609 -0
  19. statement_extractor/document/__init__.py +62 -0
  20. statement_extractor/document/chunker.py +410 -0
  21. statement_extractor/document/context.py +171 -0
  22. statement_extractor/document/deduplicator.py +173 -0
  23. statement_extractor/document/html_extractor.py +246 -0
  24. statement_extractor/document/loader.py +303 -0
  25. statement_extractor/document/pipeline.py +388 -0
  26. statement_extractor/document/summarizer.py +195 -0
  27. statement_extractor/extractor.py +1 -23
  28. statement_extractor/gliner_extraction.py +4 -74
  29. statement_extractor/llm.py +255 -0
  30. statement_extractor/models/__init__.py +89 -0
  31. statement_extractor/models/canonical.py +182 -0
  32. statement_extractor/models/document.py +308 -0
  33. statement_extractor/models/entity.py +102 -0
  34. statement_extractor/models/labels.py +220 -0
  35. statement_extractor/models/qualifiers.py +139 -0
  36. statement_extractor/models/statement.py +101 -0
  37. statement_extractor/models.py +4 -1
  38. statement_extractor/pipeline/__init__.py +39 -0
  39. statement_extractor/pipeline/config.py +129 -0
  40. statement_extractor/pipeline/context.py +177 -0
  41. statement_extractor/pipeline/orchestrator.py +416 -0
  42. statement_extractor/pipeline/registry.py +303 -0
  43. statement_extractor/plugins/__init__.py +55 -0
  44. statement_extractor/plugins/base.py +716 -0
  45. statement_extractor/plugins/extractors/__init__.py +13 -0
  46. statement_extractor/plugins/extractors/base.py +9 -0
  47. statement_extractor/plugins/extractors/gliner2.py +546 -0
  48. statement_extractor/plugins/labelers/__init__.py +29 -0
  49. statement_extractor/plugins/labelers/base.py +9 -0
  50. statement_extractor/plugins/labelers/confidence.py +138 -0
  51. statement_extractor/plugins/labelers/relation_type.py +87 -0
  52. statement_extractor/plugins/labelers/sentiment.py +159 -0
  53. statement_extractor/plugins/labelers/taxonomy.py +386 -0
  54. statement_extractor/plugins/labelers/taxonomy_embedding.py +477 -0
  55. statement_extractor/plugins/pdf/__init__.py +10 -0
  56. statement_extractor/plugins/pdf/pypdf.py +291 -0
  57. statement_extractor/plugins/qualifiers/__init__.py +30 -0
  58. statement_extractor/plugins/qualifiers/base.py +9 -0
  59. statement_extractor/plugins/qualifiers/companies_house.py +185 -0
  60. statement_extractor/plugins/qualifiers/embedding_company.py +420 -0
  61. statement_extractor/plugins/qualifiers/gleif.py +197 -0
  62. statement_extractor/plugins/qualifiers/person.py +785 -0
  63. statement_extractor/plugins/qualifiers/sec_edgar.py +209 -0
  64. statement_extractor/plugins/scrapers/__init__.py +10 -0
  65. statement_extractor/plugins/scrapers/http.py +236 -0
  66. statement_extractor/plugins/splitters/__init__.py +13 -0
  67. statement_extractor/plugins/splitters/base.py +9 -0
  68. statement_extractor/plugins/splitters/t5_gemma.py +293 -0
  69. statement_extractor/plugins/taxonomy/__init__.py +13 -0
  70. statement_extractor/plugins/taxonomy/embedding.py +484 -0
  71. statement_extractor/plugins/taxonomy/mnli.py +291 -0
  72. statement_extractor/scoring.py +8 -8
  73. corp_extractor-0.4.0.dist-info/RECORD +0 -12
  74. {corp_extractor-0.4.0.dist-info → corp_extractor-0.9.0.dist-info}/WHEEL +0 -0
  75. {corp_extractor-0.4.0.dist-info → corp_extractor-0.9.0.dist-info}/entry_points.txt +0 -0
statement_extractor/database/hub.py (new file)
@@ -0,0 +1,520 @@
+ """
+ HuggingFace Hub integration for entity/organization database distribution.
+
+ Provides functionality to:
+ - Download pre-built entity databases from HuggingFace Hub
+ - Upload/publish database updates
+ - Version management for database files
+ - Create "lite" versions without full records for smaller downloads
+ - Optional gzip compression for reduced file sizes
+ """
+
+ import gzip
+ import logging
+ import os
+ import shutil
+ import sqlite3
+ import tempfile
+ from pathlib import Path
+ from typing import Optional
+
+ logger = logging.getLogger(__name__)
+
+ # Default HuggingFace repo for entity database
+ DEFAULT_REPO_ID = "Corp-o-Rate-Community/entity-references"
+ DEFAULT_DB_FILENAME = "entities-lite.db"  # Lite is the default (smaller download)
+ DEFAULT_DB_FULL_FILENAME = "entities.db"
+ DEFAULT_DB_LITE_FILENAME = "entities-lite.db"
+ DEFAULT_DB_COMPRESSED_FILENAME = "entities.db.gz"
+ DEFAULT_DB_LITE_COMPRESSED_FILENAME = "entities-lite.db.gz"
+
+ # Local cache directory
+ DEFAULT_CACHE_DIR = Path.home() / ".cache" / "corp-extractor"
+
+
+ def get_database_path(
+     repo_id: str = DEFAULT_REPO_ID,
+     filename: str = DEFAULT_DB_FILENAME,
+     auto_download: bool = True,
+     full: bool = False,
+ ) -> Optional[Path]:
+     """
+     Get path to entity database, downloading if necessary.
+
+     Args:
+         repo_id: HuggingFace repo ID
+         filename: Database filename (overrides full flag if specified)
+         auto_download: Whether to download if not cached
+         full: If True, get the full database instead of lite
+
+     Returns:
+         Path to database file, or None if not available
+     """
+     # Override filename if full is requested and using default
+     if full and filename == DEFAULT_DB_FILENAME:
+         filename = DEFAULT_DB_FULL_FILENAME
+     # Check if database exists in cache
+     cache_dir = DEFAULT_CACHE_DIR
+
+     # Check common locations
+     possible_paths = [
+         cache_dir / filename,
+         cache_dir / "entities.db",
+         Path.home() / ".cache" / "huggingface" / "hub" / f"datasets--{repo_id.replace('/', '--')}" / filename,
+     ]
+
+     for path in possible_paths:
+         if path.exists():
+             logger.debug(f"Found cached database at {path}")
+             return path
+
+     # Try to download
+     if auto_download:
+         try:
+             return download_database(repo_id=repo_id, filename=filename)
+         except Exception as e:
+             logger.warning(f"Failed to download database: {e}")
+             return None
+
+     return None
+
+
+ def upload_database(
+     db_path: str | Path,
+     repo_id: str = DEFAULT_REPO_ID,
+     filename: str = DEFAULT_DB_FILENAME,
+     commit_message: str = "Update entity database",
+     token: Optional[str] = None,
+ ) -> str:
+     """
+     Upload entity database to HuggingFace Hub.
+
+     Args:
+         db_path: Local path to database file
+         repo_id: HuggingFace repo ID
+         filename: Target filename in repo
+         commit_message: Git commit message
+         token: HuggingFace API token (uses HF_TOKEN env var if not provided)
+
+     Returns:
+         URL of the uploaded file
+     """
+     try:
+         from huggingface_hub import HfApi, create_repo
+     except ImportError:
+         raise ImportError(
+             "huggingface_hub is required for database upload. "
+             "Install with: pip install huggingface_hub"
+         )
+
+     db_path = Path(db_path)
+     if not db_path.exists():
+         raise FileNotFoundError(f"Database file not found: {db_path}")
+
+     token = token or os.environ.get("HF_TOKEN")
+     if not token:
+         raise ValueError("HuggingFace token required. Set HF_TOKEN env var or pass token argument.")
+
+     api = HfApi(token=token)
+
+     # Create repo if it doesn't exist
+     try:
+         create_repo(
+             repo_id=repo_id,
+             repo_type="dataset",
+             exist_ok=True,
+             token=token,
+         )
+     except Exception as e:
+         logger.debug(f"Repo creation note: {e}")
+
+     # Upload file
+     logger.info(f"Uploading database to {repo_id}...")
+
+     result = api.upload_file(
+         path_or_fileobj=str(db_path),
+         path_in_repo=filename,
+         repo_id=repo_id,
+         repo_type="dataset",
+         commit_message=commit_message,
+     )
+
+     logger.info(f"Database uploaded successfully")
+     return result
+
+
+ def get_latest_version(repo_id: str = DEFAULT_REPO_ID) -> Optional[str]:
+     """
+     Get the latest version/commit of the database repo.
+
+     Args:
+         repo_id: HuggingFace repo ID
+
+     Returns:
+         Latest commit SHA or None if unavailable
+     """
+     try:
+         from huggingface_hub import HfApi
+
+         api = HfApi()
+         info = api.repo_info(repo_id=repo_id, repo_type="dataset")
+         return info.sha
+     except Exception as e:
+         logger.debug(f"Failed to get repo info: {e}")
+         return None
+
+
+ def check_for_updates(
+     repo_id: str = DEFAULT_REPO_ID,
+     current_version: Optional[str] = None,
+ ) -> tuple[bool, Optional[str]]:
+     """
+     Check if a newer version of the database is available.
+
+     Args:
+         repo_id: HuggingFace repo ID
+         current_version: Current cached version (commit SHA)
+
+     Returns:
+         Tuple of (update_available: bool, latest_version: str or None)
+     """
+     latest = get_latest_version(repo_id)
+
+     if latest is None:
+         return False, None
+
+     if current_version is None:
+         return True, latest
+
+     return latest != current_version, latest
+
+
+ def create_lite_database(
+     source_db_path: str | Path,
+     output_path: Optional[str | Path] = None,
+ ) -> Path:
+     """
+     Create a lite version of the database without full records.
+
+     The lite version strips the `record` column content (sets to empty {}),
+     significantly reducing file size while keeping embeddings and core fields.
+
+     Args:
+         source_db_path: Path to the full database
+         output_path: Output path for lite database (default: adds -lite suffix)
+
+     Returns:
+         Path to the lite database
+     """
+     source_db_path = Path(source_db_path)
+     if not source_db_path.exists():
+         raise FileNotFoundError(f"Source database not found: {source_db_path}")
+
+     if output_path is None:
+         output_path = source_db_path.with_stem(source_db_path.stem + "-lite")
+     output_path = Path(output_path)
+
+     logger.info(f"Creating lite database from {source_db_path}")
+     logger.info(f"Output: {output_path}")
+
+     # Copy the database first
+     shutil.copy2(source_db_path, output_path)
+
+     # Connect and strip record contents
+     # Use isolation_level=None for autocommit (required for VACUUM)
+     conn = sqlite3.connect(str(output_path), isolation_level=None)
+     try:
+         # Update all records to have empty record JSON
+         conn.execute("BEGIN")
+         cursor = conn.execute("UPDATE organizations SET record = '{}'")
+         updated = cursor.rowcount
+         logger.info(f"Stripped {updated} record fields")
+         conn.execute("COMMIT")
+
+         # Vacuum to reclaim space (must be outside transaction)
+         conn.execute("VACUUM")
+     finally:
+         conn.close()
+
+     # Log size reduction
+     original_size = source_db_path.stat().st_size
+     lite_size = output_path.stat().st_size
+     reduction = (1 - lite_size / original_size) * 100
+
+     logger.info(f"Original size: {original_size / (1024*1024):.1f}MB")
+     logger.info(f"Lite size: {lite_size / (1024*1024):.1f}MB")
+     logger.info(f"Size reduction: {reduction:.1f}%")
+
+     return output_path
+
+
+ def compress_database(
+     db_path: str | Path,
+     output_path: Optional[str | Path] = None,
+ ) -> Path:
+     """
+     Compress a database file using gzip.
+
+     Args:
+         db_path: Path to the database file
+         output_path: Output path for compressed file (default: adds .gz suffix)
+
+     Returns:
+         Path to the compressed file
+     """
+     db_path = Path(db_path)
+     if not db_path.exists():
+         raise FileNotFoundError(f"Database not found: {db_path}")
+
+     if output_path is None:
+         output_path = db_path.with_suffix(db_path.suffix + ".gz")
+     output_path = Path(output_path)
+
+     logger.info(f"Compressing {db_path} to {output_path}")
+
+     with open(db_path, "rb") as f_in:
+         with gzip.open(output_path, "wb", compresslevel=9) as f_out:
+             shutil.copyfileobj(f_in, f_out)
+
+     # Log compression results
+     original_size = db_path.stat().st_size
+     compressed_size = output_path.stat().st_size
+     ratio = (1 - compressed_size / original_size) * 100
+
+     logger.info(f"Original: {original_size / (1024*1024):.1f}MB")
+     logger.info(f"Compressed: {compressed_size / (1024*1024):.1f}MB")
+     logger.info(f"Compression ratio: {ratio:.1f}%")
+
+     return output_path
+
+
+ def decompress_database(
+     compressed_path: str | Path,
+     output_path: Optional[str | Path] = None,
+ ) -> Path:
+     """
+     Decompress a gzipped database file.
+
+     Args:
+         compressed_path: Path to the .gz file
+         output_path: Output path (default: removes .gz suffix)
+
+     Returns:
+         Path to the decompressed file
+     """
+     compressed_path = Path(compressed_path)
+     if not compressed_path.exists():
+         raise FileNotFoundError(f"Compressed file not found: {compressed_path}")
+
+     if output_path is None:
+         if compressed_path.suffix == ".gz":
+             output_path = compressed_path.with_suffix("")
+         else:
+             output_path = compressed_path.with_stem(compressed_path.stem + "-decompressed")
+     output_path = Path(output_path)
+
+     logger.info(f"Decompressing {compressed_path} to {output_path}")
+
+     with gzip.open(compressed_path, "rb") as f_in:
+         with open(output_path, "wb") as f_out:
+             shutil.copyfileobj(f_in, f_out)
+
+     logger.info(f"Decompressed to {output_path}")
+     return output_path
+
+
+ def upload_database_with_variants(
+     db_path: str | Path,
+     repo_id: str = DEFAULT_REPO_ID,
+     commit_message: str = "Update entity database",
+     token: Optional[str] = None,
+     include_lite: bool = True,
+     include_compressed: bool = True,
+     include_readme: bool = True,
+ ) -> dict[str, str]:
+     """
+     Upload entity database with optional lite and compressed variants.
+
+     Creates and uploads:
+     - entities.db (full database)
+     - entities-lite.db (without record data, smaller)
+     - entities.db.gz (compressed full database)
+     - entities-lite.db.gz (compressed lite database)
+     - README.md (dataset card from HUGGINGFACE_README.md)
+
+     Args:
+         db_path: Local path to full database file
+         repo_id: HuggingFace repo ID
+         commit_message: Git commit message
+         token: HuggingFace API token
+         include_lite: Whether to create and upload lite version
+         include_compressed: Whether to create and upload compressed versions
+         include_readme: Whether to upload the README.md dataset card
+
+     Returns:
+         Dict mapping filename to upload URL
+     """
+     try:
+         from huggingface_hub import HfApi, create_repo
+     except ImportError:
+         raise ImportError(
+             "huggingface_hub is required for database upload. "
+             "Install with: pip install huggingface_hub"
+         )
+
+     db_path = Path(db_path)
+     if not db_path.exists():
+         raise FileNotFoundError(f"Database file not found: {db_path}")
+
+     token = token or os.environ.get("HF_TOKEN")
+     if not token:
+         raise ValueError("HuggingFace token required. Set HF_TOKEN env var or pass token argument.")
+
+     api = HfApi(token=token)
+
+     # Create repo if it doesn't exist
+     try:
+         create_repo(
+             repo_id=repo_id,
+             repo_type="dataset",
+             exist_ok=True,
+             token=token,
+         )
+     except Exception as e:
+         logger.debug(f"Repo creation note: {e}")
+
+     results = {}
+
+     # Create temp directory for variants
+     with tempfile.TemporaryDirectory() as temp_dir:
+         temp_path = Path(temp_dir)
+         files_to_upload = []
+
+         # Full database
+         files_to_upload.append((db_path, DEFAULT_DB_FULL_FILENAME))
+
+         # Lite version
+         if include_lite:
+             lite_path = temp_path / DEFAULT_DB_LITE_FILENAME
+             create_lite_database(db_path, lite_path)
+             files_to_upload.append((lite_path, DEFAULT_DB_LITE_FILENAME))
+
+         # Compressed versions
+         if include_compressed:
+             # Compress full database
+             compressed_path = temp_path / DEFAULT_DB_COMPRESSED_FILENAME
+             compress_database(db_path, compressed_path)
+             files_to_upload.append((compressed_path, DEFAULT_DB_COMPRESSED_FILENAME))
+
+             # Compress lite database
+             if include_lite:
+                 lite_compressed_path = temp_path / DEFAULT_DB_LITE_COMPRESSED_FILENAME
+                 lite_path = temp_path / DEFAULT_DB_LITE_FILENAME
+                 compress_database(lite_path, lite_compressed_path)
+                 files_to_upload.append((lite_compressed_path, DEFAULT_DB_LITE_COMPRESSED_FILENAME))
+
+         # Copy all files to a staging directory for upload_folder
+         staging_dir = temp_path / "staging"
+         staging_dir.mkdir()
+
+         for local_path, remote_filename in files_to_upload:
+             shutil.copy2(local_path, staging_dir / remote_filename)
+             logger.info(f"Staged {remote_filename}")
+
+         # Add README.md from HUGGINGFACE_README.md
+         if include_readme:
+             # Look for HUGGINGFACE_README.md in the package directory
+             package_dir = Path(__file__).parent.parent.parent.parent  # Go up to statement-extractor-lib
+             readme_source = package_dir / "HUGGINGFACE_README.md"
+             if readme_source.exists():
+                 shutil.copy2(readme_source, staging_dir / "README.md")
+                 files_to_upload.append((readme_source, "README.md"))
+                 logger.info("Staged README.md from HUGGINGFACE_README.md")
+             else:
+                 logger.warning(f"HUGGINGFACE_README.md not found at {readme_source}")
+
+         # Upload all files in a single commit to avoid LFS pointer issues
+         logger.info(f"Uploading {len(files_to_upload)} files to {repo_id}...")
+         api.upload_folder(
+             folder_path=str(staging_dir),
+             repo_id=repo_id,
+             repo_type="dataset",
+             commit_message=commit_message,
+         )
+
+         for _, remote_filename in files_to_upload:
+             results[remote_filename] = f"https://huggingface.co/datasets/{repo_id}/blob/main/{remote_filename}"
+             logger.info(f"Uploaded {remote_filename}")
+
+     return results
+
+
+ def download_database(
+     repo_id: str = DEFAULT_REPO_ID,
+     filename: str = DEFAULT_DB_FILENAME,
+     revision: Optional[str] = None,
+     cache_dir: Optional[Path] = None,
+     force_download: bool = False,
+     prefer_compressed: bool = True,
+ ) -> Path:
+     """
+     Download entity database from HuggingFace Hub.
+
+     Args:
+         repo_id: HuggingFace repo ID (e.g., "Corp-o-Rate-Community/entity-references")
+         filename: Database filename in the repo
+         revision: Git revision (branch, tag, commit) or None for latest
+         cache_dir: Local cache directory
+         force_download: Force re-download even if cached
+         prefer_compressed: Try to download compressed version first
+
+     Returns:
+         Path to the downloaded database file (decompressed if was .gz)
+     """
+     try:
+         from huggingface_hub import hf_hub_download
+     except ImportError:
+         raise ImportError(
+             "huggingface_hub is required for database download. "
+             "Install with: pip install huggingface_hub"
+         )
+
+     cache_dir = cache_dir or DEFAULT_CACHE_DIR
+     cache_dir.mkdir(parents=True, exist_ok=True)
+
+     # Try compressed version first if preferred
+     download_filename = filename
+     if prefer_compressed and not filename.endswith(".gz"):
+         compressed_filename = filename + ".gz"
+         try:
+             logger.info(f"Trying compressed version: {compressed_filename}")
+             local_path = hf_hub_download(
+                 repo_id=repo_id,
+                 filename=compressed_filename,
+                 revision=revision,
+                 cache_dir=str(cache_dir),
+                 force_download=force_download,
+                 repo_type="dataset",
+             )
+             # Decompress to final location
+             final_path = cache_dir / filename
+             decompress_database(local_path, final_path)
+             logger.info(f"Database downloaded and decompressed to {final_path}")
+             return final_path
+         except Exception as e:
+             logger.debug(f"Compressed version not available: {e}")
+
+     # Download uncompressed version
+     logger.info(f"Downloading entity database from {repo_id}...")
+
+     local_path = hf_hub_download(
+         repo_id=repo_id,
+         filename=download_filename,
+         revision=revision,
+         cache_dir=str(cache_dir),
+         force_download=force_download,
+         repo_type="dataset",
+     )
+
+     logger.info(f"Database downloaded to {local_path}")
+     return Path(local_path)
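
Taken together, hub.py gives 0.9.0 a download-on-demand path for the entity database plus maintainer-side helpers for building and publishing the smaller variants. Below is a minimal usage sketch based only on the signatures shown in this hunk; the variable names are illustrative, the repo ID and filenames are the module defaults, and the optional huggingface_hub dependency plus network access are assumed.

# Sketch only: composes the helpers added in hub.py; not taken from the package docs.
from statement_extractor.database.hub import (
    check_for_updates,
    compress_database,
    create_lite_database,
    get_database_path,
)

# Consumer side: resolve the default lite database, downloading it on first use.
db_path = get_database_path(auto_download=True)    # returns None if not cached and download fails
full_path = get_database_path(full=True)           # resolves entities.db instead of the lite copy

# Ask the Hub whether a newer revision exists than a locally cached commit SHA.
update_available, latest_sha = check_for_updates(current_version=None)

# Maintainer side: derive the smaller variants before publishing.
if full_path is not None:
    lite_path = create_lite_database(full_path)     # strips the `record` column, then VACUUMs
    gz_path = compress_database(lite_path)          # writes a gzip copy alongside the input

The publish step itself is covered by upload_database_with_variants(), which stages entities.db, the lite copy, their .gz counterparts, and the dataset-card README into a single upload_folder commit so the Hub repo is updated atomically.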
statement_extractor/database/importers/__init__.py (new file)
@@ -0,0 +1,24 @@
+ """
+ Data importers for the entity database.
+
+ Provides importers for various data sources:
+ - GLEIF: Legal Entity Identifier data
+ - SEC Edgar: US SEC company data
+ - Companies House: UK company data
+ - Wikidata: Wikipedia/Wikidata organization data
+ - Wikidata People: Notable people from Wikipedia/Wikidata
+ """
+
+ from .gleif import GleifImporter
+ from .sec_edgar import SecEdgarImporter
+ from .companies_house import CompaniesHouseImporter
+ from .wikidata import WikidataImporter
+ from .wikidata_people import WikidataPeopleImporter
+
+ __all__ = [
+     "GleifImporter",
+     "SecEdgarImporter",
+     "CompaniesHouseImporter",
+     "WikidataImporter",
+     "WikidataPeopleImporter",
+ ]
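
This __init__.py simply re-exports one importer class per upstream source, so downstream code can import them from the package level rather than the individual submodules. A minimal sketch of the import path only; constructor and run arguments live in the per-source modules (companies_house.py, gleif.py, sec_edgar.py, wikidata.py, wikidata_people.py) and are not assumed here.

# Only the re-exported names below are confirmed by this hunk.
from statement_extractor.database.importers import (
    CompaniesHouseImporter,
    GleifImporter,
    SecEdgarImporter,
    WikidataImporter,
    WikidataPeopleImporter,
)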