corp-extractor 0.5.0__py3-none-any.whl → 0.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67):
  1. {corp_extractor-0.5.0.dist-info → corp_extractor-0.9.3.dist-info}/METADATA +228 -30
  2. corp_extractor-0.9.3.dist-info/RECORD +79 -0
  3. statement_extractor/__init__.py +1 -1
  4. statement_extractor/cli.py +2030 -24
  5. statement_extractor/data/statement_taxonomy.json +6949 -1159
  6. statement_extractor/database/__init__.py +52 -0
  7. statement_extractor/database/embeddings.py +186 -0
  8. statement_extractor/database/hub.py +428 -0
  9. statement_extractor/database/importers/__init__.py +32 -0
  10. statement_extractor/database/importers/companies_house.py +559 -0
  11. statement_extractor/database/importers/companies_house_officers.py +431 -0
  12. statement_extractor/database/importers/gleif.py +561 -0
  13. statement_extractor/database/importers/sec_edgar.py +392 -0
  14. statement_extractor/database/importers/sec_form4.py +512 -0
  15. statement_extractor/database/importers/wikidata.py +1120 -0
  16. statement_extractor/database/importers/wikidata_dump.py +1951 -0
  17. statement_extractor/database/importers/wikidata_people.py +1130 -0
  18. statement_extractor/database/models.py +254 -0
  19. statement_extractor/database/resolver.py +245 -0
  20. statement_extractor/database/store.py +3034 -0
  21. statement_extractor/document/__init__.py +62 -0
  22. statement_extractor/document/chunker.py +410 -0
  23. statement_extractor/document/context.py +171 -0
  24. statement_extractor/document/deduplicator.py +171 -0
  25. statement_extractor/document/html_extractor.py +246 -0
  26. statement_extractor/document/loader.py +303 -0
  27. statement_extractor/document/pipeline.py +388 -0
  28. statement_extractor/document/summarizer.py +195 -0
  29. statement_extractor/extractor.py +1 -1
  30. statement_extractor/models/__init__.py +19 -3
  31. statement_extractor/models/canonical.py +44 -1
  32. statement_extractor/models/document.py +308 -0
  33. statement_extractor/models/labels.py +47 -18
  34. statement_extractor/models/qualifiers.py +51 -3
  35. statement_extractor/models/statement.py +39 -15
  36. statement_extractor/models.py +1 -1
  37. statement_extractor/pipeline/config.py +6 -11
  38. statement_extractor/pipeline/context.py +5 -5
  39. statement_extractor/pipeline/orchestrator.py +90 -121
  40. statement_extractor/pipeline/registry.py +52 -46
  41. statement_extractor/plugins/__init__.py +20 -8
  42. statement_extractor/plugins/base.py +348 -78
  43. statement_extractor/plugins/extractors/gliner2.py +38 -28
  44. statement_extractor/plugins/labelers/taxonomy.py +18 -5
  45. statement_extractor/plugins/labelers/taxonomy_embedding.py +17 -6
  46. statement_extractor/plugins/pdf/__init__.py +10 -0
  47. statement_extractor/plugins/pdf/pypdf.py +291 -0
  48. statement_extractor/plugins/qualifiers/__init__.py +11 -0
  49. statement_extractor/plugins/qualifiers/companies_house.py +14 -3
  50. statement_extractor/plugins/qualifiers/embedding_company.py +422 -0
  51. statement_extractor/plugins/qualifiers/gleif.py +14 -3
  52. statement_extractor/plugins/qualifiers/person.py +588 -14
  53. statement_extractor/plugins/qualifiers/sec_edgar.py +14 -3
  54. statement_extractor/plugins/scrapers/__init__.py +10 -0
  55. statement_extractor/plugins/scrapers/http.py +236 -0
  56. statement_extractor/plugins/splitters/t5_gemma.py +176 -75
  57. statement_extractor/plugins/taxonomy/embedding.py +193 -46
  58. statement_extractor/plugins/taxonomy/mnli.py +16 -4
  59. statement_extractor/scoring.py +8 -8
  60. corp_extractor-0.5.0.dist-info/RECORD +0 -55
  61. statement_extractor/plugins/canonicalizers/__init__.py +0 -17
  62. statement_extractor/plugins/canonicalizers/base.py +0 -9
  63. statement_extractor/plugins/canonicalizers/location.py +0 -219
  64. statement_extractor/plugins/canonicalizers/organization.py +0 -230
  65. statement_extractor/plugins/canonicalizers/person.py +0 -242
  66. {corp_extractor-0.5.0.dist-info → corp_extractor-0.9.3.dist-info}/WHEEL +0 -0
  67. {corp_extractor-0.5.0.dist-info → corp_extractor-0.9.3.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,9 @@
1
1
  """
2
2
  CompaniesHouseQualifierPlugin - Qualifies UK ORG entities.
3
3
 
4
+ DEPRECATED: Use EmbeddingCompanyQualifier instead, which uses a local
5
+ embedding database with pre-loaded Companies House data for faster, offline matching.
6
+
4
7
  Uses the UK Companies House API to:
5
8
  - Look up company number by name
6
9
  - Retrieve company details, jurisdiction, officers
@@ -8,11 +11,11 @@ Uses the UK Companies House API to:
8
11
 
9
12
  import logging
10
13
  import os
14
+ import warnings
11
15
  from typing import Optional
12
16
 
13
17
  from ..base import BaseQualifierPlugin, PluginCapability
14
18
  from ...pipeline.context import PipelineContext
15
- from ...pipeline.registry import PluginRegistry
16
19
  from ...models import ExtractedEntity, EntityQualifiers, EntityType
17
20
 
18
21
  logger = logging.getLogger(__name__)
@@ -21,11 +24,12 @@ logger = logging.getLogger(__name__)
21
24
  CH_API_BASE = "https://api.company-information.service.gov.uk"
22
25
 
23
26
 
24
- @PluginRegistry.qualifier
27
+ # DEPRECATED: Not auto-registered. Use EmbeddingCompanyQualifier instead.
25
28
  class CompaniesHouseQualifierPlugin(BaseQualifierPlugin):
26
29
  """
27
- Qualifier plugin for UK ORG entities using Companies House API.
30
+ DEPRECATED: Use EmbeddingCompanyQualifier instead.
28
31
 
32
+ Qualifier plugin for UK ORG entities using Companies House API.
29
33
  Requires COMPANIES_HOUSE_API_KEY environment variable.
30
34
  """
31
35
 
@@ -38,11 +42,18 @@ class CompaniesHouseQualifierPlugin(BaseQualifierPlugin):
38
42
  """
39
43
  Initialize the Companies House qualifier.
40
44
 
45
+ DEPRECATED: Use EmbeddingCompanyQualifier instead.
46
+
41
47
  Args:
42
48
  api_key: Companies House API key (or use COMPANIES_HOUSE_API_KEY env var)
43
49
  timeout: API request timeout in seconds
44
50
  cache_results: Whether to cache API results
45
51
  """
52
+ warnings.warn(
53
+ "CompaniesHouseQualifierPlugin is deprecated. Use EmbeddingCompanyQualifier instead.",
54
+ DeprecationWarning,
55
+ stacklevel=2,
56
+ )
46
57
  self._api_key = api_key or os.environ.get("COMPANIES_HOUSE_API_KEY")
47
58
  self._timeout = timeout
48
59
  self._cache_results = cache_results
@@ -0,0 +1,422 @@
1
+ """
2
+ EmbeddingCompanyQualifier - Qualifies ORG entities using embedding similarity.
3
+
4
+ Uses a local embedding database to:
5
+ 1. Find similar company names by embedding
6
+ 2. Use LLM to confirm the best match
7
+ 3. Return CanonicalEntity with FQN and qualifiers
8
+ """
9
+
10
+ import logging
11
+ from typing import Optional
12
+
13
+ from ..base import BaseQualifierPlugin, PluginCapability
14
+ from ...pipeline.context import PipelineContext
15
+ from ...pipeline.registry import PluginRegistry
16
+ from ...models import (
17
+ ExtractedEntity,
18
+ EntityQualifiers,
19
+ EntityType,
20
+ QualifiedEntity,
21
+ CanonicalEntity,
22
+ CanonicalMatch,
23
+ )
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
+ # LLM prompt template for company matching confirmation
29
+ COMPANY_MATCH_PROMPT = """You are matching a company name extracted from text to a database of known companies.
30
+
31
+ Extracted name: "{query_name}"
32
+ {context_line}
33
+ Candidate matches (sorted by similarity):
34
+ {candidates}
35
+
36
+ Task: Select the BEST match, or respond "NONE" if no candidate is a good match.
37
+
38
+ Rules:
39
+ - The match should refer to the same legal entity
40
+ - Minor spelling differences or abbreviations are OK (e.g., "Apple" matches "Apple Inc.")
41
+ - Different companies with similar names should NOT match
42
+ - Consider the REGION when matching - prefer companies from regions mentioned in or relevant to the context
43
+ - If the extracted name is too generic or ambiguous, respond "NONE"
44
+
45
+ Respond with ONLY the number of the best match (1, 2, 3, etc.) or "NONE".
46
+ """
47
+
48
+
49
@PluginRegistry.qualifier
class EmbeddingCompanyQualifier(BaseQualifierPlugin):
    """
    Qualifier plugin for ORG entities using embedding similarity.

    Candidates are retrieved from a pre-built local company database by
    embedding the extracted name; an LLM can optionally confirm which
    candidate (if any) is the same legal entity. The result is a
    CanonicalEntity carrying qualifiers, identifiers and an FQN.

    Its priority (5) sorts it ahead of the API-based qualifiers
    (GLEIF=10, Companies House=20, SEC Edgar=30) should those be registered
    alongside it.
    """

    # Similarity needed to accept a lone candidate without consulting the LLM.
    _SINGLE_MATCH_MIN_SIMILARITY = 0.9
    # Similarity needed to accept the top candidate when no LLM verdict exists.
    _NO_LLM_MIN_SIMILARITY = 0.85
    # Similarity needed to accept the top candidate when the LLM reply is unusable.
    _UNCLEAR_LLM_MIN_SIMILARITY = 0.8
    # Number of candidates that are logged and listed in the LLM prompt.
    _MAX_LISTED_CANDIDATES = 10
    # Maximum number of source-text characters quoted in the LLM prompt.
    _MAX_CONTEXT_CHARS = 500

    def __init__(
        self,
        db_path: Optional[str] = None,
        top_k: int = 20,
        min_similarity: float = 0.3,
        use_llm_confirmation: bool = True,
        auto_download_db: bool = True,
    ):
        """
        Initialize the embedding company qualifier.

        Args:
            db_path: Path to company database (auto-detects if None)
            top_k: Number of candidates to retrieve
            min_similarity: Minimum similarity threshold
            use_llm_confirmation: Whether to use LLM for match confirmation
            auto_download_db: Whether to auto-download database from HuggingFace
        """
        self._db_path = db_path
        self._top_k = top_k
        self._min_similarity = min_similarity
        self._use_llm_confirmation = use_llm_confirmation
        self._auto_download_db = auto_download_db

        # Lazy-loaded components
        self._database = None
        self._embedder = None
        self._llm = None
        # Keyed by lower-cased, stripped entity text; None records a known miss.
        self._cache: dict[str, Optional[CanonicalEntity]] = {}

    @property
    def name(self) -> str:
        return "embedding_company_qualifier"

    @property
    def priority(self) -> int:
        # Lower than the API-based qualifiers (GLEIF=10, CH=20, SEC=30).
        return 5

    @property
    def capabilities(self) -> PluginCapability:
        caps = PluginCapability.CACHING | PluginCapability.BATCH_PROCESSING
        if self._use_llm_confirmation:
            caps |= PluginCapability.LLM_REQUIRED
        return caps

    @property
    def description(self) -> str:
        return "Qualifies ORG entities using embedding similarity search with optional LLM confirmation"

    @property
    def supported_entity_types(self) -> set[EntityType]:
        return {EntityType.ORG}

    @property
    def supported_identifier_types(self) -> list[str]:
        return ["lei", "sec_cik", "ch_number"]

    @property
    def provided_identifier_types(self) -> list[str]:
        return ["lei", "sec_cik", "ch_number", "canonical_id"]

    def _get_database(self):
        """Get or initialize the company database; returns None if unavailable."""
        if self._database is not None:
            return self._database

        # Imported lazily so the database stack is only loaded when needed.
        from ...database.store import get_database
        from ...database.hub import get_database_path

        # Find database path
        db_path = self._db_path
        if db_path is None:
            db_path = get_database_path(auto_download=self._auto_download_db)

        if db_path is None:
            logger.warning("Company database not available. Skipping embedding qualification.")
            return None

        # Use singleton to ensure index is only loaded once
        self._database = get_database(db_path=db_path)
        logger.info(f"Loaded company database from {db_path}")
        return self._database

    def _get_embedder(self):
        """Get or initialize the embedder."""
        if self._embedder is not None:
            return self._embedder

        from ...database import CompanyEmbedder

        self._embedder = CompanyEmbedder()
        return self._embedder

    def _get_llm(self):
        """Get or initialize the LLM for confirmation; returns None if unusable."""
        if self._llm is not None:
            return self._llm

        if not self._use_llm_confirmation:
            return None

        # Best effort: a missing or broken LLM degrades to similarity-only matching.
        try:
            from ...llm import get_llm

            self._llm = get_llm()
            return self._llm
        except Exception as e:
            logger.warning(f"LLM not available for confirmation: {e}")
            return None

    def qualify(
        self,
        entity: ExtractedEntity,
        context: PipelineContext,
    ) -> Optional[CanonicalEntity]:
        """
        Qualify an ORG entity using embedding similarity.

        Args:
            entity: The ORG entity to qualify
            context: Pipeline context

        Returns:
            CanonicalEntity with qualifiers, FQN, and canonical match, or None if no match
        """
        if entity.type != EntityType.ORG:
            return None

        # Check cache
        cache_key = entity.text.lower().strip()
        if cache_key in self._cache:
            return self._cache[cache_key]

        # Get database (a missing database is not cached, so later calls retry)
        database = self._get_database()
        if database is None:
            return None

        # Get embedder
        embedder = self._get_embedder()

        # Embed query name
        logger.debug(f" Embedding query: '{entity.text}'")
        query_embedding = embedder.embed(entity.text)

        # Search for similar companies using hybrid text + vector search
        logger.debug(" Searching database for similar companies...")
        results = database.search(
            query_embedding,
            top_k=self._top_k,
            query_text=entity.text,  # Enable text-based pre-filtering
        )

        # Filter by minimum similarity
        results = [(r, s) for r, s in results if s >= self._min_similarity]

        if not results:
            logger.debug(f" No matches found above threshold {self._min_similarity}")
            self._cache[cache_key] = None
            return None

        # Log all candidates (scores are prominence-adjusted)
        logger.info(f" Found {len(results)} candidates for '{entity.text}' (prominence-adjusted):")
        for i, (record, score) in enumerate(results[: self._MAX_LISTED_CANDIDATES], 1):
            region_str = f" [{record.region}]" if record.region else ""
            ticker = record.record.get("ticker", "")
            ticker_str = f" ticker={ticker}" if ticker else ""
            logger.info(f" {i}. {record.name}{region_str} (score={score:.3f}, source={record.source}{ticker_str})")

        # Get best match (optionally with LLM confirmation)
        logger.info(f" Selecting best match (LLM={self._use_llm_confirmation})...")
        best_match = self._select_best_match(entity.text, results, context)

        if best_match is None:
            logger.info(f" No confident match for '{entity.text}'")
            self._cache[cache_key] = None
            return None

        record, similarity = best_match
        logger.info(f" Matched: '{record.name}' (source={record.source}, similarity={similarity:.3f})")

        # Build CanonicalEntity from matched record
        canonical = self._build_canonical_entity(entity, record, similarity)

        self._cache[cache_key] = canonical
        return canonical

    def _select_best_match(
        self,
        query_name: str,
        candidates: list[tuple],
        context: PipelineContext,
    ) -> Optional[tuple]:
        """
        Select the best match from candidates.

        Uses LLM if available and configured, otherwise returns top match.

        Args:
            query_name: The extracted company name
            candidates: (record, similarity) tuples, best first
            context: Pipeline context (its source text is shown to the LLM)

        Returns:
            The chosen (record, similarity) tuple, or None if nothing is
            confident enough
        """
        if not candidates:
            return None

        # If only one strong match, use it directly
        if len(candidates) == 1 and candidates[0][1] >= self._SINGLE_MATCH_MIN_SIMILARITY:
            logger.info(f" Single strong match: '{candidates[0][0].name}' (sim={candidates[0][1]:.3f})")
            return candidates[0]

        # Try LLM confirmation; its verdict (including None) is final unless it raises
        llm = self._get_llm()
        if llm is not None:
            try:
                return self._llm_select_match(query_name, candidates, context)
            except Exception as e:
                logger.warning(f" LLM confirmation failed: {e}")

        # Fallback: use top match if similarity is high enough
        top_record, top_similarity = candidates[0]
        if top_similarity >= self._NO_LLM_MIN_SIMILARITY:
            logger.info(f" No LLM, using top match: '{top_record.name}' (sim={top_similarity:.3f})")
            return candidates[0]

        logger.info(
            f" No confident match for '{query_name}' "
            f"(top sim={top_similarity:.3f} < {self._NO_LLM_MIN_SIMILARITY})"
        )
        return None

    def _llm_select_match(
        self,
        query_name: str,
        candidates: list[tuple],
        context: PipelineContext,
    ) -> Optional[tuple]:
        """
        Use LLM to select the best match.

        Only the first ``_MAX_LISTED_CANDIDATES`` candidates are shown to the
        LLM, and only those can be selected by its answer.

        Args:
            query_name: The extracted company name
            candidates: Non-empty list of (record, similarity) tuples, best first
            context: Pipeline context; ``source_text`` is quoted (truncated) in the prompt

        Returns:
            The chosen (record, similarity) tuple, or None if the LLM rejects
            all candidates or its reply is unusable and the top candidate is
            not similar enough
        """
        # The LLM may only pick from what it was shown, so validate against this slice.
        shown = candidates[: self._MAX_LISTED_CANDIDATES]

        # Format candidates for prompt with region info
        candidate_lines = []
        for i, (record, similarity) in enumerate(shown, 1):
            region_str = f", region: {record.region}" if record.region else ""
            candidate_lines.append(
                f"{i}. {record.name} (source: {record.source}{region_str}, similarity: {similarity:.3f})"
            )

        # Build context line from source text if available
        context_line = ""
        source_text = context.source_text
        if source_text:
            # Truncate source text for prompt
            if len(source_text) > self._MAX_CONTEXT_CHARS:
                source_preview = source_text[: self._MAX_CONTEXT_CHARS] + "..."
            else:
                source_preview = source_text
            context_line = f"Source text context: \"{source_preview}\"\n"

        prompt = COMPANY_MATCH_PROMPT.format(
            query_name=query_name,
            context_line=context_line,
            candidates="\n".join(candidate_lines),
        )

        # Get LLM response
        response = self._llm.generate(prompt, max_tokens=10, stop=["\n"])
        response = response.strip()

        logger.info(f" LLM response for '{query_name}': {response}")

        # Judge only the first token, minus decoration, so that replies such as
        # "NONE.", "2." or "2) Apple Inc." are understood as intended.
        tokens = response.split()
        answer = tokens[0].strip("#.,:;()[]\"'*") if tokens else ""

        if answer.upper() == "NONE":
            logger.info(" LLM chose: NONE (no match)")
            return None

        try:
            idx = int(answer) - 1
        except ValueError:
            logger.warning(f" LLM response '{response}' could not be parsed as number")
        else:
            if 0 <= idx < len(shown):
                chosen = shown[idx]
                logger.info(f" LLM chose: #{idx + 1} '{chosen[0].name}' (sim={chosen[1]:.3f})")
                return chosen
            logger.warning(f" LLM response '{response}' is outside the listed candidates (1-{len(shown)})")

        # Fallback to top match if LLM response is unclear
        top_record, top_similarity = candidates[0]
        if top_similarity >= self._UNCLEAR_LLM_MIN_SIMILARITY:
            logger.info(f" Fallback to top match: '{top_record.name}' (sim={top_similarity:.3f})")
            return candidates[0]

        logger.info(f" No confident match (top sim={top_similarity:.3f} < {self._UNCLEAR_LLM_MIN_SIMILARITY})")
        return None

    def _build_canonical_entity(
        self,
        entity: ExtractedEntity,
        record,
        similarity: float,
    ) -> CanonicalEntity:
        """
        Build CanonicalEntity from a matched company record.

        Args:
            entity: The extracted entity being qualified
            record: Matched database record (reads name, source, source_id,
                region and the ``record`` dict)
            similarity: Match score for the record

        Returns:
            CanonicalEntity with qualifiers, canonical match and FQN
        """
        # Map source names to identifier prefixes
        source = record.source
        source_id = record.source_id
        source_prefix_map = {
            "gleif": "LEI",
            "sec_edgar": "SEC-CIK",
            "companies_house": "UK-CH",
            "wikidata": "WIKIDATA",
        }
        source_prefix = source_prefix_map.get(source, source.upper())
        canonical_id = f"{source_prefix}:{source_id}"

        # Build identifiers dict
        identifiers = {
            "source": source_prefix,
            "source_id": source_id,
            "canonical_id": canonical_id,
        }

        record_data = record.record

        # Add source-specific identifiers for compatibility
        if source == "gleif":
            identifiers["lei"] = source_id
        elif source == "sec_edgar":
            identifiers["sec_cik"] = source_id
            if record_data.get("ticker"):
                identifiers["ticker"] = record_data["ticker"]
        elif source == "companies_house":
            identifiers["ch_number"] = source_id

        # Extract location info from record
        region = record.region  # From CompanyRecord

        # Build qualifiers
        qualifiers = EntityQualifiers(
            legal_name=record.name,
            region=region,
            jurisdiction=record_data.get("jurisdiction"),
            country=record_data.get("country"),
            city=record_data.get("city"),
            identifiers=identifiers,
        )

        # Create QualifiedEntity
        qualified = QualifiedEntity(
            entity_ref=entity.entity_ref,
            original_text=entity.text,
            entity_type=entity.type,
            qualifiers=qualifiers,
            qualification_sources=[self.name],
        )

        # Build FQN: "LEGAL_NAME (SOURCE,REGION)"
        fqn_parts = [source_prefix]
        if region:
            fqn_parts.append(region)
        fqn = f"{record.name} ({','.join(fqn_parts)})"

        # Create canonical match (clamp confidence to [0, 1] for float precision)
        clamped_confidence = min(max(similarity, 0.0), 1.0)
        canonical_match = CanonicalMatch(
            canonical_id=canonical_id,
            canonical_name=record.name,
            match_method="embedding",
            match_confidence=clamped_confidence,
            match_details={"source": source, "similarity": similarity},
        )

        return CanonicalEntity(
            entity_ref=entity.entity_ref,
            qualified_entity=qualified,
            canonical_match=canonical_match,
            fqn=fqn,
        )
419
+
420
+
421
+ # For testing without decorator
422
+ EmbeddingCompanyQualifierClass = EmbeddingCompanyQualifier
@@ -1,18 +1,21 @@
1
1
  """
2
2
  GLEIFQualifierPlugin - Qualifies ORG entities with LEI and related data.
3
3
 
4
+ DEPRECATED: Use EmbeddingCompanyQualifier instead, which uses a local
5
+ embedding database with pre-loaded GLEIF data for faster, offline matching.
6
+
4
7
  Uses the GLEIF (Global Legal Entity Identifier Foundation) API to:
5
8
  - Look up LEI by organization name
6
9
  - Retrieve legal name, jurisdiction, parent company info
7
10
  """
8
11
 
9
12
  import logging
13
+ import warnings
10
14
  from typing import Optional
11
15
  from urllib.parse import quote
12
16
 
13
17
  from ..base import BaseQualifierPlugin, PluginCapability
14
18
  from ...pipeline.context import PipelineContext
15
- from ...pipeline.registry import PluginRegistry
16
19
  from ...models import ExtractedEntity, EntityQualifiers, EntityType
17
20
 
18
21
  logger = logging.getLogger(__name__)
@@ -21,11 +24,12 @@ logger = logging.getLogger(__name__)
21
24
  GLEIF_API_BASE = "https://api.gleif.org/api/v1"
22
25
 
23
26
 
24
- @PluginRegistry.qualifier
27
+ # DEPRECATED: Not auto-registered. Use EmbeddingCompanyQualifier instead.
25
28
  class GLEIFQualifierPlugin(BaseQualifierPlugin):
26
29
  """
27
- Qualifier plugin for ORG entities using GLEIF API.
30
+ DEPRECATED: Use EmbeddingCompanyQualifier instead.
28
31
 
32
+ Qualifier plugin for ORG entities using GLEIF API.
29
33
  Looks up Legal Entity Identifiers (LEI) and related corporate data.
30
34
  """
31
35
 
@@ -37,10 +41,17 @@ class GLEIFQualifierPlugin(BaseQualifierPlugin):
37
41
  """
38
42
  Initialize the GLEIF qualifier.
39
43
 
44
+ DEPRECATED: Use EmbeddingCompanyQualifier instead.
45
+
40
46
  Args:
41
47
  timeout: API request timeout in seconds
42
48
  cache_results: Whether to cache API results
43
49
  """
50
+ warnings.warn(
51
+ "GLEIFQualifierPlugin is deprecated. Use EmbeddingCompanyQualifier instead.",
52
+ DeprecationWarning,
53
+ stacklevel=2,
54
+ )
44
55
  self._timeout = timeout
45
56
  self._cache_results = cache_results
46
57
  self._cache: dict[str, Optional[dict]] = {}