PyPI - corp-extractor - Versions diffs - 0.9.0__py3-none-any.whl → 0.9.4__py3-none-any.whl - Mend

corp-extractor 0.9.0__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

{corp_extractor-0.9.0.dist-info → corp_extractor-0.9.4.dist-info}/METADATA +72 -11
{corp_extractor-0.9.0.dist-info → corp_extractor-0.9.4.dist-info}/RECORD +34 -27
statement_extractor/cli.py +1317 -101
statement_extractor/database/embeddings.py +45 -0
statement_extractor/database/hub.py +86 -136
statement_extractor/database/importers/__init__.py +10 -2
statement_extractor/database/importers/companies_house.py +16 -2
statement_extractor/database/importers/companies_house_officers.py +431 -0
statement_extractor/database/importers/gleif.py +23 -0
statement_extractor/database/importers/import_utils.py +264 -0
statement_extractor/database/importers/sec_edgar.py +17 -0
statement_extractor/database/importers/sec_form4.py +512 -0
statement_extractor/database/importers/wikidata.py +151 -43
statement_extractor/database/importers/wikidata_dump.py +2282 -0
statement_extractor/database/importers/wikidata_people.py +867 -325
statement_extractor/database/migrate_v2.py +852 -0
statement_extractor/database/models.py +155 -7
statement_extractor/database/schema_v2.py +409 -0
statement_extractor/database/seed_data.py +359 -0
statement_extractor/database/store.py +3449 -233
statement_extractor/document/deduplicator.py +10 -12
statement_extractor/extractor.py +1 -1
statement_extractor/models/__init__.py +3 -2
statement_extractor/models/statement.py +15 -17
statement_extractor/models.py +1 -1
statement_extractor/pipeline/context.py +5 -5
statement_extractor/pipeline/orchestrator.py +12 -12
statement_extractor/plugins/base.py +17 -17
statement_extractor/plugins/extractors/gliner2.py +28 -28
statement_extractor/plugins/qualifiers/embedding_company.py +7 -5
statement_extractor/plugins/qualifiers/person.py +120 -53
statement_extractor/plugins/splitters/t5_gemma.py +35 -39
{corp_extractor-0.9.0.dist-info → corp_extractor-0.9.4.dist-info}/WHEEL +0 -0
{corp_extractor-0.9.0.dist-info → corp_extractor-0.9.4.dist-info}/entry_points.txt +0 -0

{corp_extractor-0.9.0.dist-info → corp_extractor-0.9.4.dist-info}/METADATA RENAMED Viewed

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: corp-extractor
-Version: 0.9.0
-Summary: Extract structured statements from text using T5-Gemma 2 and Diverse Beam Search
+Version: 0.9.4
+Summary: Extract structured entity and relationship information from text
 Project-URL: Homepage, https://github.com/corp-o-rate/statement-extractor
 Project-URL: Documentation, https://github.com/corp-o-rate/statement-extractor#readme
 Project-URL: Repository, https://github.com/corp-o-rate/statement-extractor
@@ -9,7 +9,7 @@ Project-URL: Issues, https://github.com/corp-o-rate/statement-extractor/issues
 Author-email: Corp-o-Rate <neil@corp-o-rate.com>
 Maintainer-email: Corp-o-Rate <neil@corp-o-rate.com>
 License: MIT
-Keywords: diverse-beam-search,embeddings,gemma,information-extraction,knowledge-graph,nlp,statement-extraction,subject-predicate-object,t5,transformers,triples
+Keywords: diverse-beam-search,embeddings,entities,entity-linking,entity-resolution,gemma,information-extraction,knowledge-graph,nlp,semantic-parsing,statement-extraction,subject-predicate-object,t5gemma2,transformers,triples
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Science/Research
@@ -32,6 +32,7 @@ Requires-Dist: httpx>=0.25.0
 Requires-Dist: huggingface-hub>=0.20.0
 Requires-Dist: llama-cpp-python>=0.3.16
 Requires-Dist: numpy>=1.24.0
+Requires-Dist: pycountry>=24.6.1
 Requires-Dist: pydantic>=2.0.0
 Requires-Dist: pymupdf>=1.23.0
 Requires-Dist: sentence-transformers>=2.2.0
@@ -56,7 +57,7 @@ Description-Content-Type: text/markdown
 # Corp Extractor
-Extract structured subject-predicate-object statements from unstructured text using the T5-Gemma 2 model.
+Analyze complex text to extract relationship information about people and organizations. Runs entirely on your hardware (RTX 4090+, Apple M1 16GB+) with no external API dependencies. Uses fine-tuned T5-Gemma 2 for statement splitting and coreference resolution, plus GLiNER2 for entity extraction. Includes a database of 10M+ organizations and 40M+ people with quantized embeddings for fast entity qualification.
 [![PyPI version](https://img.shields.io/pypi/v/corp-extractor.svg)](https://pypi.org/project/corp-extractor/)
 [![Python 3.10+](https://img.shields.io/pypi/pyversions/corp-extractor.svg)](https://pypi.org/project/corp-extractor/)
@@ -64,7 +65,9 @@ Extract structured subject-predicate-object statements from unstructured text us
 ## Features
-- **Person Database** *(v0.9.0)*: Qualify notable people (executives, politicians, athletes, etc.) against Wikidata with canonical IDs
+- **Database v2 Schema** *(v0.9.4)*: Normalized schema with INTEGER FK references, new roles/locations tables, int8 scalar embeddings (75% smaller)
+- **Person Database** *(v0.9.2)*: Qualify notable people (executives, politicians, athletes, etc.) against Wikidata with canonical IDs
+- **Organization Canonicalization** *(v0.9.2)*: Link equivalent records across sources (LEI, ticker, CIK, name matching)
 - **5-Stage Pipeline** *(v0.8.0)*: Modular plugin-based architecture for full entity resolution
 - **Document Processing** *(v0.7.0)*: Process documents, URLs, and PDFs with chunking and deduplication
 - **Entity Embedding Database** *(v0.6.0)*: Fast entity qualification using vector similarity (~100K+ SEC, ~3M GLEIF, ~5M UK organizations)
@@ -221,10 +224,10 @@ Pipeline Options:
   -o, --output [table|json|yaml|triples]  Output format
 ```
-## New in v0.2.0: Quality Scoring & Beam Merging
+## Quality Scoring & Beam Merging
-By default, the library now:
-- **Scores each triple** for groundedness based on whether entities appear in source text
+By default, the library:
+- **Scores each triple** using semantic similarity (50%) + GLiNER2 entity recognition (50%)
 - **Merges top beams** instead of selecting one, improving coverage
 - **Uses embeddings** to detect semantically similar predicates ("bought" ≈ "acquired")
@@ -386,6 +389,43 @@ config = PipelineConfig.from_stage_string("1-3")  # Stages 1, 2, 3
 Taxonomy classifiers return **multiple labels** per statement above the confidence threshold.
+## Entity Database
+The library includes an **entity embedding database** for fast entity qualification using vector similarity search. It stores records from authoritative sources (GLEIF, SEC, Companies House, Wikidata) with 768-dimensional embeddings for semantic matching.
+**Quick start:**
+```bash
+corp-extractor db download              # Download pre-built database
+corp-extractor db search "Microsoft"    # Search organizations
+corp-extractor db search-people "Tim Cook"  # Search people
+corp-extractor db search-roles "CEO"    # Search roles (v0.9.4)
+corp-extractor db search-locations "California"  # Search locations (v0.9.4)
+```
+For comprehensive documentation including schema, CLI reference, Python API, and build instructions, see **[ENTITY_DATABASE.md](./ENTITY_DATABASE.md)**.
+## New in v0.9.4: Database v2 Schema
+v0.9.4 introduces a **normalized v2 schema** with significant improvements:
+- **INTEGER FK references** replace TEXT enum columns for better query performance
+- **New enum lookup tables**: `source_types`, `people_types`, `organization_types`, `location_types`
+- **New tables**: `roles` (job titles with Wikidata QID), `locations` (countries/states/cities with hierarchy)
+- **Scalar (int8) embeddings**: 75% storage reduction with ~92% recall at top-100
+- **QID as integers**: Wikidata QIDs stored as integers (Q prefix stripped)
+- **Human-readable views**: `organizations_view`, `people_view`, `roles_view`, `locations_view`
+**Migration:**
+```bash
+# Migrate existing v1 database to v2
+corp-extractor db migrate-v2 entities.db entities-v2.db
+# Generate int8 scalar embeddings
+corp-extractor db backfill-scalar
+```
+**Default database path**: `~/.cache/corp-extractor/entities-v2.db`
 ## New in v0.6.0: Entity Embedding Database
 v0.6.0 introduces an **entity embedding database** for fast entity qualification using vector similarity search.
@@ -405,7 +445,17 @@ v0.6.0 introduces an **entity embedding database** for fast entity qualification
 | Source | Records | Identifier | PersonType Classification |
 |--------|---------|------------|--------------------------|
-| Wikidata | Variable | Wikidata QID | executive, politician, athlete, artist, academic, scientist, journalist, entrepreneur, activist |
+| Wikidata (SPARQL) | Variable | Wikidata QID | executive, politician, athlete, artist, academic, scientist, journalist, entrepreneur, activist |
+| Wikidata (Dump) | All humans with enwiki | Wikidata QID | Classified from positions (P39) and occupations (P106) |
+**Date Fields**: All importers now include `from_date` and `to_date` where available:
+- **GLEIF**: LEI registration date
+- **SEC Edgar**: First SEC filing date
+- **Companies House**: Incorporation and dissolution dates
+- **Wikidata Orgs**: Inception (P571) and dissolution (P576) dates
+- **Wikidata People**: Position start (P580) and end (P582) dates
+**Note**: The same person can have multiple records with different role/org combinations (unique on `source_id + role + org`). Organizations discovered during people import are automatically inserted into the organizations table with `known_for_org_id` foreign key linking people to their organizations.
 ### EntityType Classification
@@ -430,6 +480,17 @@ corp-extractor db import-wikidata --limit 50000
 # Import notable people (v0.9.0)
 corp-extractor db import-people --type executive --limit 5000
 corp-extractor db import-people --all --limit 10000  # All person types
+corp-extractor db import-people --type executive --skip-existing  # Skip existing records
+corp-extractor db import-people --type executive --enrich-dates   # Fetch role start/end dates
+# Import from Wikidata dump (v0.9.1) - avoids SPARQL timeouts
+corp-extractor db import-wikidata-dump --download --limit 50000   # Downloads ~100GB dump
+corp-extractor db import-wikidata-dump --dump /path/to/dump.bz2 --people --no-orgs  # Local dump
+corp-extractor db import-wikidata-dump --dump dump.bz2 --locations --no-people --no-orgs  # Locations only (v0.9.4)
+# Migrate to v2 schema (v0.9.4)
+corp-extractor db migrate-v2 entities.db entities-v2.db
+corp-extractor db backfill-scalar        # Generate int8 embeddings (75% smaller)
 # Check status
 corp-extractor db status
@@ -474,7 +535,7 @@ corp-extractor db create-lite entities.db    # Create lite version
 corp-extractor db compress entities.db       # Compress with gzip
 ```
-See [COMPANY_DB.md](../COMPANY_DB.md) for complete build and publish instructions.
+See [ENTITY_DATABASE.md](./ENTITY_DATABASE.md) for complete build and publish instructions.
 ## New in v0.7.0: Document Processing
@@ -742,7 +803,7 @@ for text in texts:
 This library uses the T5-Gemma 2 statement extraction model with **Diverse Beam Search** ([Vijayakumar et al., 2016](https://arxiv.org/abs/1610.02424)):
 1. **Diverse Beam Search**: Generates 4+ candidate outputs using beam groups with diversity penalty
-2. **Quality Scoring**: Each triple scored for groundedness in source text
+2. **Quality Scoring**: Each triple scored via semantic similarity + GLiNER2 entity recognition
 3. **Beam Merging**: Top beams combined for better coverage
 4. **Embedding Dedup**: Semantic similarity removes near-duplicate predicates
 5. **Predicate Normalization**: Optional taxonomy matching via embeddings

{corp_extractor-0.9.0.dist-info → corp_extractor-0.9.4.dist-info}/RECORD RENAMED Viewed

@@ -1,51 +1,58 @@
 statement_extractor/__init__.py,sha256=vOJFsK6wNOoBvGYOvIKsseaqpFR8vNg_XPH-r8SmLas,3215
 statement_extractor/canonicalization.py,sha256=ZMLs6RLWJa_rOJ8XZ7PoHFU13-zeJkOMDnvK-ZaFa5s,5991
-statement_extractor/cli.py,sha256=BTFLIBZoNa2ADrYVslbXiZGrzhRWmi7ppbnAPV3xUyg,71191
-statement_extractor/extractor.py,sha256=CGJCmAMiIoDsPtjIdvOHYBcz8058eYpfLMngjELMJhI,38403
+statement_extractor/cli.py,sha256=2c3K5wUWL03xRndkvNI1rzFGkcYXJYzTxX4wVIP1O3I,125325
+statement_extractor/extractor.py,sha256=m10na6I2iU1GwokQTxodePttYgigHykoss5LWrE8JOQ,38418
 statement_extractor/gliner_extraction.py,sha256=OL4w-0_rZc6XCojaVsbGY4VdIXRJ6j8ZmeUeTOL0Ue0,8118
 statement_extractor/llm.py,sha256=1eBrYs-bUPqzbpiiif_hH-gE_DeM-l3cmddrIoLHFXo,8010
-statement_extractor/models.py,sha256=fXTT7qxPqynnrrpb77nCgs3K2yn_YgbSugSXv12boX4,12312
+statement_extractor/models.py,sha256=rBotCX2hRTMW4MXXkkWYv4JctP0HQR0NSJSlBcNhsF0,12302
 statement_extractor/predicate_comparer.py,sha256=jcuaBi5BYqD3TKoyj3pR9dxtX5ihfDJvjdhEd2LHCwc,26184
 statement_extractor/scoring.py,sha256=V9WHQ-QCAoycnnaTHydWkFo-48_lcS6Mkztxjfi4wVg,16632
 statement_extractor/data/default_predicates.json,sha256=7rhFXWHvStDj4sLYfBXKS50xGChPaqMKUaKTkMEJRGk,32870
 statement_extractor/data/statement_taxonomy.json,sha256=LI9RWthuJTFCcuaIbh6h3FEu8EJpejiKjAtNM_y1s8A,336543
 statement_extractor/database/__init__.py,sha256=1eScQOm7866v9xndaqCK-xsXDUhKBSj5YGtGoQ80LgU,1548
-statement_extractor/database/embeddings.py,sha256=j_gUTEdRyyQCPcx9imsOh1nVDPjeiRXXG22OZ7KIO4w,5535
-statement_extractor/database/hub.py,sha256=HOnRp62RnkXvk2KgwqOLVpEkXwy0LS0n3tIJrkYCo2c,16842
-statement_extractor/database/models.py,sha256=ke4byqJiiBlZfRhxqoC0nsdDhb6YSG2I4S5W5BRBNY4,8813
+statement_extractor/database/embeddings.py,sha256=VT49amsNyCuhnoGFfYsSii8bPIrnatzvzmQhoq_wlxQ,6965
+statement_extractor/database/hub.py,sha256=3T3yooMI2kpk-SnjSxxglKEVsckC_dGDUEWjnKEJWBk,15128
+statement_extractor/database/migrate_v2.py,sha256=I3zHEMPD5q2dTzLIxrqc6Fxj3y0XHe28UWKOd6CLD3g,29789
+statement_extractor/database/models.py,sha256=GSyZZUPjIWLY9V3l-Fi44dnc9SgD61mhuYZUEZEiDV0,15913
 statement_extractor/database/resolver.py,sha256=_fTITarFmAYOtuRbOos48ou_aqX4yJC0K2csdLbIktI,7202
-statement_extractor/database/store.py,sha256=1qdRZ7q5nTLUYbtUC9cWSLey_GVf5kAQ6dTF9EEwDXY,56735
-statement_extractor/database/importers/__init__.py,sha256=0CPqafekQpqxFmZhe3uMJLNssqxGzEniZlArGyl8QKU,668
-statement_extractor/database/importers/companies_house.py,sha256=G0DZAs_9RM7uTwY7imt70IXUVvhntoO-xXnJ0o6jjGw,19635
-statement_extractor/database/importers/gleif.py,sha256=MTFuksVf83Barn1c6JvBLBouxXbzogWulKb8oqEODAk,18948
-statement_extractor/database/importers/sec_edgar.py,sha256=_B4QcXhZ_5ulXTSVW9dKKAzFIVwn-VIh_X39jcUhqsg,12923
-statement_extractor/database/importers/wikidata.py,sha256=ZZYHiqSlYlco1TSzCLUKqdT-i5X1cUSK1EnsfWWwPAc,33770
-statement_extractor/database/importers/wikidata_people.py,sha256=loqyf5sbtBqCITiTxqV3PLyx3SefmVefhZE0Y-cRoC4,22205
+statement_extractor/database/schema_v2.py,sha256=QUxBp6-X2hM3DRY52vxYN0DRpDG0d1abXJ4uoWPYApA,13330
+statement_extractor/database/seed_data.py,sha256=z_F73_LfZxAoW3fg2Or-oRBjpD-9mn5TSwhkL2D4dWE,10030
+statement_extractor/database/store.py,sha256=fEAm4KWfJ0Z6ZzlVq6jmZKoVy8GlAGa5wgn2vV1jGDk,180742
+statement_extractor/database/importers/__init__.py,sha256=acIoX_BPdXv2DOMFyVbFZPDGNWp2s1FpC774loTqL5I,1121
+statement_extractor/database/importers/companies_house.py,sha256=b5OMFtoHhkPgoGK08ThQn9BtTu9uC_dYzBVpC10xT4U,20252
+statement_extractor/database/importers/companies_house_officers.py,sha256=QDFA0FzqDx9p6VjRrB7o4BE3e30l7i0ML_ktntsB-kA,15565
+statement_extractor/database/importers/gleif.py,sha256=sw4YYROD6wi7IbBEKGCn8kko0nOYbKOyukDJKGQp17Q,20200
+statement_extractor/database/importers/import_utils.py,sha256=2nVsUelN4_mKQ08qfzpeJsxkA9piyANznnmRs50Qt0w,6335
+statement_extractor/database/importers/sec_edgar.py,sha256=0nnhnOrf5d1wR9PGjl8AuNOnp4mfmEtopjkgUY_PLQc,13738
+statement_extractor/database/importers/sec_form4.py,sha256=ZoV-oyNhG5AOUm4u9hemmRI5KnpNs3Gw_dfisjkD3zU,18234
+statement_extractor/database/importers/wikidata.py,sha256=tRj4kEMVIq7sRXxjyxj-scl8eXybkrLVvyNDYV2T5lg,39572
+statement_extractor/database/importers/wikidata_dump.py,sha256=6vTluVuXm5INq5urhnd_es5i4mzE3HM0cEKJIblGTbU,93101
+statement_extractor/database/importers/wikidata_people.py,sha256=vrEFGvMdXUT3Fz_diJxQrR0qch7P-rAElKeBRnssSG0,44964
 statement_extractor/document/__init__.py,sha256=csbUUjxaZay-0WXtjZmULjDfL9VNxhOlePyKTMdRDYo,1714
 statement_extractor/document/chunker.py,sha256=I76p6Qwujk2kkN7GJ1sMwbQNOfEpbt29u-RxJdt15oE,14020
 statement_extractor/document/context.py,sha256=9DvyguwCjlef2MeNWZMgydvD54FPiOppjdvamQnrKzM,5450
-statement_extractor/document/deduplicator.py,sha256=8tPKWAGqNfjteOdnk7B82izyfIpvOebirZ-OIQKixwU,4821
+statement_extractor/document/deduplicator.py,sha256=R_RwEdVeVQBYZHvjkVA0ShAWr8x618VrO9dkYWXvifI,4771
 statement_extractor/document/html_extractor.py,sha256=YRhaIsurBJTeECLkL2YJsSv8gDJJN33fS-ESkGvDBGs,6600
 statement_extractor/document/loader.py,sha256=Ygund7bz4EVcwsFsxkrrgSjOCK4tbb_sqkMlzK_oEKM,8996
 statement_extractor/document/pipeline.py,sha256=h4q-CG_WtBLibkTXCFhfTizMme8bJS5f6ZWOECqhRYU,13675
 statement_extractor/document/summarizer.py,sha256=DOF6qPw0oWEtLSt97oXOFyzb0jGWZZ7frDFp11rL3is,5853
-statement_extractor/models/__init__.py,sha256=9FxKkJ4EIPXmSkMo_j9jqAKH6jTkvz5Tzk2YvQL7UVk,2884
+statement_extractor/models/__init__.py,sha256=OJOK0ral_jskrSxx6nCc3TB6JlVYaC5HI2eYXr9dhMQ,2971
 statement_extractor/models/canonical.py,sha256=LaSU3CUJZOtBM1SpRTAmK-3N7QnYmxZYJvQE1NVIjLY,6003
 statement_extractor/models/document.py,sha256=McCyXz88YtJtlsfiFzagjRAhY32ovpIDKXQI_eV_DZI,9203
 statement_extractor/models/entity.py,sha256=l2ny91BnnWwPo9zx1_Fb8WMKPNuIQFN0H7ILncylmcY,3214
 statement_extractor/models/labels.py,sha256=NUcjFDuGUOM82mgsaWOdoIVbRNiQ6TdN-imNuTograo,7326
 statement_extractor/models/qualifiers.py,sha256=l--khVzt-N6jgibZ-MSSl-3SdQUZJN9dGoxdNhRmM_I,5926
-statement_extractor/models/statement.py,sha256=agC4jcP9ospbZC91J6c0UgLAmfsg1tnqNcSvkqOtqBQ,3629
+statement_extractor/models/statement.py,sha256=Wpp2OtZ5inhqbtEcblWdcES7g7lA-FVjqjz6Jq7hqzo,3329
 statement_extractor/pipeline/__init__.py,sha256=Q3M2Arx9BWH_APZxM-P0G-C3ISguG1whiA5QhxDHQCA,1071
 statement_extractor/pipeline/config.py,sha256=FXtqMMpRmdeuHB86D6YrFx5A36XHVg5GlBBZuPEn4JA,3957
-statement_extractor/pipeline/context.py,sha256=wURDYtzDrmbHu40Af_C_oTtN55wnULKHNZjUx6O8t-0,6126
-statement_extractor/pipeline/orchestrator.py,sha256=1pe6hyEtd495LJrfH3QgxQadNqERmehQEs5LHsAVIxM,16580
+statement_extractor/pipeline/context.py,sha256=evAdyH5oOCNM_ILGZNS1mov3lM4D3mCvr5hzsjaB0Bs,6136
+statement_extractor/pipeline/orchestrator.py,sha256=qH6rD4_wI_kZ_e8NeIv2XYHUA07ldogFewFsZeRQVxw,16687
 statement_extractor/pipeline/registry.py,sha256=yBybhRd1HU2Y75TebLGBzF6nbPiHKZ0cHkyj-3CVnhg,11390
 statement_extractor/plugins/__init__.py,sha256=pIcPeoMFd-56jOM_kGrUWvPuwqN6vFJ-oUbu130-tzI,1345
-statement_extractor/plugins/base.py,sha256=ItqJZ5rH65gW4-pXpraRb45y7F3lXqsKECumhV3tDyk,21516
+statement_extractor/plugins/base.py,sha256=xC661iFtnhIxtZLTwuCc-0rFV1q2V3hCTV-uOaILsOA,21622
 statement_extractor/plugins/extractors/__init__.py,sha256=sqxTI7WwDLVQKwOiQXqWS72gjJnwb76Gs9N3LGetBnI,253
 statement_extractor/plugins/extractors/base.py,sha256=kNRsQ7BL84lXPXREm7CihrprDUaFwDDvMpBcbZlwSGA,179
-statement_extractor/plugins/extractors/gliner2.py,sha256=ObEQMNE6ArjRl2s4x3lkOSPs03cmtTYFlppnbhtkI7A,21876
+statement_extractor/plugins/extractors/gliner2.py,sha256=yDwKJVniMj4YwjR4Rm6MALDk633H5qcKcxa2xOLh9LI,21999
 statement_extractor/plugins/labelers/__init__.py,sha256=flHEoBvnzQ3vAKkIUHyezpYi2H3KJvYGRerCVnc80r0,965
 statement_extractor/plugins/labelers/base.py,sha256=hIgJKq2LU00OcL0Zjy1L9hP8K2onlM_xtZ63XcH8qDE,171
 statement_extractor/plugins/labelers/confidence.py,sha256=XiXjBYe-8ch_SCKnz0sAwTT1mJ_XKMsuzXBbwAW_OK0,4083
@@ -58,19 +65,19 @@ statement_extractor/plugins/pdf/pypdf.py,sha256=JgmWa1-6tiATbPvhONMqRd5kAXJ--tb8
 statement_extractor/plugins/qualifiers/__init__.py,sha256=H4FEZSw1GWBQB-Y79nQnLwhZ3okKQJqgJHGEA0Zp8pA,951
 statement_extractor/plugins/qualifiers/base.py,sha256=Kx--OdIh77mnjSkTl1NvUeekItRiG8AnBUcuznOZeBI,179
 statement_extractor/plugins/qualifiers/companies_house.py,sha256=6TlK6Zebb5wDJ9GGO3FvM9zOh27TWpio5BX9k7lBr7U,5854
-statement_extractor/plugins/qualifiers/embedding_company.py,sha256=EmCxImdXBCA7zxM1stAVeAYlzeNPC_jSlyVN5q1XEJA,14567
+statement_extractor/plugins/qualifiers/embedding_company.py,sha256=nc7oTFjEBuPiprjXKeFRiMYM6tNicMNum_xQ9LSgEOg,14756
 statement_extractor/plugins/qualifiers/gleif.py,sha256=zHzC9eOt0R9Z56n0CXgTF7POJqu6v03SRmiJLmv8OGE,6104
-statement_extractor/plugins/qualifiers/person.py,sha256=GZCUJaQncC_wB4nBQ4RLY5dJ-CdARMLpByc_Nn09wj8,28461
+statement_extractor/plugins/qualifiers/person.py,sha256=SKBCFnIKCJJt77qKyPi_kla7DDZl-n64FcU7txMKs9U,32154
 statement_extractor/plugins/qualifiers/sec_edgar.py,sha256=d7QqGiE-3lFDQiXkYmNQU62K4oP2XYK6NzV6LNKPC5k,6754
 statement_extractor/plugins/scrapers/__init__.py,sha256=mh1nmPtcsewrYeW5oELeke6DSzL8jsGOJ2OcH-A4-eo,208
 statement_extractor/plugins/scrapers/http.py,sha256=igoB1JN7U-FPdBFmNfrdZV-Ho4JQ3RXniLz17SmQx8I,7778
 statement_extractor/plugins/splitters/__init__.py,sha256=05CYeAEO0lZsapK5pjxZJbOCLI1kjeK6IQjftxqqg5g,224
 statement_extractor/plugins/splitters/base.py,sha256=GeIBchFTr8icRSfYR8bGSb4-GoEZ1N0IGN6Kl5W2mL0,175
-statement_extractor/plugins/splitters/t5_gemma.py,sha256=AwYYKQrAmiue5IK9bbJ-Uhfl9oCZTX1X_tmKguKIdjU,9982
+statement_extractor/plugins/splitters/t5_gemma.py,sha256=5qjxeHznuAA9hL8EbUDDGQ3N2gYLmtg0hv9BsLWzfMk,9971
 statement_extractor/plugins/taxonomy/__init__.py,sha256=8N0tW7pm95DSCqM-s99ea0Tigbi9bZMyTkKblR1qmLw,307
 statement_extractor/plugins/taxonomy/embedding.py,sha256=yCuNE8UeY8tH2dHGRKL3hmRQBmdz9_9YQ0t5_VTCf7E,16349
 statement_extractor/plugins/taxonomy/mnli.py,sha256=zPZlpAHQqdnwH7fXS_CSY0HCMnaSrrk-fDQb1ZIqqPc,9163
-corp_extractor-0.9.0.dist-info/METADATA,sha256=9pWemKEWyeEqW92sRd4SqdMykO-92kl5UIrs-P2xAn0,27553
-corp_extractor-0.9.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-corp_extractor-0.9.0.dist-info/entry_points.txt,sha256=i0iKFqPIusvb-QTQ1zNnFgAqatgVah-jIhahbs5TToQ,115
-corp_extractor-0.9.0.dist-info/RECORD,,
+corp_extractor-0.9.4.dist-info/METADATA,sha256=a5pkoSpziVKqggeeSX_TGfSKk67GtB8ywpl6YzOdX6c,31449
+corp_extractor-0.9.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+corp_extractor-0.9.4.dist-info/entry_points.txt,sha256=i0iKFqPIusvb-QTQ1zNnFgAqatgVah-jIhahbs5TToQ,115
+corp_extractor-0.9.4.dist-info/RECORD,,

corp-extractor 0.9.0__py3-none-any.whl → 0.9.4__py3-none-any.whl

corp-extractor 0.9.0__py3-none-any.whl → 0.9.4__py3-none-any.whl