corp-extractor 0.2.8__tar.gz → 0.9.4__tar.gz
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {corp_extractor-0.2.8 → corp_extractor-0.9.4}/.gitignore +1 -0
- corp_extractor-0.9.4/PKG-INFO +834 -0
- corp_extractor-0.9.4/README.md +777 -0
- {corp_extractor-0.2.8 → corp_extractor-0.9.4}/pyproject.toml +36 -10
- {corp_extractor-0.2.8 → corp_extractor-0.9.4}/src/statement_extractor/__init__.py +12 -1
- corp_extractor-0.9.4/src/statement_extractor/cli.py +3127 -0
- corp_extractor-0.9.4/src/statement_extractor/data/default_predicates.json +368 -0
- corp_extractor-0.9.4/src/statement_extractor/data/statement_taxonomy.json +6972 -0
- corp_extractor-0.9.4/src/statement_extractor/database/__init__.py +52 -0
- corp_extractor-0.9.4/src/statement_extractor/database/embeddings.py +231 -0
- corp_extractor-0.9.4/src/statement_extractor/database/hub.py +470 -0
- corp_extractor-0.9.4/src/statement_extractor/database/importers/__init__.py +32 -0
- corp_extractor-0.9.4/src/statement_extractor/database/importers/companies_house.py +559 -0
- corp_extractor-0.9.4/src/statement_extractor/database/importers/companies_house_officers.py +431 -0
- corp_extractor-0.9.4/src/statement_extractor/database/importers/gleif.py +561 -0
- corp_extractor-0.9.4/src/statement_extractor/database/importers/import_utils.py +264 -0
- corp_extractor-0.9.4/src/statement_extractor/database/importers/sec_edgar.py +392 -0
- corp_extractor-0.9.4/src/statement_extractor/database/importers/sec_form4.py +512 -0
- corp_extractor-0.9.4/src/statement_extractor/database/importers/wikidata.py +1120 -0
- corp_extractor-0.9.4/src/statement_extractor/database/importers/wikidata_dump.py +2282 -0
- corp_extractor-0.9.4/src/statement_extractor/database/importers/wikidata_people.py +1174 -0
- corp_extractor-0.9.4/src/statement_extractor/database/migrate_v2.py +852 -0
- corp_extractor-0.9.4/src/statement_extractor/database/models.py +378 -0
- corp_extractor-0.9.4/src/statement_extractor/database/resolver.py +245 -0
- corp_extractor-0.9.4/src/statement_extractor/database/schema_v2.py +409 -0
- corp_extractor-0.9.4/src/statement_extractor/database/seed_data.py +359 -0
- corp_extractor-0.9.4/src/statement_extractor/database/store.py +4825 -0
- corp_extractor-0.9.4/src/statement_extractor/document/__init__.py +62 -0
- corp_extractor-0.9.4/src/statement_extractor/document/chunker.py +410 -0
- corp_extractor-0.9.4/src/statement_extractor/document/context.py +171 -0
- corp_extractor-0.9.4/src/statement_extractor/document/deduplicator.py +171 -0
- corp_extractor-0.9.4/src/statement_extractor/document/html_extractor.py +246 -0
- corp_extractor-0.9.4/src/statement_extractor/document/loader.py +303 -0
- corp_extractor-0.9.4/src/statement_extractor/document/pipeline.py +388 -0
- corp_extractor-0.9.4/src/statement_extractor/document/summarizer.py +195 -0
- {corp_extractor-0.2.8 → corp_extractor-0.9.4}/src/statement_extractor/extractor.py +356 -25
- corp_extractor-0.9.4/src/statement_extractor/gliner_extraction.py +218 -0
- corp_extractor-0.9.4/src/statement_extractor/llm.py +255 -0
- corp_extractor-0.9.4/src/statement_extractor/models/__init__.py +90 -0
- corp_extractor-0.9.4/src/statement_extractor/models/canonical.py +182 -0
- corp_extractor-0.9.4/src/statement_extractor/models/document.py +308 -0
- corp_extractor-0.9.4/src/statement_extractor/models/entity.py +102 -0
- corp_extractor-0.9.4/src/statement_extractor/models/labels.py +220 -0
- corp_extractor-0.9.4/src/statement_extractor/models/qualifiers.py +139 -0
- corp_extractor-0.9.4/src/statement_extractor/models/statement.py +99 -0
- {corp_extractor-0.2.8 → corp_extractor-0.9.4}/src/statement_extractor/models.py +43 -2
- corp_extractor-0.9.4/src/statement_extractor/pipeline/__init__.py +39 -0
- corp_extractor-0.9.4/src/statement_extractor/pipeline/config.py +129 -0
- corp_extractor-0.9.4/src/statement_extractor/pipeline/context.py +177 -0
- corp_extractor-0.9.4/src/statement_extractor/pipeline/orchestrator.py +416 -0
- corp_extractor-0.9.4/src/statement_extractor/pipeline/registry.py +303 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/__init__.py +55 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/base.py +716 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/extractors/__init__.py +13 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/extractors/base.py +9 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/extractors/gliner2.py +546 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/labelers/__init__.py +29 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/labelers/base.py +9 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/labelers/confidence.py +138 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/labelers/relation_type.py +87 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/labelers/sentiment.py +159 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/labelers/taxonomy.py +386 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/labelers/taxonomy_embedding.py +477 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/pdf/__init__.py +10 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/pdf/pypdf.py +291 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/qualifiers/__init__.py +30 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/qualifiers/base.py +9 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/qualifiers/companies_house.py +185 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/qualifiers/embedding_company.py +422 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/qualifiers/gleif.py +197 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/qualifiers/person.py +852 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/qualifiers/sec_edgar.py +209 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/scrapers/__init__.py +10 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/scrapers/http.py +236 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/splitters/__init__.py +13 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/splitters/base.py +9 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/splitters/t5_gemma.py +289 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/taxonomy/__init__.py +13 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/taxonomy/embedding.py +484 -0
- corp_extractor-0.9.4/src/statement_extractor/plugins/taxonomy/mnli.py +291 -0
- {corp_extractor-0.2.8 → corp_extractor-0.9.4}/src/statement_extractor/predicate_comparer.py +17 -0
- {corp_extractor-0.2.8 → corp_extractor-0.9.4}/src/statement_extractor/scoring.py +130 -90
- corp_extractor-0.2.8/PKG-INFO +0 -377
- corp_extractor-0.2.8/README.md +0 -336
- corp_extractor-0.2.8/src/statement_extractor/cli.py +0 -215
- {corp_extractor-0.2.8 → corp_extractor-0.9.4}/src/statement_extractor/canonicalization.py +0 -0