corp-extractor 0.9.0__py3-none-any.whl → 0.9.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {corp_extractor-0.9.0.dist-info → corp_extractor-0.9.3.dist-info}/METADATA +40 -9
- {corp_extractor-0.9.0.dist-info → corp_extractor-0.9.3.dist-info}/RECORD +29 -26
- statement_extractor/cli.py +866 -77
- statement_extractor/database/hub.py +35 -127
- statement_extractor/database/importers/__init__.py +10 -2
- statement_extractor/database/importers/companies_house.py +16 -2
- statement_extractor/database/importers/companies_house_officers.py +431 -0
- statement_extractor/database/importers/gleif.py +23 -0
- statement_extractor/database/importers/sec_edgar.py +17 -0
- statement_extractor/database/importers/sec_form4.py +512 -0
- statement_extractor/database/importers/wikidata.py +151 -43
- statement_extractor/database/importers/wikidata_dump.py +1951 -0
- statement_extractor/database/importers/wikidata_people.py +823 -325
- statement_extractor/database/models.py +30 -6
- statement_extractor/database/store.py +1485 -60
- statement_extractor/document/deduplicator.py +10 -12
- statement_extractor/extractor.py +1 -1
- statement_extractor/models/__init__.py +3 -2
- statement_extractor/models/statement.py +15 -17
- statement_extractor/models.py +1 -1
- statement_extractor/pipeline/context.py +5 -5
- statement_extractor/pipeline/orchestrator.py +12 -12
- statement_extractor/plugins/base.py +17 -17
- statement_extractor/plugins/extractors/gliner2.py +28 -28
- statement_extractor/plugins/qualifiers/embedding_company.py +7 -5
- statement_extractor/plugins/qualifiers/person.py +11 -1
- statement_extractor/plugins/splitters/t5_gemma.py +35 -39
- {corp_extractor-0.9.0.dist-info → corp_extractor-0.9.3.dist-info}/WHEEL +0 -0
- {corp_extractor-0.9.0.dist-info → corp_extractor-0.9.3.dist-info}/entry_points.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: corp-extractor
|
|
3
|
-
Version: 0.9.0
|
|
4
|
-
Summary: Extract structured
|
|
3
|
+
Version: 0.9.3
|
|
4
|
+
Summary: Extract structured entity and relationship information from text
|
|
5
5
|
Project-URL: Homepage, https://github.com/corp-o-rate/statement-extractor
|
|
6
6
|
Project-URL: Documentation, https://github.com/corp-o-rate/statement-extractor#readme
|
|
7
7
|
Project-URL: Repository, https://github.com/corp-o-rate/statement-extractor
|
|
@@ -9,7 +9,7 @@ Project-URL: Issues, https://github.com/corp-o-rate/statement-extractor/issues
|
|
|
9
9
|
Author-email: Corp-o-Rate <neil@corp-o-rate.com>
|
|
10
10
|
Maintainer-email: Corp-o-Rate <neil@corp-o-rate.com>
|
|
11
11
|
License: MIT
|
|
12
|
-
Keywords: diverse-beam-search,embeddings,gemma,information-extraction,knowledge-graph,nlp,statement-extraction,subject-predicate-object,
|
|
12
|
+
Keywords: diverse-beam-search,embeddings,entities,entity-linking,entity-resolution,gemma,information-extraction,knowledge-graph,nlp,semantic-parsing,statement-extraction,subject-predicate-object,t5gemma2,transformers,triples
|
|
13
13
|
Classifier: Development Status :: 4 - Beta
|
|
14
14
|
Classifier: Intended Audience :: Developers
|
|
15
15
|
Classifier: Intended Audience :: Science/Research
|
|
@@ -32,6 +32,7 @@ Requires-Dist: httpx>=0.25.0
|
|
|
32
32
|
Requires-Dist: huggingface-hub>=0.20.0
|
|
33
33
|
Requires-Dist: llama-cpp-python>=0.3.16
|
|
34
34
|
Requires-Dist: numpy>=1.24.0
|
|
35
|
+
Requires-Dist: pycountry>=24.6.1
|
|
35
36
|
Requires-Dist: pydantic>=2.0.0
|
|
36
37
|
Requires-Dist: pymupdf>=1.23.0
|
|
37
38
|
Requires-Dist: sentence-transformers>=2.2.0
|
|
@@ -64,7 +65,8 @@ Extract structured subject-predicate-object statements from unstructured text us
|
|
|
64
65
|
|
|
65
66
|
## Features
|
|
66
67
|
|
|
67
|
-
- **Person Database** *(v0.9.
|
|
68
|
+
- **Person Database** *(v0.9.2)*: Qualify notable people (executives, politicians, athletes, etc.) against Wikidata with canonical IDs
|
|
69
|
+
- **Organization Canonicalization** *(v0.9.2)*: Link equivalent records across sources (LEI, ticker, CIK, name matching)
|
|
68
70
|
- **5-Stage Pipeline** *(v0.8.0)*: Modular plugin-based architecture for full entity resolution
|
|
69
71
|
- **Document Processing** *(v0.7.0)*: Process documents, URLs, and PDFs with chunking and deduplication
|
|
70
72
|
- **Entity Embedding Database** *(v0.6.0)*: Fast entity qualification using vector similarity (~100K+ SEC, ~3M GLEIF, ~5M UK organizations)
|
|
@@ -221,10 +223,10 @@ Pipeline Options:
|
|
|
221
223
|
-o, --output [table|json|yaml|triples] Output format
|
|
222
224
|
```
|
|
223
225
|
|
|
224
|
-
##
|
|
226
|
+
## Quality Scoring & Beam Merging
|
|
225
227
|
|
|
226
|
-
By default, the library
|
|
227
|
-
- **Scores each triple**
|
|
228
|
+
By default, the library:
|
|
229
|
+
- **Scores each triple** using semantic similarity (50%) + GLiNER2 entity recognition (50%)
|
|
228
230
|
- **Merges top beams** instead of selecting one, improving coverage
|
|
229
231
|
- **Uses embeddings** to detect semantically similar predicates ("bought" ≈ "acquired")
|
|
230
232
|
|
|
@@ -386,6 +388,19 @@ config = PipelineConfig.from_stage_string("1-3") # Stages 1, 2, 3
|
|
|
386
388
|
|
|
387
389
|
Taxonomy classifiers return **multiple labels** per statement above the confidence threshold.
|
|
388
390
|
|
|
391
|
+
## Entity Database
|
|
392
|
+
|
|
393
|
+
The library includes an **entity embedding database** for fast entity qualification using vector similarity search. It stores records from authoritative sources (GLEIF, SEC, Companies House, Wikidata) with 768-dimensional embeddings for semantic matching.
|
|
394
|
+
|
|
395
|
+
**Quick start:**
|
|
396
|
+
```bash
|
|
397
|
+
corp-extractor db download # Download pre-built database
|
|
398
|
+
corp-extractor db search "Microsoft" # Search organizations
|
|
399
|
+
corp-extractor db search-people "Tim Cook" # Search people
|
|
400
|
+
```
|
|
401
|
+
|
|
402
|
+
For comprehensive documentation including schema, CLI reference, Python API, and build instructions, see **[ENTITY_DATABASE.md](./ENTITY_DATABASE.md)**.
|
|
403
|
+
|
|
389
404
|
## New in v0.6.0: Entity Embedding Database
|
|
390
405
|
|
|
391
406
|
v0.6.0 introduces an **entity embedding database** for fast entity qualification using vector similarity search.
|
|
@@ -405,7 +420,17 @@ v0.6.0 introduces an **entity embedding database** for fast entity qualification
|
|
|
405
420
|
|
|
406
421
|
| Source | Records | Identifier | PersonType Classification |
|
|
407
422
|
|--------|---------|------------|--------------------------|
|
|
408
|
-
| Wikidata | Variable | Wikidata QID | executive, politician, athlete, artist, academic, scientist, journalist, entrepreneur, activist |
|
|
423
|
+
| Wikidata (SPARQL) | Variable | Wikidata QID | executive, politician, athlete, artist, academic, scientist, journalist, entrepreneur, activist |
|
|
424
|
+
| Wikidata (Dump) | All humans with enwiki | Wikidata QID | Classified from positions (P39) and occupations (P106) |
|
|
425
|
+
|
|
426
|
+
**Date Fields**: All importers now include `from_date` and `to_date` where available:
|
|
427
|
+
- **GLEIF**: LEI registration date
|
|
428
|
+
- **SEC Edgar**: First SEC filing date
|
|
429
|
+
- **Companies House**: Incorporation and dissolution dates
|
|
430
|
+
- **Wikidata Orgs**: Inception (P571) and dissolution (P576) dates
|
|
431
|
+
- **Wikidata People**: Position start (P580) and end (P582) dates
|
|
432
|
+
|
|
433
|
+
**Note**: The same person can have multiple records with different role/org combinations (unique on `source_id + role + org`). Organizations discovered during people import are automatically inserted into the organizations table, and a `known_for_org_id` foreign key links people to their organizations.
|
|
409
434
|
|
|
410
435
|
### EntityType Classification
|
|
411
436
|
|
|
@@ -430,6 +455,12 @@ corp-extractor db import-wikidata --limit 50000
|
|
|
430
455
|
# Import notable people (v0.9.0)
|
|
431
456
|
corp-extractor db import-people --type executive --limit 5000
|
|
432
457
|
corp-extractor db import-people --all --limit 10000 # All person types
|
|
458
|
+
corp-extractor db import-people --type executive --skip-existing # Skip existing records
|
|
459
|
+
corp-extractor db import-people --type executive --enrich-dates # Fetch role start/end dates
|
|
460
|
+
|
|
461
|
+
# Import from Wikidata dump (v0.9.1) - avoids SPARQL timeouts
|
|
462
|
+
corp-extractor db import-wikidata-dump --download --limit 50000 # Downloads ~100GB dump
|
|
463
|
+
corp-extractor db import-wikidata-dump --dump /path/to/dump.bz2 --people --no-orgs # Local dump
|
|
433
464
|
|
|
434
465
|
# Check status
|
|
435
466
|
corp-extractor db status
|
|
@@ -742,7 +773,7 @@ for text in texts:
|
|
|
742
773
|
This library uses the T5-Gemma 2 statement extraction model with **Diverse Beam Search** ([Vijayakumar et al., 2016](https://arxiv.org/abs/1610.02424)):
|
|
743
774
|
|
|
744
775
|
1. **Diverse Beam Search**: Generates 4+ candidate outputs using beam groups with diversity penalty
|
|
745
|
-
2. **Quality Scoring**: Each triple scored
|
|
776
|
+
2. **Quality Scoring**: Each triple scored via semantic similarity + GLiNER2 entity recognition
|
|
746
777
|
3. **Beam Merging**: Top beams combined for better coverage
|
|
747
778
|
4. **Embedding Dedup**: Semantic similarity removes near-duplicate predicates
|
|
748
779
|
5. **Predicate Normalization**: Optional taxonomy matching via embeddings
|
|
@@ -1,51 +1,54 @@
|
|
|
1
1
|
statement_extractor/__init__.py,sha256=vOJFsK6wNOoBvGYOvIKsseaqpFR8vNg_XPH-r8SmLas,3215
|
|
2
2
|
statement_extractor/canonicalization.py,sha256=ZMLs6RLWJa_rOJ8XZ7PoHFU13-zeJkOMDnvK-ZaFa5s,5991
|
|
3
|
-
statement_extractor/cli.py,sha256=
|
|
4
|
-
statement_extractor/extractor.py,sha256=
|
|
3
|
+
statement_extractor/cli.py,sha256=l4YcqKmtks6exMAIHSUw_ukWGZ4x-v_V_Gnm-wOGc3g,106464
|
|
4
|
+
statement_extractor/extractor.py,sha256=m10na6I2iU1GwokQTxodePttYgigHykoss5LWrE8JOQ,38418
|
|
5
5
|
statement_extractor/gliner_extraction.py,sha256=OL4w-0_rZc6XCojaVsbGY4VdIXRJ6j8ZmeUeTOL0Ue0,8118
|
|
6
6
|
statement_extractor/llm.py,sha256=1eBrYs-bUPqzbpiiif_hH-gE_DeM-l3cmddrIoLHFXo,8010
|
|
7
|
-
statement_extractor/models.py,sha256=
|
|
7
|
+
statement_extractor/models.py,sha256=rBotCX2hRTMW4MXXkkWYv4JctP0HQR0NSJSlBcNhsF0,12302
|
|
8
8
|
statement_extractor/predicate_comparer.py,sha256=jcuaBi5BYqD3TKoyj3pR9dxtX5ihfDJvjdhEd2LHCwc,26184
|
|
9
9
|
statement_extractor/scoring.py,sha256=V9WHQ-QCAoycnnaTHydWkFo-48_lcS6Mkztxjfi4wVg,16632
|
|
10
10
|
statement_extractor/data/default_predicates.json,sha256=7rhFXWHvStDj4sLYfBXKS50xGChPaqMKUaKTkMEJRGk,32870
|
|
11
11
|
statement_extractor/data/statement_taxonomy.json,sha256=LI9RWthuJTFCcuaIbh6h3FEu8EJpejiKjAtNM_y1s8A,336543
|
|
12
12
|
statement_extractor/database/__init__.py,sha256=1eScQOm7866v9xndaqCK-xsXDUhKBSj5YGtGoQ80LgU,1548
|
|
13
13
|
statement_extractor/database/embeddings.py,sha256=j_gUTEdRyyQCPcx9imsOh1nVDPjeiRXXG22OZ7KIO4w,5535
|
|
14
|
-
statement_extractor/database/hub.py,sha256=
|
|
15
|
-
statement_extractor/database/models.py,sha256=
|
|
14
|
+
statement_extractor/database/hub.py,sha256=3xCvbCeqC6GR3XgVow7MAXg46ZPDYfDX8it93Xikw5w,13295
|
|
15
|
+
statement_extractor/database/models.py,sha256=4aLs5tp2QTAd9vAyPf80EUoHypd_K0jELcY4J51iaNw,10563
|
|
16
16
|
statement_extractor/database/resolver.py,sha256=_fTITarFmAYOtuRbOos48ou_aqX4yJC0K2csdLbIktI,7202
|
|
17
|
-
statement_extractor/database/store.py,sha256=
|
|
18
|
-
statement_extractor/database/importers/__init__.py,sha256=
|
|
19
|
-
statement_extractor/database/importers/companies_house.py,sha256=
|
|
20
|
-
statement_extractor/database/importers/
|
|
21
|
-
statement_extractor/database/importers/
|
|
22
|
-
statement_extractor/database/importers/
|
|
23
|
-
statement_extractor/database/importers/
|
|
17
|
+
statement_extractor/database/store.py,sha256=FPyfC6KjD6pjfU2jccVEvsAcCtfqMdwVJuS7aqCNrKA,112320
|
|
18
|
+
statement_extractor/database/importers/__init__.py,sha256=acIoX_BPdXv2DOMFyVbFZPDGNWp2s1FpC774loTqL5I,1121
|
|
19
|
+
statement_extractor/database/importers/companies_house.py,sha256=b5OMFtoHhkPgoGK08ThQn9BtTu9uC_dYzBVpC10xT4U,20252
|
|
20
|
+
statement_extractor/database/importers/companies_house_officers.py,sha256=QDFA0FzqDx9p6VjRrB7o4BE3e30l7i0ML_ktntsB-kA,15565
|
|
21
|
+
statement_extractor/database/importers/gleif.py,sha256=sw4YYROD6wi7IbBEKGCn8kko0nOYbKOyukDJKGQp17Q,20200
|
|
22
|
+
statement_extractor/database/importers/sec_edgar.py,sha256=0nnhnOrf5d1wR9PGjl8AuNOnp4mfmEtopjkgUY_PLQc,13738
|
|
23
|
+
statement_extractor/database/importers/sec_form4.py,sha256=ZoV-oyNhG5AOUm4u9hemmRI5KnpNs3Gw_dfisjkD3zU,18234
|
|
24
|
+
statement_extractor/database/importers/wikidata.py,sha256=tRj4kEMVIq7sRXxjyxj-scl8eXybkrLVvyNDYV2T5lg,39572
|
|
25
|
+
statement_extractor/database/importers/wikidata_dump.py,sha256=GSLn_BV4h-Efms2tp_eYyhqSJsRFjnZzyqgaUCDmyVY,77903
|
|
26
|
+
statement_extractor/database/importers/wikidata_people.py,sha256=s4AB2pQLK2qHK9X5BLoW-II3qZBbJG4zbU3Ro4FBT9o,43157
|
|
24
27
|
statement_extractor/document/__init__.py,sha256=csbUUjxaZay-0WXtjZmULjDfL9VNxhOlePyKTMdRDYo,1714
|
|
25
28
|
statement_extractor/document/chunker.py,sha256=I76p6Qwujk2kkN7GJ1sMwbQNOfEpbt29u-RxJdt15oE,14020
|
|
26
29
|
statement_extractor/document/context.py,sha256=9DvyguwCjlef2MeNWZMgydvD54FPiOppjdvamQnrKzM,5450
|
|
27
|
-
statement_extractor/document/deduplicator.py,sha256=
|
|
30
|
+
statement_extractor/document/deduplicator.py,sha256=R_RwEdVeVQBYZHvjkVA0ShAWr8x618VrO9dkYWXvifI,4771
|
|
28
31
|
statement_extractor/document/html_extractor.py,sha256=YRhaIsurBJTeECLkL2YJsSv8gDJJN33fS-ESkGvDBGs,6600
|
|
29
32
|
statement_extractor/document/loader.py,sha256=Ygund7bz4EVcwsFsxkrrgSjOCK4tbb_sqkMlzK_oEKM,8996
|
|
30
33
|
statement_extractor/document/pipeline.py,sha256=h4q-CG_WtBLibkTXCFhfTizMme8bJS5f6ZWOECqhRYU,13675
|
|
31
34
|
statement_extractor/document/summarizer.py,sha256=DOF6qPw0oWEtLSt97oXOFyzb0jGWZZ7frDFp11rL3is,5853
|
|
32
|
-
statement_extractor/models/__init__.py,sha256=
|
|
35
|
+
statement_extractor/models/__init__.py,sha256=OJOK0ral_jskrSxx6nCc3TB6JlVYaC5HI2eYXr9dhMQ,2971
|
|
33
36
|
statement_extractor/models/canonical.py,sha256=LaSU3CUJZOtBM1SpRTAmK-3N7QnYmxZYJvQE1NVIjLY,6003
|
|
34
37
|
statement_extractor/models/document.py,sha256=McCyXz88YtJtlsfiFzagjRAhY32ovpIDKXQI_eV_DZI,9203
|
|
35
38
|
statement_extractor/models/entity.py,sha256=l2ny91BnnWwPo9zx1_Fb8WMKPNuIQFN0H7ILncylmcY,3214
|
|
36
39
|
statement_extractor/models/labels.py,sha256=NUcjFDuGUOM82mgsaWOdoIVbRNiQ6TdN-imNuTograo,7326
|
|
37
40
|
statement_extractor/models/qualifiers.py,sha256=l--khVzt-N6jgibZ-MSSl-3SdQUZJN9dGoxdNhRmM_I,5926
|
|
38
|
-
statement_extractor/models/statement.py,sha256=
|
|
41
|
+
statement_extractor/models/statement.py,sha256=Wpp2OtZ5inhqbtEcblWdcES7g7lA-FVjqjz6Jq7hqzo,3329
|
|
39
42
|
statement_extractor/pipeline/__init__.py,sha256=Q3M2Arx9BWH_APZxM-P0G-C3ISguG1whiA5QhxDHQCA,1071
|
|
40
43
|
statement_extractor/pipeline/config.py,sha256=FXtqMMpRmdeuHB86D6YrFx5A36XHVg5GlBBZuPEn4JA,3957
|
|
41
|
-
statement_extractor/pipeline/context.py,sha256=
|
|
42
|
-
statement_extractor/pipeline/orchestrator.py,sha256=
|
|
44
|
+
statement_extractor/pipeline/context.py,sha256=evAdyH5oOCNM_ILGZNS1mov3lM4D3mCvr5hzsjaB0Bs,6136
|
|
45
|
+
statement_extractor/pipeline/orchestrator.py,sha256=qH6rD4_wI_kZ_e8NeIv2XYHUA07ldogFewFsZeRQVxw,16687
|
|
43
46
|
statement_extractor/pipeline/registry.py,sha256=yBybhRd1HU2Y75TebLGBzF6nbPiHKZ0cHkyj-3CVnhg,11390
|
|
44
47
|
statement_extractor/plugins/__init__.py,sha256=pIcPeoMFd-56jOM_kGrUWvPuwqN6vFJ-oUbu130-tzI,1345
|
|
45
|
-
statement_extractor/plugins/base.py,sha256=
|
|
48
|
+
statement_extractor/plugins/base.py,sha256=xC661iFtnhIxtZLTwuCc-0rFV1q2V3hCTV-uOaILsOA,21622
|
|
46
49
|
statement_extractor/plugins/extractors/__init__.py,sha256=sqxTI7WwDLVQKwOiQXqWS72gjJnwb76Gs9N3LGetBnI,253
|
|
47
50
|
statement_extractor/plugins/extractors/base.py,sha256=kNRsQ7BL84lXPXREm7CihrprDUaFwDDvMpBcbZlwSGA,179
|
|
48
|
-
statement_extractor/plugins/extractors/gliner2.py,sha256=
|
|
51
|
+
statement_extractor/plugins/extractors/gliner2.py,sha256=yDwKJVniMj4YwjR4Rm6MALDk633H5qcKcxa2xOLh9LI,21999
|
|
49
52
|
statement_extractor/plugins/labelers/__init__.py,sha256=flHEoBvnzQ3vAKkIUHyezpYi2H3KJvYGRerCVnc80r0,965
|
|
50
53
|
statement_extractor/plugins/labelers/base.py,sha256=hIgJKq2LU00OcL0Zjy1L9hP8K2onlM_xtZ63XcH8qDE,171
|
|
51
54
|
statement_extractor/plugins/labelers/confidence.py,sha256=XiXjBYe-8ch_SCKnz0sAwTT1mJ_XKMsuzXBbwAW_OK0,4083
|
|
@@ -58,19 +61,19 @@ statement_extractor/plugins/pdf/pypdf.py,sha256=JgmWa1-6tiATbPvhONMqRd5kAXJ--tb8
|
|
|
58
61
|
statement_extractor/plugins/qualifiers/__init__.py,sha256=H4FEZSw1GWBQB-Y79nQnLwhZ3okKQJqgJHGEA0Zp8pA,951
|
|
59
62
|
statement_extractor/plugins/qualifiers/base.py,sha256=Kx--OdIh77mnjSkTl1NvUeekItRiG8AnBUcuznOZeBI,179
|
|
60
63
|
statement_extractor/plugins/qualifiers/companies_house.py,sha256=6TlK6Zebb5wDJ9GGO3FvM9zOh27TWpio5BX9k7lBr7U,5854
|
|
61
|
-
statement_extractor/plugins/qualifiers/embedding_company.py,sha256=
|
|
64
|
+
statement_extractor/plugins/qualifiers/embedding_company.py,sha256=nc7oTFjEBuPiprjXKeFRiMYM6tNicMNum_xQ9LSgEOg,14756
|
|
62
65
|
statement_extractor/plugins/qualifiers/gleif.py,sha256=zHzC9eOt0R9Z56n0CXgTF7POJqu6v03SRmiJLmv8OGE,6104
|
|
63
|
-
statement_extractor/plugins/qualifiers/person.py,sha256=
|
|
66
|
+
statement_extractor/plugins/qualifiers/person.py,sha256=EN1T0G9NT6wOeIGljzZql11o63BujaHzK44yRqMTiRk,29034
|
|
64
67
|
statement_extractor/plugins/qualifiers/sec_edgar.py,sha256=d7QqGiE-3lFDQiXkYmNQU62K4oP2XYK6NzV6LNKPC5k,6754
|
|
65
68
|
statement_extractor/plugins/scrapers/__init__.py,sha256=mh1nmPtcsewrYeW5oELeke6DSzL8jsGOJ2OcH-A4-eo,208
|
|
66
69
|
statement_extractor/plugins/scrapers/http.py,sha256=igoB1JN7U-FPdBFmNfrdZV-Ho4JQ3RXniLz17SmQx8I,7778
|
|
67
70
|
statement_extractor/plugins/splitters/__init__.py,sha256=05CYeAEO0lZsapK5pjxZJbOCLI1kjeK6IQjftxqqg5g,224
|
|
68
71
|
statement_extractor/plugins/splitters/base.py,sha256=GeIBchFTr8icRSfYR8bGSb4-GoEZ1N0IGN6Kl5W2mL0,175
|
|
69
|
-
statement_extractor/plugins/splitters/t5_gemma.py,sha256=
|
|
72
|
+
statement_extractor/plugins/splitters/t5_gemma.py,sha256=5qjxeHznuAA9hL8EbUDDGQ3N2gYLmtg0hv9BsLWzfMk,9971
|
|
70
73
|
statement_extractor/plugins/taxonomy/__init__.py,sha256=8N0tW7pm95DSCqM-s99ea0Tigbi9bZMyTkKblR1qmLw,307
|
|
71
74
|
statement_extractor/plugins/taxonomy/embedding.py,sha256=yCuNE8UeY8tH2dHGRKL3hmRQBmdz9_9YQ0t5_VTCf7E,16349
|
|
72
75
|
statement_extractor/plugins/taxonomy/mnli.py,sha256=zPZlpAHQqdnwH7fXS_CSY0HCMnaSrrk-fDQb1ZIqqPc,9163
|
|
73
|
-
corp_extractor-0.9.
|
|
74
|
-
corp_extractor-0.9.
|
|
75
|
-
corp_extractor-0.9.
|
|
76
|
-
corp_extractor-0.9.
|
|
76
|
+
corp_extractor-0.9.3.dist-info/METADATA,sha256=Ps8LucareMigmuhXiPIDUXPgsWp5F7noVYT7VbTrSZA,29633
|
|
77
|
+
corp_extractor-0.9.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
78
|
+
corp_extractor-0.9.3.dist-info/entry_points.txt,sha256=i0iKFqPIusvb-QTQ1zNnFgAqatgVah-jIhahbs5TToQ,115
|
|
79
|
+
corp_extractor-0.9.3.dist-info/RECORD,,
|