resolvekit 0.1.3__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {resolvekit-0.1.3 → resolvekit-0.1.5}/PKG-INFO +1 -1
- {resolvekit-0.1.3 → resolvekit-0.1.5}/pyproject.toml +1 -1
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/engine/suggest_rank.py +35 -9
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/custom/pack.py +18 -7
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/custom/sources/__init__.py +2 -0
- resolvekit-0.1.5/src/resolvekit/packs/custom/sources/fuzzy_retrieval.py +17 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/shared/sources/__init__.py +4 -0
- resolvekit-0.1.5/src/resolvekit/shared/sources/fuzzy_retrieval_brute_base.py +403 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/shared/sources/symspell_base.py +12 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/LICENSE +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/README.md +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/NOTICE.md +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_convenience.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/admin1/metadata.json +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/admin2/metadata.json +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/admin3/metadata.json +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/admin4/metadata.json +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/admin5/metadata.json +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/cities/metadata.json +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/continental_unions/entities.sqlite +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/continental_unions/metadata.json +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/continental_unions/symspell.dict +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/continents/entities.sqlite +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/continents/metadata.json +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/continents/symspell.dict +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/countries/entities.sqlite +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/countries/geo_calibrator.json +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/countries/metadata.json +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/countries/symspell.dict +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/regions/entities.sqlite +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/regions/metadata.json +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/regions/symspell.dict +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/manifest.json +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/org/companies/entities.sqlite +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/org/companies/metadata.json +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/org/companies/symspell.dict +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/org/data_sources/entities.sqlite +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/org/data_sources/metadata.json +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/org/data_sources/symspell.dict +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/org/governments/entities.sqlite +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/org/governments/metadata.json +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/org/governments/symspell.dict +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/org/lenders/entities.sqlite +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/org/lenders/metadata.json +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/org/lenders/symspell.dict +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/org/political_parties/entities.sqlite +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/org/political_parties/metadata.json +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/org/political_parties/symspell.dict +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/org/providers/entities.sqlite +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/org/providers/metadata.json +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/org/providers/symspell.dict +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/parse/deny_list.json +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_pandas_integration.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_polars_integration.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/_outcomes.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/api.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/containment.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/country_geonames_aliases.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/datapack_layout.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/entity_validity.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/formal_names.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/geo_shared.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/groups.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/inspection.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/models.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/module_catalog.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/oecd_dac.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/pipeline/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/pipeline/build_report.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/pipeline/changelog.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/pipeline/chunk.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/pipeline/contribution.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/pipeline/core.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/pipeline/discover.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/pipeline/enrich.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/pipeline/geo_staging.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/pipeline/packaging.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/pipeline/promote.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/pipeline/qa.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/pipeline/reconcile.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/pipeline/stages.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/pipeline/types.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/presets.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/registry.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/adapter.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/base_dc_api.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/bundle.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/canonicalize.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/client.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/constants.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/geo/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/geo/_admin_walk.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/geo/_chunk_callback.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/geo/_geo_regions.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/geo/_ordered_emitter.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/geo/_progress_context.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/geo/_streaming.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/geo/_type_mappings.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/geo/adapter.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/geo/dc_api.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/geo/discovery.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/geo/fetch.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/geo/mappings.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/geo/profile.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/geo/prominence.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/models.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/node.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/org/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/org/adapter.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/org/dc_api.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/org/discovery.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/org/fetch.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/org/mappings.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/org/profile.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/rows.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/specs.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/datacommons/text.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/discovery_events.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/protocol.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/seed/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/seed/continents.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/seed/m49.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/wikidata/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/wikidata/aliases.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sources/wikidata/sitelinks.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sqlite/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sqlite/constants.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sqlite/context.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sqlite/diff.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sqlite/export.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sqlite/specs.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sqlite/validate.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/sqlite/write.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/state.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/builder/utils.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/calibration/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/calibration/adapters/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/calibration/adapters/_cldr_source.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/calibration/adapters/_latin_filter.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/calibration/adapters/_wikidata_client.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/calibration/adapters/cldr.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/calibration/adapters/geonames.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/calibration/adapters/multilingual_names.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/calibration/adapters/synthetic.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/calibration/adapters/wikidata.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/calibration/dataset.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/calibration/evaluation.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/calibration/fitting.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/calibration/models.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/calibration/scoring_model.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/calibration/train.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/calibration/vectorize.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/_byod.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/_pivot.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/batch.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/bulk.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/cache.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/code_lookup.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/containment_api.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/context_input.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/diagnostics.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/entity_lookup.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/group_api.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/info.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/inspect.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/loading/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/loading/module_catalog.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/loading/pack_loader.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/loading/paths.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/loading/store_builder.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/modules.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/output_spec.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/output_view.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/query_prep.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/resolve_flow.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/resolver.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/snap.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/api/suggest_flow.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/byod/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/byod/build.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/byod/builder.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/byod/cache.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/byod/intake.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/byod/result.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/config.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/datapack.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/download_api.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/engine/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/engine/_stages.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/engine/config.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/engine/decision.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/engine/enrichment.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/engine/interfaces.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/engine/multi_runner.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/engine/router.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/engine/runner.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/engine/tier_utils.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/errors.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/errors_base.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/explain/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/explain/events.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/explain/feature_text.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/explain/helpers.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/explain/protocol.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/explain/renderers.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/explain/result_html.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/explain/result_types.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/explain/scorecard.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/explain/sink.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/linking/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/linking/base_linker.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/linking/base_normalizer.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/linking/linker.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/linking/normalizer.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/merge.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/model/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/model/_repr.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/model/bulk_result.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/model/candidate.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/model/crosswalk.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/model/entity.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/model/entity_attributes.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/model/features.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/model/generation.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/model/inspection.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/model/name_grammar.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/model/query.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/model/result.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/module_registry.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/overlay_loader.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/parse/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/parse/_pivot.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/parse/automaton.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/parse/denylist.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/parse/detect.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/parse/engine.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/parse/link.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/parse/offsets.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/parse/result.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/registry.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/remote.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/store/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/store/composed_sqlite.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/store/composite.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/store/interface.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/store/merging.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/store/sqlite.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/store/sqlite_helpers.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/store/store_view.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/util/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/util/iso_codes.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/util/normalization.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/util/sentinel.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/core/version.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/diagnostics/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/errors/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/extensions.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/_artifacts.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/custom/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/custom/decision.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/custom/extractor.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/custom/features.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/custom/normalizer.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/custom/scoring.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/custom/sources/exact_code.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/custom/sources/exact_name.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/custom/sources/fts.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/custom/sources/fuzzy.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/_specificity.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/build/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/build/builder.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/constraints/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/constraints/containment.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/constraints/membership.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/constraints/temporal.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/constraints/type_constraint.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/data/.gitkeep +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/decision.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/extractor.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/features.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/linker.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/normalizer.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/pack.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/routing.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/scoring.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/sources/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/sources/_short_input.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/sources/exact_code.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/sources/exact_name.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/sources/fts.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/sources/fuzzy.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/sources/fuzzy_retrieval.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/sources/query_shapes.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/geo/sources/symspell.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/_acronym.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/build/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/build/builder.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/constraints/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/constraints/country_relevance.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/constraints/parent_org.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/constraints/temporal.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/constraints/type_constraint.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/data/.gitkeep +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/decision.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/feature_extractor.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/features.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/linker.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/normalizer.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/pack.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/routing.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/scoring.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/sources/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/sources/acronym.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/sources/exact_code.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/sources/exact_name.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/sources/fts.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/sources/fuzzy.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/packs/org/sources/symspell.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/pandas/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/polars/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/py.typed +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/shared/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/shared/build/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/shared/build/base_builder.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/shared/build/schema.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/shared/constraints/__init__.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/shared/constraints/temporal_constraint.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/shared/constraints/type_constraint.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/shared/scoring_base.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/shared/sources/code_helpers.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/shared/sources/fts_base.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/shared/sources/fuzzy_base.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/shared/sources/fuzzy_retrieval_base.py +0 -0
- {resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/types/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: resolvekit
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: Entity and place resolution system that maps messy place/entity strings and codes to canonical entities
|
|
5
5
|
Keywords: entity-resolution,geocoding,place-names,data-commons,iso-codes,offline,disambiguation,normalization
|
|
6
6
|
Author: Jorge Rivera
|
|
@@ -49,15 +49,26 @@ PROMINENCE_LIVE_TYPE_PREFIXES: Final[frozenset[str]] = frozenset(
|
|
|
49
49
|
"geo.subregion",
|
|
50
50
|
"geo.region",
|
|
51
51
|
"geo.continental_union",
|
|
52
|
-
# City and
|
|
53
|
-
#
|
|
54
|
-
#
|
|
55
|
-
# the data ships, without a follow-on code change.
|
|
52
|
+
# City and all sub-country admin tiers carry live prominence in the
|
|
53
|
+
# shipped remote packs (Wikidata sitelinks with DC population
|
|
54
|
+
# fallback for admin3/admin4).
|
|
56
55
|
"geo.city",
|
|
56
|
+
"geo.admin1",
|
|
57
57
|
"geo.admin2",
|
|
58
|
+
"geo.admin3",
|
|
59
|
+
"geo.admin4",
|
|
60
|
+
"geo.admin5",
|
|
58
61
|
}
|
|
59
62
|
)
|
|
60
63
|
|
|
64
|
+
# Prominence floor for the exact-match lift on prominence-ranked tiers. An
|
|
65
|
+
# exact full-name hit from a ranked tier below this floor loses its lift: an
|
|
66
|
+
# obscure place whose name merely coincides with the prefix ("Germ", a French
|
|
67
|
+
# commune) must not outrank a famous completion ("Germany"). Unranked tiers
|
|
68
|
+
# (orgs — no prominence data) always keep the lift, so typed acronyms ("UN",
|
|
69
|
+
# "NATO") still surface first.
|
|
70
|
+
EXACT_MATCH_MIN_PROMINENCE: Final[float] = 0.3
|
|
71
|
+
|
|
61
72
|
# Numeric rank for each MatchClass (lower = better).
|
|
62
73
|
MATCH_CLASS_RANK: Final[dict[MatchClass, int]] = {
|
|
63
74
|
MatchClass.EXACT_PREFIX: 0,
|
|
@@ -143,15 +154,22 @@ def sort_key(
|
|
|
143
154
|
``exact_match_rank`` is 0 when the user's query equals the matched name in
|
|
144
155
|
full (``exact_match=True``) and 1 otherwise. This lifts entities whose
|
|
145
156
|
complete short name was typed (e.g. "EU", "NATO") above longer-named
|
|
146
|
-
entities that merely *start with* those letters
|
|
147
|
-
|
|
157
|
+
entities that merely *start with* those letters — with one exception:
|
|
158
|
+
a candidate from a prominence-ranked tier whose prominence falls below
|
|
159
|
+
``EXACT_MATCH_MIN_PROMINENCE`` loses the lift, so an obscure place whose
|
|
160
|
+
full name coincides with the prefix ("Germ") cannot outrank a famous
|
|
161
|
+
completion ("Germany"). Unranked tiers always keep the lift.
|
|
148
162
|
|
|
149
163
|
The final ``entity_id`` field gives a total order so that equal inputs
|
|
150
164
|
always produce the same sequence.
|
|
151
165
|
"""
|
|
166
|
+
exact_lift = c.exact_match and (
|
|
167
|
+
ranking_quality(c.entity_type) == "unranked"
|
|
168
|
+
or c.prominence >= EXACT_MATCH_MIN_PROMINENCE
|
|
169
|
+
)
|
|
152
170
|
return (
|
|
153
171
|
MATCH_CLASS_RANK[c.match_class],
|
|
154
|
-
0 if
|
|
172
|
+
0 if exact_lift else 1,
|
|
155
173
|
c.typo_count,
|
|
156
174
|
-c.prominence,
|
|
157
175
|
c.name_kind_rank,
|
|
@@ -218,6 +236,7 @@ def fuzzy_candidates(
|
|
|
218
236
|
names: list[tuple[str, str, str, bool, str]],
|
|
219
237
|
*,
|
|
220
238
|
top_k: int,
|
|
239
|
+
choices: list[str] | None = None,
|
|
221
240
|
) -> list[SuggestCandidate]:
|
|
222
241
|
"""Run brute-force RapidFuzz over a pre-materialized name list.
|
|
223
242
|
|
|
@@ -234,6 +253,12 @@ def fuzzy_candidates(
|
|
|
234
253
|
value)`` 5-tuples — typically the memoized output of
|
|
235
254
|
``store.iter_suggest_names()``.
|
|
236
255
|
top_k: Number of results the caller ultimately wants.
|
|
256
|
+
choices: Optional pre-extracted list of ``value_norm`` strings
|
|
257
|
+
(``[row[0] for row in names]``). When provided, the per-call
|
|
258
|
+
list comprehension is skipped — callers that memoize the name list
|
|
259
|
+
should also memoize and pass ``choices`` to avoid a 25k-string
|
|
260
|
+
rebuild on every query. When ``None`` (default), the list is
|
|
261
|
+
built from ``names`` as before (backward-compatible).
|
|
237
262
|
|
|
238
263
|
Returns:
|
|
239
264
|
List of ``SuggestCandidate`` objects (one per distinct matched name row,
|
|
@@ -250,8 +275,9 @@ def fuzzy_candidates(
|
|
|
250
275
|
|
|
251
276
|
fuzzy_pool = min(max(top_k * 5, 50), 500)
|
|
252
277
|
|
|
253
|
-
#
|
|
254
|
-
choices
|
|
278
|
+
# Use caller-supplied choices when available to avoid a per-call rebuild.
|
|
279
|
+
if choices is None:
|
|
280
|
+
choices = [row[0] for row in names]
|
|
255
281
|
|
|
256
282
|
raw_hits = process.extract(
|
|
257
283
|
query_norm,
|
|
@@ -21,6 +21,7 @@ from resolvekit.packs.custom.sources import (
|
|
|
21
21
|
CustomExactCodeSource,
|
|
22
22
|
CustomExactNameSource,
|
|
23
23
|
CustomFTSSource,
|
|
24
|
+
CustomFuzzyRetrievalSource,
|
|
24
25
|
CustomFuzzySource,
|
|
25
26
|
)
|
|
26
27
|
|
|
@@ -44,13 +45,22 @@ class GenericPack:
|
|
|
44
45
|
custom entities, etc.).
|
|
45
46
|
|
|
46
47
|
Sources (in pipeline order):
|
|
47
|
-
1. CustomExactCodeSource
|
|
48
|
-
2. CustomExactNameSource
|
|
49
|
-
3. CustomFTSSource
|
|
50
|
-
4.
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
48
|
+
1. CustomExactCodeSource — catch-all ``lookup_code_any``; raw 1.0
|
|
49
|
+
2. CustomExactNameSource — canonical raw 1.0, alias raw 0.95
|
|
50
|
+
3. CustomFTSSource — BM25 ranked FTS
|
|
51
|
+
4. CustomFuzzyRetrievalSource — generating brute-force RapidFuzz over the
|
|
52
|
+
store's materialized name list; emits
|
|
53
|
+
FUZZY-tier evidence so typo'd queries that
|
|
54
|
+
FTS cannot tokenize-match still produce
|
|
55
|
+
candidates. Free on exact-name queries
|
|
56
|
+
(engine fuzzy-skip guard bypasses it when
|
|
57
|
+
a confident EXACT_NAME candidate is present).
|
|
58
|
+
Callers needing stricter precision can raise
|
|
59
|
+
``confidence_threshold`` above the 0.89 FUZZY
|
|
60
|
+
cap to suppress fuzzy-tier results.
|
|
61
|
+
5. CustomFuzzySource — reranks existing candidates with
|
|
62
|
+
``fuzzy_edit_sim`` / ``fuzzy_token_sim``
|
|
63
|
+
signals (requires_existing_candidates)
|
|
54
64
|
|
|
55
65
|
Args:
|
|
56
66
|
symspell_dict_path: Accepted but ignored to satisfy factory introspection.
|
|
@@ -70,6 +80,7 @@ class GenericPack:
|
|
|
70
80
|
CustomExactCodeSource(),
|
|
71
81
|
CustomExactNameSource(),
|
|
72
82
|
CustomFTSSource(),
|
|
83
|
+
CustomFuzzyRetrievalSource(),
|
|
73
84
|
CustomFuzzySource(),
|
|
74
85
|
]
|
|
75
86
|
|
|
@@ -4,10 +4,12 @@ from resolvekit.packs.custom.sources.exact_code import CustomExactCodeSource
|
|
|
4
4
|
from resolvekit.packs.custom.sources.exact_name import CustomExactNameSource
|
|
5
5
|
from resolvekit.packs.custom.sources.fts import CustomFTSSource
|
|
6
6
|
from resolvekit.packs.custom.sources.fuzzy import CustomFuzzySource
|
|
7
|
+
from resolvekit.packs.custom.sources.fuzzy_retrieval import CustomFuzzyRetrievalSource
|
|
7
8
|
|
|
8
9
|
__all__ = [
|
|
9
10
|
"CustomExactCodeSource",
|
|
10
11
|
"CustomExactNameSource",
|
|
11
12
|
"CustomFTSSource",
|
|
13
|
+
"CustomFuzzyRetrievalSource",
|
|
12
14
|
"CustomFuzzySource",
|
|
13
15
|
]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Brute-force fuzzy retrieval source for custom entities."""
|
|
2
|
+
|
|
3
|
+
from resolvekit.shared.sources import FuzzyRetrievalBruteSource
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class CustomFuzzyRetrievalSource(FuzzyRetrievalBruteSource):
    """Brute-force fuzzy retrieval bound to the ``custom`` domain.

    The store's name list is materialized a single time and scanned with
    brute-force RapidFuzz, so no prebuilt SymSpell dictionary is needed.
    The pack registers this source ahead of the reranking
    ``CustomFuzzySource`` so that the FUZZY-tier candidates produced here
    are enriched with ``fuzzy_edit_sim`` / ``fuzzy_token_sim`` signals
    before scoring.
    """

    def __init__(self) -> None:
        # All tunables keep the base-class defaults; only the identity of the
        # source (its name and the domain it serves) is fixed here.
        super().__init__(
            name="custom_fuzzy_retrieval",
            domain="custom",
        )
|
|
@@ -4,11 +4,15 @@ from resolvekit.shared.sources.code_helpers import evidence_from_code_hits
|
|
|
4
4
|
from resolvekit.shared.sources.fts_base import BM25ScoreTiers, FTSSource
|
|
5
5
|
from resolvekit.shared.sources.fuzzy_base import FuzzySource
|
|
6
6
|
from resolvekit.shared.sources.fuzzy_retrieval_base import FuzzyRetrievalSource
|
|
7
|
+
from resolvekit.shared.sources.fuzzy_retrieval_brute_base import (
|
|
8
|
+
FuzzyRetrievalBruteSource,
|
|
9
|
+
)
|
|
7
10
|
from resolvekit.shared.sources.symspell_base import SymSpellSource
|
|
8
11
|
|
|
9
12
|
__all__ = [
|
|
10
13
|
"BM25ScoreTiers",
|
|
11
14
|
"FTSSource",
|
|
15
|
+
"FuzzyRetrievalBruteSource",
|
|
12
16
|
"FuzzyRetrievalSource",
|
|
13
17
|
"FuzzySource",
|
|
14
18
|
"SymSpellSource",
|
|
@@ -0,0 +1,403 @@
|
|
|
1
|
+
"""Store-backed brute-force fuzzy retrieval source.
|
|
2
|
+
|
|
3
|
+
Retrieves candidates by running RapidFuzz ``partial_ratio`` over the store's
|
|
4
|
+
materialized name list, reusing the same ``fuzzy_candidates`` / ``iter_suggest_names``
|
|
5
|
+
mechanism the ``suggest()`` path already uses. Unlike the SymSpell-backed
|
|
6
|
+
``FuzzyRetrievalSource``, this source needs no prebuilt dictionary — it operates
|
|
7
|
+
directly over whatever names are in the store, making it suitable for
|
|
8
|
+
programmatically-built packs (``Resolver.from_records``, ``domain="custom"``).
|
|
9
|
+
|
|
10
|
+
The engine's fuzzy-skip guard (``should_skip_source``) bypasses this source
|
|
11
|
+
when a confident exact-name match is already present, so the always-on
|
|
12
|
+
registration is free on clean canonical-name queries.
|
|
13
|
+
|
|
14
|
+
Short-input guard
|
|
15
|
+
-----------------
|
|
16
|
+
The self-contained guard helper defined ahead of the source class in this module rejects degenerate
|
|
17
|
+
inputs before the name list is built. It is intentionally domain-agnostic:
|
|
18
|
+
no geo entity-type hint logic, no ISO-code unlocking. Add domain-specific
|
|
19
|
+
unlocking in a subclass override if needed.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import logging
|
|
25
|
+
import threading
|
|
26
|
+
import time
|
|
27
|
+
from typing import Any
|
|
28
|
+
|
|
29
|
+
from resolvekit.core.engine import CandidateSource
|
|
30
|
+
from resolvekit.core.engine.suggest_rank import FUZZY_AUTO_MAX_NAMES, fuzzy_candidates
|
|
31
|
+
from resolvekit.core.explain import emit_candidates_generated
|
|
32
|
+
from resolvekit.core.model import (
|
|
33
|
+
CandidateEvidence,
|
|
34
|
+
GenerationContext,
|
|
35
|
+
MatchTier,
|
|
36
|
+
ReasonCode,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
logger = logging.getLogger(__name__)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
# Short-input guard (domain-agnostic)
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
|
|
46
|
+
# Common spreadsheet / dataframe missing-value markers. Compared casefolded.
|
|
47
|
+
_DEGENERATE_TOKENS: frozenset[str] = frozenset(
|
|
48
|
+
{
|
|
49
|
+
"na",
|
|
50
|
+
"n/a",
|
|
51
|
+
"n.a.",
|
|
52
|
+
"n.a",
|
|
53
|
+
"n/k",
|
|
54
|
+
"null",
|
|
55
|
+
"none",
|
|
56
|
+
"nan",
|
|
57
|
+
"nil",
|
|
58
|
+
"tbd",
|
|
59
|
+
"tba",
|
|
60
|
+
"unknown",
|
|
61
|
+
"<null>",
|
|
62
|
+
"#n/a",
|
|
63
|
+
"?",
|
|
64
|
+
"-",
|
|
65
|
+
"--",
|
|
66
|
+
"---",
|
|
67
|
+
".",
|
|
68
|
+
}
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _is_degenerate_token(text: str) -> bool:
    """Tell whether *text* is a known missing-value marker (``NA``, ``#N/A``, ``--`` …).

    Leading and trailing whitespace is removed and the remainder is
    casefolded before it is looked up in ``_DEGENERATE_TOKENS``.
    """
    folded = text.strip().casefold()
    return folded in _DEGENERATE_TOKENS
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _is_punctuation_noise(text: str) -> bool:
|
|
81
|
+
"""Return True for short tokens dominated by punctuation or symbols.
|
|
82
|
+
|
|
83
|
+
Strips common spreadsheet punctuation and checks whether the residual
|
|
84
|
+
is empty or a very short alpha fragment (≤ 3 chars). Does not need a
|
|
85
|
+
dotted-initialism exemption because those pass the ``min_query_length``
|
|
86
|
+
gate upstream (``U.S.A.`` is 5 chars with dots).
|
|
87
|
+
"""
|
|
88
|
+
if not text:
|
|
89
|
+
return True
|
|
90
|
+
stripped = text
|
|
91
|
+
for ch in "#/\\-_.,;:!?*|()[]{}'\"`":
|
|
92
|
+
stripped = stripped.replace(ch, "")
|
|
93
|
+
stripped = stripped.strip()
|
|
94
|
+
if not stripped:
|
|
95
|
+
return True
|
|
96
|
+
had_punctuation = stripped != text.strip()
|
|
97
|
+
if not had_punctuation:
|
|
98
|
+
return False
|
|
99
|
+
return len(stripped) <= 3 and stripped.isalpha()
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _is_single_letter(text: str) -> bool:
|
|
103
|
+
"""Return True for a bare single ASCII letter (any case)."""
|
|
104
|
+
raw = text.strip()
|
|
105
|
+
return len(raw) == 1 and raw.isascii() and raw.isalpha()
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _short_input_blocked(query_norm: str) -> bool:
    """Decide whether the source should stay silent for this query.

    The gate is domain-agnostic and evaluates three checks in order, stopping
    at the first one that fires:

    1. degenerate missing-value markers;
    2. a single ASCII letter (any case), which is too ambiguous for a
       brute-force fuzzy scan;
    3. punctuation-noise tokens.

    ``min_query_length`` is deliberately not part of this helper:
    ``generate()`` applies it beforehand, so callers that relax the default
    minimum still get the degenerate-token and single-letter blocks.

    Returns:
        True when the query must be suppressed, False otherwise.
    """
    return (
        _is_degenerate_token(query_norm)
        or _is_single_letter(query_norm)
        or _is_punctuation_noise(query_norm)
    )
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ---------------------------------------------------------------------------
|
|
130
|
+
# Source
|
|
131
|
+
# ---------------------------------------------------------------------------
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class FuzzyRetrievalBruteSource(CandidateSource):
    """Generating fuzzy source that brute-forces RapidFuzz over the store's names.

    The store's name list is materialized once, lazily, by the first
    ``generate()`` call that reaches the build step; ``warm()`` cannot do it
    earlier because ``CandidateSource.warm()`` is not handed a store.  Every
    later query that survives the short-input guard and the engine's skip
    logic is passed to ``fuzzy_candidates`` together with the cached list.

    ``tier = MatchTier.FUZZY`` lives on the **class** so that
    ``should_skip_source`` (``core/engine/_stages.py``) can inspect it before
    ``generate()`` runs — that is what keeps the always-on registration free
    on exact-name queries.

    Caching: the name list depends only on the ``entity_type_prefixes`` fixed
    at construction, so each instance holds a single cache entry.  The store
    is assumed immutable for the resolver's lifetime and the cache is never
    invalidated.  The extracted ``value_norm`` strings are cached next to the
    rows (``_choices_cache``) so ``fuzzy_candidates`` does not rebuild its
    ``choices`` list per call.

    Configurable parameters
    -----------------------
    name : str
        Unique name for this source (e.g. ``"custom_fuzzy_retrieval"``).
    domain : str
        Domain pack ID this source supports (e.g. ``"custom"``).
    min_query_length : int
        Normalized queries shorter than this yield ``[]`` immediately.
        Default 3.
    max_names : int
        Upper bound on the name-list size.  If the list holds more than
        ``max_names`` rows the brute-force pass is skipped and ``[]`` is
        returned.  Default ``FUZZY_AUTO_MAX_NAMES`` (25 000).
    top_k : int
        Maximum candidates requested from ``fuzzy_candidates``.  Default 25.
    entity_type_prefixes : frozenset[str] | None
        Restricts the name list to entity types starting with one of these
        prefixes.  ``None`` = all types.
    """

    # Kept at class level on purpose: should_skip_source reads it BEFORE any
    # generate() call is made.
    tier: MatchTier = MatchTier.FUZZY  # type: ignore[assignment]

    def __init__(
        self,
        *,
        name: str,
        domain: str,
        min_query_length: int = 3,
        max_names: int = FUZZY_AUTO_MAX_NAMES,
        top_k: int = 25,
        entity_type_prefixes: frozenset[str] | None = None,
    ) -> None:
        # Identity of the source.
        self._name = name
        self._domain = domain

        # Tunables.
        self._min_query_length = min_query_length
        self._max_names = max_names
        self._top_k = top_k
        self._entity_type_prefixes = entity_type_prefixes

        # One-time build state.  Both caches stay ``None`` until the first
        # successful build inside generate(); they are never invalidated.
        self._built = False
        self._build_lock = threading.Lock()
        self._names_cache: list[tuple[str, str, str, bool, str]] | None = None
        self._choices_cache: list[str] | None = None

    # ------------------------------------------------------------------
    # CandidateSource properties
    # ------------------------------------------------------------------

    @property
    def name(self) -> str:
        """Name given to this source at construction."""
        return self._name

    @property
    def reason_code(self) -> ReasonCode:
        """Reason code reported by this source: always ``FUZZY_MATCH``."""
        return ReasonCode.FUZZY_MATCH

    def supports(self, domain_pack_id: str) -> bool:
        """Return True only for the domain this source was constructed for."""
        return domain_pack_id == self._domain

    @property
    def requires_existing_candidates(self) -> bool:
        """Always False: this source generates candidates on its own."""
        return False

    # ------------------------------------------------------------------
    # Cache build (thread-safe)
    # ------------------------------------------------------------------

    def _ensure_built(self, store: Any) -> None:
        """Populate the name and choices caches unless that already happened.

        Uses double-checked locking: once built, callers return without
        touching the lock.  A failure while reading names from the store is
        logged at debug level and swallowed; ``_built`` then stays False and
        both caches stay ``None``, which ``generate()`` turns into ``[]``.
        """
        if self._built:
            return
        with self._build_lock:
            # Another thread may have finished the build while we waited.
            if self._built:
                return
            try:
                rows = list(
                    store.iter_suggest_names(
                        entity_type_prefixes=self._entity_type_prefixes
                    )
                )
                norms = [row[0] for row in rows]
            except Exception as exc:
                logger.debug(
                    "FuzzyRetrievalBruteSource '%s': iter_suggest_names failed: %s",
                    self._name,
                    exc,
                )
                return
            # Publish both caches before raising the flag that the lock-free
            # fast path checks.
            self._names_cache = rows
            self._choices_cache = norms
            self._built = True

    def warm(self) -> None:
        """Do nothing; the cache is built by the first ``generate()`` call.

        ``CandidateSource.warm()`` takes no store argument, so there is
        nothing to build from at this point.  ``generate()`` obtains the
        store through ``ctx.store`` and triggers the build itself; calls
        after that skip the lock.

        A subclass that does hold a store at warm time may override this
        method and call ``_ensure_built(store)`` directly.
        """

    # ------------------------------------------------------------------
    # generate
    # ------------------------------------------------------------------

    def _trace_empty(self, ctx: GenerationContext, query: str, **details: Any) -> None:
        """Emit a zero-candidate trace event, forwarding optional extras such as ``reason``."""
        emit_candidates_generated(
            ctx.trace,
            self._name,
            0,
            entity_ids=[],
            query=query,
            **details,
        )

    @staticmethod
    def _dedupe_key(cand: Any) -> tuple[Any, float]:
        """Ordering key for duplicate hits: fewer typos first, then higher score."""
        return (cand.typo_count, -(cand.match_score or 0.0))

    def generate(self, ctx: GenerationContext) -> list[CandidateEvidence]:
        """Produce FUZZY-tier evidence by brute-force RapidFuzz matching.

        Any exception raised by the inner pipeline is logged at debug level,
        traced with ``reason="error"``, and converted into ``[]``.
        """
        try:
            return self._generate_inner(ctx)
        except Exception as exc:
            logger.debug(
                "FuzzyRetrievalBruteSource '%s': unexpected error for query '%s': %s",
                self._name,
                ctx.text_norm,
                exc,
            )
            self._trace_empty(ctx, ctx.text_norm, reason="error")
            return []

    def _generate_inner(self, ctx: GenerationContext) -> list[CandidateEvidence]:  # noqa: PLR0911
        # Step 1 — cooperative deadline: stop if the time budget is already spent.
        if ctx.deadline is not None and time.monotonic() >= ctx.deadline:
            return []

        query = ctx.text_norm

        # Step 2 — minimum-length gate.
        if len(query) < self._min_query_length:
            return []

        # Step 3 — degenerate-token / single-letter / punctuation-noise gate.
        if _short_input_blocked(query):
            return []

        # Step 4 — make sure the caches exist, then read them.
        self._ensure_built(ctx.store)
        rows, norms = self._names_cache, self._choices_cache
        if rows is None or norms is None:
            # The build failed (iter_suggest_names raised or is not implemented).
            self._trace_empty(ctx, query, reason="no_names")
            return []

        # Step 5 — size cap: brute force is skipped on very large stores.
        if len(rows) > self._max_names:
            self._trace_empty(ctx, query, reason="cap_exceeded")
            return []

        # Step 6 — the brute-force fuzzy pass itself.
        hits = fuzzy_candidates(
            query,
            rows,
            top_k=self._top_k,
            choices=norms,
        )
        if not hits:
            self._trace_empty(ctx, query)
            return []

        # Step 7 — keep one hit per entity: lowest typo_count wins, ties go to
        # the highest match_score (same rule as runner.suggest_prefix).  An
        # entity keeps the position at which it was first seen.
        winners: dict[str, Any] = {}
        for hit in hits:
            key = hit.entity_id
            incumbent = winners.get(key)
            if incumbent is None or self._dedupe_key(hit) < self._dedupe_key(incumbent):
                winners[key] = hit

        # Step 8 — convert to CandidateEvidence with an explicit FUZZY tier;
        # match_score is rescaled from 0-100 to 0-1.
        evidence: list[CandidateEvidence] = [
            CandidateEvidence(
                entity_id=hit.entity_id,
                source_name=self._name,
                raw_score=(hit.match_score or 0.0) / 100.0,
                rank=position,
                matched_field="fuzzy_retrieval",
                matched_value=hit.matched_value,
                match_tier=MatchTier.FUZZY,
            )
            for position, hit in enumerate(winners.values(), start=1)
        ]

        emit_candidates_generated(
            ctx.trace,
            self._name,
            len(evidence),
            entity_ids=[item.entity_id for item in evidence],
            query=query,
        )

        return evidence
|
|
@@ -34,6 +34,13 @@ SYMSPELL_BASE_SCORE = 0.9
|
|
|
34
34
|
SYMSPELL_DISTANCE_PENALTY = 0.15
|
|
35
35
|
SYMSPELL_MIN_SCORE = 0.5
|
|
36
36
|
|
|
37
|
+
# Queries this short get a tighter edit-distance bound: two edits in a ≤6-char
|
|
38
|
+
# string rewrite a third of it, which surfaces unrelated short names as
|
|
39
|
+
# candidates ("paris" → "moris"/Mauritius at distance 2) rather than fixing
|
|
40
|
+
# typos. Longer queries keep the source's configured max distance.
|
|
41
|
+
SYMSPELL_SHORT_QUERY_MAX_LENGTH = 6
|
|
42
|
+
SYMSPELL_SHORT_QUERY_MAX_DISTANCE = 1
|
|
43
|
+
|
|
37
44
|
# Temp cache files older than this are presumed leaked by a crashed writer and
|
|
38
45
|
# safe to reap; live builds finish in seconds.
|
|
39
46
|
_STALE_TMP_AGE_SECONDS = 3600.0
|
|
@@ -645,6 +652,11 @@ class SymSpellSource(CandidateSource):
|
|
|
645
652
|
edit_distance = suggestion.distance
|
|
646
653
|
if corrected == text_norm:
|
|
647
654
|
continue
|
|
655
|
+
if (
|
|
656
|
+
len(text_norm) <= SYMSPELL_SHORT_QUERY_MAX_LENGTH
|
|
657
|
+
and edit_distance > SYMSPELL_SHORT_QUERY_MAX_DISTANCE
|
|
658
|
+
):
|
|
659
|
+
continue
|
|
648
660
|
entity_ids = store.lookup_name_exact(corrected, name_kinds=self._name_kinds)
|
|
649
661
|
for entity_id in entity_ids:
|
|
650
662
|
rank += 1
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/continental_unions/entities.sqlite
RENAMED
|
File without changes
|
{resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/continental_unions/metadata.json
RENAMED
|
File without changes
|
{resolvekit-0.1.3 → resolvekit-0.1.5}/src/resolvekit/_data/geo/continental_unions/symspell.dict
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|