matchbox-db 0.6.2__tar.gz → 0.6.3.dev96__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/.github/workflows/prerelease.yml +38 -8
- {matchbox_db-0.6.2/src/matchbox_db.egg-info → matchbox_db-0.6.3.dev96}/PKG-INFO +1 -2
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/common/factories/index.md +3 -3
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/client/evaluation.md +1 -1
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/client/link-data.md +12 -10
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/client/look-up.md +2 -2
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/mkdocs.yml +0 -1
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/pyproject.toml +0 -1
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/_handler.py +186 -54
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/dags.py +212 -38
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/eval/mock_ui.py +1 -1
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/eval/utils.py +15 -39
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/models/models.py +72 -63
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/queries.py +58 -12
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/results.py +32 -47
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/sources.py +50 -22
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/common/dtos.py +186 -47
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/common/eval.py +8 -8
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/common/exceptions.py +95 -24
- matchbox_db-0.6.3.dev96/src/matchbox/common/factories/dags.py +52 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/common/factories/entities.py +7 -6
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/common/factories/models.py +39 -43
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/common/factories/scenarios.py +201 -128
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/common/factories/sources.py +15 -5
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/common/transform.py +51 -34
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/api/main.py +205 -27
- matchbox_db-0.6.3.dev96/src/matchbox/server/api/routers/collection.py +596 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/api/routers/eval.py +24 -24
- matchbox_db-0.6.3.dev96/src/matchbox/server/api/static/favicon.png +0 -0
- matchbox_db-0.6.3.dev96/src/matchbox/server/api/static/swagger-ui-bundle.js +2 -0
- matchbox_db-0.6.3.dev96/src/matchbox/server/api/static/swagger-ui.css +3 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/base.py +154 -76
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/adapter.py +184 -126
- matchbox_db-0.6.3.dev96/src/matchbox/server/postgresql/alembic/versions/8c7f757b1046_remove_human_resolution_type.py +66 -0
- matchbox_db-0.6.3.dev96/src/matchbox/server/postgresql/alembic/versions/f500f7d832fe_adds_collections_and_versions_to_scope_.py +163 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/orm.py +208 -27
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/utils/db.py +0 -31
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/utils/evaluation.py +17 -23
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/utils/insert.py +11 -11
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/utils/query.py +24 -45
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/utils/results.py +1 -2
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/uploads.py +13 -23
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96/src/matchbox_db.egg-info}/PKG-INFO +1 -2
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox_db.egg-info/SOURCES.txt +7 -5
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox_db.egg-info/requires.txt +0 -1
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/client/models/methodologies/test_linkers_deterministic.py +1 -1
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/client/models/methodologies/test_linkers_probabilistic.py +1 -2
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/client/test_dags.py +434 -37
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/client/test_eval.py +6 -5
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/client/test_models.py +24 -15
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/client/test_queries.py +88 -3
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/client/test_results.py +7 -10
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/client/test_sources.py +16 -11
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/common/factories/test_entity_factory.py +7 -8
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/common/factories/test_model_factory.py +13 -7
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/common/factories/test_probability_generation.py +14 -20
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/common/factories/test_source_factory.py +17 -12
- matchbox_db-0.6.3.dev96/test/common/factories/test_testkit_dag.py +197 -0
- matchbox_db-0.6.3.dev96/test/common/test_dto.py +89 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/common/test_eval.py +10 -11
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/conftest.py +0 -1
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/e2e/test_e2e_dag.py +34 -17
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/e2e/test_e2e_evaluation.py +4 -5
- matchbox_db-0.6.3.dev96/test/server/api/routes/test_routes_collection.py +401 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/server/api/routes/test_routes_eval.py +46 -24
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/server/api/routes/test_routes_main.py +179 -41
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/server/api/routes/test_routes_resolution.py +89 -75
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/server/postgresql/test_pg_sql.py +164 -58
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/server/test_adapter.py +685 -582
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/server/test_uploads.py +15 -29
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/uv.lock +1325 -1554
- matchbox_db-0.6.2/docs/api/common/graph.md +0 -17
- matchbox_db-0.6.2/src/matchbox/common/factories/dags.py +0 -140
- matchbox_db-0.6.2/src/matchbox/common/graph.py +0 -66
- matchbox_db-0.6.2/src/matchbox/server/api/routers/resolution.py +0 -263
- matchbox_db-0.6.2/test/common/factories/test_testkit_dag.py +0 -322
- matchbox_db-0.6.2/test/common/test_dto.py +0 -40
- matchbox_db-0.6.2/test/common/test_graph.py +0 -14
- matchbox_db-0.6.2/test/fixtures/graph.py +0 -47
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/.github/pull_request_template.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/.github/workflows/ci.yml +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/.github/workflows/release.yml +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/.gitignore +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/.pre-commit-config.yaml +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/.vscode/launch.json +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/.vscode/settings.json +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/LICENSE +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/README.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docker-compose.yml +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/client/dags.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/client/eval.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/client/index.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/client/models.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/client/queries.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/client/results.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/client/sources.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/common/arrow.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/common/db.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/common/dtos.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/common/eval.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/common/exceptions.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/common/factories/entities.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/common/factories/models.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/common/factories/scenarios.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/common/factories/sources.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/common/hash.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/common/index.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/common/logging.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/common/transform.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/server/api.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/server/backends/postgresql.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/server/index.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/api/server/uploads.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/assets/matchbox-icon-dark.png +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/assets/matchbox-icon.svg +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/assets/matchbox-logo-dark.svg +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/assets/matchbox-logo-light.svg +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/client/explore-dags.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/client/install.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/contributing.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/index.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/server/concepts.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/server/install.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/server/risks.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/stylesheets/extra.css +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/docs/use-cases.md +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/environments/containers.env +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/environments/development.env +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/environments/sample_client.env +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/environments/sample_server.env +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/justfile +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/setup.cfg +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/_settings.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/authorisation.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/eval/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/eval/justfile +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/eval/ui.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/models/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/models/comparison.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/models/dedupers/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/models/dedupers/base.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/models/dedupers/naive.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/models/linkers/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/models/linkers/base.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/models/linkers/deterministic.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/models/linkers/splinklinker.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/client/models/linkers/weighteddeterministic.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/common/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/common/arrow.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/common/db.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/common/factories/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/common/hash.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/common/logging.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/Dockerfile +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/api/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/api/dependencies.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/api/routers/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/.gitkeep +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/alembic/env.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/alembic/script.py.mako +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/alembic/versions/05cc4181a0ad_removed_source_key_reference_and_added_.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/alembic/versions/1907c34cfa1f_create_tables_given_schema.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/alembic/versions/3754ae042254_move_orm_to_root_leaf_contains_structure.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/alembic/versions/40a8e5ed48f2_create_schema_without_tables.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/alembic/versions/4a7c35f86405_move_sourceconfigs_from_sourceaddress_.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/alembic/versions/7a2d1b10ac0f_switch_from_location_uri_to_name.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/alembic/versions/83b134a86713_simplify_resolution_naming_and_hashing.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/alembic/versions/95c0b5c23446_renaming_sources_to_source_config.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/alembic/versions/ae63f79f6b39_renamed_sourcecolumns_to_sourcefields.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/alembic/versions/b38d61ab11cc_add_index_to_the_clustersourcekey_table.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/alembic/versions/b694eb292dea_add_an_index_to_the_probabilities_.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/alembic/versions/beba75a24962_add_pkspace_table.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/alembic/versions/c4cb937d00f4_add_modelconfigs.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/alembic/versions/dd0c3a9ecdf9_add_migrations_for_first_eval_tables.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/alembic/versions/e4122bdf9b0d_renamed_primary_keys_to_just_keys.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/alembic/versions/f3c9279437f4_add_content_hash_to_resolutions.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/alembic.ini +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/db.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/justfile +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/mixin.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox/server/postgresql/utils/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox_db.egg-info/dependency_links.txt +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/src/matchbox_db.egg-info/top_level.txt +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/client/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/client/models/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/client/models/methodologies/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/client/models/methodologies/test_dedupers_deterministic.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/client/models/test_comparison.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/client/test_handler.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/common/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/common/factories/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/common/factories/test_linked_factory.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/common/factories/test_scenarios.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/common/test_hash.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/common/test_results.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/common/test_transform.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/data/all_companies.csv +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/fixtures/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/fixtures/client.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/fixtures/db.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/justfile +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/server/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/server/api/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/server/api/routes/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/server/postgresql/__init__.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/server/postgresql/test_pg_core.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/server/postgresql/test_pg_migrations.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/test/utils.py +0 -0
- {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev96}/trufflehog-exclude.txt +0 -0
|
@@ -7,8 +7,10 @@ env:
|
|
|
7
7
|
IMAGE_NAME: ${{ github.repository }}
|
|
8
8
|
|
|
9
9
|
jobs:
|
|
10
|
-
build
|
|
10
|
+
build:
|
|
11
11
|
runs-on: ubuntu-latest
|
|
12
|
+
outputs:
|
|
13
|
+
mb_version: ${{ steps.get_version.outputs.mb_version }}
|
|
12
14
|
permissions:
|
|
13
15
|
contents: write
|
|
14
16
|
packages: write
|
|
@@ -34,11 +36,21 @@ jobs:
|
|
|
34
36
|
python-version: "3.11"
|
|
35
37
|
|
|
36
38
|
- name: Extract development version
|
|
39
|
+
id: get_version
|
|
37
40
|
run: |
|
|
38
|
-
echo "
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
41
|
+
echo "mb_version=$(uv run --frozen python -m setuptools_scm | sed 's/+.*//')" \
|
|
42
|
+
>> "$GITHUB_OUTPUT"
|
|
43
|
+
|
|
44
|
+
- name: Build package
|
|
45
|
+
env:
|
|
46
|
+
SETUPTOOLS_SCM_PRETEND_VERSION: ${{ steps.get_version.outputs.mb_version }}
|
|
47
|
+
run: uv build
|
|
48
|
+
|
|
49
|
+
- name: Upload package artifacts
|
|
50
|
+
uses: actions/upload-artifact@v4
|
|
51
|
+
with:
|
|
52
|
+
name: package-dist
|
|
53
|
+
path: ./dist
|
|
42
54
|
|
|
43
55
|
- name: Extract tag metadata for Docker
|
|
44
56
|
id: meta
|
|
@@ -46,7 +58,7 @@ jobs:
|
|
|
46
58
|
with:
|
|
47
59
|
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
|
48
60
|
tags: |
|
|
49
|
-
type=raw,value=${{
|
|
61
|
+
type=raw,value=${{ steps.get_version.outputs.mb_version }}
|
|
50
62
|
type=raw,value=development
|
|
51
63
|
|
|
52
64
|
- name: Build and push Docker image
|
|
@@ -56,7 +68,7 @@ jobs:
|
|
|
56
68
|
file: src/matchbox/server/Dockerfile
|
|
57
69
|
push: true
|
|
58
70
|
build-args: |
|
|
59
|
-
MB_VERSION=${{
|
|
71
|
+
MB_VERSION=${{ steps.get_version.outputs.mb_version }}
|
|
60
72
|
tags: ${{ steps.meta.outputs.tags }}
|
|
61
73
|
|
|
62
74
|
- name: Delete existing development release if it exists
|
|
@@ -76,5 +88,23 @@ jobs:
|
|
|
76
88
|
|
|
77
89
|
May be unstable.
|
|
78
90
|
|
|
79
|
-
**Version:** ${{
|
|
91
|
+
**Version:** ${{ steps.get_version.outputs.mb_version }}
|
|
80
92
|
**Commit:** ${{ github.sha }}
|
|
93
|
+
|
|
94
|
+
deploy-package:
|
|
95
|
+
needs: build
|
|
96
|
+
runs-on: ubuntu-latest
|
|
97
|
+
permissions:
|
|
98
|
+
id-token: write
|
|
99
|
+
|
|
100
|
+
environment: pypi
|
|
101
|
+
|
|
102
|
+
steps:
|
|
103
|
+
- name: Download package artifacts
|
|
104
|
+
uses: actions/download-artifact@v4
|
|
105
|
+
with:
|
|
106
|
+
name: package-dist
|
|
107
|
+
path: ./dist
|
|
108
|
+
|
|
109
|
+
- name: Publish package to PyPI
|
|
110
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: matchbox-db
|
|
3
|
-
Version: 0.6.
|
|
3
|
+
Version: 0.6.3.dev96
|
|
4
4
|
Summary: A framework for orchestrating and comparing data linking and deduplication methodologies.
|
|
5
5
|
Author: Department for Business and Trade
|
|
6
6
|
Project-URL: Documentation, https://uktrade.github.io/matchbox/
|
|
@@ -14,7 +14,6 @@ Requires-Dist: duckdb>=1.1.1
|
|
|
14
14
|
Requires-Dist: faker>=36.1.1
|
|
15
15
|
Requires-Dist: frozendict>=2.4.6
|
|
16
16
|
Requires-Dist: httpx>=0.28.0
|
|
17
|
-
Requires-Dist: matplotlib>=3.9.2
|
|
18
17
|
Requires-Dist: pandas>=2.2.3
|
|
19
18
|
Requires-Dist: polars-hash>=0.5.3
|
|
20
19
|
Requires-Dist: polars>=1.32.3
|
|
@@ -51,7 +51,7 @@ source_testkit = source_factory()
|
|
|
51
51
|
|
|
52
52
|
# Setup store
|
|
53
53
|
tracker = InMemoryUploadTracker()
|
|
54
|
-
upload_id = tracker.add_source(source_testkit.
|
|
54
|
+
upload_id = tracker.add_source(source_testkit.source.resolution_path)
|
|
55
55
|
```
|
|
56
56
|
|
|
57
57
|
|
|
@@ -83,7 +83,7 @@ source_factory(
|
|
|
83
83
|
By default, each `SourceTestkit` or `ModelTestkit` creates a new [`DAG`][matchbox.client.dags.DAG]. If membership to the right DAG is important, you can either set it manually:
|
|
84
84
|
|
|
85
85
|
```python
|
|
86
|
-
dag = DAG("companies"
|
|
86
|
+
dag = DAG("companies")
|
|
87
87
|
source_testkit = source_factory(dag=dag)
|
|
88
88
|
```
|
|
89
89
|
|
|
@@ -91,7 +91,7 @@ Or, you can unpack your objects into `DAG` methods:
|
|
|
91
91
|
|
|
92
92
|
```python
|
|
93
93
|
source_testkit = source_factory()
|
|
94
|
-
dag = DAG("companies"
|
|
94
|
+
dag = DAG("companies")
|
|
95
95
|
dag.source(**source_testkit.into_dag())
|
|
96
96
|
```
|
|
97
97
|
|
|
@@ -78,10 +78,10 @@ You're now ready to create your first [`DAG`][matchbox.client.dags.DAG].
|
|
|
78
78
|
=== "Example"
|
|
79
79
|
```python
|
|
80
80
|
from matchbox.client.dags import DAG
|
|
81
|
-
dag = DAG(name="companies"
|
|
81
|
+
dag = DAG(name="companies").new_run()
|
|
82
82
|
```
|
|
83
83
|
|
|
84
|
-
A DAG needs a name, which will be used to identify this DAG once you publish it to the Matchbox server. You also need to
|
|
84
|
+
A DAG needs a name, which will be used to identify this DAG once you publish it to the Matchbox server. You also need to use the `.new_run()` method to prepare the DAG to send results to the server.
|
|
85
85
|
|
|
86
86
|
This DAG will own all the sources and models you define later.
|
|
87
87
|
|
|
@@ -418,7 +418,7 @@ Once you're happy with your results, you need to publish your DAG so that other
|
|
|
418
418
|
|
|
419
419
|
=== "Example"
|
|
420
420
|
```python
|
|
421
|
-
dag.
|
|
421
|
+
dag.set_default()
|
|
422
422
|
```
|
|
423
423
|
|
|
424
424
|
|
|
@@ -527,16 +527,18 @@ This example demonstrates how you can:
|
|
|
527
527
|
|
|
528
528
|
### Re-run a previous DAG
|
|
529
529
|
|
|
530
|
-
You might want to publish a new
|
|
530
|
+
You might want to publish a new run of your DAG based on newer data. You can retrieve the old DAG and inspect it. You can't sync or publish it, as it will be read-only. However, you can generate a new run from it explicitly:
|
|
531
531
|
|
|
532
532
|
=== "Example"
|
|
533
533
|
```python
|
|
534
|
-
#
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
534
|
+
# Create a new DAG identical to the previous default
|
|
535
|
+
dag = DAG(name="companies").load_default(
|
|
536
|
+
location=RelationalDBLocation(name="dbname", client=engine)
|
|
537
|
+
).new_run()
|
|
538
|
+
# Run new DAG
|
|
539
|
+
dag.run_and_sync()
|
|
540
|
+
# Make the DAG the new default
|
|
541
|
+
dag.set_default()
|
|
540
542
|
```
|
|
541
543
|
|
|
542
544
|
## Best practices
|
|
@@ -8,7 +8,7 @@ Given a key and a source, you can retrieve all keys resolving to the same entity
|
|
|
8
8
|
```python
|
|
9
9
|
from matchbox.client.dags import DAG
|
|
10
10
|
|
|
11
|
-
matches = DAG("companies").lookup_key(
|
|
11
|
+
matches = DAG("companies").load_default().lookup_key(
|
|
12
12
|
from_source="datahub_companies",
|
|
13
13
|
to_sources=["companies_house"],
|
|
14
14
|
key="8534735",
|
|
@@ -32,5 +32,5 @@ You can download an entire lookup as a PyArrow table.
|
|
|
32
32
|
```python
|
|
33
33
|
from matchbox.client.dags import DAG
|
|
34
34
|
|
|
35
|
-
lookup = DAG("companies").extract_lookup()
|
|
35
|
+
lookup = DAG("companies").load_default().extract_lookup()
|
|
36
36
|
```
|
|
@@ -47,7 +47,6 @@ nav:
|
|
|
47
47
|
- Sources: api/common/factories/sources.md
|
|
48
48
|
- Models: api/common/factories/models.md
|
|
49
49
|
- Scenarios: api/common/factories/scenarios.md
|
|
50
|
-
- Graph: api/common/graph.md
|
|
51
50
|
- Hashing: api/common/hash.md
|
|
52
51
|
- Logging: api/common/logging.md
|
|
53
52
|
- Transform: api/common/transform.md
|
|
@@ -8,6 +8,7 @@ from importlib.metadata import version
|
|
|
8
8
|
from io import BytesIO
|
|
9
9
|
|
|
10
10
|
import httpx
|
|
11
|
+
import polars as pl
|
|
11
12
|
from pyarrow import Table
|
|
12
13
|
from pyarrow.parquet import read_table
|
|
13
14
|
from tenacity import (
|
|
@@ -32,33 +33,37 @@ from matchbox.common.dtos import (
|
|
|
32
33
|
BackendCountableType,
|
|
33
34
|
BackendParameterType,
|
|
34
35
|
BackendResourceType,
|
|
36
|
+
Collection,
|
|
37
|
+
CollectionName,
|
|
35
38
|
LoginAttempt,
|
|
36
39
|
LoginResult,
|
|
37
40
|
Match,
|
|
41
|
+
ModelResolutionPath,
|
|
38
42
|
NotFoundError,
|
|
39
43
|
Resolution,
|
|
40
|
-
|
|
44
|
+
ResolutionPath,
|
|
41
45
|
ResolutionType,
|
|
46
|
+
ResourceOperationStatus,
|
|
47
|
+
Run,
|
|
48
|
+
RunID,
|
|
49
|
+
SourceResolutionPath,
|
|
42
50
|
UploadStage,
|
|
43
51
|
UploadStatus,
|
|
44
52
|
)
|
|
45
53
|
from matchbox.common.eval import Judgement, ModelComparison
|
|
46
54
|
from matchbox.common.exceptions import (
|
|
55
|
+
MatchboxCollectionNotFoundError,
|
|
47
56
|
MatchboxDataNotFound,
|
|
48
57
|
MatchboxDeletionNotConfirmed,
|
|
49
58
|
MatchboxEmptyServerResponse,
|
|
50
59
|
MatchboxResolutionNotFoundError,
|
|
60
|
+
MatchboxRunNotFoundError,
|
|
51
61
|
MatchboxServerFileError,
|
|
52
62
|
MatchboxTooManySamplesRequested,
|
|
53
63
|
MatchboxUnhandledServerResponse,
|
|
54
64
|
MatchboxUnparsedClientRequest,
|
|
55
65
|
MatchboxUserNotFoundError,
|
|
56
66
|
)
|
|
57
|
-
from matchbox.common.graph import (
|
|
58
|
-
ModelResolutionName,
|
|
59
|
-
ResolutionName,
|
|
60
|
-
SourceResolutionName,
|
|
61
|
-
)
|
|
62
67
|
from matchbox.common.hash import hash_to_base64
|
|
63
68
|
from matchbox.common.logging import logger
|
|
64
69
|
|
|
@@ -117,6 +122,10 @@ def handle_http_code(res: httpx.Response) -> httpx.Response:
|
|
|
117
122
|
if res.status_code == 404:
|
|
118
123
|
error = NotFoundError.model_validate(res.json())
|
|
119
124
|
match error.entity:
|
|
125
|
+
case BackendResourceType.COLLECTION:
|
|
126
|
+
raise MatchboxCollectionNotFoundError(error.details)
|
|
127
|
+
case BackendResourceType.RUN:
|
|
128
|
+
raise MatchboxRunNotFoundError(error.details)
|
|
120
129
|
case BackendResourceType.RESOLUTION:
|
|
121
130
|
raise MatchboxResolutionNotFoundError(error.details)
|
|
122
131
|
case BackendResourceType.CLUSTER:
|
|
@@ -127,7 +136,7 @@ def handle_http_code(res: httpx.Response) -> httpx.Response:
|
|
|
127
136
|
raise RuntimeError(f"Unexpected 404 error: {error.details}")
|
|
128
137
|
|
|
129
138
|
if res.status_code == 409:
|
|
130
|
-
error =
|
|
139
|
+
error = ResourceOperationStatus.model_validate(res.json())
|
|
131
140
|
raise MatchboxDeletionNotConfirmed(message=error.details)
|
|
132
141
|
|
|
133
142
|
if res.status_code == 422:
|
|
@@ -180,9 +189,9 @@ def login(user_name: str) -> int:
|
|
|
180
189
|
|
|
181
190
|
@http_retry
|
|
182
191
|
def query(
|
|
183
|
-
source:
|
|
192
|
+
source: SourceResolutionPath,
|
|
184
193
|
return_leaf_id: bool,
|
|
185
|
-
resolution:
|
|
194
|
+
resolution: ResolutionPath | None = None,
|
|
186
195
|
threshold: int | None = None,
|
|
187
196
|
limit: int | None = None,
|
|
188
197
|
) -> Table:
|
|
@@ -194,8 +203,10 @@ def query(
|
|
|
194
203
|
"/query",
|
|
195
204
|
params=url_params(
|
|
196
205
|
{
|
|
197
|
-
"
|
|
198
|
-
"
|
|
206
|
+
"collection": source.collection,
|
|
207
|
+
"run_id": source.run,
|
|
208
|
+
"source": source.name,
|
|
209
|
+
"resolution": resolution.name if resolution else None,
|
|
199
210
|
"return_leaf_id": return_leaf_id,
|
|
200
211
|
"threshold": threshold,
|
|
201
212
|
"limit": limit,
|
|
@@ -222,16 +233,16 @@ def query(
|
|
|
222
233
|
|
|
223
234
|
@http_retry
|
|
224
235
|
def match(
|
|
225
|
-
targets: list[
|
|
226
|
-
source:
|
|
236
|
+
targets: list[SourceResolutionPath],
|
|
237
|
+
source: SourceResolutionPath,
|
|
227
238
|
key: str,
|
|
228
|
-
resolution:
|
|
239
|
+
resolution: ResolutionPath,
|
|
229
240
|
threshold: int | None = None,
|
|
230
241
|
) -> list[Match]:
|
|
231
242
|
"""Match a source against a list of targets."""
|
|
232
243
|
log_prefix = f"Query {source}"
|
|
233
244
|
logger.debug(
|
|
234
|
-
f"{key} to {', '.join(targets)} using {resolution}",
|
|
245
|
+
# Stringify each target individually: str(targets) would render the whole
# list as one string and join() would then separate its characters.
f"{key} to {', '.join(str(t) for t in targets)} using {resolution}",
|
|
235
246
|
prefix=log_prefix,
|
|
236
247
|
)
|
|
237
248
|
|
|
@@ -239,10 +250,12 @@ def match(
|
|
|
239
250
|
"/match",
|
|
240
251
|
params=url_params(
|
|
241
252
|
{
|
|
242
|
-
"
|
|
243
|
-
"
|
|
253
|
+
"collection": resolution.collection,
|
|
254
|
+
"run_id": resolution.run,
|
|
255
|
+
"targets": [t.name for t in targets],
|
|
256
|
+
"source": source.name,
|
|
244
257
|
"key": key,
|
|
245
|
-
"resolution": resolution,
|
|
258
|
+
"resolution": resolution.name,
|
|
246
259
|
"threshold": threshold,
|
|
247
260
|
}
|
|
248
261
|
),
|
|
@@ -258,42 +271,125 @@ def match(
|
|
|
258
271
|
return matches
|
|
259
272
|
|
|
260
273
|
|
|
261
|
-
#
|
|
274
|
+
# Collection management
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
@http_retry
def get_collection(name: CollectionName) -> Collection:
    """Get all runs and resolutions in a collection.

    Args:
        name: Name of the collection to look up.

    Returns:
        The server's JSON response validated as a ``Collection``.
    """
    logger.debug("Retrieving", prefix=f"Collection {name}")

    endpoint = f"/collections/{name}"
    payload = CLIENT.get(endpoint).json()
    return Collection.model_validate(payload)
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
@http_retry
def create_collection(name: CollectionName) -> ResourceOperationStatus:
    """Create a new collection.

    Args:
        name: Name of the collection to create.

    Returns:
        The server's JSON response validated as a ``ResourceOperationStatus``.
    """
    logger.debug("Creating", prefix=f"Collection {name}")

    endpoint = f"/collections/{name}"
    payload = CLIENT.post(endpoint).json()
    return ResourceOperationStatus.model_validate(payload)
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
# Run management
|
|
262
301
|
|
|
263
302
|
|
|
264
303
|
@http_retry
def get_run(collection: CollectionName, run_id: RunID) -> Run:
    """Get all resolutions in a run.

    Args:
        collection: Name of the collection that owns the run.
        run_id: Identifier of the run to look up.

    Returns:
        The server's JSON response validated as a ``Run``.
    """
    logger.debug("Retrieving", prefix=f"Collection {collection}, run {run_id}")

    endpoint = f"/collections/{collection}/runs/{run_id}"
    payload = CLIENT.get(endpoint).json()
    return Run.model_validate(payload)
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
@http_retry
def create_run(collection: CollectionName) -> Run:
    """Create a new run.

    Args:
        collection: Name of the collection in which to create the run.

    Returns:
        The server's JSON response validated as a ``Run``.
    """
    log_prefix = f"Collection {collection}, new run"
    logger.debug("Creating", prefix=log_prefix)

    res = CLIENT.post(f"/collections/{collection}/runs")

    # The response is parsed as a Run, so the return annotation is Run
    # (it previously claimed ResourceOperationStatus).
    return Run.model_validate(res.json())
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
@http_retry
def delete_run(
    collection: CollectionName, run_id: RunID, certain: bool = False
) -> ResourceOperationStatus:
    """Delete a run in Matchbox.

    Args:
        collection: Name of the collection that owns the run.
        run_id: Identifier of the run to delete.
        certain: Forwarded to the server as the ``certain`` query parameter.

    Returns:
        The server's JSON response validated as a ``ResourceOperationStatus``.
    """
    logger.debug("Deleting", prefix=f"Collection {collection}, run {run_id}")

    endpoint = f"/collections/{collection}/runs/{run_id}"
    query_params = {"certain": certain}
    payload = CLIENT.delete(endpoint, params=query_params).json()
    return ResourceOperationStatus.model_validate(payload)
|
|
337
|
+
|
|
270
338
|
|
|
271
|
-
|
|
339
|
+
@http_retry
def set_run_mutable(
    collection: CollectionName, run_id: RunID, mutable: bool
) -> ResourceOperationStatus:
    """Set the mutability flag of a run in a collection.

    Args:
        collection: Name of the collection that owns the run.
        run_id: Identifier of the run to update.
        mutable: Value sent as the JSON body of the request.

    Returns:
        The server's JSON response validated as a ``ResourceOperationStatus``.
    """
    logger.debug(
        "Setting mutability", prefix=f"Collection {collection}, run {run_id}"
    )

    endpoint = f"/collections/{collection}/runs/{run_id}/mutable"
    payload = CLIENT.patch(endpoint, json=mutable).json()
    return ResourceOperationStatus.model_validate(payload)
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
@http_retry
def set_run_default(
    collection: CollectionName, run_id: RunID, default: bool
) -> ResourceOperationStatus:
    """Set the default flag of a run in a collection.

    Args:
        collection: Name of the collection that owns the run.
        run_id: Identifier of the run to update.
        default: Value sent as the JSON body of the request.

    Returns:
        The server's JSON response validated as a ``ResourceOperationStatus``.
    """
    logger.debug(
        "Setting as default", prefix=f"Collection {collection}, run {run_id}"
    )

    endpoint = f"/collections/{collection}/runs/{run_id}/default"
    payload = CLIENT.patch(endpoint, json=default).json()
    return ResourceOperationStatus.model_validate(payload)
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
# Resolution management
|
|
272
364
|
|
|
273
365
|
|
|
274
366
|
@http_retry
def create_resolution(
    resolution: Resolution,
    path: ResolutionPath,
) -> ResourceOperationStatus:
    """Create a resolution (model or source).

    Args:
        resolution: Resolution to create; sent as the JSON body via
            ``model_dump()``.
        path: Collection, run and name under which the resolution is created.

    Returns:
        The server's JSON response validated as a ``ResourceOperationStatus``.
    """
    logger.debug("Creating", prefix=f"Resolution {path}")

    endpoint = f"/collections/{path.collection}/runs/{path.run}/resolutions/{path.name}"
    body = resolution.model_dump()
    payload = CLIENT.post(endpoint, json=body).json()

    return ResourceOperationStatus.model_validate(payload)
|
|
285
381
|
|
|
286
382
|
|
|
287
383
|
@http_retry
def get_resolution(
    path: ResolutionPath, validate_type: ResolutionType | None = None
) -> Resolution | None:
    """Get a resolution from Matchbox.

    Args:
        path: Collection, run and name identifying the resolution.
        validate_type: Optional resolution type, forwarded to the server as
            the ``validate_type`` query parameter.

    Returns:
        The server's JSON response validated as a ``Resolution``.
    """
    logger.debug("Retrieving metadata", prefix=f"Resolution {path}")

    endpoint = f"/collections/{path.collection}/runs/{path.run}/resolutions/{path.name}"
    query_params = url_params({"validate_type": validate_type})
    payload = CLIENT.get(endpoint, params=query_params).json()
    return Resolution.model_validate(payload)
|
|
@@ -301,17 +397,19 @@ def get_resolution(
|
|
|
301
397
|
|
|
302
398
|
@http_retry
|
|
303
399
|
def set_data(
|
|
304
|
-
|
|
400
|
+
path: ResolutionPath, data: pl.DataFrame | Table, validate_type: ResolutionType
|
|
305
401
|
) -> UploadStatus:
|
|
306
402
|
"""Upload source hashes or model results to server."""
|
|
307
|
-
log_prefix = f"Resolution {
|
|
403
|
+
log_prefix = f"Resolution {path}"
|
|
308
404
|
logger.debug("Uploading results", prefix=log_prefix)
|
|
309
405
|
|
|
310
|
-
|
|
406
|
+
data_arrow = data.to_arrow() if isinstance(data, pl.DataFrame) else data
|
|
407
|
+
buffer = table_to_buffer(table=data_arrow)
|
|
311
408
|
|
|
312
409
|
# Initialise upload
|
|
313
410
|
metadata_res = CLIENT.post(
|
|
314
|
-
f"/resolutions/{name}/data",
|
|
411
|
+
f"/collections/{path.collection}/runs/{path.run}/resolutions/{path.name}/data",
|
|
412
|
+
params=url_params({"validate_type": validate_type}),
|
|
315
413
|
)
|
|
316
414
|
|
|
317
415
|
upload = UploadStatus.model_validate(metadata_res.json())
|
|
@@ -343,70 +441,103 @@ def set_data(
|
|
|
343
441
|
|
|
344
442
|
|
|
345
443
|
@http_retry
def get_results(path: ModelResolutionPath) -> Table:
    """Get model results from Matchbox.

    Args:
        path: Collection, run and name identifying the model resolution.

    Returns:
        The response body read as a table via ``read_table``.
    """
    logger.debug("Retrieving results", prefix=f"Model {path}")

    endpoint = (
        f"/collections/{path.collection}/runs/{path.run}/resolutions/{path.name}/data"
    )
    raw = CLIENT.get(endpoint).content
    return read_table(BytesIO(raw))
|
|
354
454
|
|
|
355
455
|
|
|
356
456
|
@http_retry
def set_truth(path: ModelResolutionPath, truth: int) -> ResourceOperationStatus:
    """Set the truth threshold for a model in Matchbox.

    Args:
        path: Collection, run and name identifying the model resolution.
        truth: Threshold value sent as the JSON body of the request.

    Returns:
        The server's JSON response validated as a ``ResourceOperationStatus``.
    """
    logger.debug("Setting truth value", prefix=f"Model {path}")

    endpoint = (
        f"/collections/{path.collection}/runs/{path.run}/resolutions/{path.name}/truth"
    )
    payload = CLIENT.patch(endpoint, json=truth).json()
    return ResourceOperationStatus.model_validate(payload)
|
|
364
467
|
|
|
365
468
|
|
|
366
469
|
@http_retry
def get_truth(path: ModelResolutionPath) -> int:
    """Get the truth threshold for a model in Matchbox.

    Args:
        path: Collection, run and name identifying the model resolution.

    Returns:
        The decoded JSON body of the server's response, returned as-is.
    """
    logger.debug("Retrieving truth value", prefix=f"Model {path}")

    endpoint = (
        f"/collections/{path.collection}/runs/{path.run}/resolutions/{path.name}/truth"
    )
    truth = CLIENT.get(endpoint).json()
    return truth
|
|
374
479
|
|
|
375
480
|
|
|
376
481
|
@http_retry
def delete_resolution(
    path: ModelResolutionPath, certain: bool = False
) -> ResourceOperationStatus:
    """Delete a resolution in Matchbox.

    Args:
        path: Collection, run and name identifying the resolution.
        certain: Forwarded to the server as the ``certain`` query parameter.

    Returns:
        The server's JSON response validated as a ``ResourceOperationStatus``.
    """
    logger.debug("Deleting", prefix=f"Model {path}")

    endpoint = f"/collections/{path.collection}/runs/{path.run}/resolutions/{path.name}"
    query_params = {"certain": certain}
    payload = CLIENT.delete(endpoint, params=query_params).json()
    return ResourceOperationStatus.model_validate(payload)
|
|
386
494
|
|
|
387
495
|
|
|
388
496
|
# Evaluation
|
|
389
497
|
|
|
390
498
|
|
|
391
499
|
@http_retry
def sample_for_eval(n: int, resolution: ModelResolutionPath, user_id: int) -> Table:
    """Sample model results for evaluation.

    Args:
        n: Sent to the server as the ``n`` query parameter.
        resolution: Collection, run and name identifying the model resolution.
        user_id: Sent to the server as the ``user_id`` query parameter.

    Returns:
        The response body read as a table via ``read_table``.
    """
    query_params = url_params(
        {
            "n": n,
            "collection": resolution.collection,
            "run_id": resolution.run,
            "resolution": resolution.name,
            "user_id": user_id,
        }
    )
    response = CLIENT.get("/eval/samples", params=query_params)

    raw = BytesIO(response.content)
    return read_table(raw)
|
|
399
516
|
|
|
400
517
|
|
|
401
518
|
@http_retry
def compare_models(
    resolutions: list[ModelResolutionPath],
) -> ModelComparison:
    """Get a model comparison for a set of model resolutions.

    Args:
        resolutions: Paths of the model resolutions to compare.

    Returns:
        A mapping from each key of the server's JSON response to its value
        converted to a tuple.
    """
    qualified_resolutions = [
        ModelResolutionPath(
            collection=resolution.collection,
            run=resolution.run,
            # Pass the resolution's name, not the whole path object, so the
            # rebuilt path has a valid ``name`` field.
            name=resolution.name,
        )
        for resolution in resolutions
    ]
    res = CLIENT.post(
        "/eval/compare", json=[r.model_dump() for r in qualified_resolutions]
    )
    scores = {resolution: tuple(pr) for resolution, pr in res.json().items()}
    return scores
|
|
406
536
|
|
|
407
537
|
|
|
408
538
|
@http_retry
|
|
409
539
|
def send_eval_judgement(judgement: Judgement) -> None:
|
|
540
|
+
"""Send judgements to the server."""
|
|
410
541
|
logger.debug(
|
|
411
542
|
f"Submitting judgement {judgement.shown}:{judgement.endorsed} "
|
|
412
543
|
f"for {judgement.user_id}"
|
|
@@ -416,6 +547,7 @@ def send_eval_judgement(judgement: Judgement) -> None:
|
|
|
416
547
|
|
|
417
548
|
@http_retry
|
|
418
549
|
def download_eval_data() -> tuple[Table, Table]:
|
|
550
|
+
"""Download all judgements from the server."""
|
|
419
551
|
logger.debug("Retrieving all judgements.")
|
|
420
552
|
res = CLIENT.get("/eval/judgements")
|
|
421
553
|
|
|
@@ -432,7 +564,7 @@ def download_eval_data() -> tuple[Table, Table]:
|
|
|
432
564
|
check_schema(SCHEMA_JUDGEMENTS, judgements.schema)
|
|
433
565
|
check_schema(SCHEMA_CLUSTER_EXPANSION, expansion.schema)
|
|
434
566
|
|
|
435
|
-
return judgements, expansion
|
|
567
|
+
return pl.from_arrow(judgements), pl.from_arrow(expansion)
|
|
436
568
|
|
|
437
569
|
|
|
438
570
|
# Admin
|