matchbox-db 0.6.2__tar.gz → 0.6.3.dev53__tar.gz

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
Files changed (207) hide show
  1. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/.github/workflows/prerelease.yml +38 -8
  2. {matchbox_db-0.6.2/src/matchbox_db.egg-info → matchbox_db-0.6.3.dev53}/PKG-INFO +1 -1
  3. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/_handler.py +5 -3
  4. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/eval/utils.py +2 -6
  5. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/models/models.py +2 -2
  6. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/results.py +32 -47
  7. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/common/eval.py +6 -6
  8. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/common/factories/entities.py +2 -2
  9. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/common/factories/models.py +11 -34
  10. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/common/factories/scenarios.py +20 -9
  11. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/common/factories/sources.py +1 -1
  12. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/common/transform.py +35 -34
  13. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/api/main.py +71 -3
  14. matchbox_db-0.6.3.dev53/src/matchbox/server/api/static/favicon.png +0 -0
  15. matchbox_db-0.6.3.dev53/src/matchbox/server/api/static/swagger-ui-bundle.js +2 -0
  16. matchbox_db-0.6.3.dev53/src/matchbox/server/api/static/swagger-ui.css +3 -0
  17. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/adapter.py +4 -1
  18. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/utils/evaluation.py +9 -7
  19. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53/src/matchbox_db.egg-info}/PKG-INFO +1 -1
  20. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox_db.egg-info/SOURCES.txt +3 -0
  21. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/client/models/methodologies/test_linkers_deterministic.py +1 -1
  22. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/client/models/methodologies/test_linkers_probabilistic.py +1 -2
  23. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/client/test_results.py +7 -10
  24. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/common/factories/test_entity_factory.py +6 -6
  25. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/common/factories/test_model_factory.py +9 -6
  26. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/common/factories/test_probability_generation.py +14 -20
  27. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/common/test_eval.py +10 -11
  28. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/e2e/test_e2e_evaluation.py +3 -2
  29. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/server/api/routes/test_routes_main.py +96 -2
  30. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/server/api/routes/test_routes_resolution.py +4 -4
  31. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/server/test_adapter.py +2 -1
  32. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/uv.lock +1499 -1499
  33. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/.github/pull_request_template.md +0 -0
  34. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/.github/workflows/ci.yml +0 -0
  35. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/.github/workflows/release.yml +0 -0
  36. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/.gitignore +0 -0
  37. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/.pre-commit-config.yaml +0 -0
  38. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/.vscode/launch.json +0 -0
  39. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/.vscode/settings.json +0 -0
  40. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/LICENSE +0 -0
  41. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/README.md +0 -0
  42. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docker-compose.yml +0 -0
  43. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/client/dags.md +0 -0
  44. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/client/eval.md +0 -0
  45. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/client/index.md +0 -0
  46. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/client/models.md +0 -0
  47. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/client/queries.md +0 -0
  48. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/client/results.md +0 -0
  49. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/client/sources.md +0 -0
  50. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/common/arrow.md +0 -0
  51. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/common/db.md +0 -0
  52. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/common/dtos.md +0 -0
  53. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/common/eval.md +0 -0
  54. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/common/exceptions.md +0 -0
  55. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/common/factories/entities.md +0 -0
  56. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/common/factories/index.md +0 -0
  57. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/common/factories/models.md +0 -0
  58. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/common/factories/scenarios.md +0 -0
  59. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/common/factories/sources.md +0 -0
  60. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/common/graph.md +0 -0
  61. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/common/hash.md +0 -0
  62. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/common/index.md +0 -0
  63. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/common/logging.md +0 -0
  64. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/common/transform.md +0 -0
  65. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/server/api.md +0 -0
  66. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/server/backends/postgresql.md +0 -0
  67. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/server/index.md +0 -0
  68. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/api/server/uploads.md +0 -0
  69. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/assets/matchbox-icon-dark.png +0 -0
  70. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/assets/matchbox-icon.svg +0 -0
  71. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/assets/matchbox-logo-dark.svg +0 -0
  72. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/assets/matchbox-logo-light.svg +0 -0
  73. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/client/evaluation.md +0 -0
  74. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/client/explore-dags.md +0 -0
  75. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/client/install.md +0 -0
  76. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/client/link-data.md +0 -0
  77. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/client/look-up.md +0 -0
  78. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/contributing.md +0 -0
  79. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/index.md +0 -0
  80. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/server/concepts.md +0 -0
  81. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/server/install.md +0 -0
  82. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/server/risks.md +0 -0
  83. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/stylesheets/extra.css +0 -0
  84. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/docs/use-cases.md +0 -0
  85. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/environments/containers.env +0 -0
  86. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/environments/development.env +0 -0
  87. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/environments/sample_client.env +0 -0
  88. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/environments/sample_server.env +0 -0
  89. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/justfile +0 -0
  90. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/mkdocs.yml +0 -0
  91. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/pyproject.toml +0 -0
  92. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/setup.cfg +0 -0
  93. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/__init__.py +0 -0
  94. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/__init__.py +0 -0
  95. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/_settings.py +0 -0
  96. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/authorisation.py +0 -0
  97. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/dags.py +0 -0
  98. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/eval/__init__.py +0 -0
  99. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/eval/justfile +0 -0
  100. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/eval/mock_ui.py +0 -0
  101. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/eval/ui.py +0 -0
  102. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/models/__init__.py +0 -0
  103. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/models/comparison.py +0 -0
  104. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/models/dedupers/__init__.py +0 -0
  105. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/models/dedupers/base.py +0 -0
  106. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/models/dedupers/naive.py +0 -0
  107. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/models/linkers/__init__.py +0 -0
  108. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/models/linkers/base.py +0 -0
  109. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/models/linkers/deterministic.py +0 -0
  110. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/models/linkers/splinklinker.py +0 -0
  111. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/models/linkers/weighteddeterministic.py +0 -0
  112. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/queries.py +0 -0
  113. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/client/sources.py +0 -0
  114. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/common/__init__.py +0 -0
  115. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/common/arrow.py +0 -0
  116. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/common/db.py +0 -0
  117. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/common/dtos.py +0 -0
  118. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/common/exceptions.py +0 -0
  119. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/common/factories/__init__.py +0 -0
  120. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/common/factories/dags.py +0 -0
  121. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/common/graph.py +0 -0
  122. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/common/hash.py +0 -0
  123. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/common/logging.py +0 -0
  124. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/Dockerfile +0 -0
  125. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/__init__.py +0 -0
  126. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/api/__init__.py +0 -0
  127. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/api/dependencies.py +0 -0
  128. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/api/routers/__init__.py +0 -0
  129. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/api/routers/eval.py +0 -0
  130. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/api/routers/resolution.py +0 -0
  131. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/base.py +0 -0
  132. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/.gitkeep +0 -0
  133. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/__init__.py +0 -0
  134. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/alembic/env.py +0 -0
  135. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/alembic/script.py.mako +0 -0
  136. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/alembic/versions/05cc4181a0ad_removed_source_key_reference_and_added_.py +0 -0
  137. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/alembic/versions/1907c34cfa1f_create_tables_given_schema.py +0 -0
  138. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/alembic/versions/3754ae042254_move_orm_to_root_leaf_contains_structure.py +0 -0
  139. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/alembic/versions/40a8e5ed48f2_create_schema_without_tables.py +0 -0
  140. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/alembic/versions/4a7c35f86405_move_sourceconfigs_from_sourceaddress_.py +0 -0
  141. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/alembic/versions/7a2d1b10ac0f_switch_from_location_uri_to_name.py +0 -0
  142. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/alembic/versions/83b134a86713_simplify_resolution_naming_and_hashing.py +0 -0
  143. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/alembic/versions/95c0b5c23446_renaming_sources_to_source_config.py +0 -0
  144. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/alembic/versions/ae63f79f6b39_renamed_sourcecolumns_to_sourcefields.py +0 -0
  145. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/alembic/versions/b38d61ab11cc_add_index_to_the_clustersourcekey_table.py +0 -0
  146. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/alembic/versions/b694eb292dea_add_an_index_to_the_probabilities_.py +0 -0
  147. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/alembic/versions/beba75a24962_add_pkspace_table.py +0 -0
  148. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/alembic/versions/c4cb937d00f4_add_modelconfigs.py +0 -0
  149. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/alembic/versions/dd0c3a9ecdf9_add_migrations_for_first_eval_tables.py +0 -0
  150. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/alembic/versions/e4122bdf9b0d_renamed_primary_keys_to_just_keys.py +0 -0
  151. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/alembic/versions/f3c9279437f4_add_content_hash_to_resolutions.py +0 -0
  152. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/alembic.ini +0 -0
  153. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/db.py +0 -0
  154. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/justfile +0 -0
  155. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/mixin.py +0 -0
  156. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/orm.py +0 -0
  157. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/utils/__init__.py +0 -0
  158. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/utils/db.py +0 -0
  159. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/utils/insert.py +0 -0
  160. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/utils/query.py +0 -0
  161. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/postgresql/utils/results.py +0 -0
  162. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox/server/uploads.py +0 -0
  163. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox_db.egg-info/dependency_links.txt +0 -0
  164. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox_db.egg-info/requires.txt +0 -0
  165. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/src/matchbox_db.egg-info/top_level.txt +0 -0
  166. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/__init__.py +0 -0
  167. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/client/__init__.py +0 -0
  168. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/client/models/__init__.py +0 -0
  169. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/client/models/methodologies/__init__.py +0 -0
  170. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/client/models/methodologies/test_dedupers_deterministic.py +0 -0
  171. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/client/models/test_comparison.py +0 -0
  172. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/client/test_dags.py +0 -0
  173. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/client/test_eval.py +0 -0
  174. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/client/test_handler.py +0 -0
  175. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/client/test_models.py +0 -0
  176. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/client/test_queries.py +0 -0
  177. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/client/test_sources.py +0 -0
  178. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/common/__init__.py +0 -0
  179. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/common/factories/__init__.py +0 -0
  180. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/common/factories/test_linked_factory.py +0 -0
  181. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/common/factories/test_scenarios.py +0 -0
  182. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/common/factories/test_source_factory.py +0 -0
  183. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/common/factories/test_testkit_dag.py +0 -0
  184. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/common/test_dto.py +0 -0
  185. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/common/test_graph.py +0 -0
  186. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/common/test_hash.py +0 -0
  187. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/common/test_results.py +0 -0
  188. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/common/test_transform.py +0 -0
  189. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/conftest.py +0 -0
  190. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/data/all_companies.csv +0 -0
  191. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/e2e/test_e2e_dag.py +0 -0
  192. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/fixtures/__init__.py +0 -0
  193. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/fixtures/client.py +0 -0
  194. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/fixtures/db.py +0 -0
  195. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/fixtures/graph.py +0 -0
  196. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/justfile +0 -0
  197. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/server/__init__.py +0 -0
  198. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/server/api/__init__.py +0 -0
  199. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/server/api/routes/__init__.py +0 -0
  200. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/server/api/routes/test_routes_eval.py +0 -0
  201. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/server/postgresql/__init__.py +0 -0
  202. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/server/postgresql/test_pg_core.py +0 -0
  203. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/server/postgresql/test_pg_migrations.py +0 -0
  204. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/server/postgresql/test_pg_sql.py +0 -0
  205. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/server/test_uploads.py +0 -0
  206. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/test/utils.py +0 -0
  207. {matchbox_db-0.6.2 → matchbox_db-0.6.3.dev53}/trufflehog-exclude.txt +0 -0
@@ -7,8 +7,10 @@ env:
7
7
  IMAGE_NAME: ${{ github.repository }}
8
8
 
9
9
  jobs:
10
- build-and-push-development-image:
10
+ build:
11
11
  runs-on: ubuntu-latest
12
+ outputs:
13
+ mb_version: ${{ steps.get_version.outputs.mb_version }}
12
14
  permissions:
13
15
  contents: write
14
16
  packages: write
@@ -34,11 +36,21 @@ jobs:
34
36
  python-version: "3.11"
35
37
 
36
38
  - name: Extract development version
39
+ id: get_version
37
40
  run: |
38
- echo "MB_VERSION=$( \
39
- SETUPTOOLS_SCM_PRETEND_METADATA='{distance=1}' \
40
- uv run --frozen python -m setuptools_scm \
41
- )" >> $GITHUB_ENV
41
+ echo "mb_version=$(uv run --frozen python -m setuptools_scm | sed 's/+.*//')" \
42
+ >> "$GITHUB_OUTPUT"
43
+
44
+ - name: Build package
45
+ env:
46
+ SETUPTOOLS_SCM_PRETEND_VERSION: ${{ steps.get_version.outputs.mb_version }}
47
+ run: uv build
48
+
49
+ - name: Upload package artifacts
50
+ uses: actions/upload-artifact@v4
51
+ with:
52
+ name: package-dist
53
+ path: ./dist
42
54
 
43
55
  - name: Extract tag metadata for Docker
44
56
  id: meta
@@ -46,7 +58,7 @@ jobs:
46
58
  with:
47
59
  images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
48
60
  tags: |
49
- type=raw,value=${{ env.MB_VERSION }}
61
+ type=raw,value=${{ steps.get_version.outputs.mb_version }}
50
62
  type=raw,value=development
51
63
 
52
64
  - name: Build and push Docker image
@@ -56,7 +68,7 @@ jobs:
56
68
  file: src/matchbox/server/Dockerfile
57
69
  push: true
58
70
  build-args: |
59
- MB_VERSION=${{ env.MB_VERSION }}
71
+ MB_VERSION=${{ steps.get_version.outputs.mb_version }}
60
72
  tags: ${{ steps.meta.outputs.tags }}
61
73
 
62
74
  - name: Delete existing development release if it exists
@@ -76,5 +88,23 @@ jobs:
76
88
 
77
89
  May be unstable.
78
90
 
79
- **Version:** ${{ env.MB_VERSION }}
91
+ **Version:** ${{ steps.get_version.outputs.mb_version }}
80
92
  **Commit:** ${{ github.sha }}
93
+
94
+ deploy-package:
95
+ needs: build
96
+ runs-on: ubuntu-latest
97
+ permissions:
98
+ id-token: write
99
+
100
+ environment: pypi
101
+
102
+ steps:
103
+ - name: Download package artifacts
104
+ uses: actions/download-artifact@v4
105
+ with:
106
+ name: package-dist
107
+ path: ./dist
108
+
109
+ - name: Publish package to PyPI
110
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: matchbox-db
3
- Version: 0.6.2
3
+ Version: 0.6.3.dev53
4
4
  Summary: A framework for orchestrating and comparing data linking and deduplication methodologies.
5
5
  Author: Department for Business and Trade
6
6
  Project-URL: Documentation, https://uktrade.github.io/matchbox/
@@ -8,6 +8,7 @@ from importlib.metadata import version
8
8
  from io import BytesIO
9
9
 
10
10
  import httpx
11
+ import polars as pl
11
12
  from pyarrow import Table
12
13
  from pyarrow.parquet import read_table
13
14
  from tenacity import (
@@ -301,13 +302,14 @@ def get_resolution(
301
302
 
302
303
  @http_retry
303
304
  def set_data(
304
- name: ResolutionName, data: Table, validate_type: ResolutionType
305
+ name: ResolutionName, data: pl.DataFrame, validate_type: ResolutionType
305
306
  ) -> UploadStatus:
306
307
  """Upload source hashes or model results to server."""
307
308
  log_prefix = f"Resolution {name}"
308
309
  logger.debug("Uploading results", prefix=log_prefix)
309
310
 
310
- buffer = table_to_buffer(table=data)
311
+ data_arrow = data.to_arrow() if isinstance(data, pl.DataFrame) else data
312
+ buffer = table_to_buffer(table=data_arrow)
311
313
 
312
314
  # Initialise upload
313
315
  metadata_res = CLIENT.post(
@@ -432,7 +434,7 @@ def download_eval_data() -> tuple[Table, Table]:
432
434
  check_schema(SCHEMA_JUDGEMENTS, judgements.schema)
433
435
  check_schema(SCHEMA_CLUSTER_EXPANSION, expansion.schema)
434
436
 
435
- return judgements, expansion
437
+ return pl.from_arrow(judgements), pl.from_arrow(expansion)
436
438
 
437
439
 
438
440
  # Admin
@@ -128,11 +128,7 @@ class EvalData:
128
128
 
129
129
  threshold = int(threshold * 100)
130
130
 
131
- root_leaf = (
132
- results.root_leaf()
133
- .rename({"root_id": "root", "leaf_id": "leaf"})
134
- .to_arrow()
135
- )
131
+ root_leaf = results.root_leaf().rename({"root_id": "root", "leaf_id": "leaf"})
136
132
  return precision_recall([root_leaf], self.judgements, self.expansion)[0]
137
133
 
138
134
  def pr_curve(self, results: Results) -> Figure:
@@ -140,7 +136,7 @@ class EvalData:
140
136
  all_p = []
141
137
  all_r = []
142
138
 
143
- probs = pl.from_arrow(results.probabilities)
139
+ probs = results.probabilities
144
140
  thresholds = probs.select("probability").unique().to_series()
145
141
  for i, t in enumerate(sorted(thresholds)):
146
142
  float_thresh = t / 100
@@ -224,8 +224,8 @@ class Model:
224
224
  if for_validation:
225
225
  self.results = Results(
226
226
  probabilities=results,
227
- left_root_leaf=self.left_query.leaf_id.to_arrow(),
228
- right_root_leaf=self.right_query.leaf_id.to_arrow()
227
+ left_root_leaf=self.left_query.leaf_id,
228
+ right_root_leaf=self.right_query.leaf_id
229
229
  if right_df is not None
230
230
  else None,
231
231
  )
@@ -4,8 +4,6 @@ from collections.abc import Hashable
4
4
  from typing import ParamSpec, TypeVar
5
5
 
6
6
  import polars as pl
7
- import pyarrow as pa
8
- import pyarrow.compute as pc
9
7
  from pydantic import ConfigDict
10
8
 
11
9
  from matchbox.common.arrow import SCHEMA_RESULTS
@@ -33,14 +31,14 @@ class Results:
33
31
 
34
32
  model_config = ConfigDict(arbitrary_types_allowed=True)
35
33
 
36
- probabilities: pa.Table
37
- _clusters: pa.Table | None = None
34
+ probabilities: pl.DataFrame
35
+ _clusters: pl.DataFrame | None = None
38
36
 
39
37
  def __init__(
40
38
  self,
41
- probabilities: pa.Table | pl.DataFrame,
42
- left_root_leaf: pa.Table | None = None,
43
- right_root_leaf: pa.Table | None = None,
39
+ probabilities: pl.DataFrame,
40
+ left_root_leaf: pl.DataFrame | None = None,
41
+ right_root_leaf: pl.DataFrame | None = None,
44
42
  ) -> None:
45
43
  """Initialises and validates results."""
46
44
  self.left_root_leaf = None
@@ -51,61 +49,49 @@ class Results:
51
49
  if right_root_leaf is not None:
52
50
  self.right_root_leaf = right_root_leaf
53
51
 
54
- if isinstance(probabilities, pl.DataFrame):
55
- probabilities = probabilities.to_arrow()
56
-
57
- if not isinstance(probabilities, pa.Table):
58
- raise ValueError("Expected a polars DataFrame or pyarrow Table.")
52
+ if not isinstance(probabilities, pl.DataFrame):
53
+ raise ValueError(f"Expected a polars DataFrame, got {type(probabilities)}.")
59
54
 
60
55
  expected_fields = set(SCHEMA_RESULTS.names)
61
- if set(probabilities.column_names) != expected_fields:
56
+ if set(probabilities.columns) != expected_fields:
62
57
  raise ValueError(
63
58
  f"Expected {expected_fields}.\nFound {set(probabilities.column_names)}."
64
59
  )
65
60
 
66
61
  # Handle empty tables
67
- if probabilities.num_rows == 0:
68
- empty_arrays = [pa.array([], type=field.type) for field in SCHEMA_RESULTS]
69
- probabilities = pa.Table.from_arrays(
70
- empty_arrays, names=[field.name for field in SCHEMA_RESULTS]
71
- )
62
+ if probabilities.height == 0:
63
+ probabilities = pl.DataFrame(schema=pl.Schema(SCHEMA_RESULTS))
72
64
 
73
65
  # Process probability field if it contains floating-point or decimal values
74
- probability_type = probabilities["probability"].type
75
- if pa.types.is_floating(probability_type) or pa.types.is_decimal(
76
- probability_type
77
- ):
78
- probability_uint8 = pc.cast(
79
- pc.round(pc.multiply(probabilities["probability"], 100)),
80
- options=pc.CastOptions(
81
- target_type=pa.uint8(),
82
- allow_float_truncate=True,
83
- allow_decimal_truncate=True,
84
- ),
66
+ probability_type = probabilities["probability"].dtype
67
+ if probability_type.is_float() or probability_type.is_decimal():
68
+ probability_uint8 = pl.Series(
69
+ probabilities.select(
70
+ pl.col("probability").mul(100).round(0).cast(pl.UInt8)
71
+ )
85
72
  )
86
73
 
87
74
  # Check max value only if the table is not empty
88
- max_prob = pc.max(probability_uint8)
89
- if max_prob is not None and max_prob.as_py() > 100:
90
- p_max = pc.max(probabilities["probability"]).as_py()
91
- p_min = pc.min(probabilities["probability"]).as_py()
75
+ max_prob = probability_uint8.max()
76
+ if max_prob is not None and max_prob > 100:
77
+ p_max = max_prob
78
+ p_min = probability_uint8.min()
92
79
  raise ValueError(f"Probability range misconfigured: [{p_min}, {p_max}]")
93
80
 
94
- probabilities = probabilities.set_column(
95
- i=probabilities.schema.get_field_index("probability"),
96
- field_="probability",
97
- column=probability_uint8,
81
+ probabilities = probabilities.replace_column(
82
+ probabilities.get_column_index("probability"), probability_uint8
98
83
  )
99
84
 
100
- self.probabilities = probabilities.cast(SCHEMA_RESULTS)
85
+ # need schema in format recognised by polars
86
+ self.probabilities = probabilities.cast(pl.Schema(SCHEMA_RESULTS))
101
87
 
102
88
  @property
103
89
  def clusters(self):
104
90
  """Retrieve new clusters implied by these results."""
105
- if not self._clusters:
91
+ if self._clusters is None:
106
92
  im = IntMap()
107
93
  self._clusters = to_clusters(
108
- results=self.probabilities, dtype=pa.int64, hash_func=im.index
94
+ results=self.probabilities, dtype=pl.Int64, hash_func=im.index
109
95
  )
110
96
  return self._clusters
111
97
 
@@ -146,7 +132,7 @@ class Results:
146
132
  ) -> pl.DataFrame:
147
133
  """Enriches the probability results with the source data."""
148
134
  return self._merge_with_source_data(
149
- base_df=pl.from_arrow(self.probabilities),
135
+ base_df=self.probabilities,
150
136
  base_df_cols=["left_id", "right_id", "probability"],
151
137
  left_data=left_data,
152
138
  left_key=left_key,
@@ -165,7 +151,7 @@ class Results:
165
151
  ) -> pl.DataFrame:
166
152
  """Enriches the cluster results with the source data."""
167
153
  return self._merge_with_source_data(
168
- base_df=pl.from_arrow(self.clusters),
154
+ base_df=self.clusters,
169
155
  base_df_cols=["parent", "child", "probability"],
170
156
  left_data=left_data,
171
157
  left_key=left_key,
@@ -182,20 +168,19 @@ class Results:
182
168
  "This Results object wasn't instantiated for validation features."
183
169
  )
184
170
 
185
- parents_root_leaf = pl.from_arrow(self.left_root_leaf.select(["id", "leaf_id"]))
171
+ parents_root_leaf = self.left_root_leaf.select(["id", "leaf_id"])
186
172
  if self.right_root_leaf is not None:
187
173
  parents_root_leaf = pl.concat(
188
174
  [
189
175
  parents_root_leaf,
190
- pl.from_arrow(self.right_root_leaf.select(["id", "leaf_id"])),
176
+ self.right_root_leaf.select(["id", "leaf_id"]),
191
177
  ]
192
178
  )
193
179
 
194
180
  # Go from parent-child (where child could be the root of another model)
195
181
  # to root-leaf, where leaf is a source cluster ID
196
182
  root_leaf_res = (
197
- pl.from_arrow(self.clusters)
198
- .rename({"parent": "root_id"})
183
+ self.clusters.rename({"parent": "root_id"})
199
184
  .join(parents_root_leaf, left_on="child", right_on="id")
200
185
  .select(["root_id", "leaf_id"])
201
186
  .unique()
@@ -205,7 +190,7 @@ class Results:
205
190
  unmerged_ids_rows = (
206
191
  parents_root_leaf.select("id", "leaf_id")
207
192
  .join(
208
- pl.from_arrow(self.clusters).select("child"),
193
+ self.clusters.select("child"),
209
194
  left_on="id",
210
195
  right_on="child",
211
196
  how="anti",
@@ -4,7 +4,6 @@ from itertools import chain, combinations
4
4
  from typing import TypeAlias
5
5
 
6
6
  import polars as pl
7
- from pyarrow import Table
8
7
  from pydantic import BaseModel, Field, field_validator
9
8
 
10
9
  from matchbox.common.graph import ModelResolutionName
@@ -39,7 +38,9 @@ class Judgement(BaseModel):
39
38
 
40
39
 
41
40
  def precision_recall(
42
- models_root_leaf: list[Table], judgements: Table, expansion: Table
41
+ models_root_leaf: list[pl.DataFrame],
42
+ judgements: pl.DataFrame,
43
+ expansion: pl.DataFrame,
43
44
  ) -> list[PrecisionRecall]:
44
45
  """From models and eval data, compute scores inspired by precision-recall.
45
46
 
@@ -78,10 +79,9 @@ def precision_recall(
78
79
  for root_leaf in models_root_leaf:
79
80
  if not len(root_leaf):
80
81
  raise ValueError("Model data cannot be empty.")
81
- leaves_per_set.append(set(root_leaf["leaf"].to_pylist()))
82
+ leaves_per_set.append(set(root_leaf["leaf"].to_list()))
82
83
  clusters = (
83
- pl.from_arrow(root_leaf)
84
- .group_by("root")
84
+ root_leaf.group_by("root")
85
85
  .agg(pl.col("leaf").alias("leaves"))
86
86
  .select("leaves")
87
87
  .to_series()
@@ -93,7 +93,7 @@ def precision_recall(
93
93
  pairs_per_model.append(model_pairs)
94
94
 
95
95
  validation_pairs, validation_net_count, validation_leaves = process_judgements(
96
- pl.from_arrow(judgements), pl.from_arrow(expansion)
96
+ judgements, expansion
97
97
  )
98
98
  leaves_per_set.append(validation_leaves)
99
99
 
@@ -521,7 +521,7 @@ def generate_entities(
521
521
 
522
522
 
523
523
  def probabilities_to_results_entities(
524
- probabilities: pa.Table,
524
+ probabilities: pl.DataFrame,
525
525
  left_clusters: tuple[ClusterEntity, ...],
526
526
  right_clusters: tuple[ClusterEntity, ...] | None = None,
527
527
  threshold: float | int = 0,
@@ -547,7 +547,7 @@ def probabilities_to_results_entities(
547
547
  djs.add(entity)
548
548
 
549
549
  # Add edges to the disjoint set
550
- for record in probabilities.to_pylist():
550
+ for record in probabilities.to_dicts():
551
551
  if record["probability"] >= threshold:
552
552
  djs.union(
553
553
  left_lookup[record["left_id"]],
@@ -76,12 +76,12 @@ add_model_class(MockDeduper)
76
76
  add_model_class(MockLinker)
77
77
 
78
78
 
79
- def component_report(all_nodes: list[Any], table: pa.Table) -> dict:
79
+ def component_report(all_nodes: list[Any], table: pl.DataFrame) -> dict:
80
80
  """Fast reporting on connected components using rustworkx.
81
81
 
82
82
  Args:
83
83
  all_nodes: list of identities of inputs being matched
84
- table: PyArrow table with 'left', 'right' columns
84
+ table: Polars dataframe with 'left', 'right' columns
85
85
 
86
86
  Returns:
87
87
  dictionary containing basic component statistics
@@ -252,7 +252,7 @@ def generate_dummy_probabilities(
252
252
  num_components: int,
253
253
  total_rows: int | None = None,
254
254
  seed: int = 42,
255
- ) -> pa.Table:
255
+ ) -> pl.DataFrame:
256
256
  """Generate dummy Arrow probabilities data with guaranteed isolated components.
257
257
 
258
258
  While much of the factory system uses generate_entity_probabilities, this function
@@ -269,7 +269,7 @@ def generate_dummy_probabilities(
269
269
  seed: Random seed for reproducibility
270
270
 
271
271
  Returns:
272
- PyArrow Table with 'left_id', 'right_id', and 'probability' columns
272
+ Polars dataframe with 'left_id', 'right_id', and 'probability' columns
273
273
  """
274
274
  # Validate inputs
275
275
  deduplicate = False
@@ -419,14 +419,9 @@ def generate_dummy_probabilities(
419
419
  # Convert to arrays
420
420
  lefts, rights, probs = zip(*all_edges, strict=True)
421
421
 
422
- # Create PyArrow arrays
423
- left_array = pa.array(lefts, type=pa.uint64())
424
- right_array = pa.array(rights, type=pa.uint64())
425
- prob_array = pa.array(probs, type=pa.uint8())
426
-
427
- return pa.table(
428
- [left_array, right_array, prob_array],
429
- names=["left_id", "right_id", "probability"],
422
+ return pl.DataFrame(
423
+ {"left_id": lefts, "right_id": rights, "probability": probs},
424
+ schema={"left_id": pl.UInt64, "right_id": pl.UInt64, "probability": pl.UInt8},
430
425
  )
431
426
 
432
427
 
@@ -436,7 +431,7 @@ def generate_entity_probabilities(
436
431
  source_entities: frozenset[SourceEntity],
437
432
  prob_range: tuple[float, float] = (0.8, 1.0),
438
433
  seed: int = 42,
439
- ) -> pa.Table:
434
+ ) -> pl.DataFrame:
440
435
  """Generate probabilities that will recover entity relationships.
441
436
 
442
437
  Compares ClusterEntity objects against ground truth SourceEntities by checking
@@ -530,27 +525,9 @@ def generate_entity_probabilities(
530
525
 
531
526
  # If no edges were generated, return empty table with correct schema
532
527
  if not edges:
533
- return pa.table(
534
- [
535
- pa.array([], type=pa.uint64()),
536
- pa.array([], type=pa.uint64()),
537
- pa.array([], type=pa.uint8()),
538
- ],
539
- schema=SCHEMA_RESULTS,
540
- )
528
+ return pl.DataFrame(schema=pl.Schema(SCHEMA_RESULTS))
541
529
 
542
- # Convert to arrays
543
- lefts, rights, probs = zip(*edges, strict=False)
544
-
545
- # Create PyArrow arrays
546
- left_array = pa.array(lefts, type=pa.uint64())
547
- right_array = pa.array(rights, type=pa.uint64())
548
- prob_array = pa.array(probs, type=pa.uint8())
549
-
550
- return pa.table(
551
- [left_array, right_array, prob_array],
552
- schema=SCHEMA_RESULTS,
553
- )
530
+ return pl.DataFrame(edges, orient="row", schema=pl.Schema(SCHEMA_RESULTS))
554
531
 
555
532
 
556
533
  class ModelTestkit(BaseModel):
@@ -565,7 +542,7 @@ class ModelTestkit(BaseModel):
565
542
  right_data: pa.Table | None
566
543
  right_query: Query | None
567
544
  right_clusters: dict[int, ClusterEntity] | None
568
- probabilities: pa.Table
545
+ probabilities: pl.DataFrame
569
546
 
570
547
  _entities: tuple[ClusterEntity, ...]
571
548
  _threshold: int
@@ -6,6 +6,7 @@ from pathlib import Path
6
6
  from typing import Any, Literal
7
7
 
8
8
  import pyarrow as pa
9
+ from polars.testing import assert_frame_equal
9
10
  from pydantic_settings import BaseSettings, SettingsConfigDict
10
11
  from sqlalchemy import Engine
11
12
 
@@ -171,7 +172,9 @@ def create_dedupe_scenario(
171
172
 
172
173
  # Add to backend and DAG
173
174
  backend.insert_resolution(resolution=model_testkit.model.to_resolution())
174
- backend.insert_model_data(name=name, results=model_testkit.probabilities)
175
+ backend.insert_model_data(
176
+ name=name, results=model_testkit.probabilities.to_arrow()
177
+ )
175
178
  dag.add_model(model_testkit)
176
179
 
177
180
  return dag
@@ -216,7 +219,9 @@ def create_probabilistic_dedupe_scenario(
216
219
 
217
220
  # Add to backend and DAG
218
221
  backend.insert_resolution(resolution=model_testkit.model.to_resolution())
219
- backend.insert_model_data(name=name, results=model_testkit.probabilities)
222
+ backend.insert_model_data(
223
+ name=name, results=model_testkit.probabilities.to_arrow()
224
+ )
220
225
  backend.set_model_truth(name=name, truth=50)
221
226
  dag.add_model(model_testkit)
222
227
 
@@ -271,7 +276,9 @@ def create_link_scenario(
271
276
 
272
277
  # Add to backend and DAG
273
278
  backend.insert_resolution(resolution=crn_duns_model.model.to_resolution())
274
- backend.insert_model_data(name=crn_duns_name, results=crn_duns_model.probabilities)
279
+ backend.insert_model_data(
280
+ name=crn_duns_name, results=crn_duns_model.probabilities.to_arrow()
281
+ )
275
282
  dag.add_model(crn_duns_model)
276
283
 
277
284
  # Create CRN-CDMS link
@@ -299,7 +306,9 @@ def create_link_scenario(
299
306
 
300
307
  # Add to backend and DAG
301
308
  backend.insert_resolution(resolution=crn_cdms_model.model.to_resolution())
302
- backend.insert_model_data(name=crn_cdms_name, results=crn_cdms_model.probabilities)
309
+ backend.insert_model_data(
310
+ name=crn_cdms_name, results=crn_cdms_model.probabilities.to_arrow()
311
+ )
303
312
  backend.set_model_truth(name=crn_cdms_name, truth=75)
304
313
  dag.add_model(crn_cdms_model)
305
314
 
@@ -344,7 +353,7 @@ def create_link_scenario(
344
353
  # Add to backend and DAG
345
354
  backend.insert_resolution(resolution=final_join_model.model.to_resolution())
346
355
  backend.insert_model_data(
347
- name=final_join_name, results=final_join_model.probabilities
356
+ name=final_join_name, results=final_join_model.probabilities.to_arrow()
348
357
  )
349
358
  dag.add_model(final_join_model)
350
359
 
@@ -422,15 +431,17 @@ def create_alt_dedupe_scenario(
422
431
  seed=seed,
423
432
  )
424
433
 
425
- assert model_testkit1.probabilities.num_rows > 0
426
- assert model_testkit1.probabilities == model_testkit2.probabilities
434
+ assert len(model_testkit1.probabilities) > 0
435
+ assert_frame_equal(model_testkit1.probabilities, model_testkit2.probabilities)
427
436
 
428
437
  for model, threshold in ((model_testkit1, 50), (model_testkit2, 75)):
429
438
  model.threshold = threshold
430
439
 
431
440
  # Add both models to backend and DAG
432
441
  backend.insert_resolution(resolution=model.model.to_resolution())
433
- backend.insert_model_data(name=model.name, results=model.probabilities)
442
+ backend.insert_model_data(
443
+ name=model.name, results=model.probabilities.to_arrow()
444
+ )
434
445
  backend.set_model_truth(name=model.name, truth=threshold)
435
446
 
436
447
  # Add to DAG
@@ -509,7 +520,7 @@ def create_convergent_scenario(
509
520
  seed=seed,
510
521
  )
511
522
 
512
- assert model_testkit.probabilities.num_rows > 0
523
+ assert len(model_testkit.probabilities) > 0
513
524
 
514
525
  # Add to DAG
515
526
  dag.add_model(model_testkit)
@@ -202,7 +202,7 @@ class LinkedSourcesTestkit(BaseModel):
202
202
 
203
203
  def diff_results(
204
204
  self,
205
- probabilities: pa.Table,
205
+ probabilities: pl.DataFrame,
206
206
  sources: list[SourceResolutionName],
207
207
  left_clusters: tuple[ClusterEntity, ...],
208
208
  right_clusters: tuple[ClusterEntity, ...] | None = None,