matchescu-reference-store 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matchescu_reference_store-0.1.0/LICENSE +21 -0
- matchescu_reference_store-0.1.0/PKG-INFO +43 -0
- matchescu_reference_store-0.1.0/README.md +27 -0
- matchescu_reference_store-0.1.0/pyproject.toml +23 -0
- matchescu_reference_store-0.1.0/src/matchescu/py.typed +0 -0
- matchescu_reference_store-0.1.0/src/matchescu/reference_store/__init__.py +0 -0
- matchescu_reference_store-0.1.0/src/matchescu/reference_store/__pycache__/__init__.cpython-312.pyc +0 -0
- matchescu_reference_store-0.1.0/src/matchescu/reference_store/__pycache__/_exceptions.cpython-312.pyc +0 -0
- matchescu_reference_store-0.1.0/src/matchescu/reference_store/_exceptions.py +8 -0
- matchescu_reference_store-0.1.0/src/matchescu/reference_store/id_table/__init__.py +0 -0
- matchescu_reference_store-0.1.0/src/matchescu/reference_store/id_table/__pycache__/__init__.cpython-312.pyc +0 -0
- matchescu_reference_store-0.1.0/src/matchescu/reference_store/id_table/__pycache__/_in_memory.cpython-312.pyc +0 -0
- matchescu_reference_store-0.1.0/src/matchescu/reference_store/id_table/_in_memory.py +36 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2022 matchescu
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: matchescu-reference-store
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Storage options for entity references
|
|
5
|
+
License: MIT
|
|
6
|
+
Author: Andrei Olar
|
|
7
|
+
Author-email: andrei.olar@samlex.ro
|
|
8
|
+
Requires-Python: >=3.12,<4.0
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Requires-Dist: matchescu-base (>=0.11.0,<0.12.0)
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
16
|
+
# Reference Store
|
|
17
|
+
|
|
18
|
+
The entity reference store's main job is to keep entity references known by the system.
|
|
19
|
+
Depending on the entity resolution model being used, it may persist relations between
|
|
20
|
+
entity references.
|
|
21
|
+
|
|
22
|
+
The entity reference store logically organizes the data it stores in workspaces.
|
|
23
|
+
Each workspace corresponds to an entity resolution problem domain. For example,
|
|
24
|
+
users might have one workspace for resolving publications across multiple data sources
|
|
25
|
+
and another workspace for resolving e-commerce products across vendor databases.
|
|
26
|
+
|
|
27
|
+
# Development
|
|
28
|
+
|
|
29
|
+
First, ensure that the package builds locally and that the automated test suite runs
|
|
30
|
+
fine.
|
|
31
|
+
|
|
32
|
+
```shell
|
|
33
|
+
$ make bootstrap
|
|
34
|
+
$ make test
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Then follow the coding guidelines to submit new contributions.
|
|
38
|
+
|
|
39
|
+
# Usage
|
|
40
|
+
|
|
41
|
+
This package is meant to be used as a subsystem in a larger entity resolution pipeline.
|
|
42
|
+
The functionalities should be imported and used as per the pipeline's requirements.
|
|
43
|
+
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Reference Store
|
|
2
|
+
|
|
3
|
+
The entity reference store's main job is to keep entity references known by the system.
|
|
4
|
+
Depending on the entity resolution model being used, it may persist relations between
|
|
5
|
+
entity references.
|
|
6
|
+
|
|
7
|
+
The entity reference store logically organizes the data it stores in workspaces.
|
|
8
|
+
Each workspace corresponds to an entity resolution problem domain. For example,
|
|
9
|
+
users might have one workspace for resolving publications across multiple data sources
|
|
10
|
+
and another workspace for resolving e-commerce products across vendor databases.
|
|
11
|
+
|
|
12
|
+
# Development
|
|
13
|
+
|
|
14
|
+
First, ensure that the package builds locally and that the automated test suite runs
|
|
15
|
+
fine.
|
|
16
|
+
|
|
17
|
+
```shell
|
|
18
|
+
$ make bootstrap
|
|
19
|
+
$ make test
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Then follow the coding guidelines to submit new contributions.
|
|
23
|
+
|
|
24
|
+
# Usage
|
|
25
|
+
|
|
26
|
+
This package is meant to be used as a subsystem in a larger entity resolution pipeline.
|
|
27
|
+
The functionalities should be imported and used as per the pipeline's requirements.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "matchescu-reference-store"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Storage options for entity references"
|
|
5
|
+
authors = ["Andrei Olar <andrei.olar@samlex.ro>"]
|
|
6
|
+
license = "MIT"
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
packages = [{include="matchescu", from="src"}]
|
|
9
|
+
|
|
10
|
+
[tool.poetry.dependencies]
|
|
11
|
+
python = "^3.12"
|
|
12
|
+
matchescu-base = "^0.11.0"
|
|
13
|
+
|
|
14
|
+
[build-system]
|
|
15
|
+
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
|
16
|
+
build-backend = "poetry.core.masonry.api"
|
|
17
|
+
|
|
18
|
+
[tool.poetry.group.dev.dependencies]
|
|
19
|
+
black = "^25.1.0"
|
|
20
|
+
ruff = "^0.11.1"
|
|
21
|
+
mypy = "^1.15.0"
|
|
22
|
+
pytest = "^8.3.5"
|
|
23
|
+
|
|
File without changes
|
|
File without changes
|
matchescu_reference_store-0.1.0/src/matchescu/reference_store/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
from matchescu.typing import EntityReferenceIdentifier
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class EntityReferenceNotFound(Exception):
    """Error signalling that no entity reference matches an identifier.

    The exception message embeds the ``label`` and ``source`` attributes of
    the identifier that was looked up.
    """

    def __init__(self, identifier: EntityReferenceIdentifier) -> None:
        """Build the error message from the identifier that was not found.

        :param identifier: identifier of the missing entity reference; its
            ``label`` and ``source`` attributes are interpolated into the
            message.
        """
        message = f"Entity reference with label '{identifier.label}' from source '{identifier.source}' not found"
        super().__init__(message)
|
|
File without changes
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from functools import partial
|
|
2
|
+
from typing import Iterable, Hashable
|
|
3
|
+
|
|
4
|
+
from matchescu.reference_store._exceptions import EntityReferenceNotFound
|
|
5
|
+
from matchescu.typing import EntityReference, EntityReferenceIdentifier
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class InMemoryIdTable:
    """Dictionary-backed table of entity references.

    Each reference is stored under its ``id`` attribute. Lookups build an
    ``EntityReferenceIdentifier`` from a label and a source, so ``ref.id`` is
    presumably an ``EntityReferenceIdentifier`` as well (confirm against
    ``matchescu.typing``).
    """

    def __init__(self):
        """Create an empty table."""
        self._id_table = {}

    def __len__(self) -> int:
        """Return the number of stored entity references."""
        table = self._id_table
        return len(table)

    def __iter__(self) -> Iterable[EntityReference]:
        """Iterate over the stored entity references (values, not keys)."""
        stored_refs = self._id_table.values()
        return iter(stored_refs)

    def put(self, ref: EntityReference) -> None:
        """Store ``ref`` under ``ref.id``, replacing any entry with that id.

        A ``None`` reference is silently ignored.
        """
        if ref is not None:
            self._id_table[ref.id] = ref

    def get(self, source: str, label: Hashable) -> EntityReference:
        """Return the reference identified by ``label`` within ``source``.

        :param source: name of the source the reference comes from.
        :param label: label of the reference within that source.
        :raises EntityReferenceNotFound: if no reference is stored under the
            identifier built from ``label`` and ``source``.
        """
        key = EntityReferenceIdentifier(label, source)
        if key in self._id_table:
            return self._id_table[key]
        raise EntityReferenceNotFound(key)

    @staticmethod
    def __has_source(identifier: EntityReferenceIdentifier, source: str) -> bool:
        """Tell whether ``identifier`` belongs to ``source``."""
        return source == identifier.source

    def get_by_source(self, source: str) -> Iterable[EntityReference]:
        """Return the stored references whose identifier has this source.

        The result is a lazy ``map`` object: references are looked up only
        as the caller consumes it.
        """
        table = self._id_table
        matches_source = partial(self.__has_source, source=source)
        # Iterating the dict yields its keys, i.e. the stored identifiers.
        matching_ids = filter(matches_source, table)
        return map(table.get, matching_ids)
|