matchescu-reference-store 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2022 matchescu
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,43 @@
1
+ Metadata-Version: 2.1
2
+ Name: matchescu-reference-store
3
+ Version: 0.1.0
4
+ Summary: Storage options for entity references
5
+ License: MIT
6
+ Author: Andrei Olar
7
+ Author-email: andrei.olar@samlex.ro
8
+ Requires-Python: >=3.12,<4.0
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Requires-Dist: matchescu-base (>=0.11.0,<0.12.0)
14
+ Description-Content-Type: text/markdown
15
+
16
+ # Reference Store
17
+
18
+ The entity reference store's main job is to store the entity references known to the system.
19
+ Depending on the entity resolution model being used, it may persist relations between
20
+ entity references.
21
+
22
+ The entity reference store logically organizes the data it stores in workspaces.
23
+ Each workspace corresponds to an entity resolution problem domain. For example,
24
+ users might have one workspace for resolving publications across multiple data sources
25
+ and another workspace for resolving e-commerce products across vendor databases.
26
+
27
+ # Development
28
+
29
+ First, ensure that the package builds locally and that the automated test suite runs
30
+ fine.
31
+
32
+ ```shell
33
+ $ make bootstrap
34
+ $ make test
35
+ ```
36
+
37
+ Then follow the coding guidelines to submit new contributions.
38
+
39
+ # Usage
40
+
41
+ This package is meant to be used as a subsystem in a larger entity resolution pipeline.
42
+ The functionalities should be imported and used as per the pipeline's requirements.
43
+
@@ -0,0 +1,27 @@
1
+ # Reference Store
2
+
3
+ The entity reference store's main job is to store the entity references known to the system.
4
+ Depending on the entity resolution model being used, it may persist relations between
5
+ entity references.
6
+
7
+ The entity reference store logically organizes the data it stores in workspaces.
8
+ Each workspace corresponds to an entity resolution problem domain. For example,
9
+ users might have one workspace for resolving publications across multiple data sources
10
+ and another workspace for resolving e-commerce products across vendor databases.
11
+
12
+ # Development
13
+
14
+ First, ensure that the package builds locally and that the automated test suite runs
15
+ fine.
16
+
17
+ ```shell
18
+ $ make bootstrap
19
+ $ make test
20
+ ```
21
+
22
+ Then follow the coding guidelines to submit new contributions.
23
+
24
+ # Usage
25
+
26
+ This package is meant to be used as a subsystem in a larger entity resolution pipeline.
27
+ The functionalities should be imported and used as per the pipeline's requirements.
@@ -0,0 +1,23 @@
1
+ [tool.poetry]
2
+ name = "matchescu-reference-store"
3
+ version = "0.1.0"
4
+ description = "Storage options for entity references"
5
+ authors = ["Andrei Olar <andrei.olar@samlex.ro>"]
6
+ license = "MIT"
7
+ readme = "README.md"
8
+ packages = [{include="matchescu", from="src"}]
9
+
10
+ [tool.poetry.dependencies]
11
+ python = "^3.12"
12
+ matchescu-base = "^0.11.0"
13
+
14
+ [build-system]
15
+ requires = ["poetry-core>=2.0.0,<3.0.0"]
16
+ build-backend = "poetry.core.masonry.api"
17
+
18
+ [tool.poetry.group.dev.dependencies]
19
+ black = "^25.1.0"
20
+ ruff = "^0.11.1"
21
+ mypy = "^1.15.0"
22
+ pytest = "^8.3.5"
23
+
File without changes
@@ -0,0 +1,8 @@
1
+ from matchescu.typing import EntityReferenceIdentifier
2
+
3
+
4
class EntityReferenceNotFound(Exception):
    """Signal that no entity reference is stored under a given identifier.

    The human-readable message is built from the identifier's ``label`` and
    ``source`` attributes. The identifier itself is kept on the exception as
    ``identifier`` so handlers can inspect it without parsing the message.
    """

    def __init__(self, identifier: EntityReferenceIdentifier) -> None:
        """Build the error message for ``identifier`` and remember it.

        :param identifier: identifier of the missing entity reference; its
            ``label`` and ``source`` attributes are interpolated in the message.
        """
        super().__init__(
            f"Entity reference with label '{identifier.label}' from source '{identifier.source}' not found"
        )
        self.identifier = identifier

    def __reduce__(self):
        """Rebuild the exception from its identifier when copied or pickled.

        ``Exception`` reconstructs instances by calling the class with
        ``self.args``, which here holds only the formatted message. Feeding
        that string back into ``__init__`` would fail on ``.label``, so the
        original identifier is supplied instead. The instance ``__dict__`` is
        passed along as state so any extra attributes survive the round trip.
        """
        return type(self), (self.identifier,), vars(self)
@@ -0,0 +1,36 @@
1
+ from functools import partial
2
+ from typing import Iterable, Hashable
3
+
4
+ from matchescu.reference_store._exceptions import EntityReferenceNotFound
5
+ from matchescu.typing import EntityReference, EntityReferenceIdentifier
6
+
7
+
8
class InMemoryIdTable:
    """Dictionary-backed table of entity references keyed by their ``id``.

    References are stored in a plain ``dict``; storing a reference whose ``id``
    is already present replaces the previous entry.
    """

    def __init__(self):
        """Create an empty table."""
        self._id_table: dict[EntityReferenceIdentifier, EntityReference] = {}

    def __len__(self) -> int:
        """Return the number of stored entity references."""
        return len(self._id_table)

    def __iter__(self) -> Iterable[EntityReference]:
        """Iterate over the stored entity references in insertion order."""
        return iter(self._id_table.values())

    def put(self, ref: EntityReference) -> None:
        """Store ``ref`` under ``ref.id``, replacing any existing entry.

        ``None`` is ignored silently so callers may pass optional references.
        """
        if ref is None:
            return
        self._id_table[ref.id] = ref

    def get(self, source: str, label: Hashable) -> EntityReference:
        """Return the reference identified by ``label`` within ``source``.

        :param source: name of the source the reference originates from.
        :param label: label of the reference within that source.
        :raises EntityReferenceNotFound: if no such reference is stored.
        """
        identifier = EntityReferenceIdentifier(label, source)
        try:
            # Single lookup instead of a membership test followed by indexing.
            return self._id_table[identifier]
        except KeyError:
            # The KeyError adds nothing beyond the domain error; drop it.
            raise EntityReferenceNotFound(identifier) from None

    @staticmethod
    def __has_source(identifier: EntityReferenceIdentifier, source: str) -> bool:
        """Tell whether ``identifier`` belongs to ``source``."""
        return identifier.source == source

    def get_by_source(self, source: str) -> Iterable[EntityReference]:
        """Return every stored reference whose identifier has ``source``.

        The result is a snapshot list taken at call time. A lazy iterator over
        the live dictionary would raise ``RuntimeError`` as soon as the table
        is modified before or while the result is being consumed.
        """
        return [
            ref
            for identifier, ref in self._id_table.items()
            if self.__has_source(identifier, source)
        ]