amapy-db 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.1
2
+ Name: amapy-db
3
+ Version: 1.0.0
4
+ Summary: The Database plugin for asset-manager project. It provides functionalities to manage and store metadata related to digital assets, such as storing MD5 hashes of asset files and managing these records efficiently.
5
+ License: Copyright (c) 2024 Roche Diagnostics Computation Science & Informatics
6
+ Requires-Python: <3.11,>=3.10
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: amapy-pluggy==1.*
9
+ Requires-Dist: amapy-utils==1.*
10
+
11
+ asset-db
@@ -0,0 +1 @@
1
+ asset-db
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.1
2
+ Name: amapy-db
3
+ Version: 1.0.0
4
+ Summary: The Database plugin for asset-manager project. It provides functionalities to manage and store metadata related to digital assets, such as storing MD5 hashes of asset files and managing these records efficiently.
5
+ License: Copyright (c) 2024 Roche Diagnostics Computation Science & Informatics
6
+ Requires-Python: <3.11,>=3.10
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: amapy-pluggy==1.*
9
+ Requires-Dist: amapy-utils==1.*
10
+
11
+ asset-db
@@ -0,0 +1,22 @@
1
+ README.md
2
+ pyproject.toml
3
+ amapy_db.egg-info/PKG-INFO
4
+ amapy_db.egg-info/SOURCES.txt
5
+ amapy_db.egg-info/dependency_links.txt
6
+ amapy_db.egg-info/requires.txt
7
+ amapy_db.egg-info/top_level.txt
8
+ asset_db/__init__.py
9
+ asset_db/assets_db.py
10
+ asset_db/db.py
11
+ asset_db/file_db.py
12
+ asset_db/manifest_db.py
13
+ asset_db/repo_db.py
14
+ asset_db/states_db.py
15
+ asset_db/store_db.py
16
+ asset_db/store_file_db.py
17
+ asset_db/tests/__init__.py
18
+ asset_db/tests/test_assets_db.py
19
+ asset_db/tests/test_db.py
20
+ asset_db/tests/test_file_db.py
21
+ asset_db/tests/test_manifest_db.py
22
+ asset_db/tests/test_states_db.py
@@ -0,0 +1,2 @@
1
+ amapy-pluggy==1.*
2
+ amapy-utils==1.*
@@ -0,0 +1 @@
1
+ asset_db
@@ -0,0 +1,7 @@
1
+ from .manifest_db import ManifestDB
2
+ from .file_db import FileDB
3
+ from .states_db import StatesDB
4
+ from .assets_db import AssetsDB
5
+ from .repo_db import RepoDB
6
+ from .store_db import StoreDB
7
+ from .store_file_db import StoreFileDB
@@ -0,0 +1,44 @@
1
+ import logging
2
+
3
+ from asset_db.db import Database
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+
8
class AssetsDB(Database):
    """Stores the md5 hashes of asset*.yaml files.

    The hashes are used to check which files need to be re-downloaded
    during fetch operations.
    """
    type = "assets-db"

    @property
    def file_hashes(self):
        """Mapping of file path to hash, loaded from the db on first access
        and cached on the instance afterwards."""
        try:
            return self._file_hashes
        except AttributeError:
            self._file_hashes = self.data()
            return self._file_hashes

    def add_file_hash(self, path, hash):
        """Adds the hash for ``path`` to the existing records.

        Parameters
        ----------
        path : str
            key under which the hash is stored
        hash : str
            hash value to store
        """
        # update() persists the change and returns the resulting data
        self._file_hashes = self.update(**{path: hash})

    def remove_files(self, *paths):
        """Deletes the records for the given paths and persists the result.

        Paths without a (truthy) stored hash are logged and ignored.
        """
        data = self.file_hashes
        for path in paths:
            deleted = data.pop(path, None)
            if not deleted:
                # report the path that was requested; the previous code
                # formatted the builtin ``id`` function into the message
                logger.info("file not added yet, ignoring remove for:{}".format(path))
        self._file_hashes = data
        self._write_to_file(data)

    def clear(self):
        """Clears all records, both cached and persisted."""
        self._file_hashes = {}
        self._write_to_file(data={})
@@ -0,0 +1,107 @@
1
+ import logging
2
+ import os
3
+ from typing import List, AnyStr, Union
4
+
5
+ from asset_utils.utils import utils
6
+ from asset_utils.utils.file_utils import FileUtils
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+ DB_FILE = "json" # yaml
11
+
12
+
13
class Database:
    """File-backed dictionary store presenting a db-like interface.

    The whole content is one dictionary, serialized to a JSON or YAML file
    depending on the module-level ``DB_FILE`` setting.

    Instances are singletons per path: constructing a db for a path that is
    already registered returns the existing instance, and re-using a path
    with a class of a different name raises. The registry lives on this
    class, so make sure all DBs inherit directly from this class only.
    """
    path = None
    backup_path = None  # in case we want to cache the database
    _persisted_data = {}  # class-level default; instances rebind it on read/write
    _instances = {}  # registry: path -> instance

    def __new__(cls, *args, **kwargs):
        """Returns the instance registered for ``path``, creating it if needed.

        Raises
        ------
        Exception
            if no path is given, or if the path is already registered by an
            instance of a differently named class
        """
        # there can be only one instance per path; guard the positional
        # lookup so a missing path hits the explicit error below instead of
        # an IndexError
        path = kwargs.get('path') or (args[0] if args else None)
        if not path:
            raise Exception("required param: path is null")

        instance = cls._instances.get(path, None)
        if instance is None:
            instance = super().__new__(cls)
            cls._instances[path] = instance

        if instance.__class__.__name__ != cls.__name__:
            raise Exception("Instance already exists with same path but different class type")
        return instance

    def __init__(self, path, backup_path: str = None):
        """Ensures the db file exists and optionally hard-links a backup.

        Note that Python runs ``__init__`` on every construction, including
        the ones where ``__new__`` returned an already registered instance.

        Parameters
        ----------
        path : str
            location of the db file
        backup_path : str, optional
            location of a hard link to the db file
        """
        if path:
            FileUtils.create_file_if_not_exists(path)
        self.path = path
        self.backup_path = backup_path
        # create backup through hardlink
        if self.backup_path:
            if not os.path.exists(self.backup_path):
                FileUtils.create_file_if_not_exists(self.backup_path)
            if not os.path.samefile(self.path, self.backup_path):
                try:
                    os.unlink(self.backup_path)  # delete existing
                    os.link(src=self.path, dst=self.backup_path)
                except OSError as e:
                    logger.warning(f"Backup not enabled because of OSError: {str(e)}")

    def copy_to(self, db):
        """Merges all data of this db into ``db`` through its ``update``."""
        db.update(**self.data())

    def update(self, **kwargs):
        """Merges ``kwargs`` into the stored data, persists and returns it."""
        data = self.data() or {}
        data = utils.update_dict(data, kwargs)
        self._write_to_file(data=data)
        return data

    def _write_to_file(self, data):
        """Stores the dictionary in the db file, overwrites existing data"""
        self._persisted_data = data
        self.__class__.write(data=self._persisted_data, path=self.path)

    @classmethod
    def write(cls, data, path):
        """Serializes ``data`` to ``path`` in the format named by ``DB_FILE``.

        Raises
        ------
        Exception
            if ``DB_FILE`` is neither "yaml" nor "json"
        """
        if DB_FILE == "yaml":
            FileUtils.write_yaml(abs_path=path, data=data)
        elif DB_FILE == "json":
            FileUtils.write_json(abs_path=path, data=data, sort_keys=False)
        else:
            raise Exception("Unsupported DB format")

    def _read_from_file(self):
        """Returns the stored data, reading the file only while nothing
        (or only empty data) is cached on the instance."""
        self._persisted_data = self._persisted_data or self.__class__.read(self.path)
        return self._persisted_data

    @classmethod
    def read(cls, path):
        """Deserializes the file at ``path``.

        Returns
        -------
        dict
            the stored data; empty if the path is unset, missing, or the
            reader returned nothing

        Raises
        ------
        Exception
            if ``DB_FILE`` is neither "json" nor "yaml"
        """
        if not path or not os.path.exists(path):
            # nothing stored yet
            return {}
        if DB_FILE == "json":
            return FileUtils.read_json(abs_path=path) or {}
        elif DB_FILE == "yaml":
            return FileUtils.read_yaml(abs_path=path) or {}
        else:
            raise Exception("Unsupported DB format")

    def retrieve(self, key: Union[AnyStr, List]):
        """Retrieves the value(s) stored for the given key(s).

        Parameters
        ----------
        key : str or list
            a single key, or a list of keys

        Returns
        -------
        The stored value for a str key; a dict of key -> value for a list of
        keys; None for any other key type. Keys that are not found map to
        None.
        """
        data = self.data()
        if isinstance(key, str):
            return data.get(key, None)
        if isinstance(key, list):
            return {each_key: data.get(each_key, None) for each_key in key}
        return None

    def data(self):
        """Returns the complete stored dictionary."""
        return self._read_from_file()
@@ -0,0 +1,49 @@
1
+ import logging
2
+ import os
3
+
4
+ from asset_db.db import Database
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+
9
class FileDB(Database):
    """Stats db that holds file information.

    Important: Stats db is a global db across the entire repo for all assets
    whereas other dbs are asset specific
    """
    type = "file-stats"

    def add_stats(self, **kwargs):
        """Merges the given entries into the stored "stats" section."""
        self.update(stats=kwargs)

    def get_stats(self) -> dict:
        """Returns the stored "stats" section, or an empty dict if there is none."""
        return self.retrieve("stats") or {}

    def remove_stats(self, ids: list):
        """Deletes specific entries from the "stats" section and persists the result.

        Parameters
        ----------
        ids : list
            url ids of asset objects
        """
        contents = self.data()
        stats = contents.get("stats", {})
        for stat_id in ids:
            if not stats.pop(stat_id, None):
                logger.warning("object not added yet, ignoring remove for:{}".format(stat_id))

        contents["stats"] = stats
        self._write_to_file(data=contents)

    def data(self):
        """Returns the stored data, or an empty stats skeleton while the db file does not exist."""
        if os.path.exists(self.path):
            return self._read_from_file()
        # nothing stored yet
        return {"stats": {}}
@@ -0,0 +1,118 @@
1
+ import logging
2
+
3
+ from asset_db.db import Database
4
+ from asset_utils.utils import utils
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+
9
class ManifestDB(Database):
    """
    Class for managing asset manifest databases.
    """
    type = "asset-manifest"

    def add_objects(self, **kwargs) -> None:
        """Adds objects to the manifest database.

        Parameters
        ----------
        **kwargs : dict
            Key-value pairs to be merged into the "objects" section.
        """
        self.update(objects=kwargs)

    def get_objects(self) -> dict:
        """Retrieves all objects from the manifest database.

        Returns
        -------
        dict
            The stored "objects" section; None if no such section is stored.
        """
        return self.retrieve("objects")

    def get(self, key):
        """Retrieves a specific value from the manifest database based on the provided key.

        Parameters
        ----------
        key : str
            The key for the value to be retrieved.

        Returns
        -------
        The value associated with the key, if found; otherwise, None.
        """
        return self.data().get(key)

    def remove_objects(self, ids: list) -> None:
        """Removes specified objects from the manifest database.

        Ids without a (truthy) stored object are logged and ignored.

        Parameters
        ----------
        ids : list
            A list of ids representing the objects to be removed.
        """
        data = self.data()
        # a manifest without an "objects" section yields None here; fall
        # back to an empty dict so the removal is a no-op instead of an
        # AttributeError
        objects = data.get("objects") or {}
        for _id in ids:
            if not objects.pop(_id, None):
                logger.info("asset not added yet, ignoring remove for:{}".format(_id))
        data["objects"] = objects
        self._write_to_file(data)

    def clear_objects(self) -> None:
        """Clears all objects from the manifest database, keeping the other keys."""
        data = self.data()
        data["objects"] = {}
        self._write_to_file(data=data)

    def get_version(self) -> str:
        """Retrieves the version of the asset from the manifest database.

        Returns
        -------
        str
            The stored "version" value; None if it was never set.
        """
        return self.retrieve("version")

    def set_version(self, version) -> None:
        """Sets the version of the asset in the manifest database.

        The current user id and time stamp are stored alongside the version.

        Parameters
        ----------
        version : str
            The version to be set for the asset.
        """
        data = {
            "version": version,
            "user": utils.get_user_id(),
            "time": utils.get_time_stamp()
        }
        self.update(**data)

    def get_parent_version(self):
        """Returns the stored "parent_version" value; None if it was never set."""
        return self.retrieve("parent_version")

    def set_parent_version(self, version):
        """Stores ``version`` under the "parent_version" key."""
        self.update(parent_version=version)
@@ -0,0 +1,42 @@
1
+ import logging
2
+
3
+ from asset_db.db import Database
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+
8
class RepoDB(Database):
    """
    Stores Assets Meta information
    """
    type = "repo-db"

    def add(self, **kwargs):
        """Merges the given key-value pairs into the stored data."""
        self.update(**kwargs)

    def get(self, key) -> dict:
        """Returns the value(s) stored for ``key`` (a single key or a list of keys)."""
        return self.retrieve(key=key)

    def remove(self, keys: list):
        """Deletes specific top-level entries and persists the result.

        Parameters
        ----------
        keys : list
            asset ids
        """
        contents = self.data()
        for asset_id in keys:
            if not contents.pop(asset_id, None):
                logger.info("asset not added yet, ignoring remove for:{}".format(asset_id))
        self._write_to_file(contents)

    def clear(self):
        """Replaces the stored data with an empty dict."""
        self._write_to_file(data={})
@@ -0,0 +1,116 @@
1
+ import logging
2
+ import os
3
+
4
+ from asset_db.db import Database
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+
9
class StatesDB(Database):
    """States db that holds state information.

    Important: States db is asset-specific
    """

    type = "asset-states"

    def set_asset_state(self, state):
        """Stores ``state`` under the "asset_state" key."""
        self.update(asset_state=state)

    def get_asset_state(self):
        """Returns the stored asset state; any falsy stored value comes back as None."""
        return self.retrieve("asset_state") or None

    def get_ref_states(self):
        """Returns the "ref_states" section, or an empty dict if there is none."""
        return self.retrieve("ref_states") or {}

    def set_commit_message(self, x):
        """Stores ``x`` under the "commit_message" key."""
        self.update(commit_message=x)

    def get_commit_message(self):
        """Returns the stored commit message; None if it was never set."""
        return self.retrieve("commit_message")

    def add_object_states(self, **kwargs):
        """Merges entries into the "object_states" section.

        Parameters
        ----------
        kwargs : dict
            key:value pairs of id: state
        """
        self.update(object_states=kwargs)

    def add_refs_states(self, **kwargs):
        """Merges entries into the "ref_states" section.

        Parameters
        ----------
        kwargs : dict
            key:value pairs of id: state
        """
        self.update(ref_states=kwargs)

    def get_object_states(self) -> dict:
        """Returns the "object_states" section, or an empty dict if there is none."""
        return self.retrieve("object_states") or {}

    def get_content_states(self) -> dict:
        """Returns the "content_states" section, or an empty dict if there is none."""
        return self.retrieve("content_states") or {}

    def add_content_states(self, **kwargs):
        """Merges entries into the "content_states" section."""
        self.update(content_states=kwargs)

    def remove_content_states(self, ids: list):
        """Deletes the given ids from the "content_states" section and persists the result."""
        contents = self.data()
        states = contents.get("content_states", {})
        for content_id in ids:
            if not states.pop(content_id, None):
                logger.warning("content not added yet, ignoring remove for:{}".format(content_id))

        contents["content_states"] = states
        self._write_to_file(data=contents)

    def remove_object_states(self, ids: list):
        """Deletes the given ids from the "object_states" section and persists the result.

        Parameters
        ----------
        ids : list
            url ids of asset objects
        """
        contents = self.data()
        states = contents.get("object_states", {})
        for object_id in ids:
            if not states.pop(object_id, None):
                logger.warning("asset not added yet, ignoring remove for:{}".format(object_id))

        contents["object_states"] = states
        self._write_to_file(data=contents)

    def remove_ref_states(self, keys: list):
        """Deletes the given keys from the "ref_states" section and persists the result.

        Unknown keys are skipped silently.

        Parameters
        ----------
        keys : list
            asset ids
        """
        contents = self.data()
        states = contents.get("ref_states", {})
        for ref_key in keys:
            states.pop(ref_key, None)

        contents["ref_states"] = states
        self._write_to_file(data=contents)

    def data(self):
        """Returns the stored data, or a default skeleton while the db file does not exist."""
        if os.path.exists(self.path):
            return self._read_from_file() or {}
        # nothing stored yet
        return {"object_states": {}, "asset_state": None}
@@ -0,0 +1,91 @@
1
+ import logging
2
+
3
+ from asset_db.db import Database
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+
8
class StoreDB(Database):
    """
    Stores Assets Meta information
    """
    type = "store-db"

    def add(self, **kwargs):
        """Merges the given key-value pairs into the stored data."""
        self.update(**kwargs)

    def get(self, key) -> dict:
        """Returns the value(s) stored for ``key`` (a single key or a list of keys)."""
        return self.retrieve(key=key)

    def add_repo(self, repo_id, data: dict):
        """Registers a repo under ``repo_id`` and persists the result.

        Parameters
        ----------
        repo_id : str
            key of the repo inside the "repos" section
        data : dict
            repo record; its 'path' entry identifies the repo location
        """
        # pop any existing repos that point to the same path
        # those are not valid anymore since 2 repos will never share the same path
        existing = self.data()
        repos = existing.get("repos") or {}
        # iterate over a snapshot of the keys because entries get deleted
        for existing_id in list(repos):
            if repos[existing_id].get('path') == data.get('path'):
                del repos[existing_id]
        repos[repo_id] = data
        existing["repos"] = repos
        self._write_to_file(data=existing)

    def get_repos(self):
        """Returns the "repos" section, or an empty dict if there is none."""
        return self.retrieve("repos") or {}

    def remove_repo(self, *repo_ids):
        """Removes the given repos and persists the result.

        Unknown ids are skipped; calling without ids does nothing.
        """
        if not repo_ids:
            return
        data = self.data()
        # the "repos" section may be missing; fall back to an empty dict so
        # the membership test below does not run against None
        repos = data.get("repos") or {}
        for repo_id in repo_ids:
            repos.pop(repo_id, None)
        data["repos"] = repos
        self._write_to_file(data=data)

    def add_temp_asset(self, class_name, asset_data: dict) -> None:
        """Stores ``asset_data`` under temp_assets/<class_name>/<seq_id>.

        The seq_id is read from ``asset_data["seq_id"]``.
        """
        temp_assets = self.retrieve("temp_assets") or {}
        class_assets = temp_assets.get(class_name) or {}
        class_assets[asset_data.get("seq_id")] = asset_data
        temp_assets[class_name] = class_assets
        self.update(temp_assets=temp_assets)

    def remove_temp_asset(self, class_name, seq_id):
        """Removes the temp asset ``seq_id`` of ``class_name``, if present, and persists the result."""
        data = self.data()
        temp_assets = data.get("temp_assets") or {}
        class_assets = temp_assets.get(class_name, {})
        if seq_id in class_assets:
            del class_assets[seq_id]
        temp_assets[class_name] = class_assets
        data["temp_assets"] = temp_assets
        self._write_to_file(data=data)

    def list_temp_assets(self, class_name):
        """Returns the temp assets stored for ``class_name``, or an empty dict."""
        temp_assets = self.retrieve("temp_assets") or {}
        return temp_assets.get(class_name, {})

    def remove(self, keys: list):
        """Deletes specific top-level entries and persists the result.

        Parameters
        ----------
        keys : list
            asset ids
        """
        data = self.data()
        for asset_id in keys:
            if not data.pop(asset_id, None):
                logger.info("asset not added yet, ignoring remove for:{}".format(asset_id))
        self._write_to_file(data)

    def clear(self):
        """Replaces the stored data with an empty dict."""
        self._write_to_file(data={})
@@ -0,0 +1,51 @@
1
+ import logging
2
+ import os
3
+
4
+ from asset_db.db import Database
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+
9
class StoreFileDB(Database):
    """Stats db that holds file information.

    Important: Stats db is a global db across the entire repo for all assets
    whereas other dbs are asset specific
    """

    type = "store-file-stats"

    def add_stats(self, **kwargs):
        """Merges the given entries into the stored "stats" section."""
        self.update(stats=kwargs)

    def get_stats(self) -> dict:
        """Returns the stored "stats" section, or an empty dict if there is none."""
        return self.retrieve("stats") or {}

    def remove_stats(self, ids: list):
        """Deletes specific entries from the "stats" section and persists the result.

        Parameters
        ----------
        ids : list
            url ids of asset objects
        """
        contents = self.data()
        stats = contents.get("stats", {})
        for stat_id in ids:
            if not stats.pop(stat_id, None):
                logger.warning("object not added yet, ignoring remove for:{}".format(stat_id))

        contents["stats"] = stats
        self._write_to_file(data=contents)

    def data(self):
        """Returns the stored data, or an empty stats skeleton while the db file does not exist."""
        if os.path.exists(self.path):
            return self._read_from_file()
        # nothing stored yet
        return {"stats": {}}
File without changes
@@ -0,0 +1,38 @@
1
+ import os
2
+
3
+ import pytest
4
+
5
+ from asset_db.assets_db import AssetsDB
6
+
7
+
8
@pytest.fixture(scope="module")
def db(asset_root) -> AssetsDB:
    """Module-scoped AssetsDB stored at <asset_root>/database_test/meta_hashes.json."""
    db_dir = os.path.join(asset_root, "database_test")
    os.makedirs(db_dir, exist_ok=True)
    return AssetsDB(path=os.path.join(db_dir, "meta_hashes.json"))
13
+
14
+
15
def test_db_create(db: AssetsDB):
    """The fixture yields a db instance whose backing file exists."""
    assert db is not None  # db instance got created
    assert db.path
    assert os.path.exists(db.path)
19
+
20
+
21
def test_add_file_hash(db: AssetsDB):
    """A hash added for a path is readable through ``file_hashes``."""
    file_path, file_hash = "test/path", "abc123"
    db.add_file_hash(file_path, file_hash)
    assert db.file_hashes[file_path] == file_hash, "File hash should be added to the database"
26
+
27
+
28
def test_remove_files(db: AssetsDB):
    """Removing one path leaves the other stored paths untouched."""
    # Setup - add some file hashes
    removed_path, kept_path = "test/path1", "test/path2"
    for file_path, file_hash in ((removed_path, "abc123"), (kept_path, "def456")):
        db.add_file_hash(file_path, file_hash)

    # Test removing one file
    db.remove_files(removed_path)
    assert removed_path not in db.file_hashes, "File should be removed from the database"
    assert kept_path in db.file_hashes, "Other files should remain in the database"
@@ -0,0 +1,53 @@
1
+ import os
2
+
3
+ import pytest
4
+
5
+ from asset_db import AssetsDB, FileDB
6
+ from asset_db.db import Database
7
+
8
+
9
def test_path_singleton():
    """
    Test that each path can only have one instance of a db class
    """
    db1 = AssetsDB(path="/tmp/test_1.yaml")
    db2 = AssetsDB(path="/tmp/test_2.yaml")
    # the singleton contract is about object identity, so compare with `is`
    assert db1 is not db2

    db3 = AssetsDB(path="/tmp/test_1.yaml")
    assert db3 is db1
    assert db3 is not db2

    with pytest.raises(Exception) as e:
        FileDB(path="/tmp/test_2.yaml")
    # checked after the block: a statement placed behind the raising call
    # inside the `with` body would never execute
    assert str(e.value) == "Instance already exists with same path but different class type"
24
+
25
+
26
@pytest.fixture(scope="module")
def db(asset_root) -> Database:
    """Module-scoped Database stored at <asset_root>/database_test/db_file.json."""
    db_dir = os.path.join(asset_root, "database_test")
    os.makedirs(db_dir, exist_ok=True)
    return Database(path=os.path.join(db_dir, "db_file.json"))
31
+
32
+
33
def test_db_create(db: Database):
    """The fixture yields a db instance whose backing file exists."""
    assert db is not None  # db instance got created
    assert db.path
    assert os.path.exists(db.path)
37
+
38
+
39
def test_update(db):
    """A value written through ``update`` is returned by ``retrieve``."""
    db.update(**{"key": "value"})
    stored = db.retrieve("key")
    assert stored == "value"
42
+
43
+
44
def test_write_and_read(db):
    """Data written by the ``write`` classmethod round-trips through ``read``."""
    payload = {"key": "value"}
    Database.write(data=payload, path=db.path)
    assert Database.read(db.path) == {"key": "value"}
48
+
49
+
50
def test_retrieve(db):
    """``retrieve`` accepts a single key as well as a list of keys."""
    db.update(**{"key1": "value1", "key2": "value2"})
    single = db.retrieve("key1")
    several = db.retrieve(["key1", "key2"])
    assert single == "value1"
    assert several == {"key1": "value1", "key2": "value2"}
@@ -0,0 +1,32 @@
1
+ import os
2
+
3
+ import pytest
4
+
5
+ from asset_db.file_db import FileDB
6
+
7
+
8
@pytest.fixture(scope="module")
def db(asset_root) -> FileDB:
    """Module-scoped FileDB stored at <asset_root>/database_test/file_stats.json."""
    db_dir = os.path.join(asset_root, "database_test")
    os.makedirs(db_dir, exist_ok=True)
    return FileDB(path=os.path.join(db_dir, "file_stats.json"))
13
+
14
+
15
def test_db_create(db: FileDB):
    """The fixture yields a db instance whose backing file exists."""
    assert db is not None  # db instance got created
    assert db.path
    assert os.path.exists(db.path)
19
+
20
+
21
def test_add_and_get_stats(db):
    """Stats added through ``add_stats`` are returned by ``get_stats``."""
    db.add_stats(**{"file1": "hash1", "file2": "hash2"})
    assert db.get_stats() == {"file1": "hash1", "file2": "hash2"}, "Stats should be correctly added and retrieved"
25
+
26
+
27
def test_remove_stats(db):
    """Removing one stat leaves the other stored stats untouched."""
    db.add_stats(**{"file1": "hash1", "file2": "hash2"})
    db.remove_stats(["file1"])
    remaining = db.get_stats()
    assert "file1" not in remaining, "file1 should be removed from stats"
    assert "file2" in remaining, "file2 should remain in stats"
@@ -0,0 +1,56 @@
1
+ import os
2
+
3
+ import pytest
4
+
5
+ from asset_db import ManifestDB
6
+
7
+
8
@pytest.fixture(scope="module")
def db(asset_root) -> ManifestDB:
    """Module-scoped ManifestDB stored at <asset_root>/database_test/manifest.yaml."""
    db_dir = os.path.join(asset_root, "database_test")
    os.makedirs(db_dir, exist_ok=True)
    return ManifestDB(path=os.path.join(db_dir, "manifest.yaml"))
13
+
14
+
15
def test_db_create(db: ManifestDB):
    """The fixture yields a db instance whose backing file exists."""
    assert db is not None  # db instance got created
    assert db.path
    assert os.path.exists(db.path)
19
+
20
+
21
def test_add_objects(db: ManifestDB):
    """Objects added through ``add_objects`` are returned by ``get_objects``."""
    expected = {
        "1": {"name": "test_object1"},
        "2": {"name": "test_object2"},
    }
    db.add_objects(**expected)
    assert db.get_objects() == expected
26
+
27
+
28
def test_get_objects(db: ManifestDB):
    """``get_objects`` returns the "objects" section written through ``update``."""
    stored_objects = {
        "1": {"name": "test_object1"},
        "2": {"name": "test_object2"},
    }
    db.update(objects=stored_objects)
    assert db.get_objects() == stored_objects
33
+
34
+
35
def test_get(db: ManifestDB):
    """``get`` returns the value stored for a top-level key."""
    db.update(key1="value1", key2="value2")
    assert db.get("key1") == "value1"
40
+
41
+
42
def test_remove_objects(db: ManifestDB):
    """Removing one object leaves the other stored objects untouched."""
    stored_objects = {
        "1": {"name": "test_object1"},
        "2": {"name": "test_object2"},
    }
    db.update(objects=stored_objects)
    db.remove_objects(["1"])
    remaining = db.get_objects()
    assert "1" not in remaining
    assert "2" in remaining
49
+
50
+
51
def test_clear_objects(db: ManifestDB):
    """``clear_objects`` leaves an empty "objects" section behind."""
    stored_objects = {
        "1": {"name": "test_object1"},
        "2": {"name": "test_object2"},
    }
    db.update(objects=stored_objects)
    db.clear_objects()
    assert db.get_objects() == {}
@@ -0,0 +1,40 @@
1
+ import os
2
+
3
+ import pytest
4
+
5
+ from asset_db import StatesDB
6
+
7
+ DATA = {
8
+ "test_data/2.txt": "pending",
9
+ "test_data/imgs/photo-1493976040374-85c8e12f0c0e.jpg": "uploaded",
10
+ "test_data/imgs/photo-1504198453319-5ce911bafcde.jpg": "pending",
11
+ "test_data/imgs/photo-1507143550189-fed454f93097.jpg": "uploading",
12
+ "test_data/imgs/photo-1513938709626-033611b8cc03.jpg": "version",
13
+ "test_data/imgs/photo-1516117172878-fd2c41f4a759.jpg": "versioned",
14
+ "test_data/imgs/photo-1516972810927-80185027ca84.jpg": "committing",
15
+ "test_data/imgs/photo-1522364723953-452d3431c267.jpg": "committed",
16
+ "test_data/imgs/photo-1524429656589-6633a470097c.jpg": "renaming",
17
+ "test_data/imgs/photo-1530122037265-a5f1f91d3b99.jpg": "renamed",
18
+ }
19
+
20
+
21
@pytest.fixture(scope="module")
def states_db(asset_root):
    """Module-scoped StatesDB stored at <asset_root>/states_db_test/states.yaml."""
    db_dir = os.path.join(asset_root, "states_db_test")
    # exist_ok keeps the fixture usable when the directory is left over from
    # an earlier run, matching the other db fixtures
    os.makedirs(db_dir, exist_ok=True)
    return StatesDB(path=os.path.join(db_dir, "states.yaml"))
25
+
26
+
27
def test_add_states(states_db):
    """Object states added to the db are returned unchanged."""
    states_db.add_object_states(**DATA)
    assert states_db.get_object_states() == DATA
32
+
33
+
34
def test_remove_states(states_db):
    """Removing one object state leaves all other states untouched."""
    removed_key = "test_data/2.txt"
    states_db.add_object_states(**dict(DATA))
    expected = {key: state for key, state in DATA.items() if key != removed_key}
    states_db.remove_object_states([removed_key])
    assert expected == states_db.get_object_states()
@@ -0,0 +1,19 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0.0,<69.0.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "amapy-db"
7
+ version = "1.0.0"
8
+ description = "The Database plugin for asset-manager project. It provides functionalities to manage and store metadata related to digital assets, such as storing MD5 hashes of asset files and managing these records efficiently."
9
+ readme = "README.md"
10
+ license = { text = "Copyright (c) 2024 Roche Diagnostics Computation Science & Informatics" }
11
+ requires-python = ">=3.10,<3.11" # "==3.10.*"
12
+ dependencies = [
13
+ "amapy-pluggy==1.*",
14
+ "amapy-utils==1.*"
15
+ ]
16
+
17
+ [tool.setuptools.packages.find]
18
+ include = ["*"]
19
+ namespaces = false
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+