amapy-db 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amapy-db-1.0.0/PKG-INFO +11 -0
- amapy-db-1.0.0/README.md +1 -0
- amapy-db-1.0.0/amapy_db.egg-info/PKG-INFO +11 -0
- amapy-db-1.0.0/amapy_db.egg-info/SOURCES.txt +22 -0
- amapy-db-1.0.0/amapy_db.egg-info/dependency_links.txt +1 -0
- amapy-db-1.0.0/amapy_db.egg-info/requires.txt +2 -0
- amapy-db-1.0.0/amapy_db.egg-info/top_level.txt +1 -0
- amapy-db-1.0.0/asset_db/__init__.py +7 -0
- amapy-db-1.0.0/asset_db/assets_db.py +44 -0
- amapy-db-1.0.0/asset_db/db.py +107 -0
- amapy-db-1.0.0/asset_db/file_db.py +49 -0
- amapy-db-1.0.0/asset_db/manifest_db.py +118 -0
- amapy-db-1.0.0/asset_db/repo_db.py +42 -0
- amapy-db-1.0.0/asset_db/states_db.py +116 -0
- amapy-db-1.0.0/asset_db/store_db.py +91 -0
- amapy-db-1.0.0/asset_db/store_file_db.py +51 -0
- amapy-db-1.0.0/asset_db/tests/__init__.py +0 -0
- amapy-db-1.0.0/asset_db/tests/test_assets_db.py +38 -0
- amapy-db-1.0.0/asset_db/tests/test_db.py +53 -0
- amapy-db-1.0.0/asset_db/tests/test_file_db.py +32 -0
- amapy-db-1.0.0/asset_db/tests/test_manifest_db.py +56 -0
- amapy-db-1.0.0/asset_db/tests/test_states_db.py +40 -0
- amapy-db-1.0.0/pyproject.toml +19 -0
- amapy-db-1.0.0/setup.cfg +4 -0
amapy-db-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: amapy-db
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: The Database plugin for asset-manager project. It provides functionalities to manage and store metadata related to digital assets, such as storing MD5 hashes of asset files and managing these records efficiently.
|
|
5
|
+
License: Copyright (c) 2024 Roche Diagnostics Computation Science & Informatics
|
|
6
|
+
Requires-Python: <3.11,>=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: amapy-pluggy==1.*
|
|
9
|
+
Requires-Dist: amapy-utils==1.*
|
|
10
|
+
|
|
11
|
+
asset-db
|
amapy-db-1.0.0/README.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
asset-db
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: amapy-db
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: The Database plugin for asset-manager project. It provides functionalities to manage and store metadata related to digital assets, such as storing MD5 hashes of asset files and managing these records efficiently.
|
|
5
|
+
License: Copyright (c) 2024 Roche Diagnostics Computation Science & Informatics
|
|
6
|
+
Requires-Python: <3.11,>=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: amapy-pluggy==1.*
|
|
9
|
+
Requires-Dist: amapy-utils==1.*
|
|
10
|
+
|
|
11
|
+
asset-db
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
amapy_db.egg-info/PKG-INFO
|
|
4
|
+
amapy_db.egg-info/SOURCES.txt
|
|
5
|
+
amapy_db.egg-info/dependency_links.txt
|
|
6
|
+
amapy_db.egg-info/requires.txt
|
|
7
|
+
amapy_db.egg-info/top_level.txt
|
|
8
|
+
asset_db/__init__.py
|
|
9
|
+
asset_db/assets_db.py
|
|
10
|
+
asset_db/db.py
|
|
11
|
+
asset_db/file_db.py
|
|
12
|
+
asset_db/manifest_db.py
|
|
13
|
+
asset_db/repo_db.py
|
|
14
|
+
asset_db/states_db.py
|
|
15
|
+
asset_db/store_db.py
|
|
16
|
+
asset_db/store_file_db.py
|
|
17
|
+
asset_db/tests/__init__.py
|
|
18
|
+
asset_db/tests/test_assets_db.py
|
|
19
|
+
asset_db/tests/test_db.py
|
|
20
|
+
asset_db/tests/test_file_db.py
|
|
21
|
+
asset_db/tests/test_manifest_db.py
|
|
22
|
+
asset_db/tests/test_states_db.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
asset_db
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from asset_db.db import Database
|
|
4
|
+
|
|
5
|
+
logger = logging.getLogger(__name__)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class AssetsDB(Database):
    """
    Stores the md5 hashes of asset*.yamls. We use this to check which
    files need to be re-downloaded during fetch operations.
    """
    type = "assets-db"

    @property
    def file_hashes(self):
        """Mapping of file path -> md5 hash, lazily loaded from the backing file."""
        try:
            return self._file_hashes
        except AttributeError:
            # first access: populate the in-memory cache from disk
            self._file_hashes = self.data()
            return self._file_hashes

    def add_file_hash(self, path, hash):
        """Adds (or overwrites) the hash entry for a single file path."""
        updated = self.update(**{path: hash})
        setattr(self, "_file_hashes", updated)

    def remove_files(self, *paths):
        """Deletes specific file entries and persists the result.

        Unknown paths are ignored with an informational log message.
        """
        data = self.file_hashes
        for path in paths:
            deleted = data.pop(path, None)
            if not deleted:
                # fixed: previously formatted the builtin `id` instead of the path
                logger.info("file not added yet, ignoring remove for:{}".format(path))
        setattr(self, "_file_hashes", data)
        self._write_to_file(data)

    def clear(self):
        """Clears all assets"""
        setattr(self, "_file_hashes", {})
        self._write_to_file(data={})
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from typing import List, AnyStr, Union
|
|
4
|
+
|
|
5
|
+
from asset_utils.utils import utils
|
|
6
|
+
from asset_utils.utils.file_utils import FileUtils
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
DB_FILE = "json" # yaml
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Database:
    """Yaml/json wrapper presenting a db-like interface over a single file.

    Path-keyed singleton: exactly one instance exists per file path, shared
    through ``_instances``. Subclasses also behave as singletons, so make
    sure all DBs inherit directly from this class only; requesting an
    existing path with a different class raises.
    """
    path = None
    backup_path = None  # in case we want to cache the database
    _persisted_data = {}
    _instances = {}

    def __new__(cls, *args, **kwargs):
        # there can be only one instance per path
        # fixed: calling with no args previously raised IndexError from
        # args[0] instead of the intended error message below
        path = kwargs.get('path') or (args[0] if args else None)
        if not path:
            raise Exception("required param: path is null")

        if cls._instances.get(path, None) is None:
            cls._instances[path] = super(Database, cls).__new__(cls)

        if cls._instances[path].__class__.__name__ != cls.__name__:
            raise Exception("Instance already exists with same path but different class type")
        return cls._instances[path]

    def __init__(self, path, backup_path: str = None):
        """Creates the backing file if needed and, optionally, a hardlink backup.

        Parameters
        ----------
        path : str
            Location of the db file; created empty if it does not exist.
        backup_path : str, optional
            If given, kept as a hardlinked copy of ``path``.
        """
        if path:
            FileUtils.create_file_if_not_exists(path)
            self.path = path
            self.backup_path = backup_path
            # create backup through hardlink
            if self.backup_path:
                if not os.path.exists(self.backup_path):
                    FileUtils.create_file_if_not_exists(self.backup_path)
                if not os.path.samefile(self.path, self.backup_path):
                    try:
                        os.unlink(self.backup_path)  # delete existing
                        os.link(src=self.path, dst=self.backup_path)
                    except OSError as e:
                        # hardlinks may be unsupported (cross-device, FS limits);
                        # degrade gracefully rather than fail construction
                        logger.warning(f"Backup not enabled because of OSError: {str(e)}")

    def copy_to(self, db):
        """Merges this db's data into another Database instance."""
        db.update(**self.data())

    def update(self, **kwargs):
        """Deep-merges ``kwargs`` into the stored data, persists and returns it."""
        data = self.data() or {}
        data = utils.update_dict(data, kwargs)
        self._write_to_file(data=data)
        return data

    def _write_to_file(self, data):
        """Stores the dictionary in the backing file, overwrites existing data."""
        self._persisted_data = data
        self.__class__.write(data=self._persisted_data, path=self.path)

    @classmethod
    def write(cls, data, path):
        """Serializes ``data`` to ``path`` in the configured DB_FILE format."""
        if DB_FILE == "yaml":
            FileUtils.write_yaml(abs_path=path, data=data)
        elif DB_FILE == "json":
            FileUtils.write_json(abs_path=path, data=data, sort_keys=False)
        else:
            raise Exception("Unsupported DB format")

    def _read_from_file(self):
        """Returns cached data, reading from the backing file on first use."""
        self._persisted_data = self._persisted_data or self.__class__.read(self.path)
        return self._persisted_data

    @classmethod
    def read(cls, path):
        """Deserializes ``path``; returns {} when nothing is stored yet."""
        if not path or not os.path.exists(path):
            # nothing stored yet
            return {}
        if DB_FILE == "json":
            return FileUtils.read_json(abs_path=path) or {}
        elif DB_FILE == "yaml":
            return FileUtils.read_yaml(abs_path=path) or {}
        else:
            raise Exception("Unsupported DB format")

    def retrieve(self, key: Union[AnyStr, List]):
        """Retrieves specific values for specific keys from the stored data.

        If keys are not found, the default value of None is returned.
        For a list of keys, returns a dict of key -> value (or None).
        """
        data = self.data()
        if isinstance(key, str):
            return data.get(key, None)
        elif isinstance(key, list):
            result = {}
            for each_key in key:
                result[each_key] = data.get(each_key, None)
            return result
        else:
            return None

    def data(self):
        """Returns the full stored dictionary."""
        return self._read_from_file()
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
from asset_db.db import Database
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class FileDB(Database):
    """Stats db that holds file information to manage.

    Important: Stats db is a global db across the entire repo for all
    assets, whereas the other dbs are asset specific.
    """
    type = "file-stats"

    def add_stats(self, **kwargs):
        """Merges the given key-value pairs into the stored stats."""
        self.update(stats=kwargs)

    def get_stats(self) -> dict:
        """Returns assets and their md5 hashes ({} when nothing stored)."""
        return self.retrieve("stats") or {}

    def remove_stats(self, ids: list):
        """Deletes specific assets from the list

        Parameters
        ----------
        ids : list
            url ids of asset objects
        """
        data = self.data()
        stats = data.get("stats", {})
        for entry_id in ids:
            deleted = stats.pop(entry_id, None)
            if not deleted:
                logger.warning("object not added yet, ignoring remove for:{}".format(entry_id))

        data["stats"] = stats
        self._write_to_file(data=data)

    def data(self):
        """Returns stored data; an empty stats mapping when nothing exists yet."""
        if not os.path.exists(self.path):
            return {"stats": {}}
        return self._read_from_file()
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from asset_db.db import Database
|
|
4
|
+
from asset_utils.utils import utils
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ManifestDB(Database):
    """
    Class for managing asset manifest databases.
    """
    type = "asset-manifest"

    def add_objects(self, **kwargs) -> None:
        """Adds new objects to the manifest database.

        Parameters
        ----------
        **kwargs : dict
            Key-value pairs to be added as objects to the database.
        """
        self.update(objects=kwargs)

    def get_objects(self) -> dict:
        """Retrieves all objects from the manifest database.

        Returns
        -------
        dict
            A dictionary of all objects in the manifest database.
        """
        return self.retrieve("objects")

    def get(self, key):
        """Retrieves a specific value from the manifest database.

        Parameters
        ----------
        key : str
            The key for the value to be retrieved.

        Returns
        -------
        The value associated with the key, if found; otherwise, None.
        """
        return self.data().get(key)

    def remove_objects(self, ids: list) -> None:
        """Removes specified objects from the manifest database.

        Unknown ids are ignored with an informational log message.

        Parameters
        ----------
        ids : list
            A list of ids representing the objects to be removed.
        """
        data = self.data()
        # fixed: default to {} — a manifest without an "objects" key
        # previously crashed with AttributeError on None.pop
        objects = data.get("objects") or {}
        for _id in ids:
            deleted = objects.pop(_id, None)
            if not deleted:
                logger.info("asset not added yet, ignoring remove for:{}".format(_id))
        data["objects"] = objects
        self._write_to_file(data)

    def clear_objects(self) -> None:
        """Clears all objects from the manifest database."""
        data = self.data()
        data["objects"] = {}
        self._write_to_file(data=data)

    def get_version(self) -> str:
        """Returns the version of the asset stored in the manifest."""
        return self.retrieve("version")

    def set_version(self, version) -> None:
        """Sets the asset version, recording the user and a timestamp.

        Parameters
        ----------
        version : str
            The version to be set for the asset.
        """
        data = {
            "version": version,
            "user": utils.get_user_id(),
            "time": utils.get_time_stamp()
        }
        self.update(**data)

    def get_parent_version(self):
        """Returns the recorded parent version, or None."""
        return self.retrieve("parent_version")

    def set_parent_version(self, version):
        """Records the parent version in the manifest."""
        self.update(parent_version=version)
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from asset_db.db import Database
|
|
4
|
+
|
|
5
|
+
logger = logging.getLogger(__name__)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class RepoDB(Database):
    """
    Stores Assets Meta information
    """
    type = "repo-db"

    def add(self, **kwargs):
        """Adds to existing list of objects"""
        self.update(**kwargs)

    def get(self, key) -> dict:
        """Returns objects from yaml file"""
        return self.retrieve(key=key)

    def remove(self, keys: list):
        """Deletes specific assets from the list

        Parameters
        ----------
        keys : list
            asset ids
        """
        data = self.data()
        for asset_id in keys:
            deleted = data.pop(asset_id, None)
            if not deleted:
                logger.info("asset not added yet, ignoring remove for:{}".format(asset_id))
        self._write_to_file(data)

    def clear(self):
        """Clears all assets"""
        self._write_to_file(data={})
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
from asset_db.db import Database
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class StatesDB(Database):
    """Stats db that holds file information to manage.

    Important: States db is asset-specific.
    """

    type = "asset-states"

    def set_asset_state(self, state):
        """Persists the overall asset state."""
        self.update(asset_state=state)

    def get_asset_state(self):
        """Returns the overall asset state, or None."""
        return self.retrieve("asset_state") or None

    def get_ref_states(self):
        """Returns the stored ref states ({} when none)."""
        return self.retrieve("ref_states") or {}

    def set_commit_message(self, x):
        """Persists the commit message."""
        self.update(commit_message=x)

    def get_commit_message(self):
        """Returns the stored commit message."""
        return self.retrieve("commit_message")

    def add_object_states(self, **kwargs):
        """Adds to existing list of stats

        Parameters
        ----------
        kwargs : dict
            key:value pairs of id: state
        """
        self.update(object_states=kwargs)

    def add_refs_states(self, **kwargs):
        """Adds to existing list of stats

        Parameters
        ----------
        kwargs : dict
            key:value pairs of id: state
        """
        self.update(ref_states=kwargs)

    def get_object_states(self) -> dict:
        """Returns assets and their md5 hashes"""
        return self.retrieve("object_states") or {}

    def get_content_states(self) -> dict:
        """Returns the stored content states ({} when none)."""
        return self.retrieve("content_states") or {}

    def add_content_states(self, **kwargs):
        """Adds id -> state pairs for contents."""
        self.update(content_states=kwargs)

    def remove_content_states(self, ids: list):
        """Drops the given ids from the content states; unknown ids are logged."""
        data = self.data()
        states = data.get("content_states", {})
        for content_id in ids:
            deleted = states.pop(content_id, None)
            if not deleted:
                logger.warning("content not added yet, ignoring remove for:{}".format(content_id))

        data["content_states"] = states
        self._write_to_file(data=data)

    def remove_object_states(self, ids: list):
        """Deletes specific assets from the list

        Parameters
        ----------
        ids : list
            url ids of asset objects
        """
        data = self.data()
        states = data.get("object_states", {})
        for object_id in ids:
            deleted = states.pop(object_id, None)
            if not deleted:
                logger.warning("asset not added yet, ignoring remove for:{}".format(object_id))

        data["object_states"] = states
        self._write_to_file(data=data)

    def remove_ref_states(self, keys: list):
        """Deletes specific assets_refs from the list

        Parameters
        ----------
        keys : list
            asset ids
        """
        data = self.data()
        states = data.get("ref_states", {})
        for ref_key in keys:
            states.pop(ref_key, None)

        data["ref_states"] = states
        self._write_to_file(data=data)

    def data(self):
        """Returns stored data; sensible defaults when nothing stored yet."""
        if not os.path.exists(self.path):
            return {"object_states": {}, "asset_state": None}
        return self._read_from_file() or {}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from asset_db.db import Database
|
|
4
|
+
|
|
5
|
+
logger = logging.getLogger(__name__)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class StoreDB(Database):
    """
    Stores Assets Meta information: the repo registry and temp assets.
    """
    type = "store-db"

    def add(self, **kwargs):
        """Adds to existing list of objects"""
        self.update(**kwargs)

    def get(self, key) -> dict:
        """Returns objects from yaml file"""
        return self.retrieve(key=key)

    def add_repo(self, repo_id, data: dict):
        """Registers a repo, evicting any existing entry with the same path.

        Two repos will never share the same path, so any stale record
        pointing at the same path is no longer valid and gets popped.
        """
        existing = self.data()
        repos = existing.get("repos") or {}
        ids = list(repos.keys())
        for rid in ids:
            if repos[rid].get('path') == data.get('path'):
                del repos[rid]
        repos[repo_id] = data
        existing["repos"] = repos
        self._write_to_file(data=existing)

    def get_repos(self):
        """Returns the repo registry ({} when none)."""
        return self.retrieve("repos") or {}

    def remove_repo(self, *repo_ids):
        """Removes the given repo ids; unknown ids are silently ignored."""
        if not repo_ids:
            return
        data = self.data()
        # fixed: default to {} — a store without a "repos" key previously
        # crashed with TypeError on membership test against None
        repos = data.get("repos") or {}
        for repo_id in repo_ids:
            if repo_id in repos:
                del repos[repo_id]
        data["repos"] = repos
        self._write_to_file(data=data)

    def add_temp_asset(self, class_name, asset_data: dict) -> None:
        """Records a temporary asset under its class, keyed by seq_id."""
        temp_assets = self.retrieve("temp_assets") or {}
        class_assets = temp_assets.get(class_name) or {}
        seq_id = asset_data.get("seq_id")
        class_assets[seq_id] = asset_data
        temp_assets[class_name] = class_assets
        self.update(temp_assets=temp_assets)

    def remove_temp_asset(self, class_name, seq_id):
        """Drops one temporary asset record; no-op if absent."""
        data = self.data()
        temp_assets = data.get("temp_assets") or {}
        class_assets = temp_assets.get(class_name, {})
        if seq_id in class_assets:
            del class_assets[seq_id]
        temp_assets[class_name] = class_assets
        data["temp_assets"] = temp_assets
        self._write_to_file(data=data)

    def list_temp_assets(self, class_name):
        """Returns the temp assets recorded for class_name ({} when none)."""
        temp_assets = self.retrieve("temp_assets") or {}
        return temp_assets.get(class_name, {})

    def remove(self, keys: list):
        """Deletes specific assets from the list

        Parameters
        ----------
        keys : list
            asset ids
        """
        data = self.data()
        for key in keys:
            deleted = data.pop(key, None)
            if not deleted:
                logger.info("asset not added yet, ignoring remove for:{}".format(key))
        self._write_to_file(data)

    def clear(self):
        """Clears all assets"""
        self._write_to_file(data={})
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
from asset_db.db import Database
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class StoreFileDB(Database):
    """Stats db that holds file information to manage.

    Important: Stats db is a global db across the entire repo for all
    assets, whereas the other dbs are asset specific.
    """

    type = "store-file-stats"

    def add_stats(self, **kwargs):
        """Merges the given key-value pairs into the stored stats."""
        self.update(stats=kwargs)

    def get_stats(self) -> dict:
        """Returns assets and their md5 hashes ({} when nothing stored)."""
        return self.retrieve("stats") or {}

    def remove_stats(self, ids: list):
        """Deletes specific assets from the list

        Parameters
        ----------
        ids : list
            url ids of asset objects
        """
        data = self.data()
        stats = data.get("stats", {})
        for entry_id in ids:
            deleted = stats.pop(entry_id, None)
            if not deleted:
                logger.warning("object not added yet, ignoring remove for:{}".format(entry_id))

        data["stats"] = stats
        self._write_to_file(data=data)

    def data(self):
        """Returns stored data; an empty stats mapping when nothing exists yet."""
        if not os.path.exists(self.path):
            return {"stats": {}}
        return self._read_from_file()
|
|
File without changes
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from asset_db.assets_db import AssetsDB
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@pytest.fixture(scope="module")
def db(asset_root) -> AssetsDB:
    # module-scoped AssetsDB backed by a throwaway json file under asset_root
    db_path = os.path.join(asset_root, "database_test", "meta_hashes.json")
    os.makedirs(os.path.dirname(db_path), exist_ok=True)
    return AssetsDB(path=db_path)


def test_db_create(db: AssetsDB):
    """make sure metadb got created"""
    assert db is not None  # db instance got created
    assert db.path and os.path.exists(db.path)


def test_add_file_hash(db: AssetsDB):
    # single entry round-trips through the lazily-cached file_hashes property
    path = "test/path"
    hash = "abc123"
    db.add_file_hash(path, hash)
    assert db.file_hashes[path] == hash, "File hash should be added to the database"


def test_remove_files(db: AssetsDB):
    # Setup - add some file hashes
    paths = ["test/path1", "test/path2"]
    hashes = ["abc123", "def456"]
    for path, hash in zip(paths, hashes):
        db.add_file_hash(path, hash)

    # Test removing one file
    db.remove_files(paths[0])
    assert paths[0] not in db.file_hashes, "File should be removed from the database"
    assert paths[1] in db.file_hashes, "Other files should remain in the database"
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from asset_db import AssetsDB, FileDB
|
|
6
|
+
from asset_db.db import Database
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_path_singleton():
    """
    Test that each path can only have one instance of a db class
    """
    db1 = AssetsDB(path="/tmp/test_1.yaml")
    db2 = AssetsDB(path="/tmp/test_2.yaml")
    assert db1 != db2

    # the same path must return the cached instance
    db3 = AssetsDB(path="/tmp/test_1.yaml")
    assert db3 == db1
    assert db3 != db2

    # the same path with a different db class must be rejected
    with pytest.raises(Exception) as e:
        FileDB(path="/tmp/test_2.yaml")
    assert str(e.value) == "Instance already exists with same path but different class type"


@pytest.fixture(scope="module")
def db(asset_root) -> Database:
    # module-scoped base Database backed by a throwaway json file
    db_path = os.path.join(asset_root, "database_test", "db_file.json")
    os.makedirs(os.path.dirname(db_path), exist_ok=True)
    return Database(path=db_path)


def test_db_create(db: Database):
    """make sure metadb got created"""
    assert db is not None  # db instance got created
    assert db.path and os.path.exists(db.path)


def test_update(db):
    # update persists; retrieve reads it back
    db.update(key="value")
    assert db.retrieve("key") == "value"


def test_write_and_read(db):
    # classmethod round-trip straight to disk
    Database.write({"key": "value"}, db.path)
    data = Database.read(db.path)
    assert data == {"key": "value"}


def test_retrieve(db):
    # a single key returns the value; a list of keys returns a dict
    db.update(key1="value1", key2="value2")
    assert db.retrieve("key1") == "value1"
    assert db.retrieve(["key1", "key2"]) == {"key1": "value1", "key2": "value2"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from asset_db.file_db import FileDB
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@pytest.fixture(scope="module")
def db(asset_root) -> FileDB:
    # module-scoped FileDB backed by a throwaway json file under asset_root
    db_path = os.path.join(asset_root, "database_test", "file_stats.json")
    os.makedirs(os.path.dirname(db_path), exist_ok=True)
    return FileDB(path=db_path)


def test_db_create(db: FileDB):
    """make sure metadb got created"""
    assert db is not None  # db instance got created
    assert db.path and os.path.exists(db.path)


def test_add_and_get_stats(db):
    # stats round-trip through add_stats/get_stats
    db.add_stats(file1="hash1", file2="hash2")
    stats = db.get_stats()
    assert stats == {"file1": "hash1", "file2": "hash2"}, "Stats should be correctly added and retrieved"


def test_remove_stats(db):
    # removing one key leaves the others intact
    db.add_stats(file1="hash1", file2="hash2")
    db.remove_stats(["file1"])
    stats = db.get_stats()
    assert "file1" not in stats, "file1 should be removed from stats"
    assert "file2" in stats, "file2 should remain in stats"
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from asset_db import ManifestDB
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@pytest.fixture(scope="module")
def db(asset_root) -> ManifestDB:
    # module-scoped ManifestDB backed by a throwaway yaml file
    db_path = os.path.join(asset_root, "database_test", "manifest.yaml")
    os.makedirs(os.path.dirname(db_path), exist_ok=True)
    return ManifestDB(path=db_path)


def test_db_create(db: ManifestDB):
    """make sure metadb got created"""
    assert db is not None  # db instance got created
    assert db.path and os.path.exists(db.path)


def test_add_objects(db: ManifestDB):
    # objects round-trip through add_objects/get_objects
    objects = {"1": {"name": "test_object1"}, "2": {"name": "test_object2"}}
    db.add_objects(**objects)
    result = db.get_objects()
    assert result == objects


def test_get_objects(db: ManifestDB):
    # objects written via raw update are visible through get_objects
    objects = {"objects": {"1": {"name": "test_object1"}, "2": {"name": "test_object2"}}}
    db.update(**objects)
    result = db.get_objects()
    assert result == objects["objects"]


def test_get(db: ManifestDB):
    # get() reads arbitrary top-level keys
    data = {"key1": "value1", "key2": "value2"}
    db.update(**data)
    result = db.get("key1")
    assert result == "value1"


def test_remove_objects(db: ManifestDB):
    # removing one id leaves the others intact
    objects = {"objects": {"1": {"name": "test_object1"}, "2": {"name": "test_object2"}}}
    db.update(**objects)
    db.remove_objects(["1"])
    result = db.get_objects()
    assert "1" not in result
    assert "2" in result


def test_clear_objects(db: ManifestDB):
    # clear_objects empties the objects mapping entirely
    objects = {"objects": {"1": {"name": "test_object1"}, "2": {"name": "test_object2"}}}
    db.update(**objects)
    db.clear_objects()
    result = db.get_objects()
    assert result == {}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from asset_db import StatesDB
|
|
6
|
+
|
|
7
|
+
# Fixed path -> state fixture data covering every lifecycle state value.
DATA = {
    "test_data/2.txt": "pending",
    "test_data/imgs/photo-1493976040374-85c8e12f0c0e.jpg": "uploaded",
    "test_data/imgs/photo-1504198453319-5ce911bafcde.jpg": "pending",
    "test_data/imgs/photo-1507143550189-fed454f93097.jpg": "uploading",
    "test_data/imgs/photo-1513938709626-033611b8cc03.jpg": "version",
    "test_data/imgs/photo-1516117172878-fd2c41f4a759.jpg": "versioned",
    "test_data/imgs/photo-1516972810927-80185027ca84.jpg": "committing",
    "test_data/imgs/photo-1522364723953-452d3431c267.jpg": "committed",
    "test_data/imgs/photo-1524429656589-6633a470097c.jpg": "renaming",
    "test_data/imgs/photo-1530122037265-a5f1f91d3b99.jpg": "renamed",
}


@pytest.fixture(scope="module")
def states_db(asset_root):
    # fixed: exist_ok=True — a pre-existing directory previously raised
    # FileExistsError and errored out the whole module
    os.makedirs(os.path.join(asset_root, "states_db_test"), exist_ok=True)
    return StatesDB(path=os.path.join(asset_root, "states_db_test", "states.yaml"))


def test_add_states(states_db):
    # everything written round-trips unchanged
    states_db.add_object_states(**DATA)
    stored = states_db.get_object_states()
    assert stored == DATA


def test_remove_states(states_db):
    # removing one key leaves exactly the remaining entries
    data = DATA.copy()
    states_db.add_object_states(**data)
    data.pop("test_data/2.txt")
    states_db.remove_object_states(["test_data/2.txt"])
    stored = states_db.get_object_states()
    assert data == stored
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0.0,<69.0.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "amapy-db"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "The Database plugin for asset-manager project. It provides functionalities to manage and store metadata related to digital assets, such as storing MD5 hashes of asset files and managing these records efficiently."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "Copyright (c) 2024 Roche Diagnostics Computation Science & Informatics" }
|
|
11
|
+
requires-python = ">=3.10,<3.11" # "==3.10.*"
|
|
12
|
+
dependencies = [
|
|
13
|
+
"amapy-pluggy==1.*",
|
|
14
|
+
"amapy-utils==1.*"
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[tool.setuptools.packages.find]
|
|
18
|
+
include = ["*"]
|
|
19
|
+
namespaces = false
|
amapy-db-1.0.0/setup.cfg
ADDED