datamint 2.3.3__py3-none-any.whl → 2.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamint/__init__.py +1 -3
- datamint/api/__init__.py +0 -3
- datamint/api/base_api.py +286 -54
- datamint/api/client.py +76 -13
- datamint/api/endpoints/__init__.py +2 -2
- datamint/api/endpoints/annotations_api.py +186 -28
- datamint/api/endpoints/deploy_model_api.py +78 -0
- datamint/api/endpoints/models_api.py +1 -0
- datamint/api/endpoints/projects_api.py +38 -7
- datamint/api/endpoints/resources_api.py +227 -100
- datamint/api/entity_base_api.py +66 -7
- datamint/apihandler/base_api_handler.py +0 -1
- datamint/apihandler/dto/annotation_dto.py +2 -0
- datamint/client_cmd_tools/datamint_config.py +0 -1
- datamint/client_cmd_tools/datamint_upload.py +3 -1
- datamint/configs.py +11 -7
- datamint/dataset/base_dataset.py +24 -4
- datamint/dataset/dataset.py +1 -1
- datamint/entities/__init__.py +1 -1
- datamint/entities/annotations/__init__.py +13 -0
- datamint/entities/{annotation.py → annotations/annotation.py} +81 -47
- datamint/entities/annotations/image_classification.py +12 -0
- datamint/entities/annotations/image_segmentation.py +252 -0
- datamint/entities/annotations/volume_segmentation.py +273 -0
- datamint/entities/base_entity.py +100 -6
- datamint/entities/cache_manager.py +129 -15
- datamint/entities/datasetinfo.py +60 -65
- datamint/entities/deployjob.py +18 -0
- datamint/entities/project.py +39 -0
- datamint/entities/resource.py +310 -46
- datamint/lightning/__init__.py +1 -0
- datamint/lightning/datamintdatamodule.py +103 -0
- datamint/mlflow/__init__.py +65 -0
- datamint/mlflow/artifact/__init__.py +1 -0
- datamint/mlflow/artifact/datamint_artifacts_repo.py +8 -0
- datamint/mlflow/env_utils.py +131 -0
- datamint/mlflow/env_vars.py +5 -0
- datamint/mlflow/flavors/__init__.py +17 -0
- datamint/mlflow/flavors/datamint_flavor.py +150 -0
- datamint/mlflow/flavors/model.py +877 -0
- datamint/mlflow/lightning/callbacks/__init__.py +1 -0
- datamint/mlflow/lightning/callbacks/modelcheckpoint.py +410 -0
- datamint/mlflow/models/__init__.py +93 -0
- datamint/mlflow/tracking/datamint_store.py +76 -0
- datamint/mlflow/tracking/default_experiment.py +27 -0
- datamint/mlflow/tracking/fluent.py +91 -0
- datamint/utils/env.py +27 -0
- datamint/utils/visualization.py +21 -13
- datamint-2.9.0.dist-info/METADATA +220 -0
- datamint-2.9.0.dist-info/RECORD +73 -0
- {datamint-2.3.3.dist-info → datamint-2.9.0.dist-info}/WHEEL +1 -1
- datamint-2.9.0.dist-info/entry_points.txt +18 -0
- datamint/apihandler/exp_api_handler.py +0 -204
- datamint/experiment/__init__.py +0 -1
- datamint/experiment/_patcher.py +0 -570
- datamint/experiment/experiment.py +0 -1049
- datamint-2.3.3.dist-info/METADATA +0 -125
- datamint-2.3.3.dist-info/RECORD +0 -54
- datamint-2.3.3.dist-info/entry_points.txt +0 -4
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utility functions for automatically configuring MLflow environment variables
|
|
3
|
+
based on Datamint configuration.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import logging
|
|
8
|
+
from urllib.parse import urlparse
|
|
9
|
+
from datamint import configs
|
|
10
|
+
import sys
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
_LOGGER = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_datamint_api_url() -> str | None:
    """Return the Datamint API URL as resolved by the ``configs`` module.

    The lookup is delegated to ``configs.get_value`` with
    ``include_envvars=True``, so environment variables take part in the
    resolution (presumably with precedence over the stored configuration --
    confirm against ``configs.get_value``).
    """
    return configs.get_value(configs.APIURL_KEY, include_envvars=True)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get_datamint_api_key() -> str | None:
|
|
23
|
+
"""Get the Datamint API key from configuration or environment variables."""
|
|
24
|
+
# First check environment variable
|
|
25
|
+
api_key = os.getenv('DATAMINT_API_KEY')
|
|
26
|
+
if api_key:
|
|
27
|
+
return api_key
|
|
28
|
+
|
|
29
|
+
# Then check configuration
|
|
30
|
+
api_key = configs.get_value(configs.APIKEY_KEY)
|
|
31
|
+
if api_key:
|
|
32
|
+
return api_key
|
|
33
|
+
|
|
34
|
+
return None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _get_mlflowdatamint_uri() -> str | None:
    """Derive the MLflow tracking URI from the configured Datamint API URL.

    The host of the API URL is reused with port ``5000``; any port, path or
    credentials present in the API URL are dropped. An ``https`` scheme is
    downgraded to ``http`` (see the FIXME below).

    Returns:
        The MLflow tracking URI (e.g. ``http://api.datamint.io:5000``), or
        ``None`` when no API URL is configured or when the URL has no usable
        scheme/host (for example ``localhost:3001`` without ``http://``).
    """
    api_url = get_datamint_api_url()
    if not api_url:
        return None
    _LOGGER.debug(f"Retrieved Datamint API URL: {api_url}")

    # api_url samples:
    #   https://api.datamint.io
    #   http://localhost:3001
    try:
        # Trailing slashes are irrelevant for host extraction; strip them first.
        parsed_url = urlparse(api_url.rstrip('/'))
        scheme = parsed_url.scheme
        hostname = parsed_url.hostname
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. unbalanced IPv6 brackets).
        scheme, hostname = '', None

    # Without both a scheme and a host there is nothing to build a URI from.
    # (Scheme-less input such as "localhost:3001" parses with hostname=None.)
    if not scheme or not hostname:
        _LOGGER.warning(f"Could not derive an MLflow URI from Datamint API URL: {api_url}")
        return None

    # `hostname` strips the brackets of IPv6 literals; restore them so that the
    # port can be appended unambiguously.
    if ':' in hostname:
        hostname = f"[{hostname}]"

    base_url = f"{scheme}://{hostname}"
    _LOGGER.debug(f"Derived base URL for MLflow Datamint: {base_url}")

    # FIXME: It should work with https or datamint-api server should forward https requests.
    if scheme == 'https':
        base_url = f"http://{hostname}"

    return f"{base_url}:5000"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def setup_mlflow_environment(overwrite: bool = False,
                             set_mlflow: bool = True) -> bool:
    """
    Set up MLflow environment variables based on Datamint configuration.

    ``MLFLOW_TRACKING_TOKEN`` receives the Datamint API key and
    ``MLFLOW_TRACKING_URI`` receives the URI derived from the Datamint API URL.
    Existing values are kept unless ``overwrite`` is True.

    When ``set_mlflow`` is True, ``mlflow.set_tracking_uri`` is called with the
    Datamint-derived URI (regardless of any pre-existing
    ``MLFLOW_TRACKING_URI``), and, if ``lightning.pytorch.loggers`` has already
    been imported, the default ``tracking_uri`` of ``MLFlowLogger.__init__`` is
    patched to the same URI.

    Args:
        overwrite (bool): If True, overwrite existing MLflow environment variables.
        set_mlflow (bool): If True, set the MLflow tracking URI using mlflow.set_tracking_uri().

    Returns:
        bool: True if success, False otherwise (API key or API URL unavailable).
    """
    api_key = get_datamint_api_key()
    mlflow_uri = _get_mlflowdatamint_uri()
    _LOGGER.debug(f"Setting up MLflow environment variables from Datamint configuration: URI='{mlflow_uri}', API_KEY={'***' if api_key is not None else None}")
    if not mlflow_uri or not api_key:
        _LOGGER.warning("Datamint configuration incomplete, cannot auto-configure MLflow")
        return False

    if overwrite or not os.getenv('MLFLOW_TRACKING_TOKEN'):
        os.environ['MLFLOW_TRACKING_TOKEN'] = api_key
    if overwrite or not os.getenv('MLFLOW_TRACKING_URI'):
        os.environ['MLFLOW_TRACKING_URI'] = mlflow_uri

    _LOGGER.debug(f'Final MLflow environment variables: MLFLOW_TRACKING_URI={os.getenv("MLFLOW_TRACKING_URI")}, MLFLOW_TRACKING_TOKEN={"***" if os.getenv("MLFLOW_TRACKING_TOKEN") is not None else None}')

    if set_mlflow:
        import mlflow
        _LOGGER.debug(f"Setting MLflow tracking URI to: {mlflow_uri}")
        mlflow.set_tracking_uri(mlflow_uri)

        # NOTE(review): the reviewed source had lost its indentation, so it is
        # not certain that this patch belongs under `if set_mlflow:` -- confirm.
        # Only patch Lightning if the caller already imported it; never import
        # it on the caller's behalf.
        if 'lightning.pytorch.loggers' in sys.modules:
            from lightning.pytorch.loggers import MLFlowLogger

            # The default of `tracking_uri` is baked into the function object,
            # so it has to be replaced there. Locate the parameter by name
            # instead of by a hard-coded position, so a signature change in
            # Lightning cannot make us overwrite an unrelated default.
            init = MLFlowLogger.__init__
            defaults = getattr(init, '__defaults__', None)
            kwdefaults = getattr(init, '__kwdefaults__', None)
            code = getattr(init, '__code__', None)

            patched = False
            if defaults and code is not None:
                positional = code.co_varnames[:code.co_argcount]
                # Defaults always belong to the trailing positional parameters.
                names_with_defaults = positional[len(positional) - len(defaults):]
                if 'tracking_uri' in names_with_defaults:
                    new_defaults = list(defaults)
                    new_defaults[names_with_defaults.index('tracking_uri')] = mlflow_uri
                    init.__defaults__ = tuple(new_defaults)
                    patched = True
            if not patched and kwdefaults and 'tracking_uri' in kwdefaults:
                # Keyword-only parameters keep their defaults in a separate dict.
                kwdefaults['tracking_uri'] = mlflow_uri
                patched = True
            if not patched:
                _LOGGER.debug("MLFlowLogger.__init__ has no 'tracking_uri' default to patch; skipping")

    return True
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def ensure_mlflow_configured() -> None:
    """
    Make sure MLflow can be used, auto-configuring it from Datamint if possible.

    ``setup_mlflow_environment()`` is tried first. If it reports failure, the
    environment is still accepted as long as both ``MLFLOW_TRACKING_URI`` and
    ``MLFLOW_TRACKING_TOKEN`` are already set to non-empty values.

    Raises:
        ValueError: If auto-configuration failed and one of the two MLflow
            environment variables is missing (the URI is checked first).
    """
    if setup_mlflow_environment():
        return

    # Auto-configuration failed: fall back to whatever the user set manually.
    uri_missing = not os.getenv('MLFLOW_TRACKING_URI')
    if uri_missing:
        uri_help = (
            "MLflow environment not configured. Please either:\n"
            "1. Run 'datamint-config --default-url <url>', or\n"
            "2. Set DATAMINT_API_URL environment variable, or\n"
            "3. Manually set MLFLOW_TRACKING_URI environment variable"
        )
        raise ValueError(uri_help)

    token_missing = not os.getenv('MLFLOW_TRACKING_TOKEN')
    if token_missing:
        token_help = (
            "MLflow environment not configured. Please either:\n"
            "1. Run 'datamint-config', or\n"
            "2. Set DATAMINT_API_KEY environment variable, or\n"
            "3. Manually set MLFLOW_TRACKING_TOKEN environment variable"
        )
        raise ValueError(token_help)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Datamint MLflow custom flavor for wrapping PyTorch models with preprocessing.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from .datamint_flavor import (
|
|
6
|
+
save_model,
|
|
7
|
+
log_model,
|
|
8
|
+
load_model,
|
|
9
|
+
_load_pyfunc,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"save_model",
|
|
14
|
+
"log_model",
|
|
15
|
+
"load_model",
|
|
16
|
+
"_load_pyfunc",
|
|
17
|
+
]
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import mlflow
|
|
2
|
+
from mlflow.models import Model, ModelInputExample, ModelSignature
|
|
3
|
+
import datamint
|
|
4
|
+
import datamint.mlflow.flavors
|
|
5
|
+
from mlflow import pyfunc
|
|
6
|
+
from .model import DatamintModel
|
|
7
|
+
import logging
|
|
8
|
+
from collections.abc import Sequence
|
|
9
|
+
from dataclasses import asdict
|
|
10
|
+
from packaging.requirements import Requirement
|
|
11
|
+
|
|
12
|
+
FLAVOR_NAME = 'datamint'
|
|
13
|
+
|
|
14
|
+
_LOGGER = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def save_model(datamint_model: DatamintModel,
               path,
               supported_modes: Sequence[str] | None = None,
               data_path=None,
               code_paths=None,
               infer_code_paths=False,
               conda_env=None,
               mlflow_model: Model | None = None,
               artifacts=None,
               signature: ModelSignature | None = None,
               input_example: ModelInputExample | None = None,
               pip_requirements=None,
               extra_pip_requirements=None,
               metadata=None,
               model_config=None,
               streamable=None,
               **kwargs):
    """Save ``datamint_model`` to ``path`` as an MLflow model with the ``datamint`` flavor.

    The flavor entry records the datamint version, the supported modes, the
    model settings and the URIs of linked models. The model itself is then
    written through ``mlflow.pyfunc.save_model``.

    Args:
        datamint_model: The model to save.
        path: Destination path, forwarded to ``mlflow.pyfunc.save_model``.
        supported_modes: Modes to record in the flavor; defaults to
            ``datamint_model.get_supported_modes()`` when empty or None.
        mlflow_model: Existing MLflow ``Model`` to add the flavor to; a new one
            is created when None.
        pip_requirements: Forwarded to MLflow. When given as a list, the pinned
            ``datamint`` / ``medimgkit`` requirements are appended if absent.
        extra_pip_requirements: Forwarded to MLflow. Receives the pinned
            ``datamint`` / ``medimgkit`` requirements when ``pip_requirements``
            is not a list.
        model_config: Forwarded to MLflow. When it is a dict (or empty/None), a
            ``'device'`` entry is defaulted to ``'cuda'`` or ``'cpu'`` according
            to ``datamint_model.settings.need_gpu``. The caller's dict is not
            modified. Non-dict values (e.g. a config file path) pass through.
        data_path, code_paths, infer_code_paths, conda_env, artifacts,
        signature, input_example, metadata, streamable, **kwargs:
            Forwarded unchanged to ``mlflow.pyfunc.save_model``.

    Returns:
        Whatever ``mlflow.pyfunc.save_model`` returns.
    """
    import medimgkit

    def _is_requirement_list(value) -> bool:
        # Strings are Sequences too, but a string here is a single value
        # (mlflow treats it as a requirements file path), not a list.
        return isinstance(value, Sequence) and not isinstance(value, str)

    def _get_req_name(req):
        try:
            return Requirement(req).name.lower()
        except Exception:
            # Best effort for entries `packaging` cannot parse.
            return req.split("==")[0].strip().lower()

    if mlflow_model is None:
        mlflow_model = Model()

    mlflow_model.add_flavor(
        FLAVOR_NAME,
        datamint_version=datamint.__version__,
        supported_modes=supported_modes or datamint_model.get_supported_modes(),
        model_settings=asdict(datamint_model.settings),
        linked_models=datamint_model._get_linked_models_uri()
    )

    default_device = 'cuda' if datamint_model.settings.need_gpu else 'cpu'
    if not model_config:
        model_config = {'device': default_device}
    elif isinstance(model_config, dict):
        # Work on a copy so the caller's dict is never mutated.
        model_config = dict(model_config)
        model_config.setdefault('device', default_device)
    # Any other type (e.g. a path to a config file) is passed through untouched.

    datamint_requirements = [f'datamint=={datamint.__version__}',
                             f'medimgkit=={medimgkit.__version__}']

    user_requirements = []
    if _is_requirement_list(pip_requirements):
        user_requirements.extend(pip_requirements)
    if _is_requirement_list(extra_pip_requirements):
        user_requirements.extend(extra_pip_requirements)
    user_req_names = {_get_req_name(req) for req in user_requirements}

    missing_requirements = [req for req in datamint_requirements
                            if _get_req_name(req) not in user_req_names]

    if missing_requirements:
        # MLflow accepts only one of conda_env / pip_requirements /
        # extra_pip_requirements, so extend whichever the caller already chose
        # rather than introducing a second one.
        if _is_requirement_list(pip_requirements):
            pip_requirements = list(pip_requirements) + missing_requirements
        elif _is_requirement_list(extra_pip_requirements):
            extra_pip_requirements = list(extra_pip_requirements) + missing_requirements
        elif conda_env is None and pip_requirements is None and extra_pip_requirements is None:
            extra_pip_requirements = missing_requirements
        else:
            # conda_env or a requirements-file path was supplied: it cannot be
            # amended here, and adding another argument would be rejected.
            _LOGGER.debug(
                "Environment provided as conda_env or requirements file; "
                f"not injecting {missing_requirements}"
            )

    datamint_model._clear_linked_models_cache()

    return mlflow.pyfunc.save_model(
        path=path,
        python_model=datamint_model,
        data_path=data_path,
        conda_env=conda_env,
        mlflow_model=mlflow_model,
        artifacts=artifacts,
        code_paths=code_paths,
        infer_code_paths=infer_code_paths,
        signature=signature,
        input_example=input_example,
        pip_requirements=pip_requirements,
        extra_pip_requirements=extra_pip_requirements,
        metadata=metadata,
        model_config=model_config,
        streamable=streamable,
        **kwargs
    )
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def log_model(
    datamint_model: DatamintModel,
    supported_modes: Sequence[str] | None = None,
    name: str = "datamint_model",
    data_path=None,
    code_paths=None,
    infer_code_paths=False,
    artifacts=None,
    registered_model_name: str | None = None,
    signature: ModelSignature | None = None,
    input_example: ModelInputExample | None = None,
    pip_requirements=None,
    extra_pip_requirements=None,
    metadata=None,
    model_config=None,
    **kwargs
):
    """Log ``datamint_model`` through ``Model.log`` using this module as the flavor.

    Every argument is handed to ``mlflow.models.Model.log`` by keyword, with
    ``flavor`` set to ``datamint.mlflow.flavors.datamint_flavor``.

    Returns:
        Whatever ``Model.log`` returns.
    """
    return Model.log(
        # What to log and under which flavor / name.
        flavor=datamint.mlflow.flavors.datamint_flavor,
        name=name,
        registered_model_name=registered_model_name,
        datamint_model=datamint_model,
        supported_modes=supported_modes,
        # Files and code shipped with the model.
        data_path=data_path,
        artifacts=artifacts,
        code_paths=code_paths,
        infer_code_paths=infer_code_paths,
        # Schema, environment and configuration.
        signature=signature,
        input_example=input_example,
        pip_requirements=pip_requirements,
        extra_pip_requirements=extra_pip_requirements,
        metadata=metadata,
        model_config=model_config,
        **kwargs
    )
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def load_model(model_uri: str, device: str | None = None) -> DatamintModel:
    """Load a model via ``mlflow.pyfunc.load_model`` and return the unwrapped Python model.

    Args:
        model_uri: URI of the model, passed to ``mlflow.pyfunc.load_model``.
        device: When not None, passed as ``model_config={'device': device}``;
            otherwise no ``model_config`` override is supplied.

    Returns:
        The result of ``unwrap_python_model()`` on the loaded pyfunc model.
    """
    model_config = None if device is None else {'device': device}
    pyfunc_model = mlflow.pyfunc.load_model(model_uri=model_uri, model_config=model_config)
    return pyfunc_model.unwrap_python_model()
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _load_pyfunc(path: str, model_config=None) -> pyfunc.PyFuncModel:
    """Load the model stored at ``path`` as a generic pyfunc model.

    Thin wrapper over ``mlflow.pyfunc.load_model``; ``model_config`` is passed
    through unchanged.
    """
    loaded_model = mlflow.pyfunc.load_model(model_config=model_config, model_uri=path)
    return loaded_model
|