datamint 2.3.3__py3-none-any.whl → 2.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. datamint/__init__.py +1 -3
  2. datamint/api/__init__.py +0 -3
  3. datamint/api/base_api.py +286 -54
  4. datamint/api/client.py +76 -13
  5. datamint/api/endpoints/__init__.py +2 -2
  6. datamint/api/endpoints/annotations_api.py +186 -28
  7. datamint/api/endpoints/deploy_model_api.py +78 -0
  8. datamint/api/endpoints/models_api.py +1 -0
  9. datamint/api/endpoints/projects_api.py +38 -7
  10. datamint/api/endpoints/resources_api.py +227 -100
  11. datamint/api/entity_base_api.py +66 -7
  12. datamint/apihandler/base_api_handler.py +0 -1
  13. datamint/apihandler/dto/annotation_dto.py +2 -0
  14. datamint/client_cmd_tools/datamint_config.py +0 -1
  15. datamint/client_cmd_tools/datamint_upload.py +3 -1
  16. datamint/configs.py +11 -7
  17. datamint/dataset/base_dataset.py +24 -4
  18. datamint/dataset/dataset.py +1 -1
  19. datamint/entities/__init__.py +1 -1
  20. datamint/entities/annotations/__init__.py +13 -0
  21. datamint/entities/{annotation.py → annotations/annotation.py} +81 -47
  22. datamint/entities/annotations/image_classification.py +12 -0
  23. datamint/entities/annotations/image_segmentation.py +252 -0
  24. datamint/entities/annotations/volume_segmentation.py +273 -0
  25. datamint/entities/base_entity.py +100 -6
  26. datamint/entities/cache_manager.py +129 -15
  27. datamint/entities/datasetinfo.py +60 -65
  28. datamint/entities/deployjob.py +18 -0
  29. datamint/entities/project.py +39 -0
  30. datamint/entities/resource.py +310 -46
  31. datamint/lightning/__init__.py +1 -0
  32. datamint/lightning/datamintdatamodule.py +103 -0
  33. datamint/mlflow/__init__.py +65 -0
  34. datamint/mlflow/artifact/__init__.py +1 -0
  35. datamint/mlflow/artifact/datamint_artifacts_repo.py +8 -0
  36. datamint/mlflow/env_utils.py +131 -0
  37. datamint/mlflow/env_vars.py +5 -0
  38. datamint/mlflow/flavors/__init__.py +17 -0
  39. datamint/mlflow/flavors/datamint_flavor.py +150 -0
  40. datamint/mlflow/flavors/model.py +877 -0
  41. datamint/mlflow/lightning/callbacks/__init__.py +1 -0
  42. datamint/mlflow/lightning/callbacks/modelcheckpoint.py +410 -0
  43. datamint/mlflow/models/__init__.py +93 -0
  44. datamint/mlflow/tracking/datamint_store.py +76 -0
  45. datamint/mlflow/tracking/default_experiment.py +27 -0
  46. datamint/mlflow/tracking/fluent.py +91 -0
  47. datamint/utils/env.py +27 -0
  48. datamint/utils/visualization.py +21 -13
  49. datamint-2.9.0.dist-info/METADATA +220 -0
  50. datamint-2.9.0.dist-info/RECORD +73 -0
  51. {datamint-2.3.3.dist-info → datamint-2.9.0.dist-info}/WHEEL +1 -1
  52. datamint-2.9.0.dist-info/entry_points.txt +18 -0
  53. datamint/apihandler/exp_api_handler.py +0 -204
  54. datamint/experiment/__init__.py +0 -1
  55. datamint/experiment/_patcher.py +0 -570
  56. datamint/experiment/experiment.py +0 -1049
  57. datamint-2.3.3.dist-info/METADATA +0 -125
  58. datamint-2.3.3.dist-info/RECORD +0 -54
  59. datamint-2.3.3.dist-info/entry_points.txt +0 -4
datamint/mlflow/env_utils.py
@@ -0,0 +1,131 @@
+ """
+ Utility functions for automatically configuring MLflow environment variables
+ based on Datamint configuration.
+ """
+
+ import os
+ import logging
+ from urllib.parse import urlparse
+ from datamint import configs
+ import sys
+
+
+ _LOGGER = logging.getLogger(__name__)
+
+
+ def get_datamint_api_url() -> str | None:
+     """Get the Datamint API URL from configuration or environment variables."""
+     api_url = configs.get_value(configs.APIURL_KEY, include_envvars=True)  # configs checks env vars first
+     return api_url
+
+
+ def get_datamint_api_key() -> str | None:
+     """Get the Datamint API key from configuration or environment variables."""
+     # First check environment variable
+     api_key = os.getenv('DATAMINT_API_KEY')
+     if api_key:
+         return api_key
+
+     # Then check configuration
+     api_key = configs.get_value(configs.APIKEY_KEY)
+     if api_key:
+         return api_key
+
+     return None
+
+
+ def _get_mlflowdatamint_uri() -> str | None:
+     api_url = get_datamint_api_url()
+     if not api_url:
+         return None
+     _LOGGER.debug(f"Retrieved Datamint API URL: {api_url}")
+
+     # Remove trailing slash if present
+     api_url = api_url.rstrip('/')
+     # api_url samples:
+     # https://api.datamint.io
+     # http://localhost:3001
+
+     parsed_url = urlparse(api_url)
+     base_url = f"{parsed_url.scheme}://{parsed_url.hostname}"
+     _LOGGER.debug(f"Derived base URL for MLflow Datamint: {base_url}")
+     # FIXME: It should work with https or datamint-api server should forward https requests.
+     base_url = base_url.replace('https://', 'http://')
+     if len(base_url.replace('http:', '')) == 0:
+         return None
+
+     mlflow_uri = f"{base_url}:5000"
+     return mlflow_uri
+
+
+ def setup_mlflow_environment(overwrite: bool = False,
+                              set_mlflow: bool = True) -> bool:
+     """
+     Set up MLflow environment variables based on Datamint configuration.
+
+     Args:
+         overwrite (bool): If True, overwrite existing MLflow environment variables.
+         set_mlflow (bool): If True, set the MLflow tracking URI using mlflow.set_tracking_uri().
+
+     Returns:
+         bool: True if success, False otherwise.
+     """
+     api_key = get_datamint_api_key()
+     mlflow_uri = _get_mlflowdatamint_uri()
+     _LOGGER.debug(f"Setting up MLflow environment variables from Datamint configuration: URI='{mlflow_uri}', API_KEY={'***' if api_key is not None else None}")
+     if not mlflow_uri or not api_key:
+         _LOGGER.warning("Datamint configuration incomplete, cannot auto-configure MLflow")
+         return False
+
+     if overwrite or not os.getenv('MLFLOW_TRACKING_TOKEN'):
+         os.environ['MLFLOW_TRACKING_TOKEN'] = api_key
+     if overwrite or not os.getenv('MLFLOW_TRACKING_URI'):
+         os.environ['MLFLOW_TRACKING_URI'] = mlflow_uri
+
+     _LOGGER.debug(f'Final MLflow environment variables: MLFLOW_TRACKING_URI={os.getenv("MLFLOW_TRACKING_URI")}, MLFLOW_TRACKING_TOKEN={"***" if os.getenv("MLFLOW_TRACKING_TOKEN") is not None else None}')
+
+     if set_mlflow:
+         import mlflow
+         _LOGGER.debug(f"Setting MLflow tracking URI to: {mlflow_uri}")
+         mlflow.set_tracking_uri(mlflow_uri)
+
+     if 'lightning.pytorch.loggers' in sys.modules:
+         # import lightning.pytorch.loggers
+         # importlib.reload(lightning.pytorch.loggers)
+         from lightning.pytorch.loggers import MLFlowLogger
+
+         # 1. Convert the immutable defaults tuple to a mutable list
+         current_defaults = list(MLFlowLogger.__init__.__defaults__)
+
+         # 2. Update the default value for 'tracking_uri'
+         # Based on the signature, 'tracking_uri' is the 3rd argument with a default (index 2)
+         # Signature: (experiment_name, run_name, tracking_uri, ...)
+         current_defaults[2] = mlflow_uri
+
+         # 3. Apply the modified defaults back to the class
+         MLFlowLogger.__init__.__defaults__ = tuple(current_defaults)
+
+     return True
+
+
+ def ensure_mlflow_configured() -> None:
+     """
+     Ensure MLflow environment is properly configured.
+     Raises ValueError if configuration is incomplete.
+     """
+     if not setup_mlflow_environment():
+         if not os.getenv('MLFLOW_TRACKING_URI'):
+             raise ValueError(
+                 "MLflow environment not configured. Please either:\n"
+                 "1. Run 'datamint-config --default-url <url>', or\n"
+                 "2. Set DATAMINT_API_URL environment variable, or\n"
+                 "3. Manually set MLFLOW_TRACKING_URI environment variable"
+             )
+         if not os.getenv('MLFLOW_TRACKING_TOKEN'):
+             raise ValueError(
+                 "MLflow environment not configured. Please either:\n"
+                 "1. Run 'datamint-config', or\n"
+                 "2. Set DATAMINT_API_KEY environment variable, or\n"
+                 "3. Manually set MLFLOW_TRACKING_TOKEN environment variable"
+             )
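
Note: a minimal usage sketch for the env_utils module added above. It assumes a Datamint API key and URL are already configured via datamint-config or the DATAMINT_API_KEY / DATAMINT_API_URL environment variables; the surrounding script is illustrative and not part of the package.

    # Auto-configure MLflow tracking from the local Datamint configuration.
    from datamint.mlflow.env_utils import setup_mlflow_environment, ensure_mlflow_configured
    import mlflow

    if setup_mlflow_environment(overwrite=False, set_mlflow=True):
        # MLFLOW_TRACKING_URI and MLFLOW_TRACKING_TOKEN are now set for this process.
        with mlflow.start_run():
            mlflow.log_metric("demo_metric", 0.5)
    else:
        # Raises ValueError with setup instructions when nothing is configured.
        ensure_mlflow_configured()
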
datamint/mlflow/env_vars.py
@@ -0,0 +1,5 @@
+ from enum import Enum
+
+ class EnvVars(Enum):
+     DATAMINT_PROJECT_ID = "DATAMINT_PROJECT_ID"
+     DATAMINT_PROJECT_NAME = "DATAMINT_PROJECT_NAME"
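
Note: a small illustrative sketch (not part of the package) of how the EnvVars enum above can be used to read the active project from the environment; the enum member values are the environment variable names themselves.

    import os
    from datamint.mlflow.env_vars import EnvVars

    project_id = os.getenv(EnvVars.DATAMINT_PROJECT_ID.value)
    project_name = os.getenv(EnvVars.DATAMINT_PROJECT_NAME.value)
    print(f"Active Datamint project: {project_name} ({project_id})")
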
datamint/mlflow/flavors/__init__.py
@@ -0,0 +1,17 @@
+ """
+ Datamint MLflow custom flavor for wrapping PyTorch models with preprocessing.
+ """
+
+ from .datamint_flavor import (
+     save_model,
+     log_model,
+     load_model,
+     _load_pyfunc,
+ )
+
+ __all__ = [
+     "save_model",
+     "log_model",
+     "load_model",
+     "_load_pyfunc",
+ ]
datamint/mlflow/flavors/datamint_flavor.py
@@ -0,0 +1,150 @@
+ import mlflow
+ from mlflow.models import Model, ModelInputExample, ModelSignature
+ import datamint
+ import datamint.mlflow.flavors
+ from mlflow import pyfunc
+ from .model import DatamintModel
+ import logging
+ from collections.abc import Sequence
+ from dataclasses import asdict
+ from packaging.requirements import Requirement
+
+ FLAVOR_NAME = 'datamint'
+
+ _LOGGER = logging.getLogger(__name__)
+
+
+ def save_model(datamint_model: DatamintModel,
+                path,
+                supported_modes: Sequence[str] | None = None,
+                data_path=None,
+                code_paths=None,
+                infer_code_paths=False,
+                conda_env=None,
+                mlflow_model: Model | None = None,
+                artifacts=None,
+                signature: ModelSignature | None = None,
+                input_example: ModelInputExample | None = None,
+                pip_requirements=None,
+                extra_pip_requirements=None,
+                metadata=None,
+                model_config=None,
+                streamable=None,
+                **kwargs):
+     import medimgkit
+
+     if mlflow_model is None:
+         mlflow_model = Model()
+
+     mlflow_model.add_flavor(
+         FLAVOR_NAME,
+         datamint_version=datamint.__version__,
+         supported_modes=supported_modes or datamint_model.get_supported_modes(),
+         model_settings=asdict(datamint_model.settings),
+         linked_models=datamint_model._get_linked_models_uri()
+     )
+
+     model_config = model_config or {}
+     model_config.setdefault('device', 'cuda' if datamint_model.settings.need_gpu else 'cpu')
+
+     def _get_req_name(req):
+         try:
+             return Requirement(req).name.lower()
+         except Exception:
+             return req.split("==")[0].strip().lower()
+
+     datamint_requirements = ['datamint=={}'.format(datamint.__version__), 'medimgkit=={}'.format(medimgkit.__version__)]
+
+     user_requirements = []
+     # Check if requirements are lists (not strings, which are also Sequences)
+     if pip_requirements and isinstance(pip_requirements, Sequence) and not isinstance(pip_requirements, str):
+         user_requirements.extend(pip_requirements)
+     if extra_pip_requirements and isinstance(extra_pip_requirements, Sequence) and not isinstance(extra_pip_requirements, str):
+         user_requirements.extend(extra_pip_requirements)
+
+     user_req_names = {_get_req_name(req) for req in user_requirements}
+
+     missing_requirements = [req for req in datamint_requirements if _get_req_name(req) not in user_req_names]
+
+     if missing_requirements:
+         if extra_pip_requirements is None:
+             extra_pip_requirements = missing_requirements
+         elif isinstance(extra_pip_requirements, Sequence) and not isinstance(extra_pip_requirements, str):
+             extra_pip_requirements = list(extra_pip_requirements) + missing_requirements
+         elif pip_requirements and isinstance(pip_requirements, Sequence) and not isinstance(pip_requirements, str):
+             pip_requirements = list(pip_requirements) + missing_requirements
+
+     datamint_model._clear_linked_models_cache()
+
+     return mlflow.pyfunc.save_model(
+         path=path,
+         python_model=datamint_model,
+         data_path=data_path,
+         conda_env=conda_env,
+         mlflow_model=mlflow_model,
+         # loader_module=None,
+         artifacts=artifacts,
+         code_paths=code_paths,
+         infer_code_paths=infer_code_paths,
+         signature=signature,
+         input_example=input_example,
+         pip_requirements=pip_requirements,
+         extra_pip_requirements=extra_pip_requirements,
+         metadata=metadata,
+         model_config=model_config,
+         streamable=streamable,
+         **kwargs
+     )
+
+
+ def log_model(
+         datamint_model: DatamintModel,
+         supported_modes: Sequence[str] | None = None,
+         name: str = "datamint_model",
+         data_path=None,
+         code_paths=None,
+         infer_code_paths=False,
+         artifacts=None,
+         registered_model_name: str | None = None,
+         signature: ModelSignature | None = None,
+         input_example: ModelInputExample | None = None,
+         pip_requirements=None,
+         extra_pip_requirements=None,
+         metadata=None,
+         model_config=None,
+         **kwargs
+ ):
+     return Model.log(
+         datamint_model=datamint_model,
+         supported_modes=supported_modes,
+         name=name,
+         flavor=datamint.mlflow.flavors.datamint_flavor,
+         # loader_module=loader_module,
+         data_path=data_path,
+         code_paths=code_paths,
+         artifacts=artifacts,
+         registered_model_name=registered_model_name,
+         signature=signature,
+         input_example=input_example,
+         pip_requirements=pip_requirements,
+         extra_pip_requirements=extra_pip_requirements,
+         metadata=metadata,
+         model_config=model_config,
+         infer_code_paths=infer_code_paths,
+         **kwargs
+     )
+
+
+ def load_model(model_uri: str, device: str | None = None) -> DatamintModel:
+     if device is not None:
+         model_config = {'device': device}
+     else:
+         model_config = None
+     return mlflow.pyfunc.load_model(model_uri=model_uri,
+                                     model_config=model_config
+                                     ).unwrap_python_model()
+
+
+ def _load_pyfunc(path: str, model_config=None) -> pyfunc.PyFuncModel:
+     return mlflow.pyfunc.load_model(model_uri=path, model_config=model_config)
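
Note: an illustrative round-trip sketch based only on the signatures above. How a DatamintModel is constructed is defined in datamint/mlflow/flavors/model.py and is not shown in this diff, so `my_model` below is a placeholder, not part of the package API.

    import mlflow
    from datamint.mlflow.flavors import log_model, load_model

    my_model = ...  # placeholder for a concrete DatamintModel instance (see flavors/model.py)

    with mlflow.start_run():
        # In recent MLflow versions, Model.log(...) returns a ModelInfo whose
        # model_uri points at the logged artifact.
        info = log_model(datamint_model=my_model, name="datamint_model")

    # Reload through the pyfunc wrapper; `device` feeds the 'device' key of model_config.
    reloaded = load_model(info.model_uri, device="cpu")
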