mlrun 1.10.0rc10__py3-none-any.whl → 1.10.0rc11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/artifacts/manager.py +1 -1
- mlrun/common/constants.py +11 -0
- mlrun/common/schemas/model_monitoring/__init__.py +2 -0
- mlrun/common/schemas/model_monitoring/functions.py +2 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +19 -1
- mlrun/common/schemas/serving.py +1 -0
- mlrun/common/schemas/workflow.py +3 -2
- mlrun/datastore/azure_blob.py +1 -1
- mlrun/datastore/base.py +4 -2
- mlrun/datastore/datastore.py +46 -14
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/s3.py +16 -5
- mlrun/datastore/sources.py +2 -2
- mlrun/datastore/targets.py +2 -2
- mlrun/db/__init__.py +0 -1
- mlrun/db/base.py +12 -0
- mlrun/db/httpdb.py +35 -0
- mlrun/db/nopdb.py +10 -0
- mlrun/execution.py +12 -0
- mlrun/frameworks/tf_keras/mlrun_interface.py +7 -18
- mlrun/launcher/base.py +1 -0
- mlrun/launcher/client.py +1 -0
- mlrun/launcher/local.py +4 -0
- mlrun/model.py +15 -4
- mlrun/model_monitoring/applications/base.py +74 -56
- mlrun/model_monitoring/db/tsdb/base.py +52 -19
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +179 -11
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +26 -11
- mlrun/model_monitoring/helpers.py +48 -0
- mlrun/projects/pipelines.py +12 -3
- mlrun/projects/project.py +30 -0
- mlrun/runtimes/daskjob.py +2 -0
- mlrun/runtimes/kubejob.py +4 -0
- mlrun/runtimes/mpijob/abstract.py +2 -0
- mlrun/runtimes/mpijob/v1.py +2 -0
- mlrun/runtimes/nuclio/function.py +2 -0
- mlrun/runtimes/nuclio/serving.py +59 -0
- mlrun/runtimes/pod.py +3 -0
- mlrun/runtimes/remotesparkjob.py +2 -0
- mlrun/runtimes/sparkjob/spark3job.py +2 -0
- mlrun/serving/server.py +97 -3
- mlrun/serving/states.py +146 -38
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc10.dist-info → mlrun-1.10.0rc11.dist-info}/METADATA +13 -6
- {mlrun-1.10.0rc10.dist-info → mlrun-1.10.0rc11.dist-info}/RECORD +49 -51
- mlrun/db/sql_types.py +0 -160
- mlrun/utils/db.py +0 -71
- {mlrun-1.10.0rc10.dist-info → mlrun-1.10.0rc11.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc10.dist-info → mlrun-1.10.0rc11.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc10.dist-info → mlrun-1.10.0rc11.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc10.dist-info → mlrun-1.10.0rc11.dist-info}/top_level.txt +0 -0
mlrun/artifacts/manager.py
CHANGED
mlrun/common/constants.py
CHANGED
@@ -27,6 +27,10 @@ DASK_LABEL_PREFIX = "dask.org/"
 NUCLIO_LABEL_PREFIX = "nuclio.io/"
 RESERVED_TAG_NAME_LATEST = "latest"
 
+JOB_TYPE_WORKFLOW_RUNNER = "workflow-runner"
+JOB_TYPE_PROJECT_LOADER = "project-loader"
+JOB_TYPE_RERUN_WORKFLOW_RUNNER = "rerun-workflow-runner"
+
 
 class MLRunInternalLabels:
     ### dask

@@ -76,7 +80,9 @@ class MLRunInternalLabels:
     kind = "kind"
     component = "component"
     mlrun_type = "mlrun__type"
+    rerun_of = "rerun-of"
     original_workflow_id = "original-workflow-id"
+    workflow_id = "workflow-id"
 
     owner = "owner"
     v3io_user = "v3io_user"

@@ -102,3 +108,8 @@ class MLRunInternalLabels:
 class DeployStatusTextKind(mlrun.common.types.StrEnum):
     logs = "logs"
     events = "events"
+
+
+class WorkflowSubmitMode(mlrun.common.types.StrEnum):
+    direct = "direct"  # call KFP retry API directly
+    rerun = "rerun"  # launch a RerunRunner function
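The new WorkflowSubmitMode values behave as plain strings. A minimal sketch of that behavior, using Python 3.11's enum.StrEnum as a stand-in for mlrun.common.types.StrEnum:

from enum import StrEnum


class WorkflowSubmitMode(StrEnum):
    direct = "direct"  # call the KFP retry API directly
    rerun = "rerun"  # launch a rerun workflow-runner function


# members compare equal to their string values, so a mode can travel as a
# plain query-string parameter and be parsed back into the enum:
assert WorkflowSubmitMode.rerun == "rerun"
assert WorkflowSubmitMode("direct") is WorkflowSubmitMode.direct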
mlrun/common/schemas/model_monitoring/functions.py
CHANGED

@@ -34,6 +34,7 @@ class FunctionSummary(BaseModel):
     type: FunctionsType
     name: str
     application_class: str
+    project_name: str
     updated_time: datetime
     status: Optional[str] = None
     base_period: Optional[int] = None

@@ -59,6 +60,7 @@ class FunctionSummary(BaseModel):
             else func_dict["spec"]["graph"]["steps"]["PushToMonitoringWriter"]["after"][
                 0
             ],
+            project_name=func_dict["metadata"]["project"],
             updated_time=func_dict["metadata"].get("updated"),
             status=func_dict["status"].get("state"),
             base_period=base_period,
mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED

@@ -14,7 +14,7 @@
 import abc
 import json
 from datetime import datetime
-from typing import Any, NamedTuple, Optional, TypeVar
+from typing import Any, Literal, NamedTuple, Optional, TypeVar
 from uuid import UUID
 
 from pydantic import validator  # use `validator` if you’re still on Pydantic v1

@@ -334,6 +334,24 @@ class ModelEndpointMonitoringMetricNoData(_ModelEndpointMonitoringMetricValuesBa
     data: bool = False
 
 
+class ApplicationBaseRecord(BaseModel):
+    type: Literal["metric", "result"]
+    time: datetime
+    value: float
+
+
+class ApplicationResultRecord(ApplicationBaseRecord):
+    kind: ResultKindApp
+    status: ResultStatusApp
+    result_name: str
+    type: Literal["result"] = "result"
+
+
+class ApplicationMetricRecord(ApplicationBaseRecord):
+    metric_name: str
+    type: Literal["metric"] = "metric"
+
+
 def _mapping_attributes(
     model_class: type[Model],
     flattened_dictionary: dict,
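The Literal "type" field added to the application records lets a parser discriminate metric rows from result rows in mixed payloads. A minimal sketch in pydantic v1 style (matching mlrun's pydantic.v1 usage), with plain ints standing in for the ResultKindApp and ResultStatusApp enums:

from datetime import datetime
from typing import Literal, Union

from pydantic import BaseModel, parse_obj_as


class ApplicationBaseRecord(BaseModel):
    type: Literal["metric", "result"]
    time: datetime
    value: float


class ApplicationResultRecord(ApplicationBaseRecord):
    kind: int  # stand-in for ResultKindApp
    status: int  # stand-in for ResultStatusApp
    result_name: str
    type: Literal["result"] = "result"


class ApplicationMetricRecord(ApplicationBaseRecord):
    metric_name: str
    type: Literal["metric"] = "metric"


# the Literal mismatch rules out ApplicationResultRecord, so the union
# resolves to ApplicationMetricRecord:
record = parse_obj_as(
    Union[ApplicationResultRecord, ApplicationMetricRecord],
    {"type": "metric", "time": "2025-01-01T00:00:00", "value": 0.42, "metric_name": "drift"},
)
assert isinstance(record, ApplicationMetricRecord)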
mlrun/common/schemas/serving.py
CHANGED
mlrun/common/schemas/workflow.py
CHANGED
@@ -47,8 +47,9 @@ class WorkflowRequest(pydantic.v1.BaseModel):
 
 
 class RerunWorkflowRequest(pydantic.v1.BaseModel):
-    run_name: typing.Optional[str] =
-    run_id: typing.Optional[str] =
+    run_name: typing.Optional[str] = None
+    run_id: typing.Optional[str] = None
+    original_workflow_id: typing.Optional[str] = None
     notifications: typing.Optional[list[Notification]] = None
     workflow_runner_node_selector: typing.Optional[dict[str, str]] = None
 
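All three identifier fields on RerunWorkflowRequest now default to None, so a caller can supply whichever handle it has. A small sketch with a plain pydantic BaseModel standing in for pydantic.v1.BaseModel:

import typing

from pydantic import BaseModel


class RerunWorkflowRequest(BaseModel):
    run_name: typing.Optional[str] = None
    run_id: typing.Optional[str] = None
    original_workflow_id: typing.Optional[str] = None


# only the handle the caller actually has needs to be set:
req = RerunWorkflowRequest(original_workflow_id="wf-1234")
assert req.run_name is None and req.original_workflow_id == "wf-1234"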
mlrun/datastore/azure_blob.py
CHANGED
@@ -224,7 +224,7 @@ class AzureBlobStore(DataStore):
         path = self._convert_key_to_remote_path(key=path)
         super().rm(path=path, recursive=recursive, maxdepth=maxdepth)
 
-    def get_spark_options(self):
+    def get_spark_options(self, path=None):
         res = {}
         st = self.storage_options
         service = "blob"
mlrun/datastore/base.py
CHANGED
@@ -48,7 +48,9 @@ class FileStats:
 class DataStore:
     using_bucket = False
 
-    def __init__(
+    def __init__(
+        self, parent, name, kind, endpoint="", secrets: Optional[dict] = None, **kwargs
+    ):
         self._parent = parent
         self.kind = kind
         self.name = name

@@ -176,7 +178,7 @@ class DataStore:
     def upload(self, key, src_path):
         pass
 
-    def get_spark_options(self):
+    def get_spark_options(self, path=None):
         return {}
 
     @staticmethod
mlrun/datastore/datastore.py
CHANGED
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import warnings
 from typing import Optional
 from urllib.parse import urlparse
 

@@ -105,8 +106,7 @@ def schema_to_store(schema) -> DataStore.__subclasses__():
         from .alibaba_oss import OSSStore
 
         return OSSStore
-
-    raise ValueError(f"unsupported store scheme ({schema})")
+    raise ValueError(f"unsupported store scheme ({schema})")
 
 
 def uri_to_ipython(link):

@@ -210,12 +210,20 @@ class StoreManager:
             artifact_url=artifact_url,
         )
 
-    def
-    self,
+    def _get_or_create_remote_client(
+        self,
+        url,
+        secrets: Optional[dict] = None,
+        project_name="",
+        cache: Optional[dict] = None,
+        schema_to_class: callable = schema_to_store,
+        **kwargs,
     ) -> (DataStore, str, str):
+        # The cache can be an empty dictionary ({}), even if it is a _stores object
+        cache = cache if cache is not None else {}
         schema, endpoint, parsed_url = parse_url(url)
         subpath = parsed_url.path
-
+        cache_key = f"{schema}://{endpoint}" if endpoint else f"{schema}://"
 
         if schema == "ds":
             datastore_profile = datastore_profile_read(url, project_name, secrets)

@@ -237,24 +245,48 @@ class StoreManager:
             subpath = url.replace("file://", "", 1)
 
         if not schema and endpoint:
-            if endpoint in
-                return
+            if endpoint in cache.keys():
+                return cache[endpoint], subpath, url
             else:
                 raise ValueError(f"no such store ({endpoint})")
 
         if not secrets and not mlrun.config.is_running_as_api():
-            if
-                return
+            if cache_key in cache.keys():
+                return cache[cache_key], subpath, url
 
         # support u/p embedding in url (as done in redis) by setting netloc as the "endpoint" parameter
        # when running on server we don't cache the datastore, because there are multiple users and we don't want to
         # cache the credentials, so for each new request we create a new store
-
-
+        remote_client_class = schema_to_class(schema)
+        remote_client = None
+        if remote_client_class:
+            remote_client = remote_client_class(
+                self, schema, cache_key, parsed_url.netloc, secrets=secrets, **kwargs
+            )
+            if not secrets and not mlrun.config.is_running_as_api():
+                cache[cache_key] = remote_client
+        else:
+            warnings.warn("scheme not found. Returning None")
+        return remote_client, subpath, url
+
+    def get_or_create_store(
+        self,
+        url,
+        secrets: Optional[dict] = None,
+        project_name="",
+    ) -> (DataStore, str, str):
+        datastore, sub_path, url = self._get_or_create_remote_client(
+            url=url,
+            secrets=secrets,
+            project_name=project_name,
+            cache=self._stores,
+            schema_to_class=schema_to_store,
         )
-        if not
-
-
+        if not isinstance(datastore, DataStore):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "remote client by url is not datastore"
+            )
+        return datastore, sub_path, url
 
     def reset_secrets(self):
         self._secrets = {}
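The refactor threads an explicit cache through _get_or_create_remote_client and keys it by scheme and endpoint, so two URLs on the same host reuse one client. A self-contained sketch of the key derivation (urlparse stands in for mlrun's parse_url helper):

from urllib.parse import urlparse


def cache_key_for(url: str) -> str:
    parsed = urlparse(url)
    schema, endpoint = parsed.scheme, parsed.netloc
    # mirrors the diff: f"{schema}://{endpoint}" when an endpoint exists
    return f"{schema}://{endpoint}" if endpoint else f"{schema}://"


# both URLs map to the same cached client entry:
assert cache_key_for("s3://my-bucket/data/a.parquet") == "s3://my-bucket"
assert cache_key_for("s3://my-bucket/other/b.csv") == "s3://my-bucket"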
mlrun/datastore/google_cloud_storage.py
CHANGED

@@ -194,7 +194,7 @@ class GoogleCloudStorageStore(DataStore):
         self.filesystem.exists(path)
         super().rm(path, recursive=recursive, maxdepth=maxdepth)
 
-    def get_spark_options(self):
+    def get_spark_options(self, path=None):
         res = {}
         st = self._get_credentials()
         if "token" in st:
mlrun/datastore/s3.py
CHANGED
@@ -14,6 +14,7 @@
 
 import time
 from typing import Optional
+from urllib.parse import urlparse
 
 import boto3
 from boto3.s3.transfer import TransferConfig

@@ -115,17 +116,27 @@ class S3Store(DataStore):
         byterange += str(offset + size - 1)
         return byterange
 
-    def get_spark_options(self):
+    def get_spark_options(self, path=None):
         res = {}
+        bucket_str = ""
+        if path:
+            parsed = urlparse(path)
+            if parsed.scheme:  # s3:// or s3a://
+                bucket = parsed.hostname
+            else:
+                # drop a leading slash, if any and take 1st segment
+                bucket = path.lstrip("/").split("/", 1)[0]
+            bucket_str = f".bucket.{bucket}"
+
         st = self.get_storage_options()
         if st.get("key"):
-            res["spark.hadoop.fs.s3a.access.key"] = st.get("key")
+            res[f"spark.hadoop.fs.s3a{bucket_str}.access.key"] = st.get("key")
         if st.get("secret"):
-            res["spark.hadoop.fs.s3a.secret.key"] = st.get("secret")
+            res[f"spark.hadoop.fs.s3a{bucket_str}.secret.key"] = st.get("secret")
         if st.get("endpoint_url"):
-            res["spark.hadoop.fs.s3a.endpoint"] = st.get("endpoint_url")
+            res[f"spark.hadoop.fs.s3a{bucket_str}.endpoint"] = st.get("endpoint_url")
         if st.get("profile"):
-            res["spark.hadoop.fs.s3a.aws.profile"] = st.get("profile")
+            res[f"spark.hadoop.fs.s3a{bucket_str}.aws.profile"] = st.get("profile")
         return res
 
     @property
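With the new path argument, S3Store can scope the Hadoop options to a single bucket (spark.hadoop.fs.s3a.bucket.<name>.*) instead of setting them globally; the CSV/Parquet sources and targets below now pass store.spark_url + path for exactly this reason. A sketch mirroring the bucket extraction from the diff:

from urllib.parse import urlparse


def s3_spark_option_prefix(path: str = None) -> str:
    bucket_str = ""
    if path:
        parsed = urlparse(path)
        if parsed.scheme:  # s3:// or s3a://
            bucket = parsed.hostname
        else:
            # drop a leading slash, if any, and take the first segment
            bucket = path.lstrip("/").split("/", 1)[0]
        bucket_str = f".bucket.{bucket}"
    return f"spark.hadoop.fs.s3a{bucket_str}"


# per-bucket prefix when a URL is given, the global prefix otherwise:
assert s3_spark_option_prefix("s3a://my-bucket/data") == "spark.hadoop.fs.s3a.bucket.my-bucket"
assert s3_spark_option_prefix() == "spark.hadoop.fs.s3a"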
mlrun/datastore/sources.py
CHANGED
@@ -220,7 +220,7 @@ class CSVSource(BaseSourceDriver):
 
     def get_spark_options(self):
         store, path, _ = mlrun.store_manager.get_or_create_store(self.path)
-        spark_options = store.get_spark_options()
+        spark_options = store.get_spark_options(store.spark_url + path)
         spark_options.update(
             {
                 "path": store.spark_url + path,

@@ -407,7 +407,7 @@ class ParquetSource(BaseSourceDriver):
 
     def get_spark_options(self):
         store, path, _ = mlrun.store_manager.get_or_create_store(self.path)
-        spark_options = store.get_spark_options()
+        spark_options = store.get_spark_options(store.spark_url + path)
         spark_options.update(
             {
                 "path": store.spark_url + path,
mlrun/datastore/targets.py
CHANGED
@@ -970,7 +970,7 @@ class ParquetTarget(BaseStoreTarget):
                 break
 
         store, path, url = self._get_store_and_path()
-        spark_options = store.get_spark_options()
+        spark_options = store.get_spark_options(store.spark_url + path)
         spark_options.update(
             {
                 "path": store.spark_url + path,

@@ -1104,7 +1104,7 @@ class CSVTarget(BaseStoreTarget):
 
     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
         store, path, url = self._get_store_and_path()
-        spark_options = store.get_spark_options()
+        spark_options = store.get_spark_options(store.spark_url + path)
         spark_options.update(
             {
                 "path": store.spark_url + path,
mlrun/db/__init__.py
CHANGED
mlrun/db/base.py
CHANGED
@@ -645,6 +645,7 @@ class RunDBInterface(ABC):
         project: str,
         namespace: Optional[str] = None,
         timeout: int = 30,
+        submit_mode: str = "",
     ):
         pass
 

@@ -1130,6 +1131,17 @@ class RunDBInterface(ABC):
     ) -> list[mlrun.common.schemas.model_monitoring.FunctionSummary]:
         pass
 
+    @abstractmethod
+    def get_monitoring_function_summary(
+        self,
+        project: str,
+        function_name: str,
+        start: Optional[datetime.datetime] = None,
+        end: Optional[datetime.datetime] = None,
+        include_latest_metrics: bool = False,
+    ) -> mlrun.common.schemas.model_monitoring.FunctionSummary:
+        pass
+
     @abstractmethod
     def get_project_summary(self, project: str) -> mlrun.common.schemas.ProjectSummary:
         pass
mlrun/db/httpdb.py
CHANGED
@@ -2350,6 +2350,7 @@ class HTTPRunDB(RunDBInterface):
         project: str,
         namespace: Optional[str] = None,
         timeout: int = 30,
+        submit_mode: str = "",
     ):
         """
         Retry a specific pipeline run using its run ID. This function sends an API request

@@ -2359,6 +2360,7 @@ class HTTPRunDB(RunDBInterface):
         :param namespace: Kubernetes namespace where the pipeline is running. Optional.
         :param timeout: Timeout (in seconds) for the API call. Defaults to 30 seconds.
         :param project: Name of the MLRun project associated with the pipeline.
+        :param submit_mode: Whether to submit the pipeline directly to the API.
 
         :raises ValueError: Raised if the API response is not successful or contains an
             error.

@@ -2370,6 +2372,9 @@ class HTTPRunDB(RunDBInterface):
         if namespace:
             params["namespace"] = namespace
 
+        if submit_mode:
+            params["submit-mode"] = submit_mode
+
         resp_text = ""
         resp_code = None
         try:

@@ -4188,6 +4193,36 @@ class HTTPRunDB(RunDBInterface):
             results.append(FunctionSummary(**item))
         return results
 
+    def get_monitoring_function_summary(
+        self,
+        project: str,
+        function_name: str,
+        start: Optional[datetime] = None,
+        end: Optional[datetime] = None,
+        include_latest_metrics: bool = False,
+    ) -> FunctionSummary:
+        """
+        Get a monitoring function summary for the specified project and function.
+        :param project: The name of the project.
+        :param function_name: The name of the function.
+        :param start: Start time for filtering the results (optional).
+        :param end: End time for filtering the results (optional).
+        :param include_latest_metrics: Whether to include the latest metrics in the response (default is False).
+
+        :return: A FunctionSummary object containing information about the monitoring function.
+        """
+
+        response = self.api_call(
+            method=mlrun.common.types.HTTPMethod.GET,
+            path=f"projects/{project}/model-monitoring/function-summaries/{function_name}",
+            params={
+                "start": datetime_to_iso(start),
+                "end": datetime_to_iso(end),
+                "include-latest-metrics": include_latest_metrics,
+            },
+        )
+        return FunctionSummary(**response.json())
+
     def create_hub_source(
         self, source: Union[dict, mlrun.common.schemas.IndexedHubSource]
     ):
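A hedged usage sketch for the two client additions, assuming a reachable MLRun API server; the project, function, and run-id values are hypothetical placeholders, and the run_id keyword is an assumption (that parameter is not visible in this hunk):

import mlrun

db = mlrun.get_run_db()

# ask the server to resubmit through the rerun runner instead of calling the
# KFP retry API directly (sent as the "submit-mode" query parameter):
db.retry_pipeline(run_id="<run-id>", project="my-project", submit_mode="rerun")

# fetch a single monitoring function summary, including its latest metrics:
summary = db.get_monitoring_function_summary(
    project="my-project",
    function_name="my-monitoring-app",
    include_latest_metrics=True,
)
print(summary.project_name, summary.status)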
mlrun/db/nopdb.py
CHANGED
@@ -902,6 +902,16 @@ class NopDB(RunDBInterface):
     ) -> [mlrun.common.schemas.model_monitoring.FunctionSummary]:
         pass
 
+    def get_monitoring_function_summary(
+        self,
+        project: str,
+        function_name: str,
+        start: Optional[datetime.datetime] = None,
+        end: Optional[datetime.datetime] = None,
+        include_latest_metrics: bool = False,
+    ) -> mlrun.common.schemas.model_monitoring.FunctionSummary:
+        pass
+
     def generate_event(
         self, name: str, event_data: Union[dict, mlrun.common.schemas.Event], project=""
     ):
mlrun/execution.py
CHANGED
@@ -1286,6 +1286,18 @@ class MLClientCtx:
             self.to_dict(), self._uid, self.project, iter=self._iteration
         )
 
+    def update_run(self):
+        """
+        Store the run object in the DB - removes missing fields.
+        Use _update_run for coherent updates.
+        Should be called by the logging worker only (see is_logging_worker()).
+        """
+        self._write_tmpfile()
+        if self._rundb:
+            self._rundb.update_run(
+                self.to_dict(), self._uid, self.project, iter=self._iteration
+            )
+
     def is_logging_worker(self):
         """
         Check if the current worker is the logging worker.
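A short sketch of how a job handler might use the new method, following the docstring's guidance that only the logging worker should call it:

def handler(context):
    # context is an mlrun.execution.MLClientCtx injected by the runtime
    context.log_result("accuracy", 0.97)
    if context.is_logging_worker():
        context.update_run()  # push the current run object to the DB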
mlrun/frameworks/tf_keras/mlrun_interface.py
CHANGED

@@ -107,14 +107,10 @@ class TFKerasMLRunInterface(MLRunInterface, ABC):
         )
 
         # Call the pre compile method:
-
-            optimizer=kwargs["optimizer"]
-        )
+        optimizer = self._pre_compile(optimizer=kwargs["optimizer"])
 
         # Assign parameters:
         kwargs["optimizer"] = optimizer
-        if experimental_run_tf_function is not None:
-            kwargs["experimental_run_tf_function"] = experimental_run_tf_function
 
         # Call the original compile method:
         return self.original_compile(*args, **kwargs)

@@ -235,23 +231,20 @@ class TFKerasMLRunInterface(MLRunInterface, ABC):
         """
         self._RANK_0_ONLY_CALLBACKS.add(callback_name)
 
-    def _pre_compile(self, optimizer: Optimizer) ->
+    def _pre_compile(self, optimizer: Optimizer) -> Optimizer:
         """
         Method to call before calling 'compile' to setup the run and inputs for using horovod.
 
         :param optimizer: The optimzier to compile. It will be wrapped in horovod's distributed optimizer:
                           'hvd.DistributedOptimizer'.
 
-        :return: The updated
-                 [0] = Wrapped optimizer.
-                 [1] = The 'experimental_run_tf_function' parameter for 'compile' kwargs or 'None' if horovod should not
-                 be used.
+        :return: The updated Wrapped optimizer.
 
         :raise MLRunInvalidArgumentError: In case the optimizer was passed as a string.
         """
         # Check if needed to run with horovod:
         if self._hvd is None:
-            return optimizer
+            return optimizer
 
         # Validate the optimizer input:
         if isinstance(optimizer, str):

@@ -281,18 +274,14 @@ class TFKerasMLRunInterface(MLRunInterface, ABC):
 
         # Adjust learning rate based on the number of GPUs:
         if hasattr(optimizer, "lr"):
-            optimizer.lr
+            optimizer.lr = optimizer.lr * self._hvd.size()
         else:
-            optimizer.learning_rate
+            optimizer.learning_rate = optimizer.learning_rate * self._hvd.size()
 
         # Wrap the optimizer in horovod's distributed optimizer: 'hvd.DistributedOptimizer'.
         optimizer = self._hvd.DistributedOptimizer(optimizer)
 
-
-        # optimizer to compute the gradients:
-        experimental_run_tf_function = False
-
-        return optimizer, experimental_run_tf_function
+        return optimizer
 
     def _pre_fit(
         self,
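The horovod path now returns only the wrapped optimizer and no longer produces an experimental_run_tf_function flag. A simplified sketch of that contract, with small stubs standing in for horovod and the Keras optimizer so the scaling logic runs without either installed:

class _FakeHvd:
    @staticmethod
    def size():
        return 4  # pretend four workers

    @staticmethod
    def DistributedOptimizer(opt):
        return opt  # horovod wraps the optimizer; the stub passes it through


class _FakeOptimizer:
    lr = 0.001


def pre_compile(optimizer, hvd=_FakeHvd):
    if hvd is None:
        return optimizer
    # scale the learning rate by the number of workers, as the diff does:
    optimizer.lr = optimizer.lr * hvd.size()
    return hvd.DistributedOptimizer(optimizer)


opt = pre_compile(_FakeOptimizer())
assert abs(opt.lr - 0.004) < 1e-12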
mlrun/launcher/base.py
CHANGED
mlrun/launcher/client.py
CHANGED
@@ -36,6 +36,7 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
         runtime: "mlrun.runtimes.base.BaseRuntime",
         project_name: Optional[str] = "",
         full: bool = True,
+        client_version: str = "",
     ):
         runtime.try_auto_mount_based_on_config()
         runtime._fill_credentials()
mlrun/launcher/local.py
CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 import os
 import pathlib
+from os import environ
 from typing import Callable, Optional, Union
 
 import mlrun.common.constants as mlrun_constants

@@ -251,6 +252,9 @@ class ClientLocalLauncher(launcher.ClientBaseLauncher):
         # copy the code/base-spec to the local function (for the UI and code logging)
         fn.spec.description = runtime.spec.description
         fn.spec.build = runtime.spec.build
+        serving_spec = getattr(runtime.spec, "serving_spec", None)
+        if serving_spec:
+            environ["SERVING_SPEC_ENV"] = serving_spec
 
         run.spec.handler = handler
         run.spec.reset_on_run = reset_on_run
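A small sketch of the pattern the launcher now uses: read an optional attribute off the runtime spec and, only when present, expose it to the local process through the SERVING_SPEC_ENV variable (the spec value here is a hypothetical placeholder):

from os import environ


class _RuntimeSpec:
    serving_spec = '{"graph": {}}'  # hypothetical serialized serving spec


serving_spec = getattr(_RuntimeSpec(), "serving_spec", None)
if serving_spec:
    environ["SERVING_SPEC_ENV"] = serving_spec
assert environ["SERVING_SPEC_ENV"] == '{"graph": {}}'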
mlrun/model.py
CHANGED
@@ -19,7 +19,7 @@
 import time
 import typing
 from collections import OrderedDict
-from copy import deepcopy
+from copy import copy, deepcopy
 from datetime import datetime
 from os import environ
 from typing import Any, Optional, Union

@@ -221,15 +221,26 @@ class ModelObj:
 
     @classmethod
     def from_dict(
-        cls,
+        cls,
+        struct=None,
+        fields=None,
+        deprecated_fields: Optional[dict] = None,
+        init_with_params: bool = False,
     ):
         """create an object from a python dictionary"""
-        struct = {} if struct is None else struct
+        struct = {} if struct is None else copy(struct)
         deprecated_fields = deprecated_fields or {}
         fields = fields or cls._dict_fields
         if not fields:
             fields = list(inspect.signature(cls.__init__).parameters.keys())
-
+
+        if init_with_params:
+            kwargs = {field: struct.pop(field, None) for field in fields}
+            kwargs.pop("self", None)
+            new_obj = cls(**kwargs)
+        else:
+            new_obj = cls()
+
         if struct:
             # we are looping over the fields to save the same order and behavior in which the class
             # initialize the attributes