mlrun 1.6.2rc6__py3-none-any.whl → 1.6.3rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/artifacts/model.py +28 -22
- mlrun/common/db/sql_session.py +3 -0
- mlrun/common/model_monitoring/helpers.py +4 -2
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/common.py +40 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +21 -5
- mlrun/common/schemas/project.py +2 -0
- mlrun/config.py +51 -20
- mlrun/data_types/data_types.py +4 -0
- mlrun/datastore/azure_blob.py +9 -9
- mlrun/datastore/base.py +22 -44
- mlrun/datastore/google_cloud_storage.py +6 -6
- mlrun/datastore/v3io.py +70 -46
- mlrun/db/base.py +18 -0
- mlrun/db/httpdb.py +41 -36
- mlrun/execution.py +3 -3
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
- mlrun/frameworks/tf_keras/model_handler.py +7 -7
- mlrun/k8s_utils.py +10 -5
- mlrun/kfpops.py +19 -10
- mlrun/model.py +6 -0
- mlrun/model_monitoring/api.py +8 -8
- mlrun/model_monitoring/batch.py +1 -1
- mlrun/model_monitoring/controller.py +0 -7
- mlrun/model_monitoring/features_drift_table.py +6 -0
- mlrun/model_monitoring/helpers.py +4 -1
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +13 -13
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
- mlrun/model_monitoring/stream_processing.py +50 -36
- mlrun/package/packagers/pandas_packagers.py +3 -3
- mlrun/package/utils/_archiver.py +3 -1
- mlrun/platforms/iguazio.py +6 -65
- mlrun/projects/pipelines.py +29 -12
- mlrun/projects/project.py +69 -55
- mlrun/run.py +2 -0
- mlrun/runtimes/base.py +24 -1
- mlrun/runtimes/function.py +9 -9
- mlrun/runtimes/kubejob.py +5 -3
- mlrun/runtimes/local.py +2 -2
- mlrun/runtimes/mpijob/abstract.py +6 -6
- mlrun/runtimes/pod.py +3 -3
- mlrun/runtimes/serving.py +3 -3
- mlrun/runtimes/sparkjob/spark3job.py +3 -3
- mlrun/serving/remote.py +4 -2
- mlrun/utils/async_http.py +3 -3
- mlrun/utils/helpers.py +20 -0
- mlrun/utils/http.py +3 -3
- mlrun/utils/logger.py +2 -2
- mlrun/utils/notifications/notification_pusher.py +6 -6
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3rc3.dist-info}/METADATA +15 -17
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3rc3.dist-info}/RECORD +57 -56
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3rc3.dist-info}/LICENSE +0 -0
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3rc3.dist-info}/WHEEL +0 -0
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3rc3.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3rc3.dist-info}/top_level.txt +0 -0
mlrun/artifacts/model.py
CHANGED
@@ -13,8 +13,9 @@
 # limitations under the License.
 import tempfile
 from os import path
-from typing import List
+from typing import Any
 
+import pandas as pd
 import yaml
 from deprecated import deprecated
 
@@ -68,8 +69,8 @@ class ModelArtifactSpec(ArtifactSpec):
         model_file=None,
         metrics=None,
         paraemeters=None,
-        inputs: List[Feature] = None,
-        outputs: List[Feature] = None,
+        inputs: list[Feature] = None,
+        outputs: list[Feature] = None,
         framework=None,
         algorithm=None,
         feature_vector=None,
@@ -91,8 +92,8 @@ class ModelArtifactSpec(ArtifactSpec):
         self.model_file = model_file
         self.metrics = metrics or {}
         self.parameters = paraemeters or {}
-        self.inputs: List[Feature] = inputs or []
-        self.outputs: List[Feature] = outputs or []
+        self.inputs: list[Feature] = inputs or []
+        self.outputs: list[Feature] = outputs or []
         self.framework = framework
         self.algorithm = algorithm
         self.feature_vector = feature_vector
@@ -101,21 +102,21 @@ class ModelArtifactSpec(ArtifactSpec):
         self.model_target_file = model_target_file
 
     @property
-    def inputs(self) -> List[Feature]:
+    def inputs(self) -> list[Feature]:
         """input feature list"""
         return self._inputs
 
     @inputs.setter
-    def inputs(self, inputs: List[Feature]):
+    def inputs(self, inputs: list[Feature]):
         self._inputs = ObjectList.from_list(Feature, inputs)
 
     @property
-    def outputs(self) -> List[Feature]:
+    def outputs(self) -> list[Feature]:
         """output feature list"""
         return self._outputs
 
     @outputs.setter
-    def outputs(self, outputs: List[Feature]):
+    def outputs(self, outputs: list[Feature]):
         self._outputs = ObjectList.from_list(Feature, outputs)
 
 
@@ -175,22 +176,22 @@ class ModelArtifact(Artifact):
         self._spec = self._verify_dict(spec, "spec", ModelArtifactSpec)
 
     @property
-    def inputs(self) -> List[Feature]:
+    def inputs(self) -> list[Feature]:
         """input feature list"""
         return self.spec.inputs
 
     @inputs.setter
-    def inputs(self, inputs: List[Feature]):
+    def inputs(self, inputs: list[Feature]):
         """input feature list"""
         self.spec.inputs = inputs
 
     @property
-    def outputs(self) -> List[Feature]:
+    def outputs(self) -> list[Feature]:
         """input feature list"""
         return self.spec.outputs
 
     @outputs.setter
-    def outputs(self, outputs: List[Feature]):
+    def outputs(self, outputs: list[Feature]):
         """input feature list"""
         self.spec.outputs = outputs
 
@@ -260,6 +261,7 @@ class ModelArtifact(Artifact):
         """
         subset = df
         inferer = get_infer_interface(subset)
+        numeric_columns = self._extract_numeric_features(df)
         if label_columns:
             if not isinstance(label_columns, list):
                 label_columns = [label_columns]
@@ -273,9 +275,13 @@ class ModelArtifact(Artifact):
         )
         if with_stats:
             self.spec.feature_stats = inferer.get_stats(
-                df, options=InferOptions.Histogram, num_bins=num_bins
+                df[numeric_columns], options=InferOptions.Histogram, num_bins=num_bins
             )
 
+    @staticmethod
+    def _extract_numeric_features(df: pd.DataFrame) -> list[Any]:
+        return [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])]
+
     @property
     def is_dir(self):
         return True
@@ -445,8 +451,8 @@ class LegacyModelArtifact(LegacyArtifact):
         self.model_file = model_file
         self.parameters = parameters or {}
         self.metrics = metrics or {}
-        self.inputs: List[Feature] = inputs or []
-        self.outputs: List[Feature] = outputs or []
+        self.inputs: list[Feature] = inputs or []
+        self.outputs: list[Feature] = outputs or []
         self.extra_data = extra_data or {}
         self.framework = framework
         self.algorithm = algorithm
@@ -456,21 +462,21 @@ class LegacyModelArtifact(LegacyArtifact):
         self.model_target_file = model_target_file
 
     @property
-    def inputs(self) -> List[Feature]:
+    def inputs(self) -> list[Feature]:
         """input feature list"""
         return self._inputs
 
     @inputs.setter
-    def inputs(self, inputs: List[Feature]):
+    def inputs(self, inputs: list[Feature]):
         self._inputs = ObjectList.from_list(Feature, inputs)
 
     @property
-    def outputs(self) -> List[Feature]:
+    def outputs(self) -> list[Feature]:
         """output feature list"""
         return self._outputs
 
     @outputs.setter
-    def outputs(self, outputs: List[Feature]):
+    def outputs(self, outputs: list[Feature]):
         self._outputs = ObjectList.from_list(Feature, outputs)
 
     def infer_from_df(self, df, label_columns=None, with_stats=True, num_bins=None):
@@ -642,8 +648,8 @@ def update_model(
     parameters: dict = None,
    metrics: dict = None,
     extra_data: dict = None,
-    inputs: List[Feature] = None,
-    outputs: List[Feature] = None,
+    inputs: list[Feature] = None,
+    outputs: list[Feature] = None,
     feature_vector: str = None,
     feature_weights: list = None,
     key_prefix: str = "",
mlrun/common/db/sql_session.py
CHANGED
@@ -63,9 +63,12 @@ def _init_engine(dsn=None):
     max_overflow = config.httpdb.db.connections_pool_max_overflow
     if max_overflow is None:
         max_overflow = config.httpdb.max_workers
+
     kwargs = {
         "pool_size": pool_size,
         "max_overflow": max_overflow,
+        "pool_pre_ping": config.httpdb.db.connections_pool_pre_ping,
+        "pool_recycle": config.httpdb.db.connections_pool_recycle,
     }
     engine = create_engine(dsn, **kwargs)
     _engines[dsn] = engine
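`pool_pre_ping` and `pool_recycle` are standard SQLAlchemy engine options: pre-ping issues a lightweight liveness test on each connection checkout, and recycle retires connections older than the given age, which avoids stale-connection errors against a database's idle timeout. A minimal sketch (the DSN is illustrative and assumes a pymysql driver is installed):

from sqlalchemy import create_engine

engine = create_engine(
    "mysql+pymysql://user:pass@db-host:3306/mlrun",  # illustrative DSN
    pool_size=8,
    max_overflow=8,
    pool_pre_ping=True,    # test each pooled connection for liveness on checkout
    pool_recycle=60 * 60,  # replace connections older than one hour
)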
mlrun/common/model_monitoring/helpers.py
CHANGED

@@ -82,13 +82,15 @@ def parse_monitoring_stream_path(
     if application_name is None:
         stream_uri = (
             mlrun.mlconf.model_endpoint_monitoring.default_http_sink.format(
-                project=project
+                project=project, namespace=mlrun.mlconf.namespace
             )
         )
     else:
         stream_uri = (
             mlrun.mlconf.model_endpoint_monitoring.default_http_sink_app.format(
-                project=project, application_name=application_name
+                project=project,
+                application_name=application_name,
+                namespace=mlrun.mlconf.namespace,
             )
         )
     return stream_uri
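Both sink templates are plain str.format targets, so the fix is just threading a namespace placeholder through instead of assuming a fixed namespace. How the new template resolves, with illustrative values:

template = "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080"
print(template.format(project="fraud-demo", namespace="mlrun"))
# http://nuclio-fraud-demo-model-monitoring-stream.mlrun.svc.cluster.local:8080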
mlrun/common/schemas/__init__.py
CHANGED
@@ -43,6 +43,7 @@ from .clusterization_spec import (
     ClusterizationSpec,
     WaitForChiefToReachOnlineStateFeatureFlag,
 )
+from .common import ImageBuilder
 from .constants import (
     APIStates,
     ClusterizationRole,
@@ -113,6 +114,7 @@ from .model_monitoring import (
     EventFieldType,
     EventKeyMetrics,
     Features,
+    FeatureSetFeatures,
     FeatureValues,
     GrafanaColumn,
     GrafanaDataPoint,
mlrun/common/schemas/common.py
ADDED

@@ -0,0 +1,40 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import typing
+
+import pydantic
+
+
+class ImageBuilder(pydantic.BaseModel):
+    functionSourceCode: typing.Optional[str] = None
+    codeEntryType: typing.Optional[str] = None
+    codeEntryAttributes: typing.Optional[str] = None
+    source: typing.Optional[str] = None
+    code_origin: typing.Optional[str] = None
+    origin_filename: typing.Optional[str] = None
+    image: typing.Optional[str] = None
+    base_image: typing.Optional[str] = None
+    commands: typing.Optional[list] = None
+    extra: typing.Optional[str] = None
+    extra_args: typing.Optional[dict] = None
+    builder_env: typing.Optional[dict] = None
+    secret: typing.Optional[str] = None
+    registry: typing.Optional[str] = None
+    load_source_on_run: typing.Optional[bool] = None
+    with_mlrun: typing.Optional[bool] = None
+    auto_build: typing.Optional[bool] = None
+    build_pod: typing.Optional[str] = None
+    requirements: typing.Optional[list] = None
+    source_code_target_dir: typing.Optional[str] = None
mlrun/common/schemas/model_monitoring/constants.py
CHANGED

@@ -77,6 +77,26 @@ class EventFieldType:
     SAMPLE_PARQUET_PATH = "sample_parquet_path"
 
 
+class MonitoringStrEnum(StrEnum):
+    @classmethod
+    def list(cls):
+        return list(map(lambda c: c.value, cls))
+
+
+class FeatureSetFeatures(MonitoringStrEnum):
+    LATENCY = EventFieldType.LATENCY
+    ERROR_COUNT = EventFieldType.ERROR_COUNT
+    METRICS = EventFieldType.METRICS
+
+    @classmethod
+    def time_stamp(cls):
+        return EventFieldType.TIMESTAMP
+
+    @classmethod
+    def entity(cls):
+        return EventFieldType.ENDPOINT_ID
+
+
 class ApplicationEvent:
     APPLICATION_NAME = "application_name"
     CURRENT_STATS = "current_stats"
@@ -89,7 +109,7 @@ class ApplicationEvent:
     OUTPUT_STREAM_URI = "output_stream_uri"
 
 
-class WriterEvent(StrEnum):
+class WriterEvent(MonitoringStrEnum):
     APPLICATION_NAME = "application_name"
     ENDPOINT_ID = "endpoint_id"
     START_INFER_TIME = "start_infer_time"
@@ -101,10 +121,6 @@ class WriterEvent(StrEnum):
     RESULT_EXTRA_DATA = "result_extra_data"
     CURRENT_STATS = "current_stats"
 
-    @classmethod
-    def list(cls):
-        return list(map(lambda c: c.value, cls))
-
 
 class EventLiveStats:
     LATENCY_AVG_5M = "latency_avg_5m"
mlrun/common/schemas/project.py
CHANGED
@@ -19,6 +19,7 @@ import pydantic
 
 import mlrun.common.types
 
+from .common import ImageBuilder
 from .object import ObjectKind, ObjectStatus
 
 
@@ -85,6 +86,7 @@ class ProjectSpec(pydantic.BaseModel):
     desired_state: typing.Optional[ProjectDesiredState] = ProjectDesiredState.online
     custom_packagers: typing.Optional[typing.List[typing.Tuple[str, bool]]] = None
     default_image: typing.Optional[str] = None
+    build: typing.Optional[ImageBuilder] = None
 
     class Config:
         extra = pydantic.Extra.allow
mlrun/config.py
CHANGED
@@ -288,6 +288,12 @@ default_config = {
     "state": "online",
     "retry_api_call_on_exception": "enabled",
     "http_connection_timeout_keep_alive": 11,
+    # http client used by httpdb
+    "http": {
+        # when True, the client will verify the server's TLS certificate
+        # set to False for backwards compatibility.
+        "verify": False,
+    },
     "db": {
         "commit_retry_timeout": 30,
         "commit_retry_interval": 3,
@@ -306,7 +312,11 @@ default_config = {
         # default is 16MB, max 1G, for more info https://dev.mysql.com/doc/refman/8.0/en/packet-too-large.html
         "max_allowed_packet": 64000000,  # 64MB
     },
-    # None defaults to httpdb.max_workers
+    # tests connections for liveness upon each checkout
+    "connections_pool_pre_ping": True,
+    # this setting causes the pool to recycle connections after the given number of seconds has passed
+    "connections_pool_recycle": 60 * 60,
+    # None defaults to httpdb.max_workers
     "connections_pool_size": None,
     "connections_pool_max_overflow": None,
     # below is a db-specific configuration
@@ -434,7 +444,7 @@ default_config = {
     # pip install <requirement_specifier>, e.g. mlrun==0.5.4, mlrun~=0.5,
     # git+https://github.com/mlrun/mlrun@development. by default uses the version
     "mlrun_version_specifier": "",
-    "kaniko_image": "gcr.io/kaniko-project/executor:v1.…",  # kaniko builder image
+    "kaniko_image": "gcr.io/kaniko-project/executor:v1.21.1",  # kaniko builder image
     "kaniko_init_container_image": "alpine:3.18",
     # image for kaniko init container when docker registry is ECR
     "kaniko_aws_cli_image": "amazon/aws-cli:2.7.10",
@@ -481,8 +491,8 @@ default_config = {
     "offline_storage_path": "model-endpoints/{kind}",
     # Default http path that points to the monitoring stream nuclio function. Will be used as a stream path
     # when the user is working in CE environment and has not provided any stream path.
-    "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.…",
-    "default_http_sink_app": "http://nuclio-{project}-{application_name}.…",
+    "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080",
+    "default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
     "batch_processing_function_branch": "master",
     "parquet_batching_max_events": 10_000,
     "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
@@ -601,8 +611,9 @@ default_config = {
     },
     "workflows": {
         "default_workflow_runner_name": "workflow-runner-{}",
-        # Default timeout seconds for retrieving workflow id after execution
-        "timeouts": {…},
+        # Default timeout seconds for retrieving workflow id after execution
+        # Remote workflow timeout is the maximum between remote and the inner engine timeout
+        "timeouts": {"local": 120, "kfp": 60, "remote": 60 * 5},
     },
     "log_collector": {
         "address": "localhost:8282",
@@ -954,10 +965,10 @@ class Config:
             with_gpu = (
                 with_gpu_requests if requirement == "requests" else with_gpu_limits
             )
-            resources[
-                requirement
-            ] = self.get_default_function_pod_requirement_resources(
-                requirement, with_gpu
+            resources[requirement] = (
+                self.get_default_function_pod_requirement_resources(
+                    requirement, with_gpu
+                )
             )
         return resources
 
@@ -1051,7 +1062,7 @@ class Config:
         target: str = "online",
         artifact_path: str = None,
         application_name: str = None,
-    ) -> str:
+    ) -> typing.Union[str, list[str]]:
         """Get the full path from the configuration based on the provided project and kind.
 
         :param project: Project name.
@@ -1067,7 +1078,8 @@ class Config:
                                  relative artifact path will be taken from the global MLRun artifact path.
         :param application_name: Application name, None for model_monitoring_stream.
 
-        :return: Full configured path for the provided kind.
+        :return: Full configured path for the provided kind. Can be either a single path
+                 or a list of paths in the case of the online model monitoring stream path.
         """
 
         if target != "offline":
@@ -1088,12 +1100,22 @@ class Config:
                 if application_name is None
                 else f"{kind}-{application_name.lower()}",
             )
-            return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
-                project=project,
-                kind=kind
-                if application_name is None
-                else f"{kind}-{application_name.lower()}",
-            )
+        elif kind == "stream":  # return list for mlrun<1.6.3 BC
+            return [
+                mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
+                    project=project,
+                    kind=kind,
+                ),  # old stream uri (pipelines) for BC ML-6043
+                mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
+                    project=project,
+                    kind=kind,
+                ),  # new stream uri (projects)
+            ]
+        else:
+            return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
+                project=project,
+                kind=kind,
+            )
 
         # Get the current offline path from the configuration
         file_path = mlrun.mlconf.model_endpoint_monitoring.offline_storage_path.format(
@@ -1340,12 +1362,21 @@ def read_env(env=None, prefix=env_prefix):
     if igz_domain:
         config["ui_url"] = f"https://mlrun-ui.{igz_domain}"
 
-    if config.get("log_level"):
+    if log_level := config.get("log_level"):
         import mlrun.utils.logger
 
         # logger created (because of imports mess) before the config is loaded (in tests), therefore we're changing its
         # level manually
-        mlrun.utils.logger.set_logger_level(config["log_level"])
+        mlrun.utils.logger.set_logger_level(log_level)
+
+    if log_formatter_name := config.get("log_formatter"):
+        import mlrun.utils.logger
+
+        log_formatter = mlrun.utils.create_formatter_instance(
+            mlrun.utils.FormatterKinds(log_formatter_name)
+        )
+        mlrun.utils.logger.get_handler("default").setFormatter(log_formatter)
+
     # The default function pod resource values are of type str; however, when reading from environment variable numbers,
     # it converts them to type int if contains only number, so we want to convert them to str.
     _convert_resources_to_str(config)
mlrun/data_types/data_types.py
CHANGED
@@ -41,6 +41,7 @@ class ValueType(str, Enum):
     BYTES = "bytes"
     STRING = "str"
     DATETIME = "datetime"
+    LIST = "List"
     BYTES_LIST = "List[bytes]"
     STRING_LIST = "List[string]"
     INT32_LIST = "List[int32]"
@@ -48,6 +49,7 @@ class ValueType(str, Enum):
     DOUBLE_LIST = "List[float]"
     FLOAT_LIST = "List[float32]"
     BOOL_LIST = "List[bool]"
+    Tuple = "Tuple"
 
 
 def pd_schema_to_value_type(value):
@@ -102,6 +104,8 @@ def python_type_to_value_type(value_type):
         "datetime64[ns]": ValueType.INT64,
         "datetime64[ns, tz]": ValueType.INT64,
         "category": ValueType.STRING,
+        "list": ValueType.LIST,
+        "tuple": ValueType.Tuple,
     }
 
     if type_name in type_map:
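The new LIST and Tuple members let python_type_to_value_type resolve plain Python containers by their type name. A trimmed sketch of the lookup:

from enum import Enum


class ValueType(str, Enum):  # trimmed stand-in
    LIST = "List"
    Tuple = "Tuple"


type_map = {"list": ValueType.LIST, "tuple": ValueType.Tuple}

print(type_map[type([1, 2]).__name__])  # ValueType.LIST
print(type_map[type((1, 2)).__name__])  # ValueType.Tuple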
mlrun/datastore/azure_blob.py
CHANGED
@@ -175,9 +175,9 @@ class AzureBlobStore(DataStore):
 
         if "client_secret" in st or "client_id" in st or "tenant_id" in st:
             res[f"spark.hadoop.fs.azure.account.auth.type.{host}"] = "OAuth"
-            res[
-                f"spark.hadoop.fs.azure.account.oauth.provider.type.{host}"
-            ] = "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
+            res[f"spark.hadoop.fs.azure.account.oauth.provider.type.{host}"] = (
+                "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
+            )
             if "client_id" in st:
                 res[f"spark.hadoop.fs.azure.account.oauth2.client.id.{host}"] = st[
                     "client_id"
@@ -188,14 +188,14 @@ class AzureBlobStore(DataStore):
                 ]
             if "tenant_id" in st:
                 tenant_id = st["tenant_id"]
-                res[
-                    f"spark.hadoop.fs.azure.account.oauth2.client.endpoint.{host}"
-                ] = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
+                res[f"spark.hadoop.fs.azure.account.oauth2.client.endpoint.{host}"] = (
+                    f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
+                )
 
         if "sas_token" in st:
             res[f"spark.hadoop.fs.azure.account.auth.type.{host}"] = "SAS"
-            res[
-                f"spark.hadoop.fs.azure.sas.token.provider.type.{host}"
-            ] = "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider"
+            res[f"spark.hadoop.fs.azure.sas.token.provider.type.{host}"] = (
+                "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider"
+            )
             res[f"spark.hadoop.fs.azure.sas.fixed.token.{host}"] = st["sas_token"]
         return res
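All three hunks here are the same mechanical reformat (black's 2024 hugging style): the dict subscript collapses onto one line and the value is parenthesized, instead of the subscript being split across lines. The two layouts are runtime-equivalent:

res = {}
host = "myaccount.dfs.core.windows.net"  # illustrative

# old layout: subscript split across lines
res[
    f"spark.hadoop.fs.azure.account.auth.type.{host}"
] = "OAuth"

# new layout: one-line subscript, parenthesized value
res[f"spark.hadoop.fs.azure.account.auth.type.{host}"] = (
    "OAuth"
)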
mlrun/datastore/base.py
CHANGED
@@ -27,6 +27,7 @@ import requests
 import urllib3
 from deprecated import deprecated
 
+import mlrun.config
 import mlrun.errors
 from mlrun.errors import err_to_str
 from mlrun.utils import StorePrefix, is_ipython, logger
@@ -34,10 +35,6 @@ from mlrun.utils import StorePrefix, is_ipython, logger
 from .store_resources import is_store_uri, parse_store_uri
 from .utils import filter_df_start_end_time, select_columns_from_df
 
-verify_ssl = False
-if not verify_ssl:
-    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
 
 class FileStats:
     def __init__(self, size, modified, content_type=None):
@@ -643,45 +640,6 @@ def basic_auth_header(user, password):
     return {"Authorization": authstr}
 
 
-def http_get(url, headers=None, auth=None):
-    try:
-        response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
-
-    mlrun.errors.raise_for_status(response)
-
-    return response.content
-
-
-def http_head(url, headers=None, auth=None):
-    try:
-        response = requests.head(url, headers=headers, auth=auth, verify=verify_ssl)
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
-
-    mlrun.errors.raise_for_status(response)
-
-    return response.headers
-
-
-def http_put(url, data, headers=None, auth=None, session=None):
-    try:
-        put_api = session.put if session else requests.put
-        response = put_api(
-            url, data=data, headers=headers, auth=auth, verify=verify_ssl
-        )
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}") from exc
-
-    mlrun.errors.raise_for_status(response)
-
-
-def http_upload(url, file_path, headers=None, auth=None):
-    with open(file_path, "rb") as data:
-        http_put(url, data, headers, auth)
-
-
 class HttpStore(DataStore):
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets)
@@ -709,7 +667,7 @@ class HttpStore(DataStore):
         raise ValueError("unimplemented")
 
     def get(self, key, size=None, offset=0):
-        data = http_get(self.url + self._join(key), self._headers, self.auth)
+        data = self._http_get(self.url + self._join(key), self._headers, self.auth)
         if offset:
             data = data[offset:]
         if size:
@@ -729,6 +687,26 @@ class HttpStore(DataStore):
             f"schema as it is not secure and is not recommended."
         )
 
+    def _http_get(
+        self,
+        url,
+        headers=None,
+        auth=None,
+    ):
+        # import here to prevent import cycle
+        from mlrun.config import config as mlconf
+
+        verify_ssl = mlconf.httpdb.http.verify
+        try:
+            if not verify_ssl:
+                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+            response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
+        except OSError as exc:
+            raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
+
+        mlrun.errors.raise_for_status(response)
+        return response.content
+
 
 # This wrapper class is designed to extract the 'ds' schema and profile name from URL-formatted paths.
 # Within fsspec, the AbstractFileSystem::_strip_protocol() internal method is used to handle complete URL paths.
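The module-level verify_ssl = False, which disabled TLS verification for every HTTP datastore read at import time, is replaced by a per-call check of the new httpdb.http.verify setting. A standalone sketch of the pattern (the URL is illustrative):

import requests
import urllib3


def http_get(url: str, verify_ssl: bool = False) -> bytes:
    if not verify_ssl:
        # silence the insecure-request warning only when verification is off
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    response = requests.get(url, verify=verify_ssl)
    response.raise_for_status()
    return response.content


content = http_get("https://example.com/data.csv", verify_ssl=True)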
mlrun/datastore/google_cloud_storage.py
CHANGED

@@ -147,13 +147,13 @@ class GoogleCloudStorageStore(DataStore):
         if "project_id" in credentials:
             res["spark.hadoop.fs.gs.project.id"] = credentials["project_id"]
         if "private_key_id" in credentials:
-            res[
-                "spark.hadoop.fs.gs.auth.service.account.private.key.id"
-            ] = credentials["private_key_id"]
+            res["spark.hadoop.fs.gs.auth.service.account.private.key.id"] = (
+                credentials["private_key_id"]
+            )
         if "private_key" in credentials:
-            res[
-                "spark.hadoop.fs.gs.auth.service.account.private.key"
-            ] = credentials["private_key"]
+            res["spark.hadoop.fs.gs.auth.service.account.private.key"] = (
+                credentials["private_key"]
+            )
         if "client_email" in credentials:
             res["spark.hadoop.fs.gs.auth.service.account.email"] = credentials[
                 "client_email"