mlrun 1.7.1rc10__py3-none-any.whl → 1.8.0rc11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +23 -21
- mlrun/__main__.py +3 -3
- mlrun/alerts/alert.py +148 -14
- mlrun/artifacts/__init__.py +2 -3
- mlrun/artifacts/base.py +55 -12
- mlrun/artifacts/dataset.py +16 -16
- mlrun/artifacts/document.py +378 -0
- mlrun/artifacts/manager.py +26 -17
- mlrun/artifacts/model.py +66 -53
- mlrun/common/constants.py +8 -0
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/feature_set.py +1 -0
- mlrun/common/formatters/function.py +1 -0
- mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} +16 -1
- mlrun/common/formatters/pipeline.py +1 -2
- mlrun/common/formatters/project.py +9 -0
- mlrun/common/model_monitoring/__init__.py +0 -5
- mlrun/common/model_monitoring/helpers.py +1 -29
- mlrun/common/runtimes/constants.py +1 -2
- mlrun/common/schemas/__init__.py +6 -2
- mlrun/common/schemas/alert.py +111 -19
- mlrun/common/schemas/api_gateway.py +3 -3
- mlrun/common/schemas/artifact.py +11 -7
- mlrun/common/schemas/auth.py +6 -4
- mlrun/common/schemas/background_task.py +7 -7
- mlrun/common/schemas/client_spec.py +2 -3
- mlrun/common/schemas/clusterization_spec.py +2 -2
- mlrun/common/schemas/common.py +53 -3
- mlrun/common/schemas/constants.py +15 -0
- mlrun/common/schemas/datastore_profile.py +1 -1
- mlrun/common/schemas/feature_store.py +9 -9
- mlrun/common/schemas/frontend_spec.py +4 -4
- mlrun/common/schemas/function.py +10 -10
- mlrun/common/schemas/hub.py +1 -1
- mlrun/common/schemas/k8s.py +3 -3
- mlrun/common/schemas/memory_reports.py +3 -3
- mlrun/common/schemas/model_monitoring/__init__.py +2 -1
- mlrun/common/schemas/model_monitoring/constants.py +67 -14
- mlrun/common/schemas/model_monitoring/grafana.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +92 -147
- mlrun/common/schemas/notification.py +24 -3
- mlrun/common/schemas/object.py +1 -1
- mlrun/common/schemas/pagination.py +4 -4
- mlrun/common/schemas/partition.py +137 -0
- mlrun/common/schemas/pipeline.py +2 -2
- mlrun/common/schemas/project.py +25 -17
- mlrun/common/schemas/runs.py +2 -2
- mlrun/common/schemas/runtime_resource.py +5 -5
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/secret.py +1 -1
- mlrun/common/schemas/tag.py +3 -3
- mlrun/common/schemas/workflow.py +5 -5
- mlrun/config.py +68 -10
- mlrun/data_types/__init__.py +0 -2
- mlrun/data_types/data_types.py +1 -0
- mlrun/data_types/infer.py +3 -1
- mlrun/data_types/spark.py +5 -3
- mlrun/data_types/to_pandas.py +11 -2
- mlrun/datastore/__init__.py +2 -2
- mlrun/datastore/alibaba_oss.py +4 -1
- mlrun/datastore/azure_blob.py +4 -1
- mlrun/datastore/base.py +12 -4
- mlrun/datastore/datastore.py +9 -3
- mlrun/datastore/datastore_profile.py +79 -20
- mlrun/datastore/dbfs_store.py +4 -1
- mlrun/datastore/filestore.py +4 -1
- mlrun/datastore/google_cloud_storage.py +4 -1
- mlrun/datastore/hdfs.py +4 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +4 -1
- mlrun/datastore/s3.py +4 -1
- mlrun/datastore/sources.py +52 -51
- mlrun/datastore/store_resources.py +7 -4
- mlrun/datastore/targets.py +23 -22
- mlrun/datastore/utils.py +2 -2
- mlrun/datastore/v3io.py +4 -1
- mlrun/datastore/vectorstore.py +229 -0
- mlrun/datastore/wasbfs/fs.py +13 -12
- mlrun/db/base.py +213 -83
- mlrun/db/factory.py +0 -3
- mlrun/db/httpdb.py +1265 -387
- mlrun/db/nopdb.py +205 -74
- mlrun/errors.py +2 -2
- mlrun/execution.py +136 -50
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +41 -40
- mlrun/feature_store/common.py +9 -9
- mlrun/feature_store/feature_set.py +20 -18
- mlrun/feature_store/feature_vector.py +27 -24
- mlrun/feature_store/retrieval/base.py +14 -9
- mlrun/feature_store/retrieval/job.py +2 -1
- mlrun/feature_store/steps.py +2 -2
- mlrun/features.py +30 -13
- mlrun/frameworks/__init__.py +1 -2
- mlrun/frameworks/_common/__init__.py +1 -2
- mlrun/frameworks/_common/artifacts_library.py +2 -2
- mlrun/frameworks/_common/mlrun_interface.py +10 -6
- mlrun/frameworks/_common/model_handler.py +29 -27
- mlrun/frameworks/_common/producer.py +3 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
- mlrun/frameworks/_ml_common/__init__.py +1 -2
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_ml_common/model_handler.py +21 -21
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/auto_mlrun/__init__.py +1 -2
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
- mlrun/frameworks/huggingface/__init__.py +1 -2
- mlrun/frameworks/huggingface/model_server.py +9 -9
- mlrun/frameworks/lgbm/__init__.py +47 -44
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
- mlrun/frameworks/lgbm/model_handler.py +15 -11
- mlrun/frameworks/lgbm/model_server.py +11 -7
- mlrun/frameworks/lgbm/utils.py +2 -2
- mlrun/frameworks/onnx/__init__.py +1 -2
- mlrun/frameworks/onnx/dataset.py +3 -3
- mlrun/frameworks/onnx/mlrun_interface.py +2 -2
- mlrun/frameworks/onnx/model_handler.py +7 -5
- mlrun/frameworks/onnx/model_server.py +8 -6
- mlrun/frameworks/parallel_coordinates.py +11 -11
- mlrun/frameworks/pytorch/__init__.py +22 -23
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
- mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
- mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
- mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
- mlrun/frameworks/pytorch/model_handler.py +21 -17
- mlrun/frameworks/pytorch/model_server.py +13 -9
- mlrun/frameworks/sklearn/__init__.py +19 -18
- mlrun/frameworks/sklearn/estimator.py +2 -2
- mlrun/frameworks/sklearn/metric.py +3 -3
- mlrun/frameworks/sklearn/metrics_library.py +8 -6
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
- mlrun/frameworks/sklearn/model_handler.py +4 -3
- mlrun/frameworks/tf_keras/__init__.py +11 -12
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
- mlrun/frameworks/tf_keras/model_handler.py +17 -13
- mlrun/frameworks/tf_keras/model_server.py +12 -8
- mlrun/frameworks/xgboost/__init__.py +19 -18
- mlrun/frameworks/xgboost/model_handler.py +13 -9
- mlrun/launcher/base.py +3 -4
- mlrun/launcher/local.py +1 -1
- mlrun/launcher/remote.py +1 -1
- mlrun/lists.py +4 -3
- mlrun/model.py +117 -46
- mlrun/model_monitoring/__init__.py +4 -4
- mlrun/model_monitoring/api.py +72 -59
- mlrun/model_monitoring/applications/_application_steps.py +17 -17
- mlrun/model_monitoring/applications/base.py +165 -6
- mlrun/model_monitoring/applications/context.py +88 -37
- mlrun/model_monitoring/applications/evidently_base.py +0 -1
- mlrun/model_monitoring/applications/histogram_data_drift.py +43 -21
- mlrun/model_monitoring/applications/results.py +55 -3
- mlrun/model_monitoring/controller.py +207 -239
- mlrun/model_monitoring/db/__init__.py +0 -2
- mlrun/model_monitoring/db/_schedules.py +156 -0
- mlrun/model_monitoring/db/_stats.py +189 -0
- mlrun/model_monitoring/db/tsdb/base.py +78 -25
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +61 -6
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +255 -29
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +78 -17
- mlrun/model_monitoring/helpers.py +151 -49
- mlrun/model_monitoring/stream_processing.py +99 -283
- mlrun/model_monitoring/tracking_policy.py +10 -3
- mlrun/model_monitoring/writer.py +48 -36
- mlrun/package/__init__.py +3 -6
- mlrun/package/context_handler.py +1 -1
- mlrun/package/packager.py +12 -9
- mlrun/package/packagers/__init__.py +0 -2
- mlrun/package/packagers/default_packager.py +14 -11
- mlrun/package/packagers/numpy_packagers.py +16 -7
- mlrun/package/packagers/pandas_packagers.py +18 -18
- mlrun/package/packagers/python_standard_library_packagers.py +25 -11
- mlrun/package/packagers_manager.py +31 -14
- mlrun/package/utils/__init__.py +0 -3
- mlrun/package/utils/_pickler.py +6 -6
- mlrun/platforms/__init__.py +47 -16
- mlrun/platforms/iguazio.py +4 -1
- mlrun/projects/operations.py +27 -27
- mlrun/projects/pipelines.py +71 -36
- mlrun/projects/project.py +890 -220
- mlrun/run.py +53 -10
- mlrun/runtimes/__init__.py +1 -3
- mlrun/runtimes/base.py +15 -11
- mlrun/runtimes/daskjob.py +9 -9
- mlrun/runtimes/generators.py +2 -1
- mlrun/runtimes/kubejob.py +4 -5
- mlrun/runtimes/mounts.py +572 -0
- mlrun/runtimes/mpijob/__init__.py +0 -2
- mlrun/runtimes/mpijob/abstract.py +7 -6
- mlrun/runtimes/nuclio/api_gateway.py +7 -7
- mlrun/runtimes/nuclio/application/application.py +11 -11
- mlrun/runtimes/nuclio/function.py +19 -17
- mlrun/runtimes/nuclio/serving.py +18 -13
- mlrun/runtimes/pod.py +154 -45
- mlrun/runtimes/remotesparkjob.py +3 -2
- mlrun/runtimes/sparkjob/__init__.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +21 -11
- mlrun/runtimes/utils.py +6 -5
- mlrun/serving/merger.py +6 -4
- mlrun/serving/remote.py +18 -17
- mlrun/serving/routers.py +185 -172
- mlrun/serving/server.py +7 -1
- mlrun/serving/states.py +97 -78
- mlrun/serving/utils.py +13 -2
- mlrun/serving/v1_serving.py +3 -2
- mlrun/serving/v2_serving.py +105 -72
- mlrun/track/__init__.py +1 -1
- mlrun/track/tracker.py +2 -2
- mlrun/track/trackers/mlflow_tracker.py +6 -5
- mlrun/utils/async_http.py +1 -1
- mlrun/utils/clones.py +1 -1
- mlrun/utils/helpers.py +63 -19
- mlrun/utils/logger.py +106 -4
- mlrun/utils/notifications/notification/__init__.py +22 -19
- mlrun/utils/notifications/notification/base.py +33 -14
- mlrun/utils/notifications/notification/console.py +6 -6
- mlrun/utils/notifications/notification/git.py +11 -11
- mlrun/utils/notifications/notification/ipython.py +10 -9
- mlrun/utils/notifications/notification/mail.py +176 -0
- mlrun/utils/notifications/notification/slack.py +6 -6
- mlrun/utils/notifications/notification/webhook.py +6 -6
- mlrun/utils/notifications/notification_pusher.py +86 -44
- mlrun/utils/regex.py +11 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/METADATA +29 -24
- mlrun-1.8.0rc11.dist-info/RECORD +347 -0
- mlrun/model_monitoring/db/stores/__init__.py +0 -136
- mlrun/model_monitoring/db/stores/base/store.py +0 -213
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
- mlrun/model_monitoring/model_endpoint.py +0 -118
- mlrun-1.7.1rc10.dist-info/RECORD +0 -351
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/LICENSE +0 -0
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/WHEEL +0 -0
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/runs.py
CHANGED
|
@@ -14,13 +14,13 @@
|
|
|
14
14
|
|
|
15
15
|
import typing
|
|
16
16
|
|
|
17
|
-
import pydantic
|
|
17
|
+
import pydantic.v1
|
|
18
18
|
from deprecated import deprecated
|
|
19
19
|
|
|
20
20
|
import mlrun.common.types
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
class RunIdentifier(pydantic.BaseModel):
|
|
23
|
+
class RunIdentifier(pydantic.v1.BaseModel):
|
|
24
24
|
kind: typing.Literal["run"] = "run"
|
|
25
25
|
uid: typing.Optional[str]
|
|
26
26
|
iter: typing.Optional[int]
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
#
|
|
15
15
|
import typing
|
|
16
16
|
|
|
17
|
-
import pydantic
|
|
17
|
+
import pydantic.v1
|
|
18
18
|
|
|
19
19
|
import mlrun.common.types
|
|
20
20
|
|
|
@@ -24,23 +24,23 @@ class ListRuntimeResourcesGroupByField(mlrun.common.types.StrEnum):
|
|
|
24
24
|
project = "project"
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
class RuntimeResource(pydantic.BaseModel):
|
|
27
|
+
class RuntimeResource(pydantic.v1.BaseModel):
|
|
28
28
|
name: str
|
|
29
29
|
labels: dict[str, str] = {}
|
|
30
30
|
status: typing.Optional[dict]
|
|
31
31
|
|
|
32
32
|
|
|
33
|
-
class RuntimeResources(pydantic.BaseModel):
|
|
33
|
+
class RuntimeResources(pydantic.v1.BaseModel):
|
|
34
34
|
crd_resources: list[RuntimeResource] = []
|
|
35
35
|
pod_resources: list[RuntimeResource] = []
|
|
36
36
|
# only for dask runtime
|
|
37
37
|
service_resources: typing.Optional[list[RuntimeResource]] = None
|
|
38
38
|
|
|
39
39
|
class Config:
|
|
40
|
-
extra = pydantic.Extra.allow
|
|
40
|
+
extra = pydantic.v1.Extra.allow
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
class KindRuntimeResources(pydantic.BaseModel):
|
|
43
|
+
class KindRuntimeResources(pydantic.v1.BaseModel):
|
|
44
44
|
kind: str
|
|
45
45
|
resources: RuntimeResources
|
|
46
46
|
|
mlrun/common/schemas/schedule.py
CHANGED
mlrun/common/schemas/secret.py
CHANGED
mlrun/common/schemas/tag.py
CHANGED
|
@@ -13,17 +13,17 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
15
|
|
|
16
|
-
import pydantic
|
|
16
|
+
import pydantic.v1
|
|
17
17
|
|
|
18
18
|
from .artifact import ArtifactIdentifier
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
class Tag(pydantic.BaseModel):
|
|
21
|
+
class Tag(pydantic.v1.BaseModel):
|
|
22
22
|
name: str
|
|
23
23
|
project: str
|
|
24
24
|
|
|
25
25
|
|
|
26
|
-
class TagObjects(pydantic.BaseModel):
|
|
26
|
+
class TagObjects(pydantic.v1.BaseModel):
|
|
27
27
|
"""Tag object"""
|
|
28
28
|
|
|
29
29
|
kind: str
|
mlrun/common/schemas/workflow.py
CHANGED
|
@@ -14,14 +14,14 @@
|
|
|
14
14
|
#
|
|
15
15
|
import typing
|
|
16
16
|
|
|
17
|
-
import pydantic
|
|
17
|
+
import pydantic.v1
|
|
18
18
|
|
|
19
19
|
from mlrun.common.schemas.notification import Notification
|
|
20
20
|
from mlrun.common.schemas.schedule import ScheduleCronTrigger
|
|
21
21
|
from mlrun.common.types import StrEnum
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
class WorkflowSpec(pydantic.BaseModel):
|
|
24
|
+
class WorkflowSpec(pydantic.v1.BaseModel):
|
|
25
25
|
name: str
|
|
26
26
|
engine: typing.Optional[str] = None
|
|
27
27
|
code: typing.Optional[str] = None
|
|
@@ -36,7 +36,7 @@ class WorkflowSpec(pydantic.BaseModel):
|
|
|
36
36
|
workflow_runner_node_selector: typing.Optional[dict[str, str]] = None
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
class WorkflowRequest(pydantic.BaseModel):
|
|
39
|
+
class WorkflowRequest(pydantic.v1.BaseModel):
|
|
40
40
|
spec: typing.Optional[WorkflowSpec] = None
|
|
41
41
|
arguments: typing.Optional[dict] = None
|
|
42
42
|
artifact_path: typing.Optional[str] = None
|
|
@@ -46,7 +46,7 @@ class WorkflowRequest(pydantic.BaseModel):
|
|
|
46
46
|
notifications: typing.Optional[list[Notification]] = None
|
|
47
47
|
|
|
48
48
|
|
|
49
|
-
class WorkflowResponse(pydantic.BaseModel):
|
|
49
|
+
class WorkflowResponse(pydantic.v1.BaseModel):
|
|
50
50
|
project: str = None
|
|
51
51
|
name: str = None
|
|
52
52
|
status: str = None
|
|
@@ -54,7 +54,7 @@ class WorkflowResponse(pydantic.BaseModel):
|
|
|
54
54
|
schedule: typing.Union[str, ScheduleCronTrigger] = None
|
|
55
55
|
|
|
56
56
|
|
|
57
|
-
class GetWorkflowResponse(pydantic.BaseModel):
|
|
57
|
+
class GetWorkflowResponse(pydantic.v1.BaseModel):
|
|
58
58
|
workflow_id: str = None
|
|
59
59
|
|
|
60
60
|
|
mlrun/config.py
CHANGED
|
@@ -102,6 +102,9 @@ default_config = {
|
|
|
102
102
|
"log_level": "INFO",
|
|
103
103
|
# log formatter (options: human | human_extended | json)
|
|
104
104
|
"log_formatter": "human",
|
|
105
|
+
# custom logger format, works only with log_formatter: custom
|
|
106
|
+
# Note that your custom format must include those 4 fields - timestamp, level, message and more
|
|
107
|
+
"log_format_override": None,
|
|
105
108
|
"submit_timeout": "180", # timeout when submitting a new k8s resource
|
|
106
109
|
# runtimes cleanup interval in seconds
|
|
107
110
|
"runtimes_cleanup_interval": "300",
|
|
@@ -132,6 +135,12 @@ default_config = {
|
|
|
132
135
|
"delete_crd_resources_timeout": "5 minutes",
|
|
133
136
|
},
|
|
134
137
|
},
|
|
138
|
+
"object_retentions": {
|
|
139
|
+
"alert_activations": 14 * 7, # days
|
|
140
|
+
},
|
|
141
|
+
# A safety margin to account for delays
|
|
142
|
+
# This ensures that extra partitions are available beyond the specified retention period
|
|
143
|
+
"partitions_buffer_multiplier": 3,
|
|
135
144
|
# the grace period (in seconds) that will be given to runtime resources (after they're in terminal state)
|
|
136
145
|
# before deleting them (4 hours)
|
|
137
146
|
"runtime_resources_deletion_grace_period": "14400",
|
|
@@ -151,6 +160,7 @@ default_config = {
|
|
|
151
160
|
# migration from artifacts to artifacts_v2 is done in batches, and requires a state file to keep track of the
|
|
152
161
|
# migration progress.
|
|
153
162
|
"artifact_migration_batch_size": 200,
|
|
163
|
+
"artifact_migration_v9_batch_size": 30000,
|
|
154
164
|
"artifact_migration_state_file_path": "./db/_artifact_migration_state.json",
|
|
155
165
|
"datasets": {
|
|
156
166
|
"max_preview_columns": 100,
|
|
@@ -223,6 +233,7 @@ default_config = {
|
|
|
223
233
|
"delete_function": "900",
|
|
224
234
|
},
|
|
225
235
|
"runtimes": {"dask": "600"},
|
|
236
|
+
"push_notifications": "60",
|
|
226
237
|
},
|
|
227
238
|
},
|
|
228
239
|
"function": {
|
|
@@ -306,7 +317,7 @@ default_config = {
|
|
|
306
317
|
},
|
|
307
318
|
"request_timeout": 45, # seconds
|
|
308
319
|
},
|
|
309
|
-
# see server.api.utils.helpers.ensure_running_on_chief
|
|
320
|
+
# see server.py.services.api.utils.helpers.ensure_running_on_chief
|
|
310
321
|
"ensure_function_running_on_chief_mode": "enabled",
|
|
311
322
|
},
|
|
312
323
|
"port": 8080,
|
|
@@ -524,7 +535,7 @@ default_config = {
|
|
|
524
535
|
"verbose": True,
|
|
525
536
|
},
|
|
526
537
|
"pagination": {
|
|
527
|
-
"default_page_size":
|
|
538
|
+
"default_page_size": 200,
|
|
528
539
|
"pagination_cache": {
|
|
529
540
|
"interval": 60,
|
|
530
541
|
"ttl": 3600,
|
|
@@ -598,8 +609,6 @@ default_config = {
|
|
|
598
609
|
"default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
|
|
599
610
|
"parquet_batching_max_events": 10_000,
|
|
600
611
|
"parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
|
|
601
|
-
# See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
|
|
602
|
-
"endpoint_store_connection": "",
|
|
603
612
|
# See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
|
|
604
613
|
"tsdb_connection": "",
|
|
605
614
|
# See mlrun.common.schemas.model_monitoring.constants.StreamKind for available options
|
|
@@ -786,17 +795,36 @@ default_config = {
|
|
|
786
795
|
"grafana_url": "",
|
|
787
796
|
"alerts": {
|
|
788
797
|
# supported modes: "enabled", "disabled".
|
|
789
|
-
"mode": "
|
|
798
|
+
"mode": "enabled",
|
|
790
799
|
# maximum number of alerts we allow to be configured.
|
|
791
800
|
# user will get an error when exceeding this
|
|
792
801
|
"max_allowed": 10000,
|
|
793
802
|
# maximum allowed value for count in criteria field inside AlertConfig
|
|
794
803
|
"max_criteria_count": 100,
|
|
804
|
+
# interval for periodic events generation job
|
|
805
|
+
"events_generation_interval": "30",
|
|
795
806
|
},
|
|
796
807
|
"auth_with_client_id": {
|
|
797
808
|
"enabled": False,
|
|
798
809
|
"request_timeout": 5,
|
|
799
810
|
},
|
|
811
|
+
"services": {
|
|
812
|
+
# The running service name. One of: "api", "alerts"
|
|
813
|
+
"service_name": "api",
|
|
814
|
+
"hydra": {
|
|
815
|
+
# Comma separated list of services to run on the instance.
|
|
816
|
+
# Currently, this is only considered when the service_name is "api".
|
|
817
|
+
# "*" starts all services on the same instance,
|
|
818
|
+
# other options are considered as running only the api service.
|
|
819
|
+
"services": "*",
|
|
820
|
+
},
|
|
821
|
+
},
|
|
822
|
+
"notifications": {
|
|
823
|
+
"smtp": {
|
|
824
|
+
"config_secret_name": "mlrun-smtp-config",
|
|
825
|
+
"refresh_interval": "30",
|
|
826
|
+
}
|
|
827
|
+
},
|
|
800
828
|
}
|
|
801
829
|
_is_running_as_api = None
|
|
802
830
|
|
|
@@ -843,6 +871,22 @@ class Config:
|
|
|
843
871
|
name = self.__class__.__name__
|
|
844
872
|
return f"{name}({self._cfg!r})"
|
|
845
873
|
|
|
874
|
+
def __iter__(self):
|
|
875
|
+
if isinstance(self._cfg, Mapping):
|
|
876
|
+
return self._cfg.__iter__()
|
|
877
|
+
|
|
878
|
+
def items(self):
|
|
879
|
+
if isinstance(self._cfg, Mapping):
|
|
880
|
+
return iter(self._cfg.items())
|
|
881
|
+
|
|
882
|
+
def keys(self):
|
|
883
|
+
if isinstance(self._cfg, Mapping):
|
|
884
|
+
return iter(self.data.keys())
|
|
885
|
+
|
|
886
|
+
def values(self):
|
|
887
|
+
if isinstance(self._cfg, Mapping):
|
|
888
|
+
return iter(self.data.values())
|
|
889
|
+
|
|
846
890
|
def update(self, cfg, skip_errors=False):
|
|
847
891
|
for key, value in cfg.items():
|
|
848
892
|
if hasattr(self, key):
|
|
@@ -1035,6 +1079,17 @@ class Config:
|
|
|
1035
1079
|
f"is not allowed for iguazio version: {igz_version} < 3.5.1"
|
|
1036
1080
|
)
|
|
1037
1081
|
|
|
1082
|
+
def validate_object_retentions(self):
|
|
1083
|
+
for table_name, retention_days in self.object_retentions.items():
|
|
1084
|
+
if retention_days < 7 and not os.getenv("PARTITION_INTERVAL"):
|
|
1085
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
1086
|
+
f"{table_name} partition interval must be greater than a week"
|
|
1087
|
+
)
|
|
1088
|
+
elif retention_days > 53 * 7:
|
|
1089
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
1090
|
+
f"{table_name} partition interval must be less than a year"
|
|
1091
|
+
)
|
|
1092
|
+
|
|
1038
1093
|
def resolve_chief_api_url(self) -> str:
|
|
1039
1094
|
if self.httpdb.clusterization.chief.url:
|
|
1040
1095
|
return self.httpdb.clusterization.chief.url
|
|
@@ -1193,9 +1248,9 @@ class Config:
|
|
|
1193
1248
|
|
|
1194
1249
|
def get_model_monitoring_file_target_path(
|
|
1195
1250
|
self,
|
|
1196
|
-
project: str
|
|
1197
|
-
kind: str
|
|
1198
|
-
target:
|
|
1251
|
+
project: str,
|
|
1252
|
+
kind: str,
|
|
1253
|
+
target: typing.Literal["online", "offline"] = "online",
|
|
1199
1254
|
artifact_path: typing.Optional[str] = None,
|
|
1200
1255
|
function_name: typing.Optional[str] = None,
|
|
1201
1256
|
**kwargs,
|
|
@@ -1373,9 +1428,12 @@ def _validate_config(config):
|
|
|
1373
1428
|
pass
|
|
1374
1429
|
|
|
1375
1430
|
config.verify_security_context_enrichment_mode_is_allowed()
|
|
1431
|
+
config.validate_object_retentions()
|
|
1376
1432
|
|
|
1377
1433
|
|
|
1378
|
-
def _verify_gpu_requests_and_limits(
|
|
1434
|
+
def _verify_gpu_requests_and_limits(
|
|
1435
|
+
requests_gpu: typing.Optional[str] = None, limits_gpu: typing.Optional[str] = None
|
|
1436
|
+
):
|
|
1379
1437
|
# https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/
|
|
1380
1438
|
if requests_gpu and not limits_gpu:
|
|
1381
1439
|
raise mlrun.errors.MLRunConflictError(
|
|
@@ -1388,7 +1446,7 @@ def _verify_gpu_requests_and_limits(requests_gpu: str = None, limits_gpu: str =
|
|
|
1388
1446
|
)
|
|
1389
1447
|
|
|
1390
1448
|
|
|
1391
|
-
def _convert_resources_to_str(config: dict = None):
|
|
1449
|
+
def _convert_resources_to_str(config: typing.Optional[dict] = None):
|
|
1392
1450
|
resources_types = ["cpu", "memory", "gpu"]
|
|
1393
1451
|
resource_requirements = ["requests", "limits"]
|
|
1394
1452
|
if not config.get("default_function_pod_resources"):
|
mlrun/data_types/__init__.py
CHANGED
|
@@ -11,8 +11,6 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
#
|
|
15
|
-
# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
|
|
16
14
|
|
|
17
15
|
from .data_types import (
|
|
18
16
|
InferOptions,
|
mlrun/data_types/data_types.py
CHANGED
mlrun/data_types/infer.py
CHANGED
|
@@ -12,6 +12,8 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
|
+
from typing import Optional
|
|
16
|
+
|
|
15
17
|
import numpy as np
|
|
16
18
|
import packaging.version
|
|
17
19
|
import pandas as pd
|
|
@@ -29,7 +31,7 @@ def infer_schema_from_df(
|
|
|
29
31
|
df: pd.DataFrame,
|
|
30
32
|
features,
|
|
31
33
|
entities,
|
|
32
|
-
timestamp_key: str = None,
|
|
34
|
+
timestamp_key: Optional[str] = None,
|
|
33
35
|
entity_columns=None,
|
|
34
36
|
options: InferOptions = InferOptions.Null,
|
|
35
37
|
):
|
mlrun/data_types/spark.py
CHANGED
|
@@ -14,11 +14,12 @@
|
|
|
14
14
|
#
|
|
15
15
|
from datetime import datetime
|
|
16
16
|
from os import environ
|
|
17
|
+
from typing import Optional
|
|
17
18
|
|
|
18
19
|
import numpy as np
|
|
19
20
|
import pytz
|
|
20
21
|
from pyspark.sql.functions import to_utc_timestamp
|
|
21
|
-
from pyspark.sql.types import BooleanType, DoubleType
|
|
22
|
+
from pyspark.sql.types import BooleanType, DoubleType
|
|
22
23
|
|
|
23
24
|
from mlrun.feature_store.retrieval.spark_merger import spark_df_to_pandas
|
|
24
25
|
from mlrun.utils import logger
|
|
@@ -35,7 +36,7 @@ def infer_schema_from_df_spark(
|
|
|
35
36
|
df,
|
|
36
37
|
features,
|
|
37
38
|
entities,
|
|
38
|
-
timestamp_key: str = None,
|
|
39
|
+
timestamp_key: Optional[str] = None,
|
|
39
40
|
entity_columns=None,
|
|
40
41
|
options: InferOptions = InferOptions.Null,
|
|
41
42
|
):
|
|
@@ -143,7 +144,8 @@ def get_df_stats_spark(df, options, num_bins=20, sample_size=None):
|
|
|
143
144
|
timestamp_columns = set()
|
|
144
145
|
boolean_columns = set()
|
|
145
146
|
for field in df_after_type_casts.schema.fields:
|
|
146
|
-
|
|
147
|
+
# covers TimestampType and TimestampNTZType, which was added in PySpark 3.4.0
|
|
148
|
+
is_timestamp = field.dataType.typeName().startswith("timestamp")
|
|
147
149
|
is_boolean = isinstance(field.dataType, BooleanType)
|
|
148
150
|
if is_timestamp:
|
|
149
151
|
df_after_type_casts = df_after_type_casts.withColumn(
|
mlrun/data_types/to_pandas.py
CHANGED
|
@@ -244,6 +244,15 @@ def _to_corrected_pandas_type(dt):
|
|
|
244
244
|
|
|
245
245
|
|
|
246
246
|
def spark_df_to_pandas(spark_df):
|
|
247
|
+
import pyspark
|
|
248
|
+
|
|
249
|
+
if semver.parse(pyspark.__version__) >= semver.Version(3, 5, 0):
|
|
250
|
+
|
|
251
|
+
def to_pandas(spark_df_inner):
|
|
252
|
+
return spark_df_inner.toPandas()
|
|
253
|
+
else:
|
|
254
|
+
to_pandas = _to_pandas
|
|
255
|
+
|
|
247
256
|
# as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
|
|
248
257
|
# when we upgrade pyspark, we should check whether this workaround is still necessary
|
|
249
258
|
# see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
|
|
@@ -262,9 +271,9 @@ def spark_df_to_pandas(spark_df):
|
|
|
262
271
|
)
|
|
263
272
|
type_conversion_dict[field.name] = "datetime64[ns]"
|
|
264
273
|
|
|
265
|
-
df =
|
|
274
|
+
df = to_pandas(spark_df)
|
|
266
275
|
if type_conversion_dict:
|
|
267
276
|
df = df.astype(type_conversion_dict)
|
|
268
277
|
return df
|
|
269
278
|
else:
|
|
270
|
-
return
|
|
279
|
+
return to_pandas(spark_df)
|
mlrun/datastore/__init__.py
CHANGED
|
@@ -12,8 +12,6 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
|
|
16
|
-
|
|
17
15
|
__all__ = [
|
|
18
16
|
"DataItem",
|
|
19
17
|
"get_store_resource",
|
|
@@ -32,6 +30,8 @@ __all__ = [
|
|
|
32
30
|
"DatabricksFileSystemDisableCache",
|
|
33
31
|
"DatabricksFileBugFixed",
|
|
34
32
|
"get_stream_pusher",
|
|
33
|
+
"ConfigProfile",
|
|
34
|
+
"VectorStoreCollection",
|
|
35
35
|
]
|
|
36
36
|
|
|
37
37
|
import fsspec
|
mlrun/datastore/alibaba_oss.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
import time
|
|
16
16
|
from datetime import datetime
|
|
17
17
|
from pathlib import Path
|
|
18
|
+
from typing import Optional
|
|
18
19
|
from urllib.parse import urlparse
|
|
19
20
|
|
|
20
21
|
import oss2
|
|
@@ -28,7 +29,9 @@ from .base import DataStore, FileStats, make_datastore_schema_sanitizer
|
|
|
28
29
|
class OSSStore(DataStore):
|
|
29
30
|
using_bucket = True
|
|
30
31
|
|
|
31
|
-
def __init__(
|
|
32
|
+
def __init__(
|
|
33
|
+
self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
|
|
34
|
+
):
|
|
32
35
|
super().__init__(parent, name, schema, endpoint, secrets)
|
|
33
36
|
# will be used in case user asks to assume a role and work through fsspec
|
|
34
37
|
|
mlrun/datastore/azure_blob.py
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
import time
|
|
16
16
|
from pathlib import Path
|
|
17
|
+
from typing import Optional
|
|
17
18
|
from urllib.parse import urlparse
|
|
18
19
|
|
|
19
20
|
from azure.storage.blob import BlobServiceClient
|
|
@@ -36,7 +37,9 @@ class AzureBlobStore(DataStore):
|
|
|
36
37
|
1024 * 1024 * 8
|
|
37
38
|
) # for service_client property only, does not affect filesystem
|
|
38
39
|
|
|
39
|
-
def __init__(
|
|
40
|
+
def __init__(
|
|
41
|
+
self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
|
|
42
|
+
):
|
|
40
43
|
super().__init__(parent, name, schema, endpoint, secrets=secrets)
|
|
41
44
|
self._service_client = None
|
|
42
45
|
self._storage_options = None
|
mlrun/datastore/base.py
CHANGED
|
@@ -48,7 +48,7 @@ class FileStats:
|
|
|
48
48
|
class DataStore:
|
|
49
49
|
using_bucket = False
|
|
50
50
|
|
|
51
|
-
def __init__(self, parent, name, kind, endpoint="", secrets: dict = None):
|
|
51
|
+
def __init__(self, parent, name, kind, endpoint="", secrets: Optional[dict] = None):
|
|
52
52
|
self._parent = parent
|
|
53
53
|
self.kind = kind
|
|
54
54
|
self.name = name
|
|
@@ -500,12 +500,18 @@ class DataItem:
|
|
|
500
500
|
"""DataItem url e.g. /dir/path, s3://bucket/path"""
|
|
501
501
|
return self._url
|
|
502
502
|
|
|
503
|
-
def get(
|
|
503
|
+
def get(
|
|
504
|
+
self,
|
|
505
|
+
size: Optional[int] = None,
|
|
506
|
+
offset: int = 0,
|
|
507
|
+
encoding: Optional[str] = None,
|
|
508
|
+
) -> Union[bytes, str]:
|
|
504
509
|
"""read all or a byte range and return the content
|
|
505
510
|
|
|
506
511
|
:param size: number of bytes to get
|
|
507
512
|
:param offset: fetch from offset (in bytes)
|
|
508
513
|
:param encoding: encoding (e.g. "utf-8") for converting bytes to str
|
|
514
|
+
:return: the bytes/str content
|
|
509
515
|
"""
|
|
510
516
|
body = self._store.get(self._path, size=size, offset=offset)
|
|
511
517
|
if encoding and isinstance(body, bytes):
|
|
@@ -519,7 +525,7 @@ class DataItem:
|
|
|
519
525
|
"""
|
|
520
526
|
self._store.download(self._path, target_path)
|
|
521
527
|
|
|
522
|
-
def put(self, data, append=False):
|
|
528
|
+
def put(self, data: Union[bytes, str], append: bool = False) -> None:
|
|
523
529
|
"""write/upload the data, append is only supported by some datastores
|
|
524
530
|
|
|
525
531
|
:param data: data (bytes/str) to write
|
|
@@ -687,7 +693,9 @@ def basic_auth_header(user, password):
|
|
|
687
693
|
|
|
688
694
|
|
|
689
695
|
class HttpStore(DataStore):
|
|
690
|
-
def __init__(
|
|
696
|
+
def __init__(
|
|
697
|
+
self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
|
|
698
|
+
):
|
|
691
699
|
super().__init__(parent, name, schema, endpoint, secrets)
|
|
692
700
|
self._https_auth_token = None
|
|
693
701
|
self._schema = schema
|
mlrun/datastore/datastore.py
CHANGED
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
+
from typing import Optional
|
|
14
15
|
from urllib.parse import urlparse
|
|
15
16
|
|
|
16
17
|
from mergedeep import merge
|
|
@@ -178,12 +179,17 @@ class StoreManager:
|
|
|
178
179
|
# which accepts a feature vector uri and generates the offline vector (parquet) for it if it doesn't exist
|
|
179
180
|
if not target and not allow_empty_resources:
|
|
180
181
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
181
|
-
f"
|
|
182
|
+
f"Resource {url} does not have a valid/persistent offline target"
|
|
182
183
|
)
|
|
183
184
|
return resource, target or ""
|
|
184
185
|
|
|
185
186
|
def object(
|
|
186
|
-
self,
|
|
187
|
+
self,
|
|
188
|
+
url,
|
|
189
|
+
key="",
|
|
190
|
+
project="",
|
|
191
|
+
allow_empty_resources=None,
|
|
192
|
+
secrets: Optional[dict] = None,
|
|
187
193
|
) -> DataItem:
|
|
188
194
|
meta = artifact_url = None
|
|
189
195
|
if is_store_uri(url):
|
|
@@ -205,7 +211,7 @@ class StoreManager:
|
|
|
205
211
|
)
|
|
206
212
|
|
|
207
213
|
def get_or_create_store(
|
|
208
|
-
self, url, secrets: dict = None, project_name=""
|
|
214
|
+
self, url, secrets: Optional[dict] = None, project_name=""
|
|
209
215
|
) -> (DataStore, str, str):
|
|
210
216
|
schema, endpoint, parsed_url = parse_url(url)
|
|
211
217
|
subpath = parsed_url.path
|