mlrun 1.7.2rc4__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__init__.py +26 -22
- mlrun/__main__.py +15 -16
- mlrun/alerts/alert.py +150 -15
- mlrun/api/schemas/__init__.py +1 -9
- mlrun/artifacts/__init__.py +2 -3
- mlrun/artifacts/base.py +62 -19
- mlrun/artifacts/dataset.py +17 -17
- mlrun/artifacts/document.py +454 -0
- mlrun/artifacts/manager.py +28 -18
- mlrun/artifacts/model.py +91 -59
- mlrun/artifacts/plots.py +2 -2
- mlrun/common/constants.py +8 -0
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/artifact.py +1 -1
- mlrun/common/formatters/feature_set.py +2 -0
- mlrun/common/formatters/function.py +1 -0
- mlrun/{model_monitoring/db/stores/v3io_kv/__init__.py → common/formatters/model_endpoint.py} +17 -0
- mlrun/common/formatters/pipeline.py +1 -2
- mlrun/common/formatters/project.py +9 -0
- mlrun/common/model_monitoring/__init__.py +0 -5
- mlrun/common/model_monitoring/helpers.py +12 -62
- mlrun/common/runtimes/constants.py +25 -4
- mlrun/common/schemas/__init__.py +9 -5
- mlrun/common/schemas/alert.py +114 -19
- mlrun/common/schemas/api_gateway.py +3 -3
- mlrun/common/schemas/artifact.py +22 -9
- mlrun/common/schemas/auth.py +8 -4
- mlrun/common/schemas/background_task.py +7 -7
- mlrun/common/schemas/client_spec.py +4 -4
- mlrun/common/schemas/clusterization_spec.py +2 -2
- mlrun/common/schemas/common.py +53 -3
- mlrun/common/schemas/constants.py +15 -0
- mlrun/common/schemas/datastore_profile.py +1 -1
- mlrun/common/schemas/feature_store.py +9 -9
- mlrun/common/schemas/frontend_spec.py +4 -4
- mlrun/common/schemas/function.py +10 -10
- mlrun/common/schemas/hub.py +1 -1
- mlrun/common/schemas/k8s.py +3 -3
- mlrun/common/schemas/memory_reports.py +3 -3
- mlrun/common/schemas/model_monitoring/__init__.py +4 -8
- mlrun/common/schemas/model_monitoring/constants.py +127 -46
- mlrun/common/schemas/model_monitoring/grafana.py +18 -12
- mlrun/common/schemas/model_monitoring/model_endpoints.py +154 -160
- mlrun/common/schemas/notification.py +24 -3
- mlrun/common/schemas/object.py +1 -1
- mlrun/common/schemas/pagination.py +4 -4
- mlrun/common/schemas/partition.py +142 -0
- mlrun/common/schemas/pipeline.py +3 -3
- mlrun/common/schemas/project.py +26 -18
- mlrun/common/schemas/runs.py +3 -3
- mlrun/common/schemas/runtime_resource.py +5 -5
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/secret.py +1 -1
- mlrun/{model_monitoring/db/stores/sqldb/__init__.py → common/schemas/serving.py} +10 -1
- mlrun/common/schemas/tag.py +3 -3
- mlrun/common/schemas/workflow.py +6 -5
- mlrun/common/types.py +1 -0
- mlrun/config.py +157 -89
- mlrun/data_types/__init__.py +5 -3
- mlrun/data_types/infer.py +13 -3
- mlrun/data_types/spark.py +2 -1
- mlrun/datastore/__init__.py +59 -18
- mlrun/datastore/alibaba_oss.py +4 -1
- mlrun/datastore/azure_blob.py +4 -1
- mlrun/datastore/base.py +19 -24
- mlrun/datastore/datastore.py +10 -4
- mlrun/datastore/datastore_profile.py +178 -45
- mlrun/datastore/dbfs_store.py +4 -1
- mlrun/datastore/filestore.py +4 -1
- mlrun/datastore/google_cloud_storage.py +4 -1
- mlrun/datastore/hdfs.py +4 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +4 -1
- mlrun/datastore/s3.py +14 -3
- mlrun/datastore/sources.py +89 -92
- mlrun/datastore/store_resources.py +7 -4
- mlrun/datastore/storeytargets.py +51 -16
- mlrun/datastore/targets.py +38 -31
- mlrun/datastore/utils.py +87 -4
- mlrun/datastore/v3io.py +4 -1
- mlrun/datastore/vectorstore.py +291 -0
- mlrun/datastore/wasbfs/fs.py +13 -12
- mlrun/db/base.py +286 -100
- mlrun/db/httpdb.py +1562 -490
- mlrun/db/nopdb.py +250 -83
- mlrun/errors.py +6 -2
- mlrun/execution.py +194 -50
- mlrun/feature_store/__init__.py +2 -10
- mlrun/feature_store/api.py +20 -458
- mlrun/feature_store/common.py +9 -9
- mlrun/feature_store/feature_set.py +20 -18
- mlrun/feature_store/feature_vector.py +105 -479
- mlrun/feature_store/feature_vector_utils.py +466 -0
- mlrun/feature_store/retrieval/base.py +15 -11
- mlrun/feature_store/retrieval/job.py +2 -1
- mlrun/feature_store/retrieval/storey_merger.py +1 -1
- mlrun/feature_store/steps.py +3 -3
- mlrun/features.py +30 -13
- mlrun/frameworks/__init__.py +1 -2
- mlrun/frameworks/_common/__init__.py +1 -2
- mlrun/frameworks/_common/artifacts_library.py +2 -2
- mlrun/frameworks/_common/mlrun_interface.py +10 -6
- mlrun/frameworks/_common/model_handler.py +31 -31
- mlrun/frameworks/_common/producer.py +3 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
- mlrun/frameworks/_ml_common/__init__.py +1 -2
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_ml_common/model_handler.py +21 -21
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/auto_mlrun/__init__.py +1 -2
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
- mlrun/frameworks/huggingface/__init__.py +1 -2
- mlrun/frameworks/huggingface/model_server.py +9 -9
- mlrun/frameworks/lgbm/__init__.py +47 -44
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
- mlrun/frameworks/lgbm/model_handler.py +15 -11
- mlrun/frameworks/lgbm/model_server.py +11 -7
- mlrun/frameworks/lgbm/utils.py +2 -2
- mlrun/frameworks/onnx/__init__.py +1 -2
- mlrun/frameworks/onnx/dataset.py +3 -3
- mlrun/frameworks/onnx/mlrun_interface.py +2 -2
- mlrun/frameworks/onnx/model_handler.py +7 -5
- mlrun/frameworks/onnx/model_server.py +8 -6
- mlrun/frameworks/parallel_coordinates.py +11 -11
- mlrun/frameworks/pytorch/__init__.py +22 -23
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
- mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
- mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
- mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
- mlrun/frameworks/pytorch/model_handler.py +21 -17
- mlrun/frameworks/pytorch/model_server.py +13 -9
- mlrun/frameworks/sklearn/__init__.py +19 -18
- mlrun/frameworks/sklearn/estimator.py +2 -2
- mlrun/frameworks/sklearn/metric.py +3 -3
- mlrun/frameworks/sklearn/metrics_library.py +8 -6
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
- mlrun/frameworks/sklearn/model_handler.py +4 -3
- mlrun/frameworks/tf_keras/__init__.py +11 -12
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
- mlrun/frameworks/tf_keras/model_handler.py +17 -13
- mlrun/frameworks/tf_keras/model_server.py +12 -8
- mlrun/frameworks/xgboost/__init__.py +19 -18
- mlrun/frameworks/xgboost/model_handler.py +13 -9
- mlrun/k8s_utils.py +2 -5
- mlrun/launcher/base.py +3 -4
- mlrun/launcher/client.py +2 -2
- mlrun/launcher/local.py +6 -2
- mlrun/launcher/remote.py +1 -1
- mlrun/lists.py +8 -4
- mlrun/model.py +132 -46
- mlrun/model_monitoring/__init__.py +3 -5
- mlrun/model_monitoring/api.py +113 -98
- mlrun/model_monitoring/applications/__init__.py +0 -5
- mlrun/model_monitoring/applications/_application_steps.py +81 -50
- mlrun/model_monitoring/applications/base.py +467 -14
- mlrun/model_monitoring/applications/context.py +212 -134
- mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py +6 -2
- mlrun/model_monitoring/applications/evidently/base.py +146 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +89 -56
- mlrun/model_monitoring/applications/results.py +67 -15
- mlrun/model_monitoring/controller.py +701 -315
- mlrun/model_monitoring/db/__init__.py +0 -2
- mlrun/model_monitoring/db/_schedules.py +242 -0
- mlrun/model_monitoring/db/_stats.py +189 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +33 -22
- mlrun/model_monitoring/db/tsdb/base.py +243 -49
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +76 -36
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +534 -88
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +436 -106
- mlrun/model_monitoring/helpers.py +356 -114
- mlrun/model_monitoring/stream_processing.py +190 -345
- mlrun/model_monitoring/tracking_policy.py +11 -4
- mlrun/model_monitoring/writer.py +49 -90
- mlrun/package/__init__.py +3 -6
- mlrun/package/context_handler.py +2 -2
- mlrun/package/packager.py +12 -9
- mlrun/package/packagers/__init__.py +0 -2
- mlrun/package/packagers/default_packager.py +14 -11
- mlrun/package/packagers/numpy_packagers.py +16 -7
- mlrun/package/packagers/pandas_packagers.py +18 -18
- mlrun/package/packagers/python_standard_library_packagers.py +25 -11
- mlrun/package/packagers_manager.py +35 -32
- mlrun/package/utils/__init__.py +0 -3
- mlrun/package/utils/_pickler.py +6 -6
- mlrun/platforms/__init__.py +47 -16
- mlrun/platforms/iguazio.py +4 -1
- mlrun/projects/operations.py +30 -30
- mlrun/projects/pipelines.py +116 -47
- mlrun/projects/project.py +1292 -329
- mlrun/render.py +5 -9
- mlrun/run.py +57 -14
- mlrun/runtimes/__init__.py +1 -3
- mlrun/runtimes/base.py +30 -22
- mlrun/runtimes/daskjob.py +9 -9
- mlrun/runtimes/databricks_job/databricks_runtime.py +6 -5
- mlrun/runtimes/function_reference.py +5 -2
- mlrun/runtimes/generators.py +3 -2
- mlrun/runtimes/kubejob.py +6 -7
- mlrun/runtimes/mounts.py +574 -0
- mlrun/runtimes/mpijob/__init__.py +0 -2
- mlrun/runtimes/mpijob/abstract.py +7 -6
- mlrun/runtimes/nuclio/api_gateway.py +7 -7
- mlrun/runtimes/nuclio/application/application.py +11 -13
- mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
- mlrun/runtimes/nuclio/function.py +127 -70
- mlrun/runtimes/nuclio/serving.py +105 -37
- mlrun/runtimes/pod.py +159 -54
- mlrun/runtimes/remotesparkjob.py +3 -2
- mlrun/runtimes/sparkjob/__init__.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +22 -12
- mlrun/runtimes/utils.py +7 -6
- mlrun/secrets.py +2 -2
- mlrun/serving/__init__.py +8 -0
- mlrun/serving/merger.py +7 -5
- mlrun/serving/remote.py +35 -22
- mlrun/serving/routers.py +186 -240
- mlrun/serving/server.py +41 -10
- mlrun/serving/states.py +432 -118
- mlrun/serving/utils.py +13 -2
- mlrun/serving/v1_serving.py +3 -2
- mlrun/serving/v2_serving.py +161 -203
- mlrun/track/__init__.py +1 -1
- mlrun/track/tracker.py +2 -2
- mlrun/track/trackers/mlflow_tracker.py +6 -5
- mlrun/utils/async_http.py +35 -22
- mlrun/utils/clones.py +7 -4
- mlrun/utils/helpers.py +511 -58
- mlrun/utils/logger.py +119 -13
- mlrun/utils/notifications/notification/__init__.py +22 -19
- mlrun/utils/notifications/notification/base.py +39 -15
- mlrun/utils/notifications/notification/console.py +6 -6
- mlrun/utils/notifications/notification/git.py +11 -11
- mlrun/utils/notifications/notification/ipython.py +10 -9
- mlrun/utils/notifications/notification/mail.py +176 -0
- mlrun/utils/notifications/notification/slack.py +16 -8
- mlrun/utils/notifications/notification/webhook.py +24 -8
- mlrun/utils/notifications/notification_pusher.py +191 -200
- mlrun/utils/regex.py +12 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/METADATA +69 -54
- mlrun-1.8.0.dist-info/RECORD +351 -0
- {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/WHEEL +1 -1
- mlrun/model_monitoring/applications/evidently_base.py +0 -137
- mlrun/model_monitoring/db/stores/__init__.py +0 -136
- mlrun/model_monitoring/db/stores/base/store.py +0 -213
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
- mlrun/model_monitoring/model_endpoint.py +0 -118
- mlrun-1.7.2rc4.dist-info/RECORD +0 -351
- {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info/licenses}/LICENSE +0 -0
- {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/top_level.txt +0 -0
mlrun/datastore/inmem.py
CHANGED
@@ -17,6 +17,7 @@ from io import BytesIO, StringIO
 import pandas as pd
 
 import mlrun
+import mlrun.utils.helpers
 
 from .base import DataStore, FileStats
 
@@ -35,7 +36,9 @@ class InMemoryStore(DataStore):
 
     def _get_item(self, key):
         if key not in self._items:
-            raise
+            raise mlrun.errors.MLRunNotFoundError(
+                f"item {key} not found in memory store"
+            )
         return self._items[key]
 
     def get(self, key, size=None, offset=0):
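The bare `raise` becomes a descriptive `mlrun.errors.MLRunNotFoundError`, so a missing in-memory key can now be caught specifically. A minimal sketch of the new behavior, assuming the `memory://` scheme routes to `InMemoryStore` (the key name is illustrative):

    import mlrun
    import mlrun.errors

    item = mlrun.get_dataitem("memory://never-stored")
    try:
        item.get()
    except mlrun.errors.MLRunNotFoundError as err:
        # 1.8.0 raises a named, catchable error instead of a bare `raise`
        print(err)  # e.g. "item never-stored not found in memory store"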
mlrun/datastore/redis.py
CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Optional
 from urllib.parse import urlparse
 
 import redis
@@ -30,7 +31,9 @@ class RedisStore(DataStore):
     - key and value sizes are limited to 512MB
     """
 
-    def __init__(
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         redis_default_port = "6379"
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self.headers = None
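This change, like the S3 and sources changes below, is part of one typing sweep: implicitly optional annotations such as `secrets: dict = None` become explicit `Optional[...]`. Strict type checkers (e.g. mypy since 0.990, where implicit Optional is off by default) reject the old form because `None` is not a valid `dict`. A minimal illustration:

    from typing import Optional

    def old_style(secrets: dict = None):  # flagged by strict checkers
        ...

    def new_style(secrets: Optional[dict] = None):  # explicit and checker-clean
        ...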
mlrun/datastore/s3.py
CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import time
+from typing import Optional
 
 import boto3
 from boto3.s3.transfer import TransferConfig
@@ -20,13 +21,15 @@ from fsspec.registry import get_filesystem_class
 
 import mlrun.errors
 
-from .base import DataStore, FileStats,
+from .base import DataStore, FileStats, make_datastore_schema_sanitizer
 
 
 class S3Store(DataStore):
     using_bucket = True
 
-    def __init__(
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets)
         # will be used in case user asks to assume a role and work through fsspec
         self._temp_credentials = None
@@ -105,6 +108,13 @@ class S3Store(DataStore):
             "choose-signer.s3.*", disable_signing
         )
 
+    @staticmethod
+    def get_range(size, offset):
+        byterange = f"bytes={offset}-"
+        if size:
+            byterange += str(offset + size - 1)
+        return byterange
+
     def get_spark_options(self):
         res = {}
         st = self.get_storage_options()
@@ -155,6 +165,7 @@ class S3Store(DataStore):
             key=access_key_id,
             secret=secret,
             token=token,
+            use_listings_cache=False,
         )
 
         if endpoint_url:
@@ -182,7 +193,7 @@ class S3Store(DataStore):
         bucket, key = self.get_bucket_and_key(key)
         obj = self.s3.Object(bucket, key)
         if size or offset:
-            return obj.get(Range=get_range(size, offset))["Body"].read()
+            return obj.get(Range=S3Store.get_range(size, offset))["Body"].read()
         return obj.get()["Body"].read()
 
     def put(self, key, data, append=False):
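The byte-range helper moves onto `S3Store` as a static method; it builds the value for the S3 `Range` request header, where the end offset is inclusive per HTTP range semantics. For example:

    from mlrun.datastore.s3 import S3Store

    assert S3Store.get_range(size=None, offset=50) == "bytes=50-"
    # 100 bytes starting at offset 50; the end index is inclusive, hence 149
    assert S3Store.get_range(size=100, offset=50) == "bytes=50-149"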
mlrun/datastore/sources.py
CHANGED
@@ -18,7 +18,7 @@ import warnings
 from base64 import b64encode
 from copy import copy
 from datetime import datetime
-from typing import Optional, Union
+from typing import Any, Optional, Union
 
 import pandas as pd
 import semver
@@ -34,6 +34,7 @@ from mlrun.datastore.utils import transform_list_filters_to_tuple
 from mlrun.secrets import SecretsStore
 from mlrun.utils import logger
 
+from ..common.schemas.function import Function
 from ..model import DataSource
 from ..platforms.iguazio import parse_path
 from ..utils import get_class, is_explicit_ack_supported
@@ -181,10 +182,10 @@ class CSVSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        path: str = None,
-        attributes: dict[str, object] = None,
-        key_field: str = None,
-        schedule: str = None,
+        path: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        key_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         parse_dates: Union[None, int, str, list[int], list[str]] = None,
         **kwargs,
     ):
@@ -308,11 +309,11 @@ class ParquetSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        path: str = None,
-        attributes: dict[str, object] = None,
-        key_field: str = None,
-        time_field: str = None,
-        schedule: str = None,
+        path: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
         additional_filters: Optional[list[Union[tuple, list]]] = None,
@@ -392,7 +393,9 @@ class ParquetSource(BaseSourceDriver):
         )
 
     @classmethod
-    def from_dict(
+    def from_dict(
+        cls, struct=None, fields=None, deprecated_fields: Optional[dict] = None
+    ):
         new_obj = super().from_dict(
             struct=struct, fields=fields, deprecated_fields=deprecated_fields
         )
@@ -564,18 +567,18 @@ class BigQuerySource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        table: str = None,
-        max_results_for_table: int = None,
-        query: str = None,
-        materialization_dataset: str = None,
-        chunksize: int = None,
-        key_field: str = None,
-        time_field: str = None,
-        schedule: str = None,
+        table: Optional[str] = None,
+        max_results_for_table: Optional[int] = None,
+        query: Optional[str] = None,
+        materialization_dataset: Optional[str] = None,
+        chunksize: Optional[int] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time=None,
         end_time=None,
-        gcp_project: str = None,
-        spark_options: dict = None,
+        gcp_project: Optional[str] = None,
+        spark_options: Optional[dict] = None,
         **kwargs,
     ):
         if query and table:
@@ -776,27 +779,27 @@ class SnowflakeSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        key_field: str = None,
-        attributes: dict[str, object] = None,
-        time_field: str = None,
-        schedule: str = None,
+        key_field: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time=None,
         end_time=None,
-        query: str = None,
-        url: str = None,
-        user: str = None,
-        database: str = None,
-        schema: str = None,
-        db_schema: str = None,
-        warehouse: str = None,
+        query: Optional[str] = None,
+        url: Optional[str] = None,
+        user: Optional[str] = None,
+        database: Optional[str] = None,
+        schema: Optional[str] = None,
+        db_schema: Optional[str] = None,
+        warehouse: Optional[str] = None,
         **kwargs,
     ):
-        # TODO: Remove in 1.
+        # TODO: Remove in 1.10.0
         if schema:
             warnings.warn(
-                "schema is deprecated in 1.7.0, and will be removed in 1.
+                "schema is deprecated in 1.7.0, and will be removed in 1.10.0, please use db_schema"
             )
-        db_schema = db_schema or schema  # TODO: Remove in 1.
+        db_schema = db_schema or schema  # TODO: Remove in 1.10.0
 
         attributes = attributes or {}
         if url:
@@ -850,9 +853,9 @@ class CustomSource(BaseSourceDriver):
 
     def __init__(
         self,
-        class_name: str = None,
+        class_name: Optional[str] = None,
         name: str = "",
-        schedule: str = None,
+        schedule: Optional[str] = None,
         **attributes,
     ):
         attributes = attributes or {}
@@ -930,12 +933,12 @@ class OnlineSource(BaseSourceDriver):
 
     def __init__(
         self,
-        name: str = None,
-        path: str = None,
-        attributes: dict[str, object] = None,
-        key_field: str = None,
-        time_field: str = None,
-        workers: int = None,
+        name: Optional[str] = None,
+        path: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        workers: Optional[int] = None,
     ):
         super().__init__(name, path, attributes, key_field, time_field)
         self.online = True
@@ -949,8 +952,7 @@ class OnlineSource(BaseSourceDriver):
             is_explicit_ack_supported(context)
             and mlrun.mlconf.is_explicit_ack_enabled()
         )
-
-        src_class = storey.SyncEmitSource(
+        src_class = storey.AsyncEmitSource(
             context=context,
             key_field=self.key_field or key_field,
             full_event=True,
@@ -965,6 +967,21 @@ class OnlineSource(BaseSourceDriver):
             "This source type is not supported with ingestion service yet"
         )
 
+    @staticmethod
+    def set_explicit_ack_mode(function: Function, **extra_arguments) -> dict[str, Any]:
+        extra_arguments = extra_arguments or {}
+        engine = "sync"
+        if function.spec and hasattr(function.spec, "graph"):
+            engine = getattr(function.spec.graph, "engine", None) or engine
+        if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
+            extra_arguments["explicit_ack_mode"] = extra_arguments.get(
+                "explicit_ack_mode", "explicitOnly"
+            )
+            extra_arguments["worker_allocation_mode"] = extra_arguments.get(
+                "worker_allocation_mode", "static"
+            )
+        return extra_arguments
+
 
 class HttpSource(OnlineSource):
     kind = "http"
@@ -986,7 +1003,7 @@ class StreamSource(OnlineSource):
         seek_to="earliest",
         shards=1,
         retention_in_hours=24,
-        extra_attributes: dict = None,
+        extra_attributes: Optional[dict] = None,
         **kwargs,
     ):
         """
@@ -1027,15 +1044,7 @@ class StreamSource(OnlineSource):
             raise_for_status=v3io.dataplane.RaiseForStatus.never,
         )
         res.raise_for_status([409, 204])
-
-        kwargs = {}
-        engine = "async"
-        if hasattr(function.spec, "graph") and function.spec.graph.engine:
-            engine = function.spec.graph.engine
-
-        if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
-            kwargs["explicit_ack_mode"] = "explicitOnly"
-            kwargs["worker_allocation_mode"] = "static"
+        kwargs = self.set_explicit_ack_mode(function=function)
 
         function.add_v3io_stream_trigger(
             url,
@@ -1086,12 +1095,9 @@ class KafkaSource(OnlineSource):
         attributes["initial_offset"] = initial_offset
         if partitions is not None:
             attributes["partitions"] = partitions
-        sasl
-
-
-        sasl["user"] = sasl_user
-        sasl["password"] = sasl_pass
-        if sasl:
+        if sasl := mlrun.datastore.utils.KafkaParameters(attributes).sasl(
+            usr=sasl_user, pwd=sasl_pass
+        ):
             attributes["sasl"] = sasl
         super().__init__(attributes=attributes, **kwargs)
 
@@ -1116,18 +1122,15 @@ class KafkaSource(OnlineSource):
         else:
             extra_attributes = copy(self.attributes)
         partitions = extra_attributes.pop("partitions", None)
-        explicit_ack_mode = None
-        engine = "async"
-        if hasattr(function.spec, "graph") and function.spec.graph.engine:
-            engine = function.spec.graph.engine
 
-
-
-
-
-
+        extra_attributes = self.set_explicit_ack_mode(function, **extra_attributes)
+        explicit_ack_mode = extra_attributes.get("explicit_ack_mode")
+        extra_attributes["workerAllocationMode"] = extra_attributes.get(
+            "worker_allocation_mode", "pool"
+        )
 
         trigger_kwargs = {}
+
         if "max_workers" in extra_attributes:
             trigger_kwargs = {"max_workers": extra_attributes.pop("max_workers")}
 
@@ -1168,7 +1171,7 @@ class KafkaSource(OnlineSource):
         self,
         num_partitions: int = 4,
         replication_factor: int = 1,
-        topics: list[str] = None,
+        topics: Optional[list[str]] = None,
     ):
         """
         Create Kafka topics with the specified number of partitions and replication factor.
@@ -1193,19 +1196,13 @@ class KafkaSource(OnlineSource):
         new_topics = [
             NewTopic(topic, num_partitions, replication_factor) for topic in topics
         ]
-
-
-
-
-
-
-
-            ),
-            sasl_kerberos_domain_name=self.attributes.get("sasl", {}).get(
-                "sasl_kerberos_domain_name"
-            ),
-            sasl_oauth_token_provider=self.attributes.get("sasl", {}).get("mechanism"),
-        )
+
+        kafka_admin_kwargs = {}
+        kafka_admin_kwargs = mlrun.datastore.utils.KafkaParameters(
+            self.attributes
+        ).admin()
+
+        kafka_admin = KafkaAdminClient(bootstrap_servers=brokers, **kafka_admin_kwargs)
         try:
             kafka_admin.create_topics(new_topics)
         finally:
@@ -1226,16 +1223,16 @@ class SQLSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        chunksize: int = None,
-        key_field: str = None,
-        time_field: str = None,
-        schedule: str = None,
+        chunksize: Optional[int] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
-        db_url: str = None,
-        table_name: str = None,
-        spark_options: dict = None,
-        parse_dates: list[str] = None,
+        db_url: Optional[str] = None,
+        table_name: Optional[str] = None,
+        spark_options: Optional[dict] = None,
+        parse_dates: Optional[list[str]] = None,
         **kwargs,
     ):
         """
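The recurring theme above: the explicit-ack trigger setup, previously duplicated inline in `StreamSource` and `KafkaSource`, is hoisted into `OnlineSource.set_explicit_ack_mode`. It injects ack arguments only when explicit ack is enabled and the graph engine is async, and `dict.get` ensures caller-supplied values win. A rough behavioral sketch, using a stand-in object in place of a real serving function:

    from types import SimpleNamespace

    from mlrun.datastore.sources import OnlineSource

    # stand-in for a serving function whose graph uses the async engine
    function = SimpleNamespace(spec=SimpleNamespace(graph=SimpleNamespace(engine="async")))

    args = OnlineSource.set_explicit_ack_mode(function, worker_allocation_mode="pool")
    # with mlrun.mlconf.is_explicit_ack_enabled() returning True:
    #   args == {"worker_allocation_mode": "pool", "explicit_ack_mode": "explicitOnly"}
    # the caller's "pool" survives because .get() only fills in missing keys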
mlrun/datastore/store_resources.py
CHANGED
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
-
 import mlrun
 import mlrun.artifacts
 from mlrun.config import config
@@ -165,11 +163,16 @@ def get_store_resource(
         return db.get_feature_vector(name, project, tag, uid)
 
     elif StorePrefix.is_artifact(kind):
-        project, key, iteration, tag, tree = parse_artifact_uri(
+        project, key, iteration, tag, tree, uid = parse_artifact_uri(
             uri, project or config.default_project
         )
         resource = db.read_artifact(
-            key,
+            key,
+            project=project,
+            tag=tag,
+            iter=iteration,
+            tree=tree,
+            uid=uid,
         )
         if resource.get("kind", "") == "link":
             # todo: support other link types (not just iter, move this to the db/api layer
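`parse_artifact_uri` now returns the artifact `uid` as a sixth element, and `get_store_resource` forwards `project`, `tag`, `iter`, `tree`, and `uid` to `db.read_artifact`, so a `store://` URI can pin one exact artifact version. Callers unpacking the tuple must add the extra field; a sketch (the import path is assumed, and the URI shape is illustrative):

    from mlrun.utils.helpers import parse_artifact_uri  # assumed helper location

    uri = "my-project/my-key"  # may also carry tag/tree/iteration/uid markers
    # 1.7.x: project, key, iteration, tag, tree = parse_artifact_uri(uri, "my-project")
    project, key, iteration, tag, tree, uid = parse_artifact_uri(uri, "my-project")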
mlrun/datastore/storeytargets.py
CHANGED
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from urllib.parse import urlparse
+
 import storey
 from mergedeep import merge
 from storey import V3ioDriver
@@ -18,6 +20,12 @@ from storey import V3ioDriver
 import mlrun
 import mlrun.model_monitoring.helpers
 from mlrun.datastore.base import DataStore
+from mlrun.datastore.datastore_profile import (
+    DatastoreProfileKafkaSource,
+    DatastoreProfileKafkaTarget,
+    DatastoreProfileTDEngine,
+    datastore_profile_read,
+)
 
 from ..platforms.iguazio import parse_path
 from .utils import (
@@ -42,9 +50,16 @@ def get_url_and_storage_options(path, external_storage_options=None):
 
 
 class TDEngineStoreyTarget(storey.TDEngineTarget):
-    def __init__(self, *args, **kwargs):
-
-
+    def __init__(self, *args, url: str, **kwargs):
+        if url.startswith("ds://"):
+            datastore_profile = datastore_profile_read(url)
+            if not isinstance(datastore_profile, DatastoreProfileTDEngine):
+                raise ValueError(
+                    f"Unexpected datastore profile type:{datastore_profile.type}."
+                    "Only DatastoreProfileTDEngine is supported"
+                )
+            url = datastore_profile.dsn()
+        super().__init__(*args, url=url, **kwargs)
 
 
 class StoreyTargetUtils:
@@ -69,7 +84,12 @@ class StoreyTargetUtils:
 
 class ParquetStoreyTarget(storey.ParquetTarget):
     def __init__(self, *args, **kwargs):
+        alt_key_name = kwargs.pop("alternative_v3io_access_key", None)
         args, kwargs = StoreyTargetUtils.process_args_and_kwargs(args, kwargs)
+        storage_options = kwargs.get("storage_options", {})
+        if storage_options and storage_options.get("v3io_access_key") and alt_key_name:
+            if alt_key := mlrun.get_secret_or_env(alt_key_name):
+                storage_options["v3io_access_key"] = alt_key
         super().__init__(*args, **kwargs)
 
 
@@ -89,17 +109,20 @@ class StreamStoreyTarget(storey.StreamTarget):
             raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
 
         _, storage_options = get_url_and_storage_options(uri)
-
+        _, path = parse_path(uri)
 
         access_key = storage_options.get("v3io_access_key")
-
-
-
+
+        if alt_key_name := kwargs.pop("alternative_v3io_access_key", None):
+            if alt_key := mlrun.get_secret_or_env(alt_key_name):
+                access_key = alt_key
+
+        storage = V3ioDriver(access_key=access_key)
 
         if storage_options:
             kwargs["storage"] = storage
         if args:
-            args[0] =
+            args[0] = path
         if "stream_path" in kwargs:
             kwargs["stream_path"] = path
 
@@ -108,17 +131,26 @@ class StreamStoreyTarget(storey.StreamTarget):
 
 class KafkaStoreyTarget(storey.KafkaTarget):
     def __init__(self, *args, **kwargs):
+        kwargs.pop("alternative_v3io_access_key", None)
         path = kwargs.pop("path")
-        attributes = kwargs.pop("attributes",
+        attributes = kwargs.pop("attributes", {})
         if path and path.startswith("ds://"):
-            datastore_profile = (
-
-
+            datastore_profile = datastore_profile_read(path)
+            if not isinstance(
+                datastore_profile,
+                (DatastoreProfileKafkaSource, DatastoreProfileKafkaTarget),
+            ):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"Unsupported datastore profile type: {type(datastore_profile)}"
+                )
+
             attributes = merge(attributes, datastore_profile.attributes())
-            brokers = attributes.pop(
-
+            brokers = attributes.pop("brokers", None)
+            # Override the topic with the one in the url (if any)
+            parsed = urlparse(path)
+            topic = (
+                parsed.path.strip("/") if parsed.path else datastore_profile.get_topic()
             )
-            topic = datastore_profile.topic
         else:
             brokers = attributes.pop(
                 "brokers", attributes.pop("bootstrap_servers", None)
@@ -129,7 +161,10 @@ class KafkaStoreyTarget(storey.KafkaTarget):
             raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a topic")
         kwargs["brokers"] = brokers
        kwargs["topic"] = topic
-
+
+        attributes = mlrun.datastore.utils.KafkaParameters(attributes).producer()
+
+        super().__init__(*args, **kwargs, producer_options=attributes)
 
 
 class NoSqlStoreyTarget(storey.NoSqlTarget):
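`KafkaStoreyTarget` now resolves `ds://` profiles itself, validates the profile type, and lets a path component on the URL override the profile's topic, falling back to `datastore_profile.get_topic()` when the path is empty. The override is plain `urlparse` behavior (the profile and topic names below are hypothetical):

    from urllib.parse import urlparse

    parsed = urlparse("ds://my-kafka-profile/orders")
    assert (parsed.path.strip("/") if parsed.path else None) == "orders"

    parsed = urlparse("ds://my-kafka-profile")
    assert parsed.path == ""  # empty path -> fall back to the profile's topic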