mlrun 1.7.0rc6-py3-none-any.whl → 1.7.0rc7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/common/constants.py +6 -0
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/model_monitoring/__init__.py +4 -0
- mlrun/common/schemas/model_monitoring/constants.py +35 -18
- mlrun/common/schemas/project.py +1 -0
- mlrun/common/types.py +7 -1
- mlrun/config.py +11 -4
- mlrun/data_types/data_types.py +4 -0
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +4 -5
- mlrun/datastore/base.py +22 -16
- mlrun/datastore/datastore.py +4 -0
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/sources.py +2 -3
- mlrun/db/base.py +14 -6
- mlrun/db/httpdb.py +61 -56
- mlrun/db/nopdb.py +3 -0
- mlrun/model.py +1 -0
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +104 -295
- mlrun/model_monitoring/controller.py +25 -25
- mlrun/model_monitoring/db/__init__.py +16 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -34
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +47 -6
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +49 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +76 -3
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +68 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/sqlite.py +13 -1
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +662 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +134 -3
- mlrun/model_monitoring/helpers.py +0 -2
- mlrun/model_monitoring/stream_processing.py +41 -9
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +4 -36
- mlrun/projects/pipelines.py +13 -1
- mlrun/projects/project.py +109 -101
- mlrun/run.py +3 -1
- mlrun/runtimes/base.py +6 -0
- mlrun/runtimes/nuclio/api_gateway.py +188 -61
- mlrun/runtimes/nuclio/function.py +3 -0
- mlrun/runtimes/nuclio/serving.py +28 -32
- mlrun/runtimes/pod.py +26 -0
- mlrun/serving/server.py +4 -6
- mlrun/serving/states.py +34 -14
- mlrun/utils/helpers.py +34 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc7.dist-info}/METADATA +14 -5
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc7.dist-info}/RECORD +55 -51
- mlrun/model_monitoring/batch.py +0 -933
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/mysql.py +0 -34
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc7.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc7.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc7.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc7.dist-info}/top_level.txt +0 -0
mlrun/common/constants.py
CHANGED
@@ -14,4 +14,10 @@
 #
 IMAGE_NAME_ENRICH_REGISTRY_PREFIX = "."  # prefix for image name to enrich with registry
 MLRUN_CREATED_LABEL = "mlrun-created"
+MLRUN_MODEL_CONF = "model-conf"
+MLRUN_SERVING_SPEC_MOUNT_PATH = f"/tmp/mlrun/{MLRUN_MODEL_CONF}"
+MLRUN_SERVING_SPEC_FILENAME = "serving_spec.json"
+MLRUN_SERVING_SPEC_PATH = (
+    f"{MLRUN_SERVING_SPEC_MOUNT_PATH}/{MLRUN_SERVING_SPEC_FILENAME}"
+)
 MYSQL_MEDIUMBLOB_SIZE_BYTES = 16 * 1024 * 1024
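These constants pin down where a serving function's spec is written and mounted. A minimal sketch of a consumer, assuming the file holds the JSON-serialized serving spec (load_serving_spec is a hypothetical helper, not part of this diff):

import json

from mlrun.common.constants import MLRUN_SERVING_SPEC_PATH


def load_serving_spec() -> dict:
    # MLRUN_SERVING_SPEC_PATH resolves to /tmp/mlrun/model-conf/serving_spec.json
    with open(MLRUN_SERVING_SPEC_PATH) as spec_file:
        return json.load(spec_file)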
mlrun/common/schemas/__init__.py
CHANGED
@@ -124,6 +124,7 @@ from .model_monitoring import (
     EventFieldType,
     EventKeyMetrics,
     Features,
+    FeatureSetFeatures,
     FeatureValues,
     GrafanaColumn,
     GrafanaDataPoint,
@@ -139,6 +140,7 @@ from .model_monitoring import (
     ModelMonitoringMode,
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
+    PrometheusEndpoints,
     TimeSeriesTarget,
 )
 from .notification import (
mlrun/common/schemas/model_monitoring/__init__.py
CHANGED
@@ -22,6 +22,7 @@ from .constants import (
     EventFieldType,
     EventKeyMetrics,
     EventLiveStats,
+    FeatureSetFeatures,
     FileTargetKind,
     FunctionURI,
     ModelEndpointTarget,
@@ -29,9 +30,12 @@ from .constants import (
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
     ProjectSecretKeys,
+    PrometheusEndpoints,
     PrometheusMetric,
+    SchedulingKeys,
     TimeSeriesTarget,
     VersionedModel,
+    WriterEvent,
 )
 from .grafana import (
     GrafanaColumn,
mlrun/common/schemas/model_monitoring/constants.py
CHANGED
@@ -21,6 +21,12 @@ import mlrun.common.helpers
 from mlrun.common.types import StrEnum
 
 
+class MonitoringStrEnum(StrEnum):
+    @classmethod
+    def list(cls):
+        return list(map(lambda c: c.value, cls))
+
+
 class EventFieldType:
     FUNCTION_URI = "function_uri"
     FUNCTION = "function"
@@ -77,6 +83,20 @@ class EventFieldType:
     SAMPLE_PARQUET_PATH = "sample_parquet_path"
 
 
+class FeatureSetFeatures(MonitoringStrEnum):
+    LATENCY = EventFieldType.LATENCY
+    ERROR_COUNT = EventFieldType.ERROR_COUNT
+    METRICS = EventFieldType.METRICS
+
+    @classmethod
+    def time_stamp(cls):
+        return EventFieldType.TIMESTAMP
+
+    @classmethod
+    def entity(cls):
+        return EventFieldType.ENDPOINT_ID
+
+
 class ApplicationEvent:
     APPLICATION_NAME = "application_name"
     CURRENT_STATS = "current_stats"
@@ -89,7 +109,7 @@ class ApplicationEvent:
     OUTPUT_STREAM_URI = "output_stream_uri"
 
 
-class WriterEvent(StrEnum):
+class WriterEvent(MonitoringStrEnum):
     APPLICATION_NAME = "application_name"
     ENDPOINT_ID = "endpoint_id"
     START_INFER_TIME = "start_infer_time"
@@ -101,10 +121,6 @@ class WriterEvent(StrEnum):
     RESULT_EXTRA_DATA = "result_extra_data"
     CURRENT_STATS = "current_stats"
 
-    @classmethod
-    def list(cls):
-        return list(map(lambda c: c.value, cls))
-
 
 class EventLiveStats:
     LATENCY_AVG_5M = "latency_avg_5m"
@@ -146,6 +162,9 @@ class ModelMonitoringStoreKinds:
 
 class SchedulingKeys:
     LAST_ANALYZED = "last_analyzed"
+    ENDPOINT_ID = "endpoint_id"
+    APPLICATION_NAME = "application_name"
+    UID = "uid"
 
 
 class FileTargetKind:
@@ -155,6 +174,8 @@ class FileTargetKind:
     PARQUET = "parquet"
     APPS_PARQUET = "apps_parquet"
     LOG_STREAM = "log_stream"
+    APP_RESULTS = "app_results"
+    MONITORING_SCHEDULES = "monitoring_schedules"
 
 
 class ModelMonitoringMode(str, Enum):
@@ -177,20 +198,16 @@ class PrometheusMetric:
     DRIFT_STATUS = "drift_status"
 
 
-class
-
-
-
-
+class PrometheusEndpoints(MonitoringStrEnum):
+    MODEL_MONITORING_METRICS = "/model-monitoring-metrics"
+    MONITORING_BATCH_METRICS = "/monitoring-batch-metrics"
+    MONITORING_DRIFT_STATUS = "/monitoring-drift-status"
+
 
-
-
-
-
-        MonitoringFunctionNames.STREAM,
-        MonitoringFunctionNames.BATCH,
-        MonitoringFunctionNames.APPLICATION_CONTROLLER,
-    ]
+class MonitoringFunctionNames(MonitoringStrEnum):
+    STREAM = "model-monitoring-stream"
+    APPLICATION_CONTROLLER = "model-monitoring-controller"
+    WRITER = "model-monitoring-writer"
 
 
 @dataclass
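The list() helper moves from WriterEvent into the shared MonitoringStrEnum base, so every monitoring enum enumerates its values the same way. A quick sanity check against the values introduced above:

from mlrun.common.schemas.model_monitoring.constants import (
    FeatureSetFeatures,
    MonitoringFunctionNames,
)

# all MonitoringStrEnum subclasses now share the same list() helper
assert MonitoringFunctionNames.list() == [
    "model-monitoring-stream",
    "model-monitoring-controller",
    "model-monitoring-writer",
]
# FeatureSetFeatures values alias EventFieldType fields
# (LATENCY, ERROR_COUNT, METRICS; string values not shown in this hunk)
print(FeatureSetFeatures.list())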
mlrun/common/schemas/project.py
CHANGED
@@ -87,6 +87,7 @@ class ProjectSpec(pydantic.BaseModel):
     custom_packagers: typing.Optional[list[tuple[str, bool]]] = None
     default_image: typing.Optional[str] = None
     build: typing.Optional[ImageBuilder] = None
+    default_function_node_selector: typing.Optional[dict] = {}
 
     class Config:
         extra = pydantic.Extra.allow
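default_function_node_selector gives a project a node selector that its functions inherit by default. A hedged sketch of setting it from the client side (the selector value is illustrative; the enrichment logic lives outside this file):

import mlrun

project = mlrun.get_or_create_project("my-project", context="./")
# illustrative selector, applied to the project's functions by default
project.spec.default_function_node_selector = {"kubernetes.io/arch": "amd64"}
project.save()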
mlrun/common/types.py
CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 import enum
 
@@ -23,3 +22,10 @@ class StrEnum(str, enum.Enum):
 
     def __repr__(self):
         return self.value
+
+
+# Partial backport from Python 3.11
+# https://docs.python.org/3/library/http.html#http.HTTPMethod
+class HTTPMethod(StrEnum):
+    GET = "GET"
+    POST = "POST"
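HTTPMethod members are str subclasses via StrEnum, so they compare equal to and substitute for plain method-name strings:

from mlrun.common.types import HTTPMethod

assert HTTPMethod.GET == "GET"
assert HTTPMethod.POST.value == "POST"
# usable anywhere a method-name string is expected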
mlrun/config.py
CHANGED
@@ -362,6 +362,8 @@ default_config = {
         # - mlrun.runtimes.nuclio.function.enrich_function_with_ingress
         "add_templated_ingress_host_mode": "never",
         "explicit_ack": "enabled",
+        # size of serving spec to move to config maps
+        "serving_spec_env_cutoff": 4096,
     },
     "logs": {
         "decode": {
@@ -479,6 +481,11 @@ default_config = {
         # if set to true, will log a warning for trying to use run db functionality while in nop db mode
         "verbose": True,
     },
+    "pagination_cache": {
+        "interval": 60,
+        "ttl": 3600,
+        "max_size": 10000,
+    },
 },
 "model_endpoint_monitoring": {
     "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
@@ -498,10 +505,9 @@ default_config = {
     # when the user is working in CE environment and has not provided any stream path.
     "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080",
     "default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
-    "batch_processing_function_branch": "master",
     "parquet_batching_max_events": 10_000,
     "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
-    # See mlrun.model_monitoring.stores.
+    # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
     "store_type": "v3io-nosql",
     "endpoint_store_connection": "",
 },
@@ -616,8 +622,9 @@ default_config = {
     },
     "workflows": {
         "default_workflow_runner_name": "workflow-runner-{}",
-        # Default timeout seconds for retrieving workflow id after execution
-
+        # Default timeout seconds for retrieving workflow id after execution
+        # Remote workflow timeout is the maximum between remote and the inner engine timeout
+        "timeouts": {"local": 120, "kfp": 60, "remote": 60 * 5},
     },
     "log_collector": {
         "address": "localhost:8282",
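Both additions read back through mlrun.mlconf like any other default_config entry. A sketch, with the parent sections (httpdb, nuclio) inferred from the surrounding context lines rather than shown in the hunks:

import mlrun

# pagination cache tuning (defaults added above)
print(mlrun.mlconf.httpdb.pagination_cache.ttl)  # 3600
# serving specs larger than this cutoff are moved to a config map
print(mlrun.mlconf.nuclio.serving_spec_env_cutoff)  # 4096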
mlrun/data_types/data_types.py
CHANGED
@@ -41,6 +41,7 @@ class ValueType(str, Enum):
     BYTES = "bytes"
     STRING = "str"
     DATETIME = "datetime"
+    LIST = "List"
     BYTES_LIST = "List[bytes]"
     STRING_LIST = "List[string]"
     INT32_LIST = "List[int32]"
@@ -48,6 +49,7 @@ class ValueType(str, Enum):
     DOUBLE_LIST = "List[float]"
     FLOAT_LIST = "List[float32]"
     BOOL_LIST = "List[bool]"
+    Tuple = "Tuple"
 
 
 def pd_schema_to_value_type(value):
@@ -102,6 +104,8 @@ def python_type_to_value_type(value_type):
         "datetime64[ns]": ValueType.INT64,
         "datetime64[ns, tz]": ValueType.INT64,
         "category": ValueType.STRING,
+        "list": ValueType.LIST,
+        "tuple": ValueType.Tuple,
     }
 
     if type_name in type_map:
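The two new members are plain string-valued enum entries, so the schema strings they serialize to follow directly from the diff:

from mlrun.data_types.data_types import ValueType

assert ValueType.LIST == "List"
assert ValueType.Tuple == "Tuple"
# python_type_to_value_type now resolves the "list" and "tuple" type names
# to these members via the type_map above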
mlrun/datastore/alibaba_oss.py
ADDED
@@ -0,0 +1,130 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+from datetime import datetime
+from pathlib import Path
+from urllib.parse import urlparse
+
+import oss2
+from fsspec.registry import get_filesystem_class
+
+import mlrun.errors
+
+from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer
+
+
+class OSSStore(DataStore):
+    using_bucket = True
+
+    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+        super().__init__(parent, name, schema, endpoint, secrets)
+        # will be used in case user asks to assume a role and work through fsspec
+
+        access_key_id = self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID")
+        secret_key = self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY")
+        endpoint_url = self._get_secret_or_env("ALIBABA_ENDPOINT_URL")
+        if access_key_id and secret_key and endpoint_url:
+            self.auth = oss2.Auth(access_key_id, secret_key)
+            self.endpoint_url = endpoint_url
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "missing ALIBABA_ACCESS_KEY_ID or ALIBABA_SECRET_ACCESS_KEY ALIBABA_ENDPOINT_URL in environment"
+            )
+
+    @property
+    def filesystem(self):
+        """return fsspec file system object, if supported"""
+        if self._filesystem:
+            return self._filesystem
+        try:
+            import ossfs  # noqa
+        except ImportError as exc:
+            raise ImportError("ALIBABA ossfs not installed") from exc
+        filesystem_class = get_filesystem_class(protocol=self.kind)
+        self._filesystem = makeDatastoreSchemaSanitizer(
+            filesystem_class,
+            using_bucket=self.using_bucket,
+            **self.get_storage_options(),
+        )
+        return self._filesystem
+
+    def get_storage_options(self):
+        res = dict(
+            endpoint=self._get_secret_or_env("ALIBABA_ENDPOINT_URL"),
+            key=self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID"),
+            secret=self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY"),
+        )
+        return self._sanitize_storage_options(res)
+
+    def get_bucket_and_key(self, key):
+        path = self._join(key)[1:]
+        return self.endpoint, path
+
+    def upload(self, key, src_path):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, open(src_path, "rb"))
+
+    def get(self, key, size=None, offset=0):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        if size or offset:
+            return oss.get_object(key, byte_range=self.get_range(size, offset)).read()
+        return oss.get_object(key).read()
+
+    def put(self, key, data, append=False):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, data)
+
+    def stat(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        obj = oss.get_object_meta(key)
+        size = obj.content_length
+        modified = datetime.fromtimestamp(obj.last_modified)
+        return FileStats(size, time.mktime(modified.timetuple()))
+
+    def listdir(self, key):
+        remote_path = self._convert_key_to_remote_path(key)
+        if self.filesystem.isfile(remote_path):
+            return key
+        remote_path = f"{remote_path}/**"
+        files = self.filesystem.glob(remote_path)
+        key_length = len(key)
+        files = [
+            f.split("/", 1)[1][key_length:] for f in files if len(f.split("/")) > 1
+        ]
+        return files
+
+    def delete(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.delete_object(key)
+
+    def _convert_key_to_remote_path(self, key):
+        key = key.strip("/")
+        schema = urlparse(key).scheme
+        # if called without passing dataitem - like in fset.purge_targets,
+        # key will include schema.
+        if not schema:
+            key = Path(self.endpoint, key).as_posix()
+        return key
+
+    @staticmethod
+    def get_range(size, offset):
+        if size:
+            return [offset, size]
+        return [offset, None]
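A hedged usage sketch for the new store. The oss:// scheme and the get_dataitem flow are assumptions based on how mlrun's other datastores are registered (registration happens in datastore.py, whose diff is not shown); it requires the oss2 and ossfs packages:

import os

import mlrun

# credentials the OSSStore constructor resolves from secrets or environment
os.environ["ALIBABA_ACCESS_KEY_ID"] = "<access-key-id>"
os.environ["ALIBABA_SECRET_ACCESS_KEY"] = "<secret-access-key>"
os.environ["ALIBABA_ENDPOINT_URL"] = "https://oss-cn-hangzhou.aliyuncs.com"

# hypothetical bucket and key
item = mlrun.get_dataitem("oss://my-bucket/path/to/data.csv")
df = item.as_df()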
mlrun/datastore/azure_blob.py
CHANGED
@@ -158,18 +158,17 @@ class AzureBlobStore(DataStore):
             st[key] = parsed_value
 
         account_name = st.get("account_name")
-        if not account_name:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "Property 'account_name' is absent both in storage settings and connection string"
-            )
         if primary_url:
             if primary_url.startswith("http://"):
                 primary_url = primary_url[len("http://") :]
             if primary_url.startswith("https://"):
                 primary_url = primary_url[len("https://") :]
             host = primary_url
-        else:
+        elif account_name:
             host = f"{account_name}.{service}.core.windows.net"
+        else:
+            return res
+
 
         if "account_key" in st:
             res[f"spark.hadoop.fs.azure.account.key.{host}"] = st["account_key"]
mlrun/datastore/base.py
CHANGED
@@ -27,6 +27,7 @@ import requests
 import urllib3
 from deprecated import deprecated
 
+import mlrun.config
 import mlrun.errors
 from mlrun.errors import err_to_str
 from mlrun.utils import StorePrefix, is_ipython, logger
@@ -34,10 +35,6 @@ from mlrun.utils import StorePrefix, is_ipython, logger
 from .store_resources import is_store_uri, parse_store_uri
 from .utils import filter_df_start_end_time, select_columns_from_df
 
-verify_ssl = False
-if not verify_ssl:
-    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
 
 class FileStats:
     def __init__(self, size, modified, content_type=None):
@@ -633,17 +630,6 @@ def basic_auth_header(user, password):
     return {"Authorization": authstr}
 
 
-def http_get(url, headers=None, auth=None):
-    try:
-        response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
-
-    mlrun.errors.raise_for_status(response)
-
-    return response.content
-
-
 class HttpStore(DataStore):
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets)
@@ -671,7 +657,7 @@ class HttpStore(DataStore):
         raise ValueError("unimplemented")
 
     def get(self, key, size=None, offset=0):
-        data = http_get(self.url + self._join(key), self._headers, self.auth)
+        data = self._http_get(self.url + self._join(key), self._headers, self.auth)
         if offset:
             data = data[offset:]
         if size:
@@ -691,6 +677,26 @@ class HttpStore(DataStore):
             f"schema as it is not secure and is not recommended."
         )
 
+    def _http_get(
+        self,
+        url,
+        headers=None,
+        auth=None,
+    ):
+        # import here to prevent import cycle
+        from mlrun.config import config as mlconf
+
+        verify_ssl = mlconf.httpdb.http.verify
+        try:
+            if not verify_ssl:
+                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+            response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
+        except OSError as exc:
+            raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
+
+        mlrun.errors.raise_for_status(response)
+        return response.content
+
 
 # This wrapper class is designed to extract the 'ds' schema and profile name from URL-formatted paths.
 # Within fsspec, the AbstractFileSystem::_strip_protocol() internal method is used to handle complete URL paths.
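The module-level verify_ssl = False (which unconditionally disabled SSL verification and its warnings) is gone; HttpStore._http_get now consults mlconf.httpdb.http.verify per request. A short sketch of the new behavior:

import mlrun

# http(s):// data items now honor the config flag instead of a hard-coded False
mlrun.mlconf.httpdb.http.verify = False  # opting out suppresses urllib3 warnings
data = mlrun.get_dataitem("https://example.com/data.csv").get()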
mlrun/datastore/google_cloud_storage.py
CHANGED
@@ -132,7 +132,7 @@ class GoogleCloudStorageStore(DataStore):
         self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
 
     def get_spark_options(self):
-        res =
+        res = {}
         st = self.get_storage_options()
         if "token" in st:
             res = {"spark.hadoop.google.cloud.auth.service.account.enable": "true"}
mlrun/datastore/sources.py
CHANGED
@@ -854,12 +854,11 @@ class StreamSource(OnlineSource):
         super().__init__(name, attributes=attrs, **kwargs)
 
     def add_nuclio_trigger(self, function):
-        store,
+        store, _, url = mlrun.store_manager.get_or_create_store(self.path)
         if store.kind != "v3io":
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Only profiles that reference the v3io datastore can be used with StreamSource"
             )
-        path = "v3io:/" + path
         storage_options = store.get_storage_options()
         access_key = storage_options.get("v3io_access_key")
         endpoint, stream_path = parse_path(url)
@@ -883,7 +882,7 @@ class StreamSource(OnlineSource):
             kwargs["worker_allocation_mode"] = "static"
 
         function.add_v3io_stream_trigger(
-
+            url,
             self.name,
             self.attributes["group"],
             self.attributes["seek_to"],
mlrun/db/base.py
CHANGED
@@ -17,7 +17,7 @@ from abc import ABC, abstractmethod
 from typing import Optional, Union
 
 import mlrun.common.schemas
-import mlrun.model_monitoring
+import mlrun.model_monitoring
 
 
 class RunDBError(Exception):
@@ -509,9 +509,7 @@ class RunDBInterface(ABC):
         self,
         project: str,
         endpoint_id: str,
-        model_endpoint: Union[
-            mlrun.model_monitoring.model_endpoint.ModelEndpoint, dict
-        ],
+        model_endpoint: Union[mlrun.model_monitoring.ModelEndpoint, dict],
     ):
         pass
 
@@ -632,6 +630,10 @@ class RunDBInterface(ABC):
     def get_api_gateway(self, name, project=None) -> mlrun.common.schemas.APIGateway:
         pass
 
+    @abstractmethod
+    def delete_api_gateway(self, name, project=None):
+        pass
+
    def get_builder_status(
         self,
         func: "mlrun.runtimes.BaseRuntime",
@@ -724,5 +726,11 @@ class RunDBInterface(ABC):
         project: str,
         base_period: int = 10,
         image: str = "mlrun/mlrun",
-
-
+        deploy_histogram_data_drift_app: bool = True,
+    ) -> None:
+        raise NotImplementedError
+
+    def deploy_histogram_data_drift_app(
+        self, project: str, image: str = "mlrun/mlrun"
+    ) -> None:
+        raise NotImplementedError