mlrun 1.7.0rc5__py3-none-any.whl → 1.7.0rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/artifacts/base.py +2 -1
- mlrun/artifacts/plots.py +9 -5
- mlrun/common/constants.py +6 -0
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/model_monitoring/__init__.py +4 -0
- mlrun/common/schemas/model_monitoring/constants.py +35 -18
- mlrun/common/schemas/project.py +1 -0
- mlrun/common/types.py +7 -1
- mlrun/config.py +19 -6
- mlrun/data_types/data_types.py +4 -0
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +4 -5
- mlrun/datastore/base.py +22 -16
- mlrun/datastore/datastore.py +4 -0
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/sources.py +7 -7
- mlrun/db/base.py +14 -6
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +61 -56
- mlrun/db/nopdb.py +3 -0
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +1 -1
- mlrun/launcher/client.py +1 -1
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +1 -1
- mlrun/launcher/remote.py +1 -1
- mlrun/model.py +1 -0
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +104 -301
- mlrun/model_monitoring/application.py +21 -21
- mlrun/model_monitoring/applications/histogram_data_drift.py +130 -40
- mlrun/model_monitoring/controller.py +26 -33
- mlrun/model_monitoring/db/__init__.py +16 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -34
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +47 -6
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +49 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +76 -3
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +68 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/sqlite.py +13 -1
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +662 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +134 -3
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +45 -6
- mlrun/model_monitoring/stream_processing.py +43 -9
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +4 -36
- mlrun/projects/pipelines.py +13 -1
- mlrun/projects/project.py +279 -117
- mlrun/run.py +72 -74
- mlrun/runtimes/__init__.py +35 -0
- mlrun/runtimes/base.py +7 -1
- mlrun/runtimes/nuclio/api_gateway.py +188 -61
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +283 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +87 -0
- mlrun/runtimes/nuclio/function.py +53 -1
- mlrun/runtimes/nuclio/serving.py +28 -32
- mlrun/runtimes/pod.py +27 -1
- mlrun/serving/server.py +4 -6
- mlrun/serving/states.py +41 -33
- mlrun/utils/helpers.py +34 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/METADATA +14 -5
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/RECORD +71 -64
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/mysql.py +0 -34
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/top_level.txt +0 -0
mlrun/artifacts/base.py
CHANGED
@@ -88,9 +88,10 @@ class ArtifactSpec(ModelObj):
         "db_key",
         "extra_data",
         "unpackaging_instructions",
+        "producer",
     ]

-    _extra_fields = ["annotations", "…
+    _extra_fields = ["annotations", "sources", "license", "encoding"]
     _exclude_fields_from_uid_hash = [
         # if the artifact is first created, it will not have a db_key,
         # exclude it so further updates of the artifacts will have the same hash
mlrun/artifacts/plots.py
CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import base64
+import typing
 from io import BytesIO

 from deprecated import deprecated
@@ -21,6 +22,9 @@ import mlrun
 from ..utils import dict_to_json
 from .base import Artifact, LegacyArtifact

+if typing.TYPE_CHECKING:
+    from plotly.graph_objs import Figure
+

 class PlotArtifact(Artifact):
     kind = "plot"
@@ -207,10 +211,10 @@ class PlotlyArtifact(Artifact):

     def __init__(
         self,
-        figure=None,
-        key: str = None,
-        target_path: str = None,
-    ):
+        figure: typing.Optional["Figure"] = None,
+        key: typing.Optional[str] = None,
+        target_path: typing.Optional[str] = None,
+    ) -> None:
         """
         Initialize a Plotly artifact with the given figure.

@@ -247,7 +251,7 @@ class PlotlyArtifact(Artifact):
         self._figure = figure
         self.spec.format = "html"

-    def get_body(self):
+    def get_body(self) -> str:
         """
         Get the artifact's body - the Plotly figure's html code.
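The annotations above only formalize what the class already did. A rough usage sketch (the plotly calls and the import path are illustrative, not taken from this diff):

    import plotly.graph_objs as go
    from mlrun.artifacts import PlotlyArtifact

    fig = go.Figure(data=go.Scatter(y=[1, 3, 2]))
    artifact = PlotlyArtifact(figure=fig, key="my-plot")
    html = artifact.get_body()  # annotated as -> str in this release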
mlrun/common/constants.py
CHANGED
@@ -14,4 +14,10 @@
 #
 IMAGE_NAME_ENRICH_REGISTRY_PREFIX = "."  # prefix for image name to enrich with registry
 MLRUN_CREATED_LABEL = "mlrun-created"
+MLRUN_MODEL_CONF = "model-conf"
+MLRUN_SERVING_SPEC_MOUNT_PATH = f"/tmp/mlrun/{MLRUN_MODEL_CONF}"
+MLRUN_SERVING_SPEC_FILENAME = "serving_spec.json"
+MLRUN_SERVING_SPEC_PATH = (
+    f"{MLRUN_SERVING_SPEC_MOUNT_PATH}/{MLRUN_SERVING_SPEC_FILENAME}"
+)
 MYSQL_MEDIUMBLOB_SIZE_BYTES = 16 * 1024 * 1024
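Composed, the new constant resolves as:

    >>> from mlrun.common.constants import MLRUN_SERVING_SPEC_PATH
    >>> MLRUN_SERVING_SPEC_PATH
    '/tmp/mlrun/model-conf/serving_spec.json'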
mlrun/common/schemas/__init__.py
CHANGED
@@ -124,6 +124,7 @@ from .model_monitoring import (
     EventFieldType,
     EventKeyMetrics,
     Features,
+    FeatureSetFeatures,
     FeatureValues,
     GrafanaColumn,
     GrafanaDataPoint,
@@ -139,6 +140,7 @@ from .model_monitoring import (
     ModelMonitoringMode,
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
+    PrometheusEndpoints,
     TimeSeriesTarget,
 )
 from .notification import (

mlrun/common/schemas/model_monitoring/__init__.py
CHANGED

@@ -22,6 +22,7 @@ from .constants import (
     EventFieldType,
     EventKeyMetrics,
     EventLiveStats,
+    FeatureSetFeatures,
     FileTargetKind,
     FunctionURI,
     ModelEndpointTarget,
@@ -29,9 +30,12 @@ from .constants import (
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
     ProjectSecretKeys,
+    PrometheusEndpoints,
     PrometheusMetric,
+    SchedulingKeys,
     TimeSeriesTarget,
     VersionedModel,
+    WriterEvent,
 )
 from .grafana import (
     GrafanaColumn,

mlrun/common/schemas/model_monitoring/constants.py
CHANGED

@@ -21,6 +21,12 @@ import mlrun.common.helpers
 from mlrun.common.types import StrEnum


+class MonitoringStrEnum(StrEnum):
+    @classmethod
+    def list(cls):
+        return list(map(lambda c: c.value, cls))
+
+
 class EventFieldType:
     FUNCTION_URI = "function_uri"
     FUNCTION = "function"
@@ -77,6 +83,20 @@ class EventFieldType:
     SAMPLE_PARQUET_PATH = "sample_parquet_path"


+class FeatureSetFeatures(MonitoringStrEnum):
+    LATENCY = EventFieldType.LATENCY
+    ERROR_COUNT = EventFieldType.ERROR_COUNT
+    METRICS = EventFieldType.METRICS
+
+    @classmethod
+    def time_stamp(cls):
+        return EventFieldType.TIMESTAMP
+
+    @classmethod
+    def entity(cls):
+        return EventFieldType.ENDPOINT_ID
+
+
 class ApplicationEvent:
     APPLICATION_NAME = "application_name"
     CURRENT_STATS = "current_stats"
@@ -89,7 +109,7 @@ class ApplicationEvent:
     OUTPUT_STREAM_URI = "output_stream_uri"


-class WriterEvent(StrEnum):
+class WriterEvent(MonitoringStrEnum):
     APPLICATION_NAME = "application_name"
     ENDPOINT_ID = "endpoint_id"
     START_INFER_TIME = "start_infer_time"
@@ -101,10 +121,6 @@ class WriterEvent(StrEnum):
     RESULT_EXTRA_DATA = "result_extra_data"
     CURRENT_STATS = "current_stats"

-    @classmethod
-    def list(cls):
-        return list(map(lambda c: c.value, cls))
-

 class EventLiveStats:
     LATENCY_AVG_5M = "latency_avg_5m"
@@ -146,6 +162,9 @@ class ModelMonitoringStoreKinds:

 class SchedulingKeys:
     LAST_ANALYZED = "last_analyzed"
+    ENDPOINT_ID = "endpoint_id"
+    APPLICATION_NAME = "application_name"
+    UID = "uid"


 class FileTargetKind:
@@ -155,6 +174,8 @@ class FileTargetKind:
     PARQUET = "parquet"
     APPS_PARQUET = "apps_parquet"
     LOG_STREAM = "log_stream"
+    APP_RESULTS = "app_results"
+    MONITORING_SCHEDULES = "monitoring_schedules"


 class ModelMonitoringMode(str, Enum):
@@ -177,20 +198,16 @@ class PrometheusMetric:
     DRIFT_STATUS = "drift_status"


-class …
-…
-…
-…
-…
+class PrometheusEndpoints(MonitoringStrEnum):
+    MODEL_MONITORING_METRICS = "/model-monitoring-metrics"
+    MONITORING_BATCH_METRICS = "/monitoring-batch-metrics"
+    MONITORING_DRIFT_STATUS = "/monitoring-drift-status"
+

-…
-…
-…
-…
-        MonitoringFunctionNames.STREAM,
-        MonitoringFunctionNames.BATCH,
-        MonitoringFunctionNames.APPLICATION_CONTROLLER,
-    ]
+class MonitoringFunctionNames(MonitoringStrEnum):
+    STREAM = "model-monitoring-stream"
+    APPLICATION_CONTROLLER = "model-monitoring-controller"
+    WRITER = "model-monitoring-writer"


 @dataclass
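The practical effect of MonitoringStrEnum is that every monitoring enum now exposes the list() helper that WriterEvent previously defined only for itself. A minimal illustrative sketch:

    from mlrun.common.schemas.model_monitoring.constants import MonitoringFunctionNames

    # list() returns the members' string values in definition order
    MonitoringFunctionNames.list()
    # ['model-monitoring-stream', 'model-monitoring-controller', 'model-monitoring-writer']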
mlrun/common/schemas/project.py
CHANGED
@@ -87,6 +87,7 @@ class ProjectSpec(pydantic.BaseModel):
     custom_packagers: typing.Optional[list[tuple[str, bool]]] = None
     default_image: typing.Optional[str] = None
     build: typing.Optional[ImageBuilder] = None
+    default_function_node_selector: typing.Optional[dict] = {}

     class Config:
         extra = pydantic.Extra.allow
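Assuming the client-side project spec mirrors this API schema field (mlrun/projects/project.py is also reworked in this release), a project-wide node selector might be set like this; the attribute path is an assumption, not shown in this diff:

    import mlrun

    project = mlrun.get_or_create_project("my-project")
    # hypothetical: applied by default to functions that do not set their own selector
    project.spec.default_function_node_selector = {"disktype": "ssd"}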
mlrun/common/types.py
CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#

 import enum

@@ -23,3 +22,10 @@ class StrEnum(str, enum.Enum):

     def __repr__(self):
         return self.value
+
+
+# Partial backport from Python 3.11
+# https://docs.python.org/3/library/http.html#http.HTTPMethod
+class HTTPMethod(StrEnum):
+    GET = "GET"
+    POST = "POST"
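Since HTTPMethod subclasses StrEnum (which mixes in str), its members compare equal to plain strings; a minimal sketch:

    from mlrun.common.types import HTTPMethod

    assert HTTPMethod.GET == "GET"
    assert HTTPMethod.POST == "POST"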
mlrun/config.py
CHANGED
@@ -324,7 +324,13 @@ default_config = {
             # optional values (as per https://dev.mysql.com/doc/refman/8.0/en/sql-mode.html#sql-mode-full):
             #
             # if set to "nil" or "none", nothing would be set
-            "modes": …
+            "modes": (
+                "STRICT_TRANS_TABLES"
+                ",NO_ZERO_IN_DATE"
+                ",NO_ZERO_DATE"
+                ",ERROR_FOR_DIVISION_BY_ZERO"
+                ",NO_ENGINE_SUBSTITUTION",
+            )
         },
     },
     "jobs": {
@@ -356,6 +362,8 @@ default_config = {
         # - mlrun.runtimes.nuclio.function.enrich_function_with_ingress
         "add_templated_ingress_host_mode": "never",
         "explicit_ack": "enabled",
+        # size of serving spec to move to config maps
+        "serving_spec_env_cutoff": 4096,
     },
     "logs": {
         "decode": {
@@ -443,7 +451,7 @@ default_config = {
        # pip install <requirement_specifier>, e.g. mlrun==0.5.4, mlrun~=0.5,
        # git+https://github.com/mlrun/mlrun@development. by default uses the version
        "mlrun_version_specifier": "",
-       "kaniko_image": "gcr.io/kaniko-project/executor:v1.…
+       "kaniko_image": "gcr.io/kaniko-project/executor:v1.21.1",  # kaniko builder image
        "kaniko_init_container_image": "alpine:3.18",
        # image for kaniko init container when docker registry is ECR
        "kaniko_aws_cli_image": "amazon/aws-cli:2.7.10",
@@ -473,6 +481,11 @@ default_config = {
            # if set to true, will log a warning for trying to use run db functionality while in nop db mode
            "verbose": True,
        },
+       "pagination_cache": {
+           "interval": 60,
+           "ttl": 3600,
+           "max_size": 10000,
+       },
    },
    "model_endpoint_monitoring": {
        "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
@@ -492,10 +505,9 @@ default_config = {
        # when the user is working in CE environment and has not provided any stream path.
        "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080",
        "default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
-       "batch_processing_function_branch": "master",
        "parquet_batching_max_events": 10_000,
        "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
-       # See mlrun.model_monitoring.stores.…
+       # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
        "store_type": "v3io-nosql",
        "endpoint_store_connection": "",
    },
@@ -610,8 +622,9 @@ default_config = {
    },
    "workflows": {
        "default_workflow_runner_name": "workflow-runner-{}",
-       # Default timeout seconds for retrieving workflow id after execution
-       …
+       # Default timeout seconds for retrieving workflow id after execution
+       # Remote workflow timeout is the maximum between remote and the inner engine timeout
+       "timeouts": {"local": 120, "kfp": 60, "remote": 60 * 5},
    },
    "log_collector": {
        "address": "localhost:8282",
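These defaults are reachable through mlrun's global config object; a minimal sketch, assuming the usual mlconf attribute-access pattern:

    import mlrun

    # per this diff, the remote workflow timeout default is 60 * 5 = 300 seconds
    mlrun.mlconf.workflows.timeouts.remote  # -> 300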
mlrun/data_types/data_types.py
CHANGED
@@ -41,6 +41,7 @@ class ValueType(str, Enum):
     BYTES = "bytes"
     STRING = "str"
     DATETIME = "datetime"
+    LIST = "List"
     BYTES_LIST = "List[bytes]"
     STRING_LIST = "List[string]"
     INT32_LIST = "List[int32]"
@@ -48,6 +49,7 @@ class ValueType(str, Enum):
     DOUBLE_LIST = "List[float]"
     FLOAT_LIST = "List[float32]"
     BOOL_LIST = "List[bool]"
+    Tuple = "Tuple"


 def pd_schema_to_value_type(value):
@@ -102,6 +104,8 @@ def python_type_to_value_type(value_type):
         "datetime64[ns]": ValueType.INT64,
         "datetime64[ns, tz]": ValueType.INT64,
         "category": ValueType.STRING,
+        "list": ValueType.LIST,
+        "tuple": ValueType.Tuple,
     }

     if type_name in type_map:
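A minimal sketch of the new value types (names taken from the diff above; note that LIST follows the existing upper-case member convention while Tuple does not):

    from mlrun.data_types.data_types import ValueType

    ValueType.LIST.value   # "List"
    ValueType.Tuple.value  # "Tuple"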
mlrun/datastore/alibaba_oss.py
ADDED

@@ -0,0 +1,130 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+from datetime import datetime
+from pathlib import Path
+from urllib.parse import urlparse
+
+import oss2
+from fsspec.registry import get_filesystem_class
+
+import mlrun.errors
+
+from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer
+
+
+class OSSStore(DataStore):
+    using_bucket = True
+
+    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+        super().__init__(parent, name, schema, endpoint, secrets)
+        # will be used in case user asks to assume a role and work through fsspec
+
+        access_key_id = self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID")
+        secret_key = self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY")
+        endpoint_url = self._get_secret_or_env("ALIBABA_ENDPOINT_URL")
+        if access_key_id and secret_key and endpoint_url:
+            self.auth = oss2.Auth(access_key_id, secret_key)
+            self.endpoint_url = endpoint_url
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "missing ALIBABA_ACCESS_KEY_ID or ALIBABA_SECRET_ACCESS_KEY ALIBABA_ENDPOINT_URL in environment"
+            )
+
+    @property
+    def filesystem(self):
+        """return fsspec file system object, if supported"""
+        if self._filesystem:
+            return self._filesystem
+        try:
+            import ossfs  # noqa
+        except ImportError as exc:
+            raise ImportError("ALIBABA ossfs not installed") from exc
+        filesystem_class = get_filesystem_class(protocol=self.kind)
+        self._filesystem = makeDatastoreSchemaSanitizer(
+            filesystem_class,
+            using_bucket=self.using_bucket,
+            **self.get_storage_options(),
+        )
+        return self._filesystem
+
+    def get_storage_options(self):
+        res = dict(
+            endpoint=self._get_secret_or_env("ALIBABA_ENDPOINT_URL"),
+            key=self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID"),
+            secret=self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY"),
+        )
+        return self._sanitize_storage_options(res)
+
+    def get_bucket_and_key(self, key):
+        path = self._join(key)[1:]
+        return self.endpoint, path
+
+    def upload(self, key, src_path):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, open(src_path, "rb"))
+
+    def get(self, key, size=None, offset=0):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        if size or offset:
+            return oss.get_object(key, byte_range=self.get_range(size, offset)).read()
+        return oss.get_object(key).read()
+
+    def put(self, key, data, append=False):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, data)
+
+    def stat(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        obj = oss.get_object_meta(key)
+        size = obj.content_length
+        modified = datetime.fromtimestamp(obj.last_modified)
+        return FileStats(size, time.mktime(modified.timetuple()))
+
+    def listdir(self, key):
+        remote_path = self._convert_key_to_remote_path(key)
+        if self.filesystem.isfile(remote_path):
+            return key
+        remote_path = f"{remote_path}/**"
+        files = self.filesystem.glob(remote_path)
+        key_length = len(key)
+        files = [
+            f.split("/", 1)[1][key_length:] for f in files if len(f.split("/")) > 1
+        ]
+        return files
+
+    def delete(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.delete_object(key)
+
+    def _convert_key_to_remote_path(self, key):
+        key = key.strip("/")
+        schema = urlparse(key).scheme
+        # if called without passing dataitem - like in fset.purge_targets,
+        # key will include schema.
+        if not schema:
+            key = Path(self.endpoint, key).as_posix()
+        return key
+
+    @staticmethod
+    def get_range(size, offset):
+        if size:
+            return [offset, size]
+        return [offset, None]
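A rough usage sketch for the new store. The oss:// scheme mapping is an assumption based on the accompanying registration change in mlrun/datastore/datastore.py; the credentials, endpoint, and bucket/key values are placeholders:

    import os

    import mlrun

    os.environ["ALIBABA_ACCESS_KEY_ID"] = "<access-key-id>"
    os.environ["ALIBABA_SECRET_ACCESS_KEY"] = "<secret>"
    os.environ["ALIBABA_ENDPOINT_URL"] = "https://oss-cn-hangzhou.aliyuncs.com"

    # OSSStore raises MLRunInvalidArgumentError if any of the three
    # ALIBABA_* variables above is missing
    item = mlrun.get_dataitem("oss://my-bucket/path/to/file.csv")
    content = item.get()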
mlrun/datastore/azure_blob.py
CHANGED
@@ -158,18 +158,17 @@ class AzureBlobStore(DataStore):
             st[key] = parsed_value

         account_name = st.get("account_name")
-        if not account_name:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "Property 'account_name' is absent both in storage settings and connection string"
-            )
         if primary_url:
             if primary_url.startswith("http://"):
                 primary_url = primary_url[len("http://") :]
             if primary_url.startswith("https://"):
                 primary_url = primary_url[len("https://") :]
             host = primary_url
-
+        elif account_name:
             host = f"{account_name}.{service}.core.windows.net"
+        else:
+            return res
+
         if "account_key" in st:
             res[f"spark.hadoop.fs.azure.account.key.{host}"] = st["account_key"]
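Net effect: when neither a primary URL nor an account name can be determined, get_spark_options now returns the options collected so far instead of raising MLRunInvalidArgumentError.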
mlrun/datastore/base.py
CHANGED
@@ -27,6 +27,7 @@ import requests
 import urllib3
 from deprecated import deprecated

+import mlrun.config
 import mlrun.errors
 from mlrun.errors import err_to_str
 from mlrun.utils import StorePrefix, is_ipython, logger
@@ -34,10 +35,6 @@ from mlrun.utils import StorePrefix, is_ipython, logger
 from .store_resources import is_store_uri, parse_store_uri
 from .utils import filter_df_start_end_time, select_columns_from_df

-verify_ssl = False
-if not verify_ssl:
-    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-

 class FileStats:
     def __init__(self, size, modified, content_type=None):
@@ -633,17 +630,6 @@ def basic_auth_header(user, password):
     return {"Authorization": authstr}


-def http_get(url, headers=None, auth=None):
-    try:
-        response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
-
-    mlrun.errors.raise_for_status(response)
-
-    return response.content
-
-
 class HttpStore(DataStore):
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets)
@@ -671,7 +657,7 @@ class HttpStore(DataStore):
         raise ValueError("unimplemented")

     def get(self, key, size=None, offset=0):
-        data = …
+        data = self._http_get(self.url + self._join(key), self._headers, self.auth)
         if offset:
             data = data[offset:]
         if size:
@@ -691,6 +677,26 @@ class HttpStore(DataStore):
                 f"schema as it is not secure and is not recommended."
             )

+    def _http_get(
+        self,
+        url,
+        headers=None,
+        auth=None,
+    ):
+        # import here to prevent import cycle
+        from mlrun.config import config as mlconf
+
+        verify_ssl = mlconf.httpdb.http.verify
+        try:
+            if not verify_ssl:
+                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+            response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
+        except OSError as exc:
+            raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
+
+        mlrun.errors.raise_for_status(response)
+        return response.content
+

 # This wrapper class is designed to extract the 'ds' schema and profile name from URL-formatted paths.
 # Within fsspec, the AbstractFileSystem::_strip_protocol() internal method is used to handle complete URL paths.
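SSL verification for HTTP(S) data items is therefore no longer hardwired to False at module import; it now follows the httpdb.http.verify setting. A minimal sketch, assuming the standard mlconf access pattern:

    import mlrun

    # _http_get passes this value to requests.get(..., verify=...)
    mlrun.mlconf.httpdb.http.verify = True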
mlrun/datastore/google_cloud_storage.py
CHANGED

@@ -132,7 +132,7 @@ class GoogleCloudStorageStore(DataStore):
         self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)

     def get_spark_options(self):
-        res = …
+        res = {}
         st = self.get_storage_options()
         if "token" in st:
             res = {"spark.hadoop.google.cloud.auth.service.account.enable": "true"}
mlrun/datastore/sources.py
CHANGED
@@ -204,11 +204,11 @@ class CSVSource(BaseSourceDriver):
         )

     def get_spark_options(self):
-        store, path, …
+        store, path, _ = mlrun.store_manager.get_or_create_store(self.path)
         spark_options = store.get_spark_options()
         spark_options.update(
             {
-                "path": …
+                "path": store.spark_url + path,
                 "format": "csv",
                 "header": "true",
                 "inferSchema": "true",
@@ -357,7 +357,7 @@ class ParquetSource(BaseSourceDriver):
         )

     def get_spark_options(self):
-        store, path, …
+        store, path, _ = mlrun.store_manager.get_or_create_store(self.path)
         spark_options = store.get_spark_options()
         spark_options.update(
             {
@@ -794,7 +794,8 @@ class OnlineSource(BaseSourceDriver):
         explicit_ack = (
             is_explicit_ack_supported(context) and mlrun.mlconf.is_explicit_ack()
         )
-        …
+        # TODO: Change to AsyncEmitSource once we can drop support for nuclio<1.12.10
+        src_class = storey.SyncEmitSource(
             context=context,
             key_field=self.key_field or key_field,
             full_event=True,
@@ -853,12 +854,11 @@ class StreamSource(OnlineSource):
         super().__init__(name, attributes=attrs, **kwargs)

     def add_nuclio_trigger(self, function):
-        store, …
+        store, _, url = mlrun.store_manager.get_or_create_store(self.path)
         if store.kind != "v3io":
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Only profiles that reference the v3io datastore can be used with StreamSource"
             )
-        path = "v3io:/" + path
         storage_options = store.get_storage_options()
         access_key = storage_options.get("v3io_access_key")
         endpoint, stream_path = parse_path(url)
@@ -882,7 +882,7 @@ class StreamSource(OnlineSource):
         kwargs["worker_allocation_mode"] = "static"

         function.add_v3io_stream_trigger(
-            …
+            url,
             self.name,
             self.attributes["group"],
             self.attributes["seek_to"],
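A minimal sketch of the revised StreamSource path handling; the URL below is a placeholder and the kwarg plumbing (path forwarded through OnlineSource) is an assumption:

    from mlrun.datastore.sources import StreamSource

    # add_nuclio_trigger now resolves the path through store_manager and
    # rejects stores whose kind is not "v3io"
    source = StreamSource(name="my-stream", path="v3io:///projects/demo/streams/events")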
mlrun/db/base.py
CHANGED
@@ -17,7 +17,7 @@ from abc import ABC, abstractmethod
 from typing import Optional, Union

 import mlrun.common.schemas
-import mlrun.model_monitoring
+import mlrun.model_monitoring


 class RunDBError(Exception):
@@ -509,9 +509,7 @@ class RunDBInterface(ABC):
         self,
         project: str,
         endpoint_id: str,
-        model_endpoint: Union[
-            mlrun.model_monitoring.model_endpoint.ModelEndpoint, dict
-        ],
+        model_endpoint: Union[mlrun.model_monitoring.ModelEndpoint, dict],
     ):
         pass

@@ -632,6 +630,10 @@ class RunDBInterface(ABC):
     def get_api_gateway(self, name, project=None) -> mlrun.common.schemas.APIGateway:
         pass

+    @abstractmethod
+    def delete_api_gateway(self, name, project=None):
+        pass
+
     def get_builder_status(
         self,
         func: "mlrun.runtimes.BaseRuntime",
@@ -724,5 +726,11 @@ class RunDBInterface(ABC):
         project: str,
         base_period: int = 10,
         image: str = "mlrun/mlrun",
-        …
-        …
+        deploy_histogram_data_drift_app: bool = True,
+    ) -> None:
+        raise NotImplementedError
+
+    def deploy_histogram_data_drift_app(
+        self, project: str, image: str = "mlrun/mlrun"
+    ) -> None:
+        raise NotImplementedError