mlrun 1.7.0rc5__py3-none-any.whl → 1.7.0rc7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (75)
  1. mlrun/artifacts/base.py +2 -1
  2. mlrun/artifacts/plots.py +9 -5
  3. mlrun/common/constants.py +6 -0
  4. mlrun/common/schemas/__init__.py +2 -0
  5. mlrun/common/schemas/model_monitoring/__init__.py +4 -0
  6. mlrun/common/schemas/model_monitoring/constants.py +35 -18
  7. mlrun/common/schemas/project.py +1 -0
  8. mlrun/common/types.py +7 -1
  9. mlrun/config.py +19 -6
  10. mlrun/data_types/data_types.py +4 -0
  11. mlrun/datastore/alibaba_oss.py +130 -0
  12. mlrun/datastore/azure_blob.py +4 -5
  13. mlrun/datastore/base.py +22 -16
  14. mlrun/datastore/datastore.py +4 -0
  15. mlrun/datastore/google_cloud_storage.py +1 -1
  16. mlrun/datastore/sources.py +7 -7
  17. mlrun/db/base.py +14 -6
  18. mlrun/db/factory.py +1 -1
  19. mlrun/db/httpdb.py +61 -56
  20. mlrun/db/nopdb.py +3 -0
  21. mlrun/launcher/__init__.py +1 -1
  22. mlrun/launcher/base.py +1 -1
  23. mlrun/launcher/client.py +1 -1
  24. mlrun/launcher/factory.py +1 -1
  25. mlrun/launcher/local.py +1 -1
  26. mlrun/launcher/remote.py +1 -1
  27. mlrun/model.py +1 -0
  28. mlrun/model_monitoring/__init__.py +1 -1
  29. mlrun/model_monitoring/api.py +104 -301
  30. mlrun/model_monitoring/application.py +21 -21
  31. mlrun/model_monitoring/applications/histogram_data_drift.py +130 -40
  32. mlrun/model_monitoring/controller.py +26 -33
  33. mlrun/model_monitoring/db/__init__.py +16 -0
  34. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -34
  35. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  36. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +47 -6
  37. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  38. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +49 -0
  39. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +76 -3
  40. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +68 -0
  41. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/sqlite.py +13 -1
  42. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +662 -0
  43. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  44. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +134 -3
  45. mlrun/model_monitoring/features_drift_table.py +34 -22
  46. mlrun/model_monitoring/helpers.py +45 -6
  47. mlrun/model_monitoring/stream_processing.py +43 -9
  48. mlrun/model_monitoring/tracking_policy.py +7 -1
  49. mlrun/model_monitoring/writer.py +4 -36
  50. mlrun/projects/pipelines.py +13 -1
  51. mlrun/projects/project.py +279 -117
  52. mlrun/run.py +72 -74
  53. mlrun/runtimes/__init__.py +35 -0
  54. mlrun/runtimes/base.py +7 -1
  55. mlrun/runtimes/nuclio/api_gateway.py +188 -61
  56. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  57. mlrun/runtimes/nuclio/application/application.py +283 -0
  58. mlrun/runtimes/nuclio/application/reverse_proxy.go +87 -0
  59. mlrun/runtimes/nuclio/function.py +53 -1
  60. mlrun/runtimes/nuclio/serving.py +28 -32
  61. mlrun/runtimes/pod.py +27 -1
  62. mlrun/serving/server.py +4 -6
  63. mlrun/serving/states.py +41 -33
  64. mlrun/utils/helpers.py +34 -0
  65. mlrun/utils/version/version.json +2 -2
  66. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/METADATA +14 -5
  67. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/RECORD +71 -64
  68. mlrun/model_monitoring/batch.py +0 -974
  69. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  70. mlrun/model_monitoring/stores/models/mysql.py +0 -34
  71. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  72. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/LICENSE +0 -0
  73. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/WHEEL +0 -0
  74. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/entry_points.txt +0 -0
  75. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/top_level.txt +0 -0
mlrun/artifacts/base.py CHANGED
@@ -88,9 +88,10 @@ class ArtifactSpec(ModelObj):
         "db_key",
         "extra_data",
         "unpackaging_instructions",
+        "producer",
     ]
 
-    _extra_fields = ["annotations", "producer", "sources", "license", "encoding"]
+    _extra_fields = ["annotations", "sources", "license", "encoding"]
     _exclude_fields_from_uid_hash = [
         # if the artifact is first created, it will not have a db_key,
         # exclude it so further updates of the artifacts will have the same hash
mlrun/artifacts/plots.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import base64
+import typing
 from io import BytesIO
 
 from deprecated import deprecated
@@ -21,6 +22,9 @@ import mlrun
 from ..utils import dict_to_json
 from .base import Artifact, LegacyArtifact
 
+if typing.TYPE_CHECKING:
+    from plotly.graph_objs import Figure
+
 
 class PlotArtifact(Artifact):
     kind = "plot"
@@ -207,10 +211,10 @@ class PlotlyArtifact(Artifact):
 
     def __init__(
         self,
-        figure=None,
-        key: str = None,
-        target_path: str = None,
-    ):
+        figure: typing.Optional["Figure"] = None,
+        key: typing.Optional[str] = None,
+        target_path: typing.Optional[str] = None,
+    ) -> None:
         """
         Initialize a Plotly artifact with the given figure.
 
@@ -247,7 +251,7 @@ class PlotlyArtifact(Artifact):
         self._figure = figure
         self.spec.format = "html"
 
-    def get_body(self):
+    def get_body(self) -> str:
         """
         Get the artifact's body - the Plotly figure's html code.
 
mlrun/common/constants.py CHANGED
@@ -14,4 +14,10 @@
 #
 IMAGE_NAME_ENRICH_REGISTRY_PREFIX = "."  # prefix for image name to enrich with registry
 MLRUN_CREATED_LABEL = "mlrun-created"
+MLRUN_MODEL_CONF = "model-conf"
+MLRUN_SERVING_SPEC_MOUNT_PATH = f"/tmp/mlrun/{MLRUN_MODEL_CONF}"
+MLRUN_SERVING_SPEC_FILENAME = "serving_spec.json"
+MLRUN_SERVING_SPEC_PATH = (
+    f"{MLRUN_SERVING_SPEC_MOUNT_PATH}/{MLRUN_SERVING_SPEC_FILENAME}"
+)
 MYSQL_MEDIUMBLOB_SIZE_BYTES = 16 * 1024 * 1024
mlrun/common/schemas/__init__.py CHANGED
@@ -124,6 +124,7 @@ from .model_monitoring import (
     EventFieldType,
     EventKeyMetrics,
     Features,
+    FeatureSetFeatures,
     FeatureValues,
     GrafanaColumn,
     GrafanaDataPoint,
@@ -139,6 +140,7 @@ from .model_monitoring import (
     ModelMonitoringMode,
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
+    PrometheusEndpoints,
     TimeSeriesTarget,
 )
 from .notification import (
mlrun/common/schemas/model_monitoring/__init__.py CHANGED
@@ -22,6 +22,7 @@ from .constants import (
     EventFieldType,
     EventKeyMetrics,
     EventLiveStats,
+    FeatureSetFeatures,
     FileTargetKind,
     FunctionURI,
     ModelEndpointTarget,
@@ -29,9 +30,12 @@ from .constants import (
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
     ProjectSecretKeys,
+    PrometheusEndpoints,
     PrometheusMetric,
+    SchedulingKeys,
     TimeSeriesTarget,
     VersionedModel,
+    WriterEvent,
 )
 from .grafana import (
     GrafanaColumn,
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -21,6 +21,12 @@ import mlrun.common.helpers
 from mlrun.common.types import StrEnum
 
 
+class MonitoringStrEnum(StrEnum):
+    @classmethod
+    def list(cls):
+        return list(map(lambda c: c.value, cls))
+
+
 class EventFieldType:
     FUNCTION_URI = "function_uri"
     FUNCTION = "function"
@@ -77,6 +83,20 @@ class EventFieldType:
     SAMPLE_PARQUET_PATH = "sample_parquet_path"
 
 
+class FeatureSetFeatures(MonitoringStrEnum):
+    LATENCY = EventFieldType.LATENCY
+    ERROR_COUNT = EventFieldType.ERROR_COUNT
+    METRICS = EventFieldType.METRICS
+
+    @classmethod
+    def time_stamp(cls):
+        return EventFieldType.TIMESTAMP
+
+    @classmethod
+    def entity(cls):
+        return EventFieldType.ENDPOINT_ID
+
+
 class ApplicationEvent:
     APPLICATION_NAME = "application_name"
     CURRENT_STATS = "current_stats"
@@ -89,7 +109,7 @@ class ApplicationEvent:
     OUTPUT_STREAM_URI = "output_stream_uri"
 
 
-class WriterEvent(StrEnum):
+class WriterEvent(MonitoringStrEnum):
     APPLICATION_NAME = "application_name"
     ENDPOINT_ID = "endpoint_id"
     START_INFER_TIME = "start_infer_time"
@@ -101,10 +121,6 @@ class WriterEvent(StrEnum):
     RESULT_EXTRA_DATA = "result_extra_data"
     CURRENT_STATS = "current_stats"
 
-    @classmethod
-    def list(cls):
-        return list(map(lambda c: c.value, cls))
-
 
 class EventLiveStats:
     LATENCY_AVG_5M = "latency_avg_5m"
@@ -146,6 +162,9 @@ class ModelMonitoringStoreKinds:
 
 class SchedulingKeys:
     LAST_ANALYZED = "last_analyzed"
+    ENDPOINT_ID = "endpoint_id"
+    APPLICATION_NAME = "application_name"
+    UID = "uid"
 
 
 class FileTargetKind:
@@ -155,6 +174,8 @@ class FileTargetKind:
     PARQUET = "parquet"
     APPS_PARQUET = "apps_parquet"
     LOG_STREAM = "log_stream"
+    APP_RESULTS = "app_results"
+    MONITORING_SCHEDULES = "monitoring_schedules"
 
 
 class ModelMonitoringMode(str, Enum):
@@ -177,20 +198,16 @@ class PrometheusMetric:
     DRIFT_STATUS = "drift_status"
 
 
-class MonitoringFunctionNames:
-    WRITER = "model-monitoring-writer"
-    BATCH = "model-monitoring-batch"
-    APPLICATION_CONTROLLER = "model-monitoring-controller"
-    STREAM = "model-monitoring-stream"
+class PrometheusEndpoints(MonitoringStrEnum):
+    MODEL_MONITORING_METRICS = "/model-monitoring-metrics"
+    MONITORING_BATCH_METRICS = "/monitoring-batch-metrics"
+    MONITORING_DRIFT_STATUS = "/monitoring-drift-status"
 
-    @staticmethod
-    def all():
-        return [
-            MonitoringFunctionNames.WRITER,
-            MonitoringFunctionNames.STREAM,
-            MonitoringFunctionNames.BATCH,
-            MonitoringFunctionNames.APPLICATION_CONTROLLER,
-        ]
+
+class MonitoringFunctionNames(MonitoringStrEnum):
+    STREAM = "model-monitoring-stream"
+    APPLICATION_CONTROLLER = "model-monitoring-controller"
+    WRITER = "model-monitoring-writer"
 
 
 @dataclass
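
The new MonitoringStrEnum base class centralizes the value-listing helper that WriterEvent previously defined for itself, and MonitoringFunctionNames.all() is replaced by the inherited list(). A minimal sketch of the resulting behavior (illustrative usage, not part of the package diff):

    from mlrun.common.schemas.model_monitoring import MonitoringFunctionNames

    # members are str subclasses, so they compare equal to plain strings
    assert MonitoringFunctionNames.STREAM == "model-monitoring-stream"

    # list() returns all member values in definition order,
    # replacing the removed MonitoringFunctionNames.all()
    assert MonitoringFunctionNames.list() == [
        "model-monitoring-stream",
        "model-monitoring-controller",
        "model-monitoring-writer",
    ]
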
mlrun/common/schemas/project.py CHANGED
@@ -87,6 +87,7 @@ class ProjectSpec(pydantic.BaseModel):
     custom_packagers: typing.Optional[list[tuple[str, bool]]] = None
     default_image: typing.Optional[str] = None
     build: typing.Optional[ImageBuilder] = None
+    default_function_node_selector: typing.Optional[dict] = {}
 
     class Config:
         extra = pydantic.Extra.allow
mlrun/common/types.py CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 import enum
 
@@ -23,3 +22,10 @@ class StrEnum(str, enum.Enum):
 
     def __repr__(self):
         return self.value
+
+
+# Partial backport from Python 3.11
+# https://docs.python.org/3/library/http.html#http.HTTPMethod
+class HTTPMethod(StrEnum):
+    GET = "GET"
+    POST = "POST"
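
HTTPMethod members are likewise plain strings, so they can be passed anywhere a method name is expected. A short illustrative sketch (not part of the package diff):

    from mlrun.common.types import HTTPMethod

    # StrEnum members compare equal to (and hash like) their string values
    assert HTTPMethod.GET == "GET"

    # e.g. selecting a handler by method name
    handlers = {HTTPMethod.GET: "read", HTTPMethod.POST: "write"}
    assert handlers["GET"] == "read"
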
mlrun/config.py CHANGED
@@ -324,7 +324,13 @@ default_config = {
             # optional values (as per https://dev.mysql.com/doc/refman/8.0/en/sql-mode.html#sql-mode-full):
             #
             # if set to "nil" or "none", nothing would be set
-            "modes": "STRICT_TRANS_TABLES",
+            "modes": (
+                "STRICT_TRANS_TABLES"
+                ",NO_ZERO_IN_DATE"
+                ",NO_ZERO_DATE"
+                ",ERROR_FOR_DIVISION_BY_ZERO"
+                ",NO_ENGINE_SUBSTITUTION"
+            ),
         },
     },
     "jobs": {
@@ -356,6 +362,8 @@ default_config = {
         # - mlrun.runtimes.nuclio.function.enrich_function_with_ingress
         "add_templated_ingress_host_mode": "never",
         "explicit_ack": "enabled",
+        # size of serving spec to move to config maps
+        "serving_spec_env_cutoff": 4096,
     },
     "logs": {
         "decode": {
@@ -443,7 +451,7 @@ default_config = {
         # pip install <requirement_specifier>, e.g. mlrun==0.5.4, mlrun~=0.5,
         # git+https://github.com/mlrun/mlrun@development. by default uses the version
         "mlrun_version_specifier": "",
-        "kaniko_image": "gcr.io/kaniko-project/executor:v1.8.0",  # kaniko builder image
+        "kaniko_image": "gcr.io/kaniko-project/executor:v1.21.1",  # kaniko builder image
         "kaniko_init_container_image": "alpine:3.18",
         # image for kaniko init container when docker registry is ECR
         "kaniko_aws_cli_image": "amazon/aws-cli:2.7.10",
@@ -473,6 +481,11 @@ default_config = {
             # if set to true, will log a warning for trying to use run db functionality while in nop db mode
             "verbose": True,
         },
+        "pagination_cache": {
+            "interval": 60,
+            "ttl": 3600,
+            "max_size": 10000,
+        },
     },
     "model_endpoint_monitoring": {
         "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
@@ -492,10 +505,9 @@ default_config = {
         # when the user is working in CE environment and has not provided any stream path.
         "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080",
         "default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
-        "batch_processing_function_branch": "master",
         "parquet_batching_max_events": 10_000,
         "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
-        # See mlrun.model_monitoring.stores.ModelEndpointStoreType for available options
+        # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
        "store_type": "v3io-nosql",
         "endpoint_store_connection": "",
     },
@@ -610,8 +622,9 @@ default_config = {
     },
     "workflows": {
         "default_workflow_runner_name": "workflow-runner-{}",
-        # Default timeout seconds for retrieving workflow id after execution:
-        "timeouts": {"local": 120, "kfp": 30, "remote": 90},
+        # Default timeout seconds for retrieving workflow id after execution
+        # Remote workflow timeout is the maximum between remote and the inner engine timeout
+        "timeouts": {"local": 120, "kfp": 60, "remote": 60 * 5},
     },
     "log_collector": {
         "address": "localhost:8282",
mlrun/data_types/data_types.py CHANGED
@@ -41,6 +41,7 @@ class ValueType(str, Enum):
     BYTES = "bytes"
     STRING = "str"
     DATETIME = "datetime"
+    LIST = "List"
     BYTES_LIST = "List[bytes]"
     STRING_LIST = "List[string]"
     INT32_LIST = "List[int32]"
@@ -48,6 +49,7 @@ class ValueType(str, Enum):
     DOUBLE_LIST = "List[float]"
     FLOAT_LIST = "List[float32]"
     BOOL_LIST = "List[bool]"
+    Tuple = "Tuple"
 
 
 def pd_schema_to_value_type(value):
@@ -102,6 +104,8 @@ def python_type_to_value_type(value_type):
         "datetime64[ns]": ValueType.INT64,
         "datetime64[ns, tz]": ValueType.INT64,
         "category": ValueType.STRING,
+        "list": ValueType.LIST,
+        "tuple": ValueType.Tuple,
     }
 
     if type_name in type_map:
mlrun/datastore/alibaba_oss.py ADDED
@@ -0,0 +1,130 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+from datetime import datetime
+from pathlib import Path
+from urllib.parse import urlparse
+
+import oss2
+from fsspec.registry import get_filesystem_class
+
+import mlrun.errors
+
+from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer
+
+
+class OSSStore(DataStore):
+    using_bucket = True
+
+    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+        super().__init__(parent, name, schema, endpoint, secrets)
+        # will be used in case user asks to assume a role and work through fsspec
+
+        access_key_id = self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID")
+        secret_key = self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY")
+        endpoint_url = self._get_secret_or_env("ALIBABA_ENDPOINT_URL")
+        if access_key_id and secret_key and endpoint_url:
+            self.auth = oss2.Auth(access_key_id, secret_key)
+            self.endpoint_url = endpoint_url
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "missing ALIBABA_ACCESS_KEY_ID, ALIBABA_SECRET_ACCESS_KEY or ALIBABA_ENDPOINT_URL in environment"
+            )
+
+    @property
+    def filesystem(self):
+        """return fsspec file system object, if supported"""
+        if self._filesystem:
+            return self._filesystem
+        try:
+            import ossfs  # noqa
+        except ImportError as exc:
+            raise ImportError("ALIBABA ossfs not installed") from exc
+        filesystem_class = get_filesystem_class(protocol=self.kind)
+        self._filesystem = makeDatastoreSchemaSanitizer(
+            filesystem_class,
+            using_bucket=self.using_bucket,
+            **self.get_storage_options(),
+        )
+        return self._filesystem
+
+    def get_storage_options(self):
+        res = dict(
+            endpoint=self._get_secret_or_env("ALIBABA_ENDPOINT_URL"),
+            key=self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID"),
+            secret=self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY"),
+        )
+        return self._sanitize_storage_options(res)
+
+    def get_bucket_and_key(self, key):
+        path = self._join(key)[1:]
+        return self.endpoint, path
+
+    def upload(self, key, src_path):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, open(src_path, "rb"))
+
+    def get(self, key, size=None, offset=0):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        if size or offset:
+            return oss.get_object(key, byte_range=self.get_range(size, offset)).read()
+        return oss.get_object(key).read()
+
+    def put(self, key, data, append=False):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, data)
+
+    def stat(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        obj = oss.get_object_meta(key)
+        size = obj.content_length
+        modified = datetime.fromtimestamp(obj.last_modified)
+        return FileStats(size, time.mktime(modified.timetuple()))
+
+    def listdir(self, key):
+        remote_path = self._convert_key_to_remote_path(key)
+        if self.filesystem.isfile(remote_path):
+            return key
+        remote_path = f"{remote_path}/**"
+        files = self.filesystem.glob(remote_path)
+        key_length = len(key)
+        files = [
+            f.split("/", 1)[1][key_length:] for f in files if len(f.split("/")) > 1
+        ]
+        return files
+
+    def delete(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.delete_object(key)
+
+    def _convert_key_to_remote_path(self, key):
+        key = key.strip("/")
+        schema = urlparse(key).scheme
+        # if called without passing dataitem - like in fset.purge_targets,
+        # key will include schema.
+        if not schema:
+            key = Path(self.endpoint, key).as_posix()
+        return key
+
+    @staticmethod
+    def get_range(size, offset):
+        if size:
+            return [offset, size]
+        return [offset, None]
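
Once the oss:// scheme is registered (see the schema_to_store change in mlrun/datastore/datastore.py below), OSS objects can be read like any other data item. A hedged usage sketch, assuming the ossfs package is installed and the bucket and object exist:

    import os
    import mlrun

    # credentials are resolved via _get_secret_or_env, so plain env vars suffice
    os.environ["ALIBABA_ACCESS_KEY_ID"] = "<access-key-id>"
    os.environ["ALIBABA_SECRET_ACCESS_KEY"] = "<secret-access-key>"
    os.environ["ALIBABA_ENDPOINT_URL"] = "https://oss-cn-hangzhou.aliyuncs.com"

    # oss://<bucket>/<object-key>
    item = mlrun.get_dataitem("oss://my-bucket/path/to/data.csv")
    df = item.as_df()
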
mlrun/datastore/azure_blob.py CHANGED
@@ -158,18 +158,17 @@ class AzureBlobStore(DataStore):
             st[key] = parsed_value
 
         account_name = st.get("account_name")
-        if not account_name:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "Property 'account_name' is absent both in storage settings and connection string"
-            )
         if primary_url:
             if primary_url.startswith("http://"):
                 primary_url = primary_url[len("http://") :]
             if primary_url.startswith("https://"):
                 primary_url = primary_url[len("https://") :]
             host = primary_url
-        else:
+        elif account_name:
             host = f"{account_name}.{service}.core.windows.net"
+        else:
+            return res
+
         if "account_key" in st:
             res[f"spark.hadoop.fs.azure.account.key.{host}"] = st["account_key"]
 
mlrun/datastore/base.py CHANGED
@@ -27,6 +27,7 @@ import requests
 import urllib3
 from deprecated import deprecated
 
+import mlrun.config
 import mlrun.errors
 from mlrun.errors import err_to_str
 from mlrun.utils import StorePrefix, is_ipython, logger
@@ -34,10 +35,6 @@ from mlrun.utils import StorePrefix, is_ipython, logger
 from .store_resources import is_store_uri, parse_store_uri
 from .utils import filter_df_start_end_time, select_columns_from_df
 
-verify_ssl = False
-if not verify_ssl:
-    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
 
 class FileStats:
     def __init__(self, size, modified, content_type=None):
@@ -633,17 +630,6 @@ def basic_auth_header(user, password):
     return {"Authorization": authstr}
 
 
-def http_get(url, headers=None, auth=None):
-    try:
-        response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
-
-    mlrun.errors.raise_for_status(response)
-
-    return response.content
-
-
 class HttpStore(DataStore):
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets)
@@ -671,7 +657,7 @@ class HttpStore(DataStore):
             raise ValueError("unimplemented")
 
     def get(self, key, size=None, offset=0):
-        data = http_get(self.url + self._join(key), self._headers, self.auth)
+        data = self._http_get(self.url + self._join(key), self._headers, self.auth)
         if offset:
             data = data[offset:]
         if size:
@@ -691,6 +677,26 @@ class HttpStore(DataStore):
             f"schema as it is not secure and is not recommended."
         )
 
+    def _http_get(
+        self,
+        url,
+        headers=None,
+        auth=None,
+    ):
+        # import here to prevent import cycle
+        from mlrun.config import config as mlconf
+
+        verify_ssl = mlconf.httpdb.http.verify
+        try:
+            if not verify_ssl:
+                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+            response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
+        except OSError as exc:
+            raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
+
+        mlrun.errors.raise_for_status(response)
+        return response.content
+
 
 # This wrapper class is designed to extract the 'ds' schema and profile name from URL-formatted paths.
 # Within fsspec, the AbstractFileSystem::_strip_protocol() internal method is used to handle complete URL paths.
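
SSL verification for HttpStore downloads is now driven by configuration instead of the former hardcoded module-level flag. A brief sketch of the knob involved (illustrative, using the same config path the new _http_get reads):

    import mlrun

    # disable certificate verification for http(s) data stores (not recommended);
    # _http_get will then also silence urllib3's InsecureRequestWarning
    mlrun.mlconf.httpdb.http.verify = False
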
mlrun/datastore/datastore.py CHANGED
@@ -98,6 +98,10 @@ def schema_to_store(schema):
         from .hdfs import HdfsStore
 
         return HdfsStore
+    elif schema == "oss":
+        from .alibaba_oss import OSSStore
+
+        return OSSStore
     else:
         raise ValueError(f"unsupported store scheme ({schema})")
 
mlrun/datastore/google_cloud_storage.py CHANGED
@@ -132,7 +132,7 @@ class GoogleCloudStorageStore(DataStore):
         self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
 
     def get_spark_options(self):
-        res = None
+        res = {}
         st = self.get_storage_options()
         if "token" in st:
             res = {"spark.hadoop.google.cloud.auth.service.account.enable": "true"}
mlrun/datastore/sources.py CHANGED
@@ -204,11 +204,11 @@ class CSVSource(BaseSourceDriver):
         )
 
     def get_spark_options(self):
-        store, path, url = mlrun.store_manager.get_or_create_store(self.path)
+        store, path, _ = mlrun.store_manager.get_or_create_store(self.path)
         spark_options = store.get_spark_options()
         spark_options.update(
             {
-                "path": url,
+                "path": store.spark_url + path,
                 "format": "csv",
                 "header": "true",
                 "inferSchema": "true",
@@ -357,7 +357,7 @@ class ParquetSource(BaseSourceDriver):
         )
 
     def get_spark_options(self):
-        store, path, url = mlrun.store_manager.get_or_create_store(self.path)
+        store, path, _ = mlrun.store_manager.get_or_create_store(self.path)
         spark_options = store.get_spark_options()
         spark_options.update(
             {
@@ -794,7 +794,8 @@ class OnlineSource(BaseSourceDriver):
         explicit_ack = (
             is_explicit_ack_supported(context) and mlrun.mlconf.is_explicit_ack()
         )
-        src_class = storey.AsyncEmitSource(
+        # TODO: Change to AsyncEmitSource once we can drop support for nuclio<1.12.10
+        src_class = storey.SyncEmitSource(
             context=context,
             key_field=self.key_field or key_field,
             full_event=True,
@@ -853,12 +854,11 @@ class StreamSource(OnlineSource):
         super().__init__(name, attributes=attrs, **kwargs)
 
     def add_nuclio_trigger(self, function):
-        store, path, url = mlrun.store_manager.get_or_create_store(self.path)
+        store, _, url = mlrun.store_manager.get_or_create_store(self.path)
         if store.kind != "v3io":
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Only profiles that reference the v3io datastore can be used with StreamSource"
             )
-        path = "v3io:/" + path
         storage_options = store.get_storage_options()
         access_key = storage_options.get("v3io_access_key")
         endpoint, stream_path = parse_path(url)
@@ -882,7 +882,7 @@ class StreamSource(OnlineSource):
             kwargs["worker_allocation_mode"] = "static"
 
         function.add_v3io_stream_trigger(
-            path,
+            url,
             self.name,
             self.attributes["group"],
             self.attributes["seek_to"],
mlrun/db/base.py CHANGED
@@ -17,7 +17,7 @@ from abc import ABC, abstractmethod
 from typing import Optional, Union
 
 import mlrun.common.schemas
-import mlrun.model_monitoring.model_endpoint
+import mlrun.model_monitoring
 
 
 class RunDBError(Exception):
@@ -509,9 +509,7 @@ class RunDBInterface(ABC):
         self,
         project: str,
         endpoint_id: str,
-        model_endpoint: Union[
-            mlrun.model_monitoring.model_endpoint.ModelEndpoint, dict
-        ],
+        model_endpoint: Union[mlrun.model_monitoring.ModelEndpoint, dict],
     ):
         pass
 
@@ -632,6 +630,10 @@ class RunDBInterface(ABC):
     def get_api_gateway(self, name, project=None) -> mlrun.common.schemas.APIGateway:
         pass
 
+    @abstractmethod
+    def delete_api_gateway(self, name, project=None):
+        pass
+
     def get_builder_status(
         self,
         func: "mlrun.runtimes.BaseRuntime",
@@ -724,5 +726,11 @@ class RunDBInterface(ABC):
         project: str,
         base_period: int = 10,
         image: str = "mlrun/mlrun",
-    ):
-        pass
+        deploy_histogram_data_drift_app: bool = True,
+    ) -> None:
+        raise NotImplementedError
+
+    def deploy_histogram_data_drift_app(
+        self, project: str, image: str = "mlrun/mlrun"
+    ) -> None:
+        raise NotImplementedError