mlrun 1.7.0rc2__py3-none-any.whl → 1.7.0rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (70)
  1. mlrun/artifacts/manager.py +6 -1
  2. mlrun/common/constants.py +1 -0
  3. mlrun/common/model_monitoring/helpers.py +12 -6
  4. mlrun/common/schemas/__init__.py +1 -0
  5. mlrun/common/schemas/client_spec.py +1 -0
  6. mlrun/common/schemas/common.py +40 -0
  7. mlrun/common/schemas/model_monitoring/constants.py +4 -1
  8. mlrun/common/schemas/project.py +2 -0
  9. mlrun/config.py +20 -16
  10. mlrun/datastore/azure_blob.py +22 -9
  11. mlrun/datastore/base.py +15 -25
  12. mlrun/datastore/datastore.py +19 -8
  13. mlrun/datastore/datastore_profile.py +47 -5
  14. mlrun/datastore/google_cloud_storage.py +10 -6
  15. mlrun/datastore/hdfs.py +51 -0
  16. mlrun/datastore/redis.py +4 -0
  17. mlrun/datastore/s3.py +4 -0
  18. mlrun/datastore/sources.py +31 -50
  19. mlrun/datastore/targets.py +58 -48
  20. mlrun/datastore/utils.py +2 -49
  21. mlrun/datastore/v3io.py +4 -0
  22. mlrun/db/base.py +34 -0
  23. mlrun/db/httpdb.py +71 -42
  24. mlrun/execution.py +3 -3
  25. mlrun/feature_store/feature_vector.py +2 -2
  26. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
  27. mlrun/frameworks/tf_keras/model_handler.py +7 -7
  28. mlrun/k8s_utils.py +10 -5
  29. mlrun/kfpops.py +19 -10
  30. mlrun/model.py +5 -0
  31. mlrun/model_monitoring/api.py +3 -3
  32. mlrun/model_monitoring/application.py +1 -1
  33. mlrun/model_monitoring/applications/__init__.py +13 -0
  34. mlrun/model_monitoring/applications/histogram_data_drift.py +218 -0
  35. mlrun/model_monitoring/batch.py +9 -111
  36. mlrun/model_monitoring/controller.py +73 -55
  37. mlrun/model_monitoring/controller_handler.py +13 -5
  38. mlrun/model_monitoring/features_drift_table.py +62 -53
  39. mlrun/model_monitoring/helpers.py +30 -21
  40. mlrun/model_monitoring/metrics/__init__.py +13 -0
  41. mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
  42. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +14 -14
  43. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
  44. mlrun/package/packagers/pandas_packagers.py +3 -3
  45. mlrun/package/utils/_archiver.py +3 -1
  46. mlrun/platforms/iguazio.py +8 -65
  47. mlrun/projects/pipelines.py +21 -11
  48. mlrun/projects/project.py +121 -42
  49. mlrun/runtimes/base.py +21 -2
  50. mlrun/runtimes/kubejob.py +5 -3
  51. mlrun/runtimes/local.py +2 -2
  52. mlrun/runtimes/mpijob/abstract.py +6 -6
  53. mlrun/runtimes/nuclio/function.py +9 -9
  54. mlrun/runtimes/nuclio/serving.py +3 -3
  55. mlrun/runtimes/pod.py +3 -3
  56. mlrun/runtimes/sparkjob/spark3job.py +3 -3
  57. mlrun/serving/remote.py +4 -2
  58. mlrun/serving/server.py +15 -18
  59. mlrun/serving/states.py +27 -12
  60. mlrun/utils/async_http.py +3 -3
  61. mlrun/utils/helpers.py +27 -5
  62. mlrun/utils/http.py +3 -3
  63. mlrun/utils/notifications/notification_pusher.py +6 -6
  64. mlrun/utils/version/version.json +2 -2
  65. {mlrun-1.7.0rc2.dist-info → mlrun-1.7.0rc4.dist-info}/METADATA +13 -16
  66. {mlrun-1.7.0rc2.dist-info → mlrun-1.7.0rc4.dist-info}/RECORD +70 -64
  67. {mlrun-1.7.0rc2.dist-info → mlrun-1.7.0rc4.dist-info}/LICENSE +0 -0
  68. {mlrun-1.7.0rc2.dist-info → mlrun-1.7.0rc4.dist-info}/WHEEL +0 -0
  69. {mlrun-1.7.0rc2.dist-info → mlrun-1.7.0rc4.dist-info}/entry_points.txt +0 -0
  70. {mlrun-1.7.0rc2.dist-info → mlrun-1.7.0rc4.dist-info}/top_level.txt +0 -0
mlrun/serving/server.py CHANGED
@@ -188,11 +188,6 @@ class GraphServer(ModelObj):
188
188
 
189
189
  def init_object(self, namespace):
190
190
  self.graph.init_object(self.context, namespace, self.load_mode, reset=True)
191
- return (
192
- v2_serving_async_handler
193
- if config.datastore.async_source_mode == "enabled"
194
- else v2_serving_handler
195
- )
196
191
 
197
192
  def test(
198
193
  self,
@@ -310,7 +305,7 @@ class GraphServer(ModelObj):
310
305
 
311
306
  def wait_for_completion(self):
312
307
  """wait for async operation to complete"""
313
- self.graph.wait_for_completion()
308
+ return self.graph.wait_for_completion()
314
309
 
315
310
 
316
311
  def v2_serving_init(context, namespace=None):
@@ -334,11 +329,18 @@ def v2_serving_init(context, namespace=None):
334
329
  context.logger.info_with(
335
330
  "Initializing states", namespace=namespace or get_caller_globals()
336
331
  )
337
- server.init_states(context, namespace or get_caller_globals())
332
+ kwargs = {}
333
+ if hasattr(context, "is_mock"):
334
+ kwargs["is_mock"] = context.is_mock
335
+ server.init_states(
336
+ context,
337
+ namespace or get_caller_globals(),
338
+ **kwargs,
339
+ )
338
340
  context.logger.info("Initializing graph steps")
339
- serving_handler = server.init_object(namespace or get_caller_globals())
341
+ server.init_object(namespace or get_caller_globals())
340
342
  # set the handler hook to point to our handler
341
- setattr(context, "mlrun_handler", serving_handler)
343
+ setattr(context, "mlrun_handler", v2_serving_handler)
342
344
  setattr(context, "_server", server)
343
345
  context.logger.info_with("Serving was initialized", verbose=server.verbose)
344
346
  if server.verbose:
@@ -351,9 +353,9 @@ def v2_serving_init(context, namespace=None):
351
353
  "Setting termination callback to terminate graph on worker shutdown"
352
354
  )
353
355
 
354
- def termination_callback():
356
+ async def termination_callback():
355
357
  context.logger.info("Termination callback called")
356
- server.wait_for_completion()
358
+ await server.wait_for_completion()
357
359
  context.logger.info("Termination of async flow is completed")
358
360
 
359
361
  context.platform.set_termination_callback(termination_callback)
@@ -363,9 +365,9 @@ def v2_serving_init(context, namespace=None):
363
365
  "Setting drain callback to terminate and restart the graph on a drain event (such as rebalancing)"
364
366
  )
365
367
 
366
- def drain_callback():
368
+ async def drain_callback():
367
369
  context.logger.info("Drain callback called")
368
- server.wait_for_completion()
370
+ await server.wait_for_completion()
369
371
  context.logger.info(
370
372
  "Termination of async flow is completed. Rerunning async flow."
371
373
  )
@@ -386,11 +388,6 @@ def v2_serving_handler(context, event, get_body=False):
386
388
  return context._server.run(event, context, get_body)
387
389
 
388
390
 
389
- async def v2_serving_async_handler(context, event, get_body=False):
390
- """hook for nuclio handler()"""
391
- return await context._server.run(event, context, get_body)
392
-
393
-
394
391
  def create_graph_server(
395
392
  parameters={},
396
393
  load_mode=None,
mlrun/serving/states.py CHANGED
@@ -14,6 +14,7 @@
14
14
 
15
15
  __all__ = ["TaskStep", "RouterStep", "RootFlowStep", "ErrorStep"]
16
16
 
17
+ import asyncio
17
18
  import os
18
19
  import pathlib
19
20
  import traceback
@@ -1160,19 +1161,19 @@ class FlowStep(BaseStep):
1160
1161
  if self._controller:
1161
1162
  # async flow (using storey)
1162
1163
  event._awaitable_result = None
1163
- if config.datastore.async_source_mode == "enabled":
1164
+ if self.context.is_mock:
1165
+ resp = self._controller.emit(
1166
+ event, return_awaitable_result=self._wait_for_result
1167
+ )
1168
+ if self._wait_for_result and resp:
1169
+ return resp.await_result()
1170
+ else:
1164
1171
  resp_awaitable = self._controller.emit(
1165
1172
  event, await_result=self._wait_for_result
1166
1173
  )
1167
1174
  if self._wait_for_result:
1168
1175
  return resp_awaitable
1169
1176
  return self._await_and_return_id(resp_awaitable, event)
1170
- else:
1171
- resp = self._controller.emit(
1172
- event, return_awaitable_result=self._wait_for_result
1173
- )
1174
- if self._wait_for_result and resp:
1175
- return resp.await_result()
1176
1177
  event = copy(event)
1177
1178
  event.body = {"id": event.id}
1178
1179
  return event
@@ -1210,10 +1211,20 @@ class FlowStep(BaseStep):
1210
1211
 
1211
1212
  def wait_for_completion(self):
1212
1213
  """wait for completion of run in async flows"""
1214
+
1213
1215
  if self._controller:
1214
- if hasattr(self._controller, "terminate"):
1215
- self._controller.terminate()
1216
- return self._controller.await_termination()
1216
+ if asyncio.iscoroutinefunction(self._controller.await_termination):
1217
+
1218
+ async def terminate_and_await_termination():
1219
+ if hasattr(self._controller, "terminate"):
1220
+ await self._controller.terminate()
1221
+ return await self._controller.await_termination()
1222
+
1223
+ return terminate_and_await_termination()
1224
+ else:
1225
+ if hasattr(self._controller, "terminate"):
1226
+ self._controller.terminate()
1227
+ return self._controller.await_termination()
1217
1228
 
1218
1229
  def plot(self, filename=None, format=None, source=None, targets=None, **kw):
1219
1230
  """plot/save graph using graphviz
@@ -1555,10 +1566,14 @@ def _init_async_objects(context, steps):
1555
1566
  wait_for_result = True
1556
1567
 
1557
1568
  source_args = context.get_param("source_args", {})
1558
-
1559
1569
  explicit_ack = is_explicit_ack_supported(context) and mlrun.mlconf.is_explicit_ack()
1560
1570
 
1561
- default_source = storey.SyncEmitSource(
1571
+ if context.is_mock:
1572
+ source_class = storey.SyncEmitSource
1573
+ else:
1574
+ source_class = storey.AsyncEmitSource
1575
+
1576
+ default_source = source_class(
1562
1577
  context=context,
1563
1578
  explicit_ack=explicit_ack,
1564
1579
  **source_args,
mlrun/utils/async_http.py CHANGED
@@ -137,9 +137,9 @@ class _CustomRequestContext(_RequestContext):
137
137
 
138
138
  # enrich user agent
139
139
  # will help traceability and debugging
140
- headers[
141
- aiohttp.hdrs.USER_AGENT
142
- ] = f"{aiohttp.http.SERVER_SOFTWARE} mlrun/{config.version}"
140
+ headers[aiohttp.hdrs.USER_AGENT] = (
141
+ f"{aiohttp.http.SERVER_SOFTWARE} mlrun/{config.version}"
142
+ )
143
143
 
144
144
  response: typing.Optional[
145
145
  aiohttp.ClientResponse
mlrun/utils/helpers.py CHANGED
@@ -12,6 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import asyncio
15
16
  import enum
16
17
  import functools
17
18
  import hashlib
@@ -30,7 +31,6 @@ from os import path
30
31
  from types import ModuleType
31
32
  from typing import Any, Optional
32
33
 
33
- import anyio
34
34
  import git
35
35
  import inflection
36
36
  import numpy as np
@@ -49,6 +49,7 @@ import mlrun.common.schemas
49
49
  import mlrun.errors
50
50
  import mlrun.utils.regex
51
51
  import mlrun.utils.version.version
52
+ from mlrun.common.constants import MYSQL_MEDIUMBLOB_SIZE_BYTES
52
53
  from mlrun.config import config
53
54
 
54
55
  from .logger import create_logger
@@ -270,6 +271,17 @@ def validate_artifact_key_name(
270
271
  )
271
272
 
272
273
 
274
+ def validate_inline_artifact_body_size(body: typing.Union[str, bytes, None]) -> None:
275
+ if body and len(body) > MYSQL_MEDIUMBLOB_SIZE_BYTES:
276
+ raise mlrun.errors.MLRunBadRequestError(
277
+ "The body of the artifact exceeds the maximum allowed size. "
278
+ "Avoid embedding the artifact body. "
279
+ "This increases the size of the project yaml file and could affect the project during loading and saving. "
280
+ "More information is available at"
281
+ "https://docs.mlrun.org/en/latest/projects/automate-project-git-source.html#setting-and-registering-the-project-artifacts"
282
+ )
283
+
284
+
273
285
  def validate_v3io_stream_consumer_group(
274
286
  value: str, raise_on_failure: bool = True
275
287
  ) -> bool:
@@ -1464,13 +1476,15 @@ def normalize_project_username(username: str):
1464
1476
  return username
1465
1477
 
1466
1478
 
1467
- # run_in threadpool is taken from fastapi to allow us to run sync functions in a threadpool
1468
- # without importing fastapi in the client
1469
1479
  async def run_in_threadpool(func, *args, **kwargs):
1480
+ """
1481
+ Run a sync function in the event loop's default thread pool executor and await its result.
1482
+ Note that this function is not suitable for CPU-bound tasks, as it will block the event loop.
1483
+ """
1484
+ loop = asyncio.get_running_loop()
1470
1485
  if kwargs:
1471
- # run_sync doesn't accept 'kwargs', so bind them in here
1472
1486
  func = functools.partial(func, **kwargs)
1473
- return await anyio.to_thread.run_sync(func, *args)
1487
+ return await loop.run_in_executor(None, func, *args)
1474
1488
 
1475
1489
 
1476
1490
  def is_explicit_ack_supported(context):
@@ -1540,3 +1554,11 @@ def get_local_file_schema() -> list:
1540
1554
  # The expression `list(string.ascii_lowercase)` generates a list of lowercase alphabets,
1541
1555
  # which corresponds to drive letters in Windows file paths such as `C:/Windows/path`.
1542
1556
  return ["file"] + list(string.ascii_lowercase)
1557
+
1558
+
1559
+ def is_safe_path(base, filepath, is_symlink=False):
1560
+ # Avoid path traversal attacks by ensuring that the path is safe
1561
+ resolved_filepath = (
1562
+ os.path.abspath(filepath) if not is_symlink else os.path.realpath(filepath)
1563
+ )
1564
+ return base == os.path.commonpath((base, resolved_filepath))
mlrun/utils/http.py CHANGED
@@ -109,9 +109,9 @@ class HTTPSessionWithRetry(requests.Session):
109
109
  def request(self, method, url, **kwargs):
110
110
  retry_count = 0
111
111
  kwargs.setdefault("headers", {})
112
- kwargs["headers"][
113
- "User-Agent"
114
- ] = f"{requests.utils.default_user_agent()} mlrun/{config.version}"
112
+ kwargs["headers"]["User-Agent"] = (
113
+ f"{requests.utils.default_user_agent()} mlrun/{config.version}"
114
+ )
115
115
  while True:
116
116
  try:
117
117
  response = super().request(method, url, **kwargs)
@@ -303,9 +303,9 @@ class NotificationPusher(_NotificationPusherBase):
303
303
  traceback=traceback.format_exc(),
304
304
  )
305
305
  update_notification_status_kwargs["reason"] = f"Exception error: {str(exc)}"
306
- update_notification_status_kwargs[
307
- "status"
308
- ] = mlrun.common.schemas.NotificationStatus.ERROR
306
+ update_notification_status_kwargs["status"] = (
307
+ mlrun.common.schemas.NotificationStatus.ERROR
308
+ )
309
309
  raise exc
310
310
  finally:
311
311
  self._update_notification_status(
@@ -352,9 +352,9 @@ class NotificationPusher(_NotificationPusherBase):
352
352
  traceback=traceback.format_exc(),
353
353
  )
354
354
  update_notification_status_kwargs["reason"] = f"Exception error: {str(exc)}"
355
- update_notification_status_kwargs[
356
- "status"
357
- ] = mlrun.common.schemas.NotificationStatus.ERROR
355
+ update_notification_status_kwargs["status"] = (
356
+ mlrun.common.schemas.NotificationStatus.ERROR
357
+ )
358
358
  raise exc
359
359
  finally:
360
360
  await mlrun.utils.helpers.run_in_threadpool(
@@ -1,4 +1,4 @@
1
1
  {
2
- "git_commit": "523bdf379b1183bee50b5b0ebe61ddeae9d7ca1c",
3
- "version": "1.7.0-rc2"
2
+ "git_commit": "cb2750f25e202a321723af3465359944445dfda7",
3
+ "version": "1.7.0-rc4"
4
4
  }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mlrun
3
- Version: 1.7.0rc2
3
+ Version: 1.7.0rc4
4
4
  Summary: Tracking and config of machine learning runs
5
5
  Home-page: https://github.com/mlrun/mlrun
6
6
  Author: Yaron Haviv
@@ -44,13 +44,12 @@ Requires-Dist: semver ~=3.0
44
44
  Requires-Dist: dependency-injector ~=4.41
45
45
  Requires-Dist: fsspec ==2023.9.2
46
46
  Requires-Dist: v3iofs ~=0.1.17
47
- Requires-Dist: storey ~=1.7.3
47
+ Requires-Dist: storey ~=1.7.5
48
48
  Requires-Dist: inflection ~=0.5.0
49
49
  Requires-Dist: python-dotenv ~=0.17.0
50
- Requires-Dist: setuptools ~=68.2
50
+ Requires-Dist: setuptools ~=69.1
51
51
  Requires-Dist: deprecated ~=1.2
52
52
  Requires-Dist: jinja2 >=3.1.3,~=3.1
53
- Requires-Dist: anyio ~=3.7
54
53
  Requires-Dist: orjson ~=3.9
55
54
  Provides-Extra: all
56
55
  Requires-Dist: adlfs ==2023.9.0 ; extra == 'all'
@@ -80,12 +79,11 @@ Requires-Dist: sqlalchemy ~=1.4 ; extra == 'all'
80
79
  Provides-Extra: api
81
80
  Requires-Dist: uvicorn ~=0.27.1 ; extra == 'api'
82
81
  Requires-Dist: dask-kubernetes ~=0.11.0 ; extra == 'api'
83
- Requires-Dist: apscheduler !=3.10.2,~=3.6 ; extra == 'api'
84
- Requires-Dist: sqlite3-to-mysql ~=1.4 ; extra == 'api'
85
- Requires-Dist: objgraph ~=3.5 ; extra == 'api'
86
- Requires-Dist: igz-mgmt ~=0.0.10 ; extra == 'api'
87
- Requires-Dist: humanfriendly ~=9.2 ; extra == 'api'
88
- Requires-Dist: fastapi ~=0.103.2 ; extra == 'api'
82
+ Requires-Dist: apscheduler <4,>=3.10.3 ; extra == 'api'
83
+ Requires-Dist: objgraph ~=3.6 ; extra == 'api'
84
+ Requires-Dist: igz-mgmt ~=0.1.0 ; extra == 'api'
85
+ Requires-Dist: humanfriendly ~=10.0 ; extra == 'api'
86
+ Requires-Dist: fastapi ~=0.110.0 ; extra == 'api'
89
87
  Requires-Dist: sqlalchemy ~=1.4 ; extra == 'api'
90
88
  Requires-Dist: pymysql ~=1.0 ; extra == 'api'
91
89
  Requires-Dist: alembic ~=1.9 ; extra == 'api'
@@ -127,7 +125,7 @@ Provides-Extra: complete-api
127
125
  Requires-Dist: adlfs ==2023.9.0 ; extra == 'complete-api'
128
126
  Requires-Dist: aiobotocore <2.8,>=2.5.0 ; extra == 'complete-api'
129
127
  Requires-Dist: alembic ~=1.9 ; extra == 'complete-api'
130
- Requires-Dist: apscheduler !=3.10.2,~=3.6 ; extra == 'complete-api'
128
+ Requires-Dist: apscheduler <4,>=3.10.3 ; extra == 'complete-api'
131
129
  Requires-Dist: avro ~=1.11 ; extra == 'complete-api'
132
130
  Requires-Dist: azure-core ~=1.24 ; extra == 'complete-api'
133
131
  Requires-Dist: azure-identity ~=1.5 ; extra == 'complete-api'
@@ -137,23 +135,22 @@ Requires-Dist: dask-kubernetes ~=0.11.0 ; extra == 'complete-api'
137
135
  Requires-Dist: dask ~=2023.9.0 ; extra == 'complete-api'
138
136
  Requires-Dist: databricks-sdk ~=0.13.0 ; extra == 'complete-api'
139
137
  Requires-Dist: distributed ~=2023.9.0 ; extra == 'complete-api'
140
- Requires-Dist: fastapi ~=0.103.2 ; extra == 'complete-api'
138
+ Requires-Dist: fastapi ~=0.110.0 ; extra == 'complete-api'
141
139
  Requires-Dist: gcsfs ==2023.9.2 ; extra == 'complete-api'
142
140
  Requires-Dist: google-cloud-bigquery[bqstorage,pandas] ==3.14.1 ; extra == 'complete-api'
143
141
  Requires-Dist: graphviz ~=0.20.0 ; extra == 'complete-api'
144
- Requires-Dist: humanfriendly ~=9.2 ; extra == 'complete-api'
145
- Requires-Dist: igz-mgmt ~=0.0.10 ; extra == 'complete-api'
142
+ Requires-Dist: humanfriendly ~=10.0 ; extra == 'complete-api'
143
+ Requires-Dist: igz-mgmt ~=0.1.0 ; extra == 'complete-api'
146
144
  Requires-Dist: kafka-python ~=2.0 ; extra == 'complete-api'
147
145
  Requires-Dist: mlflow ~=2.8 ; extra == 'complete-api'
148
146
  Requires-Dist: msrest ~=0.6.21 ; extra == 'complete-api'
149
- Requires-Dist: objgraph ~=3.5 ; extra == 'complete-api'
147
+ Requires-Dist: objgraph ~=3.6 ; extra == 'complete-api'
150
148
  Requires-Dist: plotly <5.12.0,~=5.4 ; extra == 'complete-api'
151
149
  Requires-Dist: pymysql ~=1.0 ; extra == 'complete-api'
152
150
  Requires-Dist: pyopenssl >=23 ; extra == 'complete-api'
153
151
  Requires-Dist: redis ~=4.3 ; extra == 'complete-api'
154
152
  Requires-Dist: s3fs ==2023.9.2 ; extra == 'complete-api'
155
153
  Requires-Dist: sqlalchemy ~=1.4 ; extra == 'complete-api'
156
- Requires-Dist: sqlite3-to-mysql ~=1.4 ; extra == 'complete-api'
157
154
  Requires-Dist: timelength ~=1.1 ; extra == 'complete-api'
158
155
  Requires-Dist: uvicorn ~=0.27.1 ; extra == 'complete-api'
159
156
  Provides-Extra: dask