mlrun 1.7.0rc9__py3-none-any.whl → 1.7.0rc12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (68)
  1. mlrun/__init__.py +1 -0
  2. mlrun/artifacts/manager.py +17 -6
  3. mlrun/artifacts/model.py +29 -25
  4. mlrun/common/schemas/__init__.py +1 -0
  5. mlrun/common/schemas/alert.py +122 -0
  6. mlrun/common/schemas/auth.py +4 -0
  7. mlrun/common/schemas/client_spec.py +1 -0
  8. mlrun/common/schemas/model_monitoring/constants.py +3 -1
  9. mlrun/config.py +6 -3
  10. mlrun/datastore/__init__.py +4 -3
  11. mlrun/datastore/base.py +6 -5
  12. mlrun/datastore/sources.py +9 -4
  13. mlrun/datastore/targets.py +11 -3
  14. mlrun/datastore/utils.py +16 -0
  15. mlrun/datastore/v3io.py +27 -50
  16. mlrun/db/base.py +44 -2
  17. mlrun/db/httpdb.py +192 -20
  18. mlrun/db/nopdb.py +36 -1
  19. mlrun/execution.py +21 -14
  20. mlrun/feature_store/api.py +6 -3
  21. mlrun/feature_store/feature_set.py +39 -23
  22. mlrun/feature_store/feature_vector.py +2 -1
  23. mlrun/feature_store/steps.py +30 -19
  24. mlrun/features.py +4 -13
  25. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  26. mlrun/frameworks/lgbm/__init__.py +1 -1
  27. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  28. mlrun/frameworks/lgbm/model_handler.py +1 -1
  29. mlrun/frameworks/pytorch/__init__.py +2 -2
  30. mlrun/frameworks/sklearn/__init__.py +1 -1
  31. mlrun/frameworks/tf_keras/__init__.py +1 -1
  32. mlrun/frameworks/xgboost/__init__.py +1 -1
  33. mlrun/model.py +2 -2
  34. mlrun/model_monitoring/application.py +11 -2
  35. mlrun/model_monitoring/applications/histogram_data_drift.py +3 -3
  36. mlrun/model_monitoring/controller.py +2 -3
  37. mlrun/model_monitoring/stream_processing.py +0 -1
  38. mlrun/model_monitoring/writer.py +32 -0
  39. mlrun/package/packagers_manager.py +1 -0
  40. mlrun/platforms/__init__.py +1 -1
  41. mlrun/platforms/other.py +1 -1
  42. mlrun/projects/operations.py +11 -4
  43. mlrun/projects/project.py +168 -62
  44. mlrun/run.py +72 -40
  45. mlrun/runtimes/mpijob/abstract.py +8 -8
  46. mlrun/runtimes/nuclio/function.py +9 -5
  47. mlrun/runtimes/nuclio/serving.py +12 -14
  48. mlrun/runtimes/pod.py +3 -3
  49. mlrun/secrets.py +6 -2
  50. mlrun/serving/routers.py +3 -1
  51. mlrun/serving/states.py +9 -35
  52. mlrun/serving/v2_serving.py +4 -4
  53. mlrun/utils/helpers.py +1 -1
  54. mlrun/utils/notifications/notification/base.py +12 -0
  55. mlrun/utils/notifications/notification/console.py +2 -0
  56. mlrun/utils/notifications/notification/git.py +3 -1
  57. mlrun/utils/notifications/notification/ipython.py +2 -0
  58. mlrun/utils/notifications/notification/slack.py +41 -13
  59. mlrun/utils/notifications/notification/webhook.py +11 -1
  60. mlrun/utils/retryer.py +2 -2
  61. mlrun/utils/version/version.json +2 -2
  62. {mlrun-1.7.0rc9.dist-info → mlrun-1.7.0rc12.dist-info}/METADATA +1 -1
  63. {mlrun-1.7.0rc9.dist-info → mlrun-1.7.0rc12.dist-info}/RECORD +67 -67
  64. mlrun/datastore/helpers.py +0 -18
  65. {mlrun-1.7.0rc9.dist-info → mlrun-1.7.0rc12.dist-info}/LICENSE +0 -0
  66. {mlrun-1.7.0rc9.dist-info → mlrun-1.7.0rc12.dist-info}/WHEEL +0 -0
  67. {mlrun-1.7.0rc9.dist-info → mlrun-1.7.0rc12.dist-info}/entry_points.txt +0 -0
  68. {mlrun-1.7.0rc9.dist-info → mlrun-1.7.0rc12.dist-info}/top_level.txt +0 -0
mlrun/__init__.py CHANGED
@@ -97,6 +97,7 @@ def set_environment(
  example::

  from os import path
+
  project_name, artifact_path = set_environment()
  set_environment("http://localhost:8080", artifact_path="./")
  set_environment(env_file="mlrun.env")
mlrun/artifacts/manager.py CHANGED
@@ -180,11 +180,13 @@ class ArtifactManager:
  upload=None,
  labels=None,
  db_key=None,
+ project=None,
+ is_retained_producer=None,
  **kwargs,
  ) -> Artifact:
  """
  Log an artifact to the DB and upload it to the artifact store.
- :param producer: The producer of the artifact, the producer depends from where the artifact is being logged.
+ :param producer: The producer of the artifact, the producer depends on where the artifact is being logged.
  :param item: The artifact to log.
  :param body: The body of the artifact.
  :param target_path: The target path of the artifact. (cannot be a relative path)
@@ -202,6 +204,9 @@ class ArtifactManager:
  :param labels: Labels to add to the artifact.
  :param db_key: The key to use when logging the artifact to the DB.
  If not provided, will generate a key based on the producer name and the artifact key.
+ :param project: The project to log the artifact to. If not provided, will use the producer's project.
+ :param is_retained_producer: Whether the producer is retained or not. Relevant to register artifacts flow
+ where a project may log artifacts which were produced by another producer.
  :param kwargs: Arguments to pass to the artifact class.
  :return: The logged artifact.
  """
@@ -226,7 +231,7 @@ class ArtifactManager:

  if db_key is None:
  # set the default artifact db key
- if producer.kind == "run":
+ if producer.kind == "run" and not is_retained_producer:
  # When the producer's type is "run,"
  # we generate a different db_key than the one we obtained in the request.
  # As a result, a new artifact for the requested key will be created,
@@ -251,8 +256,11 @@ class ArtifactManager:
  item.labels.update({"workflow-id": item.producer.get("workflow")})

  item.iter = producer.iteration
- project = producer.project
+ project = project or producer.project
  item.project = project
+ if is_retained_producer:
+ # if the producer is retained, we want to use the original target path
+ target_path = target_path or item.target_path

  # if target_path is provided and not relative, then no need to upload the artifact as it already exists
  if target_path:
@@ -260,7 +268,8 @@ class ArtifactManager:
  raise ValueError(
  f"target_path ({target_path}) param cannot be relative"
  )
- upload = False
+ if upload is None:
+ upload = False

  # if target_path wasn't provided, but src_path is not relative, then no need to upload the artifact as it
  # already exists. In this case set the target_path to the src_path and set upload to False
@@ -287,7 +296,9 @@ class ArtifactManager:

  if target_path and item.is_dir and not target_path.endswith("/"):
  target_path += "/"
- target_path = template_artifact_path(artifact_path=target_path, project=project)
+ target_path = template_artifact_path(
+ artifact_path=target_path, project=producer.project
+ )
  item.target_path = target_path

  item.before_log()
@@ -303,7 +314,7 @@ class ArtifactManager:
  item.upload(artifact_path=artifact_path)

  if db_key:
- self._log_to_db(db_key, producer.project, producer.inputs, item)
+ self._log_to_db(db_key, project, producer.inputs, item)
  size = str(item.size) or "?"
  db_str = "Y" if (self.artifact_db and db_key) else "N"
  logger.debug(
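
For orientation, a minimal sketch of how the new parameters might be passed, assuming an ArtifactManager instance ("manager") and a producer object are already in hand; the key, path, and project names below are made up:

    from mlrun.artifacts import Artifact

    artifact = manager.log_artifact(
        producer,                      # e.g. a run or project producer
        Artifact(key="my-data", body=b"some bytes"),
        target_path="v3io:///projects/other-project/artifacts/my-data",
        upload=None,              # None now means "decide automatically" rather than always forcing False
        project="other-project",  # DB record is written to this project instead of producer.project
        is_retained_producer=True,  # register flow: keep the requested db_key and the item's target path
    )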
mlrun/artifacts/model.py CHANGED
@@ -11,9 +11,10 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+
  import tempfile
  from os import path
- from typing import Any
+ from typing import Any, Optional

  import pandas as pd
  import yaml
@@ -69,8 +70,8 @@ class ModelArtifactSpec(ArtifactSpec):
  model_file=None,
  metrics=None,
  paraemeters=None,
- inputs: list[Feature] = None,
- outputs: list[Feature] = None,
+ inputs: Optional[list[Feature]] = None,
+ outputs: Optional[list[Feature]] = None,
  framework=None,
  algorithm=None,
  feature_vector=None,
@@ -92,8 +93,8 @@ class ModelArtifactSpec(ArtifactSpec):
  self.model_file = model_file
  self.metrics = metrics or {}
  self.parameters = paraemeters or {}
- self.inputs: list[Feature] = inputs or []
- self.outputs: list[Feature] = outputs or []
+ self.inputs = inputs or []
+ self.outputs = outputs or []
  self.framework = framework
  self.algorithm = algorithm
  self.feature_vector = feature_vector
@@ -102,21 +103,21 @@ class ModelArtifactSpec(ArtifactSpec):
  self.model_target_file = model_target_file

  @property
- def inputs(self) -> list[Feature]:
+ def inputs(self) -> ObjectList:
  """input feature list"""
  return self._inputs

  @inputs.setter
- def inputs(self, inputs: list[Feature]):
+ def inputs(self, inputs: list[Feature]) -> None:
  self._inputs = ObjectList.from_list(Feature, inputs)

  @property
- def outputs(self) -> list[Feature]:
+ def outputs(self) -> ObjectList:
  """output feature list"""
  return self._outputs

  @outputs.setter
- def outputs(self, outputs: list[Feature]):
+ def outputs(self, outputs: list[Feature]) -> None:
  self._outputs = ObjectList.from_list(Feature, outputs)


@@ -176,22 +177,22 @@ class ModelArtifact(Artifact):
  self._spec = self._verify_dict(spec, "spec", ModelArtifactSpec)

  @property
- def inputs(self) -> list[Feature]:
+ def inputs(self) -> ObjectList:
  """input feature list"""
  return self.spec.inputs

  @inputs.setter
- def inputs(self, inputs: list[Feature]):
+ def inputs(self, inputs: list[Feature]) -> None:
  """input feature list"""
  self.spec.inputs = inputs

  @property
- def outputs(self) -> list[Feature]:
+ def outputs(self) -> ObjectList:
  """input feature list"""
  return self.spec.outputs

  @outputs.setter
- def outputs(self, outputs: list[Feature]):
+ def outputs(self, outputs: list[Feature]) -> None:
  """input feature list"""
  self.spec.outputs = outputs

@@ -445,14 +446,14 @@ class LegacyModelArtifact(LegacyArtifact):
  **kwargs,
  ):
  super().__init__(key, body, format=format, target_path=target_path, **kwargs)
- self._inputs: ObjectList = None
- self._outputs: ObjectList = None
+ self._inputs: Optional[ObjectList] = None
+ self._outputs: Optional[ObjectList] = None

  self.model_file = model_file
  self.parameters = parameters or {}
  self.metrics = metrics or {}
- self.inputs: list[Feature] = inputs or []
- self.outputs: list[Feature] = outputs or []
+ self.inputs = inputs or []
+ self.outputs = outputs or []
  self.extra_data = extra_data or {}
  self.framework = framework
  self.algorithm = algorithm
@@ -462,21 +463,21 @@ class LegacyModelArtifact(LegacyArtifact):
  self.model_target_file = model_target_file

  @property
- def inputs(self) -> list[Feature]:
+ def inputs(self) -> Optional[ObjectList]:
  """input feature list"""
  return self._inputs

  @inputs.setter
- def inputs(self, inputs: list[Feature]):
+ def inputs(self, inputs: list[Feature]) -> None:
  self._inputs = ObjectList.from_list(Feature, inputs)

  @property
- def outputs(self) -> list[Feature]:
+ def outputs(self) -> Optional[ObjectList]:
  """output feature list"""
  return self._outputs

  @outputs.setter
- def outputs(self, outputs: list[Feature]):
+ def outputs(self, outputs: list[Feature]) -> None:
  self._outputs = ObjectList.from_list(Feature, outputs)

  def infer_from_df(self, df, label_columns=None, with_stats=True, num_bins=None):
@@ -552,9 +553,9 @@ def get_model(model_dir, suffix=""):

  example::

- model_file, model_artifact, extra_data = get_model(models_path, suffix='.pkl')
+ model_file, model_artifact, extra_data = get_model(models_path, suffix=".pkl")
  model = load(open(model_file, "rb"))
- categories = extra_data['categories'].as_df()
+ categories = extra_data["categories"].as_df()

  :param model_dir: model dir or artifact path (store://..) or DataItem
  :param suffix: model filename suffix (when using a dir)
@@ -663,8 +664,11 @@ def update_model(

  example::

- update_model(model_path, metrics={'speed': 100},
- extra_data={'my_data': b'some text', 'file': 's3://mybucket/..'})
+ update_model(
+ model_path,
+ metrics={"speed": 100},
+ extra_data={"my_data": b"some text", "file": "s3://mybucket/.."},
+ )

  :param model_artifact: model artifact object or path (store://..) or DataItem
  :param parameters: parameters dict
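
The annotation changes above make the signatures match the runtime behavior: the getters return an ObjectList keyed by feature name, while the setters still accept a plain list of Feature objects. A rough sketch (feature names are illustrative):

    from mlrun.artifacts.model import ModelArtifact
    from mlrun.features import Feature

    model = ModelArtifact(key="my-model")
    model.inputs = [Feature(value_type="float", name="sepal_length")]  # setter takes list[Feature]
    print(type(model.inputs))                        # ObjectList, not a plain list
    print(model.inputs["sepal_length"].value_type)   # keyed access by feature name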
mlrun/common/schemas/__init__.py CHANGED
@@ -14,6 +14,7 @@
  #
  # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx

+ from .alert import AlertActiveState, AlertConfig, Event
  from .api_gateway import (
  APIGateway,
  APIGatewayAuthenticationMode,
mlrun/common/schemas/alert.py ADDED
@@ -0,0 +1,122 @@
+ # Copyright 2023 Iguazio
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ from datetime import datetime
+ from typing import Annotated, Optional, Union
+
+ import pydantic
+
+ from mlrun.common.schemas.notification import Notification
+ from mlrun.common.types import StrEnum
+
+
+ class EventEntityKind(StrEnum):
+ MODEL = "model"
+ JOB = "job"
+
+
+ class EventEntity(pydantic.BaseModel):
+ kind: EventEntityKind
+ project: str
+ id: str
+
+
+ class EventKind(StrEnum):
+ DRIFT_DETECTED = "drift_detected"
+ DRIFT_SUSPECTED = "drift_suspected"
+ FAILED = "failed"
+
+
+ _event_kind_entity_map = {
+ EventKind.DRIFT_SUSPECTED: [EventEntityKind.MODEL],
+ EventKind.DRIFT_DETECTED: [EventEntityKind.MODEL],
+ EventKind.FAILED: [EventEntityKind.JOB],
+ }
+
+
+ class Event(pydantic.BaseModel):
+ kind: EventKind
+ timestamp: Union[str, datetime] = None # occurrence time
+ entity: EventEntity
+ value: Optional[Union[float, str]] = None
+
+ def is_valid(self):
+ return self.entity.kind in _event_kind_entity_map[self.kind]
+
+
+ class AlertActiveState(StrEnum):
+ ACTIVE = "active"
+ INACTIVE = "inactive"
+
+
+ class AlertSeverity(StrEnum):
+ LOW = "low"
+ MEDIUM = "medium"
+ HIGH = "high"
+
+
+ # what should trigger the alert. must be either event (at least 1), or prometheus query
+ class AlertTrigger(pydantic.BaseModel):
+ events: list[EventKind] = []
+ prometheus_alert: str = None
+
+
+ class AlertCriteria(pydantic.BaseModel):
+ count: Annotated[
+ int,
+ pydantic.Field(
+ description="Number of events to wait until notification is sent"
+ ),
+ ] = 0
+ period: Annotated[
+ str,
+ pydantic.Field(
+ description="Time period during which event occurred. e.g. 1d, 3h, 5m, 15s"
+ ),
+ ] = None
+
+
+ class ResetPolicy(StrEnum):
+ MANUAL = "manual"
+ AUTO = "auto"
+
+
+ class AlertConfig(pydantic.BaseModel):
+ project: str
+ id: int = None
+ name: str
+ description: Optional[str] = ""
+ summary: Annotated[
+ str,
+ pydantic.Field(
+ description=(
+ "String to be sent in the notifications generated."
+ "e.g. 'Model {{ $project }}/{{ $entity }} is drifting.'"
+ )
+ ),
+ ]
+ created: Union[str, datetime] = None
+ severity: AlertSeverity
+ entity: EventEntity
+ trigger: AlertTrigger
+ criteria: Optional[AlertCriteria]
+ reset_policy: ResetPolicy = ResetPolicy.MANUAL
+ notifications: pydantic.conlist(Notification, min_items=1)
+ state: AlertActiveState = AlertActiveState.INACTIVE
+ count: Optional[int] = 0
+
+
+ class AlertsModes(StrEnum):
+ enabled = "enabled"
+ disabled = "disabled"
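
As a quick illustration of the new schema (all values are made up), an Event is only considered valid when its kind matches the allowed entity kinds in the internal map, and AlertConfig ties an entity, a trigger, and at least one notification together:

    from mlrun.common.schemas.alert import Event, EventEntity, EventEntityKind, EventKind

    event = Event(
        kind=EventKind.DRIFT_DETECTED,
        entity=EventEntity(kind=EventEntityKind.MODEL, project="my-project", id="endpoint-1"),
        value=0.82,
    )
    assert event.is_valid()  # drift events are only valid for model entities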
mlrun/common/schemas/auth.py CHANGED
@@ -58,6 +58,8 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
  pipeline = "pipeline"
  hub_source = "hub-source"
  workflow = "workflow"
+ alert = "alert"
+ event = "event"
  datastore_profile = "datastore-profile"
  api_gateway = "api-gateway"

@@ -83,6 +85,8 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
  AuthorizationResourceTypes.schedule: "/projects/{project_name}/schedules/{resource_name}",
  AuthorizationResourceTypes.secret: "/projects/{project_name}/secrets/{resource_name}",
  AuthorizationResourceTypes.run: "/projects/{project_name}/runs/{resource_name}",
+ AuthorizationResourceTypes.event: "/projects/{project_name}/events/{resource_name}",
+ AuthorizationResourceTypes.alert: "/projects/{project_name}/alerts/{resource_name}",
  # runtime resource doesn't have an identifier, we don't need any auth granularity behind project level
  AuthorizationResourceTypes.runtime_resource: "/projects/{project_name}/runtime-resources",
  AuthorizationResourceTypes.model_endpoint: "/projects/{project_name}/model-endpoints/{resource_name}",
mlrun/common/schemas/client_spec.py CHANGED
@@ -66,3 +66,4 @@ class ClientSpec(pydantic.BaseModel):
  logs: typing.Optional[dict]
  packagers: typing.Optional[dict]
  external_platform_tracking: typing.Optional[dict]
+ alerts_mode: typing.Optional[str]
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -308,4 +308,6 @@ class ControllerPolicy:
  BASE_PERIOD = "base_period"


- MLRUN_HISTOGRAM_DATA_DRIFT_APP_NAME = "histogram-data-drift"
+ class HistogramDataDriftApplicationConstants:
+ NAME = "histogram-data-drift"
+ GENERAL_RESULT_NAME = "general_drift"
mlrun/config.py CHANGED
@@ -549,10 +549,9 @@ default_config = {
  "feature_store": {
  "data_prefixes": {
  "default": "v3io:///projects/{project}/FeatureStore/{name}/{kind}",
- "nosql": "v3io:///projects/{project}/FeatureStore/{name}/{kind}",
+ "nosql": "v3io:///projects/{project}/FeatureStore/{name}/nosql",
  # "authority" is optional and generalizes [userinfo "@"] host [":" port]
- "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/{kind}",
- "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}",
+ "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/nosql",
  },
  "default_targets": "parquet,nosql",
  "default_job_image": "mlrun/mlrun",
@@ -688,6 +687,10 @@ default_config = {
  "access_key": "",
  },
  "grafana_url": "",
+ "alerts": {
+ # supported modes: "enabled", "disabled".
+ "mode": "disabled"
+ },
  }

  _is_running_as_api = None
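
A small sketch of how the new setting would be read on the client side, assuming the usual mlrun.mlconf accessor (which exposes nested config sections as attributes):

    import mlrun

    # "disabled" by default; the server may override it through the client spec's alerts_mode field
    print(mlrun.mlconf.alerts.mode)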
mlrun/datastore/__init__.py CHANGED
@@ -64,7 +64,7 @@ from .store_resources import (
  parse_store_uri,
  )
  from .targets import CSVTarget, NoSqlTarget, ParquetTarget, StreamTarget
- from .utils import parse_kafka_url
+ from .utils import get_kafka_brokers_from_dict, parse_kafka_url

  store_manager = StoreManager()

@@ -107,8 +107,9 @@ def get_stream_pusher(stream_path: str, **kwargs):
  :param stream_path: path/url of stream
  """

- if stream_path.startswith("kafka://") or "kafka_brokers" in kwargs:
- topic, brokers = parse_kafka_url(stream_path, kwargs.get("kafka_brokers"))
+ kafka_brokers = get_kafka_brokers_from_dict(kwargs)
+ if stream_path.startswith("kafka://") or kafka_brokers:
+ topic, brokers = parse_kafka_url(stream_path, kafka_brokers)
  return KafkaOutputStream(topic, brokers, kwargs.get("kafka_producer_options"))
  elif stream_path.startswith("http://") or stream_path.startswith("https://"):
  return HTTPOutputStream(stream_path=stream_path)
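
A minimal sketch of what the new lookup allows in get_stream_pusher (the topic and broker address are placeholders): both keyword spellings route to the Kafka pusher, with the old one emitting a FutureWarning:

    from mlrun.datastore import get_stream_pusher

    # preferred keyword
    pusher = get_stream_pusher("kafka://my-topic", kafka_brokers="broker-1:9092")

    # deprecated keyword: still accepted during the deprecation window, emits a FutureWarning
    legacy_pusher = get_stream_pusher(
        "kafka://my-topic", kafka_bootstrap_servers="broker-1:9092"
    )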
mlrun/datastore/base.py CHANGED
@@ -389,14 +389,15 @@ class DataItem:


  # reading run results using DataItem (run.artifact())
- train_run = train_iris_func.run(inputs={'dataset': dataset},
- params={'label_column': 'label'})
+ train_run = train_iris_func.run(
+ inputs={"dataset": dataset}, params={"label_column": "label"}
+ )

- train_run.artifact('confusion-matrix').show()
- test_set = train_run.artifact('test_set').as_df()
+ train_run.artifact("confusion-matrix").show()
+ test_set = train_run.artifact("test_set").as_df()

  # create and use DataItem from uri
- data = mlrun.get_dataitem('http://xyz/data.json').get()
+ data = mlrun.get_dataitem("http://xyz/data.json").get()
  """

  def __init__(
mlrun/datastore/sources.py CHANGED
@@ -406,12 +406,17 @@ class BigQuerySource(BaseSourceDriver):

  # use sql query
  query_string = "SELECT * FROM `the-psf.pypi.downloads20210328` LIMIT 5000"
- source = BigQuerySource("bq1", query=query_string,
- gcp_project="my_project",
- materialization_dataset="dataviews")
+ source = BigQuerySource(
+ "bq1",
+ query=query_string,
+ gcp_project="my_project",
+ materialization_dataset="dataviews",
+ )

  # read a table
- source = BigQuerySource("bq2", table="the-psf.pypi.downloads20210328", gcp_project="my_project")
+ source = BigQuerySource(
+ "bq2", table="the-psf.pypi.downloads20210328", gcp_project="my_project"
+ )


  :parameter name: source name
mlrun/datastore/targets.py CHANGED
@@ -1532,15 +1532,23 @@ class KafkaTarget(BaseStoreTarget):
  **kwargs,
  ):
  attrs = {}
+
+ # TODO: Remove this in 1.9.0
  if bootstrap_servers:
+ if brokers:
+ raise mlrun.errors.MLRunInvalidArgumentError(
+ "KafkaTarget cannot be created with both the 'brokers' parameter and the deprecated "
+ "'bootstrap_servers' parameter. Please use 'brokers' only."
+ )
  warnings.warn(
  "'bootstrap_servers' parameter is deprecated in 1.7.0 and will be removed in 1.9.0, "
  "use 'brokers' instead.",
- # TODO: Remove this in 1.9.0
  FutureWarning,
  )
- if bootstrap_servers is not None:
- attrs["brokers"] = brokers or bootstrap_servers
+ brokers = bootstrap_servers
+
+ if brokers:
+ attrs["brokers"] = brokers
  if producer_options is not None:
  attrs["producer_options"] = producer_options
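
A short sketch of how callers are affected (the topic path and broker address are illustrative):

    from mlrun.datastore.targets import KafkaTarget

    # preferred: pass broker addresses via 'brokers'
    target = KafkaTarget(path="kafka://my-topic", brokers="broker-1:9092")

    # deprecated: 'bootstrap_servers' still works but emits a FutureWarning and maps to 'brokers';
    # passing both 'brokers' and 'bootstrap_servers' now raises MLRunInvalidArgumentError
    legacy = KafkaTarget(path="kafka://my-topic", bootstrap_servers="broker-1:9092")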
 
mlrun/datastore/utils.py CHANGED
@@ -15,6 +15,7 @@
  import tarfile
  import tempfile
  import typing
+ import warnings
  from urllib.parse import parse_qs, urlparse

  import pandas as pd
@@ -164,3 +165,18 @@ def _generate_sql_query_with_time_filter(
  query = query.filter(getattr(table.c, time_column) <= end_time)

  return query, parse_dates
+
+
+ def get_kafka_brokers_from_dict(options: dict, pop=False) -> typing.Optional[str]:
+ get_or_pop = options.pop if pop else options.get
+ kafka_brokers = get_or_pop("kafka_brokers", None)
+ if kafka_brokers:
+ return kafka_brokers
+ kafka_bootstrap_servers = get_or_pop("kafka_bootstrap_servers", None)
+ if kafka_bootstrap_servers:
+ warnings.warn(
+ "The 'kafka_bootstrap_servers' parameter is deprecated and will be removed in "
+ "1.9.0. Please pass the 'kafka_brokers' parameter instead.",
+ FutureWarning,
+ )
+ return kafka_bootstrap_servers
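
A minimal usage sketch of the new helper (dictionary contents are made up); with pop=True the consumed key is removed so it is not forwarded twice:

    from mlrun.datastore.utils import get_kafka_brokers_from_dict

    options = {"kafka_bootstrap_servers": "broker-1:9092", "other_option": 1}

    brokers = get_kafka_brokers_from_dict(options, pop=True)  # emits a FutureWarning for the old key
    assert brokers == "broker-1:9092"
    assert "kafka_bootstrap_servers" not in options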
mlrun/datastore/v3io.py CHANGED
@@ -12,8 +12,6 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

- import mmap
- import os
  import time
  from datetime import datetime

@@ -22,7 +20,6 @@ import v3io
  from v3io.dataplane.response import HttpResponseError

  import mlrun
- from mlrun.datastore.helpers import ONE_GB, ONE_MB

  from ..platforms.iguazio import parse_path, split_path
  from .base import (
@@ -32,6 +29,7 @@ from .base import (
  )

  V3IO_LOCAL_ROOT = "v3io"
+ V3IO_DEFAULT_UPLOAD_CHUNK_SIZE = 1024 * 1024 * 100


  class V3ioStore(DataStore):
@@ -98,46 +96,28 @@ class V3ioStore(DataStore):
  )
  return self._sanitize_storage_options(res)

- def _upload(self, key: str, src_path: str, max_chunk_size: int = ONE_GB):
+ def _upload(
+ self,
+ key: str,
+ src_path: str,
+ max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
+ ):
  """helper function for upload method, allows for controlling max_chunk_size in testing"""
  container, path = split_path(self._join(key))
- file_size = os.path.getsize(src_path) # in bytes
- if file_size <= ONE_MB:
- with open(src_path, "rb") as source_file:
- data = source_file.read()
- self._do_object_request(
- self.object.put,
- container=container,
- path=path,
- body=data,
- append=False,
- )
- return
- # chunk must be a multiple of the ALLOCATIONGRANULARITY
- # https://docs.python.org/3/library/mmap.html
- if residue := max_chunk_size % mmap.ALLOCATIONGRANULARITY:
- # round down to the nearest multiple of ALLOCATIONGRANULARITY
- max_chunk_size -= residue
-
  with open(src_path, "rb") as file_obj:
- file_offset = 0
- while file_offset < file_size:
- chunk_size = min(file_size - file_offset, max_chunk_size)
- with mmap.mmap(
- file_obj.fileno(),
- length=chunk_size,
- access=mmap.ACCESS_READ,
- offset=file_offset,
- ) as mmap_obj:
- append = file_offset != 0
- self._do_object_request(
- self.object.put,
- container=container,
- path=path,
- body=mmap_obj,
- append=append,
- )
- file_offset += chunk_size
+ append = False
+ while True:
+ data = memoryview(file_obj.read(max_chunk_size))
+ if not data:
+ break
+ self._do_object_request(
+ self.object.put,
+ container=container,
+ path=path,
+ body=data,
+ append=append,
+ )
+ append = True

  def upload(self, key, src_path):
  return self._upload(key, src_path)
@@ -152,19 +132,16 @@ class V3ioStore(DataStore):
  num_bytes=size,
  ).body

- def _put(self, key, data, append=False, max_chunk_size: int = ONE_GB):
+ def _put(
+ self,
+ key,
+ data,
+ append=False,
+ max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
+ ):
  """helper function for put method, allows for controlling max_chunk_size in testing"""
  container, path = split_path(self._join(key))
  buffer_size = len(data) # in bytes
- if buffer_size <= ONE_MB:
- self._do_object_request(
- self.object.put,
- container=container,
- path=path,
- body=data,
- append=append,
- )
- return
  buffer_offset = 0
  try:
  data = memoryview(data)