mlrun 1.7.0rc7__py3-none-any.whl → 1.7.0rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of mlrun has been flagged as potentially problematic.

Files changed (52)
  1. mlrun/__main__.py +2 -0
  2. mlrun/common/schemas/__init__.py +3 -0
  3. mlrun/common/schemas/api_gateway.py +8 -1
  4. mlrun/common/schemas/hub.py +7 -9
  5. mlrun/common/schemas/model_monitoring/constants.py +1 -1
  6. mlrun/common/schemas/pagination.py +26 -0
  7. mlrun/common/schemas/project.py +15 -10
  8. mlrun/config.py +28 -10
  9. mlrun/datastore/__init__.py +3 -7
  10. mlrun/datastore/datastore_profile.py +19 -1
  11. mlrun/datastore/snowflake_utils.py +43 -0
  12. mlrun/datastore/sources.py +9 -26
  13. mlrun/datastore/targets.py +131 -11
  14. mlrun/datastore/utils.py +10 -5
  15. mlrun/db/base.py +44 -0
  16. mlrun/db/httpdb.py +122 -21
  17. mlrun/db/nopdb.py +107 -0
  18. mlrun/feature_store/api.py +3 -2
  19. mlrun/feature_store/retrieval/spark_merger.py +27 -23
  20. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  21. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  22. mlrun/kfpops.py +2 -5
  23. mlrun/launcher/base.py +1 -1
  24. mlrun/launcher/client.py +2 -2
  25. mlrun/model_monitoring/helpers.py +3 -1
  26. mlrun/projects/pipelines.py +1 -1
  27. mlrun/projects/project.py +32 -21
  28. mlrun/run.py +5 -1
  29. mlrun/runtimes/__init__.py +16 -0
  30. mlrun/runtimes/base.py +4 -1
  31. mlrun/runtimes/kubejob.py +26 -121
  32. mlrun/runtimes/nuclio/api_gateway.py +58 -8
  33. mlrun/runtimes/nuclio/application/application.py +79 -1
  34. mlrun/runtimes/nuclio/application/reverse_proxy.go +9 -1
  35. mlrun/runtimes/nuclio/function.py +11 -8
  36. mlrun/runtimes/nuclio/serving.py +2 -2
  37. mlrun/runtimes/pod.py +145 -0
  38. mlrun/runtimes/utils.py +0 -28
  39. mlrun/serving/remote.py +2 -3
  40. mlrun/serving/routers.py +4 -3
  41. mlrun/serving/server.py +1 -1
  42. mlrun/serving/states.py +6 -9
  43. mlrun/serving/v2_serving.py +4 -3
  44. mlrun/utils/http.py +1 -1
  45. mlrun/utils/retryer.py +1 -0
  46. mlrun/utils/version/version.json +2 -2
  47. {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc9.dist-info}/METADATA +15 -15
  48. {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc9.dist-info}/RECORD +52 -50
  49. {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc9.dist-info}/LICENSE +0 -0
  50. {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc9.dist-info}/WHEEL +0 -0
  51. {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc9.dist-info}/entry_points.txt +0 -0
  52. {mlrun-1.7.0rc7.dist-info → mlrun-1.7.0rc9.dist-info}/top_level.txt +0 -0
mlrun/datastore/utils.py CHANGED
@@ -23,24 +23,29 @@ import semver
 import mlrun.datastore


-def parse_kafka_url(url: str, bootstrap_servers: list = None) -> tuple[str, list]:
+def parse_kafka_url(
+    url: str, brokers: typing.Union[list, str] = None
+) -> tuple[str, list]:
     """Generating Kafka topic and adjusting a list of bootstrap servers.

     :param url:     URL path to parse using urllib.parse.urlparse.
-    :param bootstrap_servers: List of bootstrap servers for the kafka brokers.
+    :param brokers: List of kafka brokers.

     :return: A tuple of:
          [0] = Kafka topic value
          [1] = List of bootstrap servers
     """
-    bootstrap_servers = bootstrap_servers or []
+    brokers = brokers or []
+
+    if isinstance(brokers, str):
+        brokers = brokers.split(",")

     # Parse the provided URL into six components according to the general structure of a URL
     url = urlparse(url)

     # Add the network location to the bootstrap servers list
     if url.netloc:
-        bootstrap_servers = [url.netloc] + bootstrap_servers
+        brokers = [url.netloc] + brokers

     # Get the topic value from the parsed url
     query_dict = parse_qs(url.query)
@@ -49,7 +54,7 @@ def parse_kafka_url(url: str, bootstrap_servers: list = None) -> tuple[str, list
     else:
         topic = url.path
     topic = topic.lstrip("/")
-    return topic, bootstrap_servers
+    return topic, brokers


 def upload_tarball(source_dir, target, secrets=None):
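
Note: parse_kafka_url now accepts the broker list as either a Python list or a comma-separated string. A minimal usage sketch (the kafka:// URL and broker addresses are illustrative, not from this diff):

from mlrun.datastore.utils import parse_kafka_url

# brokers may now be passed as a comma-separated string instead of a list
topic, brokers = parse_kafka_url(
    "kafka://broker0:9092/my-topic", "broker1:9092,broker2:9092"
)
# topic == "my-topic"
# brokers == ["broker0:9092", "broker1:9092", "broker2:9092"]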
mlrun/db/base.py CHANGED
@@ -634,6 +634,27 @@ class RunDBInterface(ABC):
     def delete_api_gateway(self, name, project=None):
         pass

+    @abstractmethod
+    def remote_builder(
+        self,
+        func: "mlrun.runtimes.BaseRuntime",
+        with_mlrun: bool,
+        mlrun_version_specifier: Optional[str] = None,
+        skip_deployed: bool = False,
+        builder_env: Optional[dict] = None,
+        force_build: bool = False,
+    ):
+        pass
+
+    @abstractmethod
+    def deploy_nuclio_function(
+        self,
+        func: "mlrun.runtimes.RemoteRuntime",
+        builder_env: Optional[dict] = None,
+    ):
+        pass
+
+    @abstractmethod
     def get_builder_status(
         self,
         func: "mlrun.runtimes.BaseRuntime",
@@ -644,6 +665,16 @@ class RunDBInterface(ABC):
     ):
         pass

+    @abstractmethod
+    def get_nuclio_deploy_status(
+        self,
+        func: "mlrun.runtimes.RemoteRuntime",
+        last_log_timestamp: float = 0.0,
+        verbose: bool = False,
+    ):
+        pass
+
+    @abstractmethod
     def set_run_notifications(
         self,
         project: str,
@@ -652,6 +683,7 @@ class RunDBInterface(ABC):
     ):
         pass

+    @abstractmethod
     def store_run_notifications(
         self,
         notification_objects: list[mlrun.model.Notification],
@@ -661,40 +693,49 @@ class RunDBInterface(ABC):
     ):
         pass

+    @abstractmethod
     def get_log_size(self, uid, project=""):
         pass

+    @abstractmethod
     def watch_log(self, uid, project="", watch=True, offset=0):
         pass

+    @abstractmethod
     def get_datastore_profile(
         self, name: str, project: str
     ) -> Optional[mlrun.common.schemas.DatastoreProfile]:
         pass

+    @abstractmethod
     def delete_datastore_profile(
         self, name: str, project: str
     ) -> mlrun.common.schemas.DatastoreProfile:
         pass

+    @abstractmethod
     def list_datastore_profiles(
         self, project: str
     ) -> list[mlrun.common.schemas.DatastoreProfile]:
         pass

+    @abstractmethod
     def store_datastore_profile(
         self, profile: mlrun.common.schemas.DatastoreProfile, project: str
     ):
         pass

+    @abstractmethod
     def function_status(self, project, name, kind, selector):
         pass

+    @abstractmethod
     def start_function(
         self, func_url: str = None, function: "mlrun.runtimes.BaseRuntime" = None
     ):
         pass

+    @abstractmethod
     def submit_workflow(
         self,
         project: str,
@@ -713,6 +754,7 @@ class RunDBInterface(ABC):
     ) -> "mlrun.common.schemas.WorkflowResponse":
         pass

+    @abstractmethod
     def update_model_monitoring_controller(
         self,
         project: str,
@@ -721,6 +763,7 @@ class RunDBInterface(ABC):
     ):
         pass

+    @abstractmethod
     def enable_model_monitoring(
         self,
         project: str,
@@ -730,6 +773,7 @@ class RunDBInterface(ABC):
     ) -> None:
         raise NotImplementedError

+    @abstractmethod
     def deploy_histogram_data_drift_app(
         self, project: str, image: str = "mlrun/mlrun"
     ) -> None:
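
Note: the net effect here is that RunDBInterface methods which previously had concrete no-op bodies are now enforced as abstract, so every DB implementation (HTTPRunDB, NopDB below) must override them. A minimal sketch of what @abstractmethod enforcement means, using a generic class rather than actual mlrun code:

from abc import ABC, abstractmethod

class Base(ABC):
    @abstractmethod
    def watch_log(self, uid, project=""):
        pass

class Incomplete(Base):
    pass

try:
    Incomplete()  # instantiation fails fast for missing overrides
except TypeError as exc:
    # "Can't instantiate abstract class Incomplete with abstract method watch_log"
    print(exc)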
mlrun/db/httpdb.py CHANGED
@@ -1184,7 +1184,7 @@ class HTTPRunDB(RunDBInterface):
             period didn't pass.
         :param grace_period: Grace period given to the runtime resource before they are actually removed, counted from
             the moment they moved to terminal state
-            (defaults to mlrun.config.config.runtime_resources_deletion_grace_period).
+            (defaults to mlrun.mlconf.runtime_resources_deletion_grace_period).

         :returns: :py:class:`~mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput` listing the runtime resources
             that were removed.
@@ -1343,21 +1343,7 @@ class HTTPRunDB(RunDBInterface):
         :param builder_env: Kaniko builder pod env vars dict (for config/credentials)
         :param force_build: Force building the image, even when no changes were made
         """
-        is_s3_source = func.spec.build.source and func.spec.build.source.startswith(
-            "s3://"
-        )
-        is_ecr_image = mlrun.utils.is_ecr_url(config.httpdb.builder.docker_registry)
-        if not func.spec.build.load_source_on_run and is_s3_source and is_ecr_image:
-            logger.warning(
-                "Building a function image to ECR and loading an S3 source to the image may require conflicting access "
-                "keys. Only the permissions granted to the platform's configured secret will take affect "
-                "(see mlrun.config.config.httpdb.builder.docker_registry_secret). "
-                "In case the permissions are limited to ECR scope, you may use pull_at_runtime=True instead",
-                source=func.spec.build.source,
-                load_source_on_run=func.spec.build.load_source_on_run,
-                default_docker_registry=config.httpdb.builder.docker_registry,
-            )
-
+        self.warn_on_s3_and_ecr_permissions_conflict(func)
         try:
             req = {
                 "function": func.to_dict(),
@@ -1376,10 +1362,103 @@ class HTTPRunDB(RunDBInterface):

         if not resp.ok:
             logger.error(f"bad resp!!\n{resp.text}")
-            raise ValueError("bad function run response")
+            raise ValueError("bad submit build response")
+
+        return resp.json()
+
+    def deploy_nuclio_function(
+        self,
+        func: mlrun.runtimes.RemoteRuntime,
+        builder_env: Optional[dict] = None,
+    ):
+        """
+        Deploy a Nuclio function.
+        :param func:        Function to build.
+        :param builder_env: Kaniko builder pod env vars dict (for config/credentials)
+        """
+        func.metadata.project = func.metadata.project or config.default_project
+        self.warn_on_s3_and_ecr_permissions_conflict(func)
+        try:
+            req = {
+                "function": func.to_dict(),
+            }
+            if builder_env:
+                req["builder_env"] = builder_env
+            _path = (
+                f"projects/{func.metadata.project}/nuclio/{func.metadata.name}/deploy"
+            )
+            resp = self.api_call("POST", _path, json=req)
+        except OSError as err:
+            logger.error(f"error submitting nuclio deploy task: {err_to_str(err)}")
+            raise OSError(f"error: cannot submit deploy, {err_to_str(err)}")
+
+        if not resp.ok:
+            logger.error(f"deploy nuclio - bad response:\n{resp.text}")
+            raise ValueError("bad nuclio deploy response")

         return resp.json()

+    def get_nuclio_deploy_status(
+        self,
+        func: mlrun.runtimes.RemoteRuntime,
+        last_log_timestamp: float = 0.0,
+        verbose: bool = False,
+    ):
+        """Retrieve the status of a deploy operation currently in progress.
+
+        :param func:               Function object that is being built.
+        :param last_log_timestamp: Last timestamp of logs that were already retrieved. Function will return only logs
+                                   later than this parameter.
+        :param verbose:            Add verbose logs into the output.
+
+        :returns: The following parameters:
+
+            - Text of builder logs.
+            - Timestamp of last log retrieved, to be used in subsequent calls to this function.
+        """
+
+        try:
+            params = {
+                "name": normalize_name(func.metadata.name),
+                "project": func.metadata.project,
+                "tag": func.metadata.tag,
+                "last_log_timestamp": str(last_log_timestamp),
+                "verbose": bool2str(verbose),
+            }
+            _path = (
+                f"projects/{func.metadata.project}/nuclio/{func.metadata.name}/deploy"
+            )
+            resp = self.api_call("GET", _path, params=params)
+        except OSError as err:
+            logger.error(f"error getting deploy status: {err_to_str(err)}")
+            raise OSError(f"error: cannot get deploy status, {err_to_str(err)}")
+
+        if not resp.ok:
+            logger.warning(f"failed resp, {resp.text}")
+            raise RunDBError("bad function build response")
+
+        if resp.headers:
+            func.status.state = resp.headers.get("x-mlrun-function-status", "")
+            last_log_timestamp = float(
+                resp.headers.get("x-mlrun-last-timestamp", "0.0")
+            )
+            func.status.address = resp.headers.get("x-mlrun-address", "")
+            func.status.nuclio_name = resp.headers.get("x-mlrun-name", "")
+            func.status.internal_invocation_urls = resp.headers.get(
+                "x-mlrun-internal-invocation-urls", ""
+            ).split(",")
+            func.status.external_invocation_urls = resp.headers.get(
+                "x-mlrun-external-invocation-urls", ""
+            ).split(",")
+            func.status.container_image = resp.headers.get(
+                "x-mlrun-container-image", ""
+            )
+
+        text = ""
+        if resp.content:
+            text = resp.content.decode()
+        return text, last_log_timestamp
+
     def get_builder_status(
         self,
         func: BaseRuntime,
@@ -1441,9 +1520,14 @@ class HTTPRunDB(RunDBInterface):
             func.status.container_image = resp.headers.get(
                 "x-mlrun-container-image", ""
             )
-        else:
-            func.status.build_pod = resp.headers.get("builder_pod", "")
-            func.spec.image = resp.headers.get("function_image", "")
+
+        builder_pod = resp.headers.get("builder_pod", "")
+        if builder_pod:
+            func.status.build_pod = builder_pod
+
+        function_image = resp.headers.get("function_image", "")
+        if function_image:
+            func.spec.image = function_image

         text = ""
         if resp.content:
@@ -1506,7 +1590,7 @@ class HTTPRunDB(RunDBInterface):
         Retrieve updated information on project background tasks being executed.
         If no filter is provided, will return background tasks from the last week.

-        :param project: Project name (defaults to mlrun.config.config.default_project).
+        :param project: Project name (defaults to mlrun.mlconf.default_project).
         :param state: List only background tasks whose state is specified.
         :param created_from: Filter by background task created time in ``[created_from, created_to]``.
         :param created_to: Filter by background task created time in ``[created_from, created_to]``.
@@ -3703,6 +3787,23 @@ class HTTPRunDB(RunDBInterface):

         self.api_call(method="PUT", path=_path, json=profile.dict())

+    @staticmethod
+    def warn_on_s3_and_ecr_permissions_conflict(func):
+        is_s3_source = func.spec.build.source and func.spec.build.source.startswith(
+            "s3://"
+        )
+        is_ecr_image = mlrun.utils.is_ecr_url(config.httpdb.builder.docker_registry)
+        if not func.spec.build.load_source_on_run and is_s3_source and is_ecr_image:
+            logger.warning(
+                "Building a function image to ECR and loading an S3 source to the image may require conflicting access "
+                "keys. Only the permissions granted to the platform's configured secret will take affect "
+                "(see mlrun.config.config.httpdb.builder.docker_registry_secret). "
+                "In case the permissions are limited to ECR scope, you may use pull_at_runtime=True instead",
+                source=func.spec.build.source,
+                load_source_on_run=func.spec.build.load_source_on_run,
+                default_docker_registry=config.httpdb.builder.docker_registry,
+            )
+

 def _as_json(obj):
     fn = getattr(obj, "to_json", None)
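
Note: deploy_nuclio_function and get_nuclio_deploy_status mirror the existing remote_builder/get_builder_status pair, but target the new projects/{project}/nuclio/{name}/deploy endpoints. A hedged usage sketch; echo.py, the handler name, and the "ready"/"error" state values are illustrative assumptions, not taken from this diff:

import time
import mlrun

db = mlrun.get_run_db()  # an HTTPRunDB when MLRUN_DBPATH points at an API server
fn = mlrun.code_to_function(
    "echo", kind="nuclio", filename="echo.py", handler="handler"
)

db.deploy_nuclio_function(fn)
last_ts = 0.0
while fn.status.state not in ("ready", "error"):
    # returns builder log text plus the timestamp to resume from on the next call
    logs, last_ts = db.get_nuclio_deploy_status(fn, last_log_timestamp=last_ts)
    if logs:
        print(logs, end="")
    time.sleep(5)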
mlrun/db/nopdb.py CHANGED
@@ -528,6 +528,65 @@ class NopDB(RunDBInterface):
     ):
         pass

+    def remote_builder(
+        self,
+        func: "mlrun.runtimes.BaseRuntime",
+        with_mlrun: bool,
+        mlrun_version_specifier: Optional[str] = None,
+        skip_deployed: bool = False,
+        builder_env: Optional[dict] = None,
+        force_build: bool = False,
+    ):
+        pass
+
+    def deploy_nuclio_function(
+        self,
+        func: "mlrun.runtimes.RemoteRuntime",
+        builder_env: Optional[dict] = None,
+    ):
+        pass
+
+    def get_builder_status(
+        self,
+        func: "mlrun.runtimes.BaseRuntime",
+        offset: int = 0,
+        logs: bool = True,
+        last_log_timestamp: float = 0.0,
+        verbose: bool = False,
+    ):
+        pass
+
+    def get_nuclio_deploy_status(
+        self,
+        func: "mlrun.runtimes.RemoteRuntime",
+        last_log_timestamp: float = 0.0,
+        verbose: bool = False,
+    ):
+        pass
+
+    def set_run_notifications(
+        self,
+        project: str,
+        runs: list[mlrun.model.RunObject],
+        notifications: list[mlrun.model.Notification],
+    ):
+        pass
+
+    def store_run_notifications(
+        self,
+        notification_objects: list[mlrun.model.Notification],
+        run_uid: str,
+        project: str = None,
+        mask_params: bool = True,
+    ):
+        pass
+
+    def get_log_size(self, uid, project=""):
+        pass
+
+    def watch_log(self, uid, project="", watch=True, offset=0):
+        pass
+
     def get_datastore_profile(
         self, name: str, project: str
     ) -> Optional[mlrun.common.schemas.DatastoreProfile]:
@@ -545,3 +604,51 @@ class NopDB(RunDBInterface):
         self, profile: mlrun.common.schemas.DatastoreProfile, project: str
     ):
         pass
+
+    def function_status(self, project, name, kind, selector):
+        pass
+
+    def start_function(
+        self, func_url: str = None, function: "mlrun.runtimes.BaseRuntime" = None
+    ):
+        pass
+
+    def submit_workflow(
+        self,
+        project: str,
+        name: str,
+        workflow_spec: Union[
+            "mlrun.projects.pipelines.WorkflowSpec",
+            "mlrun.common.schemas.WorkflowSpec",
+            dict,
+        ],
+        arguments: Optional[dict] = None,
+        artifact_path: Optional[str] = None,
+        source: Optional[str] = None,
+        run_name: Optional[str] = None,
+        namespace: Optional[str] = None,
+        notifications: list["mlrun.model.Notification"] = None,
+    ) -> "mlrun.common.schemas.WorkflowResponse":
+        pass
+
+    def update_model_monitoring_controller(
+        self,
+        project: str,
+        base_period: int = 10,
+        image: str = "mlrun/mlrun",
+    ):
+        pass
+
+    def enable_model_monitoring(
+        self,
+        project: str,
+        base_period: int = 10,
+        image: str = "mlrun/mlrun",
+        deploy_histogram_data_drift_app: bool = True,
+    ) -> None:
+        raise NotImplementedError
+
+    def deploy_histogram_data_drift_app(
+        self, project: str, image: str = "mlrun/mlrun"
+    ) -> None:
+        raise NotImplementedError
mlrun/feature_store/api.py CHANGED
@@ -1121,9 +1121,10 @@ def _ingest_with_spark(
         df_to_write = target.prepare_spark_df(
             df_to_write, key_columns, timestamp_key, spark_options
         )
+        write_format = spark_options.pop("format", None)
         if overwrite:
             write_spark_dataframe_with_options(
-                spark_options, df_to_write, "overwrite"
+                spark_options, df_to_write, "overwrite", write_format=write_format
             )
         else:
             # appending an empty dataframe may cause an empty file to be created (e.g. when writing to parquet)
@@ -1131,7 +1132,7 @@ def _ingest_with_spark(
             df_to_write.persist()
             if df_to_write.count() > 0:
                 write_spark_dataframe_with_options(
                     spark_options, df_to_write, "append", write_format=write_format
                 )
         target.update_resource_status("ready")
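Note: the "format" key is now popped out of spark_options and passed separately, presumably so the write format reaches the Spark writer's format() call instead of being forwarded as an ordinary option. A minimal sketch of that pattern, assuming a helper of this shape (the body below is illustrative, not the actual mlrun implementation):

def write_spark_dataframe_with_options(spark_options, df, mode, write_format=None):
    # remaining options (paths, credentials, etc.) go through options()
    writer = df.write.mode(mode).options(**spark_options)
    if write_format:
        # e.g. "parquet" or a connector name such as "net.snowflake.spark.snowflake"
        writer = writer.format(write_format)
    writer.save()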
mlrun/feature_store/retrieval/spark_merger.py CHANGED
@@ -24,6 +24,32 @@ from .base import BaseMerger
 from .conversion import PandasConversionMixin


+def spark_df_to_pandas(spark_df):
+    # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
+    # when we upgrade pyspark, we should check whether this workaround is still necessary
+    # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
+    if semver.parse(pd.__version__)["major"] >= 2:
+        import pyspark.sql.functions as pyspark_functions
+
+        type_conversion_dict = {}
+        for field in spark_df.schema.fields:
+            if str(field.dataType) == "TimestampType":
+                spark_df = spark_df.withColumn(
+                    field.name,
+                    pyspark_functions.date_format(
+                        pyspark_functions.to_timestamp(field.name),
+                        "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS",
+                    ),
+                )
+                type_conversion_dict[field.name] = "datetime64[ns]"
+        df = PandasConversionMixin.toPandas(spark_df)
+        if type_conversion_dict:
+            df = df.astype(type_conversion_dict)
+        return df
+    else:
+        return PandasConversionMixin.toPandas(spark_df)
+
+
 class SparkFeatureMerger(BaseMerger):
     engine = "spark"
     support_offline = True
@@ -166,29 +192,7 @@ class SparkFeatureMerger(BaseMerger):
     def get_df(self, to_pandas=True):
         if to_pandas:
             if self._pandas_df is None:
-                df = self._result_df
-                # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
-                # when we upgrade pyspark, we should check whether this workaround is still necessary
-                # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
-                if semver.parse(pd.__version__)["major"] >= 2:
-                    import pyspark.sql.functions as pyspark_functions
-
-                    type_conversion_dict = {}
-                    for field in df.schema.fields:
-                        if str(field.dataType) == "TimestampType":
-                            df = df.withColumn(
-                                field.name,
-                                pyspark_functions.date_format(
-                                    pyspark_functions.to_timestamp(field.name),
-                                    "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS",
-                                ),
-                            )
-                            type_conversion_dict[field.name] = "datetime64[ns]"
-                    df = PandasConversionMixin.toPandas(df)
-                    if type_conversion_dict:
-                        df = df.astype(type_conversion_dict)
-                else:
-                    df = PandasConversionMixin.toPandas(df)
+                df = spark_df_to_pandas(self._result_df)
                 self._pandas_df = df
                 self._set_indexes(self._pandas_df)
         return self._pandas_df
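
Note: this is a pure extraction; the pandas>=2 timestamp workaround moves from get_df() into a reusable module-level spark_df_to_pandas() helper (timestamps are stringified in Spark, then cast back to datetime64[ns] in pandas). Illustrative usage, assuming a local Spark session:

from pyspark.sql import SparkSession
from mlrun.feature_store.retrieval.spark_merger import spark_df_to_pandas

spark = SparkSession.builder.master("local[1]").getOrCreate()
spark_df = spark.createDataFrame([("2024-01-01 00:00:00",)], ["ts"]).selectExpr(
    "to_timestamp(ts) as ts"
)
# sidesteps the pyspark/pandas>=2 timestamp conversion issue noted in the comments
pandas_df = spark_df_to_pandas(spark_df)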
mlrun/frameworks/tf_keras/callbacks/logging_callback.py CHANGED
@@ -17,7 +17,7 @@ from typing import Callable, Union
 import numpy as np
 import tensorflow as tf
 from tensorflow import Tensor, Variable
-from tensorflow.keras.callbacks import Callback
+from tensorflow.python.keras.callbacks import Callback

 import mlrun

mlrun/frameworks/tf_keras/mlrun_interface.py CHANGED
@@ -19,7 +19,8 @@ from typing import Union

 import tensorflow as tf
 from tensorflow import keras
-from tensorflow.keras.callbacks import (
+from tensorflow.keras.optimizers import Optimizer
+from tensorflow.python.keras.callbacks import (
     BaseLogger,
     Callback,
     CSVLogger,
@@ -27,7 +28,6 @@ from tensorflow.keras.callbacks import (
     ProgbarLogger,
     TensorBoard,
 )
-from tensorflow.keras.optimizers import Optimizer

 import mlrun
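Note: both files switch the callback imports from the public tensorflow.keras.callbacks path to the private tensorflow.python.keras.callbacks path. The diff does not state why; a defensive pattern for code that must tolerate both layouts is sketched below (an assumption on my part, not part of this release):

try:
    # private path, as used by the diff
    from tensorflow.python.keras.callbacks import Callback
except ImportError:
    # public alias, for TF builds where the private path is unavailable
    from tensorflow.keras.callbacks import Callback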
mlrun/kfpops.py CHANGED
@@ -103,7 +103,7 @@ def write_kfpmeta(struct):
             with open(path, "w") as fp:
                 fp.write(str(val))
         except Exception as exc:
-            logger.warning("Failed writing to temp file. Ignoring", exc=repr(exc))
+            logger.warning("Failed writing to temp file. Ignoring", exc=err_to_str(exc))
             pass

     text = "# Run Report\n"
@@ -112,10 +112,7 @@ def write_kfpmeta(struct):

     text += "## Metadata\n```yaml\n" + dict_to_yaml(struct) + "```\n"

-    metadata = {
-        "outputs": output_artifacts
-        + [{"type": "markdown", "storage": "inline", "source": text}]
-    }
+    metadata = {"outputs": [{"type": "markdown", "storage": "inline", "source": text}]}
     with open(os.path.join(KFPMETA_DIR, "mlpipeline-ui-metadata.json"), "w") as f:
         json.dump(metadata, f)
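Note: after this change the KFP UI metadata file carries only the inline markdown run report; the per-artifact output entries are no longer included. The resulting shape of mlpipeline-ui-metadata.json (report text illustrative):

metadata = {
    "outputs": [
        {"type": "markdown", "storage": "inline", "source": "# Run Report\n..."}
    ]
}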
mlrun/launcher/base.py CHANGED
@@ -353,7 +353,7 @@ class BaseLauncher(abc.ABC):
             or {}
         )
         state_thresholds = (
-            mlrun.config.config.function.spec.state_thresholds.default.to_dict()
+            mlrun.mlconf.function.spec.state_thresholds.default.to_dict()
             | state_thresholds
         )
         run.spec.state_thresholds = state_thresholds or run.spec.state_thresholds
mlrun/launcher/client.py CHANGED
@@ -47,7 +47,7 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
         If build is needed, set the image as the base_image for the build.
         If image is not given set the default one.
         """
-        if runtime.kind in mlrun.runtimes.RuntimeKinds.nuclio_runtimes():
+        if runtime.kind in mlrun.runtimes.RuntimeKinds.pure_nuclio_deployed_runtimes():
             return

         require_build = runtime.requires_build()
@@ -129,7 +129,7 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
             logger.info("no returned result (job may still be in progress)")
             results_tbl.append(run.to_dict())

-        if mlrun.utils.is_ipython and mlrun.config.config.ipython_widget:
+        if mlrun.utils.is_ipython and mlrun.mlconf.ipython_widget:
             results_tbl.show()
             print()
             ui_url = mlrun.utils.get_ui_url(project, uid)
mlrun/model_monitoring/helpers.py CHANGED
@@ -42,7 +42,7 @@ class _BatchDict(typing.TypedDict):
 def get_stream_path(
     project: str = None,
     function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
-):
+) -> str:
     """
     Get stream path from the project secret. If wasn't set, take it from the system configurations

@@ -61,6 +61,8 @@ def get_stream_path(
         function_name=function_name,
     )

+    if isinstance(stream_uri, list):  # ML-6043 - user side gets only the new stream uri
+        stream_uri = stream_uri[1]  # get new stream path, under projects
     return mlrun.common.model_monitoring.helpers.parse_monitoring_stream_path(
         stream_uri=stream_uri, project=project, function_name=function_name
     )
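
Note: per the ML-6043 comment, the stored value may be a two-element list holding both a legacy stream path and the new per-project one, and the client now keeps only the latter. A sketch of the selection logic with made-up v3io paths:

stream_uri = [
    "v3io:///legacy/stream-path",                           # [0] old location (illustrative)
    "v3io:///projects/my-project/model-endpoints/stream",   # [1] new path, under projects
]
if isinstance(stream_uri, list):
    stream_uri = stream_uri[1]  # user side gets only the new stream uri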
mlrun/projects/pipelines.py CHANGED
@@ -1071,7 +1071,7 @@ def load_and_run(
         )

     except Exception as exc:
-        logger.error("Failed to send slack notification", exc=exc)
+        logger.error("Failed to send slack notification", exc=err_to_str(exc))

     raise error
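
Note: this mirrors the kfpops.py change above; exceptions passed to the structured logger now go through err_to_str so they are emitted as plain strings. The helper lives in mlrun.errors; a minimal usage sketch:

from mlrun.errors import err_to_str

try:
    raise ValueError("boom")
except Exception as exc:
    # renders the exception as a readable string, so the structured logger
    # does not need to serialize the exception object itself
    print(err_to_str(exc))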