mlrun 1.7.0rc28__py3-none-any.whl → 1.7.0rc55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic.

Files changed (135)
  1. mlrun/__main__.py +4 -2
  2. mlrun/alerts/alert.py +75 -8
  3. mlrun/artifacts/base.py +1 -0
  4. mlrun/artifacts/manager.py +9 -2
  5. mlrun/common/constants.py +4 -1
  6. mlrun/common/db/sql_session.py +3 -2
  7. mlrun/common/formatters/__init__.py +1 -0
  8. mlrun/common/formatters/artifact.py +1 -0
  9. mlrun/{model_monitoring/application.py → common/formatters/feature_set.py} +20 -6
  10. mlrun/common/formatters/run.py +3 -0
  11. mlrun/common/helpers.py +0 -1
  12. mlrun/common/schemas/__init__.py +3 -1
  13. mlrun/common/schemas/alert.py +15 -12
  14. mlrun/common/schemas/api_gateway.py +6 -6
  15. mlrun/common/schemas/auth.py +5 -0
  16. mlrun/common/schemas/client_spec.py +0 -1
  17. mlrun/common/schemas/common.py +7 -4
  18. mlrun/common/schemas/frontend_spec.py +7 -0
  19. mlrun/common/schemas/function.py +7 -0
  20. mlrun/common/schemas/model_monitoring/__init__.py +4 -3
  21. mlrun/common/schemas/model_monitoring/constants.py +41 -26
  22. mlrun/common/schemas/model_monitoring/model_endpoints.py +23 -47
  23. mlrun/common/schemas/notification.py +69 -12
  24. mlrun/common/schemas/project.py +45 -12
  25. mlrun/common/schemas/workflow.py +10 -2
  26. mlrun/common/types.py +1 -0
  27. mlrun/config.py +91 -35
  28. mlrun/data_types/data_types.py +6 -1
  29. mlrun/data_types/spark.py +2 -2
  30. mlrun/data_types/to_pandas.py +57 -25
  31. mlrun/datastore/__init__.py +1 -0
  32. mlrun/datastore/alibaba_oss.py +3 -2
  33. mlrun/datastore/azure_blob.py +125 -37
  34. mlrun/datastore/base.py +42 -21
  35. mlrun/datastore/datastore.py +4 -2
  36. mlrun/datastore/datastore_profile.py +1 -1
  37. mlrun/datastore/dbfs_store.py +3 -7
  38. mlrun/datastore/filestore.py +1 -3
  39. mlrun/datastore/google_cloud_storage.py +85 -29
  40. mlrun/datastore/inmem.py +4 -1
  41. mlrun/datastore/redis.py +1 -0
  42. mlrun/datastore/s3.py +25 -12
  43. mlrun/datastore/sources.py +76 -4
  44. mlrun/datastore/spark_utils.py +30 -0
  45. mlrun/datastore/storeytargets.py +151 -0
  46. mlrun/datastore/targets.py +102 -131
  47. mlrun/datastore/v3io.py +1 -0
  48. mlrun/db/base.py +15 -6
  49. mlrun/db/httpdb.py +57 -28
  50. mlrun/db/nopdb.py +29 -5
  51. mlrun/errors.py +20 -3
  52. mlrun/execution.py +46 -5
  53. mlrun/feature_store/api.py +25 -1
  54. mlrun/feature_store/common.py +6 -11
  55. mlrun/feature_store/feature_vector.py +3 -1
  56. mlrun/feature_store/retrieval/job.py +4 -1
  57. mlrun/feature_store/retrieval/spark_merger.py +10 -39
  58. mlrun/feature_store/steps.py +8 -0
  59. mlrun/frameworks/_common/plan.py +3 -3
  60. mlrun/frameworks/_ml_common/plan.py +1 -1
  61. mlrun/frameworks/parallel_coordinates.py +2 -3
  62. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  63. mlrun/k8s_utils.py +48 -2
  64. mlrun/launcher/client.py +6 -6
  65. mlrun/launcher/local.py +2 -2
  66. mlrun/model.py +215 -34
  67. mlrun/model_monitoring/api.py +38 -24
  68. mlrun/model_monitoring/applications/__init__.py +1 -2
  69. mlrun/model_monitoring/applications/_application_steps.py +60 -29
  70. mlrun/model_monitoring/applications/base.py +2 -174
  71. mlrun/model_monitoring/applications/context.py +197 -70
  72. mlrun/model_monitoring/applications/evidently_base.py +11 -85
  73. mlrun/model_monitoring/applications/histogram_data_drift.py +21 -16
  74. mlrun/model_monitoring/applications/results.py +4 -4
  75. mlrun/model_monitoring/controller.py +110 -282
  76. mlrun/model_monitoring/db/stores/__init__.py +8 -3
  77. mlrun/model_monitoring/db/stores/base/store.py +3 -0
  78. mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
  79. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +18 -3
  80. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +43 -23
  81. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +48 -35
  82. mlrun/model_monitoring/db/tsdb/__init__.py +7 -2
  83. mlrun/model_monitoring/db/tsdb/base.py +147 -15
  84. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +94 -55
  85. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -3
  86. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +144 -38
  87. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +44 -3
  88. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +246 -57
  89. mlrun/model_monitoring/helpers.py +70 -50
  90. mlrun/model_monitoring/stream_processing.py +96 -195
  91. mlrun/model_monitoring/writer.py +13 -5
  92. mlrun/package/packagers/default_packager.py +2 -2
  93. mlrun/projects/operations.py +16 -8
  94. mlrun/projects/pipelines.py +126 -115
  95. mlrun/projects/project.py +286 -129
  96. mlrun/render.py +3 -3
  97. mlrun/run.py +38 -19
  98. mlrun/runtimes/__init__.py +19 -8
  99. mlrun/runtimes/base.py +4 -1
  100. mlrun/runtimes/daskjob.py +1 -1
  101. mlrun/runtimes/funcdoc.py +1 -1
  102. mlrun/runtimes/kubejob.py +6 -6
  103. mlrun/runtimes/local.py +12 -5
  104. mlrun/runtimes/nuclio/api_gateway.py +68 -8
  105. mlrun/runtimes/nuclio/application/application.py +307 -70
  106. mlrun/runtimes/nuclio/function.py +63 -14
  107. mlrun/runtimes/nuclio/serving.py +10 -10
  108. mlrun/runtimes/pod.py +25 -19
  109. mlrun/runtimes/remotesparkjob.py +2 -5
  110. mlrun/runtimes/sparkjob/spark3job.py +16 -17
  111. mlrun/runtimes/utils.py +34 -0
  112. mlrun/serving/routers.py +2 -5
  113. mlrun/serving/server.py +37 -19
  114. mlrun/serving/states.py +30 -3
  115. mlrun/serving/v2_serving.py +44 -35
  116. mlrun/track/trackers/mlflow_tracker.py +5 -0
  117. mlrun/utils/async_http.py +1 -1
  118. mlrun/utils/db.py +18 -0
  119. mlrun/utils/helpers.py +150 -36
  120. mlrun/utils/http.py +1 -1
  121. mlrun/utils/notifications/notification/__init__.py +0 -1
  122. mlrun/utils/notifications/notification/webhook.py +8 -1
  123. mlrun/utils/notifications/notification_pusher.py +1 -1
  124. mlrun/utils/v3io_clients.py +2 -2
  125. mlrun/utils/version/version.json +2 -2
  126. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/METADATA +153 -66
  127. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/RECORD +131 -134
  128. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/WHEEL +1 -1
  129. mlrun/feature_store/retrieval/conversion.py +0 -271
  130. mlrun/model_monitoring/controller_handler.py +0 -37
  131. mlrun/model_monitoring/evidently_application.py +0 -20
  132. mlrun/model_monitoring/prometheus.py +0 -216
  133. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/LICENSE +0 -0
  134. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/entry_points.txt +0 -0
  135. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/top_level.txt +0 -0
mlrun/datastore/targets.py CHANGED
@@ -47,7 +47,6 @@ from .spark_utils import spark_session_update_hadoop_options
 from .utils import (
     _generate_sql_query_with_time_filter,
     filter_df_start_end_time,
-    parse_kafka_url,
     select_columns_from_df,
 )

@@ -390,6 +389,7 @@ class BaseStoreTarget(DataTargetBase):
     is_offline = False
     support_spark = False
     support_storey = False
+    support_pandas = False
     support_append = False

     def __init__(
@@ -439,6 +439,12 @@ class BaseStoreTarget(DataTargetBase):
         self.storage_options = storage_options
         self.schema = schema or {}
         self.credentials_prefix = credentials_prefix
+        if credentials_prefix:
+            warnings.warn(
+                "The 'credentials_prefix' parameter is deprecated and will be removed in "
+                "1.9.0. Please use datastore profiles instead.",
+                FutureWarning,
+            )

         self._target = None
         self._resource = None
@@ -549,9 +555,7 @@ class BaseStoreTarget(DataTargetBase):
                 os.makedirs(dir, exist_ok=True)
             target_df = df
             partition_cols = None  # single parquet file
-            if not target_path.endswith(".parquet") and not target_path.endswith(
-                ".pq"
-            ):  # directory
+            if not mlrun.utils.helpers.is_parquet_file(target_path):  # directory
                 partition_cols = []
                 if timestamp_key and (
                     self.partitioned or self.time_partitioning_granularity
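Note: the repeated `.parquet`/`.pq` suffix checks above are consolidated into a single helper. Based on the inline logic it replaces, `mlrun.utils.helpers.is_parquet_file` presumably amounts to the following sketch (the real implementation lives in mlrun/utils/helpers.py, which also changed in this release):

def is_parquet_file(path) -> bool:
    # A path is treated as a single parquet file when it carries a parquet
    # suffix; empty paths and directory-style paths return False.
    return bool(path) and (path.endswith(".parquet") or path.endswith(".pq"))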
@@ -728,6 +732,10 @@ class BaseStoreTarget(DataTargetBase):
         timestamp_key=None,
         featureset_status=None,
     ):
+        if not self.support_storey:
+            raise mlrun.errors.MLRunRuntimeError(
+                f"{type(self).__name__} does not support storey engine"
+            )
         raise NotImplementedError()

     def purge(self):
@@ -756,6 +764,8 @@ class BaseStoreTarget(DataTargetBase):
         **kwargs,
     ):
         """return the target data as dataframe"""
+        if not self.support_pandas:
+            raise NotImplementedError()
         mlrun.utils.helpers.additional_filters_warning(
             additional_filters, self.__class__
         )
@@ -770,6 +780,10 @@ class BaseStoreTarget(DataTargetBase):

     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
         # options used in spark.read.load(**options)
+        if not self.support_spark:
+            raise mlrun.errors.MLRunRuntimeError(
+                f"{type(self).__name__} does not support spark engine"
+            )
         raise NotImplementedError()

     def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
@@ -813,6 +827,7 @@ class ParquetTarget(BaseStoreTarget):
     support_spark = True
     support_storey = True
     support_dask = True
+    support_pandas = True
     support_append = True

     def __init__(
@@ -918,10 +933,9 @@ class ParquetTarget(BaseStoreTarget):
             if time_unit == time_partitioning_granularity:
                 break

-        if (
-            not self.partitioned
-            and not self.get_target_path().endswith(".parquet")
-            and not self.get_target_path().endswith(".pq")
+        target_path = self.get_target_path()
+        if not self.partitioned and not mlrun.utils.helpers.is_parquet_file(
+            target_path
         ):
             partition_cols = []

@@ -929,25 +943,16 @@ class ParquetTarget(BaseStoreTarget):
         for key_column in key_columns:
             tuple_key_columns.append((key_column.name, key_column.value_type))

-        store, path_in_store, target_path = self._get_store_and_path()
-
-        storage_options = store.get_storage_options()
-        if storage_options and self.storage_options:
-            storage_options = merge(storage_options, self.storage_options)
-        else:
-            storage_options = storage_options or self.storage_options
-
         step = graph.add_step(
             name=self.name or "ParquetTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="storey.ParquetTarget",
+            class_name="mlrun.datastore.storeytargets.ParquetStoreyTarget",
             path=target_path,
             columns=column_list,
             index_cols=tuple_key_columns,
             partition_cols=partition_cols,
             time_field=timestamp_key,
-            storage_options=storage_options,
             max_events=self.max_events,
             flush_after_seconds=self.flush_after_seconds,
             update_last_written=featureset_status.update_last_written_for_target,
@@ -1040,9 +1045,7 @@ class ParquetTarget(BaseStoreTarget):
         return result

     def is_single_file(self):
-        if self.path:
-            return self.path.endswith(".parquet") or self.path.endswith(".pq")
-        return False
+        return mlrun.utils.helpers.is_parquet_file(self.path)

     def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         # If partitioning by time, add the necessary columns
@@ -1082,6 +1085,7 @@ class CSVTarget(BaseStoreTarget):
     is_offline = True
     support_spark = True
     support_storey = True
+    support_pandas = True

     @staticmethod
     def _write_dataframe(df, storage_options, target_path, partition_cols, **kwargs):
@@ -1103,17 +1107,16 @@ class CSVTarget(BaseStoreTarget):
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
-        store, path_in_store, target_path = self._get_store_and_path()
+        target_path = self.get_target_path()
         graph.add_step(
             name=self.name or "CSVTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="storey.CSVTarget",
+            class_name="mlrun.datastore.storeytargets.CSVStoreyTarget",
             path=target_path,
             columns=column_list,
             header=True,
             index_cols=key_columns,
-            storage_options=store.get_storage_options(),
             **self.attributes,
         )

@@ -1289,7 +1292,9 @@ class SnowflakeTarget(BaseStoreTarget):
         additional_filters=None,
         **kwargs,
     ):
-        raise NotImplementedError()
+        raise mlrun.errors.MLRunRuntimeError(
+            f"{type(self).__name__} does not support pandas engine"
+        )

     @property
     def source_spark_attributes(self) -> dict:
@@ -1325,6 +1330,19 @@ class NoSqlBaseTarget(BaseStoreTarget):
         timestamp_key=None,
         featureset_status=None,
     ):
+        table, column_list = self._get_table_and_columns(features, key_columns)
+
+        graph.add_step(
+            name=self.name or self.writer_step_name,
+            after=after,
+            graph_shape="cylinder",
+            class_name="mlrun.datastore.storeytargets.NoSqlStoreyTarget",
+            columns=column_list,
+            table=table,
+            **self.attributes,
+        )
+
+    def _get_table_and_columns(self, features, key_columns):
         key_columns = list(key_columns.keys())
         table = self._resource.uri
         column_list = self._get_column_list(
@@ -1343,15 +1361,7 @@ class NoSqlBaseTarget(BaseStoreTarget):
             col for col in column_list if col[0] not in aggregate_features
         ]

-        graph.add_step(
-            name=self.name or self.writer_step_name,
-            after=after,
-            graph_shape="cylinder",
-            class_name="storey.NoSqlTarget",
-            columns=column_list,
-            table=table,
-            **self.attributes,
-        )
+        return table, column_list

     def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         raise NotImplementedError()
@@ -1362,19 +1372,6 @@ class NoSqlBaseTarget(BaseStoreTarget):
     def get_dask_options(self):
         return {"format": "csv"}

-    def as_df(
-        self,
-        columns=None,
-        df_module=None,
-        entities=None,
-        start_time=None,
-        end_time=None,
-        time_column=None,
-        additional_filters=None,
-        **kwargs,
-    ):
-        raise NotImplementedError()
-
     def write_dataframe(
         self, df, key_column=None, timestamp_key=None, chunk_id=0, **kwargs
     ):
@@ -1487,11 +1484,9 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
     support_spark = True
     writer_step_name = "RedisNoSqlTarget"

-    # Fetch server url from the RedisNoSqlTarget::__init__() 'path' parameter.
-    # If not set fetch it from 'mlrun.mlconf.redis.url' (MLRUN_REDIS__URL environment variable).
-    # Then look for username and password at REDIS_xxx secrets
-    def _get_server_endpoint(self):
-        endpoint, uri = parse_path(self.get_target_path())
+    @staticmethod
+    def get_server_endpoint(path, credentials_prefix=None):
+        endpoint, uri = parse_path(path)
         endpoint = endpoint or mlrun.mlconf.redis.url
         if endpoint.startswith("ds://"):
             datastore_profile = datastore_profile_read(endpoint)
@@ -1508,8 +1503,15 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Provide Redis username and password only via secrets"
             )
-        user = self._get_credential("REDIS_USER", "")
-        password = self._get_credential("REDIS_PASSWORD", "")
+        credentials_prefix = credentials_prefix or mlrun.get_secret_or_env(
+            key="CREDENTIALS_PREFIX"
+        )
+        user = mlrun.get_secret_or_env(
+            "REDIS_USER", default="", prefix=credentials_prefix
+        )
+        password = mlrun.get_secret_or_env(
+            "REDIS_PASSWORD", default="", prefix=credentials_prefix
+        )
         host = parsed_endpoint.hostname
         port = parsed_endpoint.port if parsed_endpoint.port else "6379"
         scheme = parsed_endpoint.scheme
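Note: Redis endpoint resolution is now a static method that reads credentials through mlrun.get_secret_or_env with an optional prefix. A minimal usage sketch, assuming REDIS_USER/REDIS_PASSWORD are provided as project secrets or environment variables; the URL below is purely illustrative:

from mlrun.datastore.targets import RedisNoSqlTarget

# Resolve the Redis endpoint and in-store path the same way the writer step
# and get_spark_options() now do; credentials come from secrets/env vars,
# optionally namespaced by a credentials prefix.
endpoint, uri = RedisNoSqlTarget.get_server_endpoint(
    "redis://my-redis:6379/projects/demo/table", credentials_prefix=None
)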
@@ -1523,7 +1525,9 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
         from storey import Table
         from storey.redis_driver import RedisDriver

-        endpoint, uri = self._get_server_endpoint()
+        endpoint, uri = self.get_server_endpoint(
+            self.get_target_path(), self.credentials_prefix
+        )

         return Table(
             uri,
@@ -1532,7 +1536,9 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
         )

     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
-        endpoint, uri = self._get_server_endpoint()
+        endpoint, uri = self.get_server_endpoint(
+            self.get_target_path(), self.credentials_prefix
+        )
         parsed_endpoint = urlparse(endpoint)
         store, path_in_store, path = self._get_store_and_path()
         return {
@@ -1564,6 +1570,29 @@ class RedisNoSqlTarget(NoSqlBaseTarget):

         return df

+    def add_writer_step(
+        self,
+        graph,
+        after,
+        features,
+        key_columns=None,
+        timestamp_key=None,
+        featureset_status=None,
+    ):
+        table, column_list = self._get_table_and_columns(features, key_columns)
+
+        graph.add_step(
+            path=self.get_target_path(),
+            name=self.name or self.writer_step_name,
+            after=after,
+            graph_shape="cylinder",
+            class_name="mlrun.datastore.storeytargets.RedisNoSqlStoreyTarget",
+            columns=column_list,
+            table=table,
+            credentials_prefix=self.credentials_prefix,
+            **self.attributes,
+        )
+

 class StreamTarget(BaseStoreTarget):
     kind = TargetTypes.stream
@@ -1582,45 +1611,25 @@ class StreamTarget(BaseStoreTarget):
         timestamp_key=None,
         featureset_status=None,
     ):
-        from storey import V3ioDriver
-
         key_columns = list(key_columns.keys())
-        store, path_in_store, path = self._get_store_and_path()
-        if not path:
-            raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
-        endpoint, uri = parse_path(path)
-        storage_options = store.get_storage_options()
-        access_key = storage_options.get("v3io_access_key")
+
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
+        stream_path = self.get_target_path()
+        if not stream_path:
+            raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")

         graph.add_step(
             name=self.name or "StreamTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="storey.StreamTarget",
+            class_name="mlrun.datastore.storeytargets.StreamStoreyTarget",
             columns=column_list,
-            storage=V3ioDriver(
-                webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key
-            ),
-            stream_path=uri,
+            stream_path=stream_path,
             **self.attributes,
         )

-    def as_df(
-        self,
-        columns=None,
-        df_module=None,
-        entities=None,
-        start_time=None,
-        end_time=None,
-        time_column=None,
-        additional_filters=None,
-        **kwargs,
-    ):
-        raise NotImplementedError()
-

 class KafkaTarget(BaseStoreTarget):
     """
@@ -1693,49 +1702,21 @@ class KafkaTarget(BaseStoreTarget):
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
-        if self.path and self.path.startswith("ds://"):
-            datastore_profile = datastore_profile_read(self.path)
-            attributes = datastore_profile.attributes()
-            brokers = attributes.pop(
-                "brokers", attributes.pop("bootstrap_servers", None)
-            )
-            topic = datastore_profile.topic
-        else:
-            attributes = copy(self.attributes)
-            brokers = attributes.pop(
-                "brokers", attributes.pop("bootstrap_servers", None)
-            )
-            topic, brokers = parse_kafka_url(self.get_target_path(), brokers)
+        path = self.get_target_path()

-        if not topic:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "KafkaTarget requires a path (topic)"
-            )
+        if not path:
+            raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a path")

         graph.add_step(
             name=self.name or "KafkaTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="storey.KafkaTarget",
+            class_name="mlrun.datastore.storeytargets.KafkaStoreyTarget",
             columns=column_list,
-            topic=topic,
-            brokers=brokers,
-            **attributes,
+            path=path,
+            attributes=self.attributes,
         )

-    def as_df(
-        self,
-        columns=None,
-        df_module=None,
-        entities=None,
-        start_time=None,
-        end_time=None,
-        time_column=None,
-        additional_filters=None,
-        **kwargs,
-    ):
-        raise NotImplementedError()
-
     def purge(self):
         pass

@@ -1770,7 +1751,7 @@ class TSDBTarget(BaseStoreTarget):

         graph.add_step(
             name=self.name or "TSDBTarget",
-            class_name="storey.TSDBTarget",
+            class_name="mlrun.datastore.storeytargets.TSDBStoreyTarget",
             after=after,
             graph_shape="cylinder",
             path=uri,
@@ -1780,19 +1761,6 @@ class TSDBTarget(BaseStoreTarget):
             **self.attributes,
         )

-    def as_df(
-        self,
-        columns=None,
-        df_module=None,
-        entities=None,
-        start_time=None,
-        end_time=None,
-        time_column=None,
-        additional_filters=None,
-        **kwargs,
-    ):
-        raise NotImplementedError()
-
     def write_dataframe(
         self, df, key_column=None, timestamp_key=None, chunk_id=0, **kwargs
     ):
@@ -1830,6 +1798,7 @@ class CustomTarget(BaseStoreTarget):
     is_online = False
     support_spark = False
     support_storey = True
+    support_pandas = True

     def __init__(
         self,
@@ -1865,6 +1834,7 @@ class CustomTarget(BaseStoreTarget):
 class DFTarget(BaseStoreTarget):
     kind = TargetTypes.dataframe
     support_storey = True
+    support_pandas = True

     def __init__(self, *args, name="dataframe", **kwargs):
         self._df = None
@@ -1927,6 +1897,7 @@ class SQLTarget(BaseStoreTarget):
     is_online = True
     support_spark = False
     support_storey = True
+    support_pandas = True

     def __init__(
         self,
@@ -2069,7 +2040,7 @@ class SQLTarget(BaseStoreTarget):
             name=self.name or "SqlTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="storey.NoSqlTarget",
+            class_name="mlrun.datastore.storeytargets.NoSqlStoreyTarget",
             columns=column_list,
             header=True,
             table=table,
mlrun/datastore/v3io.py CHANGED
@@ -140,6 +140,7 @@ class V3ioStore(DataStore):
         max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
     ):
         """helper function for put method, allows for controlling max_chunk_size in testing"""
+        data, _ = self._prepare_put_data(data, append)
         container, path = split_path(self._join(key))
         buffer_size = len(data)  # in bytes
         buffer_offset = 0
mlrun/db/base.py CHANGED
@@ -154,6 +154,7 @@ class RunDBInterface(ABC):
             mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
         ),
         secrets: dict = None,
+        iter=None,
     ):
         pass

@@ -174,7 +175,9 @@ class RunDBInterface(ABC):
         pass

     @abstractmethod
-    def list_functions(self, name=None, project="", tag="", labels=None):
+    def list_functions(
+        self, name=None, project="", tag="", labels=None, since=None, until=None
+    ):
         pass

     @abstractmethod
@@ -239,9 +242,8 @@ class RunDBInterface(ABC):
             )
             artifact_identifiers.append(
                 mlrun.common.schemas.ArtifactIdentifier(
-                    key=mlrun.utils.get_in_artifact(artifact_obj, "key"),
-                    # we are passing tree as uid when storing an artifact, so if uid is not defined,
-                    # pass the tree as uid
+                    # we pass the db_key and not the key so the API will be able to find the artifact in the db
+                    key=mlrun.utils.get_in_artifact(artifact_obj, "db_key"),
                     uid=mlrun.utils.get_in_artifact(artifact_obj, "uid"),
                     producer_id=mlrun.utils.get_in_artifact(artifact_obj, "tree"),
                     kind=mlrun.utils.get_in_artifact(artifact_obj, "kind"),
@@ -393,6 +395,9 @@ class RunDBInterface(ABC):
         partition_order: Union[
             mlrun.common.schemas.OrderType, str
         ] = mlrun.common.schemas.OrderType.desc,
+        format_: Union[
+            str, mlrun.common.formatters.FeatureSetFormat
+        ] = mlrun.common.formatters.FeatureSetFormat.full,
     ) -> list[dict]:
         pass

@@ -687,8 +692,11 @@ class RunDBInterface(ABC):
     @abstractmethod
     def store_api_gateway(
         self,
-        api_gateway: mlrun.common.schemas.APIGateway,
-        project: str = None,
+        api_gateway: Union[
+            mlrun.common.schemas.APIGateway,
+            "mlrun.runtimes.nuclio.api_gateway.APIGateway",
+        ],
+        project: Optional[str] = None,
     ):
         pass

@@ -924,5 +932,6 @@ class RunDBInterface(ABC):
         self,
         project: str,
         credentials: dict[str, str],
+        replace_creds: bool,
     ) -> None:
         pass