mlrun 1.7.0rc13__py3-none-any.whl → 1.7.0rc21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.
Files changed (156)
  1. mlrun/__init__.py +10 -1
  2. mlrun/__main__.py +23 -111
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +36 -253
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +46 -42
  10. mlrun/artifacts/model.py +9 -141
  11. mlrun/artifacts/plots.py +14 -375
  12. mlrun/common/constants.py +65 -3
  13. mlrun/common/formatters/__init__.py +19 -0
  14. mlrun/{runtimes/mpijob/v1alpha1.py → common/formatters/artifact.py} +6 -14
  15. mlrun/common/formatters/base.py +113 -0
  16. mlrun/common/formatters/function.py +46 -0
  17. mlrun/common/formatters/pipeline.py +53 -0
  18. mlrun/common/formatters/project.py +51 -0
  19. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  20. mlrun/common/schemas/__init__.py +10 -5
  21. mlrun/common/schemas/alert.py +92 -11
  22. mlrun/common/schemas/api_gateway.py +56 -0
  23. mlrun/common/schemas/artifact.py +15 -5
  24. mlrun/common/schemas/auth.py +2 -0
  25. mlrun/common/schemas/client_spec.py +1 -0
  26. mlrun/common/schemas/frontend_spec.py +1 -0
  27. mlrun/common/schemas/function.py +4 -0
  28. mlrun/common/schemas/model_monitoring/__init__.py +15 -3
  29. mlrun/common/schemas/model_monitoring/constants.py +58 -7
  30. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  31. mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
  32. mlrun/common/schemas/pipeline.py +0 -9
  33. mlrun/common/schemas/project.py +6 -11
  34. mlrun/common/types.py +1 -0
  35. mlrun/config.py +36 -8
  36. mlrun/data_types/to_pandas.py +9 -9
  37. mlrun/datastore/base.py +41 -9
  38. mlrun/datastore/datastore.py +6 -2
  39. mlrun/datastore/datastore_profile.py +56 -4
  40. mlrun/datastore/hdfs.py +5 -0
  41. mlrun/datastore/inmem.py +2 -2
  42. mlrun/datastore/redis.py +2 -2
  43. mlrun/datastore/s3.py +5 -0
  44. mlrun/datastore/sources.py +147 -7
  45. mlrun/datastore/store_resources.py +7 -7
  46. mlrun/datastore/targets.py +129 -9
  47. mlrun/datastore/utils.py +42 -0
  48. mlrun/datastore/v3io.py +1 -1
  49. mlrun/db/auth_utils.py +152 -0
  50. mlrun/db/base.py +55 -11
  51. mlrun/db/httpdb.py +346 -107
  52. mlrun/db/nopdb.py +52 -10
  53. mlrun/errors.py +11 -0
  54. mlrun/execution.py +24 -9
  55. mlrun/feature_store/__init__.py +0 -2
  56. mlrun/feature_store/api.py +12 -47
  57. mlrun/feature_store/feature_set.py +9 -0
  58. mlrun/feature_store/feature_vector.py +8 -0
  59. mlrun/feature_store/ingestion.py +7 -6
  60. mlrun/feature_store/retrieval/base.py +9 -4
  61. mlrun/feature_store/retrieval/conversion.py +9 -9
  62. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  63. mlrun/feature_store/retrieval/job.py +9 -3
  64. mlrun/feature_store/retrieval/local_merger.py +2 -0
  65. mlrun/feature_store/retrieval/spark_merger.py +16 -0
  66. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  67. mlrun/frameworks/parallel_coordinates.py +2 -1
  68. mlrun/frameworks/tf_keras/__init__.py +4 -1
  69. mlrun/k8s_utils.py +10 -11
  70. mlrun/launcher/base.py +4 -3
  71. mlrun/launcher/client.py +5 -3
  72. mlrun/launcher/local.py +8 -2
  73. mlrun/launcher/remote.py +8 -2
  74. mlrun/lists.py +6 -2
  75. mlrun/model.py +62 -20
  76. mlrun/model_monitoring/__init__.py +1 -1
  77. mlrun/model_monitoring/api.py +41 -18
  78. mlrun/model_monitoring/application.py +5 -305
  79. mlrun/model_monitoring/applications/__init__.py +11 -0
  80. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  81. mlrun/model_monitoring/applications/base.py +280 -0
  82. mlrun/model_monitoring/applications/context.py +214 -0
  83. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  84. mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
  85. mlrun/model_monitoring/applications/results.py +99 -0
  86. mlrun/model_monitoring/controller.py +3 -1
  87. mlrun/model_monitoring/db/__init__.py +2 -0
  88. mlrun/model_monitoring/db/stores/__init__.py +0 -2
  89. mlrun/model_monitoring/db/stores/base/store.py +22 -37
  90. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
  91. mlrun/model_monitoring/db/stores/sqldb/models/base.py +39 -8
  92. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +27 -7
  93. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
  94. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +246 -224
  95. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +232 -216
  96. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  97. mlrun/model_monitoring/db/tsdb/base.py +329 -0
  98. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  99. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  100. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  101. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  102. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  103. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  104. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  105. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +636 -0
  106. mlrun/model_monitoring/evidently_application.py +6 -118
  107. mlrun/model_monitoring/helpers.py +46 -1
  108. mlrun/model_monitoring/model_endpoint.py +3 -2
  109. mlrun/model_monitoring/stream_processing.py +57 -216
  110. mlrun/model_monitoring/writer.py +134 -124
  111. mlrun/package/utils/_formatter.py +2 -2
  112. mlrun/platforms/__init__.py +10 -9
  113. mlrun/platforms/iguazio.py +21 -202
  114. mlrun/projects/operations.py +19 -12
  115. mlrun/projects/pipelines.py +103 -109
  116. mlrun/projects/project.py +377 -137
  117. mlrun/render.py +15 -14
  118. mlrun/run.py +16 -47
  119. mlrun/runtimes/__init__.py +6 -3
  120. mlrun/runtimes/base.py +8 -7
  121. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  122. mlrun/runtimes/funcdoc.py +0 -28
  123. mlrun/runtimes/kubejob.py +2 -1
  124. mlrun/runtimes/local.py +5 -2
  125. mlrun/runtimes/mpijob/__init__.py +0 -20
  126. mlrun/runtimes/mpijob/v1.py +1 -1
  127. mlrun/runtimes/nuclio/api_gateway.py +440 -208
  128. mlrun/runtimes/nuclio/application/application.py +170 -8
  129. mlrun/runtimes/nuclio/function.py +39 -49
  130. mlrun/runtimes/pod.py +21 -41
  131. mlrun/runtimes/remotesparkjob.py +9 -3
  132. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  133. mlrun/runtimes/utils.py +6 -45
  134. mlrun/serving/server.py +2 -1
  135. mlrun/serving/states.py +53 -2
  136. mlrun/serving/v2_serving.py +5 -1
  137. mlrun/track/tracker.py +2 -1
  138. mlrun/utils/async_http.py +25 -5
  139. mlrun/utils/helpers.py +107 -75
  140. mlrun/utils/logger.py +39 -7
  141. mlrun/utils/notifications/notification/__init__.py +14 -9
  142. mlrun/utils/notifications/notification/base.py +1 -1
  143. mlrun/utils/notifications/notification/slack.py +61 -13
  144. mlrun/utils/notifications/notification/webhook.py +1 -1
  145. mlrun/utils/notifications/notification_pusher.py +147 -16
  146. mlrun/utils/regex.py +9 -0
  147. mlrun/utils/v3io_clients.py +0 -1
  148. mlrun/utils/version/version.json +2 -2
  149. {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/METADATA +14 -6
  150. {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/RECORD +154 -133
  151. mlrun/kfpops.py +0 -865
  152. mlrun/platforms/other.py +0 -305
  153. {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/LICENSE +0 -0
  154. {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/WHEEL +0 -0
  155. {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/entry_points.txt +0 -0
  156. {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/top_level.txt +0 -0
mlrun/datastore/targets.py CHANGED
@@ -30,6 +30,7 @@ import mlrun
 import mlrun.utils.helpers
 from mlrun.config import config
 from mlrun.datastore.snowflake_utils import get_snowflake_spark_options
+from mlrun.datastore.utils import transform_list_filters_to_tuple
 from mlrun.model import DataSource, DataTarget, DataTargetBase, TargetPathObject
 from mlrun.utils import logger, now_date
 from mlrun.utils.helpers import to_parquet
@@ -656,6 +657,29 @@ class BaseStoreTarget(DataTargetBase):
     def _target_path_object(self):
         """return the actual/computed target path"""
         is_single_file = hasattr(self, "is_single_file") and self.is_single_file()
+
+        if self._resource and self.path:
+            parsed_url = urlparse(self.path)
+            # When the URL consists only of a scheme and an endpoint, with no path,
+            # generate a default path for ds and redis targets.
+            # KafkaTarget is skipped even when it uses the ds scheme (no default path for KafkaTarget)
+            if (
+                not isinstance(self, KafkaTarget)
+                and parsed_url.scheme in ["ds", "redis", "rediss"]
+                and (not parsed_url.path or parsed_url.path == "/")
+            ):
+                return TargetPathObject(
+                    _get_target_path(
+                        self,
+                        self._resource,
+                        self.run_id is not None,
+                        netloc=parsed_url.netloc,
+                        scheme=parsed_url.scheme,
+                    ),
+                    self.run_id,
+                    is_single_file,
+                )
+
         return self.get_path() or (
             TargetPathObject(
                 _get_target_path(self, self._resource, self.run_id is not None),
@@ -714,9 +738,13 @@ class BaseStoreTarget(DataTargetBase):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return the target data as dataframe"""
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         return mlrun.get_dataitem(self.get_target_path()).as_df(
             columns=columns,
             df_module=df_module,
@@ -730,7 +758,7 @@
         # options used in spark.read.load(**options)
         raise NotImplementedError()
 
-    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options={}):
+    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         return df
 
     def get_dask_options(self):
@@ -961,6 +989,7 @@ class ParquetTarget(BaseStoreTarget):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return the target data as dataframe"""
@@ -971,6 +1000,7 @@
             start_time=start_time,
             end_time=end_time,
             time_column=time_column,
+            additional_filters=transform_list_filters_to_tuple(additional_filters),
             **kwargs,
         )
         if not columns:
@@ -1101,8 +1131,12 @@ class CSVTarget(BaseStoreTarget):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         df = super().as_df(
             columns=columns,
             df_module=df_module,
@@ -1209,6 +1243,7 @@ class SnowflakeTarget(BaseStoreTarget):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         raise NotImplementedError()
@@ -1275,7 +1310,17 @@ class NoSqlBaseTarget(BaseStoreTarget):
     def get_dask_options(self):
         return {"format": "csv"}
 
-    def as_df(self, columns=None, df_module=None, **kwargs):
+    def as_df(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+        additional_filters=None,
+        **kwargs,
+    ):
         raise NotImplementedError()
 
     def write_dataframe(
@@ -1511,11 +1556,40 @@ class StreamTarget(BaseStoreTarget):
             **self.attributes,
         )
 
-    def as_df(self, columns=None, df_module=None, **kwargs):
+    def as_df(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+        additional_filters=None,
+        **kwargs,
+    ):
         raise NotImplementedError()
 
 
 class KafkaTarget(BaseStoreTarget):
+    """
+    Kafka target storage driver, used to write data into Kafka topics.
+
+    example::
+
+        # define target
+        kafka_target = KafkaTarget(
+            name="kafka", path="my_topic", brokers="localhost:9092"
+        )
+        # ingest
+        stocks_set.ingest(stocks, [kafka_target])
+
+    :param name:              target name
+    :param path:              topic name, e.g. "my_topic"
+    :param after_step:        optional, after what step in the graph to add the target
+    :param columns:           optional, which columns from the data to write
+    :param bootstrap_servers: deprecated, use the brokers parameter instead
+    :param producer_options:  additional configuration for the Kafka producer
+    :param brokers:           a Kafka broker as a host:port pair, or a list of Kafka brokers, e.g.
+                              "localhost:9092", or ["kafka-broker-1:9092", "kafka-broker-2:9092"]
+    """
+
     kind = TargetTypes.kafka
     is_table = False
     is_online = False
@@ -1597,7 +1671,17 @@ class KafkaTarget(BaseStoreTarget):
             **attributes,
         )
 
-    def as_df(self, columns=None, df_module=None, **kwargs):
+    def as_df(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+        additional_filters=None,
+        **kwargs,
+    ):
         raise NotImplementedError()
 
     def purge(self):
@@ -1644,7 +1728,17 @@ class TSDBTarget(BaseStoreTarget):
             **self.attributes,
         )
 
-    def as_df(self, columns=None, df_module=None, **kwargs):
+    def as_df(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+        additional_filters=None,
+        **kwargs,
+    ):
         raise NotImplementedError()
 
     def write_dataframe(
@@ -1755,11 +1849,16 @@ class DFTarget(BaseStoreTarget):
         self,
         columns=None,
         df_module=None,
+        entities=None,
         start_time=None,
        end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         return select_columns_from_df(
             filter_df_start_end_time(
                 self._df,
@@ -1934,6 +2033,7 @@ class SQLTarget(BaseStoreTarget):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         try:
@@ -1942,6 +2042,10 @@
         except (ModuleNotFoundError, ImportError) as exc:
             self._raise_sqlalchemy_import_error(exc)
 
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
+
         db_path, table_name, _, _, _, _ = self._parse_url()
         engine = sqlalchemy.create_engine(db_path)
         parse_dates: Optional[list[str]] = self.attributes.get("parse_dates")
@@ -2031,7 +2135,7 @@
             raise ValueError(f"Table named {table_name} is not exist")
 
         elif not table_exists and create_table:
-            TYPE_TO_SQL_TYPE = {
+            type_to_sql_type = {
                 int: sqlalchemy.Integer,
                 str: sqlalchemy.String(self.attributes.get("varchar_len")),
                 datetime.datetime: sqlalchemy.dialects.mysql.DATETIME(fsp=6),
@@ -2044,7 +2148,7 @@
             # creat new table with the given name
             columns = []
             for col, col_type in self.schema.items():
-                col_type_sql = TYPE_TO_SQL_TYPE.get(col_type)
+                col_type_sql = type_to_sql_type.get(col_type)
                 if col_type_sql is None:
                     raise TypeError(
                         f"'{col_type}' unsupported type for column '{col}'"
@@ -2088,7 +2192,7 @@ kind_to_driver = {
 }
 
 
-def _get_target_path(driver, resource, run_id_mode=False):
+def _get_target_path(driver, resource, run_id_mode=False, netloc=None, scheme=""):
     """return the default target path given the resource and target kind"""
     kind = driver.kind
     suffix = driver.suffix
@@ -2105,11 +2209,27 @@ def _get_target_path(driver, resource, run_id_mode=False):
     )
     name = resource.metadata.name
     project = resource.metadata.project or mlrun.mlconf.default_project
-    data_prefix = get_default_prefix_for_target(kind).format(
+
+    default_kind_name = kind
+    if scheme == "ds":
+        # "dsnosql" is not an actual target like Parquet or Redis; rather, it serves
+        # as a placeholder that can be used in any specified target
+        default_kind_name = "dsnosql"
+    if scheme == "redis" or scheme == "rediss":
+        default_kind_name = TargetTypes.redisnosql
+
+    netloc = netloc or ""
+    data_prefix = get_default_prefix_for_target(default_kind_name).format(
+        ds_profile_name=netloc,  # in the ds-profile case, this is the profile name
+        authority=netloc,  # in the redis case, {authority} is replaced with the netloc
         project=project,
         kind=kind,
        name=name,
     )
+
+    if scheme == "rediss":
+        data_prefix = data_prefix.replace("redis://", "rediss://", 1)
+
     # todo: handle ver tag changes, may need to copy files?
     if not run_id_mode:
         version = resource.metadata.tag
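
Note: most of the targets.py hunks above thread a new additional_filters argument through each target's as_df. A minimal sketch of the resulting call shape, assuming a Parquet target that was already written (the path below is a placeholder; per the hunks, ParquetTarget forwards the filters while the other targets only warn via additional_filters_warning):

from mlrun.datastore.targets import ParquetTarget

target = ParquetTarget(name="parquet", path="v3io:///projects/demo/data.parquet")

# Filters are (column, op, value) triples; plain lists are normalized to tuples
# by transform_list_filters_to_tuple() before being handed to the parquet reader.
df = target.as_df(
    additional_filters=[("age", ">", 30), ("city", "in", ["NY", "LA"])]
)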
mlrun/datastore/utils.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import math
 import tarfile
 import tempfile
 import typing
@@ -180,3 +181,44 @@ def get_kafka_brokers_from_dict(options: dict, pop=False) -> typing.Optional[str
         FutureWarning,
     )
     return kafka_bootstrap_servers
+
+
+def transform_list_filters_to_tuple(additional_filters):
+    tuple_filters = []
+    if not additional_filters:
+        return tuple_filters
+    validate_additional_filters(additional_filters)
+    for additional_filter in additional_filters:
+        tuple_filters.append(tuple(additional_filter))
+    return tuple_filters
+
+
+def validate_additional_filters(additional_filters):
+    nan_error_message = "using NaN in additional_filters is not supported"
+    if additional_filters in [None, [], ()]:
+        return
+    for filter_tuple in additional_filters:
+        if filter_tuple == () or filter_tuple == []:
+            continue
+        if not isinstance(filter_tuple, (list, tuple)):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"mlrun supports additional_filters only as a list of tuples."
+                f" Current additional_filters: {additional_filters}"
+            )
+        if isinstance(filter_tuple[0], (list, tuple)):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"additional_filters does not support nested list inside filter tuples except in -in- logic."
+                f" Current filter_tuple: {filter_tuple}."
+            )
+        if len(filter_tuple) != 3:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"illegal filter tuple length, {filter_tuple} in additional filters:"
+                f" {additional_filters}"
+            )
+        col_name, op, value = filter_tuple
+        if isinstance(value, float) and math.isnan(value):
+            raise mlrun.errors.MLRunInvalidArgumentError(nan_error_message)
+        elif isinstance(value, (list, tuple)):
+            for sub_value in value:
+                if isinstance(sub_value, float) and math.isnan(sub_value):
+                    raise mlrun.errors.MLRunInvalidArgumentError(nan_error_message)
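
The two helpers added above normalize filter lists to tuples and validate their shape. A short sketch of their observable behavior, following directly from the code above:

from mlrun.datastore.utils import (
    transform_list_filters_to_tuple,
    validate_additional_filters,
)

# list filters are normalized to tuples
assert transform_list_filters_to_tuple([["age", ">", 30]]) == [("age", ">", 30)]

# empty input short-circuits to an empty list
assert transform_list_filters_to_tuple(None) == []

# a filter of the wrong length raises MLRunInvalidArgumentError:
# validate_additional_filters([("age", ">")])

# NaN is rejected, both as a scalar and inside an "in"-style value list:
# validate_additional_filters([("score", "==", float("nan"))])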
mlrun/datastore/v3io.py CHANGED
@@ -29,7 +29,7 @@ from .base import (
 )
 
 V3IO_LOCAL_ROOT = "v3io"
-V3IO_DEFAULT_UPLOAD_CHUNK_SIZE = 1024 * 1024 * 100
+V3IO_DEFAULT_UPLOAD_CHUNK_SIZE = 1024 * 1024 * 10
 
 
 class V3ioStore(DataStore):
mlrun/db/auth_utils.py ADDED
@@ -0,0 +1,152 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import ABC, abstractmethod
+from datetime import datetime, timedelta
+
+import requests
+
+import mlrun.errors
+from mlrun.utils import logger
+
+
+class TokenProvider(ABC):
+    @abstractmethod
+    def get_token(self):
+        pass
+
+    @abstractmethod
+    def is_iguazio_session(self):
+        pass
+
+
+class StaticTokenProvider(TokenProvider):
+    def __init__(self, token: str):
+        self.token = token
+
+    def get_token(self):
+        return self.token
+
+    def is_iguazio_session(self):
+        return mlrun.platforms.iguazio.is_iguazio_session(self.token)
+
+
+class OAuthClientIDTokenProvider(TokenProvider):
+    def __init__(
+        self, token_endpoint: str, client_id: str, client_secret: str, timeout=5
+    ):
+        if not token_endpoint or not client_id or not client_secret:
+            raise mlrun.errors.MLRunValueError(
+                "Invalid client_id configuration for authentication. Must provide token endpoint, client-id and secret"
+            )
+        self.token_endpoint = token_endpoint
+        self.client_id = client_id
+        self.client_secret = client_secret
+        self.timeout = timeout
+
+        # Since we're only issuing POST requests, which are effectively disguised GETs, it's ok to allow
+        # retries on them.
+        self._session = mlrun.utils.HTTPSessionWithRetry(
+            retry_on_post=True,
+            verbose=True,
+        )
+
+        self._cleanup()
+        self._refresh_token_if_needed()
+
+    def get_token(self):
+        self._refresh_token_if_needed()
+        return self.token
+
+    def is_iguazio_session(self):
+        return False
+
+    def _cleanup(self):
+        self.token = self.token_expiry_time = self.token_refresh_time = None
+
+    def _refresh_token_if_needed(self):
+        now = datetime.now()
+        if self.token:
+            if self.token_refresh_time and now <= self.token_refresh_time:
+                return self.token
+
+            # We only clean up if the token has really expired - even if we fail to refresh it, we can
+            # still use the existing token, given that it hasn't expired.
+            if now >= self.token_expiry_time:
+                self._cleanup()
+
+        self._issue_token_request()
+        return self.token
+
+    def _issue_token_request(self, raise_on_error=False):
+        try:
+            headers = {"Content-Type": "application/x-www-form-urlencoded"}
+            request_body = {
+                "grant_type": "client_credentials",
+                "client_id": self.client_id,
+                "client_secret": self.client_secret,
+            }
+            response = self._session.request(
+                "POST",
+                self.token_endpoint,
+                timeout=self.timeout,
+                headers=headers,
+                data=request_body,
+            )
+        except requests.RequestException as exc:
+            error = f"Retrieving token failed: {mlrun.errors.err_to_str(exc)}"
+            if raise_on_error:
+                raise mlrun.errors.MLRunRuntimeError(error) from exc
+            else:
+                logger.warning(error)
+                return
+
+        if not response.ok:
+            error = "No error available"
+            if response.content:
+                try:
+                    data = response.json()
+                    error = data.get("error")
+                except Exception:
+                    pass
+            logger.warning(
+                "Retrieving token failed", status=response.status_code, error=error
+            )
+            if raise_on_error:
+                mlrun.errors.raise_for_status(response)
+            return
+
+        self._parse_response(response.json())
+
+    def _parse_response(self, data: dict):
+        # The response is described in https://datatracker.ietf.org/doc/html/rfc6749#section-4.4.3
+        # According to the spec, there isn't a refresh token - just the access token and its expiry time (in seconds).
+        self.token = data.get("access_token")
+        expires_in = data.get("expires_in")
+        if not self.token or not expires_in:
+            token_str = "****" if self.token else "missing"
+            logger.warning(
+                "Failed to parse token response", token=token_str, expires_in=expires_in
+            )
+            return
+
+        now = datetime.now()
+        self.token_expiry_time = now + timedelta(seconds=expires_in)
+        self.token_refresh_time = now + timedelta(seconds=expires_in / 2)
+        logger.info(
+            "Successfully retrieved client-id token",
+            expires_in=expires_in,
+            expiry=str(self.token_expiry_time),
+            refresh=str(self.token_refresh_time),
+        )
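
auth_utils.py introduces a small token-provider hierarchy for API authentication (consumed by the httpdb client, also changed in this release). A hedged usage sketch; the endpoint and credentials below are placeholders, not values from the diff:

from mlrun.db.auth_utils import OAuthClientIDTokenProvider, StaticTokenProvider

# wraps a fixed token, e.g. an Iguazio session or access key
static_provider = StaticTokenProvider("my-access-key")

# fetches and caches a client_credentials token; per the code above, it is
# refreshed after half of its expires_in window and discarded only once it
# has actually expired
oauth_provider = OAuthClientIDTokenProvider(
    token_endpoint="https://idp.example.com/oauth2/token",  # hypothetical IdP
    client_id="mlrun-client",
    client_secret="s3cr3t",
    timeout=5,
)
token = oauth_provider.get_token()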
mlrun/db/base.py CHANGED
@@ -16,6 +16,10 @@ import datetime
 from abc import ABC, abstractmethod
 from typing import Optional, Union
 
+import mlrun.alerts
+import mlrun.common
+import mlrun.common.formatters
+import mlrun.common.runtimes.constants
 import mlrun.common.schemas
 import mlrun.model_monitoring
 
@@ -62,7 +66,10 @@ class RunDBInterface(ABC):
         uid: Optional[Union[str, list[str]]] = None,
         project: Optional[str] = None,
         labels: Optional[Union[str, list[str]]] = None,
-        state: Optional[str] = None,
+        state: Optional[
+            mlrun.common.runtimes.constants.RunStates
+        ] = None,  # Backward compatibility
+        states: Optional[list[mlrun.common.runtimes.constants.RunStates]] = None,
         sort: bool = True,
         last: int = 0,
         iter: bool = False,
@@ -117,7 +124,18 @@ class RunDBInterface(ABC):
         pass
 
     @abstractmethod
-    def del_artifact(self, key, tag="", project="", tree=None, uid=None):
+    def del_artifact(
+        self,
+        key,
+        tag="",
+        project="",
+        tree=None,
+        uid=None,
+        deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
+            mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
+        ),
+        secrets: dict = None,
+    ):
         pass
 
     @abstractmethod
@@ -251,7 +269,7 @@ class RunDBInterface(ABC):
     def list_projects(
         self,
         owner: str = None,
-        format_: mlrun.common.schemas.ProjectsFormat = mlrun.common.schemas.ProjectsFormat.name_only,
+        format_: mlrun.common.formatters.ProjectFormat = mlrun.common.formatters.ProjectFormat.name_only,
         labels: list[str] = None,
         state: mlrun.common.schemas.ProjectState = None,
     ) -> mlrun.common.schemas.ProjectsOutput:
@@ -427,8 +445,8 @@ class RunDBInterface(ABC):
         namespace: str = None,
         timeout: int = 30,
         format_: Union[
-            str, mlrun.common.schemas.PipelinesFormat
-        ] = mlrun.common.schemas.PipelinesFormat.summary,
+            str, mlrun.common.formatters.PipelineFormat
+        ] = mlrun.common.formatters.PipelineFormat.summary,
         project: str = None,
     ):
         pass
@@ -442,8 +460,8 @@ class RunDBInterface(ABC):
         page_token: str = "",
         filter_: str = "",
         format_: Union[
-            str, mlrun.common.schemas.PipelinesFormat
-        ] = mlrun.common.schemas.PipelinesFormat.metadata_only,
+            str, mlrun.common.formatters.PipelineFormat
+        ] = mlrun.common.formatters.PipelineFormat.metadata_only,
         page_size: int = None,
     ) -> mlrun.common.schemas.PipelinesOutput:
         pass
@@ -543,7 +561,7 @@ class RunDBInterface(ABC):
         end: Optional[str] = None,
         metrics: Optional[list[str]] = None,
         features: bool = False,
-    ):
+    ) -> mlrun.model_monitoring.ModelEndpoint:
         pass
 
     @abstractmethod
@@ -617,8 +635,8 @@ class RunDBInterface(ABC):
     @abstractmethod
     def store_api_gateway(
         self,
-        project: str,
         api_gateway: mlrun.common.schemas.APIGateway,
+        project: str = None,
     ):
         pass
 
@@ -664,7 +682,7 @@ class RunDBInterface(ABC):
     def store_alert_config(
         self,
         alert_name: str,
-        alert_data: Union[dict, mlrun.common.schemas.AlertConfig],
+        alert_data: Union[dict, mlrun.alerts.alert.AlertConfig],
         project="",
     ):
         pass
@@ -685,6 +703,14 @@ class RunDBInterface(ABC):
     def reset_alert_config(self, alert_name: str, project=""):
         pass
 
+    @abstractmethod
+    def get_alert_template(self, template_name: str):
+        pass
+
+    @abstractmethod
+    def list_alert_templates(self):
+        pass
+
     @abstractmethod
     def get_builder_status(
         self,
@@ -802,7 +828,7 @@ class RunDBInterface(ABC):
         project: str,
         base_period: int = 10,
         image: str = "mlrun/mlrun",
-    ):
+    ) -> None:
         pass
 
     @abstractmethod
@@ -815,6 +841,24 @@ class RunDBInterface(ABC):
     ) -> None:
         pass
 
+    @abstractmethod
+    def disable_model_monitoring(
+        self,
+        project: str,
+        delete_resources: bool = True,
+        delete_stream_function: bool = False,
+        delete_histogram_data_drift_app: bool = True,
+        delete_user_applications: bool = False,
+        user_application_list: list[str] = None,
+    ) -> bool:
+        pass
+
+    @abstractmethod
+    def delete_model_monitoring_function(
+        self, project: str, functions: list[str]
+    ) -> bool:
+        pass
+
     @abstractmethod
     def deploy_histogram_data_drift_app(
         self, project: str, image: str = "mlrun/mlrun"
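
The RunDBInterface additions above are abstract; the concrete implementations live in mlrun/db/httpdb.py and mlrun/db/nopdb.py (both changed in this release). A hedged sketch of calling the extended surface through mlrun.get_run_db(), assuming a reachable MLRun API; the project, artifact key, and run states are placeholders:

import mlrun
from mlrun.common.schemas.artifact import ArtifactsDeletionStrategies

db = mlrun.get_run_db()

# del_artifact now accepts a deletion strategy (metadata_only is the default)
db.del_artifact(
    key="my-model",
    project="demo",
    deletion_strategy=ArtifactsDeletionStrategies.metadata_only,
)

# run listing gains a plural `states` filter alongside the backward-compatible `state`
runs = db.list_runs(project="demo", states=["completed", "error"])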