mlrun 1.10.0rc18__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (167) hide show
  1. mlrun/__init__.py +24 -3
  2. mlrun/__main__.py +0 -4
  3. mlrun/artifacts/dataset.py +2 -2
  4. mlrun/artifacts/document.py +6 -1
  5. mlrun/artifacts/llm_prompt.py +21 -15
  6. mlrun/artifacts/model.py +3 -3
  7. mlrun/artifacts/plots.py +1 -1
  8. mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
  9. mlrun/auth/nuclio.py +89 -0
  10. mlrun/auth/providers.py +429 -0
  11. mlrun/auth/utils.py +415 -0
  12. mlrun/common/constants.py +14 -0
  13. mlrun/common/model_monitoring/helpers.py +123 -0
  14. mlrun/common/runtimes/constants.py +28 -0
  15. mlrun/common/schemas/__init__.py +14 -3
  16. mlrun/common/schemas/alert.py +2 -2
  17. mlrun/common/schemas/api_gateway.py +3 -0
  18. mlrun/common/schemas/auth.py +12 -10
  19. mlrun/common/schemas/client_spec.py +4 -0
  20. mlrun/common/schemas/constants.py +25 -0
  21. mlrun/common/schemas/frontend_spec.py +1 -8
  22. mlrun/common/schemas/function.py +34 -0
  23. mlrun/common/schemas/hub.py +33 -20
  24. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  25. mlrun/common/schemas/model_monitoring/constants.py +12 -15
  26. mlrun/common/schemas/model_monitoring/functions.py +13 -4
  27. mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
  28. mlrun/common/schemas/pipeline.py +1 -1
  29. mlrun/common/schemas/secret.py +17 -2
  30. mlrun/common/secrets.py +95 -1
  31. mlrun/common/types.py +10 -10
  32. mlrun/config.py +69 -19
  33. mlrun/data_types/infer.py +2 -2
  34. mlrun/datastore/__init__.py +12 -5
  35. mlrun/datastore/azure_blob.py +162 -47
  36. mlrun/datastore/base.py +274 -10
  37. mlrun/datastore/datastore.py +7 -2
  38. mlrun/datastore/datastore_profile.py +84 -22
  39. mlrun/datastore/model_provider/huggingface_provider.py +225 -41
  40. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  41. mlrun/datastore/model_provider/model_provider.py +206 -74
  42. mlrun/datastore/model_provider/openai_provider.py +226 -66
  43. mlrun/datastore/s3.py +39 -18
  44. mlrun/datastore/sources.py +1 -1
  45. mlrun/datastore/store_resources.py +4 -4
  46. mlrun/datastore/storeytargets.py +17 -12
  47. mlrun/datastore/targets.py +1 -1
  48. mlrun/datastore/utils.py +25 -6
  49. mlrun/datastore/v3io.py +1 -1
  50. mlrun/db/base.py +63 -32
  51. mlrun/db/httpdb.py +373 -153
  52. mlrun/db/nopdb.py +54 -21
  53. mlrun/errors.py +4 -2
  54. mlrun/execution.py +66 -25
  55. mlrun/feature_store/api.py +1 -1
  56. mlrun/feature_store/common.py +1 -1
  57. mlrun/feature_store/feature_vector_utils.py +1 -1
  58. mlrun/feature_store/steps.py +8 -6
  59. mlrun/frameworks/_common/utils.py +3 -3
  60. mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
  61. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
  62. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
  63. mlrun/frameworks/_ml_common/utils.py +2 -1
  64. mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
  65. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
  66. mlrun/frameworks/onnx/dataset.py +2 -1
  67. mlrun/frameworks/onnx/mlrun_interface.py +2 -1
  68. mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
  69. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
  70. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
  71. mlrun/frameworks/pytorch/utils.py +2 -1
  72. mlrun/frameworks/sklearn/metric.py +2 -1
  73. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
  74. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
  75. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
  76. mlrun/hub/__init__.py +52 -0
  77. mlrun/hub/base.py +142 -0
  78. mlrun/hub/module.py +172 -0
  79. mlrun/hub/step.py +113 -0
  80. mlrun/k8s_utils.py +105 -16
  81. mlrun/launcher/base.py +15 -7
  82. mlrun/launcher/local.py +4 -1
  83. mlrun/model.py +14 -4
  84. mlrun/model_monitoring/__init__.py +0 -1
  85. mlrun/model_monitoring/api.py +65 -28
  86. mlrun/model_monitoring/applications/__init__.py +1 -1
  87. mlrun/model_monitoring/applications/base.py +299 -128
  88. mlrun/model_monitoring/applications/context.py +2 -4
  89. mlrun/model_monitoring/controller.py +132 -58
  90. mlrun/model_monitoring/db/_schedules.py +38 -29
  91. mlrun/model_monitoring/db/_stats.py +6 -16
  92. mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
  93. mlrun/model_monitoring/db/tsdb/base.py +29 -9
  94. mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
  95. mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
  96. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
  97. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
  98. mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
  99. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
  100. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
  101. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
  102. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
  103. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
  104. mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
  105. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
  106. mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
  107. mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
  108. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +20 -9
  109. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +235 -51
  110. mlrun/model_monitoring/features_drift_table.py +2 -1
  111. mlrun/model_monitoring/helpers.py +30 -6
  112. mlrun/model_monitoring/stream_processing.py +34 -28
  113. mlrun/model_monitoring/writer.py +224 -4
  114. mlrun/package/__init__.py +2 -1
  115. mlrun/platforms/__init__.py +0 -43
  116. mlrun/platforms/iguazio.py +8 -4
  117. mlrun/projects/operations.py +17 -11
  118. mlrun/projects/pipelines.py +2 -2
  119. mlrun/projects/project.py +187 -123
  120. mlrun/run.py +95 -21
  121. mlrun/runtimes/__init__.py +2 -186
  122. mlrun/runtimes/base.py +103 -25
  123. mlrun/runtimes/constants.py +225 -0
  124. mlrun/runtimes/daskjob.py +5 -2
  125. mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
  126. mlrun/runtimes/local.py +5 -2
  127. mlrun/runtimes/mounts.py +20 -2
  128. mlrun/runtimes/nuclio/__init__.py +12 -7
  129. mlrun/runtimes/nuclio/api_gateway.py +36 -6
  130. mlrun/runtimes/nuclio/application/application.py +339 -40
  131. mlrun/runtimes/nuclio/function.py +222 -72
  132. mlrun/runtimes/nuclio/serving.py +132 -42
  133. mlrun/runtimes/pod.py +213 -21
  134. mlrun/runtimes/utils.py +49 -9
  135. mlrun/secrets.py +99 -14
  136. mlrun/serving/__init__.py +2 -0
  137. mlrun/serving/remote.py +84 -11
  138. mlrun/serving/routers.py +26 -44
  139. mlrun/serving/server.py +138 -51
  140. mlrun/serving/serving_wrapper.py +6 -2
  141. mlrun/serving/states.py +997 -283
  142. mlrun/serving/steps.py +62 -0
  143. mlrun/serving/system_steps.py +149 -95
  144. mlrun/serving/v2_serving.py +9 -10
  145. mlrun/track/trackers/mlflow_tracker.py +29 -31
  146. mlrun/utils/helpers.py +292 -94
  147. mlrun/utils/http.py +9 -2
  148. mlrun/utils/notifications/notification/base.py +18 -0
  149. mlrun/utils/notifications/notification/git.py +3 -5
  150. mlrun/utils/notifications/notification/mail.py +39 -16
  151. mlrun/utils/notifications/notification/slack.py +2 -4
  152. mlrun/utils/notifications/notification/webhook.py +2 -5
  153. mlrun/utils/notifications/notification_pusher.py +3 -3
  154. mlrun/utils/version/version.json +2 -2
  155. mlrun/utils/version/version.py +3 -4
  156. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +63 -74
  157. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +161 -143
  158. mlrun/api/schemas/__init__.py +0 -259
  159. mlrun/db/auth_utils.py +0 -152
  160. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -344
  161. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
  162. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
  163. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1266
  164. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
  165. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
  166. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
  167. {mlrun-1.10.0rc18.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
mlrun/auth/utils.py ADDED
@@ -0,0 +1,415 @@
1
+ # Copyright 2025 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ import time
17
+ import typing
18
+
19
+ import jwt
20
+ import yaml
21
+
22
+ import mlrun.common.constants
23
+ import mlrun.common.schemas
24
+ import mlrun.utils.helpers
25
+ from mlrun.config import config as mlconf
26
+
27
+ if typing.TYPE_CHECKING:
28
+ import mlrun.db
29
+
30
+
31
class Claims:
    """
    Names of the JWT claims that this module reads from decoded tokens.
    """

    # Claim holding the token expiration; compared against ``time.time()`` in this module.
    EXPIRATION = "exp"
    # Claim holding the token subject (the user ID the token belongs to).
    SUBJECT = "sub"
38
+
39
+
40
def load_offline_token(raise_on_error=True) -> typing.Optional[str]:
    """
    Resolve the offline token, preferring the environment over the token file.

    The environment lookup (``get_offline_token_from_env``) is tried first. Only when it
    yields nothing is the configured token file consulted; failures of the file lookup are
    raised or logged according to ``raise_on_error``.

    :param raise_on_error: If True, a failure to resolve the token from the file raises.
                           If False, the failure is logged instead.
    :return: The offline token if found, otherwise None.
    """
    env_token = get_offline_token_from_env()
    if not env_token:
        return get_offline_token_from_file(raise_on_error=raise_on_error)
    return env_token
55
+
56
+
57
def get_offline_token_from_file(raise_on_error: bool = True) -> typing.Optional[str]:
    """
    Resolve the offline token from the configured secret tokens file.

    Loads the ``secretTokens`` entries from the file and hands them to
    ``parse_offline_token_data`` to pick the matching entry. When no entries could be
    loaded, None is returned without attempting to parse.

    :param raise_on_error: Whether to raise an error or log a warning on failure.
    :return: The offline token if found, otherwise None.
    """
    loaded_tokens = load_secret_tokens_from_file(raise_on_error=raise_on_error)
    if loaded_tokens:
        return parse_offline_token_data(
            tokens=loaded_tokens, raise_on_error=raise_on_error
        )
    return None
72
+
73
+
74
def load_secret_tokens_from_file(
    raise_on_error: bool = True,
) -> list[dict]:
    """
    Return the raw ``secretTokens`` entries from the configured token file.

    The file path is taken from ``mlconf.auth_with_oauth_token.token_file`` (``~`` is
    expanded). The entries are returned as-is; they are NOT validated here.

    When the file could not be read/parsed, or ``secretTokens`` is not a non-empty list,
    the failure is either raised or logged depending on ``raise_on_error``; when it is
    only logged, an empty list is returned.

    :param raise_on_error: Whether to raise exceptions on read/parse failure.
    :return: List of token dictionaries from ``secretTokens``, or an empty list when
             nothing usable was found and ``raise_on_error`` is False.
    :rtype: list[dict[str, Any]]
    """
    token_file = os.path.expanduser(mlconf.auth_with_oauth_token.token_file)
    file_content = read_secret_tokens_file(raise_on_error=raise_on_error)
    if not file_content:
        mlrun.utils.helpers.raise_or_log_error(
            f"Token file is empty or could not be parsed: {token_file}",
            raise_on_error,
        )
        return []

    entries = file_content.get("secretTokens")
    if isinstance(entries, list) and entries:
        return entries

    mlrun.utils.helpers.raise_or_log_error(
        f"Invalid token file: 'secretTokens' must be a non-empty list in {token_file}",
        raise_on_error,
    )
    return []
112
+
113
+
114
def read_secret_tokens_file(
    raise_on_error: bool = True,
) -> typing.Optional[dict[str, typing.Any]]:
    """
    Read and parse the secret tokens file.

    The file path is taken from ``mlconf.auth_with_oauth_token.token_file``; a leading
    ``~`` is expanded to the user's home directory. The content is parsed as YAML with
    ``yaml.safe_load`` and must be a non-empty mapping.

    Every failure (file missing, file unreadable, invalid YAML, empty content, content that
    is not a mapping) is reported through ``raise_or_log_error``, i.e. it is raised or
    logged depending on ``raise_on_error``.

    :param raise_on_error: Whether to raise an error or log a warning on failure.
    :return: The parsed content of the token file as a dictionary, or None if an error occurs.
    """
    token_file = os.path.expanduser(mlconf.auth_with_oauth_token.token_file)

    if not os.path.exists(token_file):
        mlrun.utils.helpers.raise_or_log_error(
            f"Configured token file not found: {token_file}", raise_on_error
        )
        return None

    # Keep the try body limited to the I/O + parsing that can actually fail here
    try:
        with open(token_file) as token_file_io:
            data = yaml.safe_load(token_file_io)
    except yaml.YAMLError as exc:
        mlrun.utils.helpers.raise_or_log_error(
            f"Failed to parse token file {token_file}: {exc}", raise_on_error
        )
        return None
    except (OSError, UnicodeDecodeError) as exc:
        # The path exists but cannot be read (e.g. it is a directory, permissions are
        # missing, or the bytes cannot be decoded) - honor raise_on_error for these too
        mlrun.utils.helpers.raise_or_log_error(
            f"Failed to read token file {token_file}: {exc}", raise_on_error
        )
        return None

    if not data:
        mlrun.utils.helpers.raise_or_log_error(
            f"Token file {token_file} is empty or invalid",
            raise_on_error,
        )
        return None
    if not isinstance(data, dict):
        mlrun.utils.helpers.raise_or_log_error(
            f"Token file {token_file} must contain a YAML mapping (dictionary)",
            raise_on_error,
        )
        return None
    return data
159
+
160
+
161
def parse_offline_token_data(
    tokens: list[dict[str, typing.Any]], raise_on_error: bool = True
) -> typing.Optional[str]:
    """
    Extract the correct offline token entry from the parsed tokens list.

    Logic:
    1. Identify the target token entry using ``mlconf.auth_with_oauth_token.token_name``:
        - If the value is set (non-empty):
            - Look for an entry where ``name == <TOKEN_NAME>``.
            - If no match is found, resolution fails.
        - If the value is not set (empty):
            - Look for an entry named "default".
            - If not found, fall back to the first token in the list.
        - If more than one entry carries the looked-up name, resolution fails.
    2. Validate the matched entry:
        - It must be a mapping whose ``token`` field is a non-empty string.
    3. If any of the above steps fail, the failure is raised or logged through
       ``raise_or_log_error`` depending on ``raise_on_error``.

    Entries that are not mappings never match by name; they are reported as an entry
    without a usable ``token`` if they end up being selected by the first-entry fallback.

    :param tokens: List of token dictionaries loaded from the YAML file.
    :param raise_on_error: Whether to raise an error or log a warning on failure.
    :return: The resolved offline token, or None if resolution fails.
    """
    if not isinstance(tokens, list) or not tokens:
        mlrun.utils.helpers.raise_or_log_error(
            "Invalid token file: 'secretTokens' must be a non-empty list",
            raise_on_error,
        )
        return None

    configured_name = mlconf.auth_with_oauth_token.token_name
    name = configured_name or "default"

    # Guard with isinstance so a malformed (non-mapping) entry cannot raise AttributeError
    matches = [
        entry
        for entry in tokens
        if isinstance(entry, dict) and entry.get("name") == name
    ]
    if not matches and not configured_name:
        # No explicit name configured and no "default" entry - fall back to the first one
        matches = [tokens[0]]

    if len(matches) != 1:
        mlrun.utils.helpers.raise_or_log_error(
            f"Failed to resolve a unique token. Found {len(matches)} entries for name '{name}'",
            raise_on_error,
        )
        return None

    matched_entry = matches[0]
    token_value = (
        matched_entry.get("token") if isinstance(matched_entry, dict) else None
    )
    if not isinstance(token_value, str) or not token_value:
        mlrun.utils.helpers.raise_or_log_error(
            "Resolved token entry missing 'token' field",
            raise_on_error,
        )
        return None

    return token_value
214
+
215
+
216
def get_offline_token_from_env() -> typing.Optional[str]:
    """
    Look up the offline token under the ``MLRUN_AUTH_OFFLINE_TOKEN`` key.

    The lookup is delegated to ``mlrun.secrets.get_secret_or_env``, so the value is
    whatever that helper resolves for the key.

    :return: The offline token if the lookup resolves a value, otherwise None.
    """
    offline_token_key = "MLRUN_AUTH_OFFLINE_TOKEN"
    return mlrun.secrets.get_secret_or_env(offline_token_key)
226
+
227
+
228
def load_and_prepare_secret_tokens(
    auth_user_id: str | None = None,
    raise_on_error: bool = True,
) -> list[mlrun.common.schemas.SecretToken]:
    """
    Load, validate, and translate secret tokens from a file into SecretToken objects.

    Steps performed:
    1. Load the raw ``secretTokens`` entries from the configured file.
    2. Turn each well-formed entry (a mapping with ``name`` and ``token`` keys) into a
       SecretToken. Malformed entries are raised or logged-and-skipped according to
       ``raise_on_error``.
    3. Validate the tokens with ``extract_and_validate_tokens_info``, keeping only tokens
       whose subject equals ``auth_user_id``.
    4. Translate the validated token data back into SecretToken objects.

    Note that validation errors raised by ``extract_and_validate_tokens_info`` (undecodable
    token, missing claims, invalid or duplicate name) propagate regardless of
    ``raise_on_error``.

    :param auth_user_id: The user ID to filter the tokens by.
    :param raise_on_error: Whether to raise exceptions or log warnings on failures while
                           loading the file, reading its entries, or translating them.
    :return: List of SecretToken objects.
    :rtype: list[mlrun.common.schemas.SecretToken]
    """
    tokens_list = load_secret_tokens_from_file(raise_on_error=raise_on_error)

    candidate_tokens = []
    for entry in tokens_list:
        # A raw entry comes straight from YAML and may have any shape; indexing it blindly
        # would raise KeyError/TypeError and bypass raise_on_error
        if not isinstance(entry, dict) or "name" not in entry or "token" not in entry:
            mlrun.utils.helpers.raise_or_log_error(
                "Invalid secret token entry: expected a mapping with 'name' and 'token' keys",
                raise_on_error,
            )
            continue
        candidate_tokens.append(
            mlrun.common.schemas.SecretToken(
                name=entry["name"],
                token=entry["token"],
            )
        )

    validated_tokens = extract_and_validate_tokens_info(
        secret_tokens=candidate_tokens,
        authenticated_id=auth_user_id,
        filter_by_authenticated_id=True,
    )
    return _translate_secret_tokens(validated_tokens, raise_on_error=raise_on_error)
262
+
263
+
264
def extract_and_validate_tokens_info(
    secret_tokens: list[mlrun.common.schemas.SecretToken],
    authenticated_id: str,
    filter_by_authenticated_id: bool = False,
) -> dict[str, dict[str, typing.Any]]:
    """
    Validate a list of SecretToken objects and collect their info keyed by token name.

    For every token: the name must be set and unique, and the (unverified) decoded token
    must carry both the 'exp' and 'sub' claims. A token whose subject differs from
    ``authenticated_id`` is either skipped or rejected, see ``filter_by_authenticated_id``.

    :param secret_tokens: List of SecretToken objects.
    :param authenticated_id: The authenticated user ID.
    :param filter_by_authenticated_id: If True, tokens of other subjects are silently
                                       skipped. If False, such a token is logged and an
                                       error is raised.
    :return: Dictionary keyed by token name; each value is a dictionary with the
             ``token_exp`` (the 'exp' claim) and ``token`` (the token string) entries.
    :raises mlrun.errors.MLRunInvalidArgumentError: On an invalid or duplicate name, a
        missing claim, or (when not filtering) a subject mismatch.
    """
    collected = {}
    for secret_token in secret_tokens:
        token_name = secret_token.name

        # Reject missing or already-seen names up front
        if not token_name or token_name in collected:
            raise mlrun.errors.MLRunInvalidArgumentError(
                f"Invalid or duplicate token name '{secret_token.name}' found in request payload"
            )

        # The token is expected to be a refresh token which we cannot verify ourselves, we verify it separately
        # via orca when exchanging it for an access token. We decode it here without verification to extract its
        # claims.
        claims = _decode_token_unverified(secret_token.token)

        if not claims.get(Claims.EXPIRATION):
            raise mlrun.errors.MLRunInvalidArgumentError(
                f"Offline token '{token_name}' is missing the 'exp' (expiration) claim"
            )

        token_sub = claims.get(Claims.SUBJECT)
        if not token_sub:
            raise mlrun.errors.MLRunInvalidArgumentError(
                f"Offline token '{token_name}' is missing the 'sub' (subject) claim"
            )

        if token_sub != authenticated_id:
            if filter_by_authenticated_id:
                # just ignore the token as it doesn't belong to the authenticated user
                continue
            mlrun.utils.logger.warning(
                "Offline token subject does not match the authenticated user",
                token_name=token_name,
                token_sub=token_sub,
                user_id=authenticated_id,
            )
            raise mlrun.errors.MLRunInvalidArgumentError(
                f"Offline token '{token_name}' does not match the authenticated user ID. "
                "Stored tokens can only belong to the authenticated user."
            )

        collected[token_name] = {
            "token_exp": claims.get(Claims.EXPIRATION),
            "token": secret_token.token,
        }
    return collected
325
+
326
+
327
def resolve_jwt_subject(
    token: str, raise_on_error: bool = True
) -> typing.Optional[str]:
    """
    Extract the 'sub' (subject/user ID) claim from a JWT token.

    The token is decoded without signature verification: this is used on the client side
    on a token received from the server, where there is no need or ability to verify it.

    :param token: The JWT token string.
    :param raise_on_error: If True, a decoding failure is raised. If False, it is logged
                           and None is returned.
    :return: The 'sub' claim value, or None if the claim is absent or extraction fails.
    :raises mlrun.errors.MLRunInvalidArgumentError: If the token cannot be decoded and
        ``raise_on_error`` is True.
    """
    try:
        return _decode_token_unverified(token).get(Claims.SUBJECT)
    except (jwt.PyJWTError, mlrun.errors.MLRunInvalidArgumentError) as exc:
        # _decode_token_unverified wraps every decoding failure in
        # MLRunInvalidArgumentError, so catching only jwt.PyJWTError never matched and
        # raise_on_error=False was ignored.
        if raise_on_error:
            # Keep surfacing the same exception callers got before
            raise
        mlrun.utils.helpers.raise_or_log_error(
            f"Failed to decode JWT token: {exc}", raise_on_error
        )
        return None
349
+
350
+
351
def is_token_expired(token: str, buffer_seconds: int = 0) -> bool:
    """
    Tell whether a JWT token is expired according to its 'exp' claim.

    The token is decoded without signature verification; this check serves caching and/or
    extra validation on top of the main verification flow.

    :param token: The JWT token string.
    :param buffer_seconds: Number of seconds to subtract from the expiration time, so the
                           token counts as expired that much earlier.
    :return: True if the current time reached the (buffered) expiration, False otherwise.
    :raises mlrun.errors.MLRunInvalidArgumentError: If the token has no 'exp' claim.
    """
    claims = _decode_token_unverified(token)
    expires_at = claims.get(Claims.EXPIRATION)
    if expires_at:
        return time.time() >= expires_at - buffer_seconds
    raise mlrun.errors.MLRunInvalidArgumentError(
        "Token is missing the 'exp' (expiration) claim"
    )
370
+
371
+
372
def _decode_token_unverified(token: str) -> dict:
    """
    Decode a JWT token into its claims without verifying the signature.

    :param token: The JWT token string.
    :return: The decoded claims.
    :raises mlrun.errors.MLRunInvalidArgumentError: On any decoding failure; the original
        exception is chained as the cause.
    """
    decode_options = {"verify_signature": False}
    try:
        claims = jwt.decode(token, options=decode_options)
    except jwt.DecodeError as exc:
        raise mlrun.errors.MLRunInvalidArgumentError(
            "Failed to decode offline token"
        ) from exc
    except Exception as exc:
        raise mlrun.errors.MLRunInvalidArgumentError(
            "Unexpected error decoding token"
        ) from exc
    return claims
383
+
384
+
385
def _translate_secret_tokens(
    tokens_dict: dict[str, dict[str, typing.Any]], raise_on_error: bool = True
) -> list[mlrun.common.schemas.SecretToken]:
    """
    Build SecretToken objects out of validated token data.

    ``tokens_dict`` maps a token name to its data, of which the ``token`` entry (the token
    string) is used. An entry that cannot be turned into a SecretToken is raised or logged
    according to ``raise_on_error``; when only logged, the entry is left out of the result.

    :param tokens_dict: Dictionary of validated token data, keyed by token name.
    :param raise_on_error: Whether to raise exceptions on translation errors.
    :return: List of SecretToken objects created from the input dictionary.
    :rtype: list[mlrun.common.schemas.SecretToken]
    """
    # Only used to point at the source file in the error message
    token_file = os.path.expanduser(mlconf.auth_with_oauth_token.token_file)
    translated = []
    for entry_name, entry_data in tokens_dict.items():
        try:
            secret_token = mlrun.common.schemas.SecretToken(
                name=entry_name,
                token=entry_data["token"],
            )
        except Exception as exc:
            mlrun.utils.helpers.raise_or_log_error(
                f"Failed to create SecretToken from entry in {token_file}: {exc}",
                raise_on_error,
            )
        else:
            translated.append(secret_token)
    return translated
mlrun/common/constants.py CHANGED
@@ -27,9 +27,20 @@ DASK_LABEL_PREFIX = "dask.org/"
27
27
  NUCLIO_LABEL_PREFIX = "nuclio.io/"
28
28
  RESERVED_TAG_NAME_LATEST = "latest"
29
29
 
30
+ # Kubernetes DNS-1123 label name length limit
31
+ K8S_DNS_1123_LABEL_MAX_LENGTH = 63
32
+
33
+
34
+ RESERVED_BATCH_JOB_SUFFIX = "-batch"
35
+
30
36
  JOB_TYPE_WORKFLOW_RUNNER = "workflow-runner"
31
37
  JOB_TYPE_PROJECT_LOADER = "project-loader"
32
38
  JOB_TYPE_RERUN_WORKFLOW_RUNNER = "rerun-workflow-runner"
39
+ MLRUN_ACTIVE_PROJECT = "MLRUN_ACTIVE_PROJECT"
40
+
41
+ MLRUN_JOB_AUTH_SECRET_PATH = "/var/mlrun-secrets/auth"
42
+ MLRUN_JOB_AUTH_SECRET_FILE = ".igz.yml"
43
+ MLRUN_RUNTIME_AUTH_DEFAULT_TOKEN_NAME = "default"
33
44
 
34
45
 
35
46
  class MLRunInternalLabels:
@@ -92,6 +103,9 @@ class MLRunInternalLabels:
92
103
  workflow = "workflow"
93
104
  feature_vector = "feature-vector"
94
105
 
106
+ auth_username = f"{MLRUN_LABEL_PREFIX}user"
107
+ auth_token_name = f"{MLRUN_LABEL_PREFIX}token"
108
+
95
109
  @classmethod
96
110
  def all(cls):
97
111
  return [
@@ -14,6 +14,7 @@
14
14
 
15
15
  import sys
16
16
  import typing
17
+ from datetime import datetime
17
18
 
18
19
  import mlrun.common
19
20
  import mlrun.common.schemas.model_monitoring.constants as mm_constants
@@ -24,6 +25,7 @@ BinCounts = typing.NewType("BinCounts", list[int])
24
25
  BinEdges = typing.NewType("BinEdges", list[float])
25
26
 
26
27
  _MAX_FLOAT = sys.float_info.max
28
+ logger = mlrun.utils.create_logger(level="info", name="mm_helpers")
27
29
 
28
30
 
29
31
  def parse_model_endpoint_project_prefix(path: str, project_name: str):
@@ -50,6 +52,44 @@ def get_kafka_topic(project: str, function_name: typing.Optional[str] = None) ->
50
52
  )
51
53
 
52
54
 
55
+ # Constants for TimescaleDB database naming
56
+ TIMESCALEDB_DEFAULT_DB_PREFIX = "mlrun_mm"
57
+
58
+
59
def get_tsdb_database_name(profile_database: str) -> str:
    """
    Resolve the TimescaleDB database name to connect to.

    With ``model_endpoint_monitoring.tsdb.auto_create_database`` disabled, the database
    from the profile is returned untouched. With it enabled, the name is derived from the
    configured system ID as ``mlrun_mm_{system_id}``.

    :param profile_database: The database name from the PostgreSQL profile.
    :return: The database name to use for TimescaleDB connections.
    :raises MLRunInvalidArgumentError: If auto_create_database is enabled but
                                       system_id is not set.
    """
    tsdb_config = mlrun.mlconf.model_endpoint_monitoring.tsdb
    if not tsdb_config.auto_create_database:
        # Explicit database mode: trust the profile
        return profile_database

    if mlrun.mlconf.system_id:
        return f"{TIMESCALEDB_DEFAULT_DB_PREFIX}_{mlrun.mlconf.system_id}"

    raise mlrun.errors.MLRunInvalidArgumentError(
        "system_id is not set in mlrun.mlconf. "
        "TimescaleDB requires system_id for auto-generating database name "
        "when auto_create_database is enabled. "
        "Either set system_id in MLRun configuration or disable auto_create_database "
        "and provide an explicit database in the PostgreSQL connection string."
    )
91
+
92
+
53
93
  def _get_counts(hist: Histogram) -> BinCounts:
54
94
  """Return the histogram counts"""
55
95
  return BinCounts(hist[0])
@@ -87,3 +127,86 @@ def pad_features_hist(feature_stats: FeatureStats) -> None:
87
127
  for feature in feature_stats.values():
88
128
  if hist_key in feature:
89
129
  pad_hist(Histogram(feature[hist_key]))
130
+
131
+
132
def get_model_endpoints_creation_task_status(
    server,
) -> tuple[
    mlrun.common.schemas.BackgroundTaskState,
    typing.Optional[datetime],
    typing.Optional[set[str]],
]:
    """
    Resolve the state of the model endpoint creation background task of a server.

    The project background task named ``server.model_endpoint_creation_task_name`` is
    fetched from the run DB. If it exists, its state is logged and returned together with
    the time of the check. If it is not found, the model endpoints of the server's function
    are listed instead: a truthy listing result yields ``succeeded`` plus the endpoint
    UIDs, otherwise ``failed``.

    :param server: Object providing ``project``, ``model_endpoint_creation_task_name``,
                   ``function_name`` and ``function_tag``.
    :return: Tuple of (task state, time the task was checked or None if it was not found,
             set of model endpoint UIDs or None if they were not listed).
    """
    task = None
    state = mlrun.common.schemas.BackgroundTaskState.running
    checked_at = None
    endpoint_uids = None
    try:
        task = mlrun.get_run_db().get_project_background_task(
            server.project, server.model_endpoint_creation_task_name
        )
        checked_at = mlrun.utils.now_date()
        log_background_task_state(server, task.status.state, checked_at)
        state = task.status.state
    except mlrun.errors.MLRunNotFoundError:
        logger.warning(
            "Model endpoint creation task not found listing model endpoints",
            project=server.project,
            task_name=server.model_endpoint_creation_task_name,
        )

    if task is not None:
        return state, checked_at, endpoint_uids

    # No background task - decide by whether model endpoints already exist
    model_endpoints = mlrun.get_run_db().list_model_endpoints(
        project=server.project,
        function_name=server.function_name,
        function_tag=server.function_tag,
        tsdb_metrics=False,
    )
    if model_endpoints:
        endpoint_uids = {
            endpoint.metadata.uid for endpoint in model_endpoints.endpoints
        }
        logger.info(
            "Model endpoints found after background task not found, model monitoring will monitor "
            "events",
            project=server.project,
            function_name=server.function_name,
            function_tag=server.function_tag,
            uids=endpoint_uids,
        )
        state = mlrun.common.schemas.BackgroundTaskState.succeeded
    else:
        logger.warning(
            "Model endpoints not found after background task not found, model monitoring will not "
            "monitor events",
            project=server.project,
            function_name=server.function_name,
            function_tag=server.function_tag,
        )
        state = mlrun.common.schemas.BackgroundTaskState.failed
    return state, checked_at, endpoint_uids
187
+
188
+
189
def log_background_task_state(
    server,
    background_task_state: mlrun.common.schemas.BackgroundTaskState,
    background_task_check_timestamp: typing.Optional[datetime],
):
    """
    Log the state of the model endpoint creation background task.

    Always logs that the task status is being checked, then logs either that the task
    completed (terminal state) or that it is still in progress.

    :param server: Object providing ``model_endpoint_creation_task_name`` and
                   ``function_name``.
    :param background_task_state: The current state of the background task.
    :param background_task_check_timestamp: When the task was checked; may be None, in
                                            which case None is logged for the timestamp.
    """
    logger.info(
        "Checking model endpoint creation task status",
        task_name=server.model_endpoint_creation_task_name,
    )
    if (
        background_task_state
        in mlrun.common.schemas.BackgroundTaskState.terminal_states()
    ):
        logger.info(
            f"Model endpoint creation task completed with state {background_task_state}"
        )
    else:  # in progress
        # The timestamp is declared Optional, so do not call isoformat() on None
        check_timestamp = (
            background_task_check_timestamp.isoformat()
            if background_task_check_timestamp is not None
            else None
        )
        logger.info(
            f"Model endpoint creation task is still in progress with the current state: "
            f"{background_task_state}. Events will not be monitored for the next "
            f"{mlrun.mlconf.model_endpoint_monitoring.model_endpoint_creation_check_period} seconds",
            function_name=server.function_name,
            background_task_check_timestamp=check_timestamp,
        )
@@ -16,6 +16,7 @@ import typing
16
16
 
17
17
  import mlrun.common.constants as mlrun_constants
18
18
  import mlrun_pipelines.common.models
19
+ from mlrun.common.types import StrEnum
19
20
 
20
21
 
21
22
  class PodPhases:
@@ -365,3 +366,30 @@ class NuclioIngressAddTemplatedIngressModes:
365
366
  class FunctionEnvironmentVariables:
366
367
  _env_prefix = "MLRUN_"
367
368
  auth_session = f"{_env_prefix}AUTH_SESSION"
369
+
370
+
371
+ # Kubernetes probe types
372
+ class ProbeType(StrEnum):
373
+ READINESS = "readiness"
374
+ LIVENESS = "liveness"
375
+ STARTUP = "startup"
376
+
377
+ @property
378
+ def key(self):
379
+ return f"{self.value}Probe"
380
+
381
+ @classmethod
382
+ def is_valid(cls, value: str, raise_on_error: bool = False) -> bool:
383
+ valid_value = value in cls._value2member_map_
384
+ if not valid_value and raise_on_error:
385
+ raise ValueError(
386
+ f"Invalid probe type: {value}. Must be one of: {[p.value for p in ProbeType]}"
387
+ )
388
+ return valid_value
389
+
390
+
391
+ class ProbeTimeConfig(StrEnum):
392
+ INITIAL_DELAY_SECONDS = "initialDelaySeconds"
393
+ PERIOD_SECONDS = "periodSeconds"
394
+ TIMEOUT_SECONDS = "timeoutSeconds"
395
+ FAILURE_THRESHOLD = "failureThreshold"
@@ -43,6 +43,7 @@ from .artifact import (
43
43
  from .auth import (
44
44
  AuthInfo,
45
45
  AuthorizationAction,
46
+ AuthorizationResourceNamespace,
46
47
  AuthorizationResourceTypes,
47
48
  AuthorizationVerificationInput,
48
49
  Credentials,
@@ -65,7 +66,9 @@ from .common import ImageBuilder
65
66
  from .constants import (
66
67
  APIStates,
67
68
  ArtifactPartitionByField,
69
+ AuthorizationHeaderPrefixes,
68
70
  ClusterizationRole,
71
+ CookieNames,
69
72
  DeletionStrategy,
70
73
  FeatureStorePartitionByField,
71
74
  HeaderNames,
@@ -111,14 +114,18 @@ from .feature_store import (
111
114
  )
112
115
  from .frontend_spec import (
113
116
  ArtifactLimits,
114
- AuthenticationFeatureFlag,
115
117
  FeatureFlags,
116
118
  FrontendSpec,
117
119
  NuclioStreamsFeatureFlag,
118
120
  PreemptionNodesFeatureFlag,
119
121
  ProjectMembershipFeatureFlag,
120
122
  )
121
- from .function import FunctionState, PreemptionModes, SecurityContextEnrichmentModes
123
+ from .function import (
124
+ BatchingSpec,
125
+ FunctionState,
126
+ PreemptionModes,
127
+ SecurityContextEnrichmentModes,
128
+ )
122
129
  from .http import HTTPSessionRetryMode
123
130
  from .hub import (
124
131
  HubCatalog,
@@ -154,6 +161,7 @@ from .model_monitoring import (
154
161
  ModelEndpointSchema,
155
162
  ModelEndpointSpec,
156
163
  ModelEndpointStatus,
164
+ ModelMonitoringInfraLabel,
157
165
  ModelMonitoringMode,
158
166
  MonitoringFunctionNames,
159
167
  TSDBTarget,
@@ -211,10 +219,13 @@ from .schedule import (
211
219
  )
212
220
  from .secret import (
213
221
  AuthSecretData,
222
+ ListSecretTokensResponse,
214
223
  SecretKeysData,
215
224
  SecretProviderName,
216
225
  SecretsData,
217
- UserSecretCreationRequest,
226
+ SecretToken,
227
+ SecretTokenInfo,
228
+ StoreSecretTokensResponse,
218
229
  )
219
230
  from .serving import ModelRunnerStepData, ModelsData, MonitoringData
220
231
  from .tag import Tag, TagObjects