mlrun 1.10.0rc12__py3-none-any.whl → 1.10.0rc13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/artifacts/llm_prompt.py +5 -1
- mlrun/common/schemas/__init__.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +1 -1
- mlrun/common/schemas/serving.py +7 -0
- mlrun/config.py +2 -0
- mlrun/datastore/model_provider/model_provider.py +81 -3
- mlrun/datastore/model_provider/openai_provider.py +52 -28
- mlrun/datastore/remote_client.py +11 -0
- mlrun/execution.py +5 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +175 -8
- mlrun/projects/pipelines.py +40 -18
- mlrun/projects/project.py +5 -0
- mlrun/run.py +25 -2
- mlrun/serving/server.py +1 -0
- mlrun/serving/states.py +411 -21
- mlrun/serving/system_steps.py +6 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc12.dist-info → mlrun-1.10.0rc13.dist-info}/METADATA +1 -1
- {mlrun-1.10.0rc12.dist-info → mlrun-1.10.0rc13.dist-info}/RECORD +23 -23
- {mlrun-1.10.0rc12.dist-info → mlrun-1.10.0rc13.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc12.dist-info → mlrun-1.10.0rc13.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc12.dist-info → mlrun-1.10.0rc13.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc12.dist-info → mlrun-1.10.0rc13.dist-info}/top_level.txt +0 -0
mlrun/artifacts/llm_prompt.py
CHANGED

@@ -61,7 +61,11 @@ class LLMPromptArtifactSpec(ArtifactSpec):
         self.prompt_legend = prompt_legend
         self.model_configuration = model_configuration
         self.description = description
-        self._model_artifact = model_artifact
+        self._model_artifact = (
+            model_artifact
+            if isinstance(model_artifact, model_art.ModelArtifact)
+            else None
+        )
 
     @property
     def model_uri(self):
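With this change, only a real ModelArtifact instance is kept on the spec; any other value passed as model_artifact is dropped to None. A minimal illustrative sketch of the new assignment logic (not part of the diff; the helper function is hypothetical):

    from mlrun.artifacts import ModelArtifact

    def resolve_model_artifact(model_artifact):
        # Mirrors the new conditional: keep only genuine ModelArtifact objects.
        return model_artifact if isinstance(model_artifact, ModelArtifact) else None

    assert resolve_model_artifact("store://models/my-project/my-model") is None
    assert isinstance(resolve_model_artifact(ModelArtifact(key="my-model")), ModelArtifact)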
mlrun/common/schemas/__init__.py
CHANGED

@@ -214,7 +214,7 @@ from .secret import (
     SecretsData,
     UserSecretCreationRequest,
 )
-from .serving import ModelRunnerStepData, MonitoringData
+from .serving import ModelRunnerStepData, ModelsData, MonitoringData
 from .tag import Tag, TagObjects
 from .workflow import (
     GetWorkflowResponse,

mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED

@@ -336,8 +336,8 @@ class ModelEndpointMonitoringMetricNoData(_ModelEndpointMonitoringMetricValuesBase):
 
 class ApplicationBaseRecord(BaseModel):
     type: Literal["metric", "result"]
-    time: datetime
     value: float
+    time: Optional[datetime] = None
 
 
 class ApplicationResultRecord(ApplicationBaseRecord):
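The `time` field on application records is now optional with a `None` default, so records can be created without a timestamp. A small sketch, assuming the module path matches the file shown above and that `ApplicationBaseRecord` has no required fields beyond those visible in this hunk:

    from mlrun.common.schemas.model_monitoring.model_endpoints import (
        ApplicationBaseRecord,
    )

    # `type` and `value` are still required; `time` now defaults to None.
    record = ApplicationBaseRecord(type="metric", value=0.42)
    assert record.time is None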
mlrun/common/schemas/serving.py
CHANGED

@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import enum
+
 from pydantic.v1 import BaseModel
 
 from mlrun.common.types import StrEnum

@@ -40,3 +42,8 @@ class MonitoringData(StrEnum):
     MODEL_PATH = "model_path"
     MODEL_ENDPOINT_UID = "model_endpoint_uid"
     MODEL_CLASS = "model_class"
+
+
+class ModelsData(enum.Enum):
+    MODEL_CLASS = 0
+    MODEL_PARAMETERS = 1
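Together with the `__init__.py` change above, the new enum is re-exported from `mlrun.common.schemas`. A minimal usage sketch:

    from mlrun.common.schemas import ModelsData, MonitoringData

    # ModelsData is a plain enum.Enum with integer values,
    # unlike the string-valued MonitoringData (a StrEnum).
    assert ModelsData.MODEL_CLASS.value == 0
    assert ModelsData.MODEL_PARAMETERS.value == 1
    assert MonitoringData.MODEL_CLASS == "model_class"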
mlrun/datastore/model_provider/model_provider.py
CHANGED

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Callable, Optional, TypeVar
+from typing import Callable, Optional, TypeVar, Union
 
 import mlrun.errors
 from mlrun.datastore.remote_client import (

@@ -23,6 +23,23 @@ T = TypeVar("T")
 
 
 class ModelProvider(BaseRemoteClient):
+    """
+    The ModelProvider class is an abstract base for integrating with external
+    model providers, primarily generative AI (GenAI) services.
+
+    Designed to be subclassed, it defines a consistent interface and shared
+    functionality for tasks such as text generation, embeddings, and invoking
+    fine-tuned models. Subclasses should implement provider-specific logic,
+    including SDK client initialization, model invocation, and custom operations.
+
+    Key Features:
+    - Establishes a consistent, reusable client management for model provider integrations.
+    - Simplifies GenAI service integration by abstracting common operations.
+    - Reduces duplication through shared components for common tasks.
+    - Holds default invocation parameters (e.g., temperature, max_tokens) to avoid boilerplate
+      code and promote consistency.
+    """
+
     support_async = False
 
     def __init__(

@@ -44,9 +61,65 @@ class ModelProvider(BaseRemoteClient):
         self._default_async_operation = None
 
     def load_client(self) -> None:
+        """
+        Initializes the SDK client for the model provider with the given keyword arguments
+        and assigns it to an instance attribute (e.g., self._client).
+
+        Subclasses should override this method to:
+        - Create and configure the provider-specific client instance.
+        - Assign the client instance to self._client.
+        - Define a default operation callable (e.g., a method to invoke model completions)
+          and assign it to self._default_operation.
+        """
+
         raise NotImplementedError("load_client method is not implemented")
 
-    def invoke(
+    def invoke(
+        self,
+        messages: Optional[list[dict]] = None,
+        as_str: bool = False,
+        **invoke_kwargs,
+    ) -> Optional[Union[str, T]]:
+        """
+        Invokes a generative AI model with the provided messages and additional parameters.
+        This method is designed to be a flexible interface for interacting with various
+        generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
+        a list of messages (following a standardized format) and receive a response. The
+        response can be returned as plain text or in its full structured format, depending
+        on the `as_str` parameter.
+
+        :param messages: A list of dictionaries representing the conversation history or input messages.
+            Each dictionary should follow the format::
+                {"role": "system"| "user" | "assistant" ..., "content": "Message content as a string"}
+            Example:
+
+            .. code-block:: json
+
+                [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What is the capital of France?"}
+                ]
+
+            This format is consistent across all backends. Defaults to None if no messages
+            are provided.
+
+        :param as_str: A boolean flag indicating whether to return the response as a plain string.
+            - If True, the function extracts and returns the main content of the first
+              response.
+            - If False, the function returns the full response object,
+              which may include additional metadata or multiple response options.
+            Defaults to False.
+
+        :param invoke_kwargs:
+            Additional keyword arguments to be passed to the underlying model API call.
+            These can include parameters such as temperature, max tokens, etc.,
+            depending on the capabilities of the specific backend being used.
+
+        :return:
+            - If `as_str` is True: Returns the main content of the first response as a string.
+            - If `as_str` is False: Returns the full response object.
+
+        """
         raise NotImplementedError("invoke method is not implemented")
 
     def customized_invoke(

@@ -78,5 +151,10 @@ class ModelProvider(BaseRemoteClient):
     async def async_customized_invoke(self, **kwargs):
         raise NotImplementedError("async_customized_invoke is not implemented")
 
-    async def async_invoke(
+    async def async_invoke(
+        self,
+        messages: Optional[list[dict]] = None,
+        as_str: bool = False,
+        **invoke_kwargs,
+    ) -> Awaitable[str]:
        raise NotImplementedError("async_invoke is not implemented")
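The docstrings added here spell out the subclassing contract: `load_client()` builds the SDK client, stores it on `self._client`, and selects a default operation; `invoke()` then forwards messages and keyword arguments to that operation. A minimal, hypothetical subclass following this contract (`EchoProvider` and its fake client are illustrative only, not part of mlrun):

    from mlrun.datastore.model_provider.model_provider import ModelProvider

    class EchoProvider(ModelProvider):
        """Toy provider that echoes the last user message, for illustration only."""

        def load_client(self) -> None:
            # A real subclass would instantiate the vendor SDK client here.
            self._client = object()
            self._default_operation = self._echo

        @staticmethod
        def _echo(messages=None, **kwargs):
            content = messages[-1]["content"] if messages else ""
            return {"choices": [{"content": content}]}

        def invoke(self, messages=None, as_str=False, **invoke_kwargs):
            response = self._default_operation(messages=messages, **invoke_kwargs)
            return response["choices"][0]["content"] if as_str else response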
mlrun/datastore/model_provider/openai_provider.py
CHANGED

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Callable, Optional, TypeVar
+from typing import Callable, Optional, TypeVar, Union
 
 import mlrun
 from mlrun.datastore.model_provider.model_provider import ModelProvider

@@ -21,6 +21,18 @@ T = TypeVar("T")
 
 
 class OpenAIProvider(ModelProvider):
+    """
+    OpenAIProvider is a wrapper around the OpenAI SDK that provides an interface
+    for interacting with OpenAI's generative AI services.
+
+    It supports both synchronous and asynchronous operations, allowing flexible
+    integration into various workflows.
+
+    This class extends the ModelProvider base class and implements OpenAI-specific
+    functionality, including client initialization, model invocation, and custom
+    operations tailored to the OpenAI API.
+    """
+
     def __init__(
         self,
         parent,

@@ -59,6 +71,19 @@ class OpenAIProvider(ModelProvider):
         return self.endpoint
 
     def load_client(self) -> None:
+        """
+        Initializes the OpenAI SDK client using the provided options.
+
+        This method imports the `OpenAI` class from the `openai` package, instantiates
+        a client with the given keyword arguments (`self.options`), and assigns it to
+        `self._client`.
+
+        It also sets the default operation to `self.client.chat.completions.create`, which is
+        typically used for invoking chat-based model completions.
+
+        Raises:
+            ImportError: If the `openai` package is not installed.
+        """
         try:
             from openai import OpenAI  # noqa
 

@@ -87,34 +112,33 @@ class OpenAIProvider(ModelProvider):
         else:
             return self._default_operation(**invoke_kwargs, model=self.model)
 
-    def
-        self,
-
-
-
-
-
-
-
-
-        elif prompt:
-            messages = [
-                {
-                    "role": "user",
-                    "content": prompt,
-                },
-            ]
-        else:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "must provide 'messages' or 'prompt' to invoke"
-            )
-        return messages, invoke_kwargs
+    def invoke(
+        self,
+        messages: Optional[list[dict]] = None,
+        as_str: bool = False,
+        **invoke_kwargs,
+    ) -> Optional[Union[str, T]]:
+        """
+        OpenAI-specific implementation of `ModelProvider.invoke`.
+        Invokes an OpenAI model operation using the sync client.
+        For full details, see `ModelProvider.invoke`.
 
-
-
-
-
+        :param messages: Same as ModelProvider.invoke.
+
+        :param as_str: bool
+            If `True`, returns only the main content of the first response
+            (`response.choices[0].message.content`).
+            If `False`, returns the full response object, whose type depends on
+            the specific OpenAI SDK operation used (e.g., chat completion, completion, etc.).
+
+        :param invoke_kwargs:
+            Same as ModelProvider.invoke.
+
+        """
+        invoke_kwargs = self.get_invoke_kwargs(invoke_kwargs)
         response = self._default_operation(
             model=self.endpoint, messages=messages, **invoke_kwargs
         )
-
+        if as_str:
+            return response.choices[0].message.content
+        return response
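Given an already-constructed `OpenAIProvider` (its initialization is outside this hunk), the reworked `invoke()` takes chat-style messages and can collapse the reply to plain text. A usage sketch, assuming `provider` is such an instance and OpenAI credentials are configured:

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the capital of France?"},
    ]

    # as_str=True returns response.choices[0].message.content;
    # as_str=False returns the full chat-completion object from the OpenAI SDK.
    answer = provider.invoke(messages=messages, as_str=True, temperature=0.0)
    full_response = provider.invoke(messages=messages)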
mlrun/datastore/remote_client.py
CHANGED

@@ -18,6 +18,17 @@ import mlrun
 
 
 class BaseRemoteClient:
+    """
+    The BaseRemoteClient class serves as a foundational component for managing
+    secrets and configurations.
+    It is designed to be extended by subclasses that interact with external services,
+    such as file systems (e.g., Datastore) or model providers (e.g., ModelProvider).
+
+    This class is intended to provide shared functionality and should not be
+    used directly. Instead, create a subclass to implement logic specific to
+    your use case, such as interactions with S3 storage or invoking model providers like OpenAI.
+    """
+
     def __init__(self, parent, kind, name, endpoint="", secrets: Optional[dict] = None):
         self._parent = parent
         self.kind = kind
mlrun/execution.py
CHANGED

@@ -961,6 +961,11 @@ class MLClientCtx:
         :returns: The logged `LLMPromptArtifact` object.
         """
 
+        if not prompt_string and not prompt_path:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Either 'prompt_string' or 'prompt_path' must be provided"
+            )
+
         llm_prompt = LLMPromptArtifact(
             key=key,
             project=self.project or "",
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py
CHANGED

@@ -804,25 +804,45 @@ class V3IOTSDBConnector(TSDBConnector):
     @staticmethod
     def _get_sql_query(
         *,
-        endpoint_id: str,
         table_path: str,
+        endpoint_id: Optional[str] = None,
+        application_names: Optional[list[str]] = None,
         name: str = mm_schemas.ResultData.RESULT_NAME,
         metric_and_app_names: Optional[list[tuple[str, str]]] = None,
         columns: Optional[list[str]] = None,
+        group_by_columns: Optional[list[str]] = None,
     ) -> str:
         """Get the SQL query for the results/metrics table"""
+
+        if metric_and_app_names and not endpoint_id:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "If metric_and_app_names is provided, endpoint_id must also be provided"
+            )
+
+        if metric_and_app_names and application_names:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Cannot provide both metric_and_app_names and application_names"
+            )
+
         if columns:
             selection = ",".join(columns)
         else:
             selection = "*"
 
         with StringIO() as query:
-
-
-
-
+            where_added = False
+            query.write(f"SELECT {selection} FROM '{table_path}'")
+            if endpoint_id:
+                query.write(
+                    f" WHERE {mm_schemas.WriterEvent.ENDPOINT_ID}='{endpoint_id}'"
+                )
+                where_added = True
             if metric_and_app_names:
-
+                if where_added:
+                    query.write(" AND (")
+                else:
+                    query.write(" WHERE (")
+                    where_added = True
 
                 for i, (app_name, result_name) in enumerate(metric_and_app_names):
                     sub_cond = (

@@ -835,6 +855,22 @@ class V3IOTSDBConnector(TSDBConnector):
 
                 query.write(")")
 
+            if application_names:
+                if where_added:
+                    query.write(" AND (")
+                else:
+                    query.write(" WHERE (")
+                for i, app_name in enumerate(application_names):
+                    sub_cond = f"{mm_schemas.WriterEvent.APPLICATION_NAME}='{app_name}'"
+                    if i != 0:  # not first sub condition
+                        query.write(" OR ")
+                    query.write(sub_cond)
+                query.write(")")
+
+            if group_by_columns:
+                query.write(" GROUP BY ")
+                query.write(",".join(group_by_columns))
+
             query.write(";")
             return query.getvalue()
 

@@ -1272,7 +1308,49 @@
         end: Optional[Union[datetime, str]] = None,
         application_names: Optional[Union[str, list[str]]] = None,
     ) -> dict[str, int]:
-
+        start, end = get_start_end(start=start, end=end, delta=timedelta(hours=24))
+        group_by_columns = [
+            mm_schemas.ApplicationEvent.APPLICATION_NAME,
+            mm_schemas.ApplicationEvent.ENDPOINT_ID,
+        ]
+
+        def get_application_endpoints_records(
+            record_type: Literal["metrics", "results"],
+        ):
+            if record_type == "results":
+                table_path = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
+            else:
+                table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
+            sql_query = self._get_sql_query(
+                table_path=table_path,
+                columns=[mm_schemas.WriterEvent.START_INFER_TIME],
+                group_by_columns=group_by_columns,
+                application_names=application_names,
+            )
+            return self.frames_client.read(
+                backend=_TSDB_BE,
+                start=start,
+                end=end,
+                query=sql_query,
+            )
+
+        df_results = get_application_endpoints_records("results")
+        df_metrics = get_application_endpoints_records("metrics")
+
+        if df_results.empty and df_metrics.empty:
+            return {}
+
+        # Combine the two dataframes and count unique endpoints per application
+        combined_df = pd.concat([df_results, df_metrics], ignore_index=True)
+        if combined_df.empty:
+            return {}
+        combined_df.drop_duplicates(subset=group_by_columns, inplace=True)
+
+        grouped_df = combined_df.groupby(
+            mm_schemas.WriterEvent.APPLICATION_NAME
+        ).count()
+
+        return grouped_df[mm_schemas.WriterEvent.ENDPOINT_ID].to_dict()
 
     def calculate_latest_metrics(
         self,

@@ -1282,4 +1360,93 @@
     ) -> list[
         Union[mm_schemas.ApplicationResultRecord, mm_schemas.ApplicationMetricRecord]
     ]:
-
+        metric_list = []
+        start, end = get_start_end(start=start, end=end, delta=timedelta(hours=24))
+
+        # Get the latest results
+        def get_latest_metrics_records(
+            record_type: Literal["metrics", "results"],
+        ) -> pd.DataFrame:
+            group_by_columns = [mm_schemas.ApplicationEvent.APPLICATION_NAME]
+            if record_type == "results":
+                table_path = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
+                columns = [
+                    f"last({mm_schemas.ResultData.RESULT_STATUS})",
+                    f"last({mm_schemas.ResultData.RESULT_VALUE})",
+                    f"last({mm_schemas.ResultData.RESULT_KIND})",
+                ]
+                group_by_columns += [
+                    mm_schemas.ResultData.RESULT_NAME,
+                ]
+            else:
+                table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
+                columns = [f"last({mm_schemas.MetricData.METRIC_VALUE})"]
+                group_by_columns += [
+                    mm_schemas.MetricData.METRIC_NAME,
+                ]
+            sql_query = self._get_sql_query(
+                table_path=table_path,
+                columns=columns,
+                group_by_columns=group_by_columns,
+                application_names=application_names,
+            )
+
+            return self.frames_client.read(
+                backend=_TSDB_BE,
+                start=start,
+                end=end,
+                query=sql_query,
+            )
+
+        df_results = get_latest_metrics_records("results")
+        df_metrics = get_latest_metrics_records("metrics")
+
+        if df_results.empty and df_metrics.empty:
+            return metric_list
+
+        # Convert the results DataFrame to a list of ApplicationResultRecord
+        def build_metric_objects() -> (
+            list[
+                Union[
+                    mm_schemas.ApplicationResultRecord,
+                    mm_schemas.ApplicationMetricRecord,
+                ]
+            ]
+        ):
+            metric_objects = []
+            if not df_results.empty:
+                df_results.rename(
+                    columns={
+                        f"last({mm_schemas.ResultData.RESULT_VALUE})": mm_schemas.ResultData.RESULT_VALUE,
+                        f"last({mm_schemas.ResultData.RESULT_STATUS})": mm_schemas.ResultData.RESULT_STATUS,
+                        f"last({mm_schemas.ResultData.RESULT_KIND})": mm_schemas.ResultData.RESULT_KIND,
+                    },
+                    inplace=True,
+                )
+                for _, row in df_results.iterrows():
+                    metric_objects.append(
+                        mm_schemas.ApplicationResultRecord(
+                            result_name=row[mm_schemas.ResultData.RESULT_NAME],
+                            kind=row[mm_schemas.ResultData.RESULT_KIND],
+                            status=row[mm_schemas.ResultData.RESULT_STATUS],
+                            value=row[mm_schemas.ResultData.RESULT_VALUE],
+                        )
+                    )
+            if not df_metrics.empty:
+                df_metrics.rename(
+                    columns={
+                        f"last({mm_schemas.MetricData.METRIC_VALUE})": mm_schemas.MetricData.METRIC_VALUE,
+                    },
+                    inplace=True,
+                )
+
+                for _, row in df_metrics.iterrows():
+                    metric_objects.append(
+                        mm_schemas.ApplicationMetricRecord(
+                            metric_name=row[mm_schemas.MetricData.METRIC_NAME],
+                            value=row[mm_schemas.MetricData.METRIC_VALUE],
+                        )
+                    )
+            return metric_objects
+
+        return build_metric_objects()
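`_get_sql_query` now appends a WHERE clause only for the filters that are actually supplied and can add a GROUP BY suffix. A standalone sketch of the same string composition, using simplified literal column names instead of the mm_schemas constants (illustrative, not the mlrun implementation itself):

    from io import StringIO

    def build_query(table_path, endpoint_id=None, application_names=None,
                    group_by_columns=None, columns=None):
        selection = ",".join(columns) if columns else "*"
        with StringIO() as query:
            where_added = False
            query.write(f"SELECT {selection} FROM '{table_path}'")
            if endpoint_id:
                query.write(f" WHERE endpoint_id='{endpoint_id}'")
                where_added = True
            if application_names:
                query.write(" AND (" if where_added else " WHERE (")
                query.write(" OR ".join(f"application_name='{app}'" for app in application_names))
                query.write(")")
            if group_by_columns:
                query.write(" GROUP BY " + ",".join(group_by_columns))
            query.write(";")
            return query.getvalue()

    print(build_query("app-results", application_names=["app1", "app2"],
                      columns=["start_infer_time"],
                      group_by_columns=["application_name", "endpoint_id"]))
    # Prints (one line): SELECT start_infer_time FROM 'app-results'
    #   WHERE (application_name='app1' OR application_name='app2')
    #   GROUP BY application_name,endpoint_id;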
mlrun/projects/pipelines.py
CHANGED

@@ -1081,34 +1081,56 @@ def rerun_workflow(
     :param run_uid: The run UID of the original workflow to retry.
     :param project_name: The project name.
     """
+    db = mlrun.get_run_db()
 
     try:
-        #
-
-        # Retry the pipeline - TODO: add submit-direct flag when created
-        db = mlrun.get_run_db()
+        # Invoke the KFP retry endpoint (direct-submit mode)
         new_pipeline_id = db.retry_pipeline(
-            run_uid,
+            run_id=run_uid,
+            project=project_name,
+            submit_mode=mlrun_constants.WorkflowSubmitMode.direct,
+        )
+        logger.info(
+            "KFP retry submitted",
+            new_pipeline_id=new_pipeline_id,
+            rerun_of_workflow=run_uid,
         )
 
-
-
-
+    except mlrun.errors.MLRunHTTPError as http_exc:
+        logger.error(
+            "Failed calling KFP retry API",
+            run_id=run_uid,
+            error=err_to_str(http_exc),
         )
-
+        raise
 
-
+    # Enqueue "running" notifications server-side for this RerunRunner run
+    db.push_run_notifications(context.uid, project_name)
 
-
-
+    context.set_label(mlrun_constants.MLRunInternalLabels.workflow_id, new_pipeline_id)
+    context.update_run()
+
+    context.log_result("workflow_id", new_pipeline_id)
+
+    try:
+        pipeline = wait_for_pipeline_completion(
             new_pipeline_id,
             project=project_name,
         )
-
-    # Temporary exception
     except Exception as exc:
-
-
+        mlrun.utils.logger.error(
+            "Failed waiting for workflow completion",
+            rerun_pipeline_id=new_pipeline_id,
+            exc=err_to_str(exc),
+        )
+    else:
+        final_state = pipeline["run"]["status"]
+        context.log_result("workflow_state", final_state, commit=True)
+
+        if final_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
+            raise mlrun.errors.MLRunRuntimeError(
+                f"Pipeline retry of {run_uid} finished in state={final_state}"
+            )
 
 
 def load_and_run(context, *args, **kwargs):

@@ -1201,13 +1223,13 @@ def load_and_run_workflow(
     start_notifications = [
         notification
         for notification in context.get_notifications(unmask_secret_params=True)
-        if
+        if mlrun.common.runtimes.constants.RunStates.running in notification.when
     ]
 
     # Prevent redundant notifications for run completion by ensuring that notifications are only triggered when the run
     # reaches the "running" state, as the server already handles the completion notifications.
     for notification in start_notifications:
-        notification.when = [
+        notification.when = [mlrun.common.runtimes.constants.RunStates.running]
 
     workflow_log_message = workflow_name or workflow_path
     context.logger.info(
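In load_and_run_workflow, client-side notifications are now reduced to the ones configured for the "running" state and pinned to that state, leaving completion notifications to the server. A small sketch of that filtering, using plain dictionaries as stand-ins for mlrun notification objects:

    RUNNING = "running"

    # Stand-ins for mlrun notification objects (illustrative only).
    notifications = [
        {"name": "slack-start", "when": ["running", "completed"]},
        {"name": "slack-done", "when": ["completed", "error"]},
    ]

    # Keep only notifications that fire on the "running" state...
    start_notifications = [n for n in notifications if RUNNING in n["when"]]
    # ...and pin them to that state so the server handles completion notifications.
    for n in start_notifications:
        n["when"] = [RUNNING]

    assert [n["name"] for n in start_notifications] == ["slack-start"]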
mlrun/projects/project.py
CHANGED

@@ -1942,6 +1942,11 @@ class MlrunProject(ModelObj):
         :returns: The logged `LLMPromptArtifact` object.
         """
 
+        if not prompt_string and not prompt_path:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Either 'prompt_string' or 'prompt_path' must be provided"
+            )
+
         llm_prompt = LLMPromptArtifact(
             key=key,
             project=self.name,
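Both MlrunProject and MLClientCtx now validate the prompt inputs up front instead of logging an empty artifact. A usage sketch; the method name `log_llm_prompt` is assumed from the surrounding code and is not shown in these hunks:

    import mlrun

    project = mlrun.get_or_create_project("my-project", context="./")

    # Supplying a prompt string (or a prompt_path) works as before.
    prompt_artifact = project.log_llm_prompt(  # method name assumed
        key="summarize",
        prompt_string="Summarize the following text: {text}",
    )

    # Omitting both prompt_string and prompt_path now raises an error.
    try:
        project.log_llm_prompt(key="empty-prompt")
    except mlrun.errors.MLRunInvalidArgumentError as exc:
        print(exc)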