genesis-flow 1.0.0__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {genesis_flow-1.0.0.dist-info → genesis_flow-1.0.3.dist-info}/METADATA +32 -2
- {genesis_flow-1.0.0.dist-info → genesis_flow-1.0.3.dist-info}/RECORD +27 -24
- mlflow/data/dataset_source_registry.py +8 -0
- mlflow/gateway/providers/bedrock.py +298 -0
- mlflow/genai/datasets/databricks_evaluation_dataset_source.py +77 -0
- mlflow/genai/datasets/evaluation_dataset.py +8 -5
- mlflow/genai/scorers/base.py +22 -14
- mlflow/langchain/utils/chat.py +10 -0
- mlflow/models/container/__init__.py +2 -2
- mlflow/spark/__init__.py +1286 -0
- mlflow/store/artifact/azure_blob_artifact_repo.py +1 -1
- mlflow/store/artifact/azure_data_lake_artifact_repo.py +1 -1
- mlflow/store/artifact/gcs_artifact_repo.py +1 -1
- mlflow/store/artifact/local_artifact_repo.py +2 -1
- mlflow/store/artifact/s3_artifact_repo.py +173 -3
- mlflow/tracing/client.py +139 -49
- mlflow/tracing/export/mlflow_v3.py +8 -11
- mlflow/tracing/provider.py +5 -1
- mlflow/tracking/_model_registry/client.py +5 -1
- mlflow/utils/file_utils.py +2 -1
- mlflow/utils/rest_utils.py +4 -0
- mlflow/utils/secure_loading.py +62 -8
- mlflow/version.py +2 -2
- {genesis_flow-1.0.0.dist-info → genesis_flow-1.0.3.dist-info}/WHEEL +0 -0
- {genesis_flow-1.0.0.dist-info → genesis_flow-1.0.3.dist-info}/entry_points.txt +0 -0
- {genesis_flow-1.0.0.dist-info → genesis_flow-1.0.3.dist-info}/licenses/LICENSE.txt +0 -0
- {genesis_flow-1.0.0.dist-info → genesis_flow-1.0.3.dist-info}/top_level.txt +0 -0
@@ -18,7 +18,6 @@ from mlflow.tracing.fluent import _EVAL_REQUEST_ID_TO_TRACE_ID, _set_last_active
|
|
18
18
|
from mlflow.tracing.trace_manager import InMemoryTraceManager
|
19
19
|
from mlflow.tracing.utils import add_size_stats_to_trace_metadata, maybe_get_request_id
|
20
20
|
from mlflow.utils.databricks_utils import is_in_databricks_notebook
|
21
|
-
from mlflow.utils.uri import is_databricks_uri
|
22
21
|
|
23
22
|
_logger = logging.getLogger(__name__)
|
24
23
|
|
@@ -30,13 +29,15 @@ class MlflowV3SpanExporter(SpanExporter):
|
|
30
29
|
"""
|
31
30
|
|
32
31
|
def __init__(self, tracking_uri: Optional[str] = None):
|
33
|
-
self._client = TracingClient(tracking_uri)
|
34
32
|
self._is_async_enabled = self._should_enable_async_logging()
|
35
33
|
if self._is_async_enabled:
|
36
34
|
self._async_queue = AsyncTraceExportQueue()
|
35
|
+
self._client = TracingClient(tracking_uri)
|
37
36
|
|
38
|
-
#
|
39
|
-
self.
|
37
|
+
# Only display traces inline in Databricks notebooks
|
38
|
+
self._should_display_trace = is_in_databricks_notebook()
|
39
|
+
if self._should_display_trace:
|
40
|
+
self._display_handler = get_display_handler()
|
40
41
|
|
41
42
|
def export(self, spans: Sequence[ReadableSpan]):
|
42
43
|
"""
|
@@ -64,7 +65,7 @@ class MlflowV3SpanExporter(SpanExporter):
|
|
64
65
|
if eval_request_id := trace.info.tags.get(TraceTagKey.EVAL_REQUEST_ID):
|
65
66
|
_EVAL_REQUEST_ID_TO_TRACE_ID[eval_request_id] = trace.info.trace_id
|
66
67
|
|
67
|
-
if not maybe_get_request_id(is_evaluate=True):
|
68
|
+
if self._should_display_trace and not maybe_get_request_id(is_evaluate=True):
|
68
69
|
self._display_handler.display_traces([trace])
|
69
70
|
|
70
71
|
if self._should_log_async():
|
@@ -88,7 +89,7 @@ class MlflowV3SpanExporter(SpanExporter):
|
|
88
89
|
try:
|
89
90
|
if trace:
|
90
91
|
add_size_stats_to_trace_metadata(trace)
|
91
|
-
returned_trace_info = self._client.
|
92
|
+
returned_trace_info = self._client.start_trace_v3(trace)
|
92
93
|
self._client._upload_trace_data(returned_trace_info, trace.data)
|
93
94
|
else:
|
94
95
|
_logger.warning("No trace or trace info provided, unable to export")
|
@@ -110,11 +111,7 @@ class MlflowV3SpanExporter(SpanExporter):
|
|
110
111
|
_logger.warning(f"Failed to link prompts to trace: {e}")
|
111
112
|
|
112
113
|
def _should_enable_async_logging(self):
|
113
|
-
if (
|
114
|
-
is_in_databricks_notebook()
|
115
|
-
# NB: Not defaulting OSS backend to async logging for now to reduce blast radius.
|
116
|
-
or not is_databricks_uri(self._client.tracking_uri)
|
117
|
-
):
|
114
|
+
if is_in_databricks_notebook():
|
118
115
|
# NB: We don't turn on async logging in Databricks notebook by default
|
119
116
|
# until we are confident that the async logging is working on the
|
120
117
|
# offline workload on Databricks, to derisk the inclusion to the
|
mlflow/tracing/provider.py
CHANGED
@@ -16,6 +16,7 @@ from typing import TYPE_CHECKING, Optional
|
|
16
16
|
|
17
17
|
from opentelemetry import context as context_api
|
18
18
|
from opentelemetry import trace
|
19
|
+
from opentelemetry.sdk.resources import Resource
|
19
20
|
from opentelemetry.sdk.trace import TracerProvider
|
20
21
|
|
21
22
|
import mlflow
|
@@ -277,7 +278,10 @@ def _setup_tracer_provider(disabled=False):
|
|
277
278
|
# Default to MLflow Tracking Server
|
278
279
|
processor = _get_mlflow_span_processor(tracking_uri=mlflow.get_tracking_uri())
|
279
280
|
|
280
|
-
|
281
|
+
# Setting an empty resource to avoid triggering resource aggregation, which causes
|
282
|
+
# an issue in LiteLLM tracing: https://github.com/mlflow/mlflow/issues/16296
|
283
|
+
# MLflow tracing does not use resource right now.
|
284
|
+
tracer_provider = TracerProvider(resource=Resource.get_empty())
|
281
285
|
tracer_provider.add_span_processor(processor)
|
282
286
|
_MLFLOW_TRACER_PROVIDER = tracer_provider
|
283
287
|
|
@@ -141,7 +141,11 @@ class ModelRegistryClient:
|
|
141
141
|
obtained via the ``token`` attribute of the object.
|
142
142
|
|
143
143
|
"""
|
144
|
-
|
144
|
+
# Add prompt filter for prompt-supported registries that also support filter_string
|
145
|
+
# Unity Catalog supports prompts but not filter_string parameter
|
146
|
+
if is_prompt_supported_registry(self.registry_uri) and not (
|
147
|
+
self.registry_uri or ""
|
148
|
+
).startswith("databricks-uc"):
|
145
149
|
# Adjust filter string to include or exclude prompts
|
146
150
|
filter_string = add_prompt_filter_string(filter_string, False)
|
147
151
|
|
mlflow/utils/file_utils.py
CHANGED
@@ -826,7 +826,8 @@ def shutil_copytree_without_file_permissions(src_dir, dst_dir):
|
|
826
826
|
# For each directory <dirname> immediately under <dirpath>, create an equivalently-named
|
827
827
|
# directory under the destination directory
|
828
828
|
abs_dir_path = os.path.join(dst_dir, relative_dir_path)
|
829
|
-
os.
|
829
|
+
if not os.path.exists(abs_dir_path):
|
830
|
+
os.mkdir(abs_dir_path)
|
830
831
|
for filename in filenames:
|
831
832
|
# For each file with name <filename> immediately under <dirpath>, copy that file to
|
832
833
|
# the appropriate location in the destination directory
|
mlflow/utils/rest_utils.py
CHANGED
@@ -119,6 +119,7 @@ def http_request(
|
|
119
119
|
host_creds.token,
|
120
120
|
host_creds.databricks_auth_profile,
|
121
121
|
retry_timeout_seconds=retry_timeout_seconds,
|
122
|
+
timeout=timeout,
|
122
123
|
)
|
123
124
|
|
124
125
|
def make_sdk_call():
|
@@ -259,6 +260,7 @@ def get_workspace_client(
|
|
259
260
|
token,
|
260
261
|
databricks_auth_profile,
|
261
262
|
retry_timeout_seconds=None,
|
263
|
+
timeout=None,
|
262
264
|
):
|
263
265
|
from databricks.sdk import WorkspaceClient
|
264
266
|
from databricks.sdk.config import Config
|
@@ -267,6 +269,8 @@ def get_workspace_client(
|
|
267
269
|
kwargs = {"host": host, "token": token}
|
268
270
|
else:
|
269
271
|
kwargs = {"profile": databricks_auth_profile}
|
272
|
+
if timeout is not None:
|
273
|
+
kwargs["http_timeout_seconds"] = timeout
|
270
274
|
config = Config(
|
271
275
|
**kwargs,
|
272
276
|
retry_timeout_seconds=retry_timeout_seconds
|
mlflow/utils/secure_loading.py
CHANGED
@@ -22,14 +22,14 @@ SAFE_PICKLE_CLASSES = {
|
|
22
22
|
'numpy.dtype',
|
23
23
|
'numpy.int32', 'numpy.int64', 'numpy.float32', 'numpy.float64',
|
24
24
|
'numpy.bool_', 'numpy.str_',
|
25
|
-
|
25
|
+
|
26
26
|
# Pandas types
|
27
27
|
'pandas.core.frame.DataFrame',
|
28
28
|
'pandas.core.series.Series',
|
29
29
|
'pandas.core.index.Index',
|
30
30
|
'pandas.core.dtypes.dtypes.CategoricalDtype',
|
31
|
-
|
32
|
-
# Scikit-learn estimators
|
31
|
+
|
32
|
+
# Scikit-learn estimators
|
33
33
|
'sklearn.linear_model._base.LinearRegression',
|
34
34
|
'sklearn.linear_model._logistic.LogisticRegression',
|
35
35
|
'sklearn.ensemble._forest.RandomForestClassifier',
|
@@ -38,20 +38,74 @@ SAFE_PICKLE_CLASSES = {
|
|
38
38
|
'sklearn.tree._classes.DecisionTreeRegressor',
|
39
39
|
'sklearn.svm._classes.SVC',
|
40
40
|
'sklearn.svm._classes.SVR',
|
41
|
-
|
42
|
-
#
|
41
|
+
|
42
|
+
# Built-in types
|
43
43
|
'builtins.dict', 'builtins.list', 'builtins.tuple', 'builtins.set',
|
44
44
|
'builtins.str', 'builtins.int', 'builtins.float', 'builtins.bool',
|
45
|
-
|
45
|
+
'builtins.type',
|
46
|
+
|
46
47
|
# Collections
|
47
48
|
'collections.OrderedDict',
|
48
49
|
'collections.defaultdict',
|
49
|
-
|
50
|
-
#
|
50
|
+
|
51
|
+
# MLflow types
|
51
52
|
'mlflow.models.signature.ModelSignature',
|
53
|
+
'mlflow.models.signature._TypeHints',
|
52
54
|
'mlflow.types.schema.Schema',
|
55
|
+
'mlflow.pyfunc.model.PythonModel',
|
56
|
+
|
57
|
+
# Cloudpickle internals
|
58
|
+
'cloudpickle.cloudpickle._make_skeleton_class',
|
59
|
+
'cloudpickle.cloudpickle._class_setstate',
|
60
|
+
'cloudpickle.cloudpickle._make_function',
|
61
|
+
'cloudpickle.cloudpickle._builtin_type',
|
62
|
+
'cloudpickle.cloudpickle._function_setstate',
|
63
|
+
'cloudpickle.cloudpickle._make_empty_cell',
|
64
|
+
'cloudpickle.cloudpickle._make_cell',
|
65
|
+
|
66
|
+
# Sentence Transformers
|
67
|
+
'sentence_transformers.SentenceTransformer.SentenceTransformer',
|
68
|
+
'sentence_transformers.model_card.SentenceTransformerModelCardData',
|
69
|
+
'sentence_transformers.models.Transformer.Transformer',
|
70
|
+
'sentence_transformers.models.Pooling.Pooling',
|
71
|
+
'sentence_transformers.models.Normalize.Normalize',
|
72
|
+
|
73
|
+
# Torch
|
74
|
+
'torch.torch_version.TorchVersion',
|
75
|
+
'torch._utils._rebuild_tensor_v2',
|
76
|
+
'torch.storage._load_from_bytes',
|
77
|
+
'torch.nn.modules.sparse.Embedding',
|
78
|
+
'torch._utils._rebuild_parameter',
|
79
|
+
'torch.nn.modules.normalization.LayerNorm',
|
80
|
+
'torch.nn.modules.dropout.Dropout',
|
81
|
+
'torch.nn.modules.container.ModuleList',
|
82
|
+
'torch.nn.modules.linear.Linear',
|
83
|
+
'torch.nn.modules.activation.Tanh',
|
84
|
+
'torch.float32',
|
85
|
+
'torch._C._nn.gelu',
|
86
|
+
|
87
|
+
# Transformers
|
88
|
+
'transformers.models.bert.modeling_bert.BertModel',
|
89
|
+
'transformers.models.bert.modeling_bert.BertEmbeddings',
|
90
|
+
'transformers.models.bert.modeling_bert.BertEncoder',
|
91
|
+
'transformers.models.bert.modeling_bert.BertLayer',
|
92
|
+
'transformers.models.bert.modeling_bert.BertAttention',
|
93
|
+
'transformers.models.bert.modeling_bert.BertSdpaSelfAttention',
|
94
|
+
'transformers.models.bert.modeling_bert.BertSelfOutput',
|
95
|
+
'transformers.models.bert.modeling_bert.BertIntermediate',
|
96
|
+
'transformers.models.bert.modeling_bert.BertOutput',
|
97
|
+
'transformers.models.bert.modeling_bert.BertPooler',
|
98
|
+
'transformers.models.bert.configuration_bert.BertConfig',
|
99
|
+
'transformers.models.bert.tokenization_bert_fast.BertTokenizerFast',
|
100
|
+
'transformers.activations.GELUActivation',
|
101
|
+
|
102
|
+
# Tokenizers
|
103
|
+
'tokenizers.Tokenizer',
|
104
|
+
'tokenizers.models.Model',
|
105
|
+
'tokenizers.AddedToken',
|
53
106
|
}
|
54
107
|
|
108
|
+
|
55
109
|
class RestrictedUnpickler(pickle.Unpickler):
|
56
110
|
"""
|
57
111
|
Secure unpickler that only allows safe, whitelisted classes.
|
mlflow/version.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
import importlib.metadata
|
3
3
|
import re
|
4
4
|
|
5
|
-
VERSION = "3.1.
|
5
|
+
VERSION = "3.1.4"
|
6
6
|
|
7
7
|
|
8
8
|
def is_release_version():
|
@@ -21,4 +21,4 @@ def _is_package_installed(package_name: str) -> bool:
|
|
21
21
|
# installed, or includes the full MLflow or mlflow-skinny package.
|
22
22
|
# This is used to determine whether to import modules that require
|
23
23
|
# dependencies that are not included in the tracing SDK.
|
24
|
-
IS_TRACING_SDK_ONLY = not any(_is_package_installed(pkg) for pkg in ["mlflow", "mlflow-skinny"])
|
24
|
+
IS_TRACING_SDK_ONLY = not any(_is_package_installed(pkg) for pkg in ["mlflow", "mlflow-skinny", "genesis-flow"])
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|