mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +18 -18
- mlrun/__main__.py +3 -3
- mlrun/alerts/alert.py +19 -12
- mlrun/artifacts/__init__.py +0 -2
- mlrun/artifacts/base.py +34 -11
- mlrun/artifacts/dataset.py +16 -16
- mlrun/artifacts/manager.py +13 -13
- mlrun/artifacts/model.py +66 -53
- mlrun/common/constants.py +6 -0
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/feature_set.py +1 -0
- mlrun/common/formatters/function.py +1 -0
- mlrun/common/formatters/model_endpoint.py +30 -0
- mlrun/common/formatters/pipeline.py +1 -2
- mlrun/common/formatters/project.py +9 -0
- mlrun/common/model_monitoring/__init__.py +0 -3
- mlrun/common/model_monitoring/helpers.py +1 -1
- mlrun/common/runtimes/constants.py +1 -2
- mlrun/common/schemas/__init__.py +7 -2
- mlrun/common/schemas/alert.py +31 -18
- mlrun/common/schemas/api_gateway.py +3 -3
- mlrun/common/schemas/artifact.py +7 -13
- mlrun/common/schemas/auth.py +6 -4
- mlrun/common/schemas/background_task.py +7 -7
- mlrun/common/schemas/client_spec.py +2 -2
- mlrun/common/schemas/clusterization_spec.py +2 -2
- mlrun/common/schemas/common.py +53 -3
- mlrun/common/schemas/datastore_profile.py +1 -1
- mlrun/common/schemas/feature_store.py +9 -9
- mlrun/common/schemas/frontend_spec.py +4 -4
- mlrun/common/schemas/function.py +10 -10
- mlrun/common/schemas/hub.py +1 -1
- mlrun/common/schemas/k8s.py +3 -3
- mlrun/common/schemas/memory_reports.py +3 -3
- mlrun/common/schemas/model_monitoring/__init__.py +8 -1
- mlrun/common/schemas/model_monitoring/constants.py +62 -12
- mlrun/common/schemas/model_monitoring/grafana.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +149 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +22 -6
- mlrun/common/schemas/notification.py +18 -3
- mlrun/common/schemas/object.py +1 -1
- mlrun/common/schemas/pagination.py +4 -4
- mlrun/common/schemas/partition.py +137 -0
- mlrun/common/schemas/pipeline.py +2 -2
- mlrun/common/schemas/project.py +22 -17
- mlrun/common/schemas/runs.py +2 -2
- mlrun/common/schemas/runtime_resource.py +5 -5
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/secret.py +1 -1
- mlrun/common/schemas/tag.py +3 -3
- mlrun/common/schemas/workflow.py +5 -5
- mlrun/config.py +65 -15
- mlrun/data_types/__init__.py +0 -2
- mlrun/data_types/data_types.py +0 -1
- mlrun/data_types/infer.py +3 -1
- mlrun/data_types/spark.py +4 -4
- mlrun/data_types/to_pandas.py +2 -11
- mlrun/datastore/__init__.py +0 -2
- mlrun/datastore/alibaba_oss.py +4 -1
- mlrun/datastore/azure_blob.py +4 -1
- mlrun/datastore/base.py +12 -4
- mlrun/datastore/datastore.py +9 -3
- mlrun/datastore/datastore_profile.py +20 -20
- mlrun/datastore/dbfs_store.py +4 -1
- mlrun/datastore/filestore.py +4 -1
- mlrun/datastore/google_cloud_storage.py +4 -1
- mlrun/datastore/hdfs.py +4 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +4 -1
- mlrun/datastore/s3.py +4 -1
- mlrun/datastore/sources.py +51 -49
- mlrun/datastore/store_resources.py +0 -2
- mlrun/datastore/targets.py +22 -23
- mlrun/datastore/utils.py +2 -2
- mlrun/datastore/v3io.py +4 -1
- mlrun/datastore/wasbfs/fs.py +13 -12
- mlrun/db/base.py +170 -64
- mlrun/db/factory.py +3 -0
- mlrun/db/httpdb.py +986 -238
- mlrun/db/nopdb.py +155 -57
- mlrun/errors.py +2 -2
- mlrun/execution.py +55 -29
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +40 -40
- mlrun/feature_store/common.py +9 -9
- mlrun/feature_store/feature_set.py +20 -18
- mlrun/feature_store/feature_vector.py +27 -24
- mlrun/feature_store/retrieval/base.py +14 -9
- mlrun/feature_store/retrieval/job.py +2 -1
- mlrun/feature_store/steps.py +2 -2
- mlrun/features.py +30 -13
- mlrun/frameworks/__init__.py +1 -2
- mlrun/frameworks/_common/__init__.py +1 -2
- mlrun/frameworks/_common/artifacts_library.py +2 -2
- mlrun/frameworks/_common/mlrun_interface.py +10 -6
- mlrun/frameworks/_common/model_handler.py +29 -27
- mlrun/frameworks/_common/producer.py +3 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
- mlrun/frameworks/_ml_common/__init__.py +1 -2
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_ml_common/model_handler.py +21 -21
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/auto_mlrun/__init__.py +1 -2
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
- mlrun/frameworks/huggingface/__init__.py +1 -2
- mlrun/frameworks/huggingface/model_server.py +9 -9
- mlrun/frameworks/lgbm/__init__.py +47 -44
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
- mlrun/frameworks/lgbm/model_handler.py +15 -11
- mlrun/frameworks/lgbm/model_server.py +11 -7
- mlrun/frameworks/lgbm/utils.py +2 -2
- mlrun/frameworks/onnx/__init__.py +1 -2
- mlrun/frameworks/onnx/dataset.py +3 -3
- mlrun/frameworks/onnx/mlrun_interface.py +2 -2
- mlrun/frameworks/onnx/model_handler.py +7 -5
- mlrun/frameworks/onnx/model_server.py +8 -6
- mlrun/frameworks/parallel_coordinates.py +11 -11
- mlrun/frameworks/pytorch/__init__.py +22 -23
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
- mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
- mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
- mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
- mlrun/frameworks/pytorch/model_handler.py +21 -17
- mlrun/frameworks/pytorch/model_server.py +13 -9
- mlrun/frameworks/sklearn/__init__.py +19 -18
- mlrun/frameworks/sklearn/estimator.py +2 -2
- mlrun/frameworks/sklearn/metric.py +3 -3
- mlrun/frameworks/sklearn/metrics_library.py +8 -6
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
- mlrun/frameworks/sklearn/model_handler.py +4 -3
- mlrun/frameworks/tf_keras/__init__.py +11 -12
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
- mlrun/frameworks/tf_keras/model_handler.py +17 -13
- mlrun/frameworks/tf_keras/model_server.py +12 -8
- mlrun/frameworks/xgboost/__init__.py +19 -18
- mlrun/frameworks/xgboost/model_handler.py +13 -9
- mlrun/launcher/base.py +3 -4
- mlrun/launcher/local.py +1 -1
- mlrun/launcher/remote.py +1 -1
- mlrun/lists.py +4 -3
- mlrun/model.py +110 -46
- mlrun/model_monitoring/__init__.py +1 -2
- mlrun/model_monitoring/api.py +6 -6
- mlrun/model_monitoring/applications/_application_steps.py +13 -15
- mlrun/model_monitoring/applications/histogram_data_drift.py +41 -15
- mlrun/model_monitoring/applications/results.py +55 -3
- mlrun/model_monitoring/controller.py +185 -223
- mlrun/model_monitoring/db/_schedules.py +156 -0
- mlrun/model_monitoring/db/_stats.py +189 -0
- mlrun/model_monitoring/db/stores/__init__.py +1 -1
- mlrun/model_monitoring/db/stores/base/store.py +6 -65
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -25
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -97
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +2 -58
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -15
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +6 -257
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +9 -271
- mlrun/model_monitoring/db/tsdb/base.py +76 -24
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +61 -6
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +253 -28
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -17
- mlrun/model_monitoring/helpers.py +91 -1
- mlrun/model_monitoring/model_endpoint.py +4 -2
- mlrun/model_monitoring/stream_processing.py +16 -13
- mlrun/model_monitoring/tracking_policy.py +10 -3
- mlrun/model_monitoring/writer.py +47 -26
- mlrun/package/__init__.py +3 -6
- mlrun/package/context_handler.py +1 -1
- mlrun/package/packager.py +12 -9
- mlrun/package/packagers/__init__.py +0 -2
- mlrun/package/packagers/default_packager.py +14 -11
- mlrun/package/packagers/numpy_packagers.py +16 -7
- mlrun/package/packagers/pandas_packagers.py +18 -18
- mlrun/package/packagers/python_standard_library_packagers.py +25 -11
- mlrun/package/packagers_manager.py +31 -14
- mlrun/package/utils/__init__.py +0 -3
- mlrun/package/utils/_pickler.py +6 -6
- mlrun/platforms/__init__.py +3 -16
- mlrun/platforms/iguazio.py +4 -1
- mlrun/projects/operations.py +27 -27
- mlrun/projects/pipelines.py +34 -35
- mlrun/projects/project.py +535 -182
- mlrun/run.py +13 -10
- mlrun/runtimes/__init__.py +1 -3
- mlrun/runtimes/base.py +15 -11
- mlrun/runtimes/daskjob.py +9 -9
- mlrun/runtimes/generators.py +2 -1
- mlrun/runtimes/kubejob.py +4 -5
- mlrun/runtimes/mounts.py +572 -0
- mlrun/runtimes/mpijob/__init__.py +0 -2
- mlrun/runtimes/mpijob/abstract.py +7 -6
- mlrun/runtimes/nuclio/api_gateway.py +7 -7
- mlrun/runtimes/nuclio/application/application.py +11 -11
- mlrun/runtimes/nuclio/function.py +13 -13
- mlrun/runtimes/nuclio/serving.py +9 -9
- mlrun/runtimes/pod.py +154 -45
- mlrun/runtimes/remotesparkjob.py +3 -2
- mlrun/runtimes/sparkjob/__init__.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +21 -11
- mlrun/runtimes/utils.py +6 -5
- mlrun/serving/merger.py +6 -4
- mlrun/serving/remote.py +18 -17
- mlrun/serving/routers.py +27 -27
- mlrun/serving/server.py +1 -1
- mlrun/serving/states.py +76 -71
- mlrun/serving/utils.py +13 -2
- mlrun/serving/v1_serving.py +3 -2
- mlrun/serving/v2_serving.py +4 -4
- mlrun/track/__init__.py +1 -1
- mlrun/track/tracker.py +2 -2
- mlrun/track/trackers/mlflow_tracker.py +6 -5
- mlrun/utils/async_http.py +1 -1
- mlrun/utils/helpers.py +70 -16
- mlrun/utils/logger.py +106 -4
- mlrun/utils/notifications/notification/__init__.py +22 -19
- mlrun/utils/notifications/notification/base.py +33 -14
- mlrun/utils/notifications/notification/console.py +6 -6
- mlrun/utils/notifications/notification/git.py +11 -11
- mlrun/utils/notifications/notification/ipython.py +10 -9
- mlrun/utils/notifications/notification/mail.py +149 -0
- mlrun/utils/notifications/notification/slack.py +6 -6
- mlrun/utils/notifications/notification/webhook.py +18 -22
- mlrun/utils/notifications/notification_pusher.py +43 -31
- mlrun/utils/regex.py +3 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/METADATA +18 -14
- mlrun-1.8.0rc2.dist-info/RECORD +358 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/WHEEL +1 -1
- mlrun-1.7.2rc3.dist-info/RECORD +0 -351
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/LICENSE +0 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# Copyright 2024 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
#
|
|
15
|
+
from datetime import datetime, timedelta
|
|
16
|
+
|
|
17
|
+
from mlrun.common.types import StrEnum
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class PartitionInterval(StrEnum):
|
|
21
|
+
DAY = "DAY"
|
|
22
|
+
MONTH = "MONTH"
|
|
23
|
+
YEARWEEK = "YEARWEEK"
|
|
24
|
+
|
|
25
|
+
@classmethod
|
|
26
|
+
def is_valid(cls, value: str) -> bool:
|
|
27
|
+
return value in cls._value2member_map_
|
|
28
|
+
|
|
29
|
+
@classmethod
|
|
30
|
+
def valid_intervals(cls) -> list:
|
|
31
|
+
return list(cls._value2member_map_.keys())
|
|
32
|
+
|
|
33
|
+
def as_duration(self) -> timedelta:
|
|
34
|
+
"""
|
|
35
|
+
Convert the partition interval to a duration-like timedelta.
|
|
36
|
+
|
|
37
|
+
Returns:
|
|
38
|
+
timedelta: A duration representing the partition interval.
|
|
39
|
+
"""
|
|
40
|
+
if self == PartitionInterval.DAY:
|
|
41
|
+
return timedelta(days=1)
|
|
42
|
+
elif self == PartitionInterval.MONTH:
|
|
43
|
+
# Approximate a month as 30 days
|
|
44
|
+
return timedelta(days=30)
|
|
45
|
+
elif self == PartitionInterval.YEARWEEK:
|
|
46
|
+
return timedelta(weeks=1)
|
|
47
|
+
|
|
48
|
+
@classmethod
|
|
49
|
+
def from_function(cls, partition_function: str):
|
|
50
|
+
"""
|
|
51
|
+
Returns the corresponding PartitionInterval for a given partition function,
|
|
52
|
+
or None if the function is not mapped.
|
|
53
|
+
|
|
54
|
+
:param partition_function: The partition function to map to an interval.
|
|
55
|
+
:return: PartitionInterval corresponding to the function, or None if no match is found.
|
|
56
|
+
"""
|
|
57
|
+
partition_function_to_partitions_interval = {
|
|
58
|
+
"DAY": "DAY",
|
|
59
|
+
"DAYOFMONTH": "DAY",
|
|
60
|
+
"MONTH": "MONTH",
|
|
61
|
+
"YEARWEEK": "YEARWEEK",
|
|
62
|
+
}
|
|
63
|
+
interval = partition_function_to_partitions_interval.get(partition_function)
|
|
64
|
+
if interval and cls.is_valid(interval):
|
|
65
|
+
return cls[interval]
|
|
66
|
+
raise KeyError(f"Partition function: {partition_function} isn't supported")
|
|
67
|
+
|
|
68
|
+
def get_partition_info(
|
|
69
|
+
self,
|
|
70
|
+
start_datetime: datetime,
|
|
71
|
+
partition_number: int = 1,
|
|
72
|
+
) -> list[tuple[str, str]]:
|
|
73
|
+
"""
|
|
74
|
+
Generates partition details for a specified number of partitions starting from a given datetime.
|
|
75
|
+
|
|
76
|
+
:param start_datetime: The starting datetime used for generating partition details.
|
|
77
|
+
:param partition_number: The number of partitions to generate details for.
|
|
78
|
+
|
|
79
|
+
:return: A list of tuples:
|
|
80
|
+
- partition_name: The name for the partition.
|
|
81
|
+
- partition_value: The "LESS THAN" value for the next partition boundary.
|
|
82
|
+
"""
|
|
83
|
+
partitioning_information_list = []
|
|
84
|
+
current_datetime = start_datetime
|
|
85
|
+
|
|
86
|
+
for _ in range(partition_number):
|
|
87
|
+
partition_name = self.get_partition_name(current_datetime)
|
|
88
|
+
partition_boundary_date = self.get_next_partition_time(current_datetime)
|
|
89
|
+
partition_value = self.get_partition_name(partition_boundary_date)
|
|
90
|
+
partitioning_information_list.append((partition_name, partition_value))
|
|
91
|
+
|
|
92
|
+
# Move to the next interval
|
|
93
|
+
current_datetime = partition_boundary_date
|
|
94
|
+
|
|
95
|
+
return partitioning_information_list
|
|
96
|
+
|
|
97
|
+
def get_next_partition_time(self, current_datetime: datetime) -> datetime:
|
|
98
|
+
"""
|
|
99
|
+
Calculates the next partition boundary time based on the specified partition interval.
|
|
100
|
+
:param current_datetime: The current datetime from which the next interval is calculated.
|
|
101
|
+
|
|
102
|
+
:return: A datetime object representing the start of the next partition interval.
|
|
103
|
+
- If the interval is DAY, it advances by one day.
|
|
104
|
+
- If the interval is MONTH, it advances to the first day of the next month.
|
|
105
|
+
- If the interval is YEARWEEK, it advances by one week.
|
|
106
|
+
"""
|
|
107
|
+
if self == PartitionInterval.DAY:
|
|
108
|
+
return current_datetime + timedelta(days=1)
|
|
109
|
+
elif self == PartitionInterval.MONTH:
|
|
110
|
+
return (current_datetime.replace(day=1) + timedelta(days=32)).replace(day=1)
|
|
111
|
+
elif self == PartitionInterval.YEARWEEK:
|
|
112
|
+
return current_datetime + timedelta(weeks=1)
|
|
113
|
+
|
|
114
|
+
def get_partition_name(self, current_datetime: datetime) -> str:
|
|
115
|
+
if self == PartitionInterval.DAY:
|
|
116
|
+
return current_datetime.strftime("%Y%m%d")
|
|
117
|
+
elif self == PartitionInterval.MONTH:
|
|
118
|
+
return current_datetime.strftime("%Y%m")
|
|
119
|
+
elif self == PartitionInterval.YEARWEEK:
|
|
120
|
+
year, week, _ = current_datetime.isocalendar()
|
|
121
|
+
return f"{year}{week:02d}"
|
|
122
|
+
|
|
123
|
+
def get_partition_expression(self):
|
|
124
|
+
if self == PartitionInterval.YEARWEEK:
|
|
125
|
+
return "YEARWEEK(activation_time, 1)"
|
|
126
|
+
else:
|
|
127
|
+
return f"{self}(activation_time)"
|
|
128
|
+
|
|
129
|
+
def get_number_of_partitions(self, days: int) -> int:
|
|
130
|
+
# Calculate the number partitions based on given number of days
|
|
131
|
+
if self == PartitionInterval.DAY:
|
|
132
|
+
return days
|
|
133
|
+
elif self == PartitionInterval.MONTH:
|
|
134
|
+
# Average number days in a month is 30.44
|
|
135
|
+
return int(days / 30.44)
|
|
136
|
+
elif self == PartitionInterval.YEARWEEK:
|
|
137
|
+
return int(days / 7)
|
mlrun/common/schemas/pipeline.py
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
#
|
|
15
15
|
import typing
|
|
16
16
|
|
|
17
|
-
import pydantic
|
|
17
|
+
import pydantic.v1
|
|
18
18
|
from deprecated import deprecated
|
|
19
19
|
|
|
20
20
|
import mlrun.common.types
|
|
@@ -39,7 +39,7 @@ class PipelinesPagination(str):
|
|
|
39
39
|
max_page_size = 200
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
class PipelinesOutput(pydantic.BaseModel):
|
|
42
|
+
class PipelinesOutput(pydantic.v1.BaseModel):
|
|
43
43
|
# use the format query param to control what is returned
|
|
44
44
|
runs: list[typing.Union[dict, str]]
|
|
45
45
|
total_size: int
|
mlrun/common/schemas/project.py
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
import datetime
|
|
16
16
|
import typing
|
|
17
17
|
|
|
18
|
-
import pydantic
|
|
18
|
+
import pydantic.v1
|
|
19
19
|
from deprecated import deprecated
|
|
20
20
|
|
|
21
21
|
import mlrun.common.types
|
|
@@ -40,14 +40,14 @@ class ProjectsFormat(mlrun.common.types.StrEnum):
|
|
|
40
40
|
leader = "leader"
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
class ProjectMetadata(pydantic.BaseModel):
|
|
43
|
+
class ProjectMetadata(pydantic.v1.BaseModel):
|
|
44
44
|
name: str
|
|
45
45
|
created: typing.Optional[datetime.datetime] = None
|
|
46
46
|
labels: typing.Optional[dict] = {}
|
|
47
47
|
annotations: typing.Optional[dict] = {}
|
|
48
48
|
|
|
49
49
|
class Config:
|
|
50
|
-
extra = pydantic.Extra.allow
|
|
50
|
+
extra = pydantic.v1.Extra.allow
|
|
51
51
|
|
|
52
52
|
|
|
53
53
|
class ProjectDesiredState(mlrun.common.types.StrEnum):
|
|
@@ -77,7 +77,7 @@ class ProjectStatus(ObjectStatus):
|
|
|
77
77
|
state: typing.Optional[ProjectState]
|
|
78
78
|
|
|
79
79
|
|
|
80
|
-
class ProjectSpec(pydantic.BaseModel):
|
|
80
|
+
class ProjectSpec(pydantic.v1.BaseModel):
|
|
81
81
|
description: typing.Optional[str] = None
|
|
82
82
|
owner: typing.Optional[str] = None
|
|
83
83
|
goals: typing.Optional[str] = None
|
|
@@ -97,10 +97,10 @@ class ProjectSpec(pydantic.BaseModel):
|
|
|
97
97
|
default_function_node_selector: typing.Optional[dict] = {}
|
|
98
98
|
|
|
99
99
|
class Config:
|
|
100
|
-
extra = pydantic.Extra.allow
|
|
100
|
+
extra = pydantic.v1.Extra.allow
|
|
101
101
|
|
|
102
102
|
|
|
103
|
-
class ProjectSpecOut(pydantic.BaseModel):
|
|
103
|
+
class ProjectSpecOut(pydantic.v1.BaseModel):
|
|
104
104
|
description: typing.Optional[str] = None
|
|
105
105
|
owner: typing.Optional[str] = None
|
|
106
106
|
goals: typing.Optional[str] = None
|
|
@@ -120,11 +120,11 @@ class ProjectSpecOut(pydantic.BaseModel):
|
|
|
120
120
|
default_function_node_selector: typing.Optional[dict] = {}
|
|
121
121
|
|
|
122
122
|
class Config:
|
|
123
|
-
extra = pydantic.Extra.allow
|
|
123
|
+
extra = pydantic.v1.Extra.allow
|
|
124
124
|
|
|
125
125
|
|
|
126
|
-
class Project(pydantic.BaseModel):
|
|
127
|
-
kind: ObjectKind = pydantic.Field(ObjectKind.project, const=True)
|
|
126
|
+
class Project(pydantic.v1.BaseModel):
|
|
127
|
+
kind: ObjectKind = pydantic.v1.Field(ObjectKind.project, const=True)
|
|
128
128
|
metadata: ProjectMetadata
|
|
129
129
|
spec: ProjectSpec = ProjectSpec()
|
|
130
130
|
status: ObjectStatus = ObjectStatus()
|
|
@@ -132,19 +132,19 @@ class Project(pydantic.BaseModel):
|
|
|
132
132
|
|
|
133
133
|
# The reason we have a different schema for the response model is that we don't want to validate project.spec.build in
|
|
134
134
|
# the response as the validation was added late and there may be corrupted values in the DB.
|
|
135
|
-
class ProjectOut(pydantic.BaseModel):
|
|
136
|
-
kind: ObjectKind = pydantic.Field(ObjectKind.project, const=True)
|
|
135
|
+
class ProjectOut(pydantic.v1.BaseModel):
|
|
136
|
+
kind: ObjectKind = pydantic.v1.Field(ObjectKind.project, const=True)
|
|
137
137
|
metadata: ProjectMetadata
|
|
138
138
|
spec: ProjectSpecOut = ProjectSpecOut()
|
|
139
139
|
status: ObjectStatus = ObjectStatus()
|
|
140
140
|
|
|
141
141
|
|
|
142
|
-
class ProjectOwner(pydantic.BaseModel):
|
|
142
|
+
class ProjectOwner(pydantic.v1.BaseModel):
|
|
143
143
|
username: str
|
|
144
144
|
access_key: str
|
|
145
145
|
|
|
146
146
|
|
|
147
|
-
class ProjectSummary(pydantic.BaseModel):
|
|
147
|
+
class ProjectSummary(pydantic.v1.BaseModel):
|
|
148
148
|
name: str
|
|
149
149
|
files_count: int = 0
|
|
150
150
|
feature_sets_count: int = 0
|
|
@@ -161,7 +161,7 @@ class ProjectSummary(pydantic.BaseModel):
|
|
|
161
161
|
updated: typing.Optional[datetime.datetime] = None
|
|
162
162
|
|
|
163
163
|
|
|
164
|
-
class IguazioProject(pydantic.BaseModel):
|
|
164
|
+
class IguazioProject(pydantic.v1.BaseModel):
|
|
165
165
|
data: dict
|
|
166
166
|
|
|
167
167
|
|
|
@@ -175,13 +175,18 @@ class IguazioProject(pydantic.BaseModel):
|
|
|
175
175
|
# to add a specific classes for them. it's frustrating but couldn't find other workaround, see:
|
|
176
176
|
# https://github.com/samuelcolvin/pydantic/issues/1423, https://github.com/samuelcolvin/pydantic/issues/619
|
|
177
177
|
ProjectOutput = typing.TypeVar(
|
|
178
|
-
"ProjectOutput",
|
|
178
|
+
"ProjectOutput",
|
|
179
|
+
ProjectOut,
|
|
180
|
+
str,
|
|
181
|
+
ProjectSummary,
|
|
182
|
+
IguazioProject,
|
|
183
|
+
tuple[str, datetime.datetime],
|
|
179
184
|
)
|
|
180
185
|
|
|
181
186
|
|
|
182
|
-
class ProjectsOutput(pydantic.BaseModel):
|
|
187
|
+
class ProjectsOutput(pydantic.v1.BaseModel):
|
|
183
188
|
projects: list[ProjectOutput]
|
|
184
189
|
|
|
185
190
|
|
|
186
|
-
class ProjectSummariesOutput(pydantic.BaseModel):
|
|
191
|
+
class ProjectSummariesOutput(pydantic.v1.BaseModel):
|
|
187
192
|
project_summaries: list[ProjectSummary]
|
mlrun/common/schemas/runs.py
CHANGED
|
@@ -14,13 +14,13 @@
|
|
|
14
14
|
|
|
15
15
|
import typing
|
|
16
16
|
|
|
17
|
-
import pydantic
|
|
17
|
+
import pydantic.v1
|
|
18
18
|
from deprecated import deprecated
|
|
19
19
|
|
|
20
20
|
import mlrun.common.types
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
class RunIdentifier(pydantic.BaseModel):
|
|
23
|
+
class RunIdentifier(pydantic.v1.BaseModel):
|
|
24
24
|
kind: typing.Literal["run"] = "run"
|
|
25
25
|
uid: typing.Optional[str]
|
|
26
26
|
iter: typing.Optional[int]
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
#
|
|
15
15
|
import typing
|
|
16
16
|
|
|
17
|
-
import pydantic
|
|
17
|
+
import pydantic.v1
|
|
18
18
|
|
|
19
19
|
import mlrun.common.types
|
|
20
20
|
|
|
@@ -24,23 +24,23 @@ class ListRuntimeResourcesGroupByField(mlrun.common.types.StrEnum):
|
|
|
24
24
|
project = "project"
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
class RuntimeResource(pydantic.BaseModel):
|
|
27
|
+
class RuntimeResource(pydantic.v1.BaseModel):
|
|
28
28
|
name: str
|
|
29
29
|
labels: dict[str, str] = {}
|
|
30
30
|
status: typing.Optional[dict]
|
|
31
31
|
|
|
32
32
|
|
|
33
|
-
class RuntimeResources(pydantic.BaseModel):
|
|
33
|
+
class RuntimeResources(pydantic.v1.BaseModel):
|
|
34
34
|
crd_resources: list[RuntimeResource] = []
|
|
35
35
|
pod_resources: list[RuntimeResource] = []
|
|
36
36
|
# only for dask runtime
|
|
37
37
|
service_resources: typing.Optional[list[RuntimeResource]] = None
|
|
38
38
|
|
|
39
39
|
class Config:
|
|
40
|
-
extra = pydantic.Extra.allow
|
|
40
|
+
extra = pydantic.v1.Extra.allow
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
class KindRuntimeResources(pydantic.BaseModel):
|
|
43
|
+
class KindRuntimeResources(pydantic.v1.BaseModel):
|
|
44
44
|
kind: str
|
|
45
45
|
resources: RuntimeResources
|
|
46
46
|
|
mlrun/common/schemas/schedule.py
CHANGED
mlrun/common/schemas/secret.py
CHANGED
mlrun/common/schemas/tag.py
CHANGED
|
@@ -13,17 +13,17 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
15
|
|
|
16
|
-
import pydantic
|
|
16
|
+
import pydantic.v1
|
|
17
17
|
|
|
18
18
|
from .artifact import ArtifactIdentifier
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
class Tag(pydantic.BaseModel):
|
|
21
|
+
class Tag(pydantic.v1.BaseModel):
|
|
22
22
|
name: str
|
|
23
23
|
project: str
|
|
24
24
|
|
|
25
25
|
|
|
26
|
-
class TagObjects(pydantic.BaseModel):
|
|
26
|
+
class TagObjects(pydantic.v1.BaseModel):
|
|
27
27
|
"""Tag object"""
|
|
28
28
|
|
|
29
29
|
kind: str
|
mlrun/common/schemas/workflow.py
CHANGED
|
@@ -14,14 +14,14 @@
|
|
|
14
14
|
#
|
|
15
15
|
import typing
|
|
16
16
|
|
|
17
|
-
import pydantic
|
|
17
|
+
import pydantic.v1
|
|
18
18
|
|
|
19
19
|
from mlrun.common.schemas.notification import Notification
|
|
20
20
|
from mlrun.common.schemas.schedule import ScheduleCronTrigger
|
|
21
21
|
from mlrun.common.types import StrEnum
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
class WorkflowSpec(pydantic.BaseModel):
|
|
24
|
+
class WorkflowSpec(pydantic.v1.BaseModel):
|
|
25
25
|
name: str
|
|
26
26
|
engine: typing.Optional[str] = None
|
|
27
27
|
code: typing.Optional[str] = None
|
|
@@ -36,7 +36,7 @@ class WorkflowSpec(pydantic.BaseModel):
|
|
|
36
36
|
workflow_runner_node_selector: typing.Optional[dict[str, str]] = None
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
class WorkflowRequest(pydantic.BaseModel):
|
|
39
|
+
class WorkflowRequest(pydantic.v1.BaseModel):
|
|
40
40
|
spec: typing.Optional[WorkflowSpec] = None
|
|
41
41
|
arguments: typing.Optional[dict] = None
|
|
42
42
|
artifact_path: typing.Optional[str] = None
|
|
@@ -46,7 +46,7 @@ class WorkflowRequest(pydantic.BaseModel):
|
|
|
46
46
|
notifications: typing.Optional[list[Notification]] = None
|
|
47
47
|
|
|
48
48
|
|
|
49
|
-
class WorkflowResponse(pydantic.BaseModel):
|
|
49
|
+
class WorkflowResponse(pydantic.v1.BaseModel):
|
|
50
50
|
project: str = None
|
|
51
51
|
name: str = None
|
|
52
52
|
status: str = None
|
|
@@ -54,7 +54,7 @@ class WorkflowResponse(pydantic.BaseModel):
|
|
|
54
54
|
schedule: typing.Union[str, ScheduleCronTrigger] = None
|
|
55
55
|
|
|
56
56
|
|
|
57
|
-
class GetWorkflowResponse(pydantic.BaseModel):
|
|
57
|
+
class GetWorkflowResponse(pydantic.v1.BaseModel):
|
|
58
58
|
workflow_id: str = None
|
|
59
59
|
|
|
60
60
|
|
mlrun/config.py
CHANGED
|
@@ -102,6 +102,9 @@ default_config = {
|
|
|
102
102
|
"log_level": "INFO",
|
|
103
103
|
# log formatter (options: human | human_extended | json)
|
|
104
104
|
"log_formatter": "human",
|
|
105
|
+
# custom logger format, works only with log_formatter: custom
|
|
106
|
+
# Note that your custom format must include those 4 fields - timestamp, level, message and more
|
|
107
|
+
"log_format_override": None,
|
|
105
108
|
"submit_timeout": "180", # timeout when submitting a new k8s resource
|
|
106
109
|
# runtimes cleanup interval in seconds
|
|
107
110
|
"runtimes_cleanup_interval": "300",
|
|
@@ -120,14 +123,6 @@ default_config = {
|
|
|
120
123
|
"projects": {
|
|
121
124
|
"summaries": {
|
|
122
125
|
"cache_interval": "30",
|
|
123
|
-
"feature_gates": {
|
|
124
|
-
"artifacts": "enabled",
|
|
125
|
-
"schedules": "enabled",
|
|
126
|
-
"feature_sets": "enabled",
|
|
127
|
-
"models": "enabled",
|
|
128
|
-
"runs": "enabled",
|
|
129
|
-
"pipelines": "enabled",
|
|
130
|
-
},
|
|
131
126
|
},
|
|
132
127
|
},
|
|
133
128
|
},
|
|
@@ -140,6 +135,12 @@ default_config = {
|
|
|
140
135
|
"delete_crd_resources_timeout": "5 minutes",
|
|
141
136
|
},
|
|
142
137
|
},
|
|
138
|
+
"object_retentions": {
|
|
139
|
+
"alert_activation": 14 * 7, # days
|
|
140
|
+
},
|
|
141
|
+
# A safety margin to account for delays
|
|
142
|
+
# This ensures that extra partitions are available beyond the specified retention period
|
|
143
|
+
"partitions_buffer_multiplier": 3,
|
|
143
144
|
# the grace period (in seconds) that will be given to runtime resources (after they're in terminal state)
|
|
144
145
|
# before deleting them (4 hours)
|
|
145
146
|
"runtime_resources_deletion_grace_period": "14400",
|
|
@@ -314,7 +315,7 @@ default_config = {
|
|
|
314
315
|
},
|
|
315
316
|
"request_timeout": 45, # seconds
|
|
316
317
|
},
|
|
317
|
-
# see server.api.utils.helpers.ensure_running_on_chief
|
|
318
|
+
# see server.py.services.api.utils.helpers.ensure_running_on_chief
|
|
318
319
|
"ensure_function_running_on_chief_mode": "enabled",
|
|
319
320
|
},
|
|
320
321
|
"port": 8080,
|
|
@@ -794,17 +795,36 @@ default_config = {
|
|
|
794
795
|
"grafana_url": "",
|
|
795
796
|
"alerts": {
|
|
796
797
|
# supported modes: "enabled", "disabled".
|
|
797
|
-
"mode": "
|
|
798
|
+
"mode": "enabled",
|
|
798
799
|
# maximum number of alerts we allow to be configured.
|
|
799
800
|
# user will get an error when exceeding this
|
|
800
801
|
"max_allowed": 10000,
|
|
801
802
|
# maximum allowed value for count in criteria field inside AlertConfig
|
|
802
803
|
"max_criteria_count": 100,
|
|
804
|
+
# interval for periodic events generation job
|
|
805
|
+
"events_generation_interval": "30",
|
|
803
806
|
},
|
|
804
807
|
"auth_with_client_id": {
|
|
805
808
|
"enabled": False,
|
|
806
809
|
"request_timeout": 5,
|
|
807
810
|
},
|
|
811
|
+
"services": {
|
|
812
|
+
# The running service name. One of: "api", "alerts"
|
|
813
|
+
"service_name": "api",
|
|
814
|
+
"hydra": {
|
|
815
|
+
# Comma separated list of services to run on the instance.
|
|
816
|
+
# Currently, this is only considered when the service_name is "api".
|
|
817
|
+
# "*" starts all services on the same instance,
|
|
818
|
+
# other options are considered as running only the api service.
|
|
819
|
+
"services": "*",
|
|
820
|
+
},
|
|
821
|
+
},
|
|
822
|
+
"notifications": {
|
|
823
|
+
"smtp": {
|
|
824
|
+
"config_secret_name": "mlrun-smtp-config",
|
|
825
|
+
"refresh_interval": "30",
|
|
826
|
+
}
|
|
827
|
+
},
|
|
808
828
|
}
|
|
809
829
|
_is_running_as_api = None
|
|
810
830
|
|
|
@@ -851,6 +871,22 @@ class Config:
|
|
|
851
871
|
name = self.__class__.__name__
|
|
852
872
|
return f"{name}({self._cfg!r})"
|
|
853
873
|
|
|
874
|
+
def __iter__(self):
|
|
875
|
+
if isinstance(self._cfg, Mapping):
|
|
876
|
+
return self._cfg.__iter__()
|
|
877
|
+
|
|
878
|
+
def items(self):
|
|
879
|
+
if isinstance(self._cfg, Mapping):
|
|
880
|
+
return iter(self._cfg.items())
|
|
881
|
+
|
|
882
|
+
def keys(self):
|
|
883
|
+
if isinstance(self._cfg, Mapping):
|
|
884
|
+
return iter(self.data.keys())
|
|
885
|
+
|
|
886
|
+
def values(self):
|
|
887
|
+
if isinstance(self._cfg, Mapping):
|
|
888
|
+
return iter(self.data.values())
|
|
889
|
+
|
|
854
890
|
def update(self, cfg, skip_errors=False):
|
|
855
891
|
for key, value in cfg.items():
|
|
856
892
|
if hasattr(self, key):
|
|
@@ -1043,6 +1079,17 @@ class Config:
|
|
|
1043
1079
|
f"is not allowed for iguazio version: {igz_version} < 3.5.1"
|
|
1044
1080
|
)
|
|
1045
1081
|
|
|
1082
|
+
def validate_object_retentions(self):
|
|
1083
|
+
for table_name, retention_days in self.object_retentions.items():
|
|
1084
|
+
if retention_days < 7 and not os.getenv("PARTITION_INTERVAL"):
|
|
1085
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
1086
|
+
f"{table_name} partition interval must be greater than a week"
|
|
1087
|
+
)
|
|
1088
|
+
elif retention_days > 53 * 7:
|
|
1089
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
1090
|
+
f"{table_name} partition interval must be less than a year"
|
|
1091
|
+
)
|
|
1092
|
+
|
|
1046
1093
|
def resolve_chief_api_url(self) -> str:
|
|
1047
1094
|
if self.httpdb.clusterization.chief.url:
|
|
1048
1095
|
return self.httpdb.clusterization.chief.url
|
|
@@ -1201,9 +1248,9 @@ class Config:
|
|
|
1201
1248
|
|
|
1202
1249
|
def get_model_monitoring_file_target_path(
|
|
1203
1250
|
self,
|
|
1204
|
-
project: str
|
|
1205
|
-
kind: str
|
|
1206
|
-
target:
|
|
1251
|
+
project: str,
|
|
1252
|
+
kind: str,
|
|
1253
|
+
target: typing.Literal["online", "offline"] = "online",
|
|
1207
1254
|
artifact_path: typing.Optional[str] = None,
|
|
1208
1255
|
function_name: typing.Optional[str] = None,
|
|
1209
1256
|
**kwargs,
|
|
@@ -1381,9 +1428,12 @@ def _validate_config(config):
|
|
|
1381
1428
|
pass
|
|
1382
1429
|
|
|
1383
1430
|
config.verify_security_context_enrichment_mode_is_allowed()
|
|
1431
|
+
config.validate_object_retentions()
|
|
1384
1432
|
|
|
1385
1433
|
|
|
1386
|
-
def _verify_gpu_requests_and_limits(
|
|
1434
|
+
def _verify_gpu_requests_and_limits(
|
|
1435
|
+
requests_gpu: typing.Optional[str] = None, limits_gpu: typing.Optional[str] = None
|
|
1436
|
+
):
|
|
1387
1437
|
# https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/
|
|
1388
1438
|
if requests_gpu and not limits_gpu:
|
|
1389
1439
|
raise mlrun.errors.MLRunConflictError(
|
|
@@ -1396,7 +1446,7 @@ def _verify_gpu_requests_and_limits(requests_gpu: str = None, limits_gpu: str =
|
|
|
1396
1446
|
)
|
|
1397
1447
|
|
|
1398
1448
|
|
|
1399
|
-
def _convert_resources_to_str(config: dict = None):
|
|
1449
|
+
def _convert_resources_to_str(config: typing.Optional[dict] = None):
|
|
1400
1450
|
resources_types = ["cpu", "memory", "gpu"]
|
|
1401
1451
|
resource_requirements = ["requests", "limits"]
|
|
1402
1452
|
if not config.get("default_function_pod_resources"):
|
mlrun/data_types/__init__.py
CHANGED
|
@@ -11,8 +11,6 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
#
|
|
15
|
-
# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
|
|
16
14
|
|
|
17
15
|
from .data_types import (
|
|
18
16
|
InferOptions,
|
mlrun/data_types/data_types.py
CHANGED
mlrun/data_types/infer.py
CHANGED
|
@@ -12,6 +12,8 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
|
+
from typing import Optional
|
|
16
|
+
|
|
15
17
|
import numpy as np
|
|
16
18
|
import packaging.version
|
|
17
19
|
import pandas as pd
|
|
@@ -29,7 +31,7 @@ def infer_schema_from_df(
|
|
|
29
31
|
df: pd.DataFrame,
|
|
30
32
|
features,
|
|
31
33
|
entities,
|
|
32
|
-
timestamp_key: str = None,
|
|
34
|
+
timestamp_key: Optional[str] = None,
|
|
33
35
|
entity_columns=None,
|
|
34
36
|
options: InferOptions = InferOptions.Null,
|
|
35
37
|
):
|
mlrun/data_types/spark.py
CHANGED
|
@@ -14,11 +14,12 @@
|
|
|
14
14
|
#
|
|
15
15
|
from datetime import datetime
|
|
16
16
|
from os import environ
|
|
17
|
+
from typing import Optional
|
|
17
18
|
|
|
18
19
|
import numpy as np
|
|
19
20
|
import pytz
|
|
20
21
|
from pyspark.sql.functions import to_utc_timestamp
|
|
21
|
-
from pyspark.sql.types import BooleanType, DoubleType
|
|
22
|
+
from pyspark.sql.types import BooleanType, DoubleType, TimestampType
|
|
22
23
|
|
|
23
24
|
from mlrun.feature_store.retrieval.spark_merger import spark_df_to_pandas
|
|
24
25
|
from mlrun.utils import logger
|
|
@@ -35,7 +36,7 @@ def infer_schema_from_df_spark(
|
|
|
35
36
|
df,
|
|
36
37
|
features,
|
|
37
38
|
entities,
|
|
38
|
-
timestamp_key: str = None,
|
|
39
|
+
timestamp_key: Optional[str] = None,
|
|
39
40
|
entity_columns=None,
|
|
40
41
|
options: InferOptions = InferOptions.Null,
|
|
41
42
|
):
|
|
@@ -143,8 +144,7 @@ def get_df_stats_spark(df, options, num_bins=20, sample_size=None):
|
|
|
143
144
|
timestamp_columns = set()
|
|
144
145
|
boolean_columns = set()
|
|
145
146
|
for field in df_after_type_casts.schema.fields:
|
|
146
|
-
|
|
147
|
-
is_timestamp = field.dataType.typeName().startswith("timestamp")
|
|
147
|
+
is_timestamp = isinstance(field.dataType, TimestampType)
|
|
148
148
|
is_boolean = isinstance(field.dataType, BooleanType)
|
|
149
149
|
if is_timestamp:
|
|
150
150
|
df_after_type_casts = df_after_type_casts.withColumn(
|
mlrun/data_types/to_pandas.py
CHANGED
|
@@ -244,15 +244,6 @@ def _to_corrected_pandas_type(dt):
|
|
|
244
244
|
|
|
245
245
|
|
|
246
246
|
def spark_df_to_pandas(spark_df):
|
|
247
|
-
import pyspark
|
|
248
|
-
|
|
249
|
-
if semver.parse(pyspark.__version__) >= semver.Version(3, 5, 0):
|
|
250
|
-
|
|
251
|
-
def to_pandas(spark_df_inner):
|
|
252
|
-
return spark_df_inner.toPandas()
|
|
253
|
-
else:
|
|
254
|
-
to_pandas = _to_pandas
|
|
255
|
-
|
|
256
247
|
# as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
|
|
257
248
|
# when we upgrade pyspark, we should check whether this workaround is still necessary
|
|
258
249
|
# see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
|
|
@@ -271,9 +262,9 @@ def spark_df_to_pandas(spark_df):
|
|
|
271
262
|
)
|
|
272
263
|
type_conversion_dict[field.name] = "datetime64[ns]"
|
|
273
264
|
|
|
274
|
-
df =
|
|
265
|
+
df = _to_pandas(spark_df)
|
|
275
266
|
if type_conversion_dict:
|
|
276
267
|
df = df.astype(type_conversion_dict)
|
|
277
268
|
return df
|
|
278
269
|
else:
|
|
279
|
-
return
|
|
270
|
+
return _to_pandas(spark_df)
|
mlrun/datastore/__init__.py
CHANGED
|
@@ -12,8 +12,6 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
|
|
16
|
-
|
|
17
15
|
__all__ = [
|
|
18
16
|
"DataItem",
|
|
19
17
|
"get_store_resource",
|