mlrun 1.7.0rc29__py3-none-any.whl → 1.7.0rc31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of mlrun has been flagged as potentially problematic.

Files changed (39):
  1. mlrun/common/constants.py +1 -1
  2. mlrun/common/formatters/artifact.py +1 -0
  3. mlrun/common/schemas/model_monitoring/constants.py +5 -1
  4. mlrun/common/schemas/project.py +10 -9
  5. mlrun/config.py +21 -2
  6. mlrun/data_types/spark.py +2 -2
  7. mlrun/data_types/to_pandas.py +48 -16
  8. mlrun/datastore/__init__.py +1 -0
  9. mlrun/datastore/base.py +20 -8
  10. mlrun/datastore/datastore.py +4 -2
  11. mlrun/datastore/datastore_profile.py +1 -1
  12. mlrun/datastore/google_cloud_storage.py +1 -0
  13. mlrun/datastore/inmem.py +3 -0
  14. mlrun/datastore/s3.py +2 -0
  15. mlrun/datastore/sources.py +14 -0
  16. mlrun/datastore/targets.py +11 -1
  17. mlrun/db/base.py +1 -0
  18. mlrun/db/httpdb.py +10 -2
  19. mlrun/db/nopdb.py +1 -0
  20. mlrun/feature_store/retrieval/spark_merger.py +3 -32
  21. mlrun/model.py +1 -5
  22. mlrun/model_monitoring/api.py +3 -3
  23. mlrun/model_monitoring/controller.py +57 -73
  24. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +8 -2
  25. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +3 -0
  26. mlrun/model_monitoring/helpers.py +6 -12
  27. mlrun/model_monitoring/writer.py +1 -2
  28. mlrun/projects/project.py +16 -0
  29. mlrun/run.py +5 -5
  30. mlrun/runtimes/base.py +1 -1
  31. mlrun/utils/version/version.json +2 -2
  32. {mlrun-1.7.0rc29.dist-info → mlrun-1.7.0rc31.dist-info}/METADATA +6 -6
  33. {mlrun-1.7.0rc29.dist-info → mlrun-1.7.0rc31.dist-info}/RECORD +37 -39
  34. {mlrun-1.7.0rc29.dist-info → mlrun-1.7.0rc31.dist-info}/WHEEL +1 -1
  35. mlrun/feature_store/retrieval/conversion.py +0 -271
  36. mlrun/model_monitoring/controller_handler.py +0 -37
  37. {mlrun-1.7.0rc29.dist-info → mlrun-1.7.0rc31.dist-info}/LICENSE +0 -0
  38. {mlrun-1.7.0rc29.dist-info → mlrun-1.7.0rc31.dist-info}/entry_points.txt +0 -0
  39. {mlrun-1.7.0rc29.dist-info → mlrun-1.7.0rc31.dist-info}/top_level.txt +0 -0
mlrun/common/constants.py CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 IMAGE_NAME_ENRICH_REGISTRY_PREFIX = "."  # prefix for image name to enrich with registry
 MLRUN_SERVING_CONF = "serving-conf"
@@ -70,6 +69,7 @@ class MLRunInternalLabels:
     job_type = "job-type"
     kind = "kind"
     component = "component"
+    mlrun_type = "mlrun__type"
 
     owner = "owner"
     v3io_user = "v3io_user"
mlrun/common/formatters/artifact.py CHANGED
@@ -37,6 +37,7 @@ class ArtifactFormat(ObjectFormat, mlrun.common.types.StrEnum):
        "spec.db_key",
        "spec.size",
        "spec.framework",
+       "spec.algorithm",
        "spec.metrics",
        "spec.target_path",
    ]
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -17,6 +17,7 @@ from dataclasses import dataclass
 from enum import Enum, IntEnum
 from typing import Optional
 
+import mlrun.common.constants
 import mlrun.common.helpers
 from mlrun.common.types import StrEnum
 
@@ -354,7 +355,7 @@ class ResultStatusApp(IntEnum):
 
 
 class ModelMonitoringAppLabel:
-    KEY = "mlrun__type"
+    KEY = mlrun.common.constants.MLRunInternalLabels.mlrun_type
     VAL = "mlrun__model-monitoring-application"
 
     def __str__(self) -> str:
@@ -377,3 +378,6 @@ class PredictionsQueryConstants:
 
 class SpecialApps:
     MLRUN_INFRA = "mlrun-infra"
+
+
+_RESERVED_FUNCTION_NAMES = MonitoringFunctionNames.list() + [SpecialApps.MLRUN_INFRA]
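For reference, a minimal sketch of how the new reserved-name list could be inspected (assumes mlrun 1.7.0rc31 is installed; the exact values depend on MonitoringFunctionNames in the installed version):

    from mlrun.common.schemas.model_monitoring import constants as mm_constants

    # The module-level list now covers the infra app name in addition to the
    # monitoring function names, so user applications cannot reuse any of them.
    print(mm_constants._RESERVED_FUNCTION_NAMES)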
mlrun/common/schemas/project.py CHANGED
@@ -114,18 +114,19 @@ class ProjectOwner(pydantic.BaseModel):
 
 class ProjectSummary(pydantic.BaseModel):
     name: str
-    files_count: int
-    feature_sets_count: int
-    models_count: int
-    runs_completed_recent_count: int
-    runs_failed_recent_count: int
-    runs_running_count: int
-    distinct_schedules_count: int
-    distinct_scheduled_jobs_pending_count: int
-    distinct_scheduled_pipelines_pending_count: int
+    files_count: int = 0
+    feature_sets_count: int = 0
+    models_count: int = 0
+    runs_completed_recent_count: int = 0
+    runs_failed_recent_count: int = 0
+    runs_running_count: int = 0
+    distinct_schedules_count: int = 0
+    distinct_scheduled_jobs_pending_count: int = 0
+    distinct_scheduled_pipelines_pending_count: int = 0
     pipelines_completed_recent_count: typing.Optional[int] = None
     pipelines_failed_recent_count: typing.Optional[int] = None
     pipelines_running_count: typing.Optional[int] = None
+    updated: typing.Optional[datetime.datetime] = None
 
 
 class IguazioProject(pydantic.BaseModel):
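To illustrate the schema change above, a minimal sketch (assumes mlrun 1.7.0rc31 is installed; the project name is hypothetical): the count fields now default to 0 and `updated` is optional, so a bare summary can be constructed from the name alone.

    from mlrun.common.schemas.project import ProjectSummary

    # All count fields default to 0 and `updated` defaults to None until the
    # counters are actually computed and cached.
    summary = ProjectSummary(name="my-project")
    print(summary.files_count, summary.updated)  # -> 0 None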
mlrun/config.py CHANGED
@@ -52,6 +52,11 @@ default_config = {
     "kubernetes": {
         "kubeconfig_path": "",  # local path to kubeconfig file (for development purposes),
         # empty by default as the API already running inside k8s cluster
+        "pagination": {
+            # pagination config for interacting with k8s API
+            "list_pods_limit": 200,
+            "list_crd_objects_limit": 200,
+        },
     },
     "dbpath": "",  # db/api url
     # url to nuclio dashboard api (can be with user & token, e.g. https://username:password@dashboard-url.com)
@@ -108,7 +113,12 @@ default_config = {
            # max number of parallel abort run jobs in runs monitoring
            "concurrent_abort_stale_runs_workers": 10,
            "list_runs_time_period_in_days": 7,  # days
-        }
+        },
+        "projects": {
+            "summaries": {
+                "cache_interval": "30",
+            },
+        },
     },
     "crud": {
         "runs": {
@@ -269,6 +279,16 @@ default_config = {
         "url": "",
         "service": "mlrun-api-chief",
         "port": 8080,
+        "feature_gates": {
+            "scheduler": "enabled",
+            "project_sync": "enabled",
+            "cleanup": "enabled",
+            "runs_monitoring": "enabled",
+            "pagination_cache": "enabled",
+            "project_summaries": "enabled",
+            "start_logs": "enabled",
+            "stop_logs": "enabled",
+        },
     },
     "worker": {
         "sync_with_chief": {
@@ -437,7 +457,6 @@ default_config = {
     "followers": "",
     # This is used as the interval for the sync loop both when mlrun is leader and follower
     "periodic_sync_interval": "1 minute",
-    "counters_cache_ttl": "2 minutes",
     "project_owners_cache_ttl": "30 seconds",
     # access key to be used when the leader is iguazio and polling is done from it
     "iguazio_access_key": "",
mlrun/data_types/spark.py CHANGED
@@ -20,10 +20,10 @@ import pytz
 from pyspark.sql.functions import to_utc_timestamp
 from pyspark.sql.types import BooleanType, DoubleType, TimestampType
 
+from mlrun.feature_store.retrieval.spark_merger import spark_df_to_pandas
 from mlrun.utils import logger
 
 from .data_types import InferOptions, spark_to_value_type
-from .to_pandas import toPandas
 
 try:
     import pyspark.sql.functions as funcs
@@ -75,7 +75,7 @@ def get_df_preview_spark(df, preview_lines=20):
     """capture preview data from spark df"""
     df = df.limit(preview_lines)
 
-    result_dict = toPandas(df).to_dict(orient="split")
+    result_dict = spark_df_to_pandas(df).to_dict(orient="split")
     return [result_dict["columns"], *result_dict["data"]]
 
 
mlrun/data_types/to_pandas.py CHANGED
@@ -15,21 +15,11 @@
 import warnings
 from collections import Counter
 
-from pyspark.sql.types import (
-    BooleanType,
-    ByteType,
-    DoubleType,
-    FloatType,
-    IntegerType,
-    IntegralType,
-    LongType,
-    MapType,
-    ShortType,
-    TimestampType,
-)
-
-
-def toPandas(spark_df):
+import pandas as pd
+import semver
+
+
+def _toPandas(spark_df):
     """
     Modified version of spark DataFrame.toPandas() –
     https://github.com/apache/spark/blob/v3.2.3/python/pyspark/sql/pandas/conversion.py#L35
@@ -40,6 +30,12 @@ def toPandas(spark_df):
     This modification adds the missing unit to the dtype.
     """
     from pyspark.sql.dataframe import DataFrame
+    from pyspark.sql.types import (
+        BooleanType,
+        IntegralType,
+        MapType,
+        TimestampType,
+    )
 
     assert isinstance(spark_df, DataFrame)
 
@@ -48,7 +44,6 @@
     require_minimum_pandas_version()
 
     import numpy as np
-    import pandas as pd
 
     timezone = spark_df.sql_ctx._conf.sessionLocalTimeZone()
 
@@ -217,6 +212,16 @@
 
 def _to_corrected_pandas_type(dt):
     import numpy as np
+    from pyspark.sql.types import (
+        BooleanType,
+        ByteType,
+        DoubleType,
+        FloatType,
+        IntegerType,
+        LongType,
+        ShortType,
+        TimestampType,
+    )
 
     if type(dt) == ByteType:
         return np.int8
@@ -236,3 +241,30 @@ def _to_corrected_pandas_type(dt):
         return "datetime64[ns]"
     else:
         return None
+
+
+def spark_df_to_pandas(spark_df):
+    # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
+    # when we upgrade pyspark, we should check whether this workaround is still necessary
+    # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
+    if semver.parse(pd.__version__)["major"] >= 2:
+        import pyspark.sql.functions as pyspark_functions
+
+        type_conversion_dict = {}
+        for field in spark_df.schema.fields:
+            if str(field.dataType) == "TimestampType":
+                spark_df = spark_df.withColumn(
+                    field.name,
+                    pyspark_functions.date_format(
+                        pyspark_functions.to_timestamp(field.name),
+                        "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS",
+                    ),
+                )
+                type_conversion_dict[field.name] = "datetime64[ns]"
+
+        df = _toPandas(spark_df)
+        if type_conversion_dict:
+            df = df.astype(type_conversion_dict)
+        return df
+    else:
+        return _toPandas(spark_df)
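For orientation, a minimal sketch of the relocated helper in use (assumes pyspark, pandas, semver and mlrun 1.7.0rc31 are installed and a local Spark session can be created; the sample data and column names are illustrative):

    from datetime import datetime

    from pyspark.sql import SparkSession

    from mlrun.data_types.to_pandas import spark_df_to_pandas

    spark = SparkSession.builder.master("local[1]").getOrCreate()
    sdf = spark.createDataFrame([(1, datetime(2024, 1, 1, 12, 0))], ["id", "ts"])

    # On pandas >= 2 the timestamp column goes through the string round-trip shown
    # in the diff and comes back as datetime64[ns]; on pandas < 2 the plain
    # conversion path is used.
    pdf = spark_df_to_pandas(sdf)
    print(pdf.dtypes)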
mlrun/datastore/__init__.py CHANGED
@@ -117,6 +117,7 @@ def get_stream_pusher(stream_path: str, **kwargs):
         return OutputStream(stream_path, **kwargs)
     elif stream_path.startswith("v3io"):
         endpoint, stream_path = parse_path(stream_path)
+        endpoint = kwargs.pop("endpoint", None) or endpoint
         return OutputStream(stream_path, endpoint=endpoint, **kwargs)
     elif stream_path.startswith("dummy://"):
         return _DummyStream(**kwargs)
mlrun/datastore/base.py CHANGED
@@ -215,6 +215,11 @@ class DataStore:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "When providing start_time or end_time, must provide time_column"
             )
+        if start_time and end_time and start_time.tzinfo != end_time.tzinfo:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "start_time and end_time must have the same time zone"
+            )
+
         if start_time or end_time or additional_filters:
             partitions_time_attributes = find_partitions(url, file_system)
             set_filters(
@@ -232,13 +237,17 @@
             ):
                 raise ex
 
-            # TODO: fix timezone issue (ML-6308)
-            if start_time.tzinfo:
-                start_time_inner = start_time.replace(tzinfo=None)
-                end_time_inner = end_time.replace(tzinfo=None)
-            else:
-                start_time_inner = start_time.replace(tzinfo=pytz.utc)
-                end_time_inner = end_time.replace(tzinfo=pytz.utc)
+            start_time_inner = None
+            if start_time:
+                start_time_inner = start_time.replace(
+                    tzinfo=None if start_time.tzinfo else pytz.utc
+                )
+
+            end_time_inner = None
+            if end_time:
+                end_time_inner = end_time.replace(
+                    tzinfo=None if end_time.tzinfo else pytz.utc
+                )
 
             set_filters(
                 partitions_time_attributes,
@@ -382,7 +391,10 @@ class DataStore:
         }
 
     def rm(self, path, recursive=False, maxdepth=None):
-        self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
+        try:
+            self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
+        except FileNotFoundError:
+            pass
 
     @staticmethod
     def _is_dd(df_module):
mlrun/datastore/datastore.py CHANGED
@@ -32,6 +32,8 @@ in_memory_store = InMemoryStore()
 
 
 def parse_url(url):
+    if url and url.startswith("v3io://") and not url.startswith("v3io:///"):
+        url = url.replace("v3io://", "v3io:///", 1)
     parsed_url = urlparse(url)
     schema = parsed_url.scheme.lower()
     endpoint = parsed_url.hostname
@@ -94,7 +96,7 @@ def schema_to_store(schema):
         from .dbfs_store import DBFSStore
 
         return DBFSStore
-    elif schema == "hdfs":
+    elif schema in ["hdfs", "webhdfs"]:
         from .hdfs import HdfsStore
 
         return HdfsStore
@@ -207,7 +209,7 @@ class StoreManager:
     ) -> (DataStore, str, str):
         schema, endpoint, parsed_url = parse_url(url)
         subpath = parsed_url.path
-        store_key = f"{schema}://{endpoint}"
+        store_key = f"{schema}://{endpoint}" if endpoint else f"{schema}://"
 
         if schema == "ds":
             datastore_profile = datastore_profile_read(url, project_name, secrets)
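A minimal sketch of the new v3io URL normalization in parse_url (assumes mlrun 1.7.0rc31 is installed; the path is illustrative):

    from mlrun.datastore.datastore import parse_url

    # "v3io://container/..." is rewritten to "v3io:///container/..." before parsing,
    # so the first path segment stays part of the path instead of being misread as a
    # hostname, and the store key falls back to "v3io://".
    schema, endpoint, parsed_url = parse_url("v3io://projects/demo/artifacts/data.parquet")
    print(schema, endpoint, parsed_url.path)  # -> v3io None /projects/demo/artifacts/data.parquet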
mlrun/datastore/datastore_profile.py CHANGED
@@ -412,7 +412,7 @@ class DatastoreProfileHdfs(DatastoreProfile):
         return res or None
 
     def url(self, subpath):
-        return f"hdfs://{self.host}:{self.http_port}{subpath}"
+        return f"webhdfs://{self.host}:{self.http_port}{subpath}"
 
 
 class DatastoreProfile2Json(pydantic.BaseModel):
mlrun/datastore/google_cloud_storage.py CHANGED
@@ -133,6 +133,7 @@ class GoogleCloudStorageStore(DataStore):
 
     def rm(self, path, recursive=False, maxdepth=None):
         path = self._make_path(path)
+        self.filesystem.exists(path)
         self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
 
     def get_spark_options(self):
mlrun/datastore/inmem.py CHANGED
@@ -85,3 +85,6 @@ class InMemoryStore(DataStore):
             kwargs.pop(field, None)
 
         return reader(item, **kwargs)
+
+    def rm(self, path, recursive=False, maxdepth=None):
+        self._items.pop(path, None)
mlrun/datastore/s3.py CHANGED
@@ -201,6 +201,8 @@ class S3Store(DataStore):
     def rm(self, path, recursive=False, maxdepth=None):
         bucket, key = self.get_bucket_and_key(path)
         path = f"{bucket}/{key}"
+        # In order to raise an error if there is connection error, ML-7056.
+        self.filesystem.exists(path=path)
         self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
 
 
mlrun/datastore/sources.py CHANGED
@@ -826,6 +826,20 @@ class SnowflakeSource(BaseSourceDriver):
         spark_options["query"] = self.attributes.get("query")
         return spark_options
 
+    def to_dataframe(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_field=None,
+        additional_filters=None,
+    ):
+        raise mlrun.errors.MLRunRuntimeError(
+            f"{type(self).__name__} supports only spark engine"
+        )
+
 
 class CustomSource(BaseSourceDriver):
     kind = "custom"
mlrun/datastore/targets.py CHANGED
@@ -726,6 +726,10 @@ class BaseStoreTarget(DataTargetBase):
         timestamp_key=None,
         featureset_status=None,
     ):
+        if not self.support_storey:
+            raise mlrun.errors.MLRunRuntimeError(
+                f"{type(self).__name__} does not support storey engine"
+            )
         raise NotImplementedError()
 
     def purge(self):
@@ -768,6 +772,10 @@ class BaseStoreTarget(DataTargetBase):
 
     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
         # options used in spark.read.load(**options)
+        if not self.support_spark:
+            raise mlrun.errors.MLRunRuntimeError(
+                f"{type(self).__name__} does not support spark engine"
+            )
         raise NotImplementedError()
 
     def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
@@ -1283,7 +1291,9 @@ class SnowflakeTarget(BaseStoreTarget):
         additional_filters=None,
         **kwargs,
     ):
-        raise NotImplementedError()
+        raise mlrun.errors.MLRunRuntimeError(
+            f"{type(self).__name__} does not support storey engine"
+        )
 
     @property
     def source_spark_attributes(self) -> dict:
mlrun/db/base.py CHANGED
@@ -925,5 +925,6 @@ class RunDBInterface(ABC):
         self,
         project: str,
         credentials: dict[str, str],
+        replace_creds: bool,
     ) -> None:
         pass
mlrun/db/httpdb.py CHANGED
@@ -1253,13 +1253,17 @@ class HTTPRunDB(RunDBInterface):
             function_name=name,
         )
 
-    def list_functions(self, name=None, project=None, tag=None, labels=None):
+    def list_functions(
+        self, name=None, project=None, tag=None, labels=None, since=None, until=None
+    ):
         """Retrieve a list of functions, filtered by specific criteria.
 
         :param name: Return only functions with a specific name.
         :param project: Return functions belonging to this project. If not specified, the default project is used.
         :param tag: Return function versions with specific tags.
         :param labels: Return functions that have specific labels assigned to them.
+        :param since: Return functions updated after this date (as datetime object).
+        :param until: Return functions updated before this date (as datetime object).
         :returns: List of function objects (as dictionary).
         """
         project = project or config.default_project
@@ -1267,6 +1271,8 @@ class HTTPRunDB(RunDBInterface):
             "name": name,
             "tag": tag,
             "label": labels or [],
+            "since": datetime_to_iso(since),
+            "until": datetime_to_iso(until),
         }
         error = "list functions"
         path = f"projects/{project}/functions"
@@ -3546,17 +3552,19 @@ class HTTPRunDB(RunDBInterface):
         self,
         project: str,
         credentials: dict[str, str],
+        replace_creds: bool,
     ) -> None:
         """
        Set the credentials for the model monitoring application.
 
        :param project: Project name.
        :param credentials: Credentials to set.
+       :param replace_creds: If True, will override the existing credentials.
        """
        self.api_call(
            method=mlrun.common.types.HTTPMethod.POST,
            path=f"projects/{project}/model-monitoring/set-model-monitoring-credentials",
-           params={**credentials},
+           params={**credentials, "replace_creds": replace_creds},
        )
 
     def create_hub_source(
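A minimal sketch of the new since/until filters on list_functions (assumes a reachable MLRun API is configured via dbpath; the project name is hypothetical):

    from datetime import datetime, timedelta

    import mlrun

    db = mlrun.get_run_db()
    # Only functions updated in the last day; `until` can bound the range from above.
    recent_functions = db.list_functions(
        project="my-project",
        since=datetime.now() - timedelta(days=1),
    )
    print(len(recent_functions))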
mlrun/db/nopdb.py CHANGED
@@ -738,6 +738,7 @@ class NopDB(RunDBInterface):
         self,
         project: str,
         credentials: dict[str, str],
+        replace_creds: bool,
     ) -> None:
         pass
 
mlrun/feature_store/retrieval/spark_merger.py CHANGED
@@ -13,45 +13,16 @@
 # limitations under the License.
 #
 
-import pandas as pd
-import semver
 
 import mlrun
+from mlrun.data_types.to_pandas import spark_df_to_pandas
 from mlrun.datastore.sources import ParquetSource
 from mlrun.datastore.targets import get_offline_target
+from mlrun.runtimes import RemoteSparkRuntime
+from mlrun.runtimes.sparkjob import Spark3Runtime
 from mlrun.utils.helpers import additional_filters_warning
 
-from ...runtimes import RemoteSparkRuntime
-from ...runtimes.sparkjob import Spark3Runtime
 from .base import BaseMerger
-from .conversion import PandasConversionMixin
-
-
-def spark_df_to_pandas(spark_df):
-    # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
-    # when we upgrade pyspark, we should check whether this workaround is still necessary
-    # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
-    if semver.parse(pd.__version__)["major"] >= 2:
-        import pyspark.sql.functions as pyspark_functions
-
-        type_conversion_dict = {}
-        for field in spark_df.schema.fields:
-            if str(field.dataType) == "TimestampType":
-                spark_df = spark_df.withColumn(
-                    field.name,
-                    pyspark_functions.date_format(
-                        pyspark_functions.to_timestamp(field.name),
-                        "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS",
-                    ),
-                )
-                type_conversion_dict[field.name] = "datetime64[ns]"
-
-        df = PandasConversionMixin.toPandas(spark_df)
-        if type_conversion_dict:
-            df = df.astype(type_conversion_dict)
-        return df
-    else:
-        return PandasConversionMixin.toPandas(spark_df)
 
 
 class SparkFeatureMerger(BaseMerger):
mlrun/model.py CHANGED
@@ -753,10 +753,6 @@ class Notification(ModelObj):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Both 'secret_params' and 'params' are empty, at least one must be defined."
             )
-        if secret_params and params and secret_params != params:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "Both 'secret_params' and 'params' are defined but they contain different values"
-            )
 
         notification_class.validate_params(secret_params or params)
 
@@ -1315,7 +1311,7 @@ class RunTemplate(ModelObj):
 
             task.with_input("data", "/file-dir/path/to/file")
             task.with_input("data", "s3://<bucket>/path/to/file")
-            task.with_input("data", "v3io://[<remote-host>]/<data-container>/path/to/file")
+            task.with_input("data", "v3io://<data-container>/path/to/file")
         """
         if not self.spec.inputs:
             self.spec.inputs = {}
mlrun/model_monitoring/api.py CHANGED
@@ -569,10 +569,10 @@ def _create_model_monitoring_function_base(
         "please use `ModelMonitoringApplicationBaseV2`. It will be removed in 1.9.0.",
         FutureWarning,
     )
-    if name in mm_constants.MonitoringFunctionNames.list():
+    if name in mm_constants._RESERVED_FUNCTION_NAMES:
         raise mlrun.errors.MLRunInvalidArgumentError(
-            f"An application cannot have the following names: "
-            f"{mm_constants.MonitoringFunctionNames.list()}"
+            "An application cannot have the following names: "
+            f"{mm_constants._RESERVED_FUNCTION_NAMES}"
         )
     if func is None:
         func = ""