mlrun-1.7.0rc18-py3-none-any.whl → mlrun-1.7.0rc19-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of mlrun might be problematic.
Files changed (63)
  1. mlrun/__main__.py +5 -2
  2. mlrun/common/constants.py +64 -3
  3. mlrun/common/formatters/__init__.py +16 -0
  4. mlrun/common/formatters/base.py +59 -0
  5. mlrun/common/formatters/function.py +41 -0
  6. mlrun/common/runtimes/constants.py +29 -4
  7. mlrun/common/schemas/__init__.py +0 -1
  8. mlrun/common/schemas/api_gateway.py +52 -0
  9. mlrun/common/schemas/frontend_spec.py +1 -0
  10. mlrun/common/schemas/model_monitoring/__init__.py +6 -3
  11. mlrun/common/schemas/model_monitoring/constants.py +2 -7
  12. mlrun/config.py +7 -2
  13. mlrun/datastore/sources.py +16 -22
  14. mlrun/datastore/store_resources.py +5 -1
  15. mlrun/datastore/targets.py +3 -2
  16. mlrun/datastore/utils.py +42 -0
  17. mlrun/execution.py +16 -6
  18. mlrun/feature_store/ingestion.py +7 -6
  19. mlrun/feature_store/retrieval/job.py +4 -1
  20. mlrun/frameworks/parallel_coordinates.py +2 -1
  21. mlrun/frameworks/tf_keras/__init__.py +4 -1
  22. mlrun/launcher/client.py +4 -2
  23. mlrun/launcher/local.py +8 -2
  24. mlrun/launcher/remote.py +8 -2
  25. mlrun/model.py +5 -1
  26. mlrun/model_monitoring/db/stores/__init__.py +0 -2
  27. mlrun/model_monitoring/db/stores/base/store.py +1 -2
  28. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
  29. mlrun/model_monitoring/db/stores/sqldb/models/base.py +32 -2
  30. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +25 -5
  31. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
  32. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +207 -139
  33. mlrun/model_monitoring/db/tsdb/__init__.py +1 -1
  34. mlrun/model_monitoring/db/tsdb/base.py +225 -38
  35. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  36. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +48 -15
  37. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +182 -16
  38. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +229 -42
  39. mlrun/model_monitoring/helpers.py +13 -0
  40. mlrun/model_monitoring/writer.py +36 -11
  41. mlrun/projects/operations.py +8 -5
  42. mlrun/projects/pipelines.py +42 -15
  43. mlrun/projects/project.py +22 -6
  44. mlrun/runtimes/base.py +2 -1
  45. mlrun/runtimes/local.py +4 -1
  46. mlrun/runtimes/nuclio/api_gateway.py +32 -8
  47. mlrun/runtimes/nuclio/application/application.py +3 -3
  48. mlrun/runtimes/nuclio/function.py +1 -4
  49. mlrun/runtimes/utils.py +5 -6
  50. mlrun/serving/server.py +2 -1
  51. mlrun/utils/helpers.py +8 -6
  52. mlrun/utils/logger.py +28 -1
  53. mlrun/utils/notifications/notification/__init__.py +14 -9
  54. mlrun/utils/notifications/notification_pusher.py +10 -3
  55. mlrun/utils/v3io_clients.py +0 -1
  56. mlrun/utils/version/version.json +2 -2
  57. {mlrun-1.7.0rc18.dist-info → mlrun-1.7.0rc19.dist-info}/METADATA +3 -3
  58. {mlrun-1.7.0rc18.dist-info → mlrun-1.7.0rc19.dist-info}/RECORD +62 -59
  59. mlrun/model_monitoring/db/v3io_tsdb_reader.py +0 -335
  60. {mlrun-1.7.0rc18.dist-info → mlrun-1.7.0rc19.dist-info}/LICENSE +0 -0
  61. {mlrun-1.7.0rc18.dist-info → mlrun-1.7.0rc19.dist-info}/WHEEL +0 -0
  62. {mlrun-1.7.0rc18.dist-info → mlrun-1.7.0rc19.dist-info}/entry_points.txt +0 -0
  63. {mlrun-1.7.0rc18.dist-info → mlrun-1.7.0rc19.dist-info}/top_level.txt +0 -0
mlrun/__main__.py CHANGED
@@ -31,6 +31,7 @@ from mlrun_pipelines.mounts import auto_mount as auto_mount_modifier
 from tabulate import tabulate
 
 import mlrun
+import mlrun.common.constants as mlrun_constants
 import mlrun.common.schemas
 from mlrun.common.helpers import parse_versioned_object_uri
 
@@ -256,8 +257,10 @@ def run(
         runobj.metadata.labels[k] = v
 
     if workflow:
-        runobj.metadata.labels["workflow"] = workflow
-        runobj.metadata.labels["mlrun/runner-pod"] = socket.gethostname()
+        runobj.metadata.labels[mlrun_constants.MLRunInternalLabels.workflow] = workflow
+        runobj.metadata.labels[mlrun_constants.MLRunInternalLabels.runner_pod] = (
+            socket.gethostname()
+        )
 
     if db:
         mlconf.dbpath = db
mlrun/common/constants.py CHANGED
@@ -12,12 +12,73 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+
 IMAGE_NAME_ENRICH_REGISTRY_PREFIX = "."  # prefix for image name to enrich with registry
-MLRUN_CREATED_LABEL = "mlrun-created"
-MLRUN_MODEL_CONF = "model-conf"
-MLRUN_SERVING_SPEC_MOUNT_PATH = f"/tmp/mlrun/{MLRUN_MODEL_CONF}"
+MLRUN_SERVING_CONF = "serving-conf"
+MLRUN_SERVING_SPEC_MOUNT_PATH = f"/tmp/mlrun/{MLRUN_SERVING_CONF}"
 MLRUN_SERVING_SPEC_FILENAME = "serving_spec.json"
 MLRUN_SERVING_SPEC_PATH = (
     f"{MLRUN_SERVING_SPEC_MOUNT_PATH}/{MLRUN_SERVING_SPEC_FILENAME}"
 )
+MLRUN_FUNCTIONS_ANNOTATION = "mlrun/mlrun-functions"
 MYSQL_MEDIUMBLOB_SIZE_BYTES = 16 * 1024 * 1024
+MLRUN_LABEL_PREFIX = "mlrun/"
+DASK_LABEL_PREFIX = "dask.org/"
+NUCLIO_LABEL_PREFIX = "nuclio.io/"
+
+
+class MLRunInternalLabels:
+    ### dask
+    dask_cluster_name = f"{DASK_LABEL_PREFIX}cluster-name"
+    dask_component = f"{DASK_LABEL_PREFIX}component"
+
+    ### spark
+    spark_role = "spark-role"
+
+    ### mpi
+    mpi_job_name = "mpi-job-name"
+    mpi_job_role = "mpi-job-role"
+    mpi_role_type = "mpi_role_type"
+
+    ### nuclio
+    nuclio_project_name = f"{NUCLIO_LABEL_PREFIX}project-name"
+    nuclio_class = f"{NUCLIO_LABEL_PREFIX}class"
+
+    ### mlrun
+    mlrun_auth_key = "mlrun-auth-key"
+    mlrun_class = f"{MLRUN_LABEL_PREFIX}class"
+    client_python_version = f"{MLRUN_LABEL_PREFIX}client_python_version"
+    client_version = f"{MLRUN_LABEL_PREFIX}client_version"
+    function = f"{MLRUN_LABEL_PREFIX}function"
+    job = f"{MLRUN_LABEL_PREFIX}job"
+    name = f"{MLRUN_LABEL_PREFIX}name"
+    mlrun_owner = f"{MLRUN_LABEL_PREFIX}owner"
+    owner_domain = f"{MLRUN_LABEL_PREFIX}owner_domain"
+    project = f"{MLRUN_LABEL_PREFIX}project"
+    runner_pod = f"{MLRUN_LABEL_PREFIX}runner-pod"
+    schedule_name = f"{MLRUN_LABEL_PREFIX}schedule-name"
+    scrape_metrics = f"{MLRUN_LABEL_PREFIX}scrape-metrics"
+    tag = f"{MLRUN_LABEL_PREFIX}tag"
+    uid = f"{MLRUN_LABEL_PREFIX}uid"
+    username = f"{MLRUN_LABEL_PREFIX}username"
+    username_domain = f"{MLRUN_LABEL_PREFIX}username_domain"
+    task_name = f"{MLRUN_LABEL_PREFIX}task-name"
+    host = "host"
+    job_type = "job-type"
+    kind = "kind"
+    component = "component"
+    resource_name = "resource_name"
+    created = "mlrun-created"
+
+    owner = "owner"
+    v3io_user = "v3io_user"
+    workflow = "workflow"
+    feature_vector = "feature-vector"
+
+    @classmethod
+    def all(cls):
+        return [
+            value
+            for key, value in cls.__dict__.items()
+            if not key.startswith("__") and isinstance(value, str)
+        ]
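The new MLRunInternalLabels class centralizes label keys that were previously scattered string literals (see the __main__.py hunk above). A minimal sketch of how the constants are consumed; the label values here are illustrative:

import mlrun.common.constants as mlrun_constants

# Build a labels dict from the shared constants instead of raw strings.
labels = {
    mlrun_constants.MLRunInternalLabels.project: "my-project",  # "mlrun/project"
    mlrun_constants.MLRunInternalLabels.owner: "admin",         # "owner"
}

# all() gathers every string attribute of the class; this is what the new
# Config.internal_labels() helper exposes (see the mlrun/config.py hunk below).
print(mlrun_constants.MLRunInternalLabels.all())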
mlrun/common/formatters/__init__.py ADDED
@@ -0,0 +1,16 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from .function import FunctionFormat  # noqa
mlrun/common/formatters/base.py ADDED
@@ -0,0 +1,59 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import typing
+
+
+class ObjectFormat:
+    full = "full"
+
+    @staticmethod
+    def format_method(_format: str) -> typing.Optional[typing.Callable]:
+        return {
+            ObjectFormat.full: None,
+        }[_format]
+
+    @classmethod
+    def format_obj(cls, obj: typing.Any, _format: str) -> typing.Any:
+        _format = _format or cls.full
+        format_method = cls.format_method(_format)
+        if not format_method:
+            return obj
+
+        return format_method(obj)
+
+    @staticmethod
+    def filter_obj_method(_filter: list[list[str]]) -> typing.Callable:
+        def _filter_method(obj: dict) -> dict:
+            formatted_obj = {}
+            for key_list in _filter:
+                obj_recursive_iterator = obj
+                formatted_obj_recursive_iterator = formatted_obj
+                for idx, key in enumerate(key_list):
+                    if key not in obj_recursive_iterator:
+                        break
+                    value = (
+                        {} if idx < len(key_list) - 1 else obj_recursive_iterator[key]
+                    )
+                    formatted_obj_recursive_iterator.setdefault(key, value)
+
+                    obj_recursive_iterator = obj_recursive_iterator[key]
+                    formatted_obj_recursive_iterator = formatted_obj_recursive_iterator[
+                        key
+                    ]
+
+            return formatted_obj
+
+        return _filter_method
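filter_obj_method builds a projection function over nested dicts from a list of key paths. A self-contained sketch of the behavior; the sample object is invented for illustration:

from mlrun.common.formatters.base import ObjectFormat

obj = {
    "kind": "job",
    "spec": {"image": "mlrun/mlrun", "args": ["--verbose"]},
    "status": {"state": "ready"},
}

# Keep only "kind" and the nested "spec" -> "image" path; "spec.args" is dropped.
project = ObjectFormat.filter_obj_method([["kind"], ["spec", "image"]])
print(project(obj))  # {'kind': 'job', 'spec': {'image': 'mlrun/mlrun'}}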
mlrun/common/formatters/function.py ADDED
@@ -0,0 +1,41 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import typing
+
+import mlrun.common.types
+
+from .base import ObjectFormat
+
+
+class FunctionFormat(ObjectFormat, mlrun.common.types.StrEnum):
+    minimal = "minimal"
+
+    @staticmethod
+    def format_method(_format: str) -> typing.Optional[typing.Callable]:
+        return {
+            FunctionFormat.full: None,
+            FunctionFormat.minimal: FunctionFormat.filter_obj_method(
+                [
+                    ["kind"],
+                    ["metadata"],
+                    ["status"],
+                    ["spec", "description"],
+                    ["spec", "image"],
+                    ["spec", "default_handler"],
+                    ["spec", "entry_points"],
+                ]
+            ),
+        }[_format]
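FunctionFormat.minimal applies that filtering to function objects, trimming heavyweight spec sections while keeping kind, metadata, and status whole. A hedged usage sketch; the function dict is fabricated:

from mlrun.common.formatters import FunctionFormat

func = {
    "kind": "job",
    "metadata": {"name": "trainer"},
    "spec": {"image": "mlrun/mlrun", "build": {"commands": ["pip install xgboost"]}},
    "status": {"state": "ready"},
}

# Only the spec keys listed in the filter survive; the build section is stripped.
minimal = FunctionFormat.format_obj(func, FunctionFormat.minimal)
print(minimal["spec"])  # {'image': 'mlrun/mlrun'}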
mlrun/common/runtimes/constants.py CHANGED
@@ -15,6 +15,10 @@
 import enum
 import typing
 
+import mlrun_pipelines.common.models
+
+import mlrun.common.constants as mlrun_constants
+
 
 class PodPhases:
     """
@@ -122,8 +126,8 @@ class MPIJobCRDVersions:
     @staticmethod
     def role_label_by_version(version):
         return {
-            MPIJobCRDVersions.v1alpha1: "mpi_role_type",
-            MPIJobCRDVersions.v1: "mpi-job-role",
+            MPIJobCRDVersions.v1alpha1: mlrun_constants.MLRunInternalLabels.mpi_role_type,
+            MPIJobCRDVersions.v1: mlrun_constants.MLRunInternalLabels.mpi_job_role,
         }[version]
 
 
@@ -191,10 +195,31 @@ class RunStates:
         # TODO: add aborting state once we have it
     ]
 
+    @staticmethod
+    def run_state_to_pipeline_run_status(run_state: str):
+        if not run_state:
+            return mlrun_pipelines.common.models.RunStatuses.runtime_state_unspecified
 
+        if run_state not in RunStates.all():
+            raise ValueError(f"Invalid run state: {run_state}")
+
+        return {
+            RunStates.completed: mlrun_pipelines.common.models.RunStatuses.succeeded,
+            RunStates.error: mlrun_pipelines.common.models.RunStatuses.failed,
+            RunStates.running: mlrun_pipelines.common.models.RunStatuses.running,
+            RunStates.created: mlrun_pipelines.common.models.RunStatuses.pending,
+            RunStates.pending: mlrun_pipelines.common.models.RunStatuses.pending,
+            RunStates.unknown: mlrun_pipelines.common.models.RunStatuses.runtime_state_unspecified,
+            RunStates.aborted: mlrun_pipelines.common.models.RunStatuses.canceled,
+            RunStates.aborting: mlrun_pipelines.common.models.RunStatuses.canceling,
+            RunStates.skipped: mlrun_pipelines.common.models.RunStatuses.skipped,
+        }[run_state]
+
+
+# TODO: remove this class in 1.9.0 - use only MlrunInternalLabels
 class RunLabels(enum.Enum):
-    owner = "owner"
-    v3io_user = "v3io_user"
+    owner = mlrun_constants.MLRunInternalLabels.owner
+    v3io_user = mlrun_constants.MLRunInternalLabels.v3io_user
 
     @staticmethod
     def all():
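The new run_state_to_pipeline_run_status helper gives callers a single place to translate MLRun run states into pipeline run statuses. A hedged sketch of the mapping in use:

import mlrun.common.runtimes.constants as runtimes_constants

# "completed" maps to the pipeline-level succeeded status; empty or unknown
# states fall back to runtime_state_unspecified, and unrecognized states raise.
status = runtimes_constants.RunStates.run_state_to_pipeline_run_status("completed")
print(status)

try:
    runtimes_constants.RunStates.run_state_to_pipeline_run_status("not-a-state")
except ValueError as err:
    print(err)  # Invalid run state: not-a-state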
mlrun/common/schemas/__init__.py CHANGED
@@ -149,7 +149,6 @@ from .model_monitoring import (
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
     PrometheusEndpoints,
-    TimeSeriesConnector,
     TSDBTarget,
     V3IOTSDBTables,
 )
mlrun/common/schemas/api_gateway.py CHANGED
@@ -18,6 +18,7 @@ from typing import Optional
 import pydantic
 
 import mlrun.common.types
+from mlrun.common.constants import MLRUN_FUNCTIONS_ANNOTATION
 
 
 class APIGatewayAuthenticationMode(mlrun.common.types.StrEnum):
@@ -55,6 +56,7 @@ class APIGatewayMetadata(_APIGatewayBaseModel):
     name: str
     namespace: Optional[str]
     labels: Optional[dict] = {}
+    annotations: Optional[dict] = {}
 
 
 class APIGatewayBasicAuth(_APIGatewayBaseModel):
@@ -91,6 +93,56 @@ class APIGateway(_APIGatewayBaseModel):
     spec: APIGatewaySpec
     status: Optional[APIGatewayStatus]
 
+    def get_function_names(self):
+        return [
+            upstream.nucliofunction.get("name")
+            for upstream in self.spec.upstreams
+            if upstream.nucliofunction.get("name")
+        ]
+
+    def enrich_mlrun_function_names(self):
+        upstream_with_nuclio_names = []
+        mlrun_function_uris = []
+        for upstream in self.spec.upstreams:
+            uri = upstream.nucliofunction.get("name")
+            project, function_name, tag, _ = (
+                mlrun.common.helpers.parse_versioned_object_uri(uri)
+            )
+            upstream.nucliofunction["name"] = (
+                mlrun.runtimes.nuclio.function.get_fullname(function_name, project, tag)
+            )
+
+            upstream_with_nuclio_names.append(upstream)
+            mlrun_function_uris.append(uri)
+
+        self.spec.upstreams = upstream_with_nuclio_names
+        if len(mlrun_function_uris) == 1:
+            self.metadata.annotations[MLRUN_FUNCTIONS_ANNOTATION] = mlrun_function_uris[
+                0
+            ]
+        elif len(mlrun_function_uris) == 2:
+            self.metadata.annotations[MLRUN_FUNCTIONS_ANNOTATION] = "&".join(
+                mlrun_function_uris
+            )
+        return self
+
+    def replace_nuclio_names_with_mlrun_uri(self):
+        mlrun_functions = self.metadata.annotations.get(MLRUN_FUNCTIONS_ANNOTATION)
+        if mlrun_functions:
+            mlrun_function_uris = (
+                mlrun_functions.split("&")
+                if "&" in mlrun_functions
+                else [mlrun_functions]
+            )
+            if len(mlrun_function_uris) != len(self.spec.upstreams):
+                raise mlrun.errors.MLRunValueError(
+                    "Error when translating nuclio names to mlrun names in api gateway:"
+                    " number of functions doesn't match the mlrun functions in annotation"
+                )
+            for i in range(len(mlrun_function_uris)):
+                self.spec.upstreams[i].nucliofunction["name"] = mlrun_function_uris[i]
+        return self
+
 
 class APIGatewaysOutput(_APIGatewayBaseModel):
     api_gateways: typing.Optional[dict[str, APIGateway]] = {}
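The round-trip above records the originating MLRun function URIs in the "mlrun/mlrun-functions" annotation, "&"-joined when a gateway fronts two (canary) functions, so nuclio names can later be translated back. A sketch of just that convention in plain Python; the URIs are illustrative:

MLRUN_FUNCTIONS_ANNOTATION = "mlrun/mlrun-functions"

# enrich_mlrun_function_names: record the MLRun URIs before the upstream
# names are rewritten to full nuclio function names.
uris = ["my-project/model-a:latest", "my-project/model-b:latest"]
annotations = {MLRUN_FUNCTIONS_ANNOTATION: "&".join(uris)}

# replace_nuclio_names_with_mlrun_uri: recover the original URIs.
stored = annotations[MLRUN_FUNCTIONS_ANNOTATION]
decoded = stored.split("&") if "&" in stored else [stored]
assert decoded == uris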
mlrun/common/schemas/frontend_spec.py CHANGED
@@ -70,3 +70,4 @@ class FrontendSpec(pydantic.BaseModel):
     feature_store_data_prefixes: typing.Optional[dict[str, str]]
     allowed_artifact_path_prefixes_list: list[str]
     ce: typing.Optional[dict]
+    internal_labels: list[str] = []
mlrun/common/schemas/model_monitoring/__init__.py CHANGED
@@ -11,8 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
-# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
 
 from .constants import (
     ControllerPolicy,
@@ -30,13 +28,15 @@ from .constants import (
     ModelMonitoringMode,
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
+    PredictionsQueryConstants,
     ProjectSecretKeys,
     PrometheusEndpoints,
     PrometheusMetric,
     ResultData,
+    ResultKindApp,
     SchedulingKeys,
+    SpecialApps,
     TDEngineSuperTables,
-    TimeSeriesConnector,
     TSDBTarget,
     V3IOTSDBTables,
     VersionedModel,
@@ -59,7 +59,10 @@ from .model_endpoints import (
     ModelEndpointList,
     ModelEndpointMetadata,
     ModelEndpointMonitoringMetric,
+    ModelEndpointMonitoringMetricNoData,
     ModelEndpointMonitoringMetricType,
+    ModelEndpointMonitoringMetricValues,
+    ModelEndpointMonitoringResultValues,
     ModelEndpointSpec,
     ModelEndpointStatus,
 )
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -158,10 +158,6 @@ class EventKeyMetrics:
     REAL_TIME = "real_time"
 
 
-class TimeSeriesConnector:
-    TSDB = "tsdb"
-
-
 class ModelEndpointTarget:
     V3IO_NOSQL = "v3io-nosql"
     SQL = "sql"
@@ -197,6 +193,7 @@ class FileTargetKind:
     APPS_PARQUET = "apps_parquet"
     LOG_STREAM = "log_stream"
     APP_RESULTS = "app_results"
+    APP_METRICS = "app_metrics"
     MONITORING_SCHEDULES = "monitoring_schedules"
     MONITORING_APPLICATION = "monitoring_application"
 
@@ -321,6 +318,7 @@ class ResultKindApp(Enum):
     concept_drift = 1
     model_performance = 2
     system_performance = 3
+    custom = 4
 
 
 class ResultStatusApp(IntEnum):
@@ -350,9 +348,6 @@ class TSDBTarget:
     V3IO_TSDB = "v3io-tsdb"
     TDEngine = "tdengine"
     PROMETHEUS = "prometheus"
-    APP_RESULTS_TABLE = "app-results"
-    V3IO_BE = "tsdb"
-    V3IO_RATE = "1/s"
 
 
 class HistogramDataDriftApplicationConstants:
mlrun/config.py CHANGED
@@ -37,6 +37,7 @@ import dotenv
 import semver
 import yaml
 
+import mlrun.common.constants
 import mlrun.common.schemas
 import mlrun.errors
 
@@ -87,7 +88,7 @@ default_config = {
     "mpijob_crd_version": "",  # mpijob crd version (e.g: "v1alpha1". must be in: mlrun.runtime.MPIJobCRDVersions)
     "ipython_widget": True,
    "log_level": "INFO",
-    # log formatter (options: human | json)
+    # log formatter (options: human | human_extended | json)
     "log_formatter": "human",
     "submit_timeout": "180",  # timeout when submitting a new k8s resource
     # runtimes cleanup interval in seconds
@@ -370,7 +371,7 @@ default_config = {
         "add_templated_ingress_host_mode": "never",
         "explicit_ack": "enabled",
         # size of serving spec to move to config maps
-        "serving_spec_env_cutoff": 4096,
+        "serving_spec_env_cutoff": 0,
     },
     "logs": {
         "decode": {
@@ -967,6 +968,10 @@ class Config:
             self.httpdb.clusterization.chief.url = chief_api_url
         return self.httpdb.clusterization.chief.url
 
+    @staticmethod
+    def internal_labels():
+        return mlrun.common.constants.MLRunInternalLabels.all()
+
     @staticmethod
     def get_storage_auto_mount_params():
         auto_mount_params = {}
mlrun/datastore/sources.py CHANGED
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import json
-import math
 import operator
 import os
 import warnings
@@ -31,6 +30,7 @@ from nuclio.config import split_path
 import mlrun
 from mlrun.config import config
 from mlrun.datastore.snowflake_utils import get_snowflake_spark_options
+from mlrun.datastore.utils import transform_list_filters_to_tuple
 from mlrun.secrets import SecretsStore
 
 from ..model import DataSource
@@ -313,12 +313,13 @@ class ParquetSource(BaseSourceDriver):
         schedule: str = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
-        additional_filters: Optional[list[tuple]] = None,
+        additional_filters: Optional[list[Union[tuple, list]]] = None,
     ):
         if additional_filters:
             attributes = copy(attributes) or {}
+            additional_filters = transform_list_filters_to_tuple(additional_filters)
             attributes["additional_filters"] = additional_filters
-            self.validate_additional_filters(additional_filters)
+
         super().__init__(
             name,
             path,
@@ -359,25 +360,6 @@ class ParquetSource(BaseSourceDriver):
         else:
             return time
 
-    @staticmethod
-    def validate_additional_filters(additional_filters):
-        if not additional_filters:
-            return
-        for filter_tuple in additional_filters:
-            if not filter_tuple:
-                continue
-            col_name, op, value = filter_tuple
-            if isinstance(value, float) and math.isnan(value):
-                raise mlrun.errors.MLRunInvalidArgumentError(
-                    "using NaN in additional_filters is not supported"
-                )
-            elif isinstance(value, (list, tuple, set)):
-                for sub_value in value:
-                    if isinstance(sub_value, float) and math.isnan(sub_value):
-                        raise mlrun.errors.MLRunInvalidArgumentError(
-                            "using NaN in additional_filters is not supported"
-                        )
-
     def to_step(
         self,
         key_field=None,
@@ -393,6 +375,7 @@ class ParquetSource(BaseSourceDriver):
         attributes.pop("additional_filters", None)
         if context:
             attributes["context"] = context
+        additional_filters = transform_list_filters_to_tuple(additional_filters)
         data_item = mlrun.store_manager.object(self.path)
         store, path, url = mlrun.store_manager.get_or_create_store(self.path)
         return storey.ParquetSource(
@@ -406,6 +389,16 @@ class ParquetSource(BaseSourceDriver):
             **attributes,
         )
 
+    @classmethod
+    def from_dict(cls, struct=None, fields=None, deprecated_fields: dict = None):
+        new_obj = super().from_dict(
+            struct=struct, fields=fields, deprecated_fields=deprecated_fields
+        )
+        new_obj.attributes["additional_filters"] = transform_list_filters_to_tuple(
+            new_obj.additional_filters
+        )
+        return new_obj
+
     def get_spark_options(self):
         store, path, _ = mlrun.store_manager.get_or_create_store(self.path)
         spark_options = store.get_spark_options()
@@ -428,6 +421,7 @@ class ParquetSource(BaseSourceDriver):
         additional_filters=None,
     ):
         reader_args = self.attributes.get("reader_args", {})
+        additional_filters = transform_list_filters_to_tuple(additional_filters)
        return mlrun.store_manager.object(url=self.path).as_df(
             columns=columns,
             df_module=df_module,
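With validation moved into mlrun.datastore.utils, ParquetSource now normalizes list-style filters to tuples at each entry point (__init__, to_step, from_dict, to_dataframe). A hedged usage sketch; the path and column names are invented, and the keyword arguments assume the documented ParquetSource signature:

from mlrun.datastore.sources import ParquetSource

# JSON round-trips turn filter tuples into lists; the source now accepts
# either spelling and normalizes to tuples internally.
source = ParquetSource(
    name="events",
    path="v3io:///projects/demo/events.parquet",
    additional_filters=[["label", "in", [0, 1]]],
)
print(source.attributes["additional_filters"])  # [('label', 'in', [0, 1])]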
mlrun/datastore/store_resources.py CHANGED
@@ -146,7 +146,11 @@ def get_store_resource(
 
     db = db or mlrun.get_run_db(secrets=secrets)
     kind, uri = parse_store_uri(uri)
-    if kind == StorePrefix.FeatureSet:
+    if not kind:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"Cannot get store resource from invalid URI: {uri}"
+        )
+    elif kind == StorePrefix.FeatureSet:
         project, name, tag, uid = parse_versioned_object_uri(
             uri, project or config.default_project
         )
mlrun/datastore/targets.py CHANGED
@@ -30,6 +30,7 @@ import mlrun
 import mlrun.utils.helpers
 from mlrun.config import config
 from mlrun.datastore.snowflake_utils import get_snowflake_spark_options
+from mlrun.datastore.utils import transform_list_filters_to_tuple
 from mlrun.model import DataSource, DataTarget, DataTargetBase, TargetPathObject
 from mlrun.utils import logger, now_date
 from mlrun.utils.helpers import to_parquet
@@ -757,7 +758,7 @@ class BaseStoreTarget(DataTargetBase):
         # options used in spark.read.load(**options)
         raise NotImplementedError()
 
-    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options={}):
+    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         return df
 
     def get_dask_options(self):
@@ -999,7 +1000,7 @@ class ParquetTarget(BaseStoreTarget):
             start_time=start_time,
             end_time=end_time,
             time_column=time_column,
-            additional_filters=additional_filters,
+            additional_filters=transform_list_filters_to_tuple(additional_filters),
             **kwargs,
         )
         if not columns:
mlrun/datastore/utils.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import math
 import tarfile
 import tempfile
 import typing
@@ -180,3 +181,44 @@ def get_kafka_brokers_from_dict(options: dict, pop=False) -> typing.Optional[str
             FutureWarning,
         )
     return kafka_bootstrap_servers
+
+
+def transform_list_filters_to_tuple(additional_filters):
+    tuple_filters = []
+    if not additional_filters:
+        return tuple_filters
+    validate_additional_filters(additional_filters)
+    for additional_filter in additional_filters:
+        tuple_filters.append(tuple(additional_filter))
+    return tuple_filters
+
+
+def validate_additional_filters(additional_filters):
+    nan_error_message = "using NaN in additional_filters is not supported"
+    if additional_filters in [None, [], ()]:
+        return
+    for filter_tuple in additional_filters:
+        if filter_tuple == () or filter_tuple == []:
+            continue
+        if not isinstance(filter_tuple, (list, tuple)):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"mlrun supports additional_filters only as a list of tuples."
+                f" Current additional_filters: {additional_filters}"
+            )
+        if isinstance(filter_tuple[0], (list, tuple)):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"additional_filters does not support nested list inside filter tuples except in -in- logic."
+                f" Current filter_tuple: {filter_tuple}."
+            )
+        if len(filter_tuple) != 3:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"illegal filter tuple length, {filter_tuple} in additional filters:"
+                f" {additional_filters}"
+            )
+        col_name, op, value = filter_tuple
+        if isinstance(value, float) and math.isnan(value):
+            raise mlrun.errors.MLRunInvalidArgumentError(nan_error_message)
+        elif isinstance(value, (list, tuple)):
+            for sub_value in value:
+                if isinstance(sub_value, float) and math.isnan(sub_value):
+                    raise mlrun.errors.MLRunInvalidArgumentError(nan_error_message)
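Taken together, the new helpers normalize list-form filters to tuples and reject malformed or NaN-valued filters early, before they reach the underlying parquet reader. A short sketch of the expected behavior:

import math

import mlrun.errors
from mlrun.datastore.utils import transform_list_filters_to_tuple

# List-form filters are normalized to tuples.
print(transform_list_filters_to_tuple([["age", ">", 30]]))  # [('age', '>', 30)]

# NaN values are rejected up front with a descriptive error.
try:
    transform_list_filters_to_tuple([("score", "=", math.nan)])
except mlrun.errors.MLRunInvalidArgumentError as err:
    print(err)  # using NaN in additional_filters is not supported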