mlrun 1.7.0rc29__py3-none-any.whl → 1.7.0rc31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/common/constants.py +1 -1
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +5 -1
- mlrun/common/schemas/project.py +10 -9
- mlrun/config.py +21 -2
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +48 -16
- mlrun/datastore/__init__.py +1 -0
- mlrun/datastore/base.py +20 -8
- mlrun/datastore/datastore.py +4 -2
- mlrun/datastore/datastore_profile.py +1 -1
- mlrun/datastore/google_cloud_storage.py +1 -0
- mlrun/datastore/inmem.py +3 -0
- mlrun/datastore/s3.py +2 -0
- mlrun/datastore/sources.py +14 -0
- mlrun/datastore/targets.py +11 -1
- mlrun/db/base.py +1 -0
- mlrun/db/httpdb.py +10 -2
- mlrun/db/nopdb.py +1 -0
- mlrun/feature_store/retrieval/spark_merger.py +3 -32
- mlrun/model.py +1 -5
- mlrun/model_monitoring/api.py +3 -3
- mlrun/model_monitoring/controller.py +57 -73
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +8 -2
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +3 -0
- mlrun/model_monitoring/helpers.py +6 -12
- mlrun/model_monitoring/writer.py +1 -2
- mlrun/projects/project.py +16 -0
- mlrun/run.py +5 -5
- mlrun/runtimes/base.py +1 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc29.dist-info → mlrun-1.7.0rc31.dist-info}/METADATA +6 -6
- {mlrun-1.7.0rc29.dist-info → mlrun-1.7.0rc31.dist-info}/RECORD +37 -39
- {mlrun-1.7.0rc29.dist-info → mlrun-1.7.0rc31.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -271
- mlrun/model_monitoring/controller_handler.py +0 -37
- {mlrun-1.7.0rc29.dist-info → mlrun-1.7.0rc31.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc29.dist-info → mlrun-1.7.0rc31.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc29.dist-info → mlrun-1.7.0rc31.dist-info}/top_level.txt +0 -0
mlrun/common/constants.py
CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#

 IMAGE_NAME_ENRICH_REGISTRY_PREFIX = "."  # prefix for image name to enrich with registry
 MLRUN_SERVING_CONF = "serving-conf"

@@ -70,6 +69,7 @@ class MLRunInternalLabels:
     job_type = "job-type"
     kind = "kind"
     component = "component"
+    mlrun_type = "mlrun__type"

     owner = "owner"
     v3io_user = "v3io_user"

mlrun/common/schemas/model_monitoring/constants.py
CHANGED

@@ -17,6 +17,7 @@ from dataclasses import dataclass
 from enum import Enum, IntEnum
 from typing import Optional

+import mlrun.common.constants
 import mlrun.common.helpers
 from mlrun.common.types import StrEnum

@@ -354,7 +355,7 @@ class ResultStatusApp(IntEnum):


 class ModelMonitoringAppLabel:
-    KEY =
+    KEY = mlrun.common.constants.MLRunInternalLabels.mlrun_type
     VAL = "mlrun__model-monitoring-application"

     def __str__(self) -> str:

@@ -377,3 +378,6 @@ class PredictionsQueryConstants:

 class SpecialApps:
     MLRUN_INFRA = "mlrun-infra"
+
+
+_RESERVED_FUNCTION_NAMES = MonitoringFunctionNames.list() + [SpecialApps.MLRUN_INFRA]

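Taken together, these hunks route the monitoring application label key through the shared internal-labels constant. A minimal sketch of the resulting values (class bodies trimmed to the lines shown in this diff):

class MLRunInternalLabels:
    # from mlrun/common/constants.py in this release
    mlrun_type = "mlrun__type"


class ModelMonitoringAppLabel:
    KEY = MLRunInternalLabels.mlrun_type  # resolves to "mlrun__type"
    VAL = "mlrun__model-monitoring-application"


# the label a monitoring application is expected to carry, in key=value form
print(f"{ModelMonitoringAppLabel.KEY}={ModelMonitoringAppLabel.VAL}")
# -> mlrun__type=mlrun__model-monitoring-application
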
mlrun/common/schemas/project.py
CHANGED
@@ -114,18 +114,19 @@ class ProjectOwner(pydantic.BaseModel):

 class ProjectSummary(pydantic.BaseModel):
     name: str
-    files_count: int
-    feature_sets_count: int
-    models_count: int
-    runs_completed_recent_count: int
-    runs_failed_recent_count: int
-    runs_running_count: int
-    distinct_schedules_count: int
-    distinct_scheduled_jobs_pending_count: int
-    distinct_scheduled_pipelines_pending_count: int
+    files_count: int = 0
+    feature_sets_count: int = 0
+    models_count: int = 0
+    runs_completed_recent_count: int = 0
+    runs_failed_recent_count: int = 0
+    runs_running_count: int = 0
+    distinct_schedules_count: int = 0
+    distinct_scheduled_jobs_pending_count: int = 0
+    distinct_scheduled_pipelines_pending_count: int = 0
     pipelines_completed_recent_count: typing.Optional[int] = None
     pipelines_failed_recent_count: typing.Optional[int] = None
     pipelines_running_count: typing.Optional[int] = None
+    updated: typing.Optional[datetime.datetime] = None


 class IguazioProject(pydantic.BaseModel):

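With the counters now defaulting to zero, a ProjectSummary can be validated from partial data. A short sketch against a trimmed copy of the schema (trimmed here for brevity, not the full model):

import datetime
import typing

import pydantic


class ProjectSummary(pydantic.BaseModel):
    # trimmed copy of the schema above, for illustration only
    name: str
    files_count: int = 0
    models_count: int = 0
    runs_running_count: int = 0
    updated: typing.Optional[datetime.datetime] = None


summary = ProjectSummary(name="my-project")  # no longer requires every counter
print(summary.files_count)  # 0
print(summary.updated)      # None until a summary refresh populates it
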
mlrun/config.py
CHANGED
@@ -52,6 +52,11 @@ default_config = {
     "kubernetes": {
         "kubeconfig_path": "",  # local path to kubeconfig file (for development purposes),
         # empty by default as the API already running inside k8s cluster
+        "pagination": {
+            # pagination config for interacting with k8s API
+            "list_pods_limit": 200,
+            "list_crd_objects_limit": 200,
+        },
     },
     "dbpath": "",  # db/api url
     # url to nuclio dashboard api (can be with user & token, e.g. https://username:password@dashboard-url.com)

@@ -108,7 +113,12 @@
             # max number of parallel abort run jobs in runs monitoring
             "concurrent_abort_stale_runs_workers": 10,
             "list_runs_time_period_in_days": 7,  # days
-        }
+        },
+        "projects": {
+            "summaries": {
+                "cache_interval": "30",
+            },
+        },
     },
     "crud": {
         "runs": {

@@ -269,6 +279,16 @@
         "url": "",
         "service": "mlrun-api-chief",
         "port": 8080,
+        "feature_gates": {
+            "scheduler": "enabled",
+            "project_sync": "enabled",
+            "cleanup": "enabled",
+            "runs_monitoring": "enabled",
+            "pagination_cache": "enabled",
+            "project_summaries": "enabled",
+            "start_logs": "enabled",
+            "stop_logs": "enabled",
+        },
     },
     "worker": {
         "sync_with_chief": {

@@ -437,7 +457,6 @@
         "followers": "",
         # This is used as the interval for the sync loop both when mlrun is leader and follower
         "periodic_sync_interval": "1 minute",
-        "counters_cache_ttl": "2 minutes",
         "project_owners_cache_ttl": "30 seconds",
         # access key to be used when the leader is iguazio and polling is done from it
         "iguazio_access_key": "",

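As a quick illustration of how the new defaults surface at runtime, a sketch assuming the nested keys are exposed through mlrun.mlconf the same way as existing config entries:

import mlrun

# nested default_config keys are reachable as attributes on mlrun.mlconf;
# values below are the defaults added in this release unless overridden
print(mlrun.mlconf.kubernetes.pagination.list_pods_limit)         # 200
print(mlrun.mlconf.kubernetes.pagination.list_crd_objects_limit)  # 200
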
mlrun/data_types/spark.py
CHANGED
@@ -20,10 +20,10 @@ import pytz
 from pyspark.sql.functions import to_utc_timestamp
 from pyspark.sql.types import BooleanType, DoubleType, TimestampType

+from mlrun.feature_store.retrieval.spark_merger import spark_df_to_pandas
 from mlrun.utils import logger

 from .data_types import InferOptions, spark_to_value_type
-from .to_pandas import toPandas

 try:
     import pyspark.sql.functions as funcs

@@ -75,7 +75,7 @@ def get_df_preview_spark(df, preview_lines=20):
     """capture preview data from spark df"""
     df = df.limit(preview_lines)

-    result_dict =
+    result_dict = spark_df_to_pandas(df).to_dict(orient="split")
     return [result_dict["columns"], *result_dict["data"]]

mlrun/data_types/to_pandas.py
CHANGED
@@ -15,21 +15,11 @@
 import warnings
 from collections import Counter

-
-
-
-
-
-    IntegerType,
-    IntegralType,
-    LongType,
-    MapType,
-    ShortType,
-    TimestampType,
-)
-
-
-def toPandas(spark_df):
+import pandas as pd
+import semver
+
+
+def _toPandas(spark_df):
     """
     Modified version of spark DataFrame.toPandas() –
     https://github.com/apache/spark/blob/v3.2.3/python/pyspark/sql/pandas/conversion.py#L35

@@ -40,6 +30,12 @@ def toPandas(spark_df):
     This modification adds the missing unit to the dtype.
     """
     from pyspark.sql.dataframe import DataFrame
+    from pyspark.sql.types import (
+        BooleanType,
+        IntegralType,
+        MapType,
+        TimestampType,
+    )

     assert isinstance(spark_df, DataFrame)

@@ -48,7 +44,6 @@ def toPandas(spark_df):
     require_minimum_pandas_version()

     import numpy as np
-    import pandas as pd

     timezone = spark_df.sql_ctx._conf.sessionLocalTimeZone()

@@ -217,6 +212,16 @@ def toPandas(spark_df):

 def _to_corrected_pandas_type(dt):
     import numpy as np
+    from pyspark.sql.types import (
+        BooleanType,
+        ByteType,
+        DoubleType,
+        FloatType,
+        IntegerType,
+        LongType,
+        ShortType,
+        TimestampType,
+    )

     if type(dt) == ByteType:
         return np.int8

@@ -236,3 +241,30 @@ def _to_corrected_pandas_type(dt):
         return "datetime64[ns]"
     else:
         return None
+
+
+def spark_df_to_pandas(spark_df):
+    # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
+    # when we upgrade pyspark, we should check whether this workaround is still necessary
+    # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
+    if semver.parse(pd.__version__)["major"] >= 2:
+        import pyspark.sql.functions as pyspark_functions
+
+        type_conversion_dict = {}
+        for field in spark_df.schema.fields:
+            if str(field.dataType) == "TimestampType":
+                spark_df = spark_df.withColumn(
+                    field.name,
+                    pyspark_functions.date_format(
+                        pyspark_functions.to_timestamp(field.name),
+                        "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS",
+                    ),
+                )
+                type_conversion_dict[field.name] = "datetime64[ns]"
+
+        df = _toPandas(spark_df)
+        if type_conversion_dict:
+            df = df.astype(type_conversion_dict)
+        return df
+    else:
+        return _toPandas(spark_df)

mlrun/datastore/__init__.py
CHANGED
@@ -117,6 +117,7 @@ def get_stream_pusher(stream_path: str, **kwargs):
         return OutputStream(stream_path, **kwargs)
     elif stream_path.startswith("v3io"):
         endpoint, stream_path = parse_path(stream_path)
+        endpoint = kwargs.pop("endpoint", None) or endpoint
         return OutputStream(stream_path, endpoint=endpoint, **kwargs)
     elif stream_path.startswith("dummy://"):
         return _DummyStream(**kwargs)

mlrun/datastore/base.py
CHANGED
@@ -215,6 +215,11 @@ class DataStore:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "When providing start_time or end_time, must provide time_column"
             )
+        if start_time and end_time and start_time.tzinfo != end_time.tzinfo:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "start_time and end_time must have the same time zone"
+            )
+
         if start_time or end_time or additional_filters:
             partitions_time_attributes = find_partitions(url, file_system)
             set_filters(

@@ -232,13 +237,17 @@
             ):
                 raise ex

-
-            if start_time
-                start_time_inner = start_time.replace(
-
-
-
-
+            start_time_inner = None
+            if start_time:
+                start_time_inner = start_time.replace(
+                    tzinfo=None if start_time.tzinfo else pytz.utc
+                )
+
+            end_time_inner = None
+            if end_time:
+                end_time_inner = end_time.replace(
+                    tzinfo=None if end_time.tzinfo else pytz.utc
+                )

             set_filters(
                 partitions_time_attributes,

@@ -382,7 +391,10 @@
         }

     def rm(self, path, recursive=False, maxdepth=None):
-
+        try:
+            self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
+        except FileNotFoundError:
+            pass

     @staticmethod
     def _is_dd(df_module):

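To make the new time-filter validation concrete, a small standalone sketch of the same check (names here are illustrative, not the mlrun API):

from datetime import datetime, timezone


def check_time_filters(start_time, end_time):
    # mirrors the added guard: both bounds must agree on time zone awareness
    if start_time and end_time and start_time.tzinfo != end_time.tzinfo:
        raise ValueError("start_time and end_time must have the same time zone")


check_time_filters(
    datetime(2024, 1, 1, tzinfo=timezone.utc),
    datetime(2024, 1, 2, tzinfo=timezone.utc),
)  # passes: both bounds are timezone-aware

check_time_filters(datetime(2024, 1, 1), datetime(2024, 1, 2, tzinfo=timezone.utc))
# raises ValueError: one bound is naive, the other is timezone-aware
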
mlrun/datastore/datastore.py
CHANGED
@@ -32,6 +32,8 @@ in_memory_store = InMemoryStore()


 def parse_url(url):
+    if url and url.startswith("v3io://") and not url.startswith("v3io:///"):
+        url = url.replace("v3io://", "v3io:///", 1)
     parsed_url = urlparse(url)
     schema = parsed_url.scheme.lower()
     endpoint = parsed_url.hostname

@@ -94,7 +96,7 @@ def schema_to_store(schema):
         from .dbfs_store import DBFSStore

         return DBFSStore
-    elif schema
+    elif schema in ["hdfs", "webhdfs"]:
         from .hdfs import HdfsStore

         return HdfsStore

@@ -207,7 +209,7 @@ class StoreManager:
     ) -> (DataStore, str, str):
         schema, endpoint, parsed_url = parse_url(url)
         subpath = parsed_url.path
-        store_key = f"{schema}://{endpoint}"
+        store_key = f"{schema}://{endpoint}" if endpoint else f"{schema}://"

         if schema == "ds":
             datastore_profile = datastore_profile_read(url, project_name, secrets)

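The v3io normalization in parse_url matters because of how urlparse treats the authority component; a standalone illustration (the container and path names are made up):

from urllib.parse import urlparse

# without the third slash, the container name is parsed as the host/endpoint
print(urlparse("v3io://projects/my-proj/file.parquet").hostname)  # 'projects'
print(urlparse("v3io://projects/my-proj/file.parquet").path)      # '/my-proj/file.parquet'

# with v3io:/// the hostname stays empty and the full container path is preserved,
# which is also why store_key now falls back to f"{schema}://" when endpoint is empty
print(urlparse("v3io:///projects/my-proj/file.parquet").hostname)  # None
print(urlparse("v3io:///projects/my-proj/file.parquet").path)      # '/projects/my-proj/file.parquet'
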
mlrun/datastore/datastore_profile.py
CHANGED

@@ -412,7 +412,7 @@ class DatastoreProfileHdfs(DatastoreProfile):
         return res or None

     def url(self, subpath):
-        return f"
+        return f"webhdfs://{self.host}:{self.http_port}{subpath}"


 class DatastoreProfile2Json(pydantic.BaseModel):

mlrun/datastore/google_cloud_storage.py
CHANGED

@@ -133,6 +133,7 @@ class GoogleCloudStorageStore(DataStore):

     def rm(self, path, recursive=False, maxdepth=None):
         path = self._make_path(path)
+        self.filesystem.exists(path)
         self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)

     def get_spark_options(self):

mlrun/datastore/inmem.py
CHANGED
mlrun/datastore/s3.py
CHANGED
@@ -201,6 +201,8 @@ class S3Store(DataStore):
     def rm(self, path, recursive=False, maxdepth=None):
         bucket, key = self.get_bucket_and_key(path)
         path = f"{bucket}/{key}"
+        # In order to raise an error if there is connection error, ML-7056.
+        self.filesystem.exists(path=path)
         self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)


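The exists-then-rm pattern used by the S3 and GCS stores can be reproduced with any fsspec filesystem; a minimal sketch against the local filesystem (the path is illustrative):

import fsspec

fs = fsspec.filesystem("file")
path = "/tmp/example-artifact.parquet"  # illustrative path

# probing with exists() forces a round-trip first, so connection or credential
# problems surface as an exception instead of being masked by rm()
fs.exists(path)
try:
    fs.rm(path)
except FileNotFoundError:
    # mirrors the base DataStore.rm() change: a missing path is not an error
    pass
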
mlrun/datastore/sources.py
CHANGED
@@ -826,6 +826,20 @@ class SnowflakeSource(BaseSourceDriver):
         spark_options["query"] = self.attributes.get("query")
         return spark_options

+    def to_dataframe(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_field=None,
+        additional_filters=None,
+    ):
+        raise mlrun.errors.MLRunRuntimeError(
+            f"{type(self).__name__} supports only spark engine"
+        )
+

 class CustomSource(BaseSourceDriver):
     kind = "custom"

mlrun/datastore/targets.py
CHANGED
@@ -726,6 +726,10 @@ class BaseStoreTarget(DataTargetBase):
         timestamp_key=None,
         featureset_status=None,
     ):
+        if not self.support_storey:
+            raise mlrun.errors.MLRunRuntimeError(
+                f"{type(self).__name__} does not support storey engine"
+            )
         raise NotImplementedError()

     def purge(self):

@@ -768,6 +772,10 @@ class BaseStoreTarget(DataTargetBase):

     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
         # options used in spark.read.load(**options)
+        if not self.support_spark:
+            raise mlrun.errors.MLRunRuntimeError(
+                f"{type(self).__name__} does not support spark engine"
+            )
         raise NotImplementedError()

     def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):

@@ -1283,7 +1291,9 @@ class SnowflakeTarget(BaseStoreTarget):
         additional_filters=None,
         **kwargs,
     ):
-        raise
+        raise mlrun.errors.MLRunRuntimeError(
+            f"{type(self).__name__} does not support storey engine"
+        )

     @property
     def source_spark_attributes(self) -> dict:

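The new guards in the base target follow a simple capability-flag pattern; a stripped-down sketch (only the flag names mirror the diff, the rest is illustrative and not the mlrun class hierarchy):

class BaseTargetSketch:
    support_storey = False
    support_spark = False

    def add_writer_step(self, *args, **kwargs):
        # fail fast with a clear message instead of a bare NotImplementedError
        if not self.support_storey:
            raise RuntimeError(f"{type(self).__name__} does not support storey engine")
        raise NotImplementedError()


class SnowflakeLikeTarget(BaseTargetSketch):
    support_spark = True  # spark only, no storey support


SnowflakeLikeTarget().add_writer_step()
# -> RuntimeError: SnowflakeLikeTarget does not support storey engine
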
mlrun/db/base.py
CHANGED
mlrun/db/httpdb.py
CHANGED
@@ -1253,13 +1253,17 @@ class HTTPRunDB(RunDBInterface):
             function_name=name,
         )

-    def list_functions(
+    def list_functions(
+        self, name=None, project=None, tag=None, labels=None, since=None, until=None
+    ):
         """Retrieve a list of functions, filtered by specific criteria.

         :param name: Return only functions with a specific name.
         :param project: Return functions belonging to this project. If not specified, the default project is used.
         :param tag: Return function versions with specific tags.
         :param labels: Return functions that have specific labels assigned to them.
+        :param since: Return functions updated after this date (as datetime object).
+        :param until: Return functions updated before this date (as datetime object).
         :returns: List of function objects (as dictionary).
         """
         project = project or config.default_project

@@ -1267,6 +1271,8 @@ class HTTPRunDB(RunDBInterface):
             "name": name,
             "tag": tag,
             "label": labels or [],
+            "since": datetime_to_iso(since),
+            "until": datetime_to_iso(until),
         }
         error = "list functions"
         path = f"projects/{project}/functions"

@@ -3546,17 +3552,19 @@ class HTTPRunDB(RunDBInterface):
         self,
         project: str,
         credentials: dict[str, str],
+        replace_creds: bool,
     ) -> None:
         """
         Set the credentials for the model monitoring application.

         :param project: Project name.
         :param credentials: Credentials to set.
+        :param replace_creds: If True, will override the existing credentials.
         """
         self.api_call(
             method=mlrun.common.types.HTTPMethod.POST,
             path=f"projects/{project}/model-monitoring/set-model-monitoring-credentials",
-            params={**credentials},
+            params={**credentials, "replace_creds": replace_creds},
         )

     def create_hub_source(

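A hedged usage sketch of the extended list_functions signature; it assumes a reachable MLRun API and an existing project named "default", and the one-day window is only an example:

from datetime import datetime, timedelta, timezone

import mlrun

db = mlrun.get_run_db()

# only functions updated within the last day, via the new since/until filters
recent = db.list_functions(
    project="default",
    since=datetime.now(timezone.utc) - timedelta(days=1),
    until=datetime.now(timezone.utc),
)
print(len(recent))
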
mlrun/db/nopdb.py
CHANGED
mlrun/feature_store/retrieval/spark_merger.py
CHANGED

@@ -13,45 +13,16 @@
 # limitations under the License.
 #

-import pandas as pd
-import semver

 import mlrun
+from mlrun.data_types.to_pandas import spark_df_to_pandas
 from mlrun.datastore.sources import ParquetSource
 from mlrun.datastore.targets import get_offline_target
+from mlrun.runtimes import RemoteSparkRuntime
+from mlrun.runtimes.sparkjob import Spark3Runtime
 from mlrun.utils.helpers import additional_filters_warning

-from ...runtimes import RemoteSparkRuntime
-from ...runtimes.sparkjob import Spark3Runtime
 from .base import BaseMerger
-from .conversion import PandasConversionMixin
-
-
-def spark_df_to_pandas(spark_df):
-    # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
-    # when we upgrade pyspark, we should check whether this workaround is still necessary
-    # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
-    if semver.parse(pd.__version__)["major"] >= 2:
-        import pyspark.sql.functions as pyspark_functions
-
-        type_conversion_dict = {}
-        for field in spark_df.schema.fields:
-            if str(field.dataType) == "TimestampType":
-                spark_df = spark_df.withColumn(
-                    field.name,
-                    pyspark_functions.date_format(
-                        pyspark_functions.to_timestamp(field.name),
-                        "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS",
-                    ),
-                )
-                type_conversion_dict[field.name] = "datetime64[ns]"
-
-        df = PandasConversionMixin.toPandas(spark_df)
-        if type_conversion_dict:
-            df = df.astype(type_conversion_dict)
-        return df
-    else:
-        return PandasConversionMixin.toPandas(spark_df)


 class SparkFeatureMerger(BaseMerger):

mlrun/model.py
CHANGED
@@ -753,10 +753,6 @@ class Notification(ModelObj):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Both 'secret_params' and 'params' are empty, at least one must be defined."
             )
-        if secret_params and params and secret_params != params:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "Both 'secret_params' and 'params' are defined but they contain different values"
-            )

         notification_class.validate_params(secret_params or params)

@@ -1315,7 +1311,7 @@ class RunTemplate(ModelObj):

         task.with_input("data", "/file-dir/path/to/file")
         task.with_input("data", "s3://<bucket>/path/to/file")
-        task.with_input("data", "v3io
+        task.with_input("data", "v3io://<data-container>/path/to/file")
         """
         if not self.spec.inputs:
             self.spec.inputs = {}

mlrun/model_monitoring/api.py
CHANGED
@@ -569,10 +569,10 @@ def _create_model_monitoring_function_base(
         "please use `ModelMonitoringApplicationBaseV2`. It will be removed in 1.9.0.",
         FutureWarning,
     )
-    if name in mm_constants.
+    if name in mm_constants._RESERVED_FUNCTION_NAMES:
         raise mlrun.errors.MLRunInvalidArgumentError(
-
-            f"{mm_constants.
+            "An application cannot have the following names: "
+            f"{mm_constants._RESERVED_FUNCTION_NAMES}"
         )
     if func is None:
         func = ""

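For context, the guard above leans on the _RESERVED_FUNCTION_NAMES constant added in mlrun/common/schemas/model_monitoring/constants.py earlier in this diff. A simplified sketch of the same check (the list contents below are placeholders, except "mlrun-infra" which comes from SpecialApps):

# placeholder stand-in for MonitoringFunctionNames.list() + [SpecialApps.MLRUN_INFRA]
RESERVED_FUNCTION_NAMES = ["model-monitoring-stream", "mlrun-infra"]  # illustrative


def validate_app_name(name: str) -> None:
    # same shape as the guard in _create_model_monitoring_function_base
    if name in RESERVED_FUNCTION_NAMES:
        raise ValueError(
            f"An application cannot have the following names: {RESERVED_FUNCTION_NAMES}"
        )


validate_app_name("my-drift-app")  # ok
validate_app_name("mlrun-infra")   # raises ValueError
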