mlrun 1.3.2rc1__py3-none-any.whl → 1.3.2rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/api/api/deps.py +14 -1
- mlrun/api/api/endpoints/frontend_spec.py +0 -2
- mlrun/api/api/endpoints/functions.py +15 -27
- mlrun/api/api/endpoints/grafana_proxy.py +435 -74
- mlrun/api/api/endpoints/healthz.py +5 -18
- mlrun/api/api/endpoints/model_endpoints.py +33 -37
- mlrun/api/api/utils.py +6 -13
- mlrun/api/crud/__init__.py +14 -16
- mlrun/api/crud/logs.py +5 -7
- mlrun/api/crud/model_monitoring/__init__.py +2 -2
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +847 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +105 -328
- mlrun/api/crud/pipelines.py +2 -3
- mlrun/api/db/sqldb/models/models_mysql.py +52 -19
- mlrun/api/db/sqldb/models/models_sqlite.py +52 -19
- mlrun/api/db/sqldb/session.py +19 -26
- mlrun/api/schemas/__init__.py +2 -0
- mlrun/api/schemas/constants.py +0 -13
- mlrun/api/schemas/frontend_spec.py +0 -1
- mlrun/api/schemas/model_endpoints.py +38 -195
- mlrun/api/schemas/schedule.py +2 -2
- mlrun/api/utils/clients/log_collector.py +5 -0
- mlrun/builder.py +9 -41
- mlrun/config.py +1 -76
- mlrun/data_types/__init__.py +1 -6
- mlrun/data_types/data_types.py +1 -3
- mlrun/datastore/__init__.py +2 -9
- mlrun/datastore/sources.py +20 -25
- mlrun/datastore/store_resources.py +1 -1
- mlrun/datastore/targets.py +34 -67
- mlrun/datastore/utils.py +4 -26
- mlrun/db/base.py +2 -4
- mlrun/db/filedb.py +5 -13
- mlrun/db/httpdb.py +32 -64
- mlrun/db/sqldb.py +2 -4
- mlrun/errors.py +0 -5
- mlrun/execution.py +0 -2
- mlrun/feature_store/api.py +8 -24
- mlrun/feature_store/feature_set.py +6 -28
- mlrun/feature_store/feature_vector.py +0 -2
- mlrun/feature_store/ingestion.py +11 -8
- mlrun/feature_store/retrieval/base.py +43 -271
- mlrun/feature_store/retrieval/dask_merger.py +153 -55
- mlrun/feature_store/retrieval/job.py +3 -12
- mlrun/feature_store/retrieval/local_merger.py +130 -48
- mlrun/feature_store/retrieval/spark_merger.py +125 -126
- mlrun/features.py +2 -7
- mlrun/model_monitoring/constants.py +6 -48
- mlrun/model_monitoring/helpers.py +35 -118
- mlrun/model_monitoring/model_monitoring_batch.py +260 -293
- mlrun/model_monitoring/stream_processing_fs.py +253 -220
- mlrun/platforms/iguazio.py +0 -33
- mlrun/projects/project.py +72 -34
- mlrun/runtimes/base.py +0 -5
- mlrun/runtimes/daskjob.py +0 -2
- mlrun/runtimes/function.py +3 -29
- mlrun/runtimes/kubejob.py +15 -39
- mlrun/runtimes/local.py +45 -7
- mlrun/runtimes/mpijob/abstract.py +0 -2
- mlrun/runtimes/mpijob/v1.py +0 -2
- mlrun/runtimes/pod.py +0 -2
- mlrun/runtimes/remotesparkjob.py +0 -2
- mlrun/runtimes/serving.py +0 -6
- mlrun/runtimes/sparkjob/abstract.py +2 -39
- mlrun/runtimes/sparkjob/spark3job.py +0 -2
- mlrun/serving/__init__.py +1 -2
- mlrun/serving/routers.py +35 -35
- mlrun/serving/server.py +12 -22
- mlrun/serving/states.py +30 -162
- mlrun/serving/v2_serving.py +10 -13
- mlrun/utils/clones.py +1 -1
- mlrun/utils/model_monitoring.py +96 -122
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/METADATA +27 -23
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/RECORD +79 -92
- mlrun/api/crud/model_monitoring/grafana.py +0 -427
- mlrun/datastore/spark_udf.py +0 -40
- mlrun/model_monitoring/__init__.py +0 -44
- mlrun/model_monitoring/common.py +0 -112
- mlrun/model_monitoring/model_endpoint.py +0 -141
- mlrun/model_monitoring/stores/__init__.py +0 -106
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -448
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -23
- mlrun/model_monitoring/stores/models/base.py +0 -18
- mlrun/model_monitoring/stores/models/mysql.py +0 -100
- mlrun/model_monitoring/stores/models/sqlite.py +0 -98
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -375
- mlrun/utils/db.py +0 -52
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/LICENSE +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/WHEEL +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/top_level.txt +0 -0
mlrun/feature_store/api.py
CHANGED
@@ -28,6 +28,7 @@ from ..datastore.store_resources import parse_store_uri
 from ..datastore.targets import (
     BaseStoreTarget,
     get_default_prefix_for_source,
+    get_default_targets,
     get_target_driver,
     kind_to_driver,
     validate_target_list,
@@ -102,7 +103,6 @@ def get_offline_features(
     engine_args: dict = None,
     query: str = None,
     join_type: str = "inner",
-    order_by: Union[str, List[str]] = None,
     spark_service: str = None,
 ) -> OfflineVectorResponse:
     """retrieve offline feature vector results
@@ -161,8 +161,6 @@ def get_offline_features(
         * right: use only keys from right frame (SQL: right outer join)
         * outer: use union of keys from both frames (SQL: full outer join)
         * inner: use intersection of keys from both frames (SQL: inner join).
-    :param order_by: Name or list of names to order by. The name or the names in the list can be the feature name
-        or the alias of the feature you pass in the feature list.
     """
     if isinstance(feature_vector, FeatureVector):
         update_stats = True
@@ -192,7 +190,6 @@ def get_offline_features(
         with_indexes=with_indexes,
         query=query,
         join_type=join_type,
-        order_by=order_by,
     )

     start_time = str_to_timestamp(start_time)
@@ -216,7 +213,6 @@ def get_offline_features(
         update_stats=update_stats,
         query=query,
         join_type=join_type,
-        order_by=order_by,
     )

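In short, rc2 removes the `order_by` option from `get_offline_features`. A minimal sketch of how a caller might compensate by sorting the materialized dataframe instead; the vector URI and column name are placeholders, not values from this diff:

import mlrun.feature_store as fstore

# rc1 accepted: fstore.get_offline_features(vector, order_by="timestamp")
resp = fstore.get_offline_features("store://feature-vectors/my-proj/my-vector")
df = resp.to_dataframe().sort_values(by="timestamp")  # sort client-side instead
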
@@ -409,15 +405,6 @@ def ingest(
         raise mlrun.errors.MLRunInvalidArgumentError(
             "feature set and source must be specified"
         )
-    if (
-        not mlrun_context
-        and not targets
-        and not (featureset.spec.targets or featureset.spec.with_default_targets)
-        and (run_config is not None and not run_config.local)
-    ):
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            f"Feature set {featureset.metadata.name} is remote ingested with no targets defined, aborting"
-        )

     if featureset is not None:
         featureset.validate_steps(namespace=namespace)
@@ -490,11 +477,10 @@ def ingest(
             f"Source.end_time is {str(source.end_time)}"
         )

-
-
-
-
-
+    if mlrun_context:
+        mlrun_context.logger.info(
+            f"starting ingestion task to {featureset.uri}.{filter_time_string}"
+        )
     return_df = False

     if featureset.spec.passthrough:
@@ -503,7 +489,7 @@ def ingest(

     namespace = namespace or get_caller_globals()

-    targets_to_ingest = targets or featureset.spec.targets
+    targets_to_ingest = targets or featureset.spec.targets or get_default_targets()
     targets_to_ingest = copy.deepcopy(targets_to_ingest)

     validate_target_paths_for_engine(targets_to_ingest, featureset.spec.engine, source)
@@ -700,9 +686,7 @@ def preview(
     )
     # reduce the size of the ingestion if we do not infer stats
     rows_limit = (
-        0
-        if InferOptions.get_common_options(options, InferOptions.Stats)
-        else 1000
+        0 if InferOptions.get_common_options(options, InferOptions.Stats) else 1000
     )
     source = init_featureset_graph(
         source,
@@ -786,7 +770,7 @@ def deploy_ingestion_service(
         name=featureset.metadata.name,
     )

-    targets_to_ingest = targets or featureset.spec.targets
+    targets_to_ingest = targets or featureset.spec.targets or get_default_targets()
     targets_to_ingest = copy.deepcopy(targets_to_ingest)
     featureset.update_targets_for_ingest(targets_to_ingest)

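The net effect: `ingest` and `deploy_ingestion_service` now fall back to `get_default_targets()` when neither the call nor the feature set spec supplies targets, replacing rc1's hard error on remote ingestion without targets. A minimal sketch of the new resolution order; the feature set and entity names are illustrative:

import mlrun.feature_store as fstore
from mlrun.datastore.targets import get_default_targets

stocks_set = fstore.FeatureSet("stocks", entities=[fstore.Entity("ticker")])

# rc2 resolution order: explicit targets -> spec targets -> configured defaults
explicit_targets = None
targets_to_ingest = (
    explicit_targets or stocks_set.spec.targets or get_default_targets()
)
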
mlrun/feature_store/feature_set.py
CHANGED

@@ -118,7 +118,7 @@ class FeatureSetSpec(ModelObj):
         self.owner = owner
         self.description = description
         self.entities: List[Union[Entity, str]] = entities or []
-        self.relations: Dict[str,
+        self.relations: Dict[str, Entity] = relations or {}
         self.features: List[Feature] = features or []
         self.partition_keys = partition_keys or []
         self.timestamp_key = timestamp_key
@@ -131,7 +131,6 @@ class FeatureSetSpec(ModelObj):
         self.engine = engine
         self.output_path = output_path or mlconf.artifact_path
         self.passthrough = passthrough
-        self.with_default_targets = True

     @property
     def entities(self) -> List[Entity]:
@@ -233,9 +232,6 @@ class FeatureSetSpec(ModelObj):

     @relations.setter
     def relations(self, relations: Dict[str, Entity]):
-        for col, ent in relations.items():
-            if isinstance(ent, str):
-                relations[col] = Entity(ent)
         self._relations = ObjectDict.from_dict({"entity": Entity}, relations, "entity")

     def require_processing(self):
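Because the setter no longer coerces string values into `Entity` objects, relation values should now be passed as `Entity` instances. A hedged sketch; the feature set, entity, and column names are made up:

from mlrun.feature_store import Entity, FeatureSet

fset = FeatureSet(
    "purchases",
    entities=[Entity("customer_id")],
    # rc1's setter also accepted {"store_id": "store_id"} and wrapped the
    # string itself; rc2 expects Entity values up front
    relations={"store_id": Entity("store_id")},
)
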
@@ -328,7 +324,7 @@ class FeatureSet(ModelObj):
         timestamp_key: str = None,
         engine: str = None,
         label_column: str = None,
-        relations: Dict[str,
+        relations: Dict[str, Entity] = None,
         passthrough: bool = None,
     ):
         """Feature set object, defines a set of features and their data pipeline
@@ -376,7 +372,6 @@ class FeatureSet(ModelObj):
         self.status = None
         self._last_state = ""
         self._aggregations = {}
-        self.set_targets()

     @property
     def spec(self) -> FeatureSetSpec:
@@ -475,25 +470,10 @@ class FeatureSet(ModelObj):
         )
         targets = targets or []
         if with_defaults:
-            self.spec.with_default_targets = True
             targets.extend(get_default_targets())
-        else:
-            self.spec.with_default_targets = False
-
-        self.spec.targets = []
-        self.__set_targets_add_targets_helper(targets)
-
-        if default_final_step:
-            self.spec.graph.final_step = default_final_step
-
-    def __set_targets_add_targets_helper(self, targets):
-        """
-        Add the desired target list

-        :param targets: list of target type names ('csv', 'nosql', ..) or target objects
-            CSVTarget(), ParquetTarget(), NoSqlTarget(), StreamTarget(), ..
-        """
         validate_target_list(targets=targets)
+
         for target in targets:
             kind = target.kind if hasattr(target, "kind") else target
             if kind not in TargetTypes.all():
@@ -505,6 +485,8 @@ class FeatureSet(ModelObj):
                 target, name=str(target), partitioned=(target == "parquet")
             )
             self.spec.targets.update(target)
+        if default_final_step:
+            self.spec.graph.final_step = default_final_step

     def validate_steps(self, namespace):
         if not self.spec:
@@ -941,11 +923,7 @@ class FeatureSet(ModelObj):
             raise mlrun.errors.MLRunNotFoundError(
                 "passthrough feature set {self.metadata.name} with no source"
             )
-
-            # to_dataframe() can sometimes return an iterator of dataframes instead of one dataframe
-            if not isinstance(df, pd.DataFrame):
-                df = pd.concat(df)
-            return df
+            return self.spec.source.to_dataframe()

         target = get_offline_target(self, name=target_name)
         if not target:
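With the constructor no longer calling `set_targets()` and the `with_default_targets` flag removed, a new feature set starts with no targets until they are set explicitly. A brief sketch; the names are illustrative:

from mlrun.datastore.targets import ParquetTarget
from mlrun.feature_store import Entity, FeatureSet

fset = FeatureSet("events", entities=[Entity("user_id")])
# rc1 ran set_targets() inside __init__; in rc2 the spec stays empty
# until targets are attached explicitly:
fset.set_targets(targets=[ParquetTarget()], with_defaults=False)
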
mlrun/feature_store/feature_vector.py
CHANGED

@@ -520,8 +520,6 @@ class OnlineVectorService:
             v = data[name]
             if v is None or (type(v) == float and (np.isinf(v) or np.isnan(v))):
                 data[name] = self._impute_values.get(name, v)
-        for name in list(self.vector.spec.entity_fields.keys()):
-            data.pop(name, None)

         if as_list and data:
             data = [
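Dropping that pop means `OnlineVectorService` no longer strips entity keys from the records it returns. A hedged illustration; the vector URI and entity key are placeholders:

import mlrun.feature_store as fstore

svc = fstore.get_online_feature_service("store://feature-vectors/my-proj/my-vector")
resp = svc.get([{"user_id": "42"}])
# with the pop removed, the "user_id" entity key may now appear in each
# returned record alongside the requested feature values
svc.close()
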
mlrun/feature_store/ingestion.py
CHANGED
@@ -89,7 +89,7 @@ def init_featureset_graph(
     key_fields = entity_columns if entity_columns else None

     sizes = [0] * len(targets)
-
+    data_result = None
     total_rows = 0
     targets = [get_target_driver(target, featureset) for target in targets]
     if featureset.spec.passthrough:
@@ -100,11 +100,11 @@ def init_featureset_graph(
         # set the entities to be the indexes of the df
         event.body = entities_to_index(featureset, event.body)

-
-        if
+        data = server.run(event, get_body=True)
+        if data is not None:
             for i, target in enumerate(targets):
                 size = target.write_dataframe(
-
+                    data,
                     key_column=key_fields,
                     timestamp_key=featureset.spec.timestamp_key,
                     chunk_id=chunk_id,
@@ -112,18 +112,21 @@ def init_featureset_graph(
                 if size:
                     sizes[i] += size
         chunk_id += 1
-
-
+        if data_result is None:
+            # in case of multiple chunks only return the first chunk (last may be too small)
+            data_result = data
+        total_rows += data.shape[0]
         if rows_limit and total_rows >= rows_limit:
             break

+    # todo: fire termination event if iterator
+
     for i, target in enumerate(targets):
         target_status = target.update_resource_status("ready", size=sizes[i])
         if verbose:
             logger.info(f"wrote target: {target_status}")

-
-    return result_df.head(rows_limit)
+    return data_result


 def featureset_initializer(server):
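The reworked loop writes each chunk to every target, remembers the first chunk as the value to return (a trailing chunk may be shorter than the limit), and stops once `rows_limit` is reached. A standalone sketch of that pattern; the helper name and chunk handling are made up for illustration:

import pandas as pd

def write_chunks_keep_first(chunks, rows_limit=None):
    # keep the first chunk written, count total rows, stop at the limit
    first_chunk, total_rows = None, 0
    for chunk in chunks:
        # ... each target's write_dataframe(chunk, ...) would run here ...
        if first_chunk is None:
            first_chunk = chunk
        total_rows += chunk.shape[0]
        if rows_limit and total_rows >= rows_limit:
            break
    return first_chunk

df = pd.DataFrame({"x": range(10)})
preview = write_chunks_keep_first([df.iloc[:5], df.iloc[5:]], rows_limit=8)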
|