mlrun 1.8.0rc34__py3-none-any.whl → 1.8.0rc36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

@@ -20,7 +20,6 @@ from datetime import datetime
  from typing import Any, Optional, Union

  import pandas as pd
- from deprecated import deprecated

  import mlrun
  import mlrun.errors
@@ -91,122 +90,6 @@ def _features_to_vector_and_check_permissions(features, update_stats):
  return vector


- @deprecated(
- version="1.6.0",
- reason="get_offline_features() will be removed in 1.8.0, please instead use "
- "get_feature_vector('store://feature_vector_name').get_offline_features()",
- category=FutureWarning,
- )
- def get_offline_features(
- feature_vector: Union[str, FeatureVector],
- entity_rows=None,
- entity_timestamp_column: Optional[str] = None,
- target: DataTargetBase = None,
- run_config: RunConfig = None,
- drop_columns: Optional[list[str]] = None,
- start_time: Optional[Union[str, datetime]] = None,
- end_time: Optional[Union[str, datetime]] = None,
- with_indexes: bool = False,
- update_stats: bool = False,
- engine: Optional[str] = None,
- engine_args: Optional[dict] = None,
- query: Optional[str] = None,
- order_by: Optional[Union[str, list[str]]] = None,
- spark_service: Optional[str] = None,
- timestamp_for_filtering: Optional[Union[str, dict[str, str]]] = None,
- additional_filters: Optional[list] = None,
- ):
- """retrieve offline feature vector results
-
- specify a feature vector object/uri and retrieve the desired features, their metadata
- and statistics. returns :py:class:`~mlrun.feature_store.OfflineVectorResponse`,
- results can be returned as a dataframe or written to a target
-
- The start_time and end_time attributes allow filtering the data to a given time range, they accept
- string values or pandas `Timestamp` objects, string values can also be relative, for example:
- "now", "now - 1d2h", "now+5m", where a valid pandas Timedelta string follows the verb "now",
- for time alignment you can use the verb "floor" e.g. "now -1d floor 1H" will align the time to the last hour
- (the floor string is passed to pandas.Timestamp.floor(), can use D, H, T, S for day, hour, min, sec alignment).
- Another option to filter the data is by the `query` argument - can be seen in the example.
- example::
-
- features = [
- "stock-quotes.bid",
- "stock-quotes.asks_sum_5h",
- "stock-quotes.ask as mycol",
- "stocks.*",
- ]
- vector = FeatureVector(features=features)
- resp = get_offline_features(
- vector,
- entity_rows=trades,
- entity_timestamp_column="time",
- query="ticker in ['GOOG'] and bid>100",
- )
- print(resp.to_dataframe())
- print(vector.get_stats_table())
- resp.to_parquet("./out.parquet")
-
- :param feature_vector: feature vector uri or FeatureVector object. passing feature vector obj requires
- update permissions
- :param entity_rows: dataframe with entity rows to join with
- :param target: where to write the results to
- :param drop_columns: list of columns to drop from the final result
- :param entity_timestamp_column: timestamp column name in the entity rows dataframe. can be specified
- only if param entity_rows was specified.
- :param run_config: function and/or run configuration
- see :py:class:`~mlrun.feature_store.RunConfig`
- :param start_time: datetime, low limit of time needed to be filtered. Optional.
- :param end_time: datetime, high limit of time needed to be filtered. Optional.
- :param with_indexes: Return vector with/without the entities and the timestamp_key of the feature sets
- and with/without entity_timestamp_column and timestamp_for_filtering columns.
- This property can be specified also in the feature vector spec
- (feature_vector.spec.with_indexes)
- (default False)
- :param update_stats: update features statistics from the requested feature sets on the vector.
- (default False).
- :param engine: processing engine kind ("local", "dask", or "spark")
- :param engine_args: kwargs for the processing engine
- :param query: The query string used to filter rows on the output
- :param spark_service: Name of the spark service to be used (when using a remote-spark runtime)
- :param order_by: Name or list of names to order by. The name or the names in the list can be the
- feature name or the alias of the feature you pass in the feature list.
- :param timestamp_for_filtering: name of the column to filter by, can be str for all the feature sets or a
- dictionary ({<feature set name>: <timestamp column name>, ...})
- that indicates the timestamp column name for each feature set. Optional.
- By default, the filter executes on the timestamp_key of each feature set.
- Note: the time filtering is performed on each feature set before the
- merge process using start_time and end_time params.
- :param additional_filters: List of additional_filter conditions as tuples.
- Each tuple should be in the format (column_name, operator, value).
- Supported operators: "=", ">=", "<=", ">", "<".
- Example: [("Product", "=", "Computer")]
- For all supported filters, please see:
- https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
-
-
- """
- return _get_offline_features(
- feature_vector,
- entity_rows,
- entity_timestamp_column,
- target,
- run_config,
- drop_columns,
- start_time,
- end_time,
- with_indexes,
- update_stats,
- engine,
- engine_args,
- query,
- order_by,
- spark_service,
- timestamp_for_filtering,
- additional_filters,
- )
-
-
  def _get_offline_features(
  feature_vector: Union[str, FeatureVector],
  entity_rows=None,
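The deprecation notice on the removed helper points callers at the feature-vector method instead. A minimal, hedged sketch of that replacement call, using a placeholder vector URI and query:

import mlrun.feature_store as fstore

# Hedged sketch: fetch a stored vector and use the method form named in the removed
# deprecation notice; the URI and query below are placeholders.
vector = fstore.get_feature_vector("store://my-project/stock-quotes-vector")
resp = vector.get_offline_features(query="ticker in ['GOOG'] and bid>100")
print(resp.to_dataframe())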
@@ -288,93 +171,6 @@ def _get_offline_features(
  )


- @deprecated(
- version="1.6.0",
- reason="get_online_feature_service() will be removed in 1.8.0, please instead use "
- "get_feature_vector('store://feature_vector_name').get_online_feature_service()",
- category=FutureWarning,
- )
- def get_online_feature_service(
- feature_vector: Union[str, FeatureVector],
- run_config: RunConfig = None,
- fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
- impute_policy: Optional[dict] = None,
- update_stats: bool = False,
- entity_keys: Optional[list[str]] = None,
- ):
- """initialize and return online feature vector service api,
- returns :py:class:`~mlrun.feature_store.OnlineVectorService`
-
- :**usage**:
- There are two ways to use the function:
-
- 1. As context manager
-
- Example::
-
- with get_online_feature_service(vector_uri) as svc:
- resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
- print(resp)
- resp = svc.get([{"ticker": "AAPL"}], as_list=True)
- print(resp)
-
- Example with imputing::
-
- with get_online_feature_service(vector_uri, entity_keys=['id'],
- impute_policy={"*": "$mean", "amount": 0)) as svc:
- resp = svc.get([{"id": "C123487"}])
-
- 2. as simple function, note that in that option you need to close the session.
-
- Example::
-
- svc = get_online_feature_service(vector_uri, entity_keys=["ticker"])
- try:
- resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
- print(resp)
- resp = svc.get([{"ticker": "AAPL"}], as_list=True)
- print(resp)
-
- finally:
- svc.close()
-
- Example with imputing::
-
- svc = get_online_feature_service(vector_uri, entity_keys=['id'],
- impute_policy={"*": "$mean", "amount": 0))
- try:
- resp = svc.get([{"id": "C123487"}])
- except Exception as e:
- handling exception...
- finally:
- svc.close()
-
- :param feature_vector: feature vector uri or FeatureVector object. passing feature vector obj requires update
- permissions.
- :param run_config: function and/or run configuration for remote jobs/services
- :param impute_policy: a dict with `impute_policy` per feature, the dict key is the feature name and the dict
- value indicate which value will be used in case the feature is NaN/empty, the replaced
- value can be fixed number for constants or $mean, $max, $min, $std, $count
- for statistical
- values. "*" is used to specify the default for all features, example: `{"*": "$mean"}`
- :param fixed_window_type: determines how to query the fixed window values which were previously inserted by ingest
- :param update_stats: update features statistics from the requested feature sets on the vector.
- Default: False.
- :param entity_keys: Entity list of the first feature_set in the vector.
- The indexes that are used to query the online service.
- :return: Initialize the `OnlineVectorService`.
- Will be used in subclasses where `support_online=True`.
- """
- return _get_online_feature_service(
- feature_vector,
- run_config,
- fixed_window_type,
- impute_policy,
- update_stats,
- entity_keys,
- )
-
-
  def _get_online_feature_service(
  feature_vector: Union[str, FeatureVector],
  run_config: RunConfig = None,
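As above, the removed wrapper's deprecation notice points to the feature-vector method. A hedged sketch of the context-manager usage from the removed docstring, adapted to the method form (the URI and entity key are placeholders):

import mlrun.feature_store as fstore

# Hedged sketch of the method form named in the removed deprecation notice.
vector = fstore.get_feature_vector("store://my-project/stocks-vector")  # hypothetical URI
with vector.get_online_feature_service(entity_keys=["ticker"]) as svc:
    resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
    print(resp)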
@@ -448,86 +244,6 @@ def _get_namespace(run_config: RunConfig) -> dict[str, Any]:
  return get_caller_globals()


- def ingest(
- featureset: Union[FeatureSet, str] = None,
- source=None,
- targets: Optional[list[DataTargetBase]] = None,
- namespace=None,
- return_df: bool = True,
- infer_options: InferOptions = InferOptions.default(),
- run_config: RunConfig = None,
- mlrun_context=None,
- spark_context=None,
- overwrite=None,
- ) -> Optional[pd.DataFrame]:
- """Read local DataFrame, file, URL, or source into the feature store
- Ingest reads from the source, run the graph transformations, infers metadata and stats
- and writes the results to the default of specified targets
-
- when targets are not specified data is stored in the configured default targets
- (will usually be NoSQL for real-time and Parquet for offline).
-
- the `run_config` parameter allow specifying the function and job configuration,
- see: :py:class:`~mlrun.feature_store.RunConfig`
-
- example::
-
- stocks_set = FeatureSet("stocks", entities=[Entity("ticker")])
- stocks = pd.read_csv("stocks.csv")
- df = ingest(stocks_set, stocks, infer_options=fstore.InferOptions.default())
-
- # for running as remote job
- config = RunConfig(image="mlrun/mlrun")
- df = ingest(stocks_set, stocks, run_config=config)
-
- # specify source and targets
- source = CSVSource("mycsv", path="measurements.csv")
- targets = [CSVTarget("mycsv", path="./mycsv.csv")]
- ingest(measurements, source, targets)
-
- :param featureset: feature set object or featureset.uri. (uri must be of a feature set that is in the DB,
- call `.save()` if it's not)
- :param source: source dataframe or other sources (e.g. parquet source see:
- :py:class:`~mlrun.datastore.ParquetSource` and other classes in mlrun.datastore with suffix
- Source)
- :param targets: optional list of data target objects
- :param namespace: namespace or module containing graph classes
- :param return_df: indicate if to return a dataframe with the graph results
- :param infer_options: schema (for discovery of entities, features in featureset), index, stats,
- histogram and preview infer options (:py:class:`~mlrun.feature_store.InferOptions`)
- :param run_config: function and/or run configuration for remote jobs,
- see :py:class:`~mlrun.feature_store.RunConfig`
- :param mlrun_context: mlrun context (when running as a job), for internal use !
- :param spark_context: local spark session for spark ingestion, example for creating the spark context:
- `spark = SparkSession.builder.appName("Spark function").getOrCreate()`
- For remote spark ingestion, this should contain the remote spark service name
- :param overwrite: delete the targets' data prior to ingestion
- (default: True for non scheduled ingest - deletes the targets that are about to be ingested.
- False for scheduled ingest - does not delete the target)
- :return: if return_df is True, a dataframe will be returned based on the graph
- """
- if mlrun_context is None:
- deprecated(
- version="1.6.0",
- reason="Calling 'ingest' with mlrun_context=None is deprecated and will be removed in 1.8.0,\
- use 'FeatureSet.ingest()' instead",
- category=FutureWarning,
- )
-
- return _ingest(
- featureset,
- source,
- targets,
- namespace,
- return_df,
- infer_options,
- run_config,
- mlrun_context,
- spark_context,
- overwrite,
- )
-
-
  def _ingest(
  featureset: Union[FeatureSet, str] = None,
  source=None,
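The removed module-level ingest() directed users to FeatureSet.ingest(). A hedged sketch of that replacement, mirroring the removed docstring example ("stocks.csv" is a placeholder local file):

import pandas as pd
import mlrun.feature_store as fstore

# Hedged sketch of FeatureSet.ingest(), the replacement named in the removed deprecation warning.
stocks_set = fstore.FeatureSet("stocks", entities=[fstore.Entity("ticker")])
stocks = pd.read_csv("stocks.csv")  # placeholder local file
df = stocks_set.ingest(stocks)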
@@ -776,53 +492,6 @@ def _ingest(
  return df


- @deprecated(
- version="1.6.0",
- reason="'preview' will be removed in 1.8.0, use 'FeatureSet.preview()' instead",
- category=FutureWarning,
- )
- def preview(
- featureset: FeatureSet,
- source,
- entity_columns: Optional[list] = None,
- namespace=None,
- options: InferOptions = None,
- verbose: bool = False,
- sample_size: Optional[int] = None,
- ) -> pd.DataFrame:
- """run the ingestion pipeline with local DataFrame/file data and infer features schema and stats
-
- example::
-
- quotes_set = FeatureSet("stock-quotes", entities=[Entity("ticker")])
- quotes_set.add_aggregation("ask", ["sum", "max"], ["1h", "5h"], "10m")
- quotes_set.add_aggregation("bid", ["min", "max"], ["1h"], "10m")
- df = preview(
- quotes_set,
- quotes_df,
- entity_columns=["ticker"],
- )
-
- :param featureset: feature set object or uri
- :param source: source dataframe or csv/parquet file path
- :param entity_columns: list of entity (index) column names
- :param namespace: namespace or module containing graph classes
- :param options: schema (for discovery of entities, features in featureset), index, stats,
- histogram and preview infer options (:py:class:`~mlrun.feature_store.InferOptions`)
- :param verbose: verbose log
- :param sample_size: num of rows to sample from the dataset (for large datasets)
- """
- return _preview(
- featureset,
- source,
- entity_columns,
- namespace,
- options,
- verbose,
- sample_size,
- )
-
-
  def _preview(
  featureset: FeatureSet,
  source,
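The removed preview() helper pointed to FeatureSet.preview(). A hedged sketch of the method form, with a small stand-in dataframe:

import pandas as pd
import mlrun.feature_store as fstore

# Hedged sketch of FeatureSet.preview(), the replacement named in the removed deprecation notice.
quotes_set = fstore.FeatureSet("stock-quotes", entities=[fstore.Entity("ticker")])
quotes_df = pd.DataFrame({"ticker": ["GOOG"], "bid": [720.5], "ask": [720.9]})  # stand-in data
df = quotes_set.preview(quotes_df, entity_columns=["ticker"])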
@@ -912,55 +581,6 @@ def _run_ingestion_job(
  return run_ingestion_job(name, featureset, run_config, source.schedule)


- @deprecated(
- version="1.6.0",
- reason="'deploy_ingestion_service_v2' will be removed in 1.8.0, "
- "use 'FeatureSet.deploy_ingestion_service()' instead",
- category=FutureWarning,
- )
- def deploy_ingestion_service_v2(
- featureset: Union[FeatureSet, str],
- source: DataSource = None,
- targets: Optional[list[DataTargetBase]] = None,
- name: Optional[str] = None,
- run_config: RunConfig = None,
- verbose=False,
- ) -> tuple[str, BaseRuntime]:
- """Start real-time ingestion service using nuclio function
-
- Deploy a real-time function implementing feature ingestion pipeline
- the source maps to Nuclio event triggers (http, kafka, v3io stream, etc.)
-
- the `run_config` parameter allow specifying the function and job configuration,
- see: :py:class:`~mlrun.feature_store.RunConfig`
-
- example::
-
- source = HTTPSource()
- func = mlrun.code_to_function("ingest", kind="serving").apply(mount_v3io())
- config = RunConfig(function=func)
- deploy_ingestion_service_v2(my_set, source, run_config=config)
-
- :param featureset: feature set object or uri
- :param source: data source object describing the online or offline source
- :param targets: list of data target objects
- :param name: name for the job/function
- :param run_config: service runtime configuration (function object/uri, resources, etc..)
- :param verbose: verbose log
-
- :return: URL to access the deployed ingestion service, and the function that was deployed (which will
- differ from the function passed in via the run_config parameter).
- """
- return _deploy_ingestion_service_v2(
- featureset,
- source,
- targets,
- name,
- run_config,
- verbose,
- )
-
-
  def _deploy_ingestion_service_v2(
  featureset: Union[FeatureSet, str],
  source: DataSource = None,
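The removed deploy_ingestion_service_v2() pointed to FeatureSet.deploy_ingestion_service(). A hedged sketch adapted from the removed docstring example; the feature set and serving function below are placeholders:

import mlrun
import mlrun.feature_store as fstore
from mlrun.datastore.sources import HTTPSource

# Hedged sketch of the method form named in the removed deprecation notice.
my_set = fstore.FeatureSet("measurements", entities=[fstore.Entity("id")])  # placeholder set
func = mlrun.code_to_function("ingest", kind="serving")
my_set.deploy_ingestion_service(
    source=HTTPSource(), run_config=fstore.RunConfig(function=func)
)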
mlrun/model.py CHANGED
@@ -430,6 +430,19 @@ class ObjectList:
  self._children[child_obj.name] = child_obj
  return child_obj

+ def move_to_end(self, child, last=True):
+ self._children.move_to_end(child, last)
+
+ def update_list(self, object_list: "ObjectList", push_at_start: bool = False):
+ if push_at_start:
+ self._children = OrderedDict(
+ list(object_list._children.items()) + list(self._children.items())
+ )
+ else:
+ self._children = OrderedDict(
+ list(self._children.items()) + list(object_list._children.items())
+ )
+

  class Credentials(ModelObj):
  generate_access_key = "$generate"
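The new ObjectList helpers delegate to the underlying OrderedDict. A minimal standalone sketch of the ordering semantics (plain OrderedDicts stand in for ObjectList._children; this is not an mlrun API call):

from collections import OrderedDict

children = OrderedDict(a=1, b=2)
incoming = OrderedDict(c=3)

# update_list(..., push_at_start=True) puts the incoming entries first
merged = OrderedDict(list(incoming.items()) + list(children.items()))
assert list(merged) == ["c", "a", "b"]

# move_to_end(child) moves an existing key to the tail
merged.move_to_end("c")
assert list(merged) == ["a", "b", "c"]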
@@ -107,7 +107,7 @@ def get_or_create_model_endpoint(
  sample_set_statistics=sample_set_statistics,
  )

- except mlrun.errors.MLRunNotFoundError:
+ except (mlrun.errors.MLRunNotFoundError, mlrun.errors.MLRunInvalidArgumentError):
  # Create a new model endpoint with the provided details
  pass
  if not model_endpoint:
@@ -23,7 +23,7 @@ import mlrun.model_monitoring.applications.base as mm_base
  import mlrun.model_monitoring.applications.context as mm_context
  from mlrun.errors import MLRunIncompatibleVersionError

- SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.4.39")
+ SUPPORTED_EVIDENTLY_VERSION = semver.Version.parse("0.6.0")


  def _check_evidently_version(*, cur: semver.Version, ref: semver.Version) -> None:
@@ -102,10 +102,10 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
  Each metric is calculated over all the features individually and the mean is taken as the metric value.
  The average of Hellinger and total variance distance is taken as the result.

- The application logs two artifacts:
+ The application can log two artifacts:

- * A JSON with the general drift per feature.
- * A plotly table different metrics per feature.
+ * JSON with the general drift value per feature, produced by default.
+ * Plotly table with the various metrics and histograms per feature (disabled by default due to performance issues).

  This application is deployed by default when calling:

@@ -114,12 +114,18 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
  project.enable_model_monitoring()

  To avoid it, pass :code:`deploy_histogram_data_drift_app=False`.
+
+ If you want to change the application defaults, such as the classifier or which artifacts to produce, you
+ need to inherit from this class and deploy it as any other model monitoring application.
  """

  NAME: Final[str] = HistogramDataDriftApplicationConstants.NAME

  _REQUIRED_METRICS = {HellingerDistance, TotalVarianceDistance}
- _STATS_TYPES: tuple[StatsKind] = (StatsKind.CURRENT_STATS, StatsKind.DRIFT_MEASURES)
+ _STATS_TYPES: tuple[StatsKind, StatsKind] = (
+ StatsKind.CURRENT_STATS,
+ StatsKind.DRIFT_MEASURES,
+ )

  metrics: list[type[HistogramDistanceMetric]] = [
  HellingerDistance,
@@ -127,7 +133,12 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
  TotalVarianceDistance,
  ]

- def __init__(self, value_classifier: Optional[ValueClassifier] = None) -> None:
+ def __init__(
+ self,
+ value_classifier: Optional[ValueClassifier] = None,
+ produce_json_artifact: bool = True,
+ produce_plotly_artifact: bool = False,
+ ) -> None:
  """
  :param value_classifier: Classifier object that adheres to the `ValueClassifier` protocol.
  If not provided, the default `DataDriftClassifier()` is used.
@@ -137,6 +148,9 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
  self.metrics
  ), "TVD and Hellinger distance are required for the general data drift result"

+ self._produce_json_artifact = produce_json_artifact
+ self._produce_plotly_artifact = produce_plotly_artifact
+
  def _compute_metrics_per_feature(
  self, monitoring_context: mm_context.MonitoringApplicationContext
  ) -> DataFrame:
@@ -295,40 +309,43 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
  cast(str, key): (self._value_classifier.value_to_status(value), value)
  for key, value in drift_per_feature_values.items()
  }
- monitoring_context.logger.debug("Logging plotly artifact")
- monitoring_context.log_artifact(
- mm_drift_table.FeaturesDriftTablePlot().produce(
- sample_set_statistics=sample_set_statistics,
- inputs_statistics=inputs_statistics,
- metrics=metrics_per_feature.T.to_dict(), # pyright: ignore[reportArgumentType]
- drift_results=drift_results,
- )
+ monitoring_context.logger.debug("Producing plotly artifact")
+ artifact = mm_drift_table.FeaturesDriftTablePlot().produce(
+ sample_set_statistics=sample_set_statistics,
+ inputs_statistics=inputs_statistics,
+ metrics=metrics_per_feature.T.to_dict(), # pyright: ignore[reportArgumentType]
+ drift_results=drift_results,
  )
+ monitoring_context.logger.debug("Logging plotly artifact")
+ monitoring_context.log_artifact(artifact)
  monitoring_context.logger.debug("Logged plotly artifact successfully")

  def _log_drift_artifacts(
  self,
  monitoring_context: mm_context.MonitoringApplicationContext,
  metrics_per_feature: DataFrame,
- log_json_artifact: bool = True,
  ) -> None:
  """Log JSON and Plotly drift data per feature artifacts"""
+ if not self._produce_json_artifact and not self._produce_plotly_artifact:
+ return
+
  drift_per_feature_values = metrics_per_feature[
  [HellingerDistance.NAME, TotalVarianceDistance.NAME]
  ].mean(axis=1)

- if log_json_artifact:
+ if self._produce_json_artifact:
  self._log_json_artifact(drift_per_feature_values, monitoring_context)

- self._log_plotly_table_artifact(
- sample_set_statistics=self._get_shared_features_sample_stats(
- monitoring_context
- ),
- inputs_statistics=monitoring_context.feature_stats,
- metrics_per_feature=metrics_per_feature,
- drift_per_feature_values=drift_per_feature_values,
- monitoring_context=monitoring_context,
- )
+ if self._produce_plotly_artifact:
+ self._log_plotly_table_artifact(
+ sample_set_statistics=self._get_shared_features_sample_stats(
+ monitoring_context
+ ),
+ inputs_statistics=monitoring_context.feature_stats,
+ metrics_per_feature=metrics_per_feature,
+ drift_per_feature_values=drift_per_feature_values,
+ monitoring_context=monitoring_context,
+ )

  def do_tracking(
  self, monitoring_context: mm_context.MonitoringApplicationContext
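Per the updated docstring, changing the artifact defaults means subclassing the application and deploying the subclass like any other model monitoring application. A hedged sketch using the new constructor flags; the import path is assumed:

from mlrun.model_monitoring.applications.histogram_data_drift import (
    HistogramDataDriftApplication,
)


class DriftAppWithPlotlyTable(HistogramDataDriftApplication):
    # Re-enable the plotly table artifact, which this release disables by default.
    def __init__(self) -> None:
        super().__init__(produce_json_artifact=True, produce_plotly_artifact=True)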
mlrun/projects/project.py CHANGED
@@ -4678,7 +4678,6 @@ class MlrunProject(ModelObj):
  ] = None, # Backward compatibility
  states: typing.Optional[list[mlrun.common.runtimes.constants.RunStates]] = None,
  sort: bool = True,
- last: int = 0,
  iter: bool = False,
  start_time_from: Optional[datetime.datetime] = None,
  start_time_to: Optional[datetime.datetime] = None,
@@ -4751,7 +4750,6 @@
  else states or None
  ),
  sort=sort,
- last=last,
  iter=iter,
  start_time_from=start_time_from,
  start_time_to=start_time_to,
@@ -11,7 +11,7 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
-
+ import typing
  from ast import FunctionDef, parse, unparse
  from base64 import b64decode
  from typing import Callable, Optional, Union
@@ -139,7 +139,7 @@ class DatabricksRuntime(kubejob.KubejobRuntime):
  )

  def _get_modified_user_code(self, original_handler: str, log_artifacts_code: str):
- encoded_code = (
+ encoded_code: typing.Optional[str] = (
  self.spec.build.functionSourceCode if hasattr(self.spec, "build") else None
  )
  if not encoded_code:
@@ -527,6 +527,17 @@ class RemoteRuntime(KubeResource):
  access_key = kwargs.pop("access_key", None)
  if not access_key:
  access_key = self._resolve_v3io_access_key()
+ engine = "sync"
+ explicit_ack_mode = kwargs.pop("explicit_ack_mode", None)
+ if (
+ self.spec
+ and hasattr(self.spec, "graph")
+ and self.spec.graph
+ and self.spec.graph.engine
+ ):
+ engine = self.spec.graph.engine
+ if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
+ explicit_ack_mode = explicit_ack_mode or "explicitOnly"

  self.add_trigger(
  name,
@@ -540,6 +551,7 @@ class RemoteRuntime(KubeResource):
  extra_attributes=extra_attributes,
  read_batch_size=256,
  access_key=access_key,
+ explicit_ack_mode=explicit_ack_mode,
  **kwargs,
  ),
  )
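The added trigger logic only defaults explicit_ack_mode to "explicitOnly" when explicit ack is enabled in the mlrun config and the function's graph runs on the async engine. A hedged sketch of a graph configured that way (the function name and file are placeholders):

import mlrun

# Hedged sketch: a serving function whose graph uses the async engine, the case the new
# check targets when mlrun.mlconf.is_explicit_ack_enabled() is true.
fn = mlrun.code_to_function("stream-handler", filename="handler.py", kind="serving")
graph = fn.set_topology("flow", engine="async")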
@@ -376,6 +376,7 @@ class ServingRuntime(RemoteRuntime):
  creation_strategy: Optional[
  schemas.ModelEndpointCreationStrategy
  ] = schemas.ModelEndpointCreationStrategy.INPLACE,
+ outputs: Optional[list[str]] = None,
  **class_args,
  ):
  """add ml model and/or route to the function.
@@ -408,6 +409,9 @@ class ServingRuntime(RemoteRuntime):
  * **archive**:
  1. If model endpoints with the same name exist, preserve them.
  2. Create a new model endpoint with the same name and set it to `latest`.
+ :param outputs: list of the model outputs (e.g. labels) ,if provided will override the outputs that been
+ configured in the model artifact, please note that those outputs need to be equal to the
+ model serving function outputs (length, and order)
  :param class_args: extra kwargs to pass to the model serving class __init__
  (can be read in the model using .get_param(key) method)
  """
@@ -443,6 +447,8 @@ class ServingRuntime(RemoteRuntime):
  if class_name and hasattr(class_name, "to_dict"):
  if model_path:
  class_name.model_path = model_path
+ if outputs:
+ class_name.outputs = outputs
  key, state = params_to_step(
  class_name,
  key,
@@ -470,6 +476,7 @@ class ServingRuntime(RemoteRuntime):
  else:
  class_args = deepcopy(class_args)
  class_args["model_path"] = model_path
+ class_args["outputs"] = outputs
  state = TaskStep(
  class_name,
  class_args,
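A hedged sketch of the new outputs override on add_model(), following the docstring added in this diff; the function, model path, and serving class below are placeholders:

import mlrun

serving_fn = mlrun.code_to_function("serving", filename="model_server.py", kind="serving")
serving_fn.set_topology("router")
serving_fn.add_model(
    "my-model",
    model_path="store://models/my-project/my-model:latest",  # hypothetical store URI
    class_name="MyModelServingClass",  # hypothetical class defined in model_server.py
    outputs=["approved", "denied"],  # must match the serving function outputs in length and order
)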