mlrun 1.6.0rc6__py3-none-any.whl → 1.6.0rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this release has been flagged as potentially problematic.
Files changed (50)
  1. mlrun/__main__.py +32 -31
  2. mlrun/common/schemas/auth.py +2 -0
  3. mlrun/common/schemas/workflow.py +2 -0
  4. mlrun/config.py +3 -3
  5. mlrun/datastore/base.py +9 -3
  6. mlrun/datastore/datastore.py +10 -7
  7. mlrun/datastore/datastore_profile.py +19 -2
  8. mlrun/datastore/dbfs_store.py +6 -6
  9. mlrun/datastore/s3.py +6 -2
  10. mlrun/datastore/sources.py +12 -2
  11. mlrun/datastore/targets.py +43 -20
  12. mlrun/db/httpdb.py +22 -0
  13. mlrun/feature_store/feature_set.py +5 -2
  14. mlrun/feature_store/retrieval/spark_merger.py +7 -1
  15. mlrun/kfpops.py +1 -1
  16. mlrun/launcher/client.py +1 -6
  17. mlrun/launcher/remote.py +5 -3
  18. mlrun/model.py +2 -2
  19. mlrun/model_monitoring/batch_application.py +61 -94
  20. mlrun/package/packager.py +115 -89
  21. mlrun/package/packagers/default_packager.py +66 -65
  22. mlrun/package/packagers/numpy_packagers.py +109 -62
  23. mlrun/package/packagers/pandas_packagers.py +12 -23
  24. mlrun/package/packagers/python_standard_library_packagers.py +35 -57
  25. mlrun/package/packagers_manager.py +16 -13
  26. mlrun/package/utils/_pickler.py +8 -18
  27. mlrun/package/utils/_supported_format.py +1 -1
  28. mlrun/projects/pipelines.py +63 -4
  29. mlrun/projects/project.py +34 -11
  30. mlrun/runtimes/__init__.py +6 -0
  31. mlrun/runtimes/base.py +12 -1
  32. mlrun/runtimes/daskjob.py +73 -5
  33. mlrun/runtimes/databricks_job/databricks_runtime.py +2 -0
  34. mlrun/runtimes/function.py +53 -4
  35. mlrun/runtimes/kubejob.py +1 -1
  36. mlrun/runtimes/local.py +9 -9
  37. mlrun/runtimes/pod.py +1 -1
  38. mlrun/runtimes/remotesparkjob.py +1 -0
  39. mlrun/runtimes/serving.py +11 -1
  40. mlrun/runtimes/sparkjob/spark3job.py +4 -1
  41. mlrun/runtimes/utils.py +1 -46
  42. mlrun/utils/helpers.py +1 -17
  43. mlrun/utils/notifications/notification_pusher.py +27 -6
  44. mlrun/utils/version/version.json +2 -2
  45. {mlrun-1.6.0rc6.dist-info → mlrun-1.6.0rc8.dist-info}/METADATA +7 -6
  46. {mlrun-1.6.0rc6.dist-info → mlrun-1.6.0rc8.dist-info}/RECORD +50 -50
  47. {mlrun-1.6.0rc6.dist-info → mlrun-1.6.0rc8.dist-info}/WHEEL +1 -1
  48. {mlrun-1.6.0rc6.dist-info → mlrun-1.6.0rc8.dist-info}/LICENSE +0 -0
  49. {mlrun-1.6.0rc6.dist-info → mlrun-1.6.0rc8.dist-info}/entry_points.txt +0 -0
  50. {mlrun-1.6.0rc6.dist-info → mlrun-1.6.0rc8.dist-info}/top_level.txt +0 -0
mlrun/datastore/targets.py CHANGED
@@ -24,6 +24,7 @@ from typing import Any, Dict, List, Optional, Union
 from urllib.parse import urlparse
 
 import pandas as pd
+from mergedeep import merge
 
 import mlrun
 import mlrun.utils.helpers
@@ -293,6 +294,8 @@ def add_target_steps(graph, resource, targets, to_df=False, final_step=None):
         driver = get_target_driver(target, resource)
         table = driver.get_table_object() or table
         driver.update_resource_status()
+        if target.after_step:
+            target.attributes["infer_columns_from_data"] = True
         driver.add_writer_step(
             graph,
             target.after_step or final_step,
@@ -435,17 +438,20 @@ class BaseStoreTarget(DataTargetBase):
             prefix=self.credentials_prefix,
         )
 
-    def _get_store(self):
+    def _get_store_and_path(self):
         credentials_prefix_secrets = (
             {"CREDENTIALS_PREFIX": self.credentials_prefix}
             if self.credentials_prefix
             else None
         )
-        store, _ = mlrun.store_manager.get_or_create_store(
+        store, resolved_store_path = mlrun.store_manager.get_or_create_store(
             self.get_target_path(),
             credentials_prefix_secrets,
         )
-        return store
+        if self.get_target_path().startswith("ds://"):
+            return store, store.url + resolved_store_path
+        else:
+            return store, self.get_target_path()
 
     def _get_column_list(self, features, timestamp_key, key_columns, with_type=False):
         result = []
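
The ds:// branch above changes what callers now receive as the write path. A minimal standalone sketch of the new return contract (the helper name and sample values are hypothetical; the real resolution is delegated to mlrun.store_manager.get_or_create_store):

def resolve_write_path(store_url, resolved_store_path, target_path):
    # Datastore-profile targets ("ds://...") are rewritten to the resolved
    # store URL plus the sub-path returned by the store manager; any other
    # target keeps its original path, mirroring _get_store_and_path above.
    if target_path.startswith("ds://"):
        return store_url + resolved_store_path
    return target_path

# hypothetical values, for illustration only
print(resolve_write_path("s3://my-bucket", "/sets/parquet", "ds://my-profile/sets/parquet"))
print(resolve_write_path("", "", "v3io:///projects/demo/sets/parquet"))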
@@ -494,17 +500,18 @@ class BaseStoreTarget(DataTargetBase):
             df.write.mode("overwrite").save(**options)
         elif hasattr(df, "dask"):
             dask_options = self.get_dask_options()
-            storage_options = self._get_store().get_storage_options()
+            store, target_path = self._get_store_and_path()
+            storage_options = store.get_storage_options()
             df = df.repartition(partition_size="100MB")
             try:
                 if dask_options["format"] == "parquet":
                     df.to_parquet(
-                        generate_path_with_chunk(self, chunk_id),
+                        generate_path_with_chunk(self, chunk_id, target_path),
                         storage_options=storage_options,
                     )
                 elif dask_options["format"] == "csv":
                     df.to_csv(
-                        generate_path_with_chunk(self, chunk_id),
+                        generate_path_with_chunk(self, chunk_id, target_path),
                         storage_options=storage_options,
                     )
                 else:
@@ -514,8 +521,9 @@ class BaseStoreTarget(DataTargetBase):
             except Exception as exc:
                 raise RuntimeError("Failed to write Dask Dataframe") from exc
         else:
-            target_path = generate_path_with_chunk(self, chunk_id)
-            file_system = self._get_store().get_filesystem(False)
+            store, target_path = self._get_store_and_path()
+            target_path = generate_path_with_chunk(self, chunk_id, target_path)
+            file_system = store.get_filesystem(False)
             if file_system.protocol == "file":
                 dir = os.path.dirname(target_path)
                 if dir:
@@ -551,10 +559,16 @@ class BaseStoreTarget(DataTargetBase):
             # Partitioning will be performed on timestamp_key and then on self.partition_cols
             # (We might want to give the user control on this order as additional functionality)
             partition_cols += self.partition_cols or []
-            storage_options = self._get_store().get_storage_options()
+
+            storage_options = store.get_storage_options()
+            if storage_options and self.storage_options:
+                storage_options = merge(storage_options, self.storage_options)
+            else:
+                storage_options = storage_options or self.storage_options
+
             self._write_dataframe(
                 target_df,
-                self.storage_options or storage_options,
+                storage_options,
                 target_path,
                 partition_cols=partition_cols,
                 **kwargs,
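
The merge-based handling above combines the store's own credentials with any user-supplied storage_options instead of letting one dict silently win. A small standalone illustration of how mergedeep.merge behaves here (the dict contents are made up for the example):

import copy
from mergedeep import merge

store_opts = {"key": "STORE_KEY", "client_kwargs": {"endpoint_url": "http://minio:9000"}}
user_opts = {"secret": "USER_SECRET", "client_kwargs": {"region_name": "us-east-1"}}

# merge() deep-merges the sources into its first argument and returns it, so
# nested dicts such as client_kwargs are combined rather than replaced wholesale.
combined = merge(copy.deepcopy(store_opts), user_opts)
print(combined)
# top-level keys from both dicts are kept, and client_kwargs ends up with
# both endpoint_url and region_name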
@@ -603,6 +617,7 @@ class BaseStoreTarget(DataTargetBase):
 
         driver._resource = resource
         driver.run_id = spec.run_id
+        driver.after_step = spec.after_step
         return driver
 
     def get_table_object(self):
@@ -673,7 +688,8 @@ class BaseStoreTarget(DataTargetBase):
         raise NotImplementedError()
 
     def purge(self):
-        self._get_store().rm(self.get_target_path(), recursive=True)
+        store, target_path = self._get_store_and_path()
+        store.rm(target_path, recursive=True)
 
     def as_df(
         self,
@@ -860,18 +876,25 @@ class ParquetTarget(BaseStoreTarget):
             "update_last_written": featureset_status.update_last_written_for_target
         }
 
+        store, target_path = self._get_store_and_path()
+
+        storage_options = store.get_storage_options()
+        if storage_options and self.storage_options:
+            storage_options = merge(storage_options, self.storage_options)
+        else:
+            storage_options = storage_options or self.storage_options
+
         graph.add_step(
             name=self.name or "ParquetTarget",
             after=after,
             graph_shape="cylinder",
             class_name="storey.ParquetTarget",
-            path=self.get_target_path(),
+            path=target_path,
             columns=column_list,
             index_cols=tuple_key_columns,
             partition_cols=partition_cols,
             time_field=timestamp_key,
-            storage_options=self.storage_options
-            or self._get_store().get_storage_options(),
+            storage_options=storage_options,
             max_events=self.max_events,
             flush_after_seconds=self.flush_after_seconds,
             **self.attributes,
@@ -1009,17 +1032,17 @@ class CSVTarget(BaseStoreTarget):
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
-
+        store, target_path = self._get_store_and_path()
         graph.add_step(
             name=self.name or "CSVTarget",
             after=after,
             graph_shape="cylinder",
             class_name="storey.CSVTarget",
-            path=self.get_target_path(),
+            path=target_path,
             columns=column_list,
             header=True,
             index_cols=key_columns,
-            storage_options=self._get_store().get_storage_options(),
+            storage_options=store.get_storage_options(),
             **self.attributes,
         )
 
@@ -1923,8 +1946,8 @@ def _get_target_path(driver, resource, run_id_mode=False):
     return f"{data_prefix}/{kind_prefix}/{name}{suffix}"
 
 
-def generate_path_with_chunk(target, chunk_id):
-    prefix, suffix = os.path.splitext(target.get_target_path())
+def generate_path_with_chunk(target, chunk_id, path):
+    prefix, suffix = os.path.splitext(path)
     if chunk_id and not target.partitioned and not target.time_partitioning_granularity:
         return f"{prefix}/{chunk_id:0>4}{suffix}"
-    return target.get_target_path()
+    return path
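
The chunking behaviour itself is unchanged; only the path now arrives as an argument. A tiny self-contained sketch of the path shaping (the values are illustrative):

import os

def path_with_chunk(path, chunk_id, partitioned=False):
    # Mirrors generate_path_with_chunk above: non-partitioned targets get a
    # zero-padded per-chunk file under the original path's stem.
    prefix, suffix = os.path.splitext(path)
    if chunk_id and not partitioned:
        return f"{prefix}/{chunk_id:0>4}{suffix}"
    return path

print(path_with_chunk("v3io:///projects/demo/sets/my-set.parquet", 7))
# v3io:///projects/demo/sets/my-set/0007.parquet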
mlrun/db/httpdb.py CHANGED
@@ -3143,6 +3143,21 @@ class HTTPRunDB(RunDBInterface):
             body=dict_to_json(authorization_verification_input.dict()),
         )
 
+    def list_api_gateways(self, project=None):
+        """
+        Returns a list of Nuclio api gateways
+        :param project: optional str parameter to filter by project, if not passed, default Nuclio's value is taken
+
+        :return: json with the list of Nuclio Api Gateways
+        (json example is here
+        https://github.com/nuclio/nuclio/blob/development/docs/reference/api/README.md#listing-all-api-gateways)
+        """
+        project = project or config.default_project
+        error = "list api gateways"
+        endpoint_path = f"projects/{project}/nuclio/api-gateways"
+        resp = self.api_call("GET", endpoint_path, error)
+        return resp.json()
+
     def trigger_migrations(self) -> Optional[mlrun.common.schemas.BackgroundTask]:
         """Trigger migrations (will do nothing if no migrations are needed) and wait for them to finish if actually
         triggered
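
A quick usage sketch of the new client call, assuming an MLRun API server is reachable and configured for the SDK (the project name is made up):

import mlrun

# mlrun.get_run_db() returns the configured HTTPRunDB client; the new method
# proxies to Nuclio's api-gateway listing endpoint for the given project.
db = mlrun.get_run_db()
gateways = db.list_api_gateways(project="my-project")
print(gateways)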
@@ -3238,6 +3253,7 @@ class HTTPRunDB(RunDBInterface):
         source: Optional[str] = None,
         run_name: Optional[str] = None,
         namespace: Optional[str] = None,
+        notifications: typing.List[mlrun.model.Notification] = None,
     ):
         """
         Submitting workflow for a remote execution.
@@ -3250,6 +3266,7 @@
         :param source: source url of the project
         :param run_name: run name to override the default: 'workflow-runner-<workflow name>'
         :param namespace: kubernetes namespace if other than default
+        :param notifications: list of notifications to send when workflow execution is completed
 
         :returns: :py:class:`~mlrun.common.schemas.WorkflowResponse`.
         """
@@ -3281,6 +3298,11 @@
         req["spec"] = workflow_spec
         req["spec"]["image"] = image
         req["spec"]["name"] = workflow_name
+        if notifications:
+            req["notifications"] = [
+                notification.to_dict() for notification in notifications
+            ]
+
         response = self.api_call(
             "POST",
             f"projects/{project}/workflows/{workflow_name}/submit",
mlrun/feature_store/feature_set.py CHANGED
@@ -16,6 +16,7 @@ from datetime import datetime
 from typing import Dict, List, Optional, Union
 
 import pandas as pd
+import pytz
 from storey import EmitEveryEvent, EmitPolicy
 
 import mlrun
@@ -929,9 +930,11 @@ class FeatureSet(ModelObj):
             )
             df = self.spec.source.to_dataframe(
                 columns=columns,
+                # overwrite `source.start_time` when the source is schedule.
                 start_time=start_time
-                or pd.Timestamp.min, # overwrite `source.start_time` when the source is schedule.
-                end_time=end_time or pd.Timestamp.max,
+                or pd.to_datetime(pd.Timestamp.min, unit="ns").replace(tzinfo=pytz.UTC),
+                end_time=end_time
+                or pd.to_datetime(pd.Timestamp.max, unit="ns").replace(tzinfo=pytz.UTC),
                 time_field=time_column,
                 **kwargs,
             )
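
The change matters because a timezone-aware start_time or end_time from a scheduled source cannot be compared against the naive pd.Timestamp.min/max defaults. A standalone pandas illustration of the failure mode the new UTC-aware bounds avoid:

import pandas as pd
import pytz

aware_event_time = pd.Timestamp("2024-01-01 12:00:00", tz="UTC")

naive_bound = pd.Timestamp.min
aware_bound = pd.to_datetime(pd.Timestamp.min, unit="ns").replace(tzinfo=pytz.UTC)

try:
    aware_event_time > naive_bound  # mixing naive and aware timestamps raises
except TypeError as err:
    print("naive bound fails:", err)

print(aware_event_time > aware_bound)  # True: both sides are UTC-aware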
mlrun/feature_store/retrieval/spark_merger.py CHANGED
@@ -172,11 +172,17 @@ class SparkFeatureMerger(BaseMerger):
         # when we upgrade pyspark, we should check whether this workaround is still necessary
         # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
         if semver.parse(pd.__version__)["major"] >= 2:
+            import pyspark.sql.functions as pyspark_functions
+
             type_conversion_dict = {}
             for field in df.schema.fields:
                 if str(field.dataType) == "TimestampType":
                     df = df.withColumn(
-                        field.name, df[field.name].cast("string")
+                        field.name,
+                        pyspark_functions.date_format(
+                            pyspark_functions.to_timestamp(field.name),
+                            "yyyy-MM-dd'T'HH:mm:ss.SSS",
+                        ),
                    )
                     type_conversion_dict[field.name] = "datetime64[ns]"
             df = df.toPandas()
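
A minimal local sketch of the new conversion, assuming a working pyspark installation; it renders a timestamp column as a fixed ISO-like string with millisecond precision before the toPandas() round-trip, which is what the change above relies on:

from pyspark.sql import SparkSession
import pyspark.sql.functions as F

spark = SparkSession.builder.master("local[1]").appName("ts-format-demo").getOrCreate()

df = spark.createDataFrame([("2023-11-05 12:34:56.789",)], ["ts"])
df = df.withColumn("ts", F.to_timestamp("ts"))

# date_format keeps the milliseconds in a fixed layout, so pandas can convert
# the column back to datetime64[ns] without losing sub-second precision.
df = df.withColumn("ts", F.date_format(F.to_timestamp("ts"), "yyyy-MM-dd'T'HH:mm:ss.SSS"))

pdf = df.toPandas().astype({"ts": "datetime64[ns]"})
print(pdf.dtypes, pdf.iloc[0, 0], sep="\n")
spark.stop()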
mlrun/kfpops.py CHANGED
@@ -93,7 +93,7 @@ def write_kfpmeta(struct):
         val = results[key]
         try:
             path = "/".join([KFP_ARTIFACTS_DIR, key])
-            logger.info("writing artifact output", path=path, val=val)
+            logger.info("Writing artifact output", path=path, val=val)
             with open(path, "w") as fp:
                 fp.write(str(val))
         except Exception as exc:
mlrun/launcher/client.py CHANGED
@@ -52,12 +52,7 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
         if runtime.kind in mlrun.runtimes.RuntimeKinds.nuclio_runtimes():
             return
 
-        build = runtime.spec.build
-        require_build = (
-            build.commands
-            or build.requirements
-            or (build.source and not build.load_source_on_run)
-        )
+        require_build = runtime.requires_build()
         image = runtime.spec.image
         # we allow users to not set an image, in that case we'll use the default
         if (
mlrun/launcher/remote.py CHANGED
@@ -90,9 +90,11 @@ class ClientRemoteLauncher(launcher.ClientBaseLauncher):
             runtime.deploy(skip_deployed=True, show_on_failure=True)
 
         else:
-            raise mlrun.errors.MLRunRuntimeError(
-                "Function image is not built/ready, set auto_build=True or use .deploy() method first"
-            )
+            if runtime.requires_build():
+                logger.warning(
+                    "Function image is not built/ready and function requires build - execution will fail. "
+                    "Need to set auto_build=True or use .deploy() method first"
+                )
 
         if runtime.verbose:
             logger.info(f"runspec:\n{run.to_yaml()}")
mlrun/model.py CHANGED
@@ -922,7 +922,7 @@ class RunSpec(ModelObj):
         """
         Set the dictionary of k8s states (pod phase) to thresholds time strings.
         The state will be matched against the pod's status. The threshold should be a time string that conforms
-        to timelength python package standards and is at least 1 second (-1 for infinite). If the phase is active
+        to timelength python package standards and is at least 1 minute (-1 for infinite). If the phase is active
         for longer than the threshold, the run will be marked as aborted and the pod will be deleted.
         See mlconf.function.spec.state_thresholds for the state options and default values.
 
@@ -1433,7 +1433,7 @@ class RunObject(RunTemplate):
         self.logs(watch=False)
         if raise_on_failure and state != mlrun.runtimes.constants.RunStates.completed:
             raise mlrun.errors.MLRunRuntimeError(
-                f"task {self.metadata.name} did not complete (state={state})"
+                f"Task {self.metadata.name} did not complete (state={state})"
             )
 
         return state
mlrun/model_monitoring/batch_application.py CHANGED
@@ -11,25 +11,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
+
 import concurrent.futures
 import datetime
 import json
 import os
 import re
-from typing import List, Tuple
+from typing import Callable, Optional, Tuple
 
-import numpy as np
 import pandas as pd
 
 import mlrun
-import mlrun.common.helpers
-import mlrun.common.model_monitoring.helpers
-import mlrun.common.schemas.model_monitoring
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.data_types.infer
 import mlrun.feature_store as fstore
-import mlrun.utils.v3io_clients
 from mlrun.datastore import get_stream_pusher
 from mlrun.datastore.targets import ParquetTarget
 from mlrun.model_monitoring.batch import calculate_inputs_statistics
@@ -72,46 +67,39 @@ class BatchApplicationProcessor:
 
         # Get the batch interval range
         self.batch_dict = context.parameters[
-            mlrun.common.schemas.model_monitoring.EventFieldType.BATCH_INTERVALS_DICT
+            mm_constants.EventFieldType.BATCH_INTERVALS_DICT
         ]
 
-        # TODO: This will be removed in 1.5.0 once the job params can be parsed with different types
+        # TODO: This will be removed once the job params can be parsed with different types
         # Convert batch dict string into a dictionary
         if isinstance(self.batch_dict, str):
            self._parse_batch_dict_str()
         # If provided, only model endpoints in that that list will be analyzed
         self.model_endpoints = context.parameters.get(
-            mlrun.common.schemas.model_monitoring.EventFieldType.MODEL_ENDPOINTS, None
-        )
-        self.v3io_access_key = os.environ.get("V3IO_ACCESS_KEY")
-        self.model_monitoring_access_key = (
-            os.environ.get("MODEL_MONITORING_ACCESS_KEY") or self.v3io_access_key
+            mm_constants.EventFieldType.MODEL_ENDPOINTS, None
         )
+        self.model_monitoring_access_key = self._get_model_monitoring_access_key()
         self.parquet_directory = get_monitoring_parquet_path(
             project=project,
-            kind=mlrun.common.schemas.model_monitoring.FileTargetKind.BATCH_CONTROLLER_PARQUET,
+            kind=mm_constants.FileTargetKind.BATCH_CONTROLLER_PARQUET,
         )
         self.storage_options = None
         if not mlrun.mlconf.is_ce_mode():
-            self._initialize_v3io_configurations(
-                model_monitoring_access_key=self.model_monitoring_access_key
-            )
+            self._initialize_v3io_configurations()
         elif self.parquet_directory.startswith("s3://"):
             self.storage_options = mlrun.mlconf.get_s3_storage_options()
 
-    def _initialize_v3io_configurations(
-        self,
-        v3io_access_key: str = None,
-        v3io_framesd: str = None,
-        v3io_api: str = None,
-        model_monitoring_access_key: str = None,
-    ):
-        # Get the V3IO configurations
-        self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
-        self.v3io_api = v3io_api or mlrun.mlconf.v3io_api
-
-        self.v3io_access_key = v3io_access_key or os.environ.get("V3IO_ACCESS_KEY")
-        self.model_monitoring_access_key = model_monitoring_access_key
+    @staticmethod
+    def _get_model_monitoring_access_key() -> Optional[str]:
+        access_key = os.getenv(mm_constants.ProjectSecretKeys.ACCESS_KEY)
+        # allow access key to be empty and don't fetch v3io access key if not needed
+        if access_key is None:
+            access_key = mlrun.mlconf.get_v3io_access_key()
+        return access_key
+
+    def _initialize_v3io_configurations(self) -> None:
+        self.v3io_framesd = mlrun.mlconf.v3io_framesd
+        self.v3io_api = mlrun.mlconf.v3io_api
         self.storage_options = dict(
             v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
         )
@@ -126,9 +114,7 @@ class BatchApplicationProcessor:
             self.project
         ).list_model_monitoring_functions()
         if application:
-            applications_names = np.unique(
-                [app.metadata.name for app in application]
-            ).tolist()
+            applications_names = list({app.metadata.name for app in application})
         else:
             logger.info("There are no monitoring application found in this project")
             applications_names = []
@@ -144,26 +130,18 @@ class BatchApplicationProcessor:
         futures = []
         for endpoint in endpoints:
             if (
-                endpoint[
-                    mlrun.common.schemas.model_monitoring.EventFieldType.ACTIVE
-                ]
-                and endpoint[
-                    mlrun.common.schemas.model_monitoring.EventFieldType.MONITORING_MODE
-                ]
-                == mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled.value
+                endpoint[mm_constants.EventFieldType.ACTIVE]
+                and endpoint[mm_constants.EventFieldType.MONITORING_MODE]
+                == mm_constants.ModelMonitoringMode.enabled.value
             ):
                 # Skip router endpoint:
                 if (
-                    int(
-                        endpoint[
-                            mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_TYPE
-                        ]
-                    )
-                    == mlrun.common.schemas.model_monitoring.EndpointType.ROUTER
+                    int(endpoint[mm_constants.EventFieldType.ENDPOINT_TYPE])
+                    == mm_constants.EndpointType.ROUTER
                 ):
                     # Router endpoint has no feature stats
                     logger.info(
-                        f"{endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID]} is router skipping"
+                        f"{endpoint[mm_constants.EventFieldType.UID]} is router skipping"
                    )
                    continue
                future = pool.submit(
@@ -184,10 +162,11 @@ class BatchApplicationProcessor:
 
         self._delete_old_parquet()
 
-    @staticmethod
+    @classmethod
     def model_endpoint_process(
+        cls,
         endpoint: dict,
-        applications_names: List[str],
+        applications_names: list[str],
         bath_dict: dict,
         project: str,
         parquet_directory: str,
@@ -207,20 +186,14 @@ class BatchApplicationProcessor:
         :param model_monitoring_access_key: (str) Access key to apply the model monitoring process.
 
         """
-        endpoint_id = endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID]
+        endpoint_id = endpoint[mm_constants.EventFieldType.UID]
         try:
             # Getting batch interval start time and end time
-            start_time, end_time = BatchApplicationProcessor._get_interval_range(
-                bath_dict
-            )
+            start_time, end_time = cls._get_interval_range(bath_dict)
             m_fs = fstore.get_feature_set(
-                endpoint[
-                    mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_SET_URI
-                ]
+                endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
             )
-            labels = endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.LABEL_NAMES
-            ]
+            labels = endpoint[mm_constants.EventFieldType.LABEL_NAMES]
             if labels:
                 if isinstance(labels, str):
                     labels = json.loads(labels)
@@ -232,7 +205,7 @@ class BatchApplicationProcessor:
 
             try:
                 # get sample data
-                df = BatchApplicationProcessor._get_sample_df(
+                df = cls._get_sample_df(
                     m_fs,
                     endpoint_id,
                     end_time,
@@ -245,14 +218,10 @@ class BatchApplicationProcessor:
                 logger.warn(
                     "Not enough model events since the beginning of the batch interval",
                     featureset_name=m_fs.metadata.name,
-                    endpoint=endpoint[
-                        mlrun.common.schemas.model_monitoring.EventFieldType.UID
-                    ],
+                    endpoint=endpoint[mm_constants.EventFieldType.UID],
                     min_rqeuired_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
-                    start_time=str(
-                        datetime.datetime.now() - datetime.timedelta(hours=1)
-                    ),
-                    end_time=str(datetime.datetime.now()),
+                    start_time=start_time,
+                    end_time=end_time,
                 )
                 return
@@ -264,9 +233,7 @@ class BatchApplicationProcessor:
                 logger.warn(
                     "Parquet not found, probably due to not enough model events",
                     # parquet_target=m_fs.status.targets[0].path, TODO:
-                    endpoint=endpoint[
-                        mlrun.common.schemas.model_monitoring.EventFieldType.UID
-                    ],
+                    endpoint=endpoint[mm_constants.EventFieldType.UID],
                     min_rqeuired_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
                 )
                 return
@@ -282,15 +249,11 @@ class BatchApplicationProcessor:
             m_fs.save()
 
             # Get the timestamp of the latest request:
-            latest_request = df[
-                mlrun.common.schemas.model_monitoring.EventFieldType.TIMESTAMP
-            ].iloc[-1]
+            latest_request = df[mm_constants.EventFieldType.TIMESTAMP].iloc[-1]
 
             # Get the feature stats from the model endpoint for reference data
             feature_stats = json.loads(
-                endpoint[
-                    mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_STATS
-                ]
+                endpoint[mm_constants.EventFieldType.FEATURE_STATS]
             )
 
             # Get the current stats:
@@ -300,7 +263,7 @@ class BatchApplicationProcessor:
             )
 
             # create and push data to all applications
-            BatchApplicationProcessor._push_to_applications(
+            cls._push_to_applications(
                 current_stats,
                 feature_stats,
                 parquet_directory,
@@ -314,22 +277,27 @@ class BatchApplicationProcessor:
 
         except FileNotFoundError as e:
             logger.error(
-                f"Exception for endpoint {endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID]}"
+                f"Exception for endpoint {endpoint[mm_constants.EventFieldType.UID]}"
             )
             return endpoint_id, e
 
     @staticmethod
-    def _get_interval_range(batch_dict) -> Tuple[datetime.datetime, datetime.datetime]:
+    def _get_interval_range(
+        batch_dict: dict[str, int],
+        now_func: Callable[[], datetime.datetime] = datetime.datetime.now,
+    ) -> Tuple[datetime.datetime, datetime.datetime]:
         """Getting batch interval time range"""
         minutes, hours, days = (
-            batch_dict[mlrun.common.schemas.model_monitoring.EventFieldType.MINUTES],
-            batch_dict[mlrun.common.schemas.model_monitoring.EventFieldType.HOURS],
-            batch_dict[mlrun.common.schemas.model_monitoring.EventFieldType.DAYS],
+            batch_dict[mm_constants.EventFieldType.MINUTES],
+            batch_dict[mm_constants.EventFieldType.HOURS],
+            batch_dict[mm_constants.EventFieldType.DAYS],
+        )
+        end_time = now_func() - datetime.timedelta(
+            seconds=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs
         )
-        start_time = datetime.datetime.now() - datetime.timedelta(
+        start_time = end_time - datetime.timedelta(
            minutes=minutes, hours=hours, days=days
         )
-        end_time = datetime.datetime.now()
         return start_time, end_time
 
     def _parse_batch_dict_str(self):
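
The interval is now anchored to a delayed end_time rather than two separate now() calls, and now_func is injectable for tests. A standalone sketch of the same arithmetic, where the 600-second delay stands in for mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs and the plain string keys are illustrative:

import datetime

def interval_range(batch_dict, timeout_secs=600, now_func=datetime.datetime.now):
    # end_time lags "now" by the parquet batching timeout (so recently produced
    # events have presumably been flushed); start_time is one batch window
    # earlier, mirroring _get_interval_range above.
    end_time = now_func() - datetime.timedelta(seconds=timeout_secs)
    start_time = end_time - datetime.timedelta(
        minutes=batch_dict["minutes"], hours=batch_dict["hours"], days=batch_dict["days"]
    )
    return start_time, end_time

def fixed_now():
    return datetime.datetime(2024, 1, 1, 12, 0, 0)

print(interval_range({"minutes": 0, "hours": 1, "days": 0}, now_func=fixed_now))
# (datetime.datetime(2024, 1, 1, 10, 50), datetime.datetime(2024, 1, 1, 11, 50))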
@@ -357,7 +325,7 @@ class BatchApplicationProcessor:
             ("minute", "%M"),
         ]:
             schedule_time_str += f"{unit}={schedule_time.strftime(fmt)}/"
-        endpoint_str = f"{mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID}={endpoint_id}"
+        endpoint_str = f"{mm_constants.EventFieldType.ENDPOINT_ID}={endpoint_id}"
 
         return f"{parquet_directory}/{schedule_time_str}/{endpoint_str}"
 
@@ -374,10 +342,11 @@ class BatchApplicationProcessor:
 
         base_directory = get_monitoring_parquet_path(
             project=self.project,
-            kind=mlrun.common.schemas.model_monitoring.FileTargetKind.BATCH_CONTROLLER_PARQUET,
+            kind=mm_constants.FileTargetKind.BATCH_CONTROLLER_PARQUET,
         )
         target = ParquetTarget(path=base_directory)
-        fs = target._get_store().get_filesystem()
+        store, _ = target._get_store_and_path()
+        fs = store.get_filesystem()
 
         try:
             # List all subdirectories in the base directory
@@ -452,7 +421,7 @@ class BatchApplicationProcessor:
             mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
             mm_constants.ApplicationEvent.OUTPUT_STREAM_URI: get_stream_path(
                 project=project,
-                application_name=mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.WRITER,
+                application_name=mm_constants.MonitoringFunctionNames.WRITER,
             ),
         }
         for app_name in applications_names:
@@ -500,9 +469,7 @@ class BatchApplicationProcessor:
         } # to avoid exception when the taf is not latest
         entity_rows = pd.DataFrame(
             {
-                mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID: [
-                    endpoint_id
-                ],
+                mm_constants.EventFieldType.ENDPOINT_ID: [endpoint_id],
                 "scheduled_time": [end_time],
             }
         )
@@ -512,12 +479,12 @@ class BatchApplicationProcessor:
             entity_timestamp_column="scheduled_time",
             start_time=start_time,
             end_time=end_time,
-            timestamp_for_filtering=mlrun.common.schemas.model_monitoring.EventFieldType.TIMESTAMP,
+            timestamp_for_filtering=mm_constants.EventFieldType.TIMESTAMP,
             target=ParquetTarget(
                 path=parquet_directory,
                 time_partitioning_granularity="minute",
                 partition_cols=[
-                    mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID,
+                    mm_constants.EventFieldType.ENDPOINT_ID,
                 ],
                 storage_options=storage_options,
             ),