mlrun-1.3.2rc1-py3-none-any.whl → mlrun-1.3.2rc2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic.

Files changed (93)
  1. mlrun/api/api/deps.py +14 -1
  2. mlrun/api/api/endpoints/frontend_spec.py +0 -2
  3. mlrun/api/api/endpoints/functions.py +15 -27
  4. mlrun/api/api/endpoints/grafana_proxy.py +435 -74
  5. mlrun/api/api/endpoints/healthz.py +5 -18
  6. mlrun/api/api/endpoints/model_endpoints.py +33 -37
  7. mlrun/api/api/utils.py +6 -13
  8. mlrun/api/crud/__init__.py +14 -16
  9. mlrun/api/crud/logs.py +5 -7
  10. mlrun/api/crud/model_monitoring/__init__.py +2 -2
  11. mlrun/api/crud/model_monitoring/model_endpoint_store.py +847 -0
  12. mlrun/api/crud/model_monitoring/model_endpoints.py +105 -328
  13. mlrun/api/crud/pipelines.py +2 -3
  14. mlrun/api/db/sqldb/models/models_mysql.py +52 -19
  15. mlrun/api/db/sqldb/models/models_sqlite.py +52 -19
  16. mlrun/api/db/sqldb/session.py +19 -26
  17. mlrun/api/schemas/__init__.py +2 -0
  18. mlrun/api/schemas/constants.py +0 -13
  19. mlrun/api/schemas/frontend_spec.py +0 -1
  20. mlrun/api/schemas/model_endpoints.py +38 -195
  21. mlrun/api/schemas/schedule.py +2 -2
  22. mlrun/api/utils/clients/log_collector.py +5 -0
  23. mlrun/builder.py +9 -41
  24. mlrun/config.py +1 -76
  25. mlrun/data_types/__init__.py +1 -6
  26. mlrun/data_types/data_types.py +1 -3
  27. mlrun/datastore/__init__.py +2 -9
  28. mlrun/datastore/sources.py +20 -25
  29. mlrun/datastore/store_resources.py +1 -1
  30. mlrun/datastore/targets.py +34 -67
  31. mlrun/datastore/utils.py +4 -26
  32. mlrun/db/base.py +2 -4
  33. mlrun/db/filedb.py +5 -13
  34. mlrun/db/httpdb.py +32 -64
  35. mlrun/db/sqldb.py +2 -4
  36. mlrun/errors.py +0 -5
  37. mlrun/execution.py +0 -2
  38. mlrun/feature_store/api.py +8 -24
  39. mlrun/feature_store/feature_set.py +6 -28
  40. mlrun/feature_store/feature_vector.py +0 -2
  41. mlrun/feature_store/ingestion.py +11 -8
  42. mlrun/feature_store/retrieval/base.py +43 -271
  43. mlrun/feature_store/retrieval/dask_merger.py +153 -55
  44. mlrun/feature_store/retrieval/job.py +3 -12
  45. mlrun/feature_store/retrieval/local_merger.py +130 -48
  46. mlrun/feature_store/retrieval/spark_merger.py +125 -126
  47. mlrun/features.py +2 -7
  48. mlrun/model_monitoring/constants.py +6 -48
  49. mlrun/model_monitoring/helpers.py +35 -118
  50. mlrun/model_monitoring/model_monitoring_batch.py +260 -293
  51. mlrun/model_monitoring/stream_processing_fs.py +253 -220
  52. mlrun/platforms/iguazio.py +0 -33
  53. mlrun/projects/project.py +72 -34
  54. mlrun/runtimes/base.py +0 -5
  55. mlrun/runtimes/daskjob.py +0 -2
  56. mlrun/runtimes/function.py +3 -29
  57. mlrun/runtimes/kubejob.py +15 -39
  58. mlrun/runtimes/local.py +45 -7
  59. mlrun/runtimes/mpijob/abstract.py +0 -2
  60. mlrun/runtimes/mpijob/v1.py +0 -2
  61. mlrun/runtimes/pod.py +0 -2
  62. mlrun/runtimes/remotesparkjob.py +0 -2
  63. mlrun/runtimes/serving.py +0 -6
  64. mlrun/runtimes/sparkjob/abstract.py +2 -39
  65. mlrun/runtimes/sparkjob/spark3job.py +0 -2
  66. mlrun/serving/__init__.py +1 -2
  67. mlrun/serving/routers.py +35 -35
  68. mlrun/serving/server.py +12 -22
  69. mlrun/serving/states.py +30 -162
  70. mlrun/serving/v2_serving.py +10 -13
  71. mlrun/utils/clones.py +1 -1
  72. mlrun/utils/model_monitoring.py +96 -122
  73. mlrun/utils/version/version.json +2 -2
  74. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/METADATA +27 -23
  75. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/RECORD +79 -92
  76. mlrun/api/crud/model_monitoring/grafana.py +0 -427
  77. mlrun/datastore/spark_udf.py +0 -40
  78. mlrun/model_monitoring/__init__.py +0 -44
  79. mlrun/model_monitoring/common.py +0 -112
  80. mlrun/model_monitoring/model_endpoint.py +0 -141
  81. mlrun/model_monitoring/stores/__init__.py +0 -106
  82. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -448
  83. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
  84. mlrun/model_monitoring/stores/models/__init__.py +0 -23
  85. mlrun/model_monitoring/stores/models/base.py +0 -18
  86. mlrun/model_monitoring/stores/models/mysql.py +0 -100
  87. mlrun/model_monitoring/stores/models/sqlite.py +0 -98
  88. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -375
  89. mlrun/utils/db.py +0 -52
  90. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/LICENSE +0 -0
  91. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/WHEEL +0 -0
  92. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/entry_points.txt +0 -0
  93. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/top_level.txt +0 -0
mlrun/feature_store/api.py

@@ -28,6 +28,7 @@ from ..datastore.store_resources import parse_store_uri
 from ..datastore.targets import (
     BaseStoreTarget,
     get_default_prefix_for_source,
+    get_default_targets,
     get_target_driver,
     kind_to_driver,
     validate_target_list,
@@ -102,7 +103,6 @@ def get_offline_features(
     engine_args: dict = None,
     query: str = None,
     join_type: str = "inner",
-    order_by: Union[str, List[str]] = None,
     spark_service: str = None,
 ) -> OfflineVectorResponse:
     """retrieve offline feature vector results
@@ -161,8 +161,6 @@ def get_offline_features(
              * right: use only keys from right frame (SQL: right outer join)
              * outer: use union of keys from both frames (SQL: full outer join)
              * inner: use intersection of keys from both frames (SQL: inner join).
-    :param order_by: Name or list of names to order by. The name or the names in the list can be the feature name
-                     or the alias of the feature you pass in the feature list.
     """
     if isinstance(feature_vector, FeatureVector):
         update_stats = True
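
Note: rc2 drops the order_by argument from get_offline_features(), so ordering is left to the caller. A minimal sketch of one way to adapt, assuming an existing feature vector (the URI and the event_time column below are illustrative, not taken from this diff):

import mlrun.feature_store as fstore

resp = fstore.get_offline_features(
    "store://feature-vectors/my-project/my-vector",  # hypothetical vector URI
    with_indexes=True,
)
df = resp.to_dataframe()
# with order_by gone, sort the result explicitly (placeholder column name)
df = df.sort_values(by="event_time")
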
@@ -192,7 +190,6 @@ def get_offline_features(
             with_indexes=with_indexes,
             query=query,
             join_type=join_type,
-            order_by=order_by,
         )

     start_time = str_to_timestamp(start_time)
@@ -216,7 +213,6 @@ def get_offline_features(
         update_stats=update_stats,
         query=query,
         join_type=join_type,
-        order_by=order_by,
     )

@@ -409,15 +405,6 @@ def ingest(
         raise mlrun.errors.MLRunInvalidArgumentError(
             "feature set and source must be specified"
         )
-    if (
-        not mlrun_context
-        and not targets
-        and not (featureset.spec.targets or featureset.spec.with_default_targets)
-        and (run_config is not None and not run_config.local)
-    ):
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            f"Feature set {featureset.metadata.name} is remote ingested with no targets defined, aborting"
-        )

     if featureset is not None:
         featureset.validate_steps(namespace=namespace)
@@ -490,11 +477,10 @@ def ingest(
                 f"Source.end_time is {str(source.end_time)}"
             )

-        if mlrun_context:
-            mlrun_context.logger.info(
-                f"starting ingestion task to {featureset.uri}.{filter_time_string}"
-            )
-
+    if mlrun_context:
+        mlrun_context.logger.info(
+            f"starting ingestion task to {featureset.uri}.{filter_time_string}"
+        )
     return_df = False

     if featureset.spec.passthrough:
@@ -503,7 +489,7 @@ def ingest(

     namespace = namespace or get_caller_globals()

-    targets_to_ingest = targets or featureset.spec.targets
+    targets_to_ingest = targets or featureset.spec.targets or get_default_targets()
     targets_to_ingest = copy.deepcopy(targets_to_ingest)

     validate_target_paths_for_engine(targets_to_ingest, featureset.spec.engine, source)
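
With this change ingest() falls back to the default targets when neither the call nor the feature set defines any, rather than ingesting to no target. A minimal sketch under that assumption; the feature set, entity, and dataframe below are illustrative:

import pandas as pd
import mlrun.feature_store as fstore

# hypothetical feature set with no explicit targets configured
stocks_set = fstore.FeatureSet("stocks", entities=[fstore.Entity("ticker")])
df = pd.DataFrame({"ticker": ["GOOG", "MSFT"], "price": [720.5, 57.2]})

# in rc2 the targets resolve via get_default_targets()
# (typically parquet + nosql) instead of writing nowhere
fstore.ingest(stocks_set, df)
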
@@ -700,9 +686,7 @@ def preview(
         )
     # reduce the size of the ingestion if we do not infer stats
     rows_limit = (
-        None
-        if InferOptions.get_common_options(options, InferOptions.Stats)
-        else 1000
+        0 if InferOptions.get_common_options(options, InferOptions.Stats) else 1000
     )
     source = init_featureset_graph(
         source,
@@ -786,7 +770,7 @@ def deploy_ingestion_service(
         name=featureset.metadata.name,
     )

-    targets_to_ingest = targets or featureset.spec.targets
+    targets_to_ingest = targets or featureset.spec.targets or get_default_targets()
     targets_to_ingest = copy.deepcopy(targets_to_ingest)
     featureset.update_targets_for_ingest(targets_to_ingest)

mlrun/feature_store/feature_set.py

@@ -118,7 +118,7 @@ class FeatureSetSpec(ModelObj):
         self.owner = owner
         self.description = description
         self.entities: List[Union[Entity, str]] = entities or []
-        self.relations: Dict[str, Union[Entity, str]] = relations or {}
+        self.relations: Dict[str, Entity] = relations or {}
         self.features: List[Feature] = features or []
         self.partition_keys = partition_keys or []
         self.timestamp_key = timestamp_key
@@ -131,7 +131,6 @@ class FeatureSetSpec(ModelObj):
         self.engine = engine
         self.output_path = output_path or mlconf.artifact_path
         self.passthrough = passthrough
-        self.with_default_targets = True

     @property
     def entities(self) -> List[Entity]:
@@ -233,9 +232,6 @@ class FeatureSetSpec(ModelObj):

     @relations.setter
     def relations(self, relations: Dict[str, Entity]):
-        for col, ent in relations.items():
-            if isinstance(ent, str):
-                relations[col] = Entity(ent)
         self._relations = ObjectDict.from_dict({"entity": Entity}, relations, "entity")

     def require_processing(self):
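
Since the setter no longer wraps plain strings in Entity, relations are passed as Entity objects in rc2. A short sketch; the feature set and column names are illustrative:

import mlrun.feature_store as fstore
from mlrun.feature_store import Entity

# rc1 also accepted {"department_id": "department_id"}; rc2 expects Entity values
employees_set = fstore.FeatureSet(
    "employees",
    entities=[Entity("id")],
    relations={"department_id": Entity("department_id")},
)
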
@@ -328,7 +324,7 @@ class FeatureSet(ModelObj):
         timestamp_key: str = None,
         engine: str = None,
         label_column: str = None,
-        relations: Dict[str, Union[Entity, str]] = None,
+        relations: Dict[str, Entity] = None,
         passthrough: bool = None,
     ):
         """Feature set object, defines a set of features and their data pipeline
@@ -376,7 +372,6 @@ class FeatureSet(ModelObj):
         self.status = None
         self._last_state = ""
         self._aggregations = {}
-        self.set_targets()

     @property
     def spec(self) -> FeatureSetSpec:
@@ -475,25 +470,10 @@ class FeatureSet(ModelObj):
             )
         targets = targets or []
         if with_defaults:
-            self.spec.with_default_targets = True
             targets.extend(get_default_targets())
-        else:
-            self.spec.with_default_targets = False
-
-        self.spec.targets = []
-        self.__set_targets_add_targets_helper(targets)
-
-        if default_final_step:
-            self.spec.graph.final_step = default_final_step
-
-    def __set_targets_add_targets_helper(self, targets):
-        """
-        Add the desired target list

-        :param targets: list of target type names ('csv', 'nosql', ..) or target objects
-                        CSVTarget(), ParquetTarget(), NoSqlTarget(), StreamTarget(), ..
-        """
         validate_target_list(targets=targets)
+
         for target in targets:
             kind = target.kind if hasattr(target, "kind") else target
             if kind not in TargetTypes.all():
@@ -505,6 +485,8 @@ class FeatureSet(ModelObj):
                     target, name=str(target), partitioned=(target == "parquet")
                 )
             self.spec.targets.update(target)
+        if default_final_step:
+            self.spec.graph.final_step = default_final_step

     def validate_steps(self, namespace):
         if not self.spec:
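
For context, set_targets() keeps its public shape after this refactor: it validates the requested targets, optionally appends the defaults, and sets the final step; only the with_default_targets bookkeeping and the private helper are gone. A minimal usage sketch (the target choice and names are illustrative):

import mlrun.feature_store as fstore
from mlrun.datastore.targets import ParquetTarget

fset = fstore.FeatureSet("measurements", entities=[fstore.Entity("patient_id")])
# explicit target plus the defaults; pass with_defaults=False to skip the defaults
fset.set_targets(targets=[ParquetTarget(name="fast_parquet")], with_defaults=True)
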
@@ -941,11 +923,7 @@ class FeatureSet(ModelObj):
                 raise mlrun.errors.MLRunNotFoundError(
                     "passthrough feature set {self.metadata.name} with no source"
                 )
-            df = self.spec.source.to_dataframe()
-            # to_dataframe() can sometimes return an iterator of dataframes instead of one dataframe
-            if not isinstance(df, pd.DataFrame):
-                df = pd.concat(df)
-            return df
+            return self.spec.source.to_dataframe()

         target = get_offline_target(self, name=target_name)
         if not target:
mlrun/feature_store/feature_vector.py

@@ -520,8 +520,6 @@ class OnlineVectorService:
                     v = data[name]
                     if v is None or (type(v) == float and (np.isinf(v) or np.isnan(v))):
                         data[name] = self._impute_values.get(name, v)
-            for name in list(self.vector.spec.entity_fields.keys()):
-                data.pop(name, None)

             if as_list and data:
                 data = [
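
This hunk changes what OnlineVectorService.get() leaves in each returned record (the rc1 code popped the vector's entity fields from the response). A minimal sketch of the call site, assuming an existing feature vector; the URI and entity key are illustrative:

import mlrun.feature_store as fstore

svc = fstore.get_online_feature_service(
    "store://feature-vectors/my-project/my-vector"  # hypothetical vector URI
)
try:
    resp = svc.get([{"customer_id": "42"}], as_list=False)
    print(resp)
finally:
    svc.close()
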
mlrun/feature_store/ingestion.py

@@ -89,7 +89,7 @@ def init_featureset_graph(
     key_fields = entity_columns if entity_columns else None

     sizes = [0] * len(targets)
-    result_dfs = []
+    data_result = None
     total_rows = 0
     targets = [get_target_driver(target, featureset) for target in targets]
     if featureset.spec.passthrough:
@@ -100,11 +100,11 @@ def init_featureset_graph(
         # set the entities to be the indexes of the df
         event.body = entities_to_index(featureset, event.body)

-        df = server.run(event, get_body=True)
-        if df is not None:
+        data = server.run(event, get_body=True)
+        if data is not None:
             for i, target in enumerate(targets):
                 size = target.write_dataframe(
-                    df,
+                    data,
                     key_column=key_fields,
                     timestamp_key=featureset.spec.timestamp_key,
                     chunk_id=chunk_id,
@@ -112,18 +112,21 @@ def init_featureset_graph(
                 if size:
                     sizes[i] += size
             chunk_id += 1
-            result_dfs.append(df)
-            total_rows += df.shape[0]
+            if data_result is None:
+                # in case of multiple chunks only return the first chunk (last may be too small)
+                data_result = data
+            total_rows += data.shape[0]
             if rows_limit and total_rows >= rows_limit:
                 break

+    # todo: fire termination event if iterator
+
     for i, target in enumerate(targets):
         target_status = target.update_resource_status("ready", size=sizes[i])
         if verbose:
             logger.info(f"wrote target: {target_status}")

-    result_df = pd.concat(result_dfs)
-    return result_df.head(rows_limit)
+    return data_result


 def featureset_initializer(server):
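
Net effect of the ingestion.py hunks: instead of concatenating every chunk and trimming with head(rows_limit), init_featureset_graph() now keeps only the first chunk and returns it, which is what a caller such as preview() ends up with. A minimal sketch of the consumer side (the feature set and dataframe are illustrative):

import pandas as pd
import mlrun.feature_store as fstore

quotes_set = fstore.FeatureSet("quotes", entities=[fstore.Entity("ticker")])
quotes_df = pd.DataFrame({"ticker": ["AAPL", "AMZN"], "bid": [180.1, 135.4]})

# preview() runs the graph locally; with this change a chunked source would
# yield only its first chunk as the returned sample dataframe
sample_df = fstore.preview(quotes_set, quotes_df)
print(sample_df.head())
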