mlrun 1.4.0rc25__py3-none-any.whl → 1.5.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (184)
  1. mlrun/__init__.py +2 -35
  2. mlrun/__main__.py +3 -41
  3. mlrun/api/api/api.py +6 -0
  4. mlrun/api/api/endpoints/feature_store.py +0 -4
  5. mlrun/api/api/endpoints/files.py +14 -2
  6. mlrun/api/api/endpoints/frontend_spec.py +2 -1
  7. mlrun/api/api/endpoints/functions.py +95 -59
  8. mlrun/api/api/endpoints/grafana_proxy.py +9 -9
  9. mlrun/api/api/endpoints/logs.py +17 -3
  10. mlrun/api/api/endpoints/model_endpoints.py +3 -2
  11. mlrun/api/api/endpoints/pipelines.py +1 -5
  12. mlrun/api/api/endpoints/projects.py +88 -0
  13. mlrun/api/api/endpoints/runs.py +48 -6
  14. mlrun/api/api/endpoints/submit.py +2 -1
  15. mlrun/api/api/endpoints/workflows.py +355 -0
  16. mlrun/api/api/utils.py +3 -4
  17. mlrun/api/crud/__init__.py +1 -0
  18. mlrun/api/crud/client_spec.py +6 -2
  19. mlrun/api/crud/feature_store.py +5 -0
  20. mlrun/api/crud/model_monitoring/__init__.py +1 -0
  21. mlrun/api/crud/model_monitoring/deployment.py +497 -0
  22. mlrun/api/crud/model_monitoring/grafana.py +96 -42
  23. mlrun/api/crud/model_monitoring/helpers.py +159 -0
  24. mlrun/api/crud/model_monitoring/model_endpoints.py +202 -476
  25. mlrun/api/crud/notifications.py +9 -4
  26. mlrun/api/crud/pipelines.py +6 -11
  27. mlrun/api/crud/projects.py +2 -2
  28. mlrun/api/crud/runtime_resources.py +4 -3
  29. mlrun/api/crud/runtimes/nuclio/helpers.py +5 -1
  30. mlrun/api/crud/secrets.py +21 -0
  31. mlrun/api/crud/workflows.py +352 -0
  32. mlrun/api/db/base.py +16 -1
  33. mlrun/api/db/init_db.py +2 -4
  34. mlrun/api/db/session.py +1 -1
  35. mlrun/api/db/sqldb/db.py +129 -31
  36. mlrun/api/db/sqldb/models/models_mysql.py +15 -1
  37. mlrun/api/db/sqldb/models/models_sqlite.py +16 -2
  38. mlrun/api/launcher.py +38 -6
  39. mlrun/api/main.py +3 -2
  40. mlrun/api/rundb/__init__.py +13 -0
  41. mlrun/{db → api/rundb}/sqldb.py +36 -84
  42. mlrun/api/runtime_handlers/__init__.py +56 -0
  43. mlrun/api/runtime_handlers/base.py +1247 -0
  44. mlrun/api/runtime_handlers/daskjob.py +209 -0
  45. mlrun/api/runtime_handlers/kubejob.py +37 -0
  46. mlrun/api/runtime_handlers/mpijob.py +147 -0
  47. mlrun/api/runtime_handlers/remotesparkjob.py +29 -0
  48. mlrun/api/runtime_handlers/sparkjob.py +148 -0
  49. mlrun/api/schemas/__init__.py +17 -6
  50. mlrun/api/utils/builder.py +1 -4
  51. mlrun/api/utils/clients/chief.py +14 -0
  52. mlrun/api/utils/clients/iguazio.py +33 -33
  53. mlrun/api/utils/clients/nuclio.py +2 -2
  54. mlrun/api/utils/periodic.py +9 -2
  55. mlrun/api/utils/projects/follower.py +14 -7
  56. mlrun/api/utils/projects/leader.py +2 -1
  57. mlrun/api/utils/projects/remotes/nop_follower.py +2 -2
  58. mlrun/api/utils/projects/remotes/nop_leader.py +2 -2
  59. mlrun/api/utils/runtimes/__init__.py +14 -0
  60. mlrun/api/utils/runtimes/nuclio.py +43 -0
  61. mlrun/api/utils/scheduler.py +98 -15
  62. mlrun/api/utils/singletons/db.py +5 -1
  63. mlrun/api/utils/singletons/project_member.py +4 -1
  64. mlrun/api/utils/singletons/scheduler.py +1 -1
  65. mlrun/artifacts/base.py +6 -6
  66. mlrun/artifacts/dataset.py +4 -4
  67. mlrun/artifacts/manager.py +2 -3
  68. mlrun/artifacts/model.py +2 -2
  69. mlrun/artifacts/plots.py +8 -8
  70. mlrun/common/db/__init__.py +14 -0
  71. mlrun/common/helpers.py +37 -0
  72. mlrun/{mlutils → common/model_monitoring}/__init__.py +3 -2
  73. mlrun/common/model_monitoring/helpers.py +69 -0
  74. mlrun/common/schemas/__init__.py +13 -1
  75. mlrun/common/schemas/auth.py +4 -1
  76. mlrun/common/schemas/client_spec.py +1 -1
  77. mlrun/common/schemas/function.py +17 -0
  78. mlrun/common/schemas/model_monitoring/__init__.py +48 -0
  79. mlrun/common/{model_monitoring.py → schemas/model_monitoring/constants.py} +11 -23
  80. mlrun/common/schemas/model_monitoring/grafana.py +55 -0
  81. mlrun/common/schemas/{model_endpoints.py → model_monitoring/model_endpoints.py} +32 -65
  82. mlrun/common/schemas/notification.py +1 -0
  83. mlrun/common/schemas/object.py +4 -0
  84. mlrun/common/schemas/project.py +1 -0
  85. mlrun/common/schemas/regex.py +1 -1
  86. mlrun/common/schemas/runs.py +1 -8
  87. mlrun/common/schemas/schedule.py +1 -8
  88. mlrun/common/schemas/workflow.py +54 -0
  89. mlrun/config.py +45 -42
  90. mlrun/datastore/__init__.py +21 -0
  91. mlrun/datastore/base.py +1 -1
  92. mlrun/datastore/datastore.py +9 -0
  93. mlrun/datastore/dbfs_store.py +168 -0
  94. mlrun/datastore/helpers.py +18 -0
  95. mlrun/datastore/sources.py +1 -0
  96. mlrun/datastore/store_resources.py +2 -5
  97. mlrun/datastore/v3io.py +1 -2
  98. mlrun/db/__init__.py +4 -68
  99. mlrun/db/base.py +12 -0
  100. mlrun/db/factory.py +65 -0
  101. mlrun/db/httpdb.py +175 -20
  102. mlrun/db/nopdb.py +4 -2
  103. mlrun/execution.py +4 -2
  104. mlrun/feature_store/__init__.py +1 -0
  105. mlrun/feature_store/api.py +1 -2
  106. mlrun/feature_store/common.py +2 -1
  107. mlrun/feature_store/feature_set.py +1 -11
  108. mlrun/feature_store/feature_vector.py +340 -2
  109. mlrun/feature_store/ingestion.py +5 -10
  110. mlrun/feature_store/retrieval/base.py +118 -104
  111. mlrun/feature_store/retrieval/dask_merger.py +17 -10
  112. mlrun/feature_store/retrieval/job.py +4 -1
  113. mlrun/feature_store/retrieval/local_merger.py +18 -18
  114. mlrun/feature_store/retrieval/spark_merger.py +21 -14
  115. mlrun/feature_store/retrieval/storey_merger.py +22 -16
  116. mlrun/kfpops.py +3 -9
  117. mlrun/launcher/base.py +57 -53
  118. mlrun/launcher/client.py +5 -4
  119. mlrun/launcher/factory.py +24 -13
  120. mlrun/launcher/local.py +6 -6
  121. mlrun/launcher/remote.py +4 -4
  122. mlrun/lists.py +0 -11
  123. mlrun/model.py +11 -17
  124. mlrun/model_monitoring/__init__.py +2 -22
  125. mlrun/model_monitoring/features_drift_table.py +1 -1
  126. mlrun/model_monitoring/helpers.py +22 -210
  127. mlrun/model_monitoring/model_endpoint.py +1 -1
  128. mlrun/model_monitoring/model_monitoring_batch.py +127 -50
  129. mlrun/model_monitoring/prometheus.py +219 -0
  130. mlrun/model_monitoring/stores/__init__.py +16 -11
  131. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +95 -23
  132. mlrun/model_monitoring/stores/models/mysql.py +47 -29
  133. mlrun/model_monitoring/stores/models/sqlite.py +47 -29
  134. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +31 -19
  135. mlrun/model_monitoring/{stream_processing_fs.py → stream_processing.py} +206 -64
  136. mlrun/model_monitoring/tracking_policy.py +104 -0
  137. mlrun/package/packager.py +6 -8
  138. mlrun/package/packagers/default_packager.py +121 -10
  139. mlrun/package/packagers/numpy_packagers.py +1 -1
  140. mlrun/platforms/__init__.py +0 -2
  141. mlrun/platforms/iguazio.py +0 -56
  142. mlrun/projects/pipelines.py +53 -159
  143. mlrun/projects/project.py +10 -37
  144. mlrun/render.py +1 -1
  145. mlrun/run.py +8 -124
  146. mlrun/runtimes/__init__.py +6 -42
  147. mlrun/runtimes/base.py +29 -1249
  148. mlrun/runtimes/daskjob.py +2 -198
  149. mlrun/runtimes/funcdoc.py +0 -9
  150. mlrun/runtimes/function.py +25 -29
  151. mlrun/runtimes/kubejob.py +5 -29
  152. mlrun/runtimes/local.py +1 -1
  153. mlrun/runtimes/mpijob/__init__.py +2 -2
  154. mlrun/runtimes/mpijob/abstract.py +10 -1
  155. mlrun/runtimes/mpijob/v1.py +0 -76
  156. mlrun/runtimes/mpijob/v1alpha1.py +1 -74
  157. mlrun/runtimes/nuclio.py +3 -2
  158. mlrun/runtimes/pod.py +28 -18
  159. mlrun/runtimes/remotesparkjob.py +1 -15
  160. mlrun/runtimes/serving.py +14 -6
  161. mlrun/runtimes/sparkjob/__init__.py +0 -1
  162. mlrun/runtimes/sparkjob/abstract.py +4 -131
  163. mlrun/runtimes/utils.py +0 -26
  164. mlrun/serving/routers.py +7 -7
  165. mlrun/serving/server.py +11 -8
  166. mlrun/serving/states.py +7 -1
  167. mlrun/serving/v2_serving.py +6 -6
  168. mlrun/utils/helpers.py +23 -42
  169. mlrun/utils/notifications/notification/__init__.py +4 -0
  170. mlrun/utils/notifications/notification/webhook.py +61 -0
  171. mlrun/utils/notifications/notification_pusher.py +5 -25
  172. mlrun/utils/regex.py +7 -2
  173. mlrun/utils/version/version.json +2 -2
  174. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/METADATA +26 -25
  175. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/RECORD +180 -158
  176. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/WHEEL +1 -1
  177. mlrun/mlutils/data.py +0 -160
  178. mlrun/mlutils/models.py +0 -78
  179. mlrun/mlutils/plots.py +0 -902
  180. mlrun/utils/model_monitoring.py +0 -249
  181. /mlrun/{api/db/sqldb/session.py → common/db/sql_session.py} +0 -0
  182. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/LICENSE +0 -0
  183. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/entry_points.txt +0 -0
  184. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/top_level.txt +0 -0
mlrun/feature_store/retrieval/base.py

@@ -16,13 +16,12 @@ import abc
 import typing
 from datetime import datetime
 
-import dask.dataframe as dd
 import pandas as pd
 
 import mlrun
 from mlrun.datastore.targets import CSVTarget, ParquetTarget
 from mlrun.feature_store.feature_set import FeatureSet
-from mlrun.feature_store.feature_vector import Feature
+from mlrun.feature_store.feature_vector import Feature, JoinGraph
 
 from ...utils import logger, str_to_timestamp
 from ..feature_vector import OfflineVectorResponse
@@ -42,6 +41,7 @@ class BaseMerger(abc.ABC):
     def __init__(self, vector, **engine_args):
         self._relation = dict()
         self._join_type = "inner"
+        self._default_join_type = "default_join"
         self.vector = vector
 
         self._result_df = None
@@ -196,21 +196,34 @@ class BaseMerger(abc.ABC):
         ) # the struct of key is [[[],[]], ..] So that each record indicates which way the corresponding
         # featureset is connected to the previous one, and within each record the left keys are indicated in index 0
         # and the right keys in index 1, this keys will be the keys that will be used in this join
+        join_types = []
 
-        fs_link_list = self._create_linked_relation_list(
-            feature_set_objects, feature_set_fields
+        entity_rows_keys = (
+            list(entity_rows.columns) if entity_rows is not None else None
         )
+        join_graph = self._get_graph(
+            feature_set_objects, feature_set_fields, entity_rows_keys
+        )
+        if entity_rows_keys:
+            entity_rows = self._convert_entity_rows_to_engine_df(entity_rows)
+            dfs.append(entity_rows)
+            keys.append([[], []])
+            feature_sets.append(None)
+            join_types.append(None)
 
         filtered = False
-        for node in fs_link_list:
-            name = node.name
+        for step in join_graph.steps:
+            name = step.right_feature_set_name
             feature_set = feature_set_objects[name]
+            saved_columns_for_relation = list(
+                self.vector.get_feature_set_relations(feature_set).keys()
+            )
             feature_sets.append(feature_set)
             columns = feature_set_fields[name]
             self._origin_alias.update({name: alias for name, alias in columns})
             column_names = [name for name, _ in columns]
 
-            for column in node.data["save_cols"]:
+            for column in saved_columns_for_relation:
                 if column not in column_names:
                     column_names.append(column)
                 if column not in self._index_columns:
@@ -247,19 +260,19 @@ class BaseMerger(abc.ABC):
                     time_column,
                 )
 
-            column_names += node.data["save_index"]
-            node.data["save_cols"] += node.data["save_index"]
             fs_entities_and_timestamp = list(feature_set.spec.entities.keys())
+            column_names += fs_entities_and_timestamp
+            saved_columns_for_relation += fs_entities_and_timestamp
             if feature_set.spec.timestamp_key:
                 column_names.append(feature_set.spec.timestamp_key)
-                node.data["save_cols"].append(feature_set.spec.timestamp_key)
+                saved_columns_for_relation.append(feature_set.spec.timestamp_key)
                 fs_entities_and_timestamp.append(feature_set.spec.timestamp_key)
 
             # rename columns to be unique for each feature set and select if needed
             rename_col_dict = {
                 column: f"{column}_{name}"
                 for column in column_names
-                if column not in node.data["save_cols"]
+                if column not in saved_columns_for_relation
             }
             df_temp = self._rename_columns_and_select(
                 df,
@@ -274,7 +287,8 @@ class BaseMerger(abc.ABC):
             dfs.append(df)
             del df
 
-            keys.append([node.data["left_keys"], node.data["right_keys"]])
+            keys.append([step.left_keys, step.right_keys])
+            join_types.append([step.join_type, step.asof_join])
 
             # update alias according to the unique column name
             new_columns = []
@@ -293,31 +307,13 @@ class BaseMerger(abc.ABC):
                 "start_time and end_time can only be provided in conjunction with "
                 "a timestamp column, or when the at least one feature_set has a timestamp key"
             )
-        # convert pandas entity_rows to spark\dask DF if needed
-        if (
-            entity_rows is not None
-            and not hasattr(entity_rows, "rdd")
-            and self.engine == "spark"
-        ):
-            entity_rows = self.spark.createDataFrame(entity_rows)
-        elif (
-            entity_rows is not None
-            and not hasattr(entity_rows, "dask")
-            and self.engine == "dask"
-        ):
-            entity_rows = dd.from_pandas(
-                entity_rows, npartitions=len(entity_rows.columns)
-            )
-
         # join the feature data frames
         result_timestamp = self.merge(
-            entity_df=entity_rows,
-            entity_timestamp_column=entity_timestamp_column
-            if entity_rows is not None
-            else None,
+            entity_timestamp_column=entity_timestamp_column,
             featuresets=feature_sets,
             featureset_dfs=dfs,
             keys=keys,
+            join_types=join_types,
         )
 
         all_columns = None
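
The engine-specific conversion deleted above moves behind a new per-engine hook, _convert_entity_rows_to_engine_df, declared at the end of this file and overridden by each merger, as the later hunks show. A minimal sketch of the pattern, with illustrative class names rather than mlrun's:

    import dask.dataframe as dd

    class BaseMergerSketch:
        def _convert_entity_rows_to_engine_df(self, entity_rows):
            # each engine converts pandas entity rows to its native frame type
            raise NotImplementedError

    class DaskMergerSketch(BaseMergerSketch):
        def _convert_entity_rows_to_engine_df(self, entity_rows):
            # wrap plain pandas input in a dask dataframe; pass dask input through
            if entity_rows is not None and not hasattr(entity_rows, "dask"):
                return dd.from_pandas(entity_rows, npartitions=len(entity_rows.columns))
            return entity_rows

    class LocalMergerSketch(BaseMergerSketch):
        def _convert_entity_rows_to_engine_df(self, entity_rows):
            return entity_rows  # pandas is already the local engine's format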
@@ -386,41 +382,46 @@ class BaseMerger(abc.ABC):
 
     def merge(
         self,
-        entity_df,
         entity_timestamp_column: str,
         featuresets: list,
         featureset_dfs: list,
         keys: list = None,
+        join_types: list = None,
     ):
         """join the entities and feature set features into a result dataframe"""
-        merged_df = entity_df
-        if entity_df is None and featureset_dfs:
-            merged_df = featureset_dfs.pop(0)
-            featureset = featuresets.pop(0)
-            if keys is not None:
-                keys.pop(0)
-            else:
-                # keys can be multiple keys on each side of the join
-                keys = [[[], []]] * len(featureset_dfs)
-            entity_timestamp_column = (
-                entity_timestamp_column or featureset.spec.timestamp_key
-            )
-        elif entity_df is not None and featureset_dfs:
-            # when `entity_rows` passed to `get_offline_features`
-            # keys[0] mention the way that `entity_rows` joins to the first `featureset`
-            # and it can join only by the entities of the first `featureset`
-            keys[0][0] = keys[0][1] = list(featuresets[0].spec.entities.keys())
-
-        for featureset, featureset_df, lr_key in zip(featuresets, featureset_dfs, keys):
-            if featureset.spec.timestamp_key and entity_timestamp_column:
+
+        merged_df = featureset_dfs.pop(0)
+        featureset = featuresets.pop(0)
+        keys.pop(0)
+        join_types.pop(0)
+
+        if not entity_timestamp_column and featureset:
+            entity_timestamp_column = featureset.spec.timestamp_key
+
+        for featureset, featureset_df, lr_key, join_type in zip(
+            featuresets, featureset_dfs, keys, join_types
+        ):
+            join_type, as_of = join_type
+            if (
+                featureset.spec.timestamp_key
+                and entity_timestamp_column
+                and join_type == self._default_join_type
+            ):
                 merge_func = self._asof_join
-            else:
+            elif join_type == self._default_join_type:
+                merge_func = self._join
+            elif join_type != self._default_join_type and not as_of:
+                self._join_type = join_type
                 merge_func = self._join
+            else:
+                self._join_type = join_type
+                merge_func = self._asof_join
 
             merged_df = merge_func(
                 merged_df,
                 entity_timestamp_column,
-                featureset,
+                featureset.metadata.name,
+                featureset.spec.timestamp_key,
                 featureset_df,
                 lr_key[0],
                 lr_key[1],
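
The new dispatch can be read as: a step carrying the "default_join" sentinel keeps the pre-1.5 behavior (as-of join when both sides have timestamps, plain join otherwise), while an explicit join type from the join graph overrides self._join_type, with the step's asof_join flag selecting the merge kind. A standalone sketch of that decision table, using illustrative names:

    DEFAULT_JOIN = "default_join"  # mirrors BaseMerger._default_join_type

    def pick_merge_func(has_fs_timestamp, has_entity_timestamp, join_type, as_of):
        if join_type == DEFAULT_JOIN:
            # sentinel: fall back to the old timestamp heuristic
            return "_asof_join" if (has_fs_timestamp and has_entity_timestamp) else "_join"
        # explicit type from the join graph: the asof flag decides the merge kind
        return "_asof_join" if as_of else "_join"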
@@ -441,7 +442,8 @@ class BaseMerger(abc.ABC):
         self,
         entity_df,
         entity_timestamp_column: str,
-        featureset,
+        featureset_name: str,
+        featureset_timstamp: str,
         featureset_df,
         left_keys: list,
         right_keys: list,
@@ -452,7 +454,8 @@ class BaseMerger(abc.ABC):
         self,
         entity_df,
         entity_timestamp_column: str,
-        featureset,
+        featureset_name: str,
+        featureset_timestamp: str,
         featureset_df,
         left_keys: list,
         right_keys: list,
@@ -480,10 +483,42 @@ class BaseMerger(abc.ABC):
         size = CSVTarget(path=target_path).write_dataframe(self._result_df, **kw)
         return size
 
+    def _get_graph(
+        self, feature_set_objects, feature_set_fields, entity_rows_keys=None
+    ):
+        join_graph = self.vector.spec.join_graph
+        if not join_graph:
+            fs_link_list = self._create_linked_relation_list(
+                feature_set_objects, feature_set_fields, entity_rows_keys
+            )
+            join_graph = None
+            for i, node in enumerate(fs_link_list):
+                if node.name != self._entity_rows_node_name and join_graph is None:
+                    join_graph = JoinGraph(first_feature_set=node.name)
+                elif node.name == self._entity_rows_node_name:
+                    continue
+                else:
+                    join_graph.inner(other_operand=node.name)
+
+                last_step = join_graph.steps[-1]
+                last_step.join_type = self._default_join_type
+                last_step.left_keys = node.left_keys
+                last_step.right_keys = node.right_keys
+        else:
+            join_graph._init_all_join_keys(feature_set_objects, self.vector)
+        return join_graph
+
     class _Node:
-        def __init__(self, name: str, order: int, data=None):
+        def __init__(
+            self,
+            name: str,
+            order: int,
+            left_keys: typing.List[str] = None,
+            right_keys: typing.List[str] = None,
+        ):
             self.name = name
-            self.data = data
+            self.left_keys = left_keys if left_keys is not None else []
+            self.right_keys = right_keys if right_keys is not None else []
             # order of this feature_set in the original list
             self.order = order
             self.next = None
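
When the vector spec carries no explicit join graph, _get_graph synthesizes one from the linked relation list, stamping every step with the default sentinel. Using only the JoinGraph surface this hunk relies on (first_feature_set, inner(), steps), the synthesized graph for three chained feature sets would look roughly like the following sketch (names are illustrative):

    graph = JoinGraph(first_feature_set="a")
    graph.inner(other_operand="b")
    graph.inner(other_operand="c")
    for step in graph.steps:
        step.join_type = "default_join"  # merge() later resolves asof vs. plain join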
@@ -495,7 +530,9 @@ class BaseMerger(abc.ABC):
             return self.name == other.name
 
         def __copy__(self):
-            return BaseMerger._Node(self.name, self.order, self.data.copy())
+            return BaseMerger._Node(
+                self.name, self.order, self.left_keys, self.right_keys
+            )
 
     class _LinkedList:
         def __init__(self, head=None):
@@ -565,9 +602,6 @@ class BaseMerger(abc.ABC):
             node = self.find_node(other_head.name)
             if node is None:
                 return
-            for col in other_head.data["save_cols"]:
-                if col not in node.data["save_cols"]:
-                    node.data["save_cols"].append(col)
             for other_node in other_iter:
                 if self.find_node(other_node.name) is None:
                     while node is not None and other_node.order > node.order:
@@ -587,24 +621,24 @@ class BaseMerger(abc.ABC):
             head=BaseMerger._Node(
                 name=feature_set_names[0],
                 order=0,
-                data={
-                    "left_keys": [],
-                    "right_keys": [],
-                    "save_cols": [],
-                    "save_index": [],
-                },
             )
         )
         relation_linked_lists = []
         feature_set_entity_list_dict = {
             name: feature_set_objects[name].spec.entities for name in feature_set_names
         }
-        entity_relation_val_list = {
-            name: list(feature_set_objects[name].spec.relations.values())
+        relation_val_list = {
+            name: list(
+                self.vector.get_feature_set_relations(
+                    feature_set_objects[name]
+                ).values()
+            )
             for name in feature_set_names
         }
-        entity_relation_key_list = {
-            name: list(feature_set_objects[name].spec.relations.keys())
+        relation_key_list = {
+            name: list(
+                self.vector.get_feature_set_relations(feature_set_objects[name]).keys()
+            )
             for name in feature_set_names
         }
 
@@ -612,12 +646,6 @@ class BaseMerger(abc.ABC):
             relations = BaseMerger._LinkedList()
             main_node = BaseMerger._Node(
                 name,
-                data={
-                    "left_keys": [],
-                    "right_keys": [],
-                    "save_cols": [],
-                    "save_index": [],
-                },
                 order=order,
             )
             relations.add_first(main_node)
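
A note on the lookup change in these hunks: relations are now resolved through self.vector.get_feature_set_relations(...) instead of being read directly off feature_set.spec.relations, which appears to let the feature vector supply or override join relations per feature set (feature_vector.py grows by +340 lines in this release, including the new JoinGraph).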
@@ -629,8 +657,8 @@ class BaseMerger(abc.ABC):
             name_head = linked_list_relation.head.name
             feature_set_in_entity_list = feature_set_entity_list_dict[fs_name_in]
             feature_set_in_entity_list_names = list(feature_set_in_entity_list.keys())
-            entity_relation_list = entity_relation_val_list[name_head]
-            col_relation_list = entity_relation_key_list[name_head]
+            entity_relation_list = relation_val_list[name_head]
+            col_relation_list = relation_key_list[name_head]
             curr_col_relation_list = list(
                 map(
                     lambda ent: (
@@ -649,18 +677,11 @@ class BaseMerger(abc.ABC):
                 linked_list_relation.add_last(
                     BaseMerger._Node(
                         fs_name_in,
-                        data={
-                            "left_keys": curr_col_relation_list,
-                            "right_keys": feature_set_in_entity_list_names,
-                            "save_cols": [],
-                            "save_index": [],
-                        },
+                        left_keys=curr_col_relation_list,
+                        right_keys=feature_set_in_entity_list_names,
                         order=name_in_order,
                     )
                 )
-                linked_list_relation.head.data["save_cols"].extend(
-                    curr_col_relation_list
-                )
             elif name_in_order > head_order and sorted(
                 feature_set_in_entity_list_names
             ) == sorted(feature_set_entity_list_dict[name_head].keys()):
@@ -669,16 +690,11 @@ class BaseMerger(abc.ABC):
                 linked_list_relation.add_last(
                     BaseMerger._Node(
                         fs_name_in,
-                        data={
-                            "left_keys": keys,
-                            "right_keys": keys,
-                            "save_cols": [],
-                            "save_index": keys,
-                        },
+                        left_keys=keys,
+                        right_keys=keys,
                         order=name_in_order,
                     )
                 )
-                linked_list_relation.head.data["save_index"] = keys
             return linked_list_relation
 
         def _build_entity_rows_relation(entity_rows_relation, fs_name, fs_order):
@@ -692,16 +708,11 @@ class BaseMerger(abc.ABC):
             entity_rows_relation.add_last(
                 BaseMerger._Node(
                     fs_name,
-                    data={
-                        "left_keys": keys,
-                        "right_keys": keys,
-                        "save_cols": [],
-                        "save_index": keys,
-                    },
+                    left_keys=keys,
+                    right_keys=keys,
                     order=fs_order,
                 )
             )
-            entity_rows_relation.head.data["save_index"] = keys
 
         if entity_rows_keys is not None:
             entity_rows_linked_relation = _create_relation(
@@ -805,3 +816,6 @@ class BaseMerger(abc.ABC):
         :param order_by_active: list of names to sort by.
         """
         raise NotImplementedError
+
+    def _convert_entity_rows_to_engine_df(self, entity_rows):
+        raise NotImplementedError
mlrun/feature_store/retrieval/dask_merger.py

@@ -41,8 +41,9 @@ class DaskFeatureMerger(BaseMerger):
         self,
         entity_df,
         entity_timestamp_column: str,
-        featureset,
-        featureset_df,
+        featureset_name: str,
+        featureset_timestamp: str,
+        featureset_df: list,
         left_keys: list,
         right_keys: list,
     ):
@@ -53,20 +54,20 @@ class DaskFeatureMerger(BaseMerger):
             sort_partition, timestamp=entity_timestamp_column
         )
         featureset_df = featureset_df.map_partitions(
-            sort_partition, timestamp=featureset.spec.timestamp_key
+            sort_partition, timestamp=featureset_timestamp
         )
 
         merged_df = merge_asof(
             entity_df,
             featureset_df,
             left_on=entity_timestamp_column,
-            right_on=featureset.spec.timestamp_key,
+            right_on=featureset_timestamp,
             left_by=left_keys or None,
             right_by=right_keys or None,
-            suffixes=("", f"_{featureset.metadata.name}_"),
+            suffixes=("", f"_{featureset_name}_"),
         )
         for col in merged_df.columns:
-            if re.findall(f"_{featureset.metadata.name}_$", col):
+            if re.findall(f"_{featureset_name}_$", col):
                 self._append_drop_column(col)
 
         return merged_df
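
The suffix trick above is plain pandas/dask merge behavior: overlapping right-hand columns get a _{featureset_name}_ suffix, which the merger then queues for dropping. A tiny self-contained pandas illustration:

    import pandas as pd

    left = pd.DataFrame({"k": [1], "v": [10]})
    right = pd.DataFrame({"k": [1], "v": [20]})
    out = pd.merge(left, right, on="k", suffixes=("", "_fs1_"))
    # columns are now ['k', 'v', 'v_fs1_']; anything matching '_fs1_$' is dropped later
    print(list(out.columns))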
@@ -75,23 +76,23 @@ class DaskFeatureMerger(BaseMerger):
         self,
         entity_df,
         entity_timestamp_column: str,
-        featureset,
+        featureset_name,
+        featureset_timestamp,
         featureset_df,
         left_keys: list,
         right_keys: list,
     ):
 
-        fs_name = featureset.metadata.name
         merged_df = merge(
             entity_df,
             featureset_df,
             how=self._join_type,
             left_on=left_keys,
             right_on=right_keys,
-            suffixes=("", f"_{fs_name}_"),
+            suffixes=("", f"_{featureset_name}_"),
         )
         for col in merged_df.columns:
-            if re.findall(f"_{fs_name}_$", col):
+            if re.findall(f"_{featureset_name}_$", col):
                 self._append_drop_column(col)
         return merged_df
 
@@ -155,3 +156,9 @@ class DaskFeatureMerger(BaseMerger):
 
     def _order_by(self, order_by_active):
         self._result_df.sort_values(by=order_by_active)
+
+    def _convert_entity_rows_to_engine_df(self, entity_rows):
+        if entity_rows is not None and not hasattr(entity_rows, "dask"):
+            return dd.from_pandas(entity_rows, npartitions=len(entity_rows.columns))
+
+        return entity_rows
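
The dd alias used by this new override comes from dask_merger.py's own dask import; base.py no longer imports dask.dataframe at all, as the first hunk of this diff shows.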
mlrun/feature_store/retrieval/job.py

@@ -62,9 +62,12 @@ def run_merge_job(
     function = run_config.to_function(kind, merger.get_default_image(kind))
 
     # Avoid overriding a handler that was provided by the user
-    # The user shouldn't have to provide a handler, but we leave this option open just in case
     if not run_config.handler:
         function.with_code(body=default_code)
+    else:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "get_offline_features does not support run_config with a handler"
+        )
 
     function.metadata.project = vector.metadata.project
     function.metadata.name = function.metadata.name or name
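
In practice a run_config that sets a handler is now rejected up front instead of being silently honored. A hedged usage sketch (the vector URI and handler name are made up, and the exact point where the error surfaces may vary):

    import mlrun
    import mlrun.feature_store as fstore

    try:
        fstore.get_offline_features(
            "store://feature-vectors/my-project/my-vector",
            run_config=fstore.RunConfig(handler="my_handler"),  # no longer supported
        )
    except mlrun.errors.MLRunInvalidArgumentError as err:
        print(err)  # get_offline_features does not support run_config with a handler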
mlrun/feature_store/retrieval/local_merger.py

@@ -30,45 +30,42 @@ class LocalFeatureMerger(BaseMerger):
         self,
         entity_df,
         entity_timestamp_column: str,
-        featureset,
-        featureset_df,
+        featureset_name,
+        featureset_timstamp,
+        featureset_df: list,
         left_keys: list,
         right_keys: list,
     ):
 
-        indexes = None
-        if not right_keys:
-            indexes = list(featureset.spec.entities.keys())
         index_col_not_in_entity = "index" not in entity_df.columns
         index_col_not_in_featureset = "index" not in featureset_df.columns
         entity_df[entity_timestamp_column] = pd.to_datetime(
             entity_df[entity_timestamp_column]
         )
-        featureset_df[featureset.spec.timestamp_key] = pd.to_datetime(
-            featureset_df[featureset.spec.timestamp_key]
+        featureset_df[featureset_timstamp] = pd.to_datetime(
+            featureset_df[featureset_timstamp]
         )
         entity_df.sort_values(by=entity_timestamp_column, inplace=True)
-        featureset_df.sort_values(by=featureset.spec.timestamp_key, inplace=True)
+        featureset_df.sort_values(by=featureset_timstamp, inplace=True)
 
         merged_df = pd.merge_asof(
             entity_df,
             featureset_df,
             left_on=entity_timestamp_column,
-            right_on=featureset.spec.timestamp_key,
-            by=indexes,
+            right_on=featureset_timstamp,
             left_by=left_keys or None,
             right_by=right_keys or None,
-            suffixes=("", f"_{featureset.metadata.name}_"),
+            suffixes=("", f"_{featureset_name}_"),
         )
         for col in merged_df.columns:
-            if re.findall(f"_{featureset.metadata.name}_$", col):
+            if re.findall(f"_{featureset_name}_$", col):
                 self._append_drop_column(col)
         # Undo indexing tricks for asof merge
         # to return the correct indexes and not
         # overload `index` columns
         if (
-            indexes
-            and "index" not in indexes
+            "index" not in left_keys
+            and "index" not in right_keys
             and index_col_not_in_entity
             and index_col_not_in_featureset
             and "index" in merged_df.columns
80
77
  self,
81
78
  entity_df,
82
79
  entity_timestamp_column: str,
83
- featureset,
80
+ featureset_name,
81
+ featureset_timestamp,
84
82
  featureset_df,
85
83
  left_keys: list,
86
84
  right_keys: list,
87
85
  ):
88
- fs_name = featureset.metadata.name
89
86
  merged_df = pd.merge(
90
87
  entity_df,
91
88
  featureset_df,
92
89
  how=self._join_type,
93
90
  left_on=left_keys,
94
91
  right_on=right_keys,
95
- suffixes=("", f"_{fs_name}_"),
92
+ suffixes=("", f"_{featureset_name}_"),
96
93
  )
97
94
  for col in merged_df.columns:
98
- if re.findall(f"_{fs_name}_$", col):
95
+ if re.findall(f"_{featureset_name}_$", col):
99
96
  self._append_drop_column(col)
100
97
  return merged_df
101
98
 
@@ -135,3 +132,6 @@ class LocalFeatureMerger(BaseMerger):
 
     def _order_by(self, order_by_active):
         self._result_df.sort_values(by=order_by_active, ignore_index=True, inplace=True)
+
+    def _convert_entity_rows_to_engine_df(self, entity_rows):
+        return entity_rows
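
For the local (pandas) engine the new hook is the identity, since entity rows arrive as pandas dataframes already.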