mlrun 1.8.0rc42__py3-none-any.whl → 1.8.0rc44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

@@ -0,0 +1,466 @@
1
+ # Copyright 2023 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import typing
15
+ from copy import copy
16
+ from typing import Union
17
+
18
+ import numpy as np
19
+
20
+ import mlrun
21
+ from mlrun.feature_store import FeatureSet
22
+
23
+ from ..model import ModelObj, ObjectList
24
+
25
+ # Forward reference for type annotations
26
+
27
+
28
class _JoinStep(ModelObj):
    """A single join step inside a :class:`JoinGraph`.

    Each step joins the accumulated "left" side (the output of the previous
    step, which may span several feature sets) with one additional "right"
    feature set, using ``join_type`` and, optionally, an as-of join.
    The key columns used for the join are resolved lazily by
    :meth:`init_join_keys`.
    """

    def __init__(
        self,
        name: typing.Optional[str] = None,
        left_step_name: typing.Optional[str] = None,
        right_step_name: typing.Optional[str] = None,
        left_feature_set_names: typing.Optional[Union[str, list[str]]] = None,
        right_feature_set_name: typing.Optional[str] = None,
        join_type: str = "inner",
        asof_join: bool = False,
    ):
        """
        :param name:                   step name (e.g. ``step_0``)
        :param left_step_name:         name of the previous step feeding the left side
        :param right_step_name:        name of the step feeding the right side
        :param left_feature_set_names: feature set name(s) already merged on the left side
        :param right_feature_set_name: the feature set joined in by this step
        :param join_type:              join type ("inner", "outer", "left", "right", ...)
        :param asof_join:              whether to perform an as-of join
        """
        self.name = name
        self.left_step_name = left_step_name
        self.right_step_name = right_step_name
        # normalize a single feature-set name into a one-element list
        self.left_feature_set_names = (
            left_feature_set_names
            if left_feature_set_names is None
            or isinstance(left_feature_set_names, list)
            else [left_feature_set_names]
        )
        self.right_feature_set_name = right_feature_set_name
        self.join_type = join_type
        self.asof_join = asof_join

        # resolved by init_join_keys(); parallel lists of key column names
        self.left_keys = []
        self.right_keys = []

    def init_join_keys(
        self,
        feature_set_objects: ObjectList,
        vector,
        entity_rows_keys: typing.Optional[list[str]] = None,
    ):
        """Resolve the left/right join key columns for this step.

        :param feature_set_objects: mapping of feature set name -> feature set object
        :param vector:              the feature vector (supplies relation metadata)
        :param entity_rows_keys:    optional entity-row column names, used to check
                                    whether the right feature set can join directly
                                    on the input dataframe
        :raises mlrun.errors.MLRunRuntimeError: when no relation between the left
                                    feature sets and the right feature set can be found
        """
        # if the right feature set can be joined directly on the entity rows,
        # its entity columns serve as both left and right keys
        if feature_set_objects[self.right_feature_set_name].is_connectable_to_df(
            entity_rows_keys
        ):
            self.left_keys, self.right_keys = [
                list(
                    feature_set_objects[
                        self.right_feature_set_name
                    ].spec.entities.keys()
                )
            ] * 2

        # the first step (or a step with no left side) needs no relation lookup
        if (
            self.join_type == JoinGraph.first_join_type
            or not self.left_feature_set_names
        ):
            self.join_type = (
                "inner"
                if self.join_type == JoinGraph.first_join_type
                else self.join_type
            )
            return

        # collect relation keys between every left feature set and the right one
        for left_fset in self.left_feature_set_names:
            current_left_keys = feature_set_objects[left_fset].extract_relation_keys(
                feature_set_objects[self.right_feature_set_name],
                vector.get_feature_set_relations(feature_set_objects[left_fset]),
            )
            current_right_keys = list(
                feature_set_objects[self.right_feature_set_name].spec.entities.keys()
            )
            for i in range(len(current_left_keys)):
                if (
                    current_left_keys[i] not in self.left_keys
                    and current_right_keys[i] not in self.right_keys
                ):
                    self.left_keys.append(current_left_keys[i])
                    self.right_keys.append(current_right_keys[i])

        if not self.left_keys:
            # fixed wording: was "can't be preform due to undefined relation ... to ..."
            raise mlrun.errors.MLRunRuntimeError(
                f"{self.name} can't be performed due to an undefined relation between "
                f"{self.left_feature_set_names} and {self.right_feature_set_name}"
            )
104
+
105
+
106
class JoinGraph(ModelObj):
    """
    A class that represents a graph of data joins between feature sets
    """

    default_graph_name = "$__join_graph_fv__$"
    # sentinel join type marking the very first feature set in the graph
    first_join_type = "first"
    _dict_fields = ["name", "first_feature_set", "steps"]

    def __init__(
        self,
        name: typing.Optional[str] = None,
        first_feature_set: typing.Optional[Union[str, FeatureSet]] = None,
    ):
        """
        JoinGraph is a class that represents a graph of data joins between feature sets. It allows users to define
        data joins step by step, specifying the join type for each step. The graph can be used to build a sequence of
        joins that will be executed in order, allowing the creation of complex join operations between feature sets.


        Example:
            # Create a new JoinGraph and add steps for joining feature sets.
            join_graph = JoinGraph(name="my_join_graph", first_feature_set="featureset1")
            join_graph.inner("featureset2")
            join_graph.left("featureset3", asof_join=True)


        :param name:              (str, optional) The name of the join graph. If not provided,
                                  a default name will be used.
        :param first_feature_set: (str or FeatureSet, optional) The first feature set to join. It can be
                                  specified either as a string representing the name of the feature set or as a
                                  FeatureSet object.
        """
        self.name = name or self.default_graph_name
        self._steps: ObjectList = None
        self._feature_sets = None
        if first_feature_set:
            self._start(first_feature_set)

    def inner(self, other_operand: typing.Union[str, FeatureSet]):
        """
        Specifies an inner join with the given feature set

        :param other_operand: (str or FeatureSet) The name of the feature set or a FeatureSet object to join with.

        :return: JoinGraph: The updated JoinGraph object with the specified inner join.
        """
        return self._join_operands(other_operand, "inner")

    def outer(self, other_operand: typing.Union[str, FeatureSet]):
        """
        Specifies an outer join with the given feature set

        :param other_operand: (str or FeatureSet) The name of the feature set or a FeatureSet object to join with.
        :return: JoinGraph: The updated JoinGraph object with the specified outer join.
        """
        return self._join_operands(other_operand, "outer")

    def left(self, other_operand: typing.Union[str, FeatureSet], asof_join: bool = False):
        """
        Specifies a left join with the given feature set

        :param other_operand: (str or FeatureSet) The name of the feature set or a FeatureSet object to join with.
        :param asof_join:     (bool) A flag indicating whether to perform an as-of join. Defaults to False.

        :return: JoinGraph: The updated JoinGraph object with the specified left join.
        """
        # asof_join now has a default (False) consistent with _join_operands;
        # existing callers that pass it explicitly are unaffected
        return self._join_operands(other_operand, "left", asof_join=asof_join)

    def right(self, other_operand: typing.Union[str, FeatureSet]):
        """
        Specifies a right join with the given feature set

        :param other_operand: (str or FeatureSet) The name of the feature set or a FeatureSet object to join with.

        :return: JoinGraph: The updated JoinGraph object with the specified right join.
        """
        return self._join_operands(other_operand, "right")

    def _join_operands(
        self,
        other_operand: typing.Union[str, FeatureSet],
        join_type: str,
        asof_join: bool = False,
    ):
        """Append a new join step joining ``other_operand`` onto the graph."""
        if isinstance(other_operand, FeatureSet):
            other_operand = other_operand.metadata.name

        first_key_num = len(self._steps.keys()) if self._steps else 0
        left_last_step_name, left_all_feature_sets = (
            self.last_step_name,
            self.all_feature_sets_names,
        )
        # when there are no steps yet, all_feature_sets_names returns the graph
        # name itself — that marks this operand as the first feature set
        is_first_fs = (
            join_type == JoinGraph.first_join_type or left_all_feature_sets == self.name
        )
        # create_new_step
        new_step = _JoinStep(
            f"step_{first_key_num}",
            left_last_step_name if not is_first_fs else "",
            other_operand,
            left_all_feature_sets if not is_first_fs else [],
            other_operand,
            join_type,
            asof_join,
        )

        if self.steps is not None:
            self.steps.update(new_step)
        else:
            self.steps = [new_step]
        return self

    def _start(self, other_operand: typing.Union[str, FeatureSet]):
        """Register the first feature set of the graph (sentinel join type)."""
        return self._join_operands(other_operand, JoinGraph.first_join_type)

    def _init_all_join_keys(
        self,
        feature_set_objects,
        vector,
        entity_rows_keys: typing.Optional[list[str]] = None,
    ):
        """Resolve join keys for every step in the graph."""
        for step in self.steps:
            step.init_join_keys(feature_set_objects, vector, entity_rows_keys)

    @property
    def all_feature_sets_names(self):
        """
        Returns a list of all feature set names included in the join graph.

        :return: List[str]: A list of feature set names.
        """
        if self._steps:
            return self._steps[-1].left_feature_set_names + [
                self._steps[-1].right_feature_set_name
            ]
        else:
            # no steps yet — return the graph name as a sentinel value
            return self.name

    @property
    def last_step_name(self):
        """
        Returns the name of the last step in the join graph.

        :return: str: The name of the last step.
        """
        if self._steps:
            return self._steps[-1].name
        else:
            return self.name

    @property
    def steps(self):
        """
        Returns the list of join steps as ObjectList, which can be used to iterate over the steps
        or access the properties of each step.
        :return: ObjectList: The list of join steps.
        """
        return self._steps

    @steps.setter
    def steps(self, steps):
        """
        Setter for the steps property. It allows updating the join steps.

        :param steps: (List[_JoinStep]) The list of join steps.
        """
        self._steps = ObjectList.from_list(child_class=_JoinStep, children=steps)
274
+
275
+
276
class OnlineVectorService:
    """get_online_feature_service response object"""

    def __init__(
        self,
        vector,
        graph,
        index_columns,
        impute_policy: typing.Optional[dict] = None,
        requested_columns: typing.Optional[list[str]] = None,
    ):
        """
        :param vector:            the feature vector object
        :param graph:             serving graph whose controller feeds the vector
        :param index_columns:     names of the index/entity columns
        :param impute_policy:     mapping of feature name (or "*") to an impute
                                  value; "$<stat>" values are looked up in the
                                  vector's stats table
        :param requested_columns: explicit list of columns to return
        """
        self.vector = vector
        self.impute_policy = impute_policy or {}

        self._controller = graph.controller
        self._index_columns = index_columns
        self._impute_values = {}
        self._requested_columns = requested_columns

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def initialize(self):
        """internal, init the feature service and prep the imputing logic"""
        if not self.impute_policy:
            return

        policy = copy(self.impute_policy)
        stats = self.vector.get_stats_table()
        self._impute_values = {}

        feature_keys = list(self.vector.status.features.keys())
        if self.vector.status.label_column in feature_keys:
            feature_keys.remove(self.vector.status.label_column)

        def resolve(feature, setting):
            # "$<stat>" settings are resolved from the stats table row
            if isinstance(setting, str) and setting.startswith("$"):
                return stats.loc[feature, setting[1:]]
            return setting

        # "*" is a wildcard default applied to every feature not listed explicitly
        if "*" in policy:
            default = policy.pop("*")
            for feature in feature_keys:
                if feature not in policy:
                    self._impute_values[feature] = resolve(feature, default)

        for feature, setting in policy.items():
            if feature not in feature_keys:
                raise mlrun.errors.MLRunInvalidArgumentError(
                    f"feature {feature} in impute_policy but not in feature vector"
                )
            self._impute_values[feature] = resolve(feature, setting)

    @property
    def status(self):
        """vector merger function status (ready, running, error)"""
        return "ready"

    def get(self, entity_rows: list[Union[dict, list]], as_list=False):
        """get feature vector given the provided entity inputs

        take a list of input vectors/rows and return a list of enriched feature vectors
        each input and/or output vector can be a list of values or a dictionary of field names and values,
        to return the vector as a list of values set the `as_list` to True.

        if the input is a list of list (vs a list of dict), the values in the list will correspond to the
        index/entity values, i.e. [["GOOG"], ["MSFT"]] means "GOOG" and "MSFT" are the index/entity fields.

        example::

            # accept list of dict, return list of dict
            svc = fstore.get_online_feature_service(vector)
            resp = svc.get([{"name": "joe"}, {"name": "mike"}])

            # accept list of list, return list of list
            svc = fstore.get_online_feature_service(vector, as_list=True)
            resp = svc.get([["joe"], ["mike"]])

        :param entity_rows:  list of list/dict with input entity data/rows
        :param as_list:      return a list of list (list input is required by many ML frameworks)
        """
        if isinstance(entity_rows, dict):
            entity_rows = [entity_rows]

        # validate we have valid input struct
        if (
            not entity_rows
            or not isinstance(entity_rows, list)
            or not isinstance(entity_rows[0], (list, dict))
        ):
            raise mlrun.errors.MLRunInvalidArgumentError(
                f"input data is of type {type(entity_rows)}. must be a list of lists or list of dicts"
            )

        # if list of list, convert to dicts (with the index columns as the dict keys)
        if isinstance(entity_rows[0], list):
            keys = self._index_columns
            if not keys or len(entity_rows[0]) != len(keys):
                raise mlrun.errors.MLRunInvalidArgumentError(
                    "input list must be in the same size of the index_keys list"
                )
            entity_rows = [dict(zip(keys, row)) for row in entity_rows]

        pending = [
            self._controller.emit(row, return_awaitable_result=True)
            for row in entity_rows
        ]

        results = []
        for awaitable in pending:
            data = awaitable.await_result().body
            if data:
                present = data.keys()
                if all([col in self._index_columns for col in present]):
                    # didn't get any data from the graph
                    results.append(None)
                    continue
                # pad out requested columns the graph didn't return
                for column in self._requested_columns:
                    if (
                        column not in present
                        and column != self.vector.status.label_column
                    ):
                        data[column] = None

                if self._impute_values:
                    for key in data.keys():
                        current = data[key]
                        if current is None or (
                            isinstance(current, float)
                            and (np.isinf(current) or np.isnan(current))
                        ):
                            data[key] = self._impute_values.get(key, current)

                if not self.vector.spec.with_indexes:
                    for key in self.vector.status.index_keys:
                        data.pop(key, None)
                if not any(data.values()):
                    data = None

            if as_list and data:
                data = [
                    data.get(col, None)
                    for col in self._requested_columns
                    if col != self.vector.status.label_column
                ]
            results.append(data)

        return results

    def close(self):
        """terminate the async loop"""
        self._controller.terminate()
438
+
439
+
440
class OfflineVectorResponse:
    """get_offline_features response object"""

    def __init__(self, merger):
        # keep the merger for lazy dataframe/file materialization
        self._merger = merger
        self.vector = merger.vector

    @property
    def status(self):
        """vector prep job status (ready, running, error)"""
        return self._merger.get_status()

    def to_dataframe(self, to_pandas=True):
        """return result as dataframe"""
        if self.status == "completed":
            return self._merger.get_df(to_pandas=to_pandas)
        raise mlrun.errors.MLRunTaskNotReadyError(
            "feature vector dataset is not ready"
        )

    def to_parquet(self, target_path, **kw):
        """return results as parquet file"""
        return self._merger.to_parquet(target_path, **kw)

    def to_csv(self, target_path, **kw):
        """return results as csv file"""
        return self._merger.to_csv(target_path, **kw)
@@ -21,10 +21,9 @@ import pandas as pd
21
21
  import mlrun
22
22
  from mlrun.datastore.targets import CSVTarget, ParquetTarget
23
23
  from mlrun.feature_store.feature_set import FeatureSet
24
- from mlrun.feature_store.feature_vector import JoinGraph
24
+ from mlrun.feature_store.feature_vector_utils import JoinGraph, OfflineVectorResponse
25
25
 
26
26
  from ...utils import logger, str_to_timestamp
27
- from ..feature_vector import OfflineVectorResponse
28
27
 
29
28
 
30
29
  class BaseMerger(abc.ABC):
@@ -16,7 +16,7 @@ from mlrun.datastore.store_resources import ResourceCache
16
16
  from mlrun.datastore.targets import get_online_target
17
17
  from mlrun.serving.server import create_graph_server
18
18
 
19
- from ..feature_vector import OnlineVectorService
19
+ from ..feature_vector_utils import OnlineVectorService
20
20
  from .base import BaseMerger
21
21
 
22
22
 
@@ -583,7 +583,7 @@ class MonitoringApplicationController:
583
583
  ) as pool:
584
584
  futures = {
585
585
  pool.submit(
586
- MonitoringApplicationController.endpoint_to_regular_event,
586
+ self.endpoint_to_regular_event,
587
587
  endpoint,
588
588
  policy,
589
589
  set(applications_names),
@@ -606,14 +606,14 @@ class MonitoringApplicationController:
606
606
  logger.error(error)
607
607
  logger.info("Finishing monitoring controller chief")
608
608
 
609
- @staticmethod
610
609
  def endpoint_to_regular_event(
610
+ self,
611
611
  endpoint: mlrun.common.schemas.ModelEndpoint,
612
612
  policy: dict,
613
613
  applications_names: set,
614
614
  v3io_access_key: str,
615
615
  ) -> None:
616
- if MonitoringApplicationController._should_monitor_endpoint(
616
+ if self._should_monitor_endpoint(
617
617
  endpoint,
618
618
  set(applications_names),
619
619
  policy.get(ControllerEventEndpointPolicy.BASE_PERIOD, 10),
@@ -210,8 +210,7 @@ class TSDBConnector(ABC):
210
210
  endpoint_ids: Union[str, list[str]],
211
211
  start: Optional[datetime] = None,
212
212
  end: Optional[datetime] = None,
213
- get_raw: bool = False,
214
- ) -> Union[pd.DataFrame, list[v3io_frames.client.RawFrame]]:
213
+ ) -> Union[pd.DataFrame, dict[str, float]]:
215
214
  """
216
215
  Fetches data from the predictions TSDB table and returns the most recent request
217
216
  timestamp for each specified endpoint.
@@ -219,11 +218,11 @@ class TSDBConnector(ABC):
219
218
  :param endpoint_ids: A list of model endpoint identifiers.
220
219
  :param start: The start time for the query.
221
220
  :param end: The end time for the query.
222
- :param get_raw: Whether to return the request as raw frames rather than a pandas dataframe. Defaults
223
- to False. This can greatly improve performance when a dataframe isn't needed.
224
221
 
225
- :return: A pd.DataFrame containing the columns [endpoint_id, last_request, last_latency].
226
- If an endpoint has not been invoked within the specified time range, it will not appear in the result.
222
+ :return: A pd.DataFrame containing the columns [endpoint_id, last_request, last_latency] or a dictionary
223
+ containing the endpoint_id as the key and the last request timestamp as the value.
224
+ if an endpoint has not been invoked within the specified time range, it will not appear in the result (relevant
225
+ only to non-v3io connector).
227
226
  """
228
227
 
229
228
  @abstractmethod
@@ -668,7 +668,6 @@ class TDEngineConnector(TSDBConnector):
668
668
  endpoint_ids: Union[str, list[str]],
669
669
  start: Optional[datetime] = None,
670
670
  end: Optional[datetime] = None,
671
- get_raw: bool = False,
672
671
  ) -> pd.DataFrame:
673
672
  filter_query = self._get_endpoint_filter(endpoint_id=endpoint_ids)
674
673
  start, end = self._get_start_end(start, end)