mlrun 1.3.2rc1__py3-none-any.whl → 1.3.2rc2__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/api/api/deps.py +14 -1
- mlrun/api/api/endpoints/frontend_spec.py +0 -2
- mlrun/api/api/endpoints/functions.py +15 -27
- mlrun/api/api/endpoints/grafana_proxy.py +435 -74
- mlrun/api/api/endpoints/healthz.py +5 -18
- mlrun/api/api/endpoints/model_endpoints.py +33 -37
- mlrun/api/api/utils.py +6 -13
- mlrun/api/crud/__init__.py +14 -16
- mlrun/api/crud/logs.py +5 -7
- mlrun/api/crud/model_monitoring/__init__.py +2 -2
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +847 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +105 -328
- mlrun/api/crud/pipelines.py +2 -3
- mlrun/api/db/sqldb/models/models_mysql.py +52 -19
- mlrun/api/db/sqldb/models/models_sqlite.py +52 -19
- mlrun/api/db/sqldb/session.py +19 -26
- mlrun/api/schemas/__init__.py +2 -0
- mlrun/api/schemas/constants.py +0 -13
- mlrun/api/schemas/frontend_spec.py +0 -1
- mlrun/api/schemas/model_endpoints.py +38 -195
- mlrun/api/schemas/schedule.py +2 -2
- mlrun/api/utils/clients/log_collector.py +5 -0
- mlrun/builder.py +9 -41
- mlrun/config.py +1 -76
- mlrun/data_types/__init__.py +1 -6
- mlrun/data_types/data_types.py +1 -3
- mlrun/datastore/__init__.py +2 -9
- mlrun/datastore/sources.py +20 -25
- mlrun/datastore/store_resources.py +1 -1
- mlrun/datastore/targets.py +34 -67
- mlrun/datastore/utils.py +4 -26
- mlrun/db/base.py +2 -4
- mlrun/db/filedb.py +5 -13
- mlrun/db/httpdb.py +32 -64
- mlrun/db/sqldb.py +2 -4
- mlrun/errors.py +0 -5
- mlrun/execution.py +0 -2
- mlrun/feature_store/api.py +8 -24
- mlrun/feature_store/feature_set.py +6 -28
- mlrun/feature_store/feature_vector.py +0 -2
- mlrun/feature_store/ingestion.py +11 -8
- mlrun/feature_store/retrieval/base.py +43 -271
- mlrun/feature_store/retrieval/dask_merger.py +153 -55
- mlrun/feature_store/retrieval/job.py +3 -12
- mlrun/feature_store/retrieval/local_merger.py +130 -48
- mlrun/feature_store/retrieval/spark_merger.py +125 -126
- mlrun/features.py +2 -7
- mlrun/model_monitoring/constants.py +6 -48
- mlrun/model_monitoring/helpers.py +35 -118
- mlrun/model_monitoring/model_monitoring_batch.py +260 -293
- mlrun/model_monitoring/stream_processing_fs.py +253 -220
- mlrun/platforms/iguazio.py +0 -33
- mlrun/projects/project.py +72 -34
- mlrun/runtimes/base.py +0 -5
- mlrun/runtimes/daskjob.py +0 -2
- mlrun/runtimes/function.py +3 -29
- mlrun/runtimes/kubejob.py +15 -39
- mlrun/runtimes/local.py +45 -7
- mlrun/runtimes/mpijob/abstract.py +0 -2
- mlrun/runtimes/mpijob/v1.py +0 -2
- mlrun/runtimes/pod.py +0 -2
- mlrun/runtimes/remotesparkjob.py +0 -2
- mlrun/runtimes/serving.py +0 -6
- mlrun/runtimes/sparkjob/abstract.py +2 -39
- mlrun/runtimes/sparkjob/spark3job.py +0 -2
- mlrun/serving/__init__.py +1 -2
- mlrun/serving/routers.py +35 -35
- mlrun/serving/server.py +12 -22
- mlrun/serving/states.py +30 -162
- mlrun/serving/v2_serving.py +10 -13
- mlrun/utils/clones.py +1 -1
- mlrun/utils/model_monitoring.py +96 -122
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/METADATA +27 -23
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/RECORD +79 -92
- mlrun/api/crud/model_monitoring/grafana.py +0 -427
- mlrun/datastore/spark_udf.py +0 -40
- mlrun/model_monitoring/__init__.py +0 -44
- mlrun/model_monitoring/common.py +0 -112
- mlrun/model_monitoring/model_endpoint.py +0 -141
- mlrun/model_monitoring/stores/__init__.py +0 -106
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -448
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -23
- mlrun/model_monitoring/stores/models/base.py +0 -18
- mlrun/model_monitoring/stores/models/mysql.py +0 -100
- mlrun/model_monitoring/stores/models/sqlite.py +0 -98
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -375
- mlrun/utils/db.py +0 -52
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/LICENSE +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/WHEEL +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/top_level.txt +0 -0
mlrun/feature_store/retrieval/base.py

@@ -13,16 +13,11 @@
 # limitations under the License.
 #
 import abc
-import typing
-from datetime import datetime

 import mlrun
 from mlrun.datastore.targets import CSVTarget, ParquetTarget
-from mlrun.feature_store.feature_set import FeatureSet
-from mlrun.feature_store.feature_vector import Feature

 from ...utils import logger
-from ..feature_vector import OfflineVectorResponse


 class BaseMerger(abc.ABC):
@@ -41,7 +36,6 @@ class BaseMerger(abc.ABC):
         self._drop_indexes = True
         self._target = None
         self._alias = dict()
-        self._origin_alias = dict()

     def _append_drop_column(self, key):
         if key and key not in self._drop_columns:
@@ -77,7 +71,6 @@ class BaseMerger(abc.ABC):
         update_stats=None,
         query=None,
         join_type="inner",
-        order_by=None,
     ):
         self._target = target
         self._join_type = join_type
@@ -117,11 +110,9 @@ class BaseMerger(abc.ABC):
             start_time=start_time,
             end_time=end_time,
             query=query,
-            order_by=order_by,
         )

     def _write_to_target(self):
-        self.vector.spec.with_indexes = not self._drop_indexes
         if self._target:
             is_persistent_vector = self.vector.metadata.name is not None
             if not self._target.path and not is_persistent_vector:
@@ -134,14 +125,6 @@ class BaseMerger(abc.ABC):
             target_status = self._target.update_resource_status("ready", size=size)
             logger.info(f"wrote target: {target_status}")
             self.vector.save()
-            if self.vector.spec.with_indexes:
-                self.vector.spec.entity_fields = [
-                    Feature(name=feature, value_type=self._result_df[feature].dtype)
-                    if self._result_df[feature].dtype.name != "object"
-                    else Feature(name=feature, value_type="str")
-                    for feature in self._index_columns
-                ]
-                self.vector.save()

     def _set_indexes(self, df):
         if self._index_columns and not self._drop_indexes:
@@ -151,15 +134,28 @@
                 if index not in df.columns:
                     index_columns_missing.append(index)
             if not index_columns_missing:
-
+                if self.engine == "local" or self.engine == "spark":
+                    df.set_index(self._index_columns, inplace=True)
+                elif self.engine == "dask":
+                    if len(self._index_columns) == 1:
+                        return df.set_index(self._index_columns[0])
+                    elif len(self._index_columns) != 1:
+                        return self._reset_index(self._result_df)
+                    else:
+                        logger.info(
+                            "The entities will stay as columns because "
+                            "Dask dataframe does not yet support multi-indexes"
+                        )
+                return self._result_df
             else:
                 logger.warn(
                     f"Can't set index, not all index columns found: {index_columns_missing}. "
                     f"It is possible that column was already indexed."
                 )
-
-
+        else:
+            return df

+    @abc.abstractmethod
     def _generate_vector(
         self,
         entity_rows,
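For illustration, the engine-aware index handling introduced above can be sketched standalone with pandas; TinyMerger and the sample frame below are illustrative stand-ins, not mlrun code, and the Dask branch is simplified to the single-index case:

import pandas as pd


class TinyMerger:
    """Toy stand-in for the engine check added to _set_indexes (illustrative only)."""

    def __init__(self, engine, index_columns):
        self.engine = engine
        self._index_columns = index_columns

    def set_indexes(self, df):
        if self.engine in ("local", "spark"):
            # pandas-style frames can carry a multi-column index, set in place
            df.set_index(self._index_columns, inplace=True)
            return df
        if self.engine == "dask":
            if len(self._index_columns) == 1:
                return df.set_index(self._index_columns[0])
            # Dask dataframes do not support multi-column indexes,
            # so the entity columns are left in place
            return df
        return df


frame = pd.DataFrame({"ticker": ["A", "B"], "t": [1, 2], "price": [10.0, 11.5]})
print(TinyMerger("local", ["ticker", "t"]).set_indexes(frame))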
@@ -169,148 +165,8 @@ class BaseMerger(abc.ABC):
         start_time=None,
         end_time=None,
         query=None,
-        order_by=None,
     ):
-
-
-        feature_sets = []
-        dfs = []
-        keys = (
-            []
-        )  # the struct of key is [[[],[]], ..] So that each record indicates which way the corresponding
-        # featureset is connected to the previous one, and within each record the left keys are indicated in index 0
-        # and the right keys in index 1, this keys will be the keys that will be used in this join
-
-        fs_link_list = self._create_linked_relation_list(
-            feature_set_objects, feature_set_fields
-        )
-
-        for node in fs_link_list:
-            name = node.name
-            feature_set = feature_set_objects[name]
-            feature_sets.append(feature_set)
-            columns = feature_set_fields[name]
-            self._origin_alias.update({name: alias for name, alias in columns})
-            column_names = [name for name, _ in columns]
-
-            for column in node.data["save_cols"]:
-                if column not in column_names:
-                    self._append_drop_column(column)
-                    column_names.append(column)
-
-            df = self._get_engine_df(
-                feature_set,
-                name,
-                column_names,
-                start_time,
-                end_time,
-                entity_timestamp_column,
-            )
-
-            column_names += node.data["save_index"]
-            node.data["save_cols"] += node.data["save_index"]
-            if feature_set.spec.timestamp_key:
-                entity_timestamp_column_list = [feature_set.spec.timestamp_key]
-                column_names += entity_timestamp_column_list
-                node.data["save_cols"] += entity_timestamp_column_list
-                if not entity_timestamp_column:
-                    # if not entity_timestamp_column the firs `FeatureSet` will define it
-                    entity_timestamp_column = feature_set.spec.timestamp_key
-
-            # rename columns to be unique for each feature set and select if needed
-            rename_col_dict = {
-                column: f"{column}_{name}"
-                for column in column_names
-                if column not in node.data["save_cols"]
-            }
-            fs_entities = list(feature_set.spec.entities.keys())
-            df_temp = self._rename_columns_and_select(
-                df, rename_col_dict, columns=list(set(column_names + fs_entities))
-            )
-
-            if df_temp is not None:
-                df = df_temp
-                del df_temp
-
-            dfs.append(df)
-            del df
-
-            keys.append([node.data["left_keys"], node.data["right_keys"]])
-
-            # update alias according to the unique column name
-            new_columns = []
-            if not self._drop_indexes:
-                new_columns.extend([(ind, ind) for ind in fs_entities])
-            for column, alias in columns:
-                if column in rename_col_dict:
-                    new_columns.append((rename_col_dict[column], alias or column))
-                else:
-                    new_columns.append((column, alias))
-            self._update_alias(dictionary={name: alias for name, alias in new_columns})
-
-        # convert pandas entity_rows to spark DF if needed
-        if (
-            entity_rows is not None
-            and not hasattr(entity_rows, "rdd")
-            and self.engine == "spark"
-        ):
-            entity_rows = self.spark.createDataFrame(entity_rows)
-
-        # join the feature data frames
-        self.merge(
-            entity_df=entity_rows,
-            entity_timestamp_column=entity_timestamp_column,
-            featuresets=feature_sets,
-            featureset_dfs=dfs,
-            keys=keys,
-        )
-
-        all_columns = None
-        if not self._drop_indexes and entity_timestamp_column:
-            if entity_timestamp_column not in self._alias.values():
-                self._update_alias(
-                    key=entity_timestamp_column, val=entity_timestamp_column
-                )
-            all_columns = list(self._alias.keys())
-
-        df_temp = self._rename_columns_and_select(
-            self._result_df, self._alias, columns=all_columns
-        )
-        if df_temp is not None:
-            self._result_df = df_temp
-            del df_temp
-
-        df_temp = self._drop_columns_from_result()
-        if df_temp is not None:
-            self._result_df = df_temp
-            del df_temp
-
-        if self.vector.status.label_column:
-            self._result_df = self._result_df.dropna(
-                subset=[self.vector.status.label_column]
-            )
-        # filter joined data frame by the query param
-        if query:
-            self._filter(query)
-
-        if order_by:
-            if isinstance(order_by, str):
-                order_by = [order_by]
-            order_by_active = [
-                order_col
-                if order_col in self._result_df.columns
-                else self._origin_alias.get(order_col, None)
-                for order_col in order_by
-            ]
-            if None in order_by_active:
-                raise mlrun.errors.MLRunInvalidArgumentError(
-                    f"Result dataframe contains {self._result_df.columns} "
-                    f"columns and can't order by {order_by}"
-                )
-            self._order_by(order_by_active)
-
-        self._write_to_target()
-        return OfflineVectorResponse(self)
+        raise NotImplementedError("_generate_vector() operation not supported in class")

     def _unpersist_df(self, df):
         pass
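With the shared join orchestration removed, _generate_vector is left as an abstract hook that each engine-specific merger overrides. A minimal sketch of that pattern (Merger and LocalMerger are illustrative names, not mlrun classes, and the signature is abridged):

import abc


class Merger(abc.ABC):
    """Toy base class mirroring the abstract _generate_vector hook (illustrative only)."""

    @abc.abstractmethod
    def _generate_vector(
        self,
        entity_rows,
        entity_timestamp_column,
        feature_set_objects,
        feature_set_fields,
        start_time=None,
        end_time=None,
        query=None,
    ):
        raise NotImplementedError(
            "_generate_vector() operation not supported in class"
        )


class LocalMerger(Merger):
    def _generate_vector(self, *args, **kwargs):
        # each engine-specific merger (local/dask/spark) supplies its own join logic
        return "joined frame"


print(LocalMerger()._generate_vector(None, None, {}, {}))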
@@ -322,6 +178,7 @@ class BaseMerger(abc.ABC):
         featuresets: list,
         featureset_dfs: list,
         keys: list = None,
+        all_columns: list = None,
     ):
         """join the entities and feature set features into a result dataframe"""
         merged_df = entity_df
@@ -333,6 +190,10 @@ class BaseMerger(abc.ABC):
             else:
                 # keys can be multiple keys on each side of the join
                 keys = [[[], []]] * len(featureset_dfs)
+                if all_columns is not None:
+                    all_columns.pop(0)
+                else:
+                    all_columns = [[]] * len(featureset_dfs)
             entity_timestamp_column = (
                 entity_timestamp_column or featureset.spec.timestamp_key
             )
@@ -342,7 +203,9 @@ class BaseMerger(abc.ABC):
             # and it can join only by the entities of the first `featureset`
             keys[0][0] = keys[0][1] = list(featuresets[0].spec.entities.keys())

-        for featureset, featureset_df, lr_key in zip(
+        for featureset, featureset_df, lr_key, columns in zip(
+            featuresets, featureset_dfs, keys, all_columns
+        ):
             if featureset.spec.timestamp_key:
                 merge_func = self._asof_join
                 if self._join_type != "inner":
@@ -360,6 +223,7 @@ class BaseMerger(abc.ABC):
                 featureset_df,
                 lr_key[0],
                 lr_key[1],
+                columns,
             )

             # unpersist as required by the implementation (e.g. spark) and delete references
@@ -378,6 +242,7 @@ class BaseMerger(abc.ABC):
         featureset_df,
         left_keys: list,
         right_keys: list,
+        columns: list,
     ):
         raise NotImplementedError("_asof_join() operation not implemented in class")

@@ -390,6 +255,7 @@ class BaseMerger(abc.ABC):
         featureset_df,
         left_keys: list,
         right_keys: list,
+        columns: list,
     ):
         raise NotImplementedError("_join() operation not implemented in class")

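The hunks above thread a per-feature-set column list (all_columns / columns) through merge() and into _asof_join/_join. A small standalone sketch of the zip-based threading and the defaulting added in merge(); all values below are made up for illustration:

featureset_dfs = ["df_stocks", "df_quotes"]
keys = [[["ticker"], ["ticker"]], [["ticker"], ["ticker"]]]
all_columns = None  # caller did not pass explicit column lists

# mirrors the defaulting added in merge(): one (possibly empty) list per feature set
if all_columns is None:
    all_columns = [[]] * len(featureset_dfs)

for featureset_df, lr_key, columns in zip(featureset_dfs, keys, all_columns):
    # each join helper now also receives the columns to keep for this feature set
    print(featureset_df, lr_key[0], lr_key[1], columns)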
@@ -401,7 +267,6 @@ class BaseMerger(abc.ABC):

     def get_df(self, to_pandas=True):
         """return the result as a dataframe (pandas by default)"""
-        self._set_indexes(self._result_df)
         return self._result_df

     def to_parquet(self, target_path, **kw):
@@ -428,9 +293,6 @@ class BaseMerger(abc.ABC):
         def __eq__(self, other):
             return self.name == other.name

-        def __copy__(self):
-            return BaseMerger._Node(self.name, self.order, self.data.copy())
-
     class _LinkedList:
         def __init__(self, head=None):
             self.head = head
@@ -451,19 +313,6 @@ class BaseMerger(abc.ABC):
                 yield node
                 node = node.next

-        def __copy__(self):
-            ll = BaseMerger._LinkedList()
-            prev_node = None
-            for node in self:
-                new_node = node.__copy__()
-                if ll.head is None:
-                    ll.head = new_node
-                else:
-                    prev_node.next = new_node
-                prev_node = new_node
-            ll.len = self.len
-            return ll
-
         def add_first(self, node):
             node.next = self.head
             self.head = node
@@ -476,9 +325,7 @@ class BaseMerger(abc.ABC):
             for current_node in self:
                 pass
             current_node.next = node
-
-            self.len += 1
-            node = node.next
+            self.len += 1

         def add_after(self, target_node, new_node):
             new_node.next = target_node.next
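The add_last change above drops the stray node = node.next and counts each appended node exactly once. A toy linked list showing that bookkeeping (Node and LinkedList below are illustrative, not the mlrun classes):

class Node:
    def __init__(self, name):
        self.name = name
        self.next = None


class LinkedList:
    def __init__(self):
        self.head = None
        self.len = 0

    def __iter__(self):
        node = self.head
        while node is not None:
            yield node
            node = node.next

    def add_last(self, node):
        if self.head is None:
            self.head = node
        else:
            # walk to the tail, then link the new node
            for current_node in self:
                pass
            current_node.next = node
        self.len += 1  # incremented once per appended node


ll = LinkedList()
ll.add_last(Node("fs_a"))
ll.add_last(Node("fs_b"))
print(ll.len, [n.name for n in ll])  # 2 ['fs_a', 'fs_b']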
@@ -499,9 +346,7 @@ class BaseMerger(abc.ABC):
             node = self.find_node(other_head.name)
             if node is None:
                 return
-
-            if col not in node.data["save_cols"]:
-                node.data["save_cols"].append(col)
+            node.data["save_cols"] += other_head.data["save_cols"]
             for other_node in other_iter:
                 if self.find_node(other_node.name) is None:
                     while node is not None and other_node.order > node.order:
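concat now extends save_cols with the other list's head columns wholesale instead of appending one missing column at a time; unlike the per-column check it replaces, += does not deduplicate. A tiny illustration with plain dicts standing in for _Node.data:

node = {"save_cols": ["ticker"]}
other_head = {"save_cols": ["ticker", "time"]}

node["save_cols"] += other_head["save_cols"]
print(node["save_cols"])  # ['ticker', 'ticker', 'time'] - duplicates are kept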
@@ -575,9 +420,10 @@ class BaseMerger(abc.ABC):
                     )
                 )

-                if
-
-
+                # checking if feature_set have relation with feature_set_in
+                relation_wise = all(curr_col_relation_list)
+
+                if relation_wise:
                     # add to the link list feature set according to the defined relation
                     linked_list_relation.add_last(
                         BaseMerger._Node(
@@ -591,8 +437,8 @@ class BaseMerger(abc.ABC):
                             order=name_in_order,
                         )
                     )
-                    linked_list_relation.head.data["save_cols"].
-                    curr_col_relation_list
+                    linked_list_relation.head.data["save_cols"].append(
+                        *curr_col_relation_list
                     )
                 elif name_in_order > head_order and sorted(
                     feature_set_in_entity_list_names
@@ -622,14 +468,14 @@ class BaseMerger(abc.ABC):
             relation_linked_lists.append(linked_relation)

         # concat all the link lists to one, for the merging process
-
-
-
-
-
-
+        link_list_iter = iter(relation_linked_lists)
+        return_relation = next(link_list_iter)
+        for relation_list in link_list_iter:
+            return_relation.concat(relation_list)
+        if return_relation.len != len(feature_set_objects):
+            raise mlrun.errors.MLRunRuntimeError("Failed to merge")

-
+        return return_relation

     @classmethod
     def get_default_image(cls, kind):
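The hunk above folds the per-relation linked lists into one with iter()/next() and fails fast when the merged list does not cover every feature set. A sketch of the same fold using plain Python lists; list.extend stands in for _LinkedList.concat and RuntimeError for MLRunRuntimeError:

relation_linked_lists = [["fs_a", "fs_b"], ["fs_b", "fs_c"], ["fs_c", "fs_d"]]
feature_set_objects = {"fs_a": None, "fs_b": None, "fs_c": None, "fs_d": None}

link_list_iter = iter(relation_linked_lists)
return_relation = list(next(link_list_iter))
for relation_list in link_list_iter:
    # stand-in for _LinkedList.concat: only add names not already present
    return_relation.extend(n for n in relation_list if n not in return_relation)

if len(return_relation) != len(feature_set_objects):
    raise RuntimeError("Failed to merge")  # mlrun raises MLRunRuntimeError here
print(return_relation)  # ['fs_a', 'fs_b', 'fs_c', 'fs_d']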
@@ -637,77 +483,3 @@ class BaseMerger(abc.ABC):

     def _reset_index(self, _result_df):
         raise NotImplementedError
-
-    @abc.abstractmethod
-    def _create_engine_env(self):
-        """
-        initialize engine env if needed
-        """
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def _get_engine_df(
-        self,
-        feature_set: FeatureSet,
-        feature_set_name: typing.List[str],
-        column_names: typing.List[str] = None,
-        start_time: typing.Union[str, datetime] = None,
-        end_time: typing.Union[str, datetime] = None,
-        entity_timestamp_column: str = None,
-    ):
-        """
-        Return the feature_set data frame according to the args
-
-        :param feature_set: current feature_set to extract from the data frame
-        :param feature_set_name: the name of the current feature_set
-        :param column_names: list of columns to select (if not all)
-        :param start_time: filter by start time
-        :param end_time: filter by end time
-        :param entity_timestamp_column: specify the time column name in the file
-
-        :return: Data frame of the current engine
-        """
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def _rename_columns_and_select(
-        self,
-        df,
-        rename_col_dict: typing.Dict[str, str],
-        columns: typing.List[str] = None,
-    ):
-        """
-        rename the columns of the df according to rename_col_dict, and select only `columns` if it is not none
-
-        :param df: the data frame to change
-        :param rename_col_dict: the renaming dictionary - {<current_column_name>: <new_column_name>, ...}
-        :param columns: list of columns to select (if not all)
-
-        :return: the data frame after the transformation or None if the transformation were preformed inplace
-        """
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def _drop_columns_from_result(self):
-        """
-        drop `self._drop_columns` from `self._result_df`
-        """
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def _filter(self, query: str):
-        """
-        filter `self._result_df` by `query`
-
-        :param query: The query string used to filter rows
-        """
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def _order_by(self, order_by_active: typing.List[str]):
-        """
-        Order by `order_by_active` along all axis.
-
-        :param order_by_active: list of names to sort by.
-        """
-        raise NotImplementedError