mlrun-1.3.3-py3-none-any.whl → mlrun-1.4.0-py3-none-any.whl
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +3 -3
- mlrun/__main__.py +79 -37
- mlrun/api/__init__.py +1 -1
- mlrun/api/api/__init__.py +1 -1
- mlrun/api/api/api.py +4 -4
- mlrun/api/api/deps.py +10 -21
- mlrun/api/api/endpoints/__init__.py +1 -1
- mlrun/api/api/endpoints/artifacts.py +64 -36
- mlrun/api/api/endpoints/auth.py +4 -4
- mlrun/api/api/endpoints/background_tasks.py +11 -11
- mlrun/api/api/endpoints/client_spec.py +5 -5
- mlrun/api/api/endpoints/clusterization_spec.py +6 -4
- mlrun/api/api/endpoints/feature_store.py +124 -115
- mlrun/api/api/endpoints/files.py +22 -14
- mlrun/api/api/endpoints/frontend_spec.py +28 -21
- mlrun/api/api/endpoints/functions.py +142 -87
- mlrun/api/api/endpoints/grafana_proxy.py +89 -442
- mlrun/api/api/endpoints/healthz.py +20 -7
- mlrun/api/api/endpoints/hub.py +320 -0
- mlrun/api/api/endpoints/internal/__init__.py +1 -1
- mlrun/api/api/endpoints/internal/config.py +1 -1
- mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
- mlrun/api/api/endpoints/logs.py +11 -11
- mlrun/api/api/endpoints/model_endpoints.py +74 -70
- mlrun/api/api/endpoints/operations.py +13 -9
- mlrun/api/api/endpoints/pipelines.py +93 -88
- mlrun/api/api/endpoints/projects.py +35 -35
- mlrun/api/api/endpoints/runs.py +69 -27
- mlrun/api/api/endpoints/runtime_resources.py +28 -28
- mlrun/api/api/endpoints/schedules.py +98 -41
- mlrun/api/api/endpoints/secrets.py +37 -32
- mlrun/api/api/endpoints/submit.py +12 -12
- mlrun/api/api/endpoints/tags.py +20 -22
- mlrun/api/api/utils.py +251 -42
- mlrun/api/constants.py +1 -1
- mlrun/api/crud/__init__.py +18 -15
- mlrun/api/crud/artifacts.py +10 -10
- mlrun/api/crud/client_spec.py +4 -4
- mlrun/api/crud/clusterization_spec.py +3 -3
- mlrun/api/crud/feature_store.py +54 -46
- mlrun/api/crud/functions.py +3 -3
- mlrun/api/crud/hub.py +312 -0
- mlrun/api/crud/logs.py +11 -9
- mlrun/api/crud/model_monitoring/__init__.py +3 -3
- mlrun/api/crud/model_monitoring/grafana.py +435 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
- mlrun/api/crud/notifications.py +149 -0
- mlrun/api/crud/pipelines.py +67 -52
- mlrun/api/crud/projects.py +51 -23
- mlrun/api/crud/runs.py +7 -5
- mlrun/api/crud/runtime_resources.py +13 -13
- mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
- mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
- mlrun/api/crud/runtimes/nuclio/function.py +505 -0
- mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
- mlrun/api/crud/secrets.py +88 -46
- mlrun/api/crud/tags.py +5 -5
- mlrun/api/db/__init__.py +1 -1
- mlrun/api/db/base.py +102 -54
- mlrun/api/db/init_db.py +2 -3
- mlrun/api/db/session.py +4 -12
- mlrun/api/db/sqldb/__init__.py +1 -1
- mlrun/api/db/sqldb/db.py +439 -196
- mlrun/api/db/sqldb/helpers.py +1 -1
- mlrun/api/db/sqldb/models/__init__.py +3 -3
- mlrun/api/db/sqldb/models/models_mysql.py +82 -64
- mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
- mlrun/api/db/sqldb/session.py +27 -20
- mlrun/api/initial_data.py +82 -24
- mlrun/api/launcher.py +196 -0
- mlrun/api/main.py +91 -22
- mlrun/api/middlewares.py +6 -5
- mlrun/api/migrations_mysql/env.py +1 -1
- mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
- mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
- mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
- mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
- mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
- mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
- mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/env.py +1 -1
- mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
- mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
- mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
- mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
- mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
- mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
- mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
- mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
- mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
- mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
- mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
- mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
- mlrun/api/schemas/__init__.py +216 -138
- mlrun/api/utils/__init__.py +1 -1
- mlrun/api/utils/asyncio.py +1 -1
- mlrun/api/utils/auth/__init__.py +1 -1
- mlrun/api/utils/auth/providers/__init__.py +1 -1
- mlrun/api/utils/auth/providers/base.py +7 -7
- mlrun/api/utils/auth/providers/nop.py +6 -7
- mlrun/api/utils/auth/providers/opa.py +17 -17
- mlrun/api/utils/auth/verifier.py +36 -34
- mlrun/api/utils/background_tasks.py +24 -24
- mlrun/{builder.py → api/utils/builder.py} +216 -123
- mlrun/api/utils/clients/__init__.py +1 -1
- mlrun/api/utils/clients/chief.py +19 -4
- mlrun/api/utils/clients/iguazio.py +106 -60
- mlrun/api/utils/clients/log_collector.py +1 -1
- mlrun/api/utils/clients/nuclio.py +23 -23
- mlrun/api/utils/clients/protocols/grpc.py +2 -2
- mlrun/api/utils/db/__init__.py +1 -1
- mlrun/api/utils/db/alembic.py +1 -1
- mlrun/api/utils/db/backup.py +1 -1
- mlrun/api/utils/db/mysql.py +24 -25
- mlrun/api/utils/db/sql_collation.py +1 -1
- mlrun/api/utils/db/sqlite_migration.py +2 -2
- mlrun/api/utils/events/__init__.py +14 -0
- mlrun/api/utils/events/base.py +57 -0
- mlrun/api/utils/events/events_factory.py +41 -0
- mlrun/api/utils/events/iguazio.py +217 -0
- mlrun/api/utils/events/nop.py +55 -0
- mlrun/api/utils/helpers.py +16 -13
- mlrun/api/utils/memory_reports.py +1 -1
- mlrun/api/utils/periodic.py +6 -3
- mlrun/api/utils/projects/__init__.py +1 -1
- mlrun/api/utils/projects/follower.py +33 -33
- mlrun/api/utils/projects/leader.py +36 -34
- mlrun/api/utils/projects/member.py +27 -27
- mlrun/api/utils/projects/remotes/__init__.py +1 -1
- mlrun/api/utils/projects/remotes/follower.py +13 -13
- mlrun/api/utils/projects/remotes/leader.py +10 -10
- mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
- mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
- mlrun/api/utils/scheduler.py +140 -51
- mlrun/api/utils/singletons/__init__.py +1 -1
- mlrun/api/utils/singletons/db.py +9 -15
- mlrun/api/utils/singletons/k8s.py +677 -5
- mlrun/api/utils/singletons/logs_dir.py +1 -1
- mlrun/api/utils/singletons/project_member.py +1 -1
- mlrun/api/utils/singletons/scheduler.py +1 -1
- mlrun/artifacts/__init__.py +2 -2
- mlrun/artifacts/base.py +8 -2
- mlrun/artifacts/dataset.py +5 -3
- mlrun/artifacts/manager.py +7 -1
- mlrun/artifacts/model.py +15 -4
- mlrun/artifacts/plots.py +1 -1
- mlrun/common/__init__.py +1 -1
- mlrun/common/constants.py +15 -0
- mlrun/common/model_monitoring.py +209 -0
- mlrun/common/schemas/__init__.py +167 -0
- mlrun/{api → common}/schemas/artifact.py +13 -14
- mlrun/{api → common}/schemas/auth.py +10 -8
- mlrun/{api → common}/schemas/background_task.py +3 -3
- mlrun/{api → common}/schemas/client_spec.py +1 -1
- mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
- mlrun/{api → common}/schemas/constants.py +21 -8
- mlrun/common/schemas/events.py +36 -0
- mlrun/{api → common}/schemas/feature_store.py +2 -1
- mlrun/{api → common}/schemas/frontend_spec.py +7 -6
- mlrun/{api → common}/schemas/function.py +5 -5
- mlrun/{api → common}/schemas/http.py +3 -3
- mlrun/common/schemas/hub.py +134 -0
- mlrun/{api → common}/schemas/k8s.py +3 -3
- mlrun/{api → common}/schemas/memory_reports.py +1 -1
- mlrun/common/schemas/model_endpoints.py +342 -0
- mlrun/common/schemas/notification.py +57 -0
- mlrun/{api → common}/schemas/object.py +6 -6
- mlrun/{api → common}/schemas/pipeline.py +3 -3
- mlrun/{api → common}/schemas/project.py +6 -5
- mlrun/common/schemas/regex.py +24 -0
- mlrun/common/schemas/runs.py +30 -0
- mlrun/{api → common}/schemas/runtime_resource.py +3 -3
- mlrun/{api → common}/schemas/schedule.py +19 -7
- mlrun/{api → common}/schemas/secret.py +3 -3
- mlrun/{api → common}/schemas/tag.py +2 -2
- mlrun/common/types.py +25 -0
- mlrun/config.py +152 -20
- mlrun/data_types/__init__.py +7 -2
- mlrun/data_types/data_types.py +4 -2
- mlrun/data_types/infer.py +1 -1
- mlrun/data_types/spark.py +10 -3
- mlrun/datastore/__init__.py +10 -3
- mlrun/datastore/azure_blob.py +1 -1
- mlrun/datastore/base.py +185 -53
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/filestore.py +1 -1
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +1 -1
- mlrun/datastore/s3.py +1 -1
- mlrun/datastore/sources.py +192 -70
- mlrun/datastore/spark_udf.py +44 -0
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/targets.py +115 -45
- mlrun/datastore/utils.py +127 -5
- mlrun/datastore/v3io.py +1 -1
- mlrun/datastore/wasbfs/__init__.py +1 -1
- mlrun/datastore/wasbfs/fs.py +1 -1
- mlrun/db/__init__.py +7 -5
- mlrun/db/base.py +112 -68
- mlrun/db/httpdb.py +445 -277
- mlrun/db/nopdb.py +491 -0
- mlrun/db/sqldb.py +112 -65
- mlrun/errors.py +6 -1
- mlrun/execution.py +44 -22
- mlrun/feature_store/__init__.py +1 -1
- mlrun/feature_store/api.py +143 -95
- mlrun/feature_store/common.py +16 -20
- mlrun/feature_store/feature_set.py +42 -12
- mlrun/feature_store/feature_vector.py +32 -21
- mlrun/feature_store/ingestion.py +9 -12
- mlrun/feature_store/retrieval/__init__.py +3 -2
- mlrun/feature_store/retrieval/base.py +388 -66
- mlrun/feature_store/retrieval/dask_merger.py +63 -151
- mlrun/feature_store/retrieval/job.py +30 -12
- mlrun/feature_store/retrieval/local_merger.py +40 -133
- mlrun/feature_store/retrieval/spark_merger.py +129 -127
- mlrun/feature_store/retrieval/storey_merger.py +173 -0
- mlrun/feature_store/steps.py +132 -15
- mlrun/features.py +8 -3
- mlrun/frameworks/__init__.py +1 -1
- mlrun/frameworks/_common/__init__.py +1 -1
- mlrun/frameworks/_common/artifacts_library.py +1 -1
- mlrun/frameworks/_common/mlrun_interface.py +1 -1
- mlrun/frameworks/_common/model_handler.py +1 -1
- mlrun/frameworks/_common/plan.py +1 -1
- mlrun/frameworks/_common/producer.py +1 -1
- mlrun/frameworks/_common/utils.py +1 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
- mlrun/frameworks/_dl_common/model_handler.py +1 -1
- mlrun/frameworks/_dl_common/utils.py +1 -1
- mlrun/frameworks/_ml_common/__init__.py +1 -1
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/model_handler.py +1 -1
- mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
- mlrun/frameworks/_ml_common/producer.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +1 -1
- mlrun/frameworks/auto_mlrun/__init__.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
- mlrun/frameworks/huggingface/__init__.py +1 -1
- mlrun/frameworks/huggingface/model_server.py +1 -1
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/lgbm/model_server.py +1 -1
- mlrun/frameworks/lgbm/utils.py +1 -1
- mlrun/frameworks/onnx/__init__.py +1 -1
- mlrun/frameworks/onnx/dataset.py +1 -1
- mlrun/frameworks/onnx/mlrun_interface.py +1 -1
- mlrun/frameworks/onnx/model_handler.py +1 -1
- mlrun/frameworks/onnx/model_server.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +1 -1
- mlrun/frameworks/pytorch/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
- mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
- mlrun/frameworks/pytorch/model_handler.py +1 -1
- mlrun/frameworks/pytorch/model_server.py +1 -1
- mlrun/frameworks/pytorch/utils.py +1 -1
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/estimator.py +1 -1
- mlrun/frameworks/sklearn/metric.py +1 -1
- mlrun/frameworks/sklearn/metrics_library.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
- mlrun/frameworks/sklearn/model_handler.py +1 -1
- mlrun/frameworks/sklearn/utils.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
- mlrun/frameworks/tf_keras/model_handler.py +1 -1
- mlrun/frameworks/tf_keras/model_server.py +1 -1
- mlrun/frameworks/tf_keras/utils.py +1 -1
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
- mlrun/frameworks/xgboost/model_handler.py +1 -1
- mlrun/frameworks/xgboost/utils.py +1 -1
- mlrun/k8s_utils.py +14 -765
- mlrun/kfpops.py +14 -17
- mlrun/launcher/__init__.py +13 -0
- mlrun/launcher/base.py +406 -0
- mlrun/launcher/client.py +159 -0
- mlrun/launcher/factory.py +50 -0
- mlrun/launcher/local.py +276 -0
- mlrun/launcher/remote.py +178 -0
- mlrun/lists.py +10 -2
- mlrun/mlutils/__init__.py +1 -1
- mlrun/mlutils/data.py +1 -1
- mlrun/mlutils/models.py +1 -1
- mlrun/mlutils/plots.py +1 -1
- mlrun/model.py +252 -14
- mlrun/model_monitoring/__init__.py +41 -0
- mlrun/model_monitoring/features_drift_table.py +1 -1
- mlrun/model_monitoring/helpers.py +123 -38
- mlrun/model_monitoring/model_endpoint.py +144 -0
- mlrun/model_monitoring/model_monitoring_batch.py +310 -259
- mlrun/model_monitoring/stores/__init__.py +106 -0
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
- mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
- mlrun/model_monitoring/stores/models/__init__.py +23 -0
- mlrun/model_monitoring/stores/models/base.py +18 -0
- mlrun/model_monitoring/stores/models/mysql.py +100 -0
- mlrun/model_monitoring/stores/models/sqlite.py +98 -0
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
- mlrun/model_monitoring/stream_processing_fs.py +239 -271
- mlrun/package/__init__.py +163 -0
- mlrun/package/context_handler.py +325 -0
- mlrun/package/errors.py +47 -0
- mlrun/package/packager.py +298 -0
- mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
- mlrun/package/packagers/default_packager.py +422 -0
- mlrun/package/packagers/numpy_packagers.py +612 -0
- mlrun/package/packagers/pandas_packagers.py +968 -0
- mlrun/package/packagers/python_standard_library_packagers.py +616 -0
- mlrun/package/packagers_manager.py +786 -0
- mlrun/package/utils/__init__.py +53 -0
- mlrun/package/utils/_archiver.py +226 -0
- mlrun/package/utils/_formatter.py +211 -0
- mlrun/package/utils/_pickler.py +234 -0
- mlrun/package/utils/_supported_format.py +71 -0
- mlrun/package/utils/log_hint_utils.py +93 -0
- mlrun/package/utils/type_hint_utils.py +298 -0
- mlrun/platforms/__init__.py +1 -1
- mlrun/platforms/iguazio.py +34 -2
- mlrun/platforms/other.py +1 -1
- mlrun/projects/__init__.py +1 -1
- mlrun/projects/operations.py +14 -9
- mlrun/projects/pipelines.py +31 -13
- mlrun/projects/project.py +762 -238
- mlrun/render.py +49 -19
- mlrun/run.py +57 -326
- mlrun/runtimes/__init__.py +3 -9
- mlrun/runtimes/base.py +247 -784
- mlrun/runtimes/constants.py +1 -1
- mlrun/runtimes/daskjob.py +45 -41
- mlrun/runtimes/funcdoc.py +43 -7
- mlrun/runtimes/function.py +66 -656
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/generators.py +1 -1
- mlrun/runtimes/kubejob.py +99 -116
- mlrun/runtimes/local.py +59 -66
- mlrun/runtimes/mpijob/__init__.py +1 -1
- mlrun/runtimes/mpijob/abstract.py +13 -15
- mlrun/runtimes/mpijob/v1.py +3 -1
- mlrun/runtimes/mpijob/v1alpha1.py +1 -1
- mlrun/runtimes/nuclio.py +1 -1
- mlrun/runtimes/pod.py +51 -26
- mlrun/runtimes/remotesparkjob.py +3 -1
- mlrun/runtimes/serving.py +12 -4
- mlrun/runtimes/sparkjob/__init__.py +1 -2
- mlrun/runtimes/sparkjob/abstract.py +44 -31
- mlrun/runtimes/sparkjob/spark3job.py +11 -9
- mlrun/runtimes/utils.py +61 -42
- mlrun/secrets.py +16 -18
- mlrun/serving/__init__.py +3 -2
- mlrun/serving/merger.py +1 -1
- mlrun/serving/remote.py +1 -1
- mlrun/serving/routers.py +39 -42
- mlrun/serving/server.py +23 -13
- mlrun/serving/serving_wrapper.py +1 -1
- mlrun/serving/states.py +172 -39
- mlrun/serving/utils.py +1 -1
- mlrun/serving/v1_serving.py +1 -1
- mlrun/serving/v2_serving.py +29 -21
- mlrun/utils/__init__.py +1 -2
- mlrun/utils/async_http.py +8 -1
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +2 -2
- mlrun/utils/condition_evaluator.py +65 -0
- mlrun/utils/db.py +52 -0
- mlrun/utils/helpers.py +188 -13
- mlrun/utils/http.py +89 -54
- mlrun/utils/logger.py +48 -8
- mlrun/utils/model_monitoring.py +132 -100
- mlrun/utils/notifications/__init__.py +1 -1
- mlrun/utils/notifications/notification/__init__.py +8 -6
- mlrun/utils/notifications/notification/base.py +20 -14
- mlrun/utils/notifications/notification/console.py +7 -4
- mlrun/utils/notifications/notification/git.py +36 -19
- mlrun/utils/notifications/notification/ipython.py +10 -8
- mlrun/utils/notifications/notification/slack.py +18 -13
- mlrun/utils/notifications/notification_pusher.py +377 -56
- mlrun/utils/regex.py +6 -1
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +1 -1
- mlrun/utils/vault.py +270 -269
- mlrun/utils/version/__init__.py +1 -1
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +1 -1
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
- mlrun-1.4.0.dist-info/RECORD +434 -0
- mlrun/api/api/endpoints/marketplace.py +0 -257
- mlrun/api/crud/marketplace.py +0 -221
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
- mlrun/api/db/filedb/db.py +0 -518
- mlrun/api/schemas/marketplace.py +0 -128
- mlrun/api/schemas/model_endpoints.py +0 -185
- mlrun/db/filedb.py +0 -891
- mlrun/feature_store/retrieval/online.py +0 -92
- mlrun/model_monitoring/constants.py +0 -67
- mlrun/runtimes/package/context_handler.py +0 -711
- mlrun/runtimes/sparkjob/spark2job.py +0 -59
- mlrun-1.3.3.dist-info/RECORD +0 -381
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -20,151 +20,18 @@ from dask.distributed import Client
|
|
|
20
20
|
|
|
21
21
|
import mlrun
|
|
22
22
|
|
|
23
|
-
from ..feature_vector import OfflineVectorResponse
|
|
24
23
|
from .base import BaseMerger
|
|
25
24
|
|
|
26
25
|
|
|
27
26
|
class DaskFeatureMerger(BaseMerger):
|
|
28
27
|
engine = "dask"
|
|
28
|
+
support_offline = True
|
|
29
29
|
|
|
30
30
|
def __init__(self, vector, **engine_args):
|
|
31
31
|
super().__init__(vector, **engine_args)
|
|
32
32
|
self.client = engine_args.get("dask_client")
|
|
33
33
|
self._dask_cluster_uri = engine_args.get("dask_cluster_uri")
|
|
34
34
|
|
|
35
|
-
def _generate_vector(
|
|
36
|
-
self,
|
|
37
|
-
entity_rows,
|
|
38
|
-
entity_timestamp_column,
|
|
39
|
-
feature_set_objects,
|
|
40
|
-
feature_set_fields,
|
|
41
|
-
start_time=None,
|
|
42
|
-
end_time=None,
|
|
43
|
-
query=None,
|
|
44
|
-
):
|
|
45
|
-
if "index" not in self._index_columns:
|
|
46
|
-
self._append_drop_column("index")
|
|
47
|
-
|
|
48
|
-
# init the dask client if needed
|
|
49
|
-
if not self.client:
|
|
50
|
-
if self._dask_cluster_uri:
|
|
51
|
-
function = mlrun.import_function(self._dask_cluster_uri)
|
|
52
|
-
self.client = function.client
|
|
53
|
-
else:
|
|
54
|
-
self.client = Client()
|
|
55
|
-
|
|
56
|
-
# load dataframes
|
|
57
|
-
feature_sets = []
|
|
58
|
-
dfs = []
|
|
59
|
-
keys = (
|
|
60
|
-
[]
|
|
61
|
-
) # the struct of key is [[[],[]], ..] So that each record indicates which way the corresponding
|
|
62
|
-
# featureset is connected to the previous one, and within each record the left keys are indicated in index 0
|
|
63
|
-
# and the right keys in index 1, this keys will be the keys that will be used in this join
|
|
64
|
-
all_columns = []
|
|
65
|
-
|
|
66
|
-
fs_link_list = self._create_linked_relation_list(
|
|
67
|
-
feature_set_objects, feature_set_fields
|
|
68
|
-
)
|
|
69
|
-
|
|
70
|
-
for node in fs_link_list:
|
|
71
|
-
name = node.name
|
|
72
|
-
feature_set = feature_set_objects[name]
|
|
73
|
-
feature_sets.append(feature_set)
|
|
74
|
-
columns = feature_set_fields[name]
|
|
75
|
-
column_names = [name for name, alias in columns]
|
|
76
|
-
|
|
77
|
-
for col in node.data["save_cols"]:
|
|
78
|
-
if col not in column_names:
|
|
79
|
-
self._append_drop_column(col)
|
|
80
|
-
column_names += node.data["save_cols"]
|
|
81
|
-
|
|
82
|
-
df = feature_set.to_dataframe(
|
|
83
|
-
columns=column_names,
|
|
84
|
-
df_module=dd,
|
|
85
|
-
start_time=start_time,
|
|
86
|
-
end_time=end_time,
|
|
87
|
-
time_column=entity_timestamp_column,
|
|
88
|
-
index=False,
|
|
89
|
-
)
|
|
90
|
-
|
|
91
|
-
df = df.reset_index()
|
|
92
|
-
column_names += node.data["save_index"]
|
|
93
|
-
node.data["save_cols"] += node.data["save_index"]
|
|
94
|
-
entity_timestamp_column_list = (
|
|
95
|
-
[entity_timestamp_column]
|
|
96
|
-
if entity_timestamp_column
|
|
97
|
-
else feature_set.spec.timestamp_key
|
|
98
|
-
)
|
|
99
|
-
if entity_timestamp_column_list:
|
|
100
|
-
column_names += entity_timestamp_column_list
|
|
101
|
-
node.data["save_cols"] += entity_timestamp_column_list
|
|
102
|
-
|
|
103
|
-
df = df.persist()
|
|
104
|
-
|
|
105
|
-
# rename columns to be unique for each feature set
|
|
106
|
-
rename_col_dict = {
|
|
107
|
-
col: f"{col}_{name}"
|
|
108
|
-
for col in column_names
|
|
109
|
-
if col not in node.data["save_cols"]
|
|
110
|
-
}
|
|
111
|
-
df = df.rename(
|
|
112
|
-
columns=rename_col_dict,
|
|
113
|
-
)
|
|
114
|
-
|
|
115
|
-
dfs.append(df)
|
|
116
|
-
del df
|
|
117
|
-
|
|
118
|
-
keys.append([node.data["left_keys"], node.data["right_keys"]])
|
|
119
|
-
|
|
120
|
-
# update alias according to the unique column name
|
|
121
|
-
new_columns = []
|
|
122
|
-
for col, alias in columns:
|
|
123
|
-
if col in rename_col_dict and alias:
|
|
124
|
-
new_columns.append((rename_col_dict[col], alias))
|
|
125
|
-
elif col in rename_col_dict and not alias:
|
|
126
|
-
new_columns.append((rename_col_dict[col], col))
|
|
127
|
-
else:
|
|
128
|
-
new_columns.append((col, alias))
|
|
129
|
-
all_columns.append(new_columns)
|
|
130
|
-
self._update_alias(
|
|
131
|
-
dictionary={name: alias for name, alias in new_columns if alias}
|
|
132
|
-
)
|
|
133
|
-
|
|
134
|
-
self.merge(
|
|
135
|
-
entity_df=entity_rows,
|
|
136
|
-
entity_timestamp_column=entity_timestamp_column,
|
|
137
|
-
featuresets=feature_sets,
|
|
138
|
-
featureset_dfs=dfs,
|
|
139
|
-
keys=keys,
|
|
140
|
-
all_columns=all_columns,
|
|
141
|
-
)
|
|
142
|
-
|
|
143
|
-
self._result_df = self._result_df.drop(
|
|
144
|
-
columns=self._drop_columns, errors="ignore"
|
|
145
|
-
)
|
|
146
|
-
|
|
147
|
-
# renaming all columns according to self._alias
|
|
148
|
-
self._result_df = self._result_df.rename(
|
|
149
|
-
columns=self._alias,
|
|
150
|
-
)
|
|
151
|
-
|
|
152
|
-
if self.vector.status.label_column:
|
|
153
|
-
self._result_df = self._result_df.dropna(
|
|
154
|
-
subset=[self.vector.status.label_column]
|
|
155
|
-
)
|
|
156
|
-
# filter joined data frame by the query param
|
|
157
|
-
if query:
|
|
158
|
-
self._result_df = self._result_df.query(query)
|
|
159
|
-
|
|
160
|
-
if self._drop_indexes:
|
|
161
|
-
self._result_df = self._reset_index(self._result_df)
|
|
162
|
-
else:
|
|
163
|
-
self._result_df = self._set_indexes(self._result_df)
|
|
164
|
-
self._write_to_target()
|
|
165
|
-
|
|
166
|
-
return OfflineVectorResponse(self)
|
|
167
|
-
|
|
168
35
|
def _reset_index(self, df):
|
|
169
36
|
to_drop = df.index.name is None
|
|
170
37
|
df = df.reset_index(drop=to_drop)
|
|
@@ -178,27 +45,22 @@ class DaskFeatureMerger(BaseMerger):
|
|
|
178
45
|
featureset_df,
|
|
179
46
|
left_keys: list,
|
|
180
47
|
right_keys: list,
|
|
181
|
-
columns: list,
|
|
182
48
|
):
|
|
49
|
+
def sort_partition(partition, timestamp):
|
|
50
|
+
return partition.sort_values(timestamp)
|
|
183
51
|
|
|
184
|
-
entity_df =
|
|
185
|
-
|
|
186
|
-
entity_df
|
|
187
|
-
if entity_timestamp_column not in entity_df
|
|
188
|
-
else entity_df.set_index(entity_timestamp_column, drop=True)
|
|
52
|
+
entity_df = entity_df.map_partitions(
|
|
53
|
+
sort_partition, timestamp=entity_timestamp_column
|
|
189
54
|
)
|
|
190
|
-
featureset_df =
|
|
191
|
-
|
|
192
|
-
featureset_df
|
|
193
|
-
if entity_timestamp_column not in featureset_df
|
|
194
|
-
else featureset_df.set_index(entity_timestamp_column, drop=True)
|
|
55
|
+
featureset_df = featureset_df.map_partitions(
|
|
56
|
+
sort_partition, timestamp=featureset.spec.timestamp_key
|
|
195
57
|
)
|
|
196
58
|
|
|
197
59
|
merged_df = merge_asof(
|
|
198
60
|
entity_df,
|
|
199
61
|
featureset_df,
|
|
200
|
-
|
|
201
|
-
|
|
62
|
+
left_on=entity_timestamp_column,
|
|
63
|
+
right_on=featureset.spec.timestamp_key,
|
|
202
64
|
left_by=left_keys or None,
|
|
203
65
|
right_by=right_keys or None,
|
|
204
66
|
suffixes=("", f"_{featureset.metadata.name}_"),
|
|
@@ -217,7 +79,6 @@ class DaskFeatureMerger(BaseMerger):
|
|
|
217
79
|
featureset_df,
|
|
218
80
|
left_keys: list,
|
|
219
81
|
right_keys: list,
|
|
220
|
-
columns: list,
|
|
221
82
|
):
|
|
222
83
|
|
|
223
84
|
fs_name = featureset.metadata.name
|
|
@@ -241,5 +102,56 @@ class DaskFeatureMerger(BaseMerger):
|
|
|
241
102
|
|
|
242
103
|
def get_df(self, to_pandas=True):
|
|
243
104
|
if to_pandas and hasattr(self._result_df, "dask"):
|
|
244
|
-
|
|
245
|
-
|
|
105
|
+
df = self._result_df.compute()
|
|
106
|
+
else:
|
|
107
|
+
df = self._result_df
|
|
108
|
+
self._set_indexes(df)
|
|
109
|
+
return df
|
|
110
|
+
|
|
111
|
+
def _create_engine_env(self):
|
|
112
|
+
if "index" not in self._index_columns:
|
|
113
|
+
self._append_drop_column("index")
|
|
114
|
+
|
|
115
|
+
# init the dask client if needed
|
|
116
|
+
if not self.client:
|
|
117
|
+
if self._dask_cluster_uri:
|
|
118
|
+
function = mlrun.import_function(self._dask_cluster_uri)
|
|
119
|
+
self.client = function.client
|
|
120
|
+
else:
|
|
121
|
+
self.client = Client()
|
|
122
|
+
|
|
123
|
+
def _get_engine_df(
|
|
124
|
+
self,
|
|
125
|
+
feature_set,
|
|
126
|
+
feature_set_name,
|
|
127
|
+
column_names=None,
|
|
128
|
+
start_time=None,
|
|
129
|
+
end_time=None,
|
|
130
|
+
time_column=None,
|
|
131
|
+
):
|
|
132
|
+
df = feature_set.to_dataframe(
|
|
133
|
+
columns=column_names,
|
|
134
|
+
df_module=dd,
|
|
135
|
+
start_time=start_time,
|
|
136
|
+
end_time=end_time,
|
|
137
|
+
time_column=time_column,
|
|
138
|
+
index=False,
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
return self._reset_index(df).persist()
|
|
142
|
+
|
|
143
|
+
def _rename_columns_and_select(self, df, rename_col_dict, columns=None):
|
|
144
|
+
return df.rename(
|
|
145
|
+
columns=rename_col_dict,
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
def _drop_columns_from_result(self):
|
|
149
|
+
self._result_df = self._result_df.drop(
|
|
150
|
+
columns=self._drop_columns, errors="ignore"
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
def _filter(self, query):
|
|
154
|
+
self._result_df = self._result_df.query(query)
|
|
155
|
+
|
|
156
|
+
def _order_by(self, order_by_active):
|
|
157
|
+
self._result_df.sort_values(by=order_by_active)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -33,12 +33,15 @@ def run_merge_job(
|
|
|
33
33
|
engine_args: dict,
|
|
34
34
|
spark_service: str = None,
|
|
35
35
|
entity_rows=None,
|
|
36
|
-
|
|
36
|
+
entity_timestamp_column=None,
|
|
37
37
|
run_config=None,
|
|
38
38
|
drop_columns=None,
|
|
39
39
|
with_indexes=None,
|
|
40
40
|
query=None,
|
|
41
|
-
|
|
41
|
+
order_by=None,
|
|
42
|
+
start_time=None,
|
|
43
|
+
end_time=None,
|
|
44
|
+
timestamp_for_filtering=None,
|
|
42
45
|
):
|
|
43
46
|
name = vector.metadata.name
|
|
44
47
|
if not target or not hasattr(target, "to_dict"):
|
|
@@ -92,20 +95,27 @@ def run_merge_job(
|
|
|
92
95
|
set_default_resources(
|
|
93
96
|
function.spec.executor_resources, function.with_executor_requests
|
|
94
97
|
)
|
|
98
|
+
if start_time and not isinstance(start_time, str):
|
|
99
|
+
start_time = start_time.isoformat()
|
|
100
|
+
if end_time and not isinstance(end_time, str):
|
|
101
|
+
end_time = end_time.isoformat()
|
|
95
102
|
|
|
96
103
|
task = new_task(
|
|
97
104
|
name=name,
|
|
98
105
|
params={
|
|
99
106
|
"vector_uri": vector.uri,
|
|
100
107
|
"target": target.to_dict(),
|
|
101
|
-
"
|
|
108
|
+
"entity_timestamp_column": entity_timestamp_column,
|
|
102
109
|
"drop_columns": drop_columns,
|
|
103
110
|
"with_indexes": with_indexes,
|
|
104
111
|
"query": query,
|
|
105
|
-
"
|
|
112
|
+
"order_by": order_by,
|
|
113
|
+
"start_time": start_time,
|
|
114
|
+
"end_time": end_time,
|
|
115
|
+
"timestamp_for_filtering": timestamp_for_filtering,
|
|
106
116
|
"engine_args": engine_args,
|
|
107
117
|
},
|
|
108
|
-
inputs={"entity_rows": entity_rows},
|
|
118
|
+
inputs={"entity_rows": entity_rows} if entity_rows is not None else {},
|
|
109
119
|
)
|
|
110
120
|
task.spec.secret_sources = run_config.secret_sources
|
|
111
121
|
task.set_label("job-type", "feature-merge").set_label("feature-vector", vector.uri)
|
|
@@ -120,15 +130,16 @@ def run_merge_job(
|
|
|
120
130
|
watch=run_config.watch,
|
|
121
131
|
)
|
|
122
132
|
logger.info(f"feature vector merge job started, run id = {run.uid()}")
|
|
123
|
-
return RemoteVectorResponse(vector, run)
|
|
133
|
+
return RemoteVectorResponse(vector, run, with_indexes)
|
|
124
134
|
|
|
125
135
|
|
|
126
136
|
class RemoteVectorResponse:
|
|
127
137
|
"""get_offline_features response object"""
|
|
128
138
|
|
|
129
|
-
def __init__(self, vector, run):
|
|
139
|
+
def __init__(self, vector, run, with_indexes=False):
|
|
130
140
|
self.run = run
|
|
131
141
|
self.vector = vector
|
|
142
|
+
self.with_indexes = with_indexes or self.vector.spec.with_indexes
|
|
132
143
|
|
|
133
144
|
@property
|
|
134
145
|
def status(self):
|
|
@@ -147,12 +158,18 @@ class RemoteVectorResponse:
|
|
|
147
158
|
:param df_module: optional, py module used to create the DataFrame (e.g. pd, dd, cudf, ..)
|
|
148
159
|
:param kwargs: extended DataItem.as_df() args
|
|
149
160
|
"""
|
|
161
|
+
|
|
150
162
|
file_format = kwargs.get("format")
|
|
151
163
|
if not file_format:
|
|
152
164
|
file_format = self.run.status.results["target"]["kind"]
|
|
153
|
-
|
|
165
|
+
df = mlrun.get_dataitem(self.target_uri).as_df(
|
|
154
166
|
columns=columns, df_module=df_module, format=file_format, **kwargs
|
|
155
167
|
)
|
|
168
|
+
if self.with_indexes:
|
|
169
|
+
df.set_index(
|
|
170
|
+
list(self.vector.spec.entity_fields.keys()), inplace=True, drop=True
|
|
171
|
+
)
|
|
172
|
+
return df
|
|
156
173
|
|
|
157
174
|
@property
|
|
158
175
|
def target_uri(self):
|
|
@@ -166,17 +183,18 @@ import mlrun
|
|
|
166
183
|
import mlrun.feature_store.retrieval
|
|
167
184
|
from mlrun.datastore.targets import get_target_driver
|
|
168
185
|
def merge_handler(context, vector_uri, target, entity_rows=None,
|
|
169
|
-
|
|
186
|
+
entity_timestamp_column=None, drop_columns=None, with_indexes=None, query=None,
|
|
187
|
+
engine_args=None, order_by=None, start_time=None, end_time=None, timestamp_for_filtering=None):
|
|
170
188
|
vector = context.get_store_resource(vector_uri)
|
|
171
189
|
store_target = get_target_driver(target, vector)
|
|
172
|
-
entity_timestamp_column = timestamp_column or vector.spec.timestamp_field
|
|
173
190
|
if entity_rows:
|
|
174
191
|
entity_rows = entity_rows.as_df()
|
|
175
192
|
|
|
176
193
|
context.logger.info(f"starting vector merge task to {vector.uri}")
|
|
177
194
|
merger = mlrun.feature_store.retrieval.{{{engine}}}(vector, **(engine_args or {}))
|
|
178
195
|
merger.start(entity_rows, entity_timestamp_column, store_target, drop_columns, with_indexes=with_indexes,
|
|
179
|
-
query=query,
|
|
196
|
+
query=query, order_by=order_by, start_time=start_time, end_time=end_time,
|
|
197
|
+
timestamp_for_filtering=timestamp_for_filtering)
|
|
180
198
|
|
|
181
199
|
target = vector.status.targets[store_target.name].to_dict()
|
|
182
200
|
context.log_result('feature_vector', vector.uri)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -16,143 +16,16 @@ import re
|
|
|
16
16
|
|
|
17
17
|
import pandas as pd
|
|
18
18
|
|
|
19
|
-
from ..feature_vector import OfflineVectorResponse
|
|
20
19
|
from .base import BaseMerger
|
|
21
20
|
|
|
22
21
|
|
|
23
22
|
class LocalFeatureMerger(BaseMerger):
|
|
24
23
|
engine = "local"
|
|
24
|
+
support_offline = True
|
|
25
25
|
|
|
26
26
|
def __init__(self, vector, **engine_args):
|
|
27
27
|
super().__init__(vector, **engine_args)
|
|
28
28
|
|
|
29
|
-
def _generate_vector(
|
|
30
|
-
self,
|
|
31
|
-
entity_rows,
|
|
32
|
-
entity_timestamp_column,
|
|
33
|
-
feature_set_objects,
|
|
34
|
-
feature_set_fields,
|
|
35
|
-
start_time=None,
|
|
36
|
-
end_time=None,
|
|
37
|
-
query=None,
|
|
38
|
-
):
|
|
39
|
-
|
|
40
|
-
feature_sets = []
|
|
41
|
-
dfs = []
|
|
42
|
-
keys = (
|
|
43
|
-
[]
|
|
44
|
-
) # the struct of key is [[[],[]], ..] So that each record indicates which way the corresponding
|
|
45
|
-
# featureset is connected to the previous one, and within each record the left keys are indicated in index 0
|
|
46
|
-
# and the right keys in index 1, this keys will be the keys that will be used in this join
|
|
47
|
-
all_columns = []
|
|
48
|
-
|
|
49
|
-
fs_link_list = self._create_linked_relation_list(
|
|
50
|
-
feature_set_objects, feature_set_fields
|
|
51
|
-
)
|
|
52
|
-
|
|
53
|
-
for node in fs_link_list:
|
|
54
|
-
name = node.name
|
|
55
|
-
feature_set = feature_set_objects[name]
|
|
56
|
-
feature_sets.append(feature_set)
|
|
57
|
-
columns = feature_set_fields[name]
|
|
58
|
-
column_names = [name for name, alias in columns]
|
|
59
|
-
|
|
60
|
-
for col in node.data["save_cols"]:
|
|
61
|
-
if col not in column_names:
|
|
62
|
-
self._append_drop_column(col)
|
|
63
|
-
column_names += node.data["save_cols"]
|
|
64
|
-
|
|
65
|
-
# handling case where there are multiple feature sets and user creates vector where entity_timestamp_
|
|
66
|
-
# column is from a specific feature set (can't be entity timestamp)
|
|
67
|
-
if (
|
|
68
|
-
entity_timestamp_column in column_names
|
|
69
|
-
or feature_set.spec.timestamp_key == entity_timestamp_column
|
|
70
|
-
):
|
|
71
|
-
df = feature_set.to_dataframe(
|
|
72
|
-
columns=column_names,
|
|
73
|
-
start_time=start_time,
|
|
74
|
-
end_time=end_time,
|
|
75
|
-
time_column=entity_timestamp_column,
|
|
76
|
-
)
|
|
77
|
-
else:
|
|
78
|
-
df = feature_set.to_dataframe(
|
|
79
|
-
columns=column_names,
|
|
80
|
-
time_column=entity_timestamp_column,
|
|
81
|
-
)
|
|
82
|
-
if df.index.names[0]:
|
|
83
|
-
df.reset_index(inplace=True)
|
|
84
|
-
column_names += node.data["save_index"]
|
|
85
|
-
node.data["save_cols"] += node.data["save_index"]
|
|
86
|
-
entity_timestamp_column_list = (
|
|
87
|
-
[entity_timestamp_column]
|
|
88
|
-
if entity_timestamp_column
|
|
89
|
-
else feature_set.spec.timestamp_key
|
|
90
|
-
)
|
|
91
|
-
if entity_timestamp_column_list:
|
|
92
|
-
column_names += entity_timestamp_column_list
|
|
93
|
-
node.data["save_cols"] += entity_timestamp_column_list
|
|
94
|
-
# rename columns to be unique for each feature set
|
|
95
|
-
rename_col_dict = {
|
|
96
|
-
col: f"{col}_{name}"
|
|
97
|
-
for col in column_names
|
|
98
|
-
if col not in node.data["save_cols"]
|
|
99
|
-
}
|
|
100
|
-
df.rename(
|
|
101
|
-
columns=rename_col_dict,
|
|
102
|
-
inplace=True,
|
|
103
|
-
)
|
|
104
|
-
|
|
105
|
-
dfs.append(df)
|
|
106
|
-
keys.append([node.data["left_keys"], node.data["right_keys"]])
|
|
107
|
-
|
|
108
|
-
# update alias according to the unique column name
|
|
109
|
-
new_columns = []
|
|
110
|
-
for col, alias in columns:
|
|
111
|
-
if col in rename_col_dict and alias:
|
|
112
|
-
new_columns.append((rename_col_dict[col], alias))
|
|
113
|
-
elif col in rename_col_dict and not alias:
|
|
114
|
-
new_columns.append((rename_col_dict[col], col))
|
|
115
|
-
else:
|
|
116
|
-
new_columns.append((col, alias))
|
|
117
|
-
all_columns.append(new_columns)
|
|
118
|
-
self._update_alias(
|
|
119
|
-
dictionary={name: alias for name, alias in new_columns if alias}
|
|
120
|
-
)
|
|
121
|
-
|
|
122
|
-
self.merge(
|
|
123
|
-
entity_df=entity_rows,
|
|
124
|
-
entity_timestamp_column=entity_timestamp_column,
|
|
125
|
-
featuresets=feature_sets,
|
|
126
|
-
featureset_dfs=dfs,
|
|
127
|
-
keys=keys,
|
|
128
|
-
all_columns=all_columns,
|
|
129
|
-
)
|
|
130
|
-
|
|
131
|
-
self._result_df.drop(columns=self._drop_columns, inplace=True, errors="ignore")
|
|
132
|
-
|
|
133
|
-
# renaming all columns according to self._alias
|
|
134
|
-
self._result_df.rename(
|
|
135
|
-
columns=self._alias,
|
|
136
|
-
inplace=True,
|
|
137
|
-
)
|
|
138
|
-
if self.vector.status.label_column:
|
|
139
|
-
self._result_df.dropna(
|
|
140
|
-
subset=[self.vector.status.label_column],
|
|
141
|
-
inplace=True,
|
|
142
|
-
)
|
|
143
|
-
# filter joined data frame by the query param
|
|
144
|
-
if query:
|
|
145
|
-
self._result_df.query(query, inplace=True)
|
|
146
|
-
|
|
147
|
-
if self._drop_indexes:
|
|
148
|
-
self._result_df.reset_index(drop=True, inplace=True)
|
|
149
|
-
else:
|
|
150
|
-
self._set_indexes(self._result_df)
|
|
151
|
-
|
|
152
|
-
self._write_to_target()
|
|
153
|
-
|
|
154
|
-
return OfflineVectorResponse(self)
|
|
155
|
-
|
|
156
29
|
def _asof_join(
|
|
157
30
|
self,
|
|
158
31
|
entity_df,
|
|
@@ -161,7 +34,6 @@ class LocalFeatureMerger(BaseMerger):
|
|
|
161
34
|
featureset_df,
|
|
162
35
|
left_keys: list,
|
|
163
36
|
right_keys: list,
|
|
164
|
-
columns: list,
|
|
165
37
|
):
|
|
166
38
|
|
|
167
39
|
indexes = None
|
|
@@ -176,7 +48,7 @@ class LocalFeatureMerger(BaseMerger):
|
|
|
176
48
|
featureset_df[featureset.spec.timestamp_key]
|
|
177
49
|
)
|
|
178
50
|
entity_df.sort_values(by=entity_timestamp_column, inplace=True)
|
|
179
|
-
featureset_df.sort_values(by=
|
|
51
|
+
featureset_df.sort_values(by=featureset.spec.timestamp_key, inplace=True)
|
|
180
52
|
|
|
181
53
|
merged_df = pd.merge_asof(
|
|
182
54
|
entity_df,
|
|
@@ -191,7 +63,6 @@ class LocalFeatureMerger(BaseMerger):
|
|
|
191
63
|
for col in merged_df.columns:
|
|
192
64
|
if re.findall(f"_{featureset.metadata.name}_$", col):
|
|
193
65
|
self._append_drop_column(col)
|
|
194
|
-
|
|
195
66
|
# Undo indexing tricks for asof merge
|
|
196
67
|
# to return the correct indexes and not
|
|
197
68
|
# overload `index` columns
|
|
@@ -213,7 +84,6 @@ class LocalFeatureMerger(BaseMerger):
|
|
|
213
84
|
featureset_df,
|
|
214
85
|
left_keys: list,
|
|
215
86
|
right_keys: list,
|
|
216
|
-
columns: list,
|
|
217
87
|
):
|
|
218
88
|
fs_name = featureset.metadata.name
|
|
219
89
|
merged_df = pd.merge(
|
|
@@ -228,3 +98,40 @@ class LocalFeatureMerger(BaseMerger):
|
|
|
228
98
|
if re.findall(f"_{fs_name}_$", col):
|
|
229
99
|
self._append_drop_column(col)
|
|
230
100
|
return merged_df
|
|
101
|
+
|
|
102
|
+
def _create_engine_env(self):
|
|
103
|
+
pass
|
|
104
|
+
|
|
105
|
+
def _get_engine_df(
|
|
106
|
+
self,
|
|
107
|
+
feature_set,
|
|
108
|
+
feature_set_name,
|
|
109
|
+
column_names=None,
|
|
110
|
+
start_time=None,
|
|
111
|
+
end_time=None,
|
|
112
|
+
time_column=None,
|
|
113
|
+
):
|
|
114
|
+
df = feature_set.to_dataframe(
|
|
115
|
+
columns=column_names,
|
|
116
|
+
start_time=start_time,
|
|
117
|
+
end_time=end_time,
|
|
118
|
+
time_column=time_column,
|
|
119
|
+
)
|
|
120
|
+
if df.index.names[0]:
|
|
121
|
+
df.reset_index(inplace=True)
|
|
122
|
+
return df
|
|
123
|
+
|
|
124
|
+
def _rename_columns_and_select(self, df, rename_col_dict, columns=None):
|
|
125
|
+
df.rename(
|
|
126
|
+
columns=rename_col_dict,
|
|
127
|
+
inplace=True,
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
def _drop_columns_from_result(self):
|
|
131
|
+
self._result_df.drop(columns=self._drop_columns, inplace=True, errors="ignore")
|
|
132
|
+
|
|
133
|
+
def _filter(self, query):
|
|
134
|
+
self._result_df.query(query, inplace=True)
|
|
135
|
+
|
|
136
|
+
def _order_by(self, order_by_active):
|
|
137
|
+
self._result_df.sort_values(by=order_by_active, ignore_index=True, inplace=True)
|