mlrun 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +3 -3
- mlrun/__main__.py +79 -37
- mlrun/api/__init__.py +1 -1
- mlrun/api/api/__init__.py +1 -1
- mlrun/api/api/api.py +4 -4
- mlrun/api/api/deps.py +10 -21
- mlrun/api/api/endpoints/__init__.py +1 -1
- mlrun/api/api/endpoints/artifacts.py +64 -36
- mlrun/api/api/endpoints/auth.py +4 -4
- mlrun/api/api/endpoints/background_tasks.py +11 -11
- mlrun/api/api/endpoints/client_spec.py +5 -5
- mlrun/api/api/endpoints/clusterization_spec.py +6 -4
- mlrun/api/api/endpoints/feature_store.py +124 -115
- mlrun/api/api/endpoints/files.py +22 -14
- mlrun/api/api/endpoints/frontend_spec.py +28 -21
- mlrun/api/api/endpoints/functions.py +142 -87
- mlrun/api/api/endpoints/grafana_proxy.py +89 -442
- mlrun/api/api/endpoints/healthz.py +20 -7
- mlrun/api/api/endpoints/hub.py +320 -0
- mlrun/api/api/endpoints/internal/__init__.py +1 -1
- mlrun/api/api/endpoints/internal/config.py +1 -1
- mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
- mlrun/api/api/endpoints/logs.py +11 -11
- mlrun/api/api/endpoints/model_endpoints.py +74 -70
- mlrun/api/api/endpoints/operations.py +13 -9
- mlrun/api/api/endpoints/pipelines.py +93 -88
- mlrun/api/api/endpoints/projects.py +35 -35
- mlrun/api/api/endpoints/runs.py +69 -27
- mlrun/api/api/endpoints/runtime_resources.py +28 -28
- mlrun/api/api/endpoints/schedules.py +98 -41
- mlrun/api/api/endpoints/secrets.py +37 -32
- mlrun/api/api/endpoints/submit.py +12 -12
- mlrun/api/api/endpoints/tags.py +20 -22
- mlrun/api/api/utils.py +251 -42
- mlrun/api/constants.py +1 -1
- mlrun/api/crud/__init__.py +18 -15
- mlrun/api/crud/artifacts.py +10 -10
- mlrun/api/crud/client_spec.py +4 -4
- mlrun/api/crud/clusterization_spec.py +3 -3
- mlrun/api/crud/feature_store.py +54 -46
- mlrun/api/crud/functions.py +3 -3
- mlrun/api/crud/hub.py +312 -0
- mlrun/api/crud/logs.py +11 -9
- mlrun/api/crud/model_monitoring/__init__.py +3 -3
- mlrun/api/crud/model_monitoring/grafana.py +435 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
- mlrun/api/crud/notifications.py +149 -0
- mlrun/api/crud/pipelines.py +67 -52
- mlrun/api/crud/projects.py +51 -23
- mlrun/api/crud/runs.py +7 -5
- mlrun/api/crud/runtime_resources.py +13 -13
- mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
- mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
- mlrun/api/crud/runtimes/nuclio/function.py +505 -0
- mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
- mlrun/api/crud/secrets.py +88 -46
- mlrun/api/crud/tags.py +5 -5
- mlrun/api/db/__init__.py +1 -1
- mlrun/api/db/base.py +102 -54
- mlrun/api/db/init_db.py +2 -3
- mlrun/api/db/session.py +4 -12
- mlrun/api/db/sqldb/__init__.py +1 -1
- mlrun/api/db/sqldb/db.py +439 -196
- mlrun/api/db/sqldb/helpers.py +1 -1
- mlrun/api/db/sqldb/models/__init__.py +3 -3
- mlrun/api/db/sqldb/models/models_mysql.py +82 -64
- mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
- mlrun/api/db/sqldb/session.py +27 -20
- mlrun/api/initial_data.py +82 -24
- mlrun/api/launcher.py +196 -0
- mlrun/api/main.py +91 -22
- mlrun/api/middlewares.py +6 -5
- mlrun/api/migrations_mysql/env.py +1 -1
- mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
- mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
- mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
- mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
- mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
- mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
- mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/env.py +1 -1
- mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
- mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
- mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
- mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
- mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
- mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
- mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
- mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
- mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
- mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
- mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
- mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
- mlrun/api/schemas/__init__.py +216 -138
- mlrun/api/utils/__init__.py +1 -1
- mlrun/api/utils/asyncio.py +1 -1
- mlrun/api/utils/auth/__init__.py +1 -1
- mlrun/api/utils/auth/providers/__init__.py +1 -1
- mlrun/api/utils/auth/providers/base.py +7 -7
- mlrun/api/utils/auth/providers/nop.py +6 -7
- mlrun/api/utils/auth/providers/opa.py +17 -17
- mlrun/api/utils/auth/verifier.py +36 -34
- mlrun/api/utils/background_tasks.py +24 -24
- mlrun/{builder.py → api/utils/builder.py} +216 -123
- mlrun/api/utils/clients/__init__.py +1 -1
- mlrun/api/utils/clients/chief.py +19 -4
- mlrun/api/utils/clients/iguazio.py +106 -60
- mlrun/api/utils/clients/log_collector.py +1 -1
- mlrun/api/utils/clients/nuclio.py +23 -23
- mlrun/api/utils/clients/protocols/grpc.py +2 -2
- mlrun/api/utils/db/__init__.py +1 -1
- mlrun/api/utils/db/alembic.py +1 -1
- mlrun/api/utils/db/backup.py +1 -1
- mlrun/api/utils/db/mysql.py +24 -25
- mlrun/api/utils/db/sql_collation.py +1 -1
- mlrun/api/utils/db/sqlite_migration.py +2 -2
- mlrun/api/utils/events/__init__.py +14 -0
- mlrun/api/utils/events/base.py +57 -0
- mlrun/api/utils/events/events_factory.py +41 -0
- mlrun/api/utils/events/iguazio.py +217 -0
- mlrun/api/utils/events/nop.py +55 -0
- mlrun/api/utils/helpers.py +16 -13
- mlrun/api/utils/memory_reports.py +1 -1
- mlrun/api/utils/periodic.py +6 -3
- mlrun/api/utils/projects/__init__.py +1 -1
- mlrun/api/utils/projects/follower.py +33 -33
- mlrun/api/utils/projects/leader.py +36 -34
- mlrun/api/utils/projects/member.py +27 -27
- mlrun/api/utils/projects/remotes/__init__.py +1 -1
- mlrun/api/utils/projects/remotes/follower.py +13 -13
- mlrun/api/utils/projects/remotes/leader.py +10 -10
- mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
- mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
- mlrun/api/utils/scheduler.py +140 -51
- mlrun/api/utils/singletons/__init__.py +1 -1
- mlrun/api/utils/singletons/db.py +9 -15
- mlrun/api/utils/singletons/k8s.py +677 -5
- mlrun/api/utils/singletons/logs_dir.py +1 -1
- mlrun/api/utils/singletons/project_member.py +1 -1
- mlrun/api/utils/singletons/scheduler.py +1 -1
- mlrun/artifacts/__init__.py +2 -2
- mlrun/artifacts/base.py +8 -2
- mlrun/artifacts/dataset.py +5 -3
- mlrun/artifacts/manager.py +7 -1
- mlrun/artifacts/model.py +15 -4
- mlrun/artifacts/plots.py +1 -1
- mlrun/common/__init__.py +1 -1
- mlrun/common/constants.py +15 -0
- mlrun/common/model_monitoring.py +209 -0
- mlrun/common/schemas/__init__.py +167 -0
- mlrun/{api → common}/schemas/artifact.py +13 -14
- mlrun/{api → common}/schemas/auth.py +10 -8
- mlrun/{api → common}/schemas/background_task.py +3 -3
- mlrun/{api → common}/schemas/client_spec.py +1 -1
- mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
- mlrun/{api → common}/schemas/constants.py +21 -8
- mlrun/common/schemas/events.py +36 -0
- mlrun/{api → common}/schemas/feature_store.py +2 -1
- mlrun/{api → common}/schemas/frontend_spec.py +7 -6
- mlrun/{api → common}/schemas/function.py +5 -5
- mlrun/{api → common}/schemas/http.py +3 -3
- mlrun/common/schemas/hub.py +134 -0
- mlrun/{api → common}/schemas/k8s.py +3 -3
- mlrun/{api → common}/schemas/memory_reports.py +1 -1
- mlrun/common/schemas/model_endpoints.py +342 -0
- mlrun/common/schemas/notification.py +57 -0
- mlrun/{api → common}/schemas/object.py +6 -6
- mlrun/{api → common}/schemas/pipeline.py +3 -3
- mlrun/{api → common}/schemas/project.py +6 -5
- mlrun/common/schemas/regex.py +24 -0
- mlrun/common/schemas/runs.py +30 -0
- mlrun/{api → common}/schemas/runtime_resource.py +3 -3
- mlrun/{api → common}/schemas/schedule.py +19 -7
- mlrun/{api → common}/schemas/secret.py +3 -3
- mlrun/{api → common}/schemas/tag.py +2 -2
- mlrun/common/types.py +25 -0
- mlrun/config.py +152 -20
- mlrun/data_types/__init__.py +7 -2
- mlrun/data_types/data_types.py +4 -2
- mlrun/data_types/infer.py +1 -1
- mlrun/data_types/spark.py +10 -3
- mlrun/datastore/__init__.py +10 -3
- mlrun/datastore/azure_blob.py +1 -1
- mlrun/datastore/base.py +185 -53
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/filestore.py +1 -1
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +1 -1
- mlrun/datastore/s3.py +1 -1
- mlrun/datastore/sources.py +192 -70
- mlrun/datastore/spark_udf.py +44 -0
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/targets.py +115 -45
- mlrun/datastore/utils.py +127 -5
- mlrun/datastore/v3io.py +1 -1
- mlrun/datastore/wasbfs/__init__.py +1 -1
- mlrun/datastore/wasbfs/fs.py +1 -1
- mlrun/db/__init__.py +7 -5
- mlrun/db/base.py +112 -68
- mlrun/db/httpdb.py +445 -277
- mlrun/db/nopdb.py +491 -0
- mlrun/db/sqldb.py +112 -65
- mlrun/errors.py +6 -1
- mlrun/execution.py +44 -22
- mlrun/feature_store/__init__.py +1 -1
- mlrun/feature_store/api.py +143 -95
- mlrun/feature_store/common.py +16 -20
- mlrun/feature_store/feature_set.py +42 -12
- mlrun/feature_store/feature_vector.py +32 -21
- mlrun/feature_store/ingestion.py +9 -12
- mlrun/feature_store/retrieval/__init__.py +3 -2
- mlrun/feature_store/retrieval/base.py +388 -66
- mlrun/feature_store/retrieval/dask_merger.py +63 -151
- mlrun/feature_store/retrieval/job.py +30 -12
- mlrun/feature_store/retrieval/local_merger.py +40 -133
- mlrun/feature_store/retrieval/spark_merger.py +129 -127
- mlrun/feature_store/retrieval/storey_merger.py +173 -0
- mlrun/feature_store/steps.py +132 -15
- mlrun/features.py +8 -3
- mlrun/frameworks/__init__.py +1 -1
- mlrun/frameworks/_common/__init__.py +1 -1
- mlrun/frameworks/_common/artifacts_library.py +1 -1
- mlrun/frameworks/_common/mlrun_interface.py +1 -1
- mlrun/frameworks/_common/model_handler.py +1 -1
- mlrun/frameworks/_common/plan.py +1 -1
- mlrun/frameworks/_common/producer.py +1 -1
- mlrun/frameworks/_common/utils.py +1 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
- mlrun/frameworks/_dl_common/model_handler.py +1 -1
- mlrun/frameworks/_dl_common/utils.py +1 -1
- mlrun/frameworks/_ml_common/__init__.py +1 -1
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/model_handler.py +1 -1
- mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
- mlrun/frameworks/_ml_common/producer.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +1 -1
- mlrun/frameworks/auto_mlrun/__init__.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
- mlrun/frameworks/huggingface/__init__.py +1 -1
- mlrun/frameworks/huggingface/model_server.py +1 -1
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/lgbm/model_server.py +1 -1
- mlrun/frameworks/lgbm/utils.py +1 -1
- mlrun/frameworks/onnx/__init__.py +1 -1
- mlrun/frameworks/onnx/dataset.py +1 -1
- mlrun/frameworks/onnx/mlrun_interface.py +1 -1
- mlrun/frameworks/onnx/model_handler.py +1 -1
- mlrun/frameworks/onnx/model_server.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +1 -1
- mlrun/frameworks/pytorch/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
- mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
- mlrun/frameworks/pytorch/model_handler.py +1 -1
- mlrun/frameworks/pytorch/model_server.py +1 -1
- mlrun/frameworks/pytorch/utils.py +1 -1
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/estimator.py +1 -1
- mlrun/frameworks/sklearn/metric.py +1 -1
- mlrun/frameworks/sklearn/metrics_library.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
- mlrun/frameworks/sklearn/model_handler.py +1 -1
- mlrun/frameworks/sklearn/utils.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
- mlrun/frameworks/tf_keras/model_handler.py +1 -1
- mlrun/frameworks/tf_keras/model_server.py +1 -1
- mlrun/frameworks/tf_keras/utils.py +1 -1
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
- mlrun/frameworks/xgboost/model_handler.py +1 -1
- mlrun/frameworks/xgboost/utils.py +1 -1
- mlrun/k8s_utils.py +14 -765
- mlrun/kfpops.py +14 -17
- mlrun/launcher/__init__.py +13 -0
- mlrun/launcher/base.py +406 -0
- mlrun/launcher/client.py +159 -0
- mlrun/launcher/factory.py +50 -0
- mlrun/launcher/local.py +276 -0
- mlrun/launcher/remote.py +178 -0
- mlrun/lists.py +10 -2
- mlrun/mlutils/__init__.py +1 -1
- mlrun/mlutils/data.py +1 -1
- mlrun/mlutils/models.py +1 -1
- mlrun/mlutils/plots.py +1 -1
- mlrun/model.py +252 -14
- mlrun/model_monitoring/__init__.py +41 -0
- mlrun/model_monitoring/features_drift_table.py +1 -1
- mlrun/model_monitoring/helpers.py +123 -38
- mlrun/model_monitoring/model_endpoint.py +144 -0
- mlrun/model_monitoring/model_monitoring_batch.py +310 -259
- mlrun/model_monitoring/stores/__init__.py +106 -0
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
- mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
- mlrun/model_monitoring/stores/models/__init__.py +23 -0
- mlrun/model_monitoring/stores/models/base.py +18 -0
- mlrun/model_monitoring/stores/models/mysql.py +100 -0
- mlrun/model_monitoring/stores/models/sqlite.py +98 -0
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
- mlrun/model_monitoring/stream_processing_fs.py +239 -271
- mlrun/package/__init__.py +163 -0
- mlrun/package/context_handler.py +325 -0
- mlrun/package/errors.py +47 -0
- mlrun/package/packager.py +298 -0
- mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
- mlrun/package/packagers/default_packager.py +422 -0
- mlrun/package/packagers/numpy_packagers.py +612 -0
- mlrun/package/packagers/pandas_packagers.py +968 -0
- mlrun/package/packagers/python_standard_library_packagers.py +616 -0
- mlrun/package/packagers_manager.py +786 -0
- mlrun/package/utils/__init__.py +53 -0
- mlrun/package/utils/_archiver.py +226 -0
- mlrun/package/utils/_formatter.py +211 -0
- mlrun/package/utils/_pickler.py +234 -0
- mlrun/package/utils/_supported_format.py +71 -0
- mlrun/package/utils/log_hint_utils.py +93 -0
- mlrun/package/utils/type_hint_utils.py +298 -0
- mlrun/platforms/__init__.py +1 -1
- mlrun/platforms/iguazio.py +34 -2
- mlrun/platforms/other.py +1 -1
- mlrun/projects/__init__.py +1 -1
- mlrun/projects/operations.py +14 -9
- mlrun/projects/pipelines.py +31 -13
- mlrun/projects/project.py +762 -238
- mlrun/render.py +49 -19
- mlrun/run.py +57 -326
- mlrun/runtimes/__init__.py +3 -9
- mlrun/runtimes/base.py +247 -784
- mlrun/runtimes/constants.py +1 -1
- mlrun/runtimes/daskjob.py +45 -41
- mlrun/runtimes/funcdoc.py +43 -7
- mlrun/runtimes/function.py +66 -656
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/generators.py +1 -1
- mlrun/runtimes/kubejob.py +99 -116
- mlrun/runtimes/local.py +59 -66
- mlrun/runtimes/mpijob/__init__.py +1 -1
- mlrun/runtimes/mpijob/abstract.py +13 -15
- mlrun/runtimes/mpijob/v1.py +3 -1
- mlrun/runtimes/mpijob/v1alpha1.py +1 -1
- mlrun/runtimes/nuclio.py +1 -1
- mlrun/runtimes/pod.py +51 -26
- mlrun/runtimes/remotesparkjob.py +3 -1
- mlrun/runtimes/serving.py +12 -4
- mlrun/runtimes/sparkjob/__init__.py +1 -2
- mlrun/runtimes/sparkjob/abstract.py +44 -31
- mlrun/runtimes/sparkjob/spark3job.py +11 -9
- mlrun/runtimes/utils.py +61 -42
- mlrun/secrets.py +16 -18
- mlrun/serving/__init__.py +3 -2
- mlrun/serving/merger.py +1 -1
- mlrun/serving/remote.py +1 -1
- mlrun/serving/routers.py +39 -42
- mlrun/serving/server.py +23 -13
- mlrun/serving/serving_wrapper.py +1 -1
- mlrun/serving/states.py +172 -39
- mlrun/serving/utils.py +1 -1
- mlrun/serving/v1_serving.py +1 -1
- mlrun/serving/v2_serving.py +29 -21
- mlrun/utils/__init__.py +1 -2
- mlrun/utils/async_http.py +8 -1
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +2 -2
- mlrun/utils/condition_evaluator.py +65 -0
- mlrun/utils/db.py +52 -0
- mlrun/utils/helpers.py +188 -13
- mlrun/utils/http.py +89 -54
- mlrun/utils/logger.py +48 -8
- mlrun/utils/model_monitoring.py +132 -100
- mlrun/utils/notifications/__init__.py +1 -1
- mlrun/utils/notifications/notification/__init__.py +8 -6
- mlrun/utils/notifications/notification/base.py +20 -14
- mlrun/utils/notifications/notification/console.py +7 -4
- mlrun/utils/notifications/notification/git.py +36 -19
- mlrun/utils/notifications/notification/ipython.py +10 -8
- mlrun/utils/notifications/notification/slack.py +18 -13
- mlrun/utils/notifications/notification_pusher.py +377 -56
- mlrun/utils/regex.py +6 -1
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +1 -1
- mlrun/utils/vault.py +270 -269
- mlrun/utils/version/__init__.py +1 -1
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +1 -1
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
- mlrun-1.4.0.dist-info/RECORD +434 -0
- mlrun/api/api/endpoints/marketplace.py +0 -257
- mlrun/api/crud/marketplace.py +0 -221
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
- mlrun/api/db/filedb/db.py +0 -518
- mlrun/api/schemas/marketplace.py +0 -128
- mlrun/api/schemas/model_endpoints.py +0 -185
- mlrun/db/filedb.py +0 -891
- mlrun/feature_store/retrieval/online.py +0 -92
- mlrun/model_monitoring/constants.py +0 -67
- mlrun/runtimes/package/context_handler.py +0 -711
- mlrun/runtimes/sparkjob/spark2job.py +0 -59
- mlrun-1.3.3.dist-info/RECORD +0 -381
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -12,17 +12,18 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
|
+
|
|
15
16
|
import mlrun
|
|
16
17
|
from mlrun.datastore.targets import get_offline_target
|
|
17
18
|
|
|
18
19
|
from ...runtimes import RemoteSparkRuntime
|
|
19
20
|
from ...runtimes.sparkjob.abstract import AbstractSparkRuntime
|
|
20
|
-
from ..feature_vector import OfflineVectorResponse
|
|
21
21
|
from .base import BaseMerger
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
class SparkFeatureMerger(BaseMerger):
|
|
25
25
|
engine = "spark"
|
|
26
|
+
support_offline = True
|
|
26
27
|
|
|
27
28
|
def __init__(self, vector, **engine_args):
|
|
28
29
|
super().__init__(vector, **engine_args)
|
|
@@ -33,109 +34,6 @@ class SparkFeatureMerger(BaseMerger):
|
|
|
33
34
|
def to_spark_df(self, session, path):
|
|
34
35
|
return session.read.load(path)
|
|
35
36
|
|
|
36
|
-
def _generate_vector(
|
|
37
|
-
self,
|
|
38
|
-
entity_rows,
|
|
39
|
-
entity_timestamp_column,
|
|
40
|
-
feature_set_objects,
|
|
41
|
-
feature_set_fields,
|
|
42
|
-
start_time=None,
|
|
43
|
-
end_time=None,
|
|
44
|
-
query=None,
|
|
45
|
-
):
|
|
46
|
-
from pyspark.sql import SparkSession
|
|
47
|
-
from pyspark.sql.functions import col
|
|
48
|
-
|
|
49
|
-
if self.spark is None:
|
|
50
|
-
# create spark context
|
|
51
|
-
self.spark = SparkSession.builder.appName(
|
|
52
|
-
f"vector-merger-{self.vector.metadata.name}"
|
|
53
|
-
).getOrCreate()
|
|
54
|
-
|
|
55
|
-
feature_sets = []
|
|
56
|
-
dfs = []
|
|
57
|
-
|
|
58
|
-
for name, columns in feature_set_fields.items():
|
|
59
|
-
feature_set = feature_set_objects[name]
|
|
60
|
-
feature_sets.append(feature_set)
|
|
61
|
-
column_names = [name for name, alias in columns]
|
|
62
|
-
|
|
63
|
-
if feature_set.spec.passthrough:
|
|
64
|
-
if not feature_set.spec.source:
|
|
65
|
-
raise mlrun.errors.MLRunNotFoundError(
|
|
66
|
-
f"passthrough feature set {name} with no source"
|
|
67
|
-
)
|
|
68
|
-
source_kind = feature_set.spec.source.kind
|
|
69
|
-
source_path = feature_set.spec.source.path
|
|
70
|
-
else:
|
|
71
|
-
target = get_offline_target(feature_set)
|
|
72
|
-
if not target:
|
|
73
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
74
|
-
f"feature set {name} does not have offline targets"
|
|
75
|
-
)
|
|
76
|
-
source_kind = target.kind
|
|
77
|
-
source_path = target.get_target_path()
|
|
78
|
-
|
|
79
|
-
# handling case where there are multiple feature sets and user creates vector where
|
|
80
|
-
# entity_timestamp_column is from a specific feature set (can't be entity timestamp)
|
|
81
|
-
source_driver = mlrun.datastore.sources.source_kind_to_driver[source_kind]
|
|
82
|
-
if (
|
|
83
|
-
entity_timestamp_column in column_names
|
|
84
|
-
or feature_set.spec.timestamp_key == entity_timestamp_column
|
|
85
|
-
):
|
|
86
|
-
source = source_driver(
|
|
87
|
-
name=self.vector.metadata.name,
|
|
88
|
-
path=source_path,
|
|
89
|
-
time_field=entity_timestamp_column,
|
|
90
|
-
start_time=start_time,
|
|
91
|
-
end_time=end_time,
|
|
92
|
-
)
|
|
93
|
-
else:
|
|
94
|
-
source = source_driver(
|
|
95
|
-
name=self.vector.metadata.name,
|
|
96
|
-
path=source_path,
|
|
97
|
-
time_field=entity_timestamp_column,
|
|
98
|
-
)
|
|
99
|
-
|
|
100
|
-
# add the index/key to selected columns
|
|
101
|
-
timestamp_key = feature_set.spec.timestamp_key
|
|
102
|
-
|
|
103
|
-
df = source.to_spark_df(
|
|
104
|
-
self.spark, named_view=self.named_view, time_field=timestamp_key
|
|
105
|
-
)
|
|
106
|
-
|
|
107
|
-
if timestamp_key and timestamp_key not in column_names:
|
|
108
|
-
columns.append((timestamp_key, None))
|
|
109
|
-
for entity in feature_set.spec.entities.keys():
|
|
110
|
-
if entity not in column_names:
|
|
111
|
-
columns.append((entity, None))
|
|
112
|
-
|
|
113
|
-
# select requested columns and rename with alias where needed
|
|
114
|
-
df = df.select([col(name).alias(alias or name) for name, alias in columns])
|
|
115
|
-
dfs.append(df)
|
|
116
|
-
del df
|
|
117
|
-
|
|
118
|
-
# convert pandas entity_rows to spark DF if needed
|
|
119
|
-
if entity_rows is not None and not hasattr(entity_rows, "rdd"):
|
|
120
|
-
entity_rows = self.spark.createDataFrame(entity_rows)
|
|
121
|
-
|
|
122
|
-
# join the feature data frames
|
|
123
|
-
self.merge(entity_rows, entity_timestamp_column, feature_sets, dfs)
|
|
124
|
-
|
|
125
|
-
# filter joined data frame by the query param
|
|
126
|
-
if query:
|
|
127
|
-
self._result_df = self._result_df.filter(query)
|
|
128
|
-
|
|
129
|
-
self._result_df = self._result_df.drop(*self._drop_columns)
|
|
130
|
-
|
|
131
|
-
if self.vector.status.label_column:
|
|
132
|
-
self._result_df = self._result_df.dropna(
|
|
133
|
-
subset=[self.vector.status.label_column]
|
|
134
|
-
)
|
|
135
|
-
|
|
136
|
-
self._write_to_target()
|
|
137
|
-
return OfflineVectorResponse(self)
|
|
138
|
-
|
|
139
37
|
def _unpersist_df(self, df):
|
|
140
38
|
df.unpersist()
|
|
141
39
|
|
|
@@ -147,7 +45,6 @@ class SparkFeatureMerger(BaseMerger):
|
|
|
147
45
|
featureset_df,
|
|
148
46
|
left_keys: list,
|
|
149
47
|
right_keys: list,
|
|
150
|
-
columns: list,
|
|
151
48
|
):
|
|
152
49
|
|
|
153
50
|
"""Perform an as of join between entity and featureset.
|
|
@@ -170,30 +67,32 @@ class SparkFeatureMerger(BaseMerger):
|
|
|
170
67
|
from pyspark.sql.functions import col, monotonically_increasing_id, row_number
|
|
171
68
|
|
|
172
69
|
entity_with_id = entity_df.withColumn("_row_nr", monotonically_increasing_id())
|
|
173
|
-
|
|
174
|
-
|
|
70
|
+
rename_right_keys = {}
|
|
71
|
+
for key in right_keys + [featureset.spec.timestamp_key]:
|
|
72
|
+
if key in entity_df.columns:
|
|
73
|
+
rename_right_keys[key] = f"ft__{key}"
|
|
175
74
|
# get columns for projection
|
|
176
75
|
projection = [
|
|
177
|
-
col(col_name).alias(
|
|
178
|
-
f"ft__{col_name}"
|
|
179
|
-
if col_name in indexes + [entity_timestamp_column]
|
|
180
|
-
else col_name
|
|
181
|
-
)
|
|
76
|
+
col(col_name).alias(rename_right_keys.get(col_name, col_name))
|
|
182
77
|
for col_name in featureset_df.columns
|
|
183
78
|
]
|
|
184
79
|
|
|
185
80
|
aliased_featureset_df = featureset_df.select(projection)
|
|
81
|
+
right_timestamp = rename_right_keys.get(
|
|
82
|
+
featureset.spec.timestamp_key, featureset.spec.timestamp_key
|
|
83
|
+
)
|
|
186
84
|
|
|
187
85
|
# set join conditions
|
|
188
86
|
join_cond = (
|
|
189
87
|
entity_with_id[entity_timestamp_column]
|
|
190
|
-
>= aliased_featureset_df[
|
|
88
|
+
>= aliased_featureset_df[right_timestamp]
|
|
191
89
|
)
|
|
192
90
|
|
|
193
91
|
# join based on entities
|
|
194
|
-
for
|
|
92
|
+
for key_l, key_r in zip(left_keys, right_keys):
|
|
195
93
|
join_cond = join_cond & (
|
|
196
|
-
entity_with_id[
|
|
94
|
+
entity_with_id[key_l]
|
|
95
|
+
== aliased_featureset_df[rename_right_keys.get(key_r, key_r)]
|
|
197
96
|
)
|
|
198
97
|
|
|
199
98
|
conditional_join = entity_with_id.join(
|
|
@@ -201,20 +100,22 @@ class SparkFeatureMerger(BaseMerger):
|
|
|
201
100
|
)
|
|
202
101
|
|
|
203
102
|
window = Window.partitionBy("_row_nr").orderBy(
|
|
204
|
-
col(
|
|
103
|
+
col(right_timestamp).desc(),
|
|
205
104
|
)
|
|
206
105
|
filter_most_recent_feature_timestamp = conditional_join.withColumn(
|
|
207
106
|
"_rank", row_number().over(window)
|
|
208
107
|
).filter(col("_rank") == 1)
|
|
209
108
|
|
|
210
|
-
for key in
|
|
211
|
-
|
|
212
|
-
filter_most_recent_feature_timestamp
|
|
213
|
-
|
|
109
|
+
for key in right_keys + [featureset.spec.timestamp_key]:
|
|
110
|
+
if key in entity_df.columns + [entity_timestamp_column]:
|
|
111
|
+
filter_most_recent_feature_timestamp = (
|
|
112
|
+
filter_most_recent_feature_timestamp.drop(
|
|
113
|
+
aliased_featureset_df[f"ft__{key}"]
|
|
114
|
+
)
|
|
214
115
|
)
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
116
|
+
return filter_most_recent_feature_timestamp.drop("_row_nr", "_rank").orderBy(
|
|
117
|
+
col(entity_timestamp_column)
|
|
118
|
+
)
|
|
218
119
|
|
|
219
120
|
def _join(
|
|
220
121
|
self,
|
|
@@ -224,7 +125,6 @@ class SparkFeatureMerger(BaseMerger):
|
|
|
224
125
|
featureset_df,
|
|
225
126
|
left_keys: list,
|
|
226
127
|
right_keys: list,
|
|
227
|
-
columns: list,
|
|
228
128
|
):
|
|
229
129
|
|
|
230
130
|
"""
|
|
@@ -245,8 +145,19 @@ class SparkFeatureMerger(BaseMerger):
|
|
|
245
145
|
be prefixed with featureset_df name.
|
|
246
146
|
|
|
247
147
|
"""
|
|
248
|
-
|
|
249
|
-
|
|
148
|
+
if left_keys != right_keys:
|
|
149
|
+
join_cond = [
|
|
150
|
+
entity_df[key_l] == featureset_df[key_r]
|
|
151
|
+
for key_l, key_r in zip(left_keys, right_keys)
|
|
152
|
+
]
|
|
153
|
+
else:
|
|
154
|
+
join_cond = left_keys
|
|
155
|
+
|
|
156
|
+
merged_df = entity_df.join(
|
|
157
|
+
featureset_df,
|
|
158
|
+
join_cond,
|
|
159
|
+
how=self._join_type,
|
|
160
|
+
)
|
|
250
161
|
return merged_df
|
|
251
162
|
|
|
252
163
|
def get_df(self, to_pandas=True):
|
|
@@ -268,3 +179,94 @@ class SparkFeatureMerger(BaseMerger):
|
|
|
268
179
|
return RemoteSparkRuntime.default_image
|
|
269
180
|
else:
|
|
270
181
|
raise mlrun.errors.MLRunInvalidArgumentError(f"Unsupported kind '{kind}'")
|
|
182
|
+
|
|
183
|
+
def _create_engine_env(self):
|
|
184
|
+
from pyspark.sql import SparkSession
|
|
185
|
+
|
|
186
|
+
if self.spark is None:
|
|
187
|
+
# create spark context
|
|
188
|
+
self.spark = SparkSession.builder.appName(
|
|
189
|
+
f"vector-merger-{self.vector.metadata.name}"
|
|
190
|
+
).getOrCreate()
|
|
191
|
+
|
|
192
|
+
def _get_engine_df(
    self,
    feature_set,
    feature_set_name,
    column_names=None,
    start_time=None,
    end_time=None,
    time_column=None,
):
    """Read the requested columns of a feature set into a Spark DataFrame.

    For a passthrough feature set the data is read from the feature set's
    own source (using its kind, path and attributes); otherwise it is read
    from the offline target returned by ``get_offline_target``.

    :param feature_set:      feature set object to read
    :param feature_set_name: name used in error messages
    :param column_names:     feature columns to read; ``None`` is treated as
                             an empty list, so only the entity columns (and
                             the timestamp key, when set) are requested
    :param start_time:       forwarded to the source driver
    :param end_time:         forwarded to the source driver
    :param time_column:      forwarded to the source driver and to
                             ``to_spark_df`` as ``time_field``
    :return: the result of ``source.to_spark_df(...)``
    :raises mlrun.errors.MLRunNotFoundError: passthrough feature set without
        a source
    :raises mlrun.errors.MLRunInvalidArgumentError: feature set without an
        offline target
    """
    source_kwargs = {}
    if feature_set.spec.passthrough:
        if not feature_set.spec.source:
            raise mlrun.errors.MLRunNotFoundError(
                f"passthrough feature set {feature_set_name} with no source"
            )
        source_kind = feature_set.spec.source.kind
        source_path = feature_set.spec.source.path
        source_kwargs.update(feature_set.spec.source.attributes)
    else:
        target = get_offline_target(feature_set)
        if not target:
            raise mlrun.errors.MLRunInvalidArgumentError(
                f"feature set {feature_set_name} does not have offline targets"
            )
        source_kind = target.kind
        source_path = target.get_target_path()

    # pick the source class that matches the kind of data being read
    source_driver = mlrun.datastore.sources.source_kind_to_driver[source_kind]

    source = source_driver(
        name=self.vector.metadata.name,
        path=source_path,
        time_field=time_column,
        start_time=start_time,
        end_time=end_time,
        **source_kwargs,
    )

    # column_names defaults to None; build a fresh list so the default does
    # not raise a TypeError and the caller's list is never mutated
    columns = list(column_names or [])
    columns += [ent.name for ent in feature_set.spec.entities]
    timestamp_key = feature_set.spec.timestamp_key
    if timestamp_key and timestamp_key not in columns:
        columns.append(timestamp_key)

    return source.to_spark_df(
        self.spark,
        named_view=self.named_view,
        time_field=time_column,
        columns=columns,
    )
|
|
245
|
+
|
|
246
|
+
def _rename_columns_and_select(
    self,
    df,
    rename_col_dict,
    columns=None,
):
    """Select columns from ``df``, aliasing those listed in ``rename_col_dict``.

    :param df:              DataFrame to select from
    :param rename_col_dict: mapping of current column name -> new column name
    :param columns:         names to select; when empty or ``None`` the keys
                            of ``rename_col_dict`` are selected instead
    :return: the result of ``df.select`` over the aliased columns
    """
    from pyspark.sql.functions import col

    selected_names = columns or rename_col_dict.keys()
    projections = []
    for name in selected_names:
        # columns missing from the mapping keep their current name
        new_name = rename_col_dict.get(name, name)
        projections.append(col(name).alias(new_name))
    return df.select(projections)
|
|
260
|
+
|
|
261
|
+
def _drop_columns_from_result(self):
|
|
262
|
+
self._result_df = self._result_df.drop(*self._drop_columns)
|
|
263
|
+
|
|
264
|
+
def _filter(self, query):
|
|
265
|
+
self._result_df = self._result_df.filter(query)
|
|
266
|
+
|
|
267
|
+
def _order_by(self, order_by_active):
    """Sort ``self._result_df`` by the given columns, ascending with nulls last.

    :param order_by_active: iterable of column names, in sort priority order
    """
    from pyspark.sql.functions import col

    sort_keys = []
    for column_name in order_by_active:
        sort_keys.append(col(column_name).asc_nulls_last())
    self._result_df = self._result_df.orderBy(*sort_keys)
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
# Copyright 2018 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import mlrun
|
|
15
|
+
from mlrun.datastore.store_resources import ResourceCache
|
|
16
|
+
from mlrun.datastore.targets import get_online_target
|
|
17
|
+
from mlrun.serving.server import create_graph_server
|
|
18
|
+
|
|
19
|
+
from ..feature_vector import OnlineVectorService
|
|
20
|
+
from .base import BaseMerger
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class StoreyFeatureMerger(BaseMerger):
    """Merger that serves a feature vector online through a storey graph."""

    engine = "storey"
    support_online = True

    def __init__(self, vector, **engine_args):
        """Initialise the base merger and keep the optional ``impute_policy``."""
        super().__init__(vector, **engine_args)
        self.impute_policy = engine_args.get("impute_policy")

    def _generate_online_feature_vector_graph(
        self,
        entity_keys,
        feature_set_fields,
        feature_set_objects,
        fixed_window_type,
    ):
        """Build the serving graph that queries every feature set by key.

        A copy of the vector's graph is extended with one ``storey.QueryByKey``
        step per feature set (preceded by a ``storey.Rename`` step when the
        join keys differ from the feature set's own key names), followed by
        the steps the user defined on the vector graph, if any.

        :param entity_keys:         passed to ``_create_linked_relation_list``
        :param feature_set_fields:  feature set name -> list of (name, alias)
        :param feature_set_objects: feature set name -> feature set object
        :param fixed_window_type:   object providing
                                    ``to_qbk_fixed_window_type()``
        :return: tuple of (graph, requested column names after aliasing,
                 entity keys of the first queried feature set)
        :raises mlrun.errors.MLRunInvalidArgumentError: the user graph has no
            responder and no default final step
        """
        graph = self.vector.spec.graph.copy()
        start_states, default_final_state, responders = graph.check_and_process_graph(
            allow_empty=True
        )
        # tail of the step chain built so far; new steps are appended to it
        last_step = graph

        fs_link_list = self._create_linked_relation_list(
            feature_set_objects, feature_set_fields, entity_keys
        )

        all_columns = []
        save_column = []
        # NOTE(review): the entity_keys argument is discarded here, so the
        # returned keys always come from the first queried feature set -
        # confirm this is intended before relying on user-supplied keys.
        entity_keys = []
        end_aliases = {}
        for node in fs_link_list:
            name = node.name
            if name == self._entity_rows_node_name:
                continue
            featureset = feature_set_objects[name]
            columns = feature_set_fields[name]
            column_names = [col_name for col_name, _ in columns]
            aliases = {col_name: alias for col_name, alias in columns if alias}
            all_columns += [aliases.get(col_name, col_name) for col_name in column_names]
            for save_col in node.data["save_cols"]:
                if save_col not in column_names:
                    column_names.append(save_col)
                else:
                    save_column.append(save_col)

            entity_list = node.data["right_keys"] or list(
                featureset.spec.entities.keys()
            )
            if not entity_keys:
                # if entity_keys not provided by the user we will set it to be the entity of the first feature set
                entity_keys = entity_list
            end_aliases.update(
                {
                    right_key: left_key
                    for right_key, left_key in zip(entity_list, node.data["left_keys"])
                    if right_key != left_key and left_key in save_column
                }
            )
            mapping = {
                left_key: right_key
                for left_key, right_key in zip(node.data["left_keys"], entity_list)
                if left_key != right_key
            }
            if mapping:
                # rename the join keys to the names this feature set is keyed by
                last_step = last_step.to(
                    "storey.Rename",
                    f"rename-{name}",
                    mapping=mapping,
                )

            last_step = last_step.to(
                "storey.QueryByKey",
                f"query-{name}",
                features=column_names,
                table=featureset.uri,
                key_field=entity_list,
                aliases=aliases,
                fixed_window_type=fixed_window_type.to_qbk_fixed_window_type(),
            )
        if end_aliases:
            # run if the user wants to save a column that relates to another entity
            last_step = last_step.to(
                "storey.Rename",
                "rename-entity-to-features",
                mapping=end_aliases,
            )
        for state_name in start_states:
            last_step.set_next(state_name)

        if not start_states:  # graph was empty
            last_step.respond()
        elif not responders and default_final_state:  # graph has clear state sequence
            graph[default_final_state].respond()
        elif not responders:
            raise mlrun.errors.MLRunInvalidArgumentError(
                "the graph doesnt have an explicit final step to respond on"
            )
        return graph, all_columns, entity_keys

    def init_online_vector_service(
        self, entity_keys, fixed_window_type, update_stats=False
    ):
        """Create and initialise an ``OnlineVectorService`` for the vector.

        :param entity_keys:       forwarded to the graph builder
        :param fixed_window_type: forwarded to the graph builder
        :param update_stats:      forwarded to ``vector.parse_features``
        :return: the initialised ``OnlineVectorService``
        :raises ImportError: storey is not installed
        :raises mlrun.errors.MLRunRuntimeError: the vector has no features
        :raises mlrun.errors.MLRunInvalidArgumentError: a feature set has no
            online data target
        """
        try:
            from storey import SyncEmitSource
        except ImportError as exc:
            # chain explicitly so the original import failure stays attached
            raise ImportError(
                f"storey not installed, use pip install storey, {exc}"
            ) from exc

        feature_set_objects, feature_set_fields = self.vector.parse_features(
            offline=False, update_stats=update_stats
        )
        if not feature_set_fields:
            raise mlrun.errors.MLRunRuntimeError(
                f"No features found for feature vector '{self.vector.metadata.name}'"
            )
        (
            graph,
            requested_columns,
            entity_keys,
        ) = self._generate_online_feature_vector_graph(
            entity_keys,
            feature_set_fields,
            feature_set_objects,
            fixed_window_type,
        )
        graph.set_flow_source(SyncEmitSource())
        server = create_graph_server(graph=graph, parameters={})

        cache = ResourceCache()
        # entity names across all feature sets, unique, in first-seen order
        all_fs_entities = []
        for featureset in feature_set_objects.values():
            driver = get_online_target(featureset)
            if not driver:
                raise mlrun.errors.MLRunInvalidArgumentError(
                    f"resource {featureset.uri} does not have an online data target"
                )
            cache.cache_table(featureset.uri, driver.get_table_object())

            for key in featureset.spec.entities.keys():
                if key not in all_fs_entities:
                    all_fs_entities.append(key)
        server.init_states(context=None, namespace=None, resource_cache=cache)
        server.init_object(None)

        service = OnlineVectorService(
            self.vector,
            graph,
            entity_keys,
            all_fs_entities=all_fs_entities,
            impute_policy=self.impute_policy,
            requested_columns=requested_columns,
        )
        service.initialize()

        return service
|