mlrun 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +3 -3
- mlrun/__main__.py +79 -37
- mlrun/api/__init__.py +1 -1
- mlrun/api/api/__init__.py +1 -1
- mlrun/api/api/api.py +4 -4
- mlrun/api/api/deps.py +10 -21
- mlrun/api/api/endpoints/__init__.py +1 -1
- mlrun/api/api/endpoints/artifacts.py +64 -36
- mlrun/api/api/endpoints/auth.py +4 -4
- mlrun/api/api/endpoints/background_tasks.py +11 -11
- mlrun/api/api/endpoints/client_spec.py +5 -5
- mlrun/api/api/endpoints/clusterization_spec.py +6 -4
- mlrun/api/api/endpoints/feature_store.py +124 -115
- mlrun/api/api/endpoints/files.py +22 -14
- mlrun/api/api/endpoints/frontend_spec.py +28 -21
- mlrun/api/api/endpoints/functions.py +142 -87
- mlrun/api/api/endpoints/grafana_proxy.py +89 -442
- mlrun/api/api/endpoints/healthz.py +20 -7
- mlrun/api/api/endpoints/hub.py +320 -0
- mlrun/api/api/endpoints/internal/__init__.py +1 -1
- mlrun/api/api/endpoints/internal/config.py +1 -1
- mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
- mlrun/api/api/endpoints/logs.py +11 -11
- mlrun/api/api/endpoints/model_endpoints.py +74 -70
- mlrun/api/api/endpoints/operations.py +13 -9
- mlrun/api/api/endpoints/pipelines.py +93 -88
- mlrun/api/api/endpoints/projects.py +35 -35
- mlrun/api/api/endpoints/runs.py +69 -27
- mlrun/api/api/endpoints/runtime_resources.py +28 -28
- mlrun/api/api/endpoints/schedules.py +98 -41
- mlrun/api/api/endpoints/secrets.py +37 -32
- mlrun/api/api/endpoints/submit.py +12 -12
- mlrun/api/api/endpoints/tags.py +20 -22
- mlrun/api/api/utils.py +251 -42
- mlrun/api/constants.py +1 -1
- mlrun/api/crud/__init__.py +18 -15
- mlrun/api/crud/artifacts.py +10 -10
- mlrun/api/crud/client_spec.py +4 -4
- mlrun/api/crud/clusterization_spec.py +3 -3
- mlrun/api/crud/feature_store.py +54 -46
- mlrun/api/crud/functions.py +3 -3
- mlrun/api/crud/hub.py +312 -0
- mlrun/api/crud/logs.py +11 -9
- mlrun/api/crud/model_monitoring/__init__.py +3 -3
- mlrun/api/crud/model_monitoring/grafana.py +435 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
- mlrun/api/crud/notifications.py +149 -0
- mlrun/api/crud/pipelines.py +67 -52
- mlrun/api/crud/projects.py +51 -23
- mlrun/api/crud/runs.py +7 -5
- mlrun/api/crud/runtime_resources.py +13 -13
- mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
- mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
- mlrun/api/crud/runtimes/nuclio/function.py +505 -0
- mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
- mlrun/api/crud/secrets.py +88 -46
- mlrun/api/crud/tags.py +5 -5
- mlrun/api/db/__init__.py +1 -1
- mlrun/api/db/base.py +102 -54
- mlrun/api/db/init_db.py +2 -3
- mlrun/api/db/session.py +4 -12
- mlrun/api/db/sqldb/__init__.py +1 -1
- mlrun/api/db/sqldb/db.py +439 -196
- mlrun/api/db/sqldb/helpers.py +1 -1
- mlrun/api/db/sqldb/models/__init__.py +3 -3
- mlrun/api/db/sqldb/models/models_mysql.py +82 -64
- mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
- mlrun/api/db/sqldb/session.py +27 -20
- mlrun/api/initial_data.py +82 -24
- mlrun/api/launcher.py +196 -0
- mlrun/api/main.py +91 -22
- mlrun/api/middlewares.py +6 -5
- mlrun/api/migrations_mysql/env.py +1 -1
- mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
- mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
- mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
- mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
- mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
- mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
- mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/env.py +1 -1
- mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
- mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
- mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
- mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
- mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
- mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
- mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
- mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
- mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
- mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
- mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
- mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
- mlrun/api/schemas/__init__.py +216 -138
- mlrun/api/utils/__init__.py +1 -1
- mlrun/api/utils/asyncio.py +1 -1
- mlrun/api/utils/auth/__init__.py +1 -1
- mlrun/api/utils/auth/providers/__init__.py +1 -1
- mlrun/api/utils/auth/providers/base.py +7 -7
- mlrun/api/utils/auth/providers/nop.py +6 -7
- mlrun/api/utils/auth/providers/opa.py +17 -17
- mlrun/api/utils/auth/verifier.py +36 -34
- mlrun/api/utils/background_tasks.py +24 -24
- mlrun/{builder.py → api/utils/builder.py} +216 -123
- mlrun/api/utils/clients/__init__.py +1 -1
- mlrun/api/utils/clients/chief.py +19 -4
- mlrun/api/utils/clients/iguazio.py +106 -60
- mlrun/api/utils/clients/log_collector.py +1 -1
- mlrun/api/utils/clients/nuclio.py +23 -23
- mlrun/api/utils/clients/protocols/grpc.py +2 -2
- mlrun/api/utils/db/__init__.py +1 -1
- mlrun/api/utils/db/alembic.py +1 -1
- mlrun/api/utils/db/backup.py +1 -1
- mlrun/api/utils/db/mysql.py +24 -25
- mlrun/api/utils/db/sql_collation.py +1 -1
- mlrun/api/utils/db/sqlite_migration.py +2 -2
- mlrun/api/utils/events/__init__.py +14 -0
- mlrun/api/utils/events/base.py +57 -0
- mlrun/api/utils/events/events_factory.py +41 -0
- mlrun/api/utils/events/iguazio.py +217 -0
- mlrun/api/utils/events/nop.py +55 -0
- mlrun/api/utils/helpers.py +16 -13
- mlrun/api/utils/memory_reports.py +1 -1
- mlrun/api/utils/periodic.py +6 -3
- mlrun/api/utils/projects/__init__.py +1 -1
- mlrun/api/utils/projects/follower.py +33 -33
- mlrun/api/utils/projects/leader.py +36 -34
- mlrun/api/utils/projects/member.py +27 -27
- mlrun/api/utils/projects/remotes/__init__.py +1 -1
- mlrun/api/utils/projects/remotes/follower.py +13 -13
- mlrun/api/utils/projects/remotes/leader.py +10 -10
- mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
- mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
- mlrun/api/utils/scheduler.py +140 -51
- mlrun/api/utils/singletons/__init__.py +1 -1
- mlrun/api/utils/singletons/db.py +9 -15
- mlrun/api/utils/singletons/k8s.py +677 -5
- mlrun/api/utils/singletons/logs_dir.py +1 -1
- mlrun/api/utils/singletons/project_member.py +1 -1
- mlrun/api/utils/singletons/scheduler.py +1 -1
- mlrun/artifacts/__init__.py +2 -2
- mlrun/artifacts/base.py +8 -2
- mlrun/artifacts/dataset.py +5 -3
- mlrun/artifacts/manager.py +7 -1
- mlrun/artifacts/model.py +15 -4
- mlrun/artifacts/plots.py +1 -1
- mlrun/common/__init__.py +1 -1
- mlrun/common/constants.py +15 -0
- mlrun/common/model_monitoring.py +209 -0
- mlrun/common/schemas/__init__.py +167 -0
- mlrun/{api → common}/schemas/artifact.py +13 -14
- mlrun/{api → common}/schemas/auth.py +10 -8
- mlrun/{api → common}/schemas/background_task.py +3 -3
- mlrun/{api → common}/schemas/client_spec.py +1 -1
- mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
- mlrun/{api → common}/schemas/constants.py +21 -8
- mlrun/common/schemas/events.py +36 -0
- mlrun/{api → common}/schemas/feature_store.py +2 -1
- mlrun/{api → common}/schemas/frontend_spec.py +7 -6
- mlrun/{api → common}/schemas/function.py +5 -5
- mlrun/{api → common}/schemas/http.py +3 -3
- mlrun/common/schemas/hub.py +134 -0
- mlrun/{api → common}/schemas/k8s.py +3 -3
- mlrun/{api → common}/schemas/memory_reports.py +1 -1
- mlrun/common/schemas/model_endpoints.py +342 -0
- mlrun/common/schemas/notification.py +57 -0
- mlrun/{api → common}/schemas/object.py +6 -6
- mlrun/{api → common}/schemas/pipeline.py +3 -3
- mlrun/{api → common}/schemas/project.py +6 -5
- mlrun/common/schemas/regex.py +24 -0
- mlrun/common/schemas/runs.py +30 -0
- mlrun/{api → common}/schemas/runtime_resource.py +3 -3
- mlrun/{api → common}/schemas/schedule.py +19 -7
- mlrun/{api → common}/schemas/secret.py +3 -3
- mlrun/{api → common}/schemas/tag.py +2 -2
- mlrun/common/types.py +25 -0
- mlrun/config.py +152 -20
- mlrun/data_types/__init__.py +7 -2
- mlrun/data_types/data_types.py +4 -2
- mlrun/data_types/infer.py +1 -1
- mlrun/data_types/spark.py +10 -3
- mlrun/datastore/__init__.py +10 -3
- mlrun/datastore/azure_blob.py +1 -1
- mlrun/datastore/base.py +185 -53
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/filestore.py +1 -1
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +1 -1
- mlrun/datastore/s3.py +1 -1
- mlrun/datastore/sources.py +192 -70
- mlrun/datastore/spark_udf.py +44 -0
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/targets.py +115 -45
- mlrun/datastore/utils.py +127 -5
- mlrun/datastore/v3io.py +1 -1
- mlrun/datastore/wasbfs/__init__.py +1 -1
- mlrun/datastore/wasbfs/fs.py +1 -1
- mlrun/db/__init__.py +7 -5
- mlrun/db/base.py +112 -68
- mlrun/db/httpdb.py +445 -277
- mlrun/db/nopdb.py +491 -0
- mlrun/db/sqldb.py +112 -65
- mlrun/errors.py +6 -1
- mlrun/execution.py +44 -22
- mlrun/feature_store/__init__.py +1 -1
- mlrun/feature_store/api.py +143 -95
- mlrun/feature_store/common.py +16 -20
- mlrun/feature_store/feature_set.py +42 -12
- mlrun/feature_store/feature_vector.py +32 -21
- mlrun/feature_store/ingestion.py +9 -12
- mlrun/feature_store/retrieval/__init__.py +3 -2
- mlrun/feature_store/retrieval/base.py +388 -66
- mlrun/feature_store/retrieval/dask_merger.py +63 -151
- mlrun/feature_store/retrieval/job.py +30 -12
- mlrun/feature_store/retrieval/local_merger.py +40 -133
- mlrun/feature_store/retrieval/spark_merger.py +129 -127
- mlrun/feature_store/retrieval/storey_merger.py +173 -0
- mlrun/feature_store/steps.py +132 -15
- mlrun/features.py +8 -3
- mlrun/frameworks/__init__.py +1 -1
- mlrun/frameworks/_common/__init__.py +1 -1
- mlrun/frameworks/_common/artifacts_library.py +1 -1
- mlrun/frameworks/_common/mlrun_interface.py +1 -1
- mlrun/frameworks/_common/model_handler.py +1 -1
- mlrun/frameworks/_common/plan.py +1 -1
- mlrun/frameworks/_common/producer.py +1 -1
- mlrun/frameworks/_common/utils.py +1 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
- mlrun/frameworks/_dl_common/model_handler.py +1 -1
- mlrun/frameworks/_dl_common/utils.py +1 -1
- mlrun/frameworks/_ml_common/__init__.py +1 -1
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/model_handler.py +1 -1
- mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
- mlrun/frameworks/_ml_common/producer.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +1 -1
- mlrun/frameworks/auto_mlrun/__init__.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
- mlrun/frameworks/huggingface/__init__.py +1 -1
- mlrun/frameworks/huggingface/model_server.py +1 -1
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/lgbm/model_server.py +1 -1
- mlrun/frameworks/lgbm/utils.py +1 -1
- mlrun/frameworks/onnx/__init__.py +1 -1
- mlrun/frameworks/onnx/dataset.py +1 -1
- mlrun/frameworks/onnx/mlrun_interface.py +1 -1
- mlrun/frameworks/onnx/model_handler.py +1 -1
- mlrun/frameworks/onnx/model_server.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +1 -1
- mlrun/frameworks/pytorch/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
- mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
- mlrun/frameworks/pytorch/model_handler.py +1 -1
- mlrun/frameworks/pytorch/model_server.py +1 -1
- mlrun/frameworks/pytorch/utils.py +1 -1
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/estimator.py +1 -1
- mlrun/frameworks/sklearn/metric.py +1 -1
- mlrun/frameworks/sklearn/metrics_library.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
- mlrun/frameworks/sklearn/model_handler.py +1 -1
- mlrun/frameworks/sklearn/utils.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
- mlrun/frameworks/tf_keras/model_handler.py +1 -1
- mlrun/frameworks/tf_keras/model_server.py +1 -1
- mlrun/frameworks/tf_keras/utils.py +1 -1
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
- mlrun/frameworks/xgboost/model_handler.py +1 -1
- mlrun/frameworks/xgboost/utils.py +1 -1
- mlrun/k8s_utils.py +14 -765
- mlrun/kfpops.py +14 -17
- mlrun/launcher/__init__.py +13 -0
- mlrun/launcher/base.py +406 -0
- mlrun/launcher/client.py +159 -0
- mlrun/launcher/factory.py +50 -0
- mlrun/launcher/local.py +276 -0
- mlrun/launcher/remote.py +178 -0
- mlrun/lists.py +10 -2
- mlrun/mlutils/__init__.py +1 -1
- mlrun/mlutils/data.py +1 -1
- mlrun/mlutils/models.py +1 -1
- mlrun/mlutils/plots.py +1 -1
- mlrun/model.py +252 -14
- mlrun/model_monitoring/__init__.py +41 -0
- mlrun/model_monitoring/features_drift_table.py +1 -1
- mlrun/model_monitoring/helpers.py +123 -38
- mlrun/model_monitoring/model_endpoint.py +144 -0
- mlrun/model_monitoring/model_monitoring_batch.py +310 -259
- mlrun/model_monitoring/stores/__init__.py +106 -0
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
- mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
- mlrun/model_monitoring/stores/models/__init__.py +23 -0
- mlrun/model_monitoring/stores/models/base.py +18 -0
- mlrun/model_monitoring/stores/models/mysql.py +100 -0
- mlrun/model_monitoring/stores/models/sqlite.py +98 -0
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
- mlrun/model_monitoring/stream_processing_fs.py +239 -271
- mlrun/package/__init__.py +163 -0
- mlrun/package/context_handler.py +325 -0
- mlrun/package/errors.py +47 -0
- mlrun/package/packager.py +298 -0
- mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
- mlrun/package/packagers/default_packager.py +422 -0
- mlrun/package/packagers/numpy_packagers.py +612 -0
- mlrun/package/packagers/pandas_packagers.py +968 -0
- mlrun/package/packagers/python_standard_library_packagers.py +616 -0
- mlrun/package/packagers_manager.py +786 -0
- mlrun/package/utils/__init__.py +53 -0
- mlrun/package/utils/_archiver.py +226 -0
- mlrun/package/utils/_formatter.py +211 -0
- mlrun/package/utils/_pickler.py +234 -0
- mlrun/package/utils/_supported_format.py +71 -0
- mlrun/package/utils/log_hint_utils.py +93 -0
- mlrun/package/utils/type_hint_utils.py +298 -0
- mlrun/platforms/__init__.py +1 -1
- mlrun/platforms/iguazio.py +34 -2
- mlrun/platforms/other.py +1 -1
- mlrun/projects/__init__.py +1 -1
- mlrun/projects/operations.py +14 -9
- mlrun/projects/pipelines.py +31 -13
- mlrun/projects/project.py +762 -238
- mlrun/render.py +49 -19
- mlrun/run.py +57 -326
- mlrun/runtimes/__init__.py +3 -9
- mlrun/runtimes/base.py +247 -784
- mlrun/runtimes/constants.py +1 -1
- mlrun/runtimes/daskjob.py +45 -41
- mlrun/runtimes/funcdoc.py +43 -7
- mlrun/runtimes/function.py +66 -656
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/generators.py +1 -1
- mlrun/runtimes/kubejob.py +99 -116
- mlrun/runtimes/local.py +59 -66
- mlrun/runtimes/mpijob/__init__.py +1 -1
- mlrun/runtimes/mpijob/abstract.py +13 -15
- mlrun/runtimes/mpijob/v1.py +3 -1
- mlrun/runtimes/mpijob/v1alpha1.py +1 -1
- mlrun/runtimes/nuclio.py +1 -1
- mlrun/runtimes/pod.py +51 -26
- mlrun/runtimes/remotesparkjob.py +3 -1
- mlrun/runtimes/serving.py +12 -4
- mlrun/runtimes/sparkjob/__init__.py +1 -2
- mlrun/runtimes/sparkjob/abstract.py +44 -31
- mlrun/runtimes/sparkjob/spark3job.py +11 -9
- mlrun/runtimes/utils.py +61 -42
- mlrun/secrets.py +16 -18
- mlrun/serving/__init__.py +3 -2
- mlrun/serving/merger.py +1 -1
- mlrun/serving/remote.py +1 -1
- mlrun/serving/routers.py +39 -42
- mlrun/serving/server.py +23 -13
- mlrun/serving/serving_wrapper.py +1 -1
- mlrun/serving/states.py +172 -39
- mlrun/serving/utils.py +1 -1
- mlrun/serving/v1_serving.py +1 -1
- mlrun/serving/v2_serving.py +29 -21
- mlrun/utils/__init__.py +1 -2
- mlrun/utils/async_http.py +8 -1
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +2 -2
- mlrun/utils/condition_evaluator.py +65 -0
- mlrun/utils/db.py +52 -0
- mlrun/utils/helpers.py +188 -13
- mlrun/utils/http.py +89 -54
- mlrun/utils/logger.py +48 -8
- mlrun/utils/model_monitoring.py +132 -100
- mlrun/utils/notifications/__init__.py +1 -1
- mlrun/utils/notifications/notification/__init__.py +8 -6
- mlrun/utils/notifications/notification/base.py +20 -14
- mlrun/utils/notifications/notification/console.py +7 -4
- mlrun/utils/notifications/notification/git.py +36 -19
- mlrun/utils/notifications/notification/ipython.py +10 -8
- mlrun/utils/notifications/notification/slack.py +18 -13
- mlrun/utils/notifications/notification_pusher.py +377 -56
- mlrun/utils/regex.py +6 -1
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +1 -1
- mlrun/utils/vault.py +270 -269
- mlrun/utils/version/__init__.py +1 -1
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +1 -1
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
- mlrun-1.4.0.dist-info/RECORD +434 -0
- mlrun/api/api/endpoints/marketplace.py +0 -257
- mlrun/api/crud/marketplace.py +0 -221
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
- mlrun/api/db/filedb/db.py +0 -518
- mlrun/api/schemas/marketplace.py +0 -128
- mlrun/api/schemas/model_endpoints.py +0 -185
- mlrun/db/filedb.py +0 -891
- mlrun/feature_store/retrieval/online.py +0 -92
- mlrun/model_monitoring/constants.py +0 -67
- mlrun/runtimes/package/context_handler.py +0 -711
- mlrun/runtimes/sparkjob/spark2job.py +0 -59
- mlrun-1.3.3.dist-info/RECORD +0 -381
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
mlrun/feature_store/api.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -12,10 +12,12 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import copy
|
|
15
|
+
import importlib.util
|
|
16
|
+
import pathlib
|
|
17
|
+
import sys
|
|
15
18
|
import warnings
|
|
16
19
|
from datetime import datetime
|
|
17
|
-
from typing import List, Optional, Union
|
|
18
|
-
from urllib.parse import urlparse
|
|
20
|
+
from typing import Any, Dict, List, Optional, Union
|
|
19
21
|
|
|
20
22
|
import pandas as pd
|
|
21
23
|
|
|
@@ -28,7 +30,6 @@ from ..datastore.store_resources import parse_store_uri
|
|
|
28
30
|
from ..datastore.targets import (
|
|
29
31
|
BaseStoreTarget,
|
|
30
32
|
get_default_prefix_for_source,
|
|
31
|
-
get_default_targets,
|
|
32
33
|
get_target_driver,
|
|
33
34
|
kind_to_driver,
|
|
34
35
|
validate_target_list,
|
|
@@ -39,7 +40,7 @@ from ..model import DataSource, DataTargetBase
|
|
|
39
40
|
from ..runtimes import RuntimeKinds
|
|
40
41
|
from ..runtimes.function_reference import FunctionReference
|
|
41
42
|
from ..serving.server import Response
|
|
42
|
-
from ..utils import get_caller_globals, logger, normalize_name
|
|
43
|
+
from ..utils import get_caller_globals, logger, normalize_name
|
|
43
44
|
from .common import (
|
|
44
45
|
RunConfig,
|
|
45
46
|
get_feature_set_by_uri,
|
|
@@ -61,7 +62,7 @@ from .ingestion import (
|
|
|
61
62
|
run_ingestion_job,
|
|
62
63
|
run_spark_graph,
|
|
63
64
|
)
|
|
64
|
-
from .retrieval import get_merger,
|
|
65
|
+
from .retrieval import get_merger, run_merge_job
|
|
65
66
|
|
|
66
67
|
_v3iofs = None
|
|
67
68
|
spark_transform_handler = "transform"
|
|
@@ -77,7 +78,7 @@ def _features_to_vector_and_check_permissions(features, update_stats):
|
|
|
77
78
|
"feature vector name must be specified"
|
|
78
79
|
)
|
|
79
80
|
verify_feature_vector_permissions(
|
|
80
|
-
vector, mlrun.
|
|
81
|
+
vector, mlrun.common.schemas.AuthorizationAction.update
|
|
81
82
|
)
|
|
82
83
|
|
|
83
84
|
vector.save()
|
|
@@ -102,8 +103,9 @@ def get_offline_features(
|
|
|
102
103
|
engine: str = None,
|
|
103
104
|
engine_args: dict = None,
|
|
104
105
|
query: str = None,
|
|
105
|
-
|
|
106
|
+
order_by: Union[str, List[str]] = None,
|
|
106
107
|
spark_service: str = None,
|
|
108
|
+
timestamp_for_filtering: Union[str, Dict[str, str]] = None,
|
|
107
109
|
) -> OfflineVectorResponse:
|
|
108
110
|
"""retrieve offline feature vector results
|
|
109
111
|
|
|
@@ -133,35 +135,44 @@ def get_offline_features(
|
|
|
133
135
|
print(vector.get_stats_table())
|
|
134
136
|
resp.to_parquet("./out.parquet")
|
|
135
137
|
|
|
136
|
-
:param feature_vector:
|
|
137
|
-
|
|
138
|
-
:param entity_rows:
|
|
139
|
-
:param target:
|
|
140
|
-
:param drop_columns:
|
|
141
|
-
:param entity_timestamp_column: timestamp column name in the entity rows dataframe
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
:param end_time:
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
:param
|
|
153
|
-
|
|
154
|
-
:param
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
138
|
+
:param feature_vector: feature vector uri or FeatureVector object. passing feature vector obj requires
|
|
139
|
+
update permissions
|
|
140
|
+
:param entity_rows: dataframe with entity rows to join with
|
|
141
|
+
:param target: where to write the results to
|
|
142
|
+
:param drop_columns: list of columns to drop from the final result
|
|
143
|
+
:param entity_timestamp_column: timestamp column name in the entity rows dataframe. can be specified
|
|
144
|
+
only if param entity_rows was specified.
|
|
145
|
+
:param run_config: function and/or run configuration
|
|
146
|
+
see :py:class:`~mlrun.feature_store.RunConfig`
|
|
147
|
+
:param start_time: datetime, low limit of time needed to be filtered. Optional.
|
|
148
|
+
:param end_time: datetime, high limit of time needed to be filtered. Optional.
|
|
149
|
+
:param with_indexes: Return vector with/without the entities and the timestamp_key of the feature sets
|
|
150
|
+
and with/without entity_timestamp_column and timestamp_for_filtering columns.
|
|
151
|
+
This property can be specified also in the feature vector spec
|
|
152
|
+
(feature_vector.spec.with_indexes)
|
|
153
|
+
(default False)
|
|
154
|
+
:param update_stats: update features statistics from the requested feature sets on the vector.
|
|
155
|
+
(default False).
|
|
156
|
+
:param engine: processing engine kind ("local", "dask", or "spark")
|
|
157
|
+
:param engine_args: kwargs for the processing engine
|
|
158
|
+
:param query: The query string used to filter rows on the output
|
|
159
|
+
:param spark_service: Name of the spark service to be used (when using a remote-spark runtime)
|
|
160
|
+
:param order_by: Name or list of names to order by. The name or the names in the list can be the
|
|
161
|
+
feature name or the alias of the feature you pass in the feature list.
|
|
162
|
+
:param timestamp_for_filtering: name of the column to filter by, can be str for all the feature sets or a
|
|
163
|
+
dictionary ({<feature set name>: <timestamp column name>, ...})
|
|
164
|
+
that indicates the timestamp column name for each feature set. Optional.
|
|
165
|
+
By default, the filter executes on the timestamp_key of each feature set.
|
|
166
|
+
Note: the time filtering is performed on each feature set before the
|
|
167
|
+
merge process using start_time and end_time params.
|
|
168
|
+
|
|
164
169
|
"""
|
|
170
|
+
if entity_rows is None and entity_timestamp_column is not None:
|
|
171
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
172
|
+
"entity_timestamp_column param "
|
|
173
|
+
"can not be specified without entity_rows param"
|
|
174
|
+
)
|
|
175
|
+
|
|
165
176
|
if isinstance(feature_vector, FeatureVector):
|
|
166
177
|
update_stats = True
|
|
167
178
|
|
|
@@ -184,23 +195,17 @@ def get_offline_features(
|
|
|
184
195
|
engine_args,
|
|
185
196
|
spark_service,
|
|
186
197
|
entity_rows,
|
|
187
|
-
|
|
198
|
+
entity_timestamp_column=entity_timestamp_column,
|
|
188
199
|
run_config=run_config,
|
|
189
200
|
drop_columns=drop_columns,
|
|
190
201
|
with_indexes=with_indexes,
|
|
191
202
|
query=query,
|
|
192
|
-
|
|
203
|
+
order_by=order_by,
|
|
204
|
+
start_time=start_time,
|
|
205
|
+
end_time=end_time,
|
|
206
|
+
timestamp_for_filtering=timestamp_for_filtering,
|
|
193
207
|
)
|
|
194
208
|
|
|
195
|
-
start_time = str_to_timestamp(start_time)
|
|
196
|
-
end_time = str_to_timestamp(end_time)
|
|
197
|
-
if (start_time or end_time) and not entity_timestamp_column:
|
|
198
|
-
raise TypeError(
|
|
199
|
-
"entity_timestamp_column or feature_vector.spec.timestamp_field is required when passing start/end time"
|
|
200
|
-
)
|
|
201
|
-
if start_time and not end_time:
|
|
202
|
-
# if end_time is not specified set it to now()
|
|
203
|
-
end_time = pd.Timestamp.now()
|
|
204
209
|
merger = merger_engine(feature_vector, **(engine_args or {}))
|
|
205
210
|
return merger.start(
|
|
206
211
|
entity_rows,
|
|
@@ -209,10 +214,11 @@ def get_offline_features(
|
|
|
209
214
|
drop_columns=drop_columns,
|
|
210
215
|
start_time=start_time,
|
|
211
216
|
end_time=end_time,
|
|
217
|
+
timestamp_for_filtering=timestamp_for_filtering,
|
|
212
218
|
with_indexes=with_indexes,
|
|
213
219
|
update_stats=update_stats,
|
|
214
220
|
query=query,
|
|
215
|
-
|
|
221
|
+
order_by=order_by,
|
|
216
222
|
)
|
|
217
223
|
|
|
218
224
|
|
|
@@ -222,6 +228,7 @@ def get_online_feature_service(
|
|
|
222
228
|
fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
|
|
223
229
|
impute_policy: dict = None,
|
|
224
230
|
update_stats: bool = False,
|
|
231
|
+
entity_keys: List[str] = None,
|
|
225
232
|
) -> OnlineVectorService:
|
|
226
233
|
"""initialize and return online feature vector service api,
|
|
227
234
|
returns :py:class:`~mlrun.feature_store.OnlineVectorService`
|
|
@@ -241,14 +248,15 @@ def get_online_feature_service(
|
|
|
241
248
|
|
|
242
249
|
Example with imputing::
|
|
243
250
|
|
|
244
|
-
with get_online_feature_service(vector_uri,
|
|
251
|
+
with get_online_feature_service(vector_uri, entity_keys=['id'],
|
|
252
|
+
impute_policy={"*": "$mean", "amount": 0}) as svc:
|
|
245
253
|
resp = svc.get([{"id": "C123487"}])
|
|
246
254
|
|
|
247
255
|
2. as simple function, note that in that option you need to close the session.
|
|
248
256
|
|
|
249
257
|
Example::
|
|
250
258
|
|
|
251
|
-
svc = get_online_feature_service(vector_uri)
|
|
259
|
+
svc = get_online_feature_service(vector_uri, entity_keys=['ticker'])
|
|
252
260
|
try:
|
|
253
261
|
resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
|
|
254
262
|
print(resp)
|
|
@@ -260,7 +268,8 @@ def get_online_feature_service(
|
|
|
260
268
|
|
|
261
269
|
Example with imputing::
|
|
262
270
|
|
|
263
|
-
svc = get_online_feature_service(vector_uri,
|
|
271
|
+
svc = get_online_feature_service(vector_uri, entity_keys=['id'],
|
|
272
|
+
impute_policy={"*": "$mean", "amount": 0})
|
|
264
273
|
try:
|
|
265
274
|
resp = svc.get([{"id": "C123487"}])
|
|
266
275
|
except Exception as e:
|
|
@@ -268,15 +277,21 @@ def get_online_feature_service(
|
|
|
268
277
|
finally:
|
|
269
278
|
svc.close()
|
|
270
279
|
|
|
271
|
-
:param feature_vector:
|
|
272
|
-
|
|
273
|
-
:param run_config:
|
|
274
|
-
:param impute_policy:
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
:param
|
|
280
|
+
:param feature_vector: feature vector uri or FeatureVector object. passing feature vector obj requires update
|
|
281
|
+
permissions.
|
|
282
|
+
:param run_config: function and/or run configuration for remote jobs/services
|
|
283
|
+
:param impute_policy: a dict with `impute_policy` per feature, the dict key is the feature name and the dict
|
|
284
|
+
value indicates which value will be used in case the feature is NaN/empty, the replaced
|
|
285
|
+
value can be fixed number for constants or $mean, $max, $min, $std, $count
|
|
286
|
+
for statistical
|
|
287
|
+
values. "*" is used to specify the default for all features, example: `{"*": "$mean"}`
|
|
288
|
+
:param fixed_window_type: determines how to query the fixed window values which were previously inserted by ingest
|
|
289
|
+
:param update_stats: update features statistics from the requested feature sets on the vector.
|
|
290
|
+
Default: False.
|
|
291
|
+
:param entity_keys: Entity list of the first feature_set in the vector.
|
|
292
|
+
The indexes that are used to query the online service.
|
|
293
|
+
:return: Initialize the `OnlineVectorService`.
|
|
294
|
+
Will be used in subclasses where `support_online=True`.
|
|
280
295
|
"""
|
|
281
296
|
if isinstance(feature_vector, FeatureVector):
|
|
282
297
|
update_stats = True
|
|
@@ -288,17 +303,15 @@ def get_online_feature_service(
|
|
|
288
303
|
if impute_policy and not feature_vector.status.stats:
|
|
289
304
|
update_stats = True
|
|
290
305
|
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
)
|
|
294
|
-
service = OnlineVectorService(
|
|
295
|
-
feature_vector, graph, index_columns, impute_policy=impute_policy
|
|
296
|
-
)
|
|
297
|
-
service.initialize()
|
|
298
|
-
|
|
306
|
+
engine_args = {"impute_policy": impute_policy}
|
|
307
|
+
merger_engine = get_merger("storey")
|
|
299
308
|
# todo: support remote service (using remote nuclio/mlrun function if run_config)
|
|
300
309
|
|
|
301
|
-
|
|
310
|
+
merger = merger_engine(feature_vector, **engine_args)
|
|
311
|
+
|
|
312
|
+
return merger.init_online_vector_service(
|
|
313
|
+
entity_keys, fixed_window_type, update_stats=update_stats
|
|
314
|
+
)
|
|
302
315
|
|
|
303
316
|
|
|
304
317
|
def _rename_source_dataframe_columns(df):
|
|
@@ -322,6 +335,21 @@ def _rename_source_dataframe_columns(df):
|
|
|
322
335
|
return df
|
|
323
336
|
|
|
324
337
|
|
|
338
|
+
def _get_namespace(run_config: RunConfig) -> Dict[str, Any]:
|
|
339
|
+
# if running locally, we need to import the file dynamically to get its namespace
|
|
340
|
+
if run_config and run_config.local and run_config.function:
|
|
341
|
+
filename = run_config.function.spec.filename
|
|
342
|
+
if filename:
|
|
343
|
+
module_name = pathlib.Path(filename).name.rsplit(".", maxsplit=1)[0]
|
|
344
|
+
spec = importlib.util.spec_from_file_location(module_name, filename)
|
|
345
|
+
module = importlib.util.module_from_spec(spec)
|
|
346
|
+
sys.modules[module_name] = module
|
|
347
|
+
spec.loader.exec_module(module)
|
|
348
|
+
return vars(__import__(module_name))
|
|
349
|
+
else:
|
|
350
|
+
return get_caller_globals()
|
|
351
|
+
|
|
352
|
+
|
|
325
353
|
def ingest(
|
|
326
354
|
featureset: Union[FeatureSet, str] = None,
|
|
327
355
|
source=None,
|
|
@@ -367,7 +395,8 @@ def ingest(
|
|
|
367
395
|
:param targets: optional list of data target objects
|
|
368
396
|
:param namespace: namespace or module containing graph classes
|
|
369
397
|
:param return_df: indicates whether to return a dataframe with the graph results
|
|
370
|
-
:param infer_options: schema
|
|
398
|
+
:param infer_options: schema (for discovery of entities, features in featureset), index, stats,
|
|
399
|
+
histogram and preview infer options (:py:class:`~mlrun.feature_store.InferOptions`)
|
|
371
400
|
:param run_config: function and/or run configuration for remote jobs,
|
|
372
401
|
see :py:class:`~mlrun.feature_store.RunConfig`
|
|
373
402
|
:param mlrun_context: mlrun context (when running as a job), for internal use !
|
|
@@ -405,6 +434,15 @@ def ingest(
|
|
|
405
434
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
406
435
|
"feature set and source must be specified"
|
|
407
436
|
)
|
|
437
|
+
if (
|
|
438
|
+
not mlrun_context
|
|
439
|
+
and not targets
|
|
440
|
+
and not (featureset.spec.targets or featureset.spec.with_default_targets)
|
|
441
|
+
and (run_config is not None and not run_config.local)
|
|
442
|
+
):
|
|
443
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
444
|
+
f"Feature set {featureset.metadata.name} is remote ingested with no targets defined, aborting"
|
|
445
|
+
)
|
|
408
446
|
|
|
409
447
|
if featureset is not None:
|
|
410
448
|
featureset.validate_steps(namespace=namespace)
|
|
@@ -416,7 +454,7 @@ def ingest(
|
|
|
416
454
|
)
|
|
417
455
|
# remote job execution
|
|
418
456
|
verify_feature_set_permissions(
|
|
419
|
-
featureset, mlrun.
|
|
457
|
+
featureset, mlrun.common.schemas.AuthorizationAction.update
|
|
420
458
|
)
|
|
421
459
|
run_config = run_config.copy() if run_config else RunConfig()
|
|
422
460
|
source, run_config.parameters = set_task_params(
|
|
@@ -448,7 +486,7 @@ def ingest(
|
|
|
448
486
|
|
|
449
487
|
featureset.validate_steps(namespace=namespace)
|
|
450
488
|
verify_feature_set_permissions(
|
|
451
|
-
featureset, mlrun.
|
|
489
|
+
featureset, mlrun.common.schemas.AuthorizationAction.update
|
|
452
490
|
)
|
|
453
491
|
if not source:
|
|
454
492
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
@@ -477,19 +515,21 @@ def ingest(
|
|
|
477
515
|
f"Source.end_time is {str(source.end_time)}"
|
|
478
516
|
)
|
|
479
517
|
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
518
|
+
if mlrun_context:
|
|
519
|
+
mlrun_context.logger.info(
|
|
520
|
+
f"starting ingestion task to {featureset.uri}.{filter_time_string}"
|
|
521
|
+
)
|
|
522
|
+
|
|
484
523
|
return_df = False
|
|
485
524
|
|
|
486
525
|
if featureset.spec.passthrough:
|
|
487
526
|
featureset.spec.source = source
|
|
488
527
|
featureset.spec.validate_no_processing_for_passthrough()
|
|
489
528
|
|
|
490
|
-
|
|
529
|
+
if not namespace:
|
|
530
|
+
namespace = _get_namespace(run_config)
|
|
491
531
|
|
|
492
|
-
targets_to_ingest = targets or featureset.spec.targets
|
|
532
|
+
targets_to_ingest = targets or featureset.spec.targets
|
|
493
533
|
targets_to_ingest = copy.deepcopy(targets_to_ingest)
|
|
494
534
|
|
|
495
535
|
validate_target_paths_for_engine(targets_to_ingest, featureset.spec.engine, source)
|
|
@@ -633,10 +673,14 @@ def preview(
|
|
|
633
673
|
:param entity_columns: list of entity (index) column names
|
|
634
674
|
:param timestamp_key: DEPRECATED. Use FeatureSet parameter.
|
|
635
675
|
:param namespace: namespace or module containing graph classes
|
|
636
|
-
:param options: schema
|
|
676
|
+
:param options: schema (for discovery of entities, features in featureset), index, stats,
|
|
677
|
+
histogram and preview infer options (:py:class:`~mlrun.feature_store.InferOptions`)
|
|
637
678
|
:param verbose: verbose log
|
|
638
679
|
:param sample_size: num of rows to sample from the dataset (for large datasets)
|
|
639
680
|
"""
|
|
681
|
+
if isinstance(source, pd.DataFrame):
|
|
682
|
+
source = _rename_source_dataframe_columns(source)
|
|
683
|
+
|
|
640
684
|
# preview reads the source as a pandas df, which is not fully compatible with spark
|
|
641
685
|
if featureset.spec.engine == "spark":
|
|
642
686
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
@@ -661,7 +705,7 @@ def preview(
|
|
|
661
705
|
source = mlrun.store_manager.object(url=source).as_df()
|
|
662
706
|
|
|
663
707
|
verify_feature_set_permissions(
|
|
664
|
-
featureset, mlrun.
|
|
708
|
+
featureset, mlrun.common.schemas.AuthorizationAction.update
|
|
665
709
|
)
|
|
666
710
|
|
|
667
711
|
featureset.spec.validate_no_processing_for_passthrough()
|
|
@@ -686,7 +730,9 @@ def preview(
|
|
|
686
730
|
)
|
|
687
731
|
# reduce the size of the ingestion if we do not infer stats
|
|
688
732
|
rows_limit = (
|
|
689
|
-
|
|
733
|
+
None
|
|
734
|
+
if InferOptions.get_common_options(options, InferOptions.Stats)
|
|
735
|
+
else 1000
|
|
690
736
|
)
|
|
691
737
|
source = init_featureset_graph(
|
|
692
738
|
source,
|
|
@@ -757,7 +803,7 @@ def deploy_ingestion_service(
|
|
|
757
803
|
featureset = get_feature_set_by_uri(featureset)
|
|
758
804
|
|
|
759
805
|
verify_feature_set_permissions(
|
|
760
|
-
featureset, mlrun.
|
|
806
|
+
featureset, mlrun.common.schemas.AuthorizationAction.update
|
|
761
807
|
)
|
|
762
808
|
|
|
763
809
|
verify_feature_set_exists(featureset)
|
|
@@ -770,7 +816,7 @@ def deploy_ingestion_service(
|
|
|
770
816
|
name=featureset.metadata.name,
|
|
771
817
|
)
|
|
772
818
|
|
|
773
|
-
targets_to_ingest = targets or featureset.spec.targets
|
|
819
|
+
targets_to_ingest = targets or featureset.spec.targets
|
|
774
820
|
targets_to_ingest = copy.deepcopy(targets_to_ingest)
|
|
775
821
|
featureset.update_targets_for_ingest(targets_to_ingest)
|
|
776
822
|
|
|
@@ -832,7 +878,11 @@ def _ingest_with_spark(
|
|
|
832
878
|
f"{featureset.metadata.project}-{featureset.metadata.name}"
|
|
833
879
|
)
|
|
834
880
|
|
|
835
|
-
spark =
|
|
881
|
+
spark = (
|
|
882
|
+
pyspark.sql.SparkSession.builder.appName(session_name)
|
|
883
|
+
.config("spark.sql.session.timeZone", "UTC")
|
|
884
|
+
.getOrCreate()
|
|
885
|
+
)
|
|
836
886
|
created_spark_context = True
|
|
837
887
|
|
|
838
888
|
timestamp_key = featureset.spec.timestamp_key
|
|
@@ -843,12 +893,14 @@ def _ingest_with_spark(
|
|
|
843
893
|
df = source
|
|
844
894
|
else:
|
|
845
895
|
df = source.to_spark_df(spark, time_field=timestamp_key)
|
|
846
|
-
df = source.filter_df_start_end_time(df, timestamp_key)
|
|
847
896
|
if featureset.spec.graph and featureset.spec.graph.steps:
|
|
848
897
|
df = run_spark_graph(df, featureset, namespace, spark)
|
|
849
898
|
|
|
850
899
|
if isinstance(df, Response) and df.status_code != 0:
|
|
851
900
|
mlrun.errors.raise_for_status_code(df.status_code, df.body.split(": ")[1])
|
|
901
|
+
|
|
902
|
+
df.persist()
|
|
903
|
+
|
|
852
904
|
_infer_from_static_df(df, featureset, options=infer_options)
|
|
853
905
|
|
|
854
906
|
key_columns = list(featureset.spec.entities.keys())
|
|
@@ -863,14 +915,6 @@ def _ingest_with_spark(
|
|
|
863
915
|
target.set_resource(featureset)
|
|
864
916
|
if featureset.spec.passthrough and target.is_offline:
|
|
865
917
|
continue
|
|
866
|
-
if target.path and urlparse(target.path).scheme == "":
|
|
867
|
-
if mlrun_context:
|
|
868
|
-
mlrun_context.logger.error(
|
|
869
|
-
"Paths for spark ingest must contain schema, i.e v3io, s3, az"
|
|
870
|
-
)
|
|
871
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
872
|
-
"Paths for spark ingest must contain schema, i.e v3io, s3, az"
|
|
873
|
-
)
|
|
874
918
|
spark_options = target.get_spark_options(
|
|
875
919
|
key_columns, timestamp_key, overwrite
|
|
876
920
|
)
|
|
@@ -957,11 +1001,15 @@ def _infer_from_static_df(
|
|
|
957
1001
|
):
|
|
958
1002
|
"""infer feature-set schema & stats from static dataframe (without pipeline)"""
|
|
959
1003
|
if hasattr(df, "to_dataframe"):
|
|
1004
|
+
if hasattr(df, "time_field"):
|
|
1005
|
+
time_field = df.time_field or featureset.spec.timestamp_key
|
|
1006
|
+
else:
|
|
1007
|
+
time_field = featureset.spec.timestamp_key
|
|
960
1008
|
if df.is_iterator():
|
|
961
1009
|
# todo: describe over multiple chunks
|
|
962
|
-
df = next(df.to_dataframe())
|
|
1010
|
+
df = next(df.to_dataframe(time_field=time_field))
|
|
963
1011
|
else:
|
|
964
|
-
df = df.to_dataframe()
|
|
1012
|
+
df = df.to_dataframe(time_field=time_field)
|
|
965
1013
|
inferer = get_infer_interface(df)
|
|
966
1014
|
if InferOptions.get_common_options(options, InferOptions.schema()):
|
|
967
1015
|
featureset.spec.timestamp_key = inferer.infer_schema(
|
mlrun/feature_store/common.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -16,11 +16,11 @@ from copy import copy
|
|
|
16
16
|
|
|
17
17
|
import mlrun
|
|
18
18
|
import mlrun.errors
|
|
19
|
-
from mlrun.
|
|
19
|
+
from mlrun.common.schemas import AuthorizationVerificationInput
|
|
20
20
|
from mlrun.runtimes import BaseRuntime
|
|
21
21
|
from mlrun.runtimes.function_reference import FunctionReference
|
|
22
22
|
from mlrun.runtimes.utils import enrich_function_from_dict
|
|
23
|
-
from mlrun.utils import StorePrefix, logger,
|
|
23
|
+
from mlrun.utils import StorePrefix, logger, parse_versioned_object_uri
|
|
24
24
|
|
|
25
25
|
from ..config import config
|
|
26
26
|
|
|
@@ -86,13 +86,13 @@ def get_feature_set_by_uri(uri, project=None):
|
|
|
86
86
|
db = mlrun.get_run_db()
|
|
87
87
|
project, name, tag, uid = parse_feature_set_uri(uri, project)
|
|
88
88
|
resource = (
|
|
89
|
-
mlrun.
|
|
89
|
+
mlrun.common.schemas.AuthorizationResourceTypes.feature_set.to_resource_string(
|
|
90
90
|
project, "feature-set"
|
|
91
91
|
)
|
|
92
92
|
)
|
|
93
93
|
|
|
94
94
|
auth_input = AuthorizationVerificationInput(
|
|
95
|
-
resource=resource, action=mlrun.
|
|
95
|
+
resource=resource, action=mlrun.common.schemas.AuthorizationAction.read
|
|
96
96
|
)
|
|
97
97
|
db.verify_authorization(auth_input)
|
|
98
98
|
|
|
@@ -115,19 +115,17 @@ def get_feature_vector_by_uri(uri, project=None, update=True):
|
|
|
115
115
|
|
|
116
116
|
project, name, tag, uid = parse_versioned_object_uri(uri, default_project)
|
|
117
117
|
|
|
118
|
-
resource = (
|
|
119
|
-
|
|
120
|
-
project, "feature-vector"
|
|
121
|
-
)
|
|
118
|
+
resource = mlrun.common.schemas.AuthorizationResourceTypes.feature_vector.to_resource_string(
|
|
119
|
+
project, "feature-vector"
|
|
122
120
|
)
|
|
123
121
|
|
|
124
122
|
if update:
|
|
125
123
|
auth_input = AuthorizationVerificationInput(
|
|
126
|
-
resource=resource, action=mlrun.
|
|
124
|
+
resource=resource, action=mlrun.common.schemas.AuthorizationAction.update
|
|
127
125
|
)
|
|
128
126
|
else:
|
|
129
127
|
auth_input = AuthorizationVerificationInput(
|
|
130
|
-
resource=resource, action=mlrun.
|
|
128
|
+
resource=resource, action=mlrun.common.schemas.AuthorizationAction.read
|
|
131
129
|
)
|
|
132
130
|
|
|
133
131
|
db.verify_authorization(auth_input)
|
|
@@ -136,12 +134,12 @@ def get_feature_vector_by_uri(uri, project=None, update=True):
|
|
|
136
134
|
|
|
137
135
|
|
|
138
136
|
def verify_feature_set_permissions(
|
|
139
|
-
feature_set, action: mlrun.
|
|
137
|
+
feature_set, action: mlrun.common.schemas.AuthorizationAction
|
|
140
138
|
):
|
|
141
139
|
project, _, _, _ = parse_feature_set_uri(feature_set.uri)
|
|
142
140
|
|
|
143
141
|
resource = (
|
|
144
|
-
mlrun.
|
|
142
|
+
mlrun.common.schemas.AuthorizationResourceTypes.feature_set.to_resource_string(
|
|
145
143
|
project, "feature-set"
|
|
146
144
|
)
|
|
147
145
|
)
|
|
@@ -164,14 +162,12 @@ def verify_feature_set_exists(feature_set):
|
|
|
164
162
|
|
|
165
163
|
|
|
166
164
|
def verify_feature_vector_permissions(
|
|
167
|
-
feature_vector, action: mlrun.
|
|
165
|
+
feature_vector, action: mlrun.common.schemas.AuthorizationAction
|
|
168
166
|
):
|
|
169
|
-
project = feature_vector._metadata.project or
|
|
167
|
+
project = feature_vector._metadata.project or config.default_project
|
|
170
168
|
|
|
171
|
-
resource = (
|
|
172
|
-
|
|
173
|
-
project, "feature-vector"
|
|
174
|
-
)
|
|
169
|
+
resource = mlrun.common.schemas.AuthorizationResourceTypes.feature_vector.to_resource_string(
|
|
170
|
+
project, "feature-vector"
|
|
175
171
|
)
|
|
176
172
|
|
|
177
173
|
db = mlrun.get_run_db()
|
|
@@ -218,7 +214,7 @@ class RunConfig:
|
|
|
218
214
|
config = RunConfig("mycode.py", image="mlrun/mlrun", requirements=["spacy"])
|
|
219
215
|
|
|
220
216
|
# config for using function object
|
|
221
|
-
function = mlrun.import_function("hub://
|
|
217
|
+
function = mlrun.import_function("hub://some-function")
|
|
222
218
|
config = RunConfig(function)
|
|
223
219
|
|
|
224
220
|
:param function: this can be function uri or function object or path to function code (.py/.ipynb)
|