PyPI - mlrun - Versions diffs - 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl - Mend

mlrun: mlrun-1.3.3-py3-none-any.whl → mlrun-1.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (444)

mlrun/__init__.py +3 -3
mlrun/__main__.py +79 -37
mlrun/api/__init__.py +1 -1
mlrun/api/api/__init__.py +1 -1
mlrun/api/api/api.py +4 -4
mlrun/api/api/deps.py +10 -21
mlrun/api/api/endpoints/__init__.py +1 -1
mlrun/api/api/endpoints/artifacts.py +64 -36
mlrun/api/api/endpoints/auth.py +4 -4
mlrun/api/api/endpoints/background_tasks.py +11 -11
mlrun/api/api/endpoints/client_spec.py +5 -5
mlrun/api/api/endpoints/clusterization_spec.py +6 -4
mlrun/api/api/endpoints/feature_store.py +124 -115
mlrun/api/api/endpoints/files.py +22 -14
mlrun/api/api/endpoints/frontend_spec.py +28 -21
mlrun/api/api/endpoints/functions.py +142 -87
mlrun/api/api/endpoints/grafana_proxy.py +89 -442
mlrun/api/api/endpoints/healthz.py +20 -7
mlrun/api/api/endpoints/hub.py +320 -0
mlrun/api/api/endpoints/internal/__init__.py +1 -1
mlrun/api/api/endpoints/internal/config.py +1 -1
mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
mlrun/api/api/endpoints/logs.py +11 -11
mlrun/api/api/endpoints/model_endpoints.py +74 -70
mlrun/api/api/endpoints/operations.py +13 -9
mlrun/api/api/endpoints/pipelines.py +93 -88
mlrun/api/api/endpoints/projects.py +35 -35
mlrun/api/api/endpoints/runs.py +69 -27
mlrun/api/api/endpoints/runtime_resources.py +28 -28
mlrun/api/api/endpoints/schedules.py +98 -41
mlrun/api/api/endpoints/secrets.py +37 -32
mlrun/api/api/endpoints/submit.py +12 -12
mlrun/api/api/endpoints/tags.py +20 -22
mlrun/api/api/utils.py +251 -42
mlrun/api/constants.py +1 -1
mlrun/api/crud/__init__.py +18 -15
mlrun/api/crud/artifacts.py +10 -10
mlrun/api/crud/client_spec.py +4 -4
mlrun/api/crud/clusterization_spec.py +3 -3
mlrun/api/crud/feature_store.py +54 -46
mlrun/api/crud/functions.py +3 -3
mlrun/api/crud/hub.py +312 -0
mlrun/api/crud/logs.py +11 -9
mlrun/api/crud/model_monitoring/__init__.py +3 -3
mlrun/api/crud/model_monitoring/grafana.py +435 -0
mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
mlrun/api/crud/notifications.py +149 -0
mlrun/api/crud/pipelines.py +67 -52
mlrun/api/crud/projects.py +51 -23
mlrun/api/crud/runs.py +7 -5
mlrun/api/crud/runtime_resources.py +13 -13
mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
mlrun/api/crud/runtimes/nuclio/function.py +505 -0
mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
mlrun/api/crud/secrets.py +88 -46
mlrun/api/crud/tags.py +5 -5
mlrun/api/db/__init__.py +1 -1
mlrun/api/db/base.py +102 -54
mlrun/api/db/init_db.py +2 -3
mlrun/api/db/session.py +4 -12
mlrun/api/db/sqldb/__init__.py +1 -1
mlrun/api/db/sqldb/db.py +439 -196
mlrun/api/db/sqldb/helpers.py +1 -1
mlrun/api/db/sqldb/models/__init__.py +3 -3
mlrun/api/db/sqldb/models/models_mysql.py +82 -64
mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
mlrun/api/db/sqldb/session.py +27 -20
mlrun/api/initial_data.py +82 -24
mlrun/api/launcher.py +196 -0
mlrun/api/main.py +91 -22
mlrun/api/middlewares.py +6 -5
mlrun/api/migrations_mysql/env.py +1 -1
mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
mlrun/api/migrations_sqlite/env.py +1 -1
mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
mlrun/api/schemas/__init__.py +216 -138
mlrun/api/utils/__init__.py +1 -1
mlrun/api/utils/asyncio.py +1 -1
mlrun/api/utils/auth/__init__.py +1 -1
mlrun/api/utils/auth/providers/__init__.py +1 -1
mlrun/api/utils/auth/providers/base.py +7 -7
mlrun/api/utils/auth/providers/nop.py +6 -7
mlrun/api/utils/auth/providers/opa.py +17 -17
mlrun/api/utils/auth/verifier.py +36 -34
mlrun/api/utils/background_tasks.py +24 -24
mlrun/{builder.py → api/utils/builder.py} +216 -123
mlrun/api/utils/clients/__init__.py +1 -1
mlrun/api/utils/clients/chief.py +19 -4
mlrun/api/utils/clients/iguazio.py +106 -60
mlrun/api/utils/clients/log_collector.py +1 -1
mlrun/api/utils/clients/nuclio.py +23 -23
mlrun/api/utils/clients/protocols/grpc.py +2 -2
mlrun/api/utils/db/__init__.py +1 -1
mlrun/api/utils/db/alembic.py +1 -1
mlrun/api/utils/db/backup.py +1 -1
mlrun/api/utils/db/mysql.py +24 -25
mlrun/api/utils/db/sql_collation.py +1 -1
mlrun/api/utils/db/sqlite_migration.py +2 -2
mlrun/api/utils/events/__init__.py +14 -0
mlrun/api/utils/events/base.py +57 -0
mlrun/api/utils/events/events_factory.py +41 -0
mlrun/api/utils/events/iguazio.py +217 -0
mlrun/api/utils/events/nop.py +55 -0
mlrun/api/utils/helpers.py +16 -13
mlrun/api/utils/memory_reports.py +1 -1
mlrun/api/utils/periodic.py +6 -3
mlrun/api/utils/projects/__init__.py +1 -1
mlrun/api/utils/projects/follower.py +33 -33
mlrun/api/utils/projects/leader.py +36 -34
mlrun/api/utils/projects/member.py +27 -27
mlrun/api/utils/projects/remotes/__init__.py +1 -1
mlrun/api/utils/projects/remotes/follower.py +13 -13
mlrun/api/utils/projects/remotes/leader.py +10 -10
mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
mlrun/api/utils/scheduler.py +140 -51
mlrun/api/utils/singletons/__init__.py +1 -1
mlrun/api/utils/singletons/db.py +9 -15
mlrun/api/utils/singletons/k8s.py +677 -5
mlrun/api/utils/singletons/logs_dir.py +1 -1
mlrun/api/utils/singletons/project_member.py +1 -1
mlrun/api/utils/singletons/scheduler.py +1 -1
mlrun/artifacts/__init__.py +2 -2
mlrun/artifacts/base.py +8 -2
mlrun/artifacts/dataset.py +5 -3
mlrun/artifacts/manager.py +7 -1
mlrun/artifacts/model.py +15 -4
mlrun/artifacts/plots.py +1 -1
mlrun/common/__init__.py +1 -1
mlrun/common/constants.py +15 -0
mlrun/common/model_monitoring.py +209 -0
mlrun/common/schemas/__init__.py +167 -0
mlrun/{api → common}/schemas/artifact.py +13 -14
mlrun/{api → common}/schemas/auth.py +10 -8
mlrun/{api → common}/schemas/background_task.py +3 -3
mlrun/{api → common}/schemas/client_spec.py +1 -1
mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
mlrun/{api → common}/schemas/constants.py +21 -8
mlrun/common/schemas/events.py +36 -0
mlrun/{api → common}/schemas/feature_store.py +2 -1
mlrun/{api → common}/schemas/frontend_spec.py +7 -6
mlrun/{api → common}/schemas/function.py +5 -5
mlrun/{api → common}/schemas/http.py +3 -3
mlrun/common/schemas/hub.py +134 -0
mlrun/{api → common}/schemas/k8s.py +3 -3
mlrun/{api → common}/schemas/memory_reports.py +1 -1
mlrun/common/schemas/model_endpoints.py +342 -0
mlrun/common/schemas/notification.py +57 -0
mlrun/{api → common}/schemas/object.py +6 -6
mlrun/{api → common}/schemas/pipeline.py +3 -3
mlrun/{api → common}/schemas/project.py +6 -5
mlrun/common/schemas/regex.py +24 -0
mlrun/common/schemas/runs.py +30 -0
mlrun/{api → common}/schemas/runtime_resource.py +3 -3
mlrun/{api → common}/schemas/schedule.py +19 -7
mlrun/{api → common}/schemas/secret.py +3 -3
mlrun/{api → common}/schemas/tag.py +2 -2
mlrun/common/types.py +25 -0
mlrun/config.py +152 -20
mlrun/data_types/__init__.py +7 -2
mlrun/data_types/data_types.py +4 -2
mlrun/data_types/infer.py +1 -1
mlrun/data_types/spark.py +10 -3
mlrun/datastore/__init__.py +10 -3
mlrun/datastore/azure_blob.py +1 -1
mlrun/datastore/base.py +185 -53
mlrun/datastore/datastore.py +1 -1
mlrun/datastore/filestore.py +1 -1
mlrun/datastore/google_cloud_storage.py +1 -1
mlrun/datastore/inmem.py +4 -1
mlrun/datastore/redis.py +1 -1
mlrun/datastore/s3.py +1 -1
mlrun/datastore/sources.py +192 -70
mlrun/datastore/spark_udf.py +44 -0
mlrun/datastore/store_resources.py +4 -4
mlrun/datastore/targets.py +115 -45
mlrun/datastore/utils.py +127 -5
mlrun/datastore/v3io.py +1 -1
mlrun/datastore/wasbfs/__init__.py +1 -1
mlrun/datastore/wasbfs/fs.py +1 -1
mlrun/db/__init__.py +7 -5
mlrun/db/base.py +112 -68
mlrun/db/httpdb.py +445 -277
mlrun/db/nopdb.py +491 -0
mlrun/db/sqldb.py +112 -65
mlrun/errors.py +6 -1
mlrun/execution.py +44 -22
mlrun/feature_store/__init__.py +1 -1
mlrun/feature_store/api.py +143 -95
mlrun/feature_store/common.py +16 -20
mlrun/feature_store/feature_set.py +42 -12
mlrun/feature_store/feature_vector.py +32 -21
mlrun/feature_store/ingestion.py +9 -12
mlrun/feature_store/retrieval/__init__.py +3 -2
mlrun/feature_store/retrieval/base.py +388 -66
mlrun/feature_store/retrieval/dask_merger.py +63 -151
mlrun/feature_store/retrieval/job.py +30 -12
mlrun/feature_store/retrieval/local_merger.py +40 -133
mlrun/feature_store/retrieval/spark_merger.py +129 -127
mlrun/feature_store/retrieval/storey_merger.py +173 -0
mlrun/feature_store/steps.py +132 -15
mlrun/features.py +8 -3
mlrun/frameworks/__init__.py +1 -1
mlrun/frameworks/_common/__init__.py +1 -1
mlrun/frameworks/_common/artifacts_library.py +1 -1
mlrun/frameworks/_common/mlrun_interface.py +1 -1
mlrun/frameworks/_common/model_handler.py +1 -1
mlrun/frameworks/_common/plan.py +1 -1
mlrun/frameworks/_common/producer.py +1 -1
mlrun/frameworks/_common/utils.py +1 -1
mlrun/frameworks/_dl_common/__init__.py +1 -1
mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
mlrun/frameworks/_dl_common/model_handler.py +1 -1
mlrun/frameworks/_dl_common/utils.py +1 -1
mlrun/frameworks/_ml_common/__init__.py +1 -1
mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
mlrun/frameworks/_ml_common/model_handler.py +1 -1
mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
mlrun/frameworks/_ml_common/plan.py +1 -1
mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
mlrun/frameworks/_ml_common/producer.py +1 -1
mlrun/frameworks/_ml_common/utils.py +1 -1
mlrun/frameworks/auto_mlrun/__init__.py +1 -1
mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
mlrun/frameworks/huggingface/__init__.py +1 -1
mlrun/frameworks/huggingface/model_server.py +1 -1
mlrun/frameworks/lgbm/__init__.py +1 -1
mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
mlrun/frameworks/lgbm/model_handler.py +1 -1
mlrun/frameworks/lgbm/model_server.py +1 -1
mlrun/frameworks/lgbm/utils.py +1 -1
mlrun/frameworks/onnx/__init__.py +1 -1
mlrun/frameworks/onnx/dataset.py +1 -1
mlrun/frameworks/onnx/mlrun_interface.py +1 -1
mlrun/frameworks/onnx/model_handler.py +1 -1
mlrun/frameworks/onnx/model_server.py +1 -1
mlrun/frameworks/parallel_coordinates.py +1 -1
mlrun/frameworks/pytorch/__init__.py +1 -1
mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
mlrun/frameworks/pytorch/model_handler.py +1 -1
mlrun/frameworks/pytorch/model_server.py +1 -1
mlrun/frameworks/pytorch/utils.py +1 -1
mlrun/frameworks/sklearn/__init__.py +1 -1
mlrun/frameworks/sklearn/estimator.py +1 -1
mlrun/frameworks/sklearn/metric.py +1 -1
mlrun/frameworks/sklearn/metrics_library.py +1 -1
mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
mlrun/frameworks/sklearn/model_handler.py +1 -1
mlrun/frameworks/sklearn/utils.py +1 -1
mlrun/frameworks/tf_keras/__init__.py +1 -1
mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
mlrun/frameworks/tf_keras/model_handler.py +1 -1
mlrun/frameworks/tf_keras/model_server.py +1 -1
mlrun/frameworks/tf_keras/utils.py +1 -1
mlrun/frameworks/xgboost/__init__.py +1 -1
mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
mlrun/frameworks/xgboost/model_handler.py +1 -1
mlrun/frameworks/xgboost/utils.py +1 -1
mlrun/k8s_utils.py +14 -765
mlrun/kfpops.py +14 -17
mlrun/launcher/__init__.py +13 -0
mlrun/launcher/base.py +406 -0
mlrun/launcher/client.py +159 -0
mlrun/launcher/factory.py +50 -0
mlrun/launcher/local.py +276 -0
mlrun/launcher/remote.py +178 -0
mlrun/lists.py +10 -2
mlrun/mlutils/__init__.py +1 -1
mlrun/mlutils/data.py +1 -1
mlrun/mlutils/models.py +1 -1
mlrun/mlutils/plots.py +1 -1
mlrun/model.py +252 -14
mlrun/model_monitoring/__init__.py +41 -0
mlrun/model_monitoring/features_drift_table.py +1 -1
mlrun/model_monitoring/helpers.py +123 -38
mlrun/model_monitoring/model_endpoint.py +144 -0
mlrun/model_monitoring/model_monitoring_batch.py +310 -259
mlrun/model_monitoring/stores/__init__.py +106 -0
mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
mlrun/model_monitoring/stores/models/__init__.py +23 -0
mlrun/model_monitoring/stores/models/base.py +18 -0
mlrun/model_monitoring/stores/models/mysql.py +100 -0
mlrun/model_monitoring/stores/models/sqlite.py +98 -0
mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
mlrun/model_monitoring/stream_processing_fs.py +239 -271
mlrun/package/__init__.py +163 -0
mlrun/package/context_handler.py +325 -0
mlrun/package/errors.py +47 -0
mlrun/package/packager.py +298 -0
mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
mlrun/package/packagers/default_packager.py +422 -0
mlrun/package/packagers/numpy_packagers.py +612 -0
mlrun/package/packagers/pandas_packagers.py +968 -0
mlrun/package/packagers/python_standard_library_packagers.py +616 -0
mlrun/package/packagers_manager.py +786 -0
mlrun/package/utils/__init__.py +53 -0
mlrun/package/utils/_archiver.py +226 -0
mlrun/package/utils/_formatter.py +211 -0
mlrun/package/utils/_pickler.py +234 -0
mlrun/package/utils/_supported_format.py +71 -0
mlrun/package/utils/log_hint_utils.py +93 -0
mlrun/package/utils/type_hint_utils.py +298 -0
mlrun/platforms/__init__.py +1 -1
mlrun/platforms/iguazio.py +34 -2
mlrun/platforms/other.py +1 -1
mlrun/projects/__init__.py +1 -1
mlrun/projects/operations.py +14 -9
mlrun/projects/pipelines.py +31 -13
mlrun/projects/project.py +762 -238
mlrun/render.py +49 -19
mlrun/run.py +57 -326
mlrun/runtimes/__init__.py +3 -9
mlrun/runtimes/base.py +247 -784
mlrun/runtimes/constants.py +1 -1
mlrun/runtimes/daskjob.py +45 -41
mlrun/runtimes/funcdoc.py +43 -7
mlrun/runtimes/function.py +66 -656
mlrun/runtimes/function_reference.py +1 -1
mlrun/runtimes/generators.py +1 -1
mlrun/runtimes/kubejob.py +99 -116
mlrun/runtimes/local.py +59 -66
mlrun/runtimes/mpijob/__init__.py +1 -1
mlrun/runtimes/mpijob/abstract.py +13 -15
mlrun/runtimes/mpijob/v1.py +3 -1
mlrun/runtimes/mpijob/v1alpha1.py +1 -1
mlrun/runtimes/nuclio.py +1 -1
mlrun/runtimes/pod.py +51 -26
mlrun/runtimes/remotesparkjob.py +3 -1
mlrun/runtimes/serving.py +12 -4
mlrun/runtimes/sparkjob/__init__.py +1 -2
mlrun/runtimes/sparkjob/abstract.py +44 -31
mlrun/runtimes/sparkjob/spark3job.py +11 -9
mlrun/runtimes/utils.py +61 -42
mlrun/secrets.py +16 -18
mlrun/serving/__init__.py +3 -2
mlrun/serving/merger.py +1 -1
mlrun/serving/remote.py +1 -1
mlrun/serving/routers.py +39 -42
mlrun/serving/server.py +23 -13
mlrun/serving/serving_wrapper.py +1 -1
mlrun/serving/states.py +172 -39
mlrun/serving/utils.py +1 -1
mlrun/serving/v1_serving.py +1 -1
mlrun/serving/v2_serving.py +29 -21
mlrun/utils/__init__.py +1 -2
mlrun/utils/async_http.py +8 -1
mlrun/utils/azure_vault.py +1 -1
mlrun/utils/clones.py +2 -2
mlrun/utils/condition_evaluator.py +65 -0
mlrun/utils/db.py +52 -0
mlrun/utils/helpers.py +188 -13
mlrun/utils/http.py +89 -54
mlrun/utils/logger.py +48 -8
mlrun/utils/model_monitoring.py +132 -100
mlrun/utils/notifications/__init__.py +1 -1
mlrun/utils/notifications/notification/__init__.py +8 -6
mlrun/utils/notifications/notification/base.py +20 -14
mlrun/utils/notifications/notification/console.py +7 -4
mlrun/utils/notifications/notification/git.py +36 -19
mlrun/utils/notifications/notification/ipython.py +10 -8
mlrun/utils/notifications/notification/slack.py +18 -13
mlrun/utils/notifications/notification_pusher.py +377 -56
mlrun/utils/regex.py +6 -1
mlrun/utils/singleton.py +1 -1
mlrun/utils/v3io_clients.py +1 -1
mlrun/utils/vault.py +270 -269
mlrun/utils/version/__init__.py +1 -1
mlrun/utils/version/version.json +2 -2
mlrun/utils/version/version.py +1 -1
{mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
mlrun-1.4.0.dist-info/RECORD +434 -0
mlrun/api/api/endpoints/marketplace.py +0 -257
mlrun/api/crud/marketplace.py +0 -221
mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
mlrun/api/db/filedb/db.py +0 -518
mlrun/api/schemas/marketplace.py +0 -128
mlrun/api/schemas/model_endpoints.py +0 -185
mlrun/db/filedb.py +0 -891
mlrun/feature_store/retrieval/online.py +0 -92
mlrun/model_monitoring/constants.py +0 -67
mlrun/runtimes/package/context_handler.py +0 -711
mlrun/runtimes/sparkjob/spark2job.py +0 -59
mlrun-1.3.3.dist-info/RECORD +0 -381
{mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
{mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
{mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
{mlrun-1.3.3.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0

mlrun/datastore/sources.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright 2018 Iguazio
+# Copyright 2023 Iguazio
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -32,7 +32,12 @@ from ..config import config
 from ..model import DataSource
 from ..platforms.iguazio import parse_path
 from ..utils import get_class
-from .utils import store_path_to_spark
+from .utils import (
+    _generate_sql_query_with_time_filter,
+    filter_df_start_end_time,
+    select_columns_from_df,
+    store_path_to_spark,
+)
 def get_source_from_dict(source):
@@ -62,38 +67,59 @@ class BaseSourceDriver(DataSource):
     def to_step(self, key_field=None, time_field=None, context=None):
         import storey
+        if not self.support_storey:
+            raise mlrun.errors.MLRunRuntimeError(
+                f"{type(self).__name__} does not support storey engine"
+            )
         return storey.SyncEmitSource(context=context)
     def get_table_object(self):
         """get storey Table object"""
         return None
-    def to_dataframe(self):
-        return mlrun.store_manager.object(url=self.path).as_df()
-    def filter_df_start_end_time(self, df, time_field):
-        # give priority to source time_field over the feature set's timestamp_key
-        if self.time_field:
-            time_field = self.time_field
-        if self.start_time or self.end_time:
-            self.start_time = (
-                datetime.min if self.start_time is None else self.start_time
-            )
-            self.end_time = datetime.max if self.end_time is None else self.end_time
-            df = df.filter(
-                (df[time_field] > self.start_time) & (df[time_field] <= self.end_time)
-            )
-        return df
+    def to_dataframe(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_field=None,
+    ):
+        """return the source data as dataframe"""
+        return mlrun.store_manager.object(url=self.path).as_df(
+            columns=columns,
+            df_module=df_module,
+            start_time=start_time or self.start_time,
+            end_time=end_time or self.end_time,
+            time_column=time_field or self.time_field,
+        )
-    def to_spark_df(self, session, named_view=False, time_field=None):
+    def to_spark_df(self, session, named_view=False, time_field=None, columns=None):
         if self.support_spark:
             df = session.read.load(**self.get_spark_options())
             if named_view:
                 df.createOrReplaceTempView(self.name)
-            return df
+            return self._filter_spark_df(df, time_field, columns)
         raise NotImplementedError()
+    def _filter_spark_df(self, df, time_field=None, columns=None):
+        if not (columns or time_field):
+            return df
+        from pyspark.sql.functions import col
+        if time_field:
+            if self.start_time:
+                df = df.filter(col(time_field) > self.start_time)
+            if self.end_time:
+                df = df.filter(col(time_field) <= self.end_time)
+        if columns:
+            df = df.select([col(name) for name in columns])
+        return df
     def get_spark_options(self):
         # options used in spark.read.load(**options)
         raise NotImplementedError()
@@ -166,7 +192,6 @@ class CSVSource(BaseSourceDriver):
         return storey.CSVSource(
             paths=self.path,
-            header=True,
             build_dict=True,
             key_field=self.key_field or key_field,
             storage_options=self._get_store().get_storage_options(),
@@ -182,7 +207,7 @@ class CSVSource(BaseSourceDriver):
             "inferSchema": "true",
         }
-    def to_spark_df(self, session, named_view=False, time_field=None):
+    def to_spark_df(self, session, named_view=False, time_field=None, columns=None):
         import pyspark.sql.functions as funcs
         df = session.read.load(**self.get_spark_options())
@@ -196,15 +221,28 @@ class CSVSource(BaseSourceDriver):
                 df = df.withColumn(col_name, funcs.col(col_name).cast("timestamp"))
         if named_view:
             df.createOrReplaceTempView(self.name)
-        return df
+        return self._filter_spark_df(df, time_field, columns)
-    def to_dataframe(self):
-        kwargs = self.attributes.get("reader_args", {})
-        chunksize = self.attributes.get("chunksize")
-        if chunksize:
-            kwargs["chunksize"] = chunksize
+    def to_dataframe(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_field=None,
+    ):
+        reader_args = self.attributes.get("reader_args", {})
         return mlrun.store_manager.object(url=self.path).as_df(
-            parse_dates=self._parse_dates, **kwargs
+            columns=columns,
+            df_module=df_module,
+            format="csv",
+            start_time=start_time or self.start_time,
+            end_time=end_time or self.end_time,
+            time_column=time_field or self.time_field,
+            parse_dates=self._parse_dates,
+            chunksize=self.attributes.get("chunksize"),
+            **reader_args,
         )
     def is_iterator(self):
@@ -246,7 +284,6 @@ class ParquetSource(BaseSourceDriver):
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
     ):
         super().__init__(
             name,
             path,
@@ -312,10 +349,24 @@ class ParquetSource(BaseSourceDriver):
             "format": "parquet",
         }
-    def to_dataframe(self):
-        kwargs = self.attributes.get("reader_args", {})
+    def to_dataframe(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_field=None,
+    ):
+        reader_args = self.attributes.get("reader_args", {})
         return mlrun.store_manager.object(url=self.path).as_df(
-            format="parquet", **kwargs
+            columns=columns,
+            df_module=df_module,
+            start_time=start_time or self.start_time,
+            end_time=end_time or self.end_time,
+            time_column=time_field or self.time_field,
+            format="parquet",
+            **reader_args,
         )
@@ -323,8 +374,13 @@ class BigQuerySource(BaseSourceDriver):
     """
     Reads Google BigQuery query results as input source for a flow.
+    For authentication, set the GCP_CREDENTIALS project secret to the credentials json string.
     example::
+         # set the credentials
+         project.set_secrets({"GCP_CREDENTIALS": gcp_credentials_json})
          # use sql query
          query_string = "SELECT * FROM `the-psf.pypi.downloads20210328` LIMIT 5000"
          source = BigQuerySource("bq1", query=query_string,
@@ -371,11 +427,21 @@ class BigQuerySource(BaseSourceDriver):
         end_time=None,
         gcp_project: str = None,
         spark_options: dict = None,
+        **kwargs,
     ):
         if query and table:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "cannot specify both table and query args"
             )
+        # Otherwise, the client library does not fully respect the limit
+        if (
+            max_results_for_table
+            and chunksize
+            and max_results_for_table % chunksize != 0
+        ):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "max_results_for_table must be a multiple of chunksize"
+            )
         attrs = {
             "query": query,
             "table": table,
@@ -394,8 +460,8 @@ class BigQuerySource(BaseSourceDriver):
             schedule=schedule,
             start_time=start_time,
             end_time=end_time,
+            **kwargs,
         )
-        self._rows_iterator = None
     def _get_credentials_string(self):
         gcp_project = self.attributes.get("gcp_project", None)
@@ -417,7 +483,15 @@ class BigQuerySource(BaseSourceDriver):
             return credentials, gcp_project or gcp_cred_dict["project_id"]
         return None, gcp_project
-    def to_dataframe(self):
+    def to_dataframe(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_field=None,
+    ):
         from google.cloud import bigquery
         from google.cloud.bigquery_storage_v1 import BigQueryReadClient
@@ -438,39 +512,43 @@ class BigQuerySource(BaseSourceDriver):
         if query:
             query_job = bqclient.query(query)
-            self._rows_iterator = query_job.result(page_size=chunksize)
-            dtypes = schema_to_dtypes(self._rows_iterator.schema)
-            if chunksize:
-                # passing bqstorage_client greatly improves performance
-                return self._rows_iterator.to_dataframe_iterable(
-                    bqstorage_client=BigQueryReadClient(), dtypes=dtypes
-                )
-            else:
-                return self._rows_iterator.to_dataframe(dtypes=dtypes)
+            rows_iterator = query_job.result(page_size=chunksize)
         elif table:
             table = self.attributes.get("table")
             max_results = self.attributes.get("max_results")
-            rows = bqclient.list_rows(
+            rows_iterator = bqclient.list_rows(
                 table, page_size=chunksize, max_results=max_results
             )
-            dtypes = schema_to_dtypes(rows.schema)
-            if chunksize:
-                # passing bqstorage_client greatly improves performance
-                return rows.to_dataframe_iterable(
-                    bqstorage_client=BigQueryReadClient(), dtypes=dtypes
-                )
-            else:
-                return rows.to_dataframe(dtypes=dtypes)
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "table or query args must be specified"
             )
+        dtypes = schema_to_dtypes(rows_iterator.schema)
+        if chunksize:
+            # passing bqstorage_client greatly improves performance
+            df = rows_iterator.to_dataframe_iterable(
+                bqstorage_client=BigQueryReadClient(), dtypes=dtypes
+            )
+        else:
+            df = rows_iterator.to_dataframe(dtypes=dtypes)
+        # TODO : filter as part of the query
+        return select_columns_from_df(
+            filter_df_start_end_time(
+                df,
+                time_column=time_field or self.time_field,
+                start_time=start_time or self.start_time,
+                end_time=end_time or self.end_time,
+            ),
+            columns=columns,
+        )
     def is_iterator(self):
         return bool(self.attributes.get("chunksize"))
-    def to_spark_df(self, session, named_view=False, time_field=None):
+    def to_spark_df(self, session, named_view=False, time_field=None, columns=None):
         options = copy(self.attributes.get("spark_options", {}))
         credentials, gcp_project = self._get_credentials_string()
         if credentials:
@@ -500,7 +578,7 @@ class BigQuerySource(BaseSourceDriver):
         df = session.read.format("bigquery").load(**options)
         if named_view:
             df.createOrReplaceTempView(self.name)
-        return df
+        return self._filter_spark_df(df, time_field, columns)
 class SnowflakeSource(BaseSourceDriver):
@@ -555,6 +633,7 @@ class SnowflakeSource(BaseSourceDriver):
         database: str = None,
         schema: str = None,
         warehouse: str = None,
+        **kwargs,
     ):
         attrs = {
             "query": query,
@@ -573,6 +652,7 @@ class SnowflakeSource(BaseSourceDriver):
             schedule=schedule,
             start_time=start_time,
             end_time=end_time,
+            **kwargs,
         )
     def _get_password(self):
@@ -664,7 +744,7 @@ class DataFrameSource:
             context=self.context or context,
         )
-    def to_dataframe(self):
+    def to_dataframe(self, **kwargs):
         return self._df
     def is_iterator(self):
@@ -839,7 +919,15 @@ class KafkaSource(OnlineSource):
             attributes["sasl"] = sasl
         super().__init__(attributes=attributes, **kwargs)
-    def to_dataframe(self):
+    def to_dataframe(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_field=None,
+    ):
         raise mlrun.MLRunInvalidArgumentError(
             "KafkaSource does not support batch processing"
         )
@@ -880,13 +968,15 @@ class SQLSource(BaseSourceDriver):
         table_name: str = None,
         spark_options: dict = None,
         time_fields: List[str] = None,
+        parse_dates: List[str] = None,
+        **kwargs,
     ):
         """
         Reads SqlDB as input source for a flow.
         example::
-            db_path = "mysql+pymysql://<username>:<password>@<host>:<port>/<db_name>"
-            source = SqlDBSource(
-                collection_name='source_name', db_path=self.db, key_field='key'
+            db_url = "mysql+pymysql://<username>:<password>@<host>:<port>/<db_name>"
+            source = SQLSource(
+                table_name='source_name', db_url=db_url, key_field='key'
             )
         :param name:            source name
         :param chunksize:       number of rows per chunk (default large single chunk)
@@ -903,19 +993,32 @@ class SQLSource(BaseSourceDriver):
                                 from the current database
         :param spark_options:   additional spark read options
         :param time_fields :    all the field to be parsed as timestamp.
+        :param parse_dates :    all the field to be parsed as timestamp.
         """
+        if time_fields:
+            warnings.warn(
+                "'time_fields' is deprecated, use 'parse_dates' instead. "
+                "This will be removed in 1.6.0",
+                # TODO: Remove this in 1.6.0
+                FutureWarning,
+            )
+            parse_dates = time_fields
         db_url = db_url or mlrun.mlconf.sql.url
         if db_url is None:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "cannot specify without db_path arg or secret MLRUN_SQL__URL"
             )
+        if time_field:
+            if parse_dates:
+                time_fields.append(time_field)
+            else:
+                parse_dates = [time_field]
         attrs = {
             "chunksize": chunksize,
             "spark_options": spark_options,
             "table_name": table_name,
             "db_path": db_url,
-            "time_fields": time_fields,
+            "parse_dates": parse_dates,
         }
         attrs = {key: value for key, value in attrs.items() if value is not None}
         super().__init__(
@@ -926,24 +1029,43 @@ class SQLSource(BaseSourceDriver):
             schedule=schedule,
             start_time=start_time,
             end_time=end_time,
+            **kwargs,
         )
-    def to_dataframe(self):
-        import sqlalchemy as db
+    def to_dataframe(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_field=None,
+    ):
+        import sqlalchemy as sqlalchemy
-        query = self.attributes.get("query", None)
         db_path = self.attributes.get("db_path")
         table_name = self.attributes.get("table_name")
-        if not query:
-            query = f"SELECT * FROM {table_name}"
+        parse_dates = self.attributes.get("parse_dates")
+        time_field = time_field or self.time_field
+        start_time = start_time or self.start_time
+        end_time = end_time or self.end_time
         if table_name and db_path:
-            engine = db.create_engine(db_path)
+            engine = sqlalchemy.create_engine(db_path)
+            query, parse_dates = _generate_sql_query_with_time_filter(
+                table_name=table_name,
+                engine=engine,
+                time_column=time_field,
+                parse_dates=parse_dates,
+                start_time=start_time,
+                end_time=end_time,
+            )
             with engine.connect() as con:
                 return pd.read_sql(
                     query,
                     con=con,
                     chunksize=self.attributes.get("chunksize"),
-                    parse_dates=self.attributes.get("time_fields"),
+                    parse_dates=parse_dates,
+                    columns=columns,
                 )
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(

mlrun/datastore/spark_udf.py ADDED Viewed

@@ -0,0 +1,44 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import hashlib
+from pyspark.sql.functions import udf
+from pyspark.sql.types import StringType
+def _hash_list(*list_to_hash):
+    """Return the SHA-1 hex digest of the arguments' concatenated string forms.
+
+    Every positional argument is converted with ``str()``; the pieces are
+    joined with no separator, UTF-8 encoded and hashed.
+
+    :param list_to_hash: values to hash, in order
+    :return: hexadecimal SHA-1 digest string
+    """
+    joined = "".join(map(str, list_to_hash))
+    return hashlib.sha1(joined.encode("utf8")).hexdigest()
+def _redis_stringify_key(*args):
+    """Build the string key for one or more key parts, ending in ``}:static``.
+
+    A single argument is used as-is (it may itself be a list of key parts);
+    several arguments are collected into a list. For a list:
+
+    * three or more parts -> ``<first>.<sha1 of the remaining parts>}:static``
+    * two parts           -> ``<first>.<second>}:static``
+    * one part            -> ``<first>}:static``
+
+    A non-list single argument yields ``<value>}:static``.
+    An empty list of parts raises ``IndexError``.
+    """
+    # NOTE(review): the leading "}" presumably closes a "{" supplied by the
+    # caller's key prefix - confirm against the code that builds the prefix.
+    tail = "}:static"
+    keys = args[0] if len(args) == 1 else list(args)
+    if not isinstance(keys, list):
+        return str(keys) + tail
+    head = str(keys[0])
+    part_count = len(keys)
+    if part_count >= 3:
+        # Long composite keys are collapsed to a fixed-length hash.
+        middle = "." + _hash_list(*keys[1:])
+    elif part_count == 2:
+        middle = "." + str(keys[1])
+    else:
+        middle = ""
+    return head + middle + tail
+hash_and_concat_v3io_udf = udf(_hash_list, StringType())
+hash_and_concat_redis_udf = udf(_redis_stringify_key, StringType())

mlrun/datastore/store_resources.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright 2018 Iguazio
+# Copyright 2023 Iguazio
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -81,7 +81,7 @@ class ResourceCache:
             endpoint, uri = parse_path(uri)
             self._tabels[uri] = Table(
                 uri,
-                V3ioDriver(webapi=endpoint),
+                V3ioDriver(webapi=endpoint or mlrun.mlconf.v3io_api),
                 flush_interval_secs=mlrun.mlconf.feature_store.flush_interval,
             )
             return self._tabels[uri]
@@ -101,8 +101,8 @@ class ResourceCache:
         if is_store_uri(uri):
             resource = get_store_resource(uri)
             if resource.kind in [
-                mlrun.api.schemas.ObjectKind.feature_set.value,
-                mlrun.api.schemas.ObjectKind.feature_vector.value,
+                mlrun.common.schemas.ObjectKind.feature_set.value,
+                mlrun.common.schemas.ObjectKind.feature_vector.value,
             ]:
                 target = get_online_target(resource)
                 if not target:

mlrun 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl

Potentially problematic release.

mlrun 1.3.3py3-none-any.whl → 1.4.0py3-none-any.whl