mlrun 1.3.3rc1__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +3 -3
- mlrun/__main__.py +79 -37
- mlrun/api/__init__.py +1 -1
- mlrun/api/api/__init__.py +1 -1
- mlrun/api/api/api.py +4 -4
- mlrun/api/api/deps.py +10 -21
- mlrun/api/api/endpoints/__init__.py +1 -1
- mlrun/api/api/endpoints/artifacts.py +64 -36
- mlrun/api/api/endpoints/auth.py +4 -4
- mlrun/api/api/endpoints/background_tasks.py +11 -11
- mlrun/api/api/endpoints/client_spec.py +5 -5
- mlrun/api/api/endpoints/clusterization_spec.py +6 -4
- mlrun/api/api/endpoints/feature_store.py +124 -115
- mlrun/api/api/endpoints/files.py +22 -14
- mlrun/api/api/endpoints/frontend_spec.py +28 -21
- mlrun/api/api/endpoints/functions.py +142 -87
- mlrun/api/api/endpoints/grafana_proxy.py +89 -442
- mlrun/api/api/endpoints/healthz.py +20 -7
- mlrun/api/api/endpoints/hub.py +320 -0
- mlrun/api/api/endpoints/internal/__init__.py +1 -1
- mlrun/api/api/endpoints/internal/config.py +1 -1
- mlrun/api/api/endpoints/internal/memory_reports.py +9 -9
- mlrun/api/api/endpoints/logs.py +11 -11
- mlrun/api/api/endpoints/model_endpoints.py +74 -70
- mlrun/api/api/endpoints/operations.py +13 -9
- mlrun/api/api/endpoints/pipelines.py +93 -88
- mlrun/api/api/endpoints/projects.py +35 -35
- mlrun/api/api/endpoints/runs.py +69 -27
- mlrun/api/api/endpoints/runtime_resources.py +28 -28
- mlrun/api/api/endpoints/schedules.py +98 -41
- mlrun/api/api/endpoints/secrets.py +37 -32
- mlrun/api/api/endpoints/submit.py +12 -12
- mlrun/api/api/endpoints/tags.py +20 -22
- mlrun/api/api/utils.py +251 -42
- mlrun/api/constants.py +1 -1
- mlrun/api/crud/__init__.py +18 -15
- mlrun/api/crud/artifacts.py +10 -10
- mlrun/api/crud/client_spec.py +4 -4
- mlrun/api/crud/clusterization_spec.py +3 -3
- mlrun/api/crud/feature_store.py +54 -46
- mlrun/api/crud/functions.py +3 -3
- mlrun/api/crud/hub.py +312 -0
- mlrun/api/crud/logs.py +11 -9
- mlrun/api/crud/model_monitoring/__init__.py +3 -3
- mlrun/api/crud/model_monitoring/grafana.py +435 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +352 -129
- mlrun/api/crud/notifications.py +149 -0
- mlrun/api/crud/pipelines.py +67 -52
- mlrun/api/crud/projects.py +51 -23
- mlrun/api/crud/runs.py +7 -5
- mlrun/api/crud/runtime_resources.py +13 -13
- mlrun/api/{db/filedb → crud/runtimes}/__init__.py +1 -1
- mlrun/api/crud/runtimes/nuclio/__init__.py +14 -0
- mlrun/api/crud/runtimes/nuclio/function.py +505 -0
- mlrun/api/crud/runtimes/nuclio/helpers.py +310 -0
- mlrun/api/crud/secrets.py +88 -46
- mlrun/api/crud/tags.py +5 -5
- mlrun/api/db/__init__.py +1 -1
- mlrun/api/db/base.py +102 -54
- mlrun/api/db/init_db.py +2 -3
- mlrun/api/db/session.py +4 -12
- mlrun/api/db/sqldb/__init__.py +1 -1
- mlrun/api/db/sqldb/db.py +439 -196
- mlrun/api/db/sqldb/helpers.py +1 -1
- mlrun/api/db/sqldb/models/__init__.py +3 -3
- mlrun/api/db/sqldb/models/models_mysql.py +82 -64
- mlrun/api/db/sqldb/models/models_sqlite.py +76 -64
- mlrun/api/db/sqldb/session.py +27 -20
- mlrun/api/initial_data.py +82 -24
- mlrun/api/launcher.py +196 -0
- mlrun/api/main.py +91 -22
- mlrun/api/middlewares.py +6 -5
- mlrun/api/migrations_mysql/env.py +1 -1
- mlrun/api/migrations_mysql/versions/28383af526f3_market_place_to_hub.py +40 -0
- mlrun/api/migrations_mysql/versions/32bae1b0e29c_increase_timestamp_fields_precision.py +1 -1
- mlrun/api/migrations_mysql/versions/4903aef6a91d_tag_foreign_key_and_cascades.py +1 -1
- mlrun/api/migrations_mysql/versions/5f1351c88a19_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_mysql/versions/88e656800d6a_add_requested_logs_column_and_index_to_.py +1 -1
- mlrun/api/migrations_mysql/versions/9d16de5f03a7_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_mysql/versions/b86f5b53f3d7_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_mysql/versions/c4af40b0bf61_init.py +1 -1
- mlrun/api/migrations_mysql/versions/c905d15bd91d_notifications.py +72 -0
- mlrun/api/migrations_mysql/versions/ee041e8fdaa0_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/env.py +1 -1
- mlrun/api/migrations_sqlite/versions/11f8dd2dc9fe_init.py +1 -1
- mlrun/api/migrations_sqlite/versions/1c954f8cb32d_schedule_last_run_uri.py +1 -1
- mlrun/api/migrations_sqlite/versions/2b6d23c715aa_adding_feature_sets.py +1 -1
- mlrun/api/migrations_sqlite/versions/4acd9430b093_market_place_to_hub.py +77 -0
- mlrun/api/migrations_sqlite/versions/6401142f2d7c_adding_next_run_time_column_to_schedule_.py +1 -1
- mlrun/api/migrations_sqlite/versions/64d90a1a69bc_adding_background_tasks_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/803438ecd005_add_requested_logs_column_to_runs.py +1 -1
- mlrun/api/migrations_sqlite/versions/863114f0c659_refactoring_feature_set.py +1 -1
- mlrun/api/migrations_sqlite/versions/959ae00528ad_notifications.py +63 -0
- mlrun/api/migrations_sqlite/versions/accf9fc83d38_adding_data_versions_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/b68e8e897a28_schedule_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/bcd0c1f9720c_adding_project_labels.py +1 -1
- mlrun/api/migrations_sqlite/versions/cf21882f938e_schedule_id.py +1 -1
- mlrun/api/migrations_sqlite/versions/d781f58f607f_tag_object_name_string.py +1 -1
- mlrun/api/migrations_sqlite/versions/deac06871ace_adding_marketplace_sources_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/e1dd5983c06b_schedule_concurrency_limit.py +1 -1
- mlrun/api/migrations_sqlite/versions/e5594ed3ab53_adding_name_and_updated_to_runs_table.py +1 -1
- mlrun/api/migrations_sqlite/versions/f4249b4ba6fa_adding_feature_vectors.py +1 -1
- mlrun/api/migrations_sqlite/versions/f7b5a1a03629_adding_feature_labels.py +1 -1
- mlrun/api/schemas/__init__.py +216 -138
- mlrun/api/utils/__init__.py +1 -1
- mlrun/api/utils/asyncio.py +1 -1
- mlrun/api/utils/auth/__init__.py +1 -1
- mlrun/api/utils/auth/providers/__init__.py +1 -1
- mlrun/api/utils/auth/providers/base.py +7 -7
- mlrun/api/utils/auth/providers/nop.py +6 -7
- mlrun/api/utils/auth/providers/opa.py +17 -17
- mlrun/api/utils/auth/verifier.py +36 -34
- mlrun/api/utils/background_tasks.py +24 -24
- mlrun/{builder.py → api/utils/builder.py} +216 -123
- mlrun/api/utils/clients/__init__.py +1 -1
- mlrun/api/utils/clients/chief.py +19 -4
- mlrun/api/utils/clients/iguazio.py +106 -60
- mlrun/api/utils/clients/log_collector.py +1 -1
- mlrun/api/utils/clients/nuclio.py +23 -23
- mlrun/api/utils/clients/protocols/grpc.py +2 -2
- mlrun/api/utils/db/__init__.py +1 -1
- mlrun/api/utils/db/alembic.py +1 -1
- mlrun/api/utils/db/backup.py +1 -1
- mlrun/api/utils/db/mysql.py +24 -25
- mlrun/api/utils/db/sql_collation.py +1 -1
- mlrun/api/utils/db/sqlite_migration.py +2 -2
- mlrun/api/utils/events/__init__.py +14 -0
- mlrun/api/utils/events/base.py +57 -0
- mlrun/api/utils/events/events_factory.py +41 -0
- mlrun/api/utils/events/iguazio.py +217 -0
- mlrun/api/utils/events/nop.py +55 -0
- mlrun/api/utils/helpers.py +16 -13
- mlrun/api/utils/memory_reports.py +1 -1
- mlrun/api/utils/periodic.py +6 -3
- mlrun/api/utils/projects/__init__.py +1 -1
- mlrun/api/utils/projects/follower.py +33 -33
- mlrun/api/utils/projects/leader.py +36 -34
- mlrun/api/utils/projects/member.py +27 -27
- mlrun/api/utils/projects/remotes/__init__.py +1 -1
- mlrun/api/utils/projects/remotes/follower.py +13 -13
- mlrun/api/utils/projects/remotes/leader.py +10 -10
- mlrun/api/utils/projects/remotes/nop_follower.py +27 -21
- mlrun/api/utils/projects/remotes/nop_leader.py +17 -16
- mlrun/api/utils/scheduler.py +140 -51
- mlrun/api/utils/singletons/__init__.py +1 -1
- mlrun/api/utils/singletons/db.py +9 -15
- mlrun/api/utils/singletons/k8s.py +677 -5
- mlrun/api/utils/singletons/logs_dir.py +1 -1
- mlrun/api/utils/singletons/project_member.py +1 -1
- mlrun/api/utils/singletons/scheduler.py +1 -1
- mlrun/artifacts/__init__.py +2 -2
- mlrun/artifacts/base.py +8 -2
- mlrun/artifacts/dataset.py +5 -3
- mlrun/artifacts/manager.py +7 -1
- mlrun/artifacts/model.py +15 -4
- mlrun/artifacts/plots.py +1 -1
- mlrun/common/__init__.py +1 -1
- mlrun/common/constants.py +15 -0
- mlrun/common/model_monitoring.py +209 -0
- mlrun/common/schemas/__init__.py +167 -0
- mlrun/{api → common}/schemas/artifact.py +13 -14
- mlrun/{api → common}/schemas/auth.py +10 -8
- mlrun/{api → common}/schemas/background_task.py +3 -3
- mlrun/{api → common}/schemas/client_spec.py +1 -1
- mlrun/{api → common}/schemas/clusterization_spec.py +3 -3
- mlrun/{api → common}/schemas/constants.py +21 -8
- mlrun/common/schemas/events.py +36 -0
- mlrun/{api → common}/schemas/feature_store.py +2 -1
- mlrun/{api → common}/schemas/frontend_spec.py +7 -6
- mlrun/{api → common}/schemas/function.py +5 -5
- mlrun/{api → common}/schemas/http.py +3 -3
- mlrun/common/schemas/hub.py +134 -0
- mlrun/{api → common}/schemas/k8s.py +3 -3
- mlrun/{api → common}/schemas/memory_reports.py +1 -1
- mlrun/common/schemas/model_endpoints.py +342 -0
- mlrun/common/schemas/notification.py +57 -0
- mlrun/{api → common}/schemas/object.py +6 -6
- mlrun/{api → common}/schemas/pipeline.py +3 -3
- mlrun/{api → common}/schemas/project.py +6 -5
- mlrun/common/schemas/regex.py +24 -0
- mlrun/common/schemas/runs.py +30 -0
- mlrun/{api → common}/schemas/runtime_resource.py +3 -3
- mlrun/{api → common}/schemas/schedule.py +19 -7
- mlrun/{api → common}/schemas/secret.py +3 -3
- mlrun/{api → common}/schemas/tag.py +2 -2
- mlrun/common/types.py +25 -0
- mlrun/config.py +152 -20
- mlrun/data_types/__init__.py +7 -2
- mlrun/data_types/data_types.py +4 -2
- mlrun/data_types/infer.py +1 -1
- mlrun/data_types/spark.py +10 -3
- mlrun/datastore/__init__.py +10 -3
- mlrun/datastore/azure_blob.py +1 -1
- mlrun/datastore/base.py +185 -53
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/filestore.py +1 -1
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +1 -1
- mlrun/datastore/s3.py +1 -1
- mlrun/datastore/sources.py +192 -70
- mlrun/datastore/spark_udf.py +44 -0
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/targets.py +115 -45
- mlrun/datastore/utils.py +127 -5
- mlrun/datastore/v3io.py +1 -1
- mlrun/datastore/wasbfs/__init__.py +1 -1
- mlrun/datastore/wasbfs/fs.py +1 -1
- mlrun/db/__init__.py +7 -5
- mlrun/db/base.py +112 -68
- mlrun/db/httpdb.py +445 -277
- mlrun/db/nopdb.py +491 -0
- mlrun/db/sqldb.py +112 -65
- mlrun/errors.py +6 -1
- mlrun/execution.py +44 -22
- mlrun/feature_store/__init__.py +1 -1
- mlrun/feature_store/api.py +143 -95
- mlrun/feature_store/common.py +16 -20
- mlrun/feature_store/feature_set.py +42 -12
- mlrun/feature_store/feature_vector.py +32 -21
- mlrun/feature_store/ingestion.py +9 -12
- mlrun/feature_store/retrieval/__init__.py +3 -2
- mlrun/feature_store/retrieval/base.py +388 -66
- mlrun/feature_store/retrieval/dask_merger.py +63 -151
- mlrun/feature_store/retrieval/job.py +30 -12
- mlrun/feature_store/retrieval/local_merger.py +40 -133
- mlrun/feature_store/retrieval/spark_merger.py +129 -127
- mlrun/feature_store/retrieval/storey_merger.py +173 -0
- mlrun/feature_store/steps.py +132 -15
- mlrun/features.py +8 -3
- mlrun/frameworks/__init__.py +1 -1
- mlrun/frameworks/_common/__init__.py +1 -1
- mlrun/frameworks/_common/artifacts_library.py +1 -1
- mlrun/frameworks/_common/mlrun_interface.py +1 -1
- mlrun/frameworks/_common/model_handler.py +1 -1
- mlrun/frameworks/_common/plan.py +1 -1
- mlrun/frameworks/_common/producer.py +1 -1
- mlrun/frameworks/_common/utils.py +1 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +1 -1
- mlrun/frameworks/_dl_common/model_handler.py +1 -1
- mlrun/frameworks/_dl_common/utils.py +1 -1
- mlrun/frameworks/_ml_common/__init__.py +1 -1
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -1
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -1
- mlrun/frameworks/_ml_common/loggers/logger.py +1 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/model_handler.py +1 -1
- mlrun/frameworks/_ml_common/pkl_model_server.py +13 -1
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +1 -6
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +1 -1
- mlrun/frameworks/_ml_common/producer.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +1 -1
- mlrun/frameworks/auto_mlrun/__init__.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +1 -1
- mlrun/frameworks/huggingface/__init__.py +1 -1
- mlrun/frameworks/huggingface/model_server.py +1 -1
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/model_mlrun_interface.py +1 -1
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/lgbm/model_server.py +1 -1
- mlrun/frameworks/lgbm/utils.py +1 -1
- mlrun/frameworks/onnx/__init__.py +1 -1
- mlrun/frameworks/onnx/dataset.py +1 -1
- mlrun/frameworks/onnx/mlrun_interface.py +1 -1
- mlrun/frameworks/onnx/model_handler.py +1 -1
- mlrun/frameworks/onnx/model_server.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +1 -1
- mlrun/frameworks/pytorch/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -1
- mlrun/frameworks/pytorch/callbacks/callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/pytorch/callbacks_handler.py +1 -1
- mlrun/frameworks/pytorch/mlrun_interface.py +1 -1
- mlrun/frameworks/pytorch/model_handler.py +1 -1
- mlrun/frameworks/pytorch/model_server.py +1 -1
- mlrun/frameworks/pytorch/utils.py +1 -1
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/estimator.py +1 -1
- mlrun/frameworks/sklearn/metric.py +1 -1
- mlrun/frameworks/sklearn/metrics_library.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +1 -1
- mlrun/frameworks/sklearn/model_handler.py +1 -1
- mlrun/frameworks/sklearn/utils.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +1 -1
- mlrun/frameworks/tf_keras/model_handler.py +1 -1
- mlrun/frameworks/tf_keras/model_server.py +1 -1
- mlrun/frameworks/tf_keras/utils.py +1 -1
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/frameworks/xgboost/mlrun_interface.py +1 -1
- mlrun/frameworks/xgboost/model_handler.py +1 -1
- mlrun/frameworks/xgboost/utils.py +1 -1
- mlrun/k8s_utils.py +14 -765
- mlrun/kfpops.py +14 -17
- mlrun/launcher/__init__.py +13 -0
- mlrun/launcher/base.py +406 -0
- mlrun/launcher/client.py +159 -0
- mlrun/launcher/factory.py +50 -0
- mlrun/launcher/local.py +276 -0
- mlrun/launcher/remote.py +178 -0
- mlrun/lists.py +10 -2
- mlrun/mlutils/__init__.py +1 -1
- mlrun/mlutils/data.py +1 -1
- mlrun/mlutils/models.py +1 -1
- mlrun/mlutils/plots.py +1 -1
- mlrun/model.py +252 -14
- mlrun/model_monitoring/__init__.py +41 -0
- mlrun/model_monitoring/features_drift_table.py +1 -1
- mlrun/model_monitoring/helpers.py +123 -38
- mlrun/model_monitoring/model_endpoint.py +144 -0
- mlrun/model_monitoring/model_monitoring_batch.py +310 -259
- mlrun/model_monitoring/stores/__init__.py +106 -0
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +448 -0
- mlrun/model_monitoring/stores/model_endpoint_store.py +147 -0
- mlrun/model_monitoring/stores/models/__init__.py +23 -0
- mlrun/model_monitoring/stores/models/base.py +18 -0
- mlrun/model_monitoring/stores/models/mysql.py +100 -0
- mlrun/model_monitoring/stores/models/sqlite.py +98 -0
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +370 -0
- mlrun/model_monitoring/stream_processing_fs.py +239 -271
- mlrun/package/__init__.py +163 -0
- mlrun/package/context_handler.py +325 -0
- mlrun/package/errors.py +47 -0
- mlrun/package/packager.py +298 -0
- mlrun/{runtimes/package → package/packagers}/__init__.py +3 -1
- mlrun/package/packagers/default_packager.py +422 -0
- mlrun/package/packagers/numpy_packagers.py +612 -0
- mlrun/package/packagers/pandas_packagers.py +968 -0
- mlrun/package/packagers/python_standard_library_packagers.py +616 -0
- mlrun/package/packagers_manager.py +786 -0
- mlrun/package/utils/__init__.py +53 -0
- mlrun/package/utils/_archiver.py +226 -0
- mlrun/package/utils/_formatter.py +211 -0
- mlrun/package/utils/_pickler.py +234 -0
- mlrun/package/utils/_supported_format.py +71 -0
- mlrun/package/utils/log_hint_utils.py +93 -0
- mlrun/package/utils/type_hint_utils.py +298 -0
- mlrun/platforms/__init__.py +1 -1
- mlrun/platforms/iguazio.py +34 -2
- mlrun/platforms/other.py +1 -1
- mlrun/projects/__init__.py +1 -1
- mlrun/projects/operations.py +14 -9
- mlrun/projects/pipelines.py +31 -13
- mlrun/projects/project.py +762 -238
- mlrun/render.py +49 -19
- mlrun/run.py +57 -326
- mlrun/runtimes/__init__.py +3 -9
- mlrun/runtimes/base.py +247 -784
- mlrun/runtimes/constants.py +1 -1
- mlrun/runtimes/daskjob.py +45 -41
- mlrun/runtimes/funcdoc.py +43 -7
- mlrun/runtimes/function.py +66 -656
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/generators.py +1 -1
- mlrun/runtimes/kubejob.py +99 -116
- mlrun/runtimes/local.py +59 -66
- mlrun/runtimes/mpijob/__init__.py +1 -1
- mlrun/runtimes/mpijob/abstract.py +13 -15
- mlrun/runtimes/mpijob/v1.py +3 -1
- mlrun/runtimes/mpijob/v1alpha1.py +1 -1
- mlrun/runtimes/nuclio.py +1 -1
- mlrun/runtimes/pod.py +51 -26
- mlrun/runtimes/remotesparkjob.py +3 -1
- mlrun/runtimes/serving.py +12 -4
- mlrun/runtimes/sparkjob/__init__.py +1 -2
- mlrun/runtimes/sparkjob/abstract.py +44 -31
- mlrun/runtimes/sparkjob/spark3job.py +11 -9
- mlrun/runtimes/utils.py +61 -42
- mlrun/secrets.py +16 -18
- mlrun/serving/__init__.py +3 -2
- mlrun/serving/merger.py +1 -1
- mlrun/serving/remote.py +1 -1
- mlrun/serving/routers.py +39 -42
- mlrun/serving/server.py +23 -13
- mlrun/serving/serving_wrapper.py +1 -1
- mlrun/serving/states.py +172 -39
- mlrun/serving/utils.py +1 -1
- mlrun/serving/v1_serving.py +1 -1
- mlrun/serving/v2_serving.py +29 -21
- mlrun/utils/__init__.py +1 -2
- mlrun/utils/async_http.py +8 -1
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +2 -2
- mlrun/utils/condition_evaluator.py +65 -0
- mlrun/utils/db.py +52 -0
- mlrun/utils/helpers.py +188 -13
- mlrun/utils/http.py +89 -54
- mlrun/utils/logger.py +48 -8
- mlrun/utils/model_monitoring.py +132 -100
- mlrun/utils/notifications/__init__.py +1 -1
- mlrun/utils/notifications/notification/__init__.py +8 -6
- mlrun/utils/notifications/notification/base.py +20 -14
- mlrun/utils/notifications/notification/console.py +7 -4
- mlrun/utils/notifications/notification/git.py +36 -19
- mlrun/utils/notifications/notification/ipython.py +10 -8
- mlrun/utils/notifications/notification/slack.py +18 -13
- mlrun/utils/notifications/notification_pusher.py +377 -56
- mlrun/utils/regex.py +6 -1
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +1 -1
- mlrun/utils/vault.py +270 -269
- mlrun/utils/version/__init__.py +1 -1
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +1 -1
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/METADATA +16 -10
- mlrun-1.4.0.dist-info/RECORD +434 -0
- mlrun/api/api/endpoints/marketplace.py +0 -257
- mlrun/api/crud/marketplace.py +0 -221
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +0 -847
- mlrun/api/db/filedb/db.py +0 -518
- mlrun/api/schemas/marketplace.py +0 -128
- mlrun/api/schemas/model_endpoints.py +0 -185
- mlrun/db/filedb.py +0 -891
- mlrun/feature_store/retrieval/online.py +0 -92
- mlrun/model_monitoring/constants.py +0 -67
- mlrun/runtimes/package/context_handler.py +0 -711
- mlrun/runtimes/sparkjob/spark2job.py +0 -59
- mlrun-1.3.3rc1.dist-info/RECORD +0 -381
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/LICENSE +0 -0
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/WHEEL +0 -0
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.3rc1.dist-info → mlrun-1.4.0.dist-info}/top_level.txt +0 -0
mlrun/config.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -48,6 +48,10 @@ default_env_file = os.getenv("MLRUN_DEFAULT_ENV_FILE", "~/.mlrun.env")
|
|
|
48
48
|
|
|
49
49
|
default_config = {
|
|
50
50
|
"namespace": "", # default kubernetes namespace
|
|
51
|
+
"kubernetes": {
|
|
52
|
+
"kubeconfig_path": "", # local path to kubeconfig file (for development purposes),
|
|
53
|
+
# empty by default as the API already running inside k8s cluster
|
|
54
|
+
},
|
|
51
55
|
"dbpath": "", # db/api url
|
|
52
56
|
# url to nuclio dashboard api (can be with user & token, e.g. https://username:password@dashboard-url.com)
|
|
53
57
|
"nuclio_dashboard_url": "",
|
|
@@ -74,9 +78,10 @@ default_config = {
|
|
|
74
78
|
"spark_app_image_tag": "", # image tag to use for spark operator app runtime
|
|
75
79
|
"spark_history_server_path": "", # spark logs directory for spark history server
|
|
76
80
|
"spark_operator_version": "spark-3", # the version of the spark operator in use
|
|
77
|
-
"builder_alpine_image": "alpine:3.13.1", # builder alpine image (as kaniko's initContainer)
|
|
78
81
|
"package_path": "mlrun", # mlrun pip package
|
|
79
82
|
"default_base_image": "mlrun/mlrun", # default base image when doing .deploy()
|
|
83
|
+
# template for project default image name. Parameter {name} will be replaced with project name
|
|
84
|
+
"default_project_image_name": ".mlrun-project-image-{name}",
|
|
80
85
|
"default_project": "default", # default project name
|
|
81
86
|
"default_archive": "", # default remote archive URL (for build tar.gz)
|
|
82
87
|
"mpijob_crd_version": "", # mpijob crd version (e.g: "v1alpha1". must be in: mlrun.runtime.MPIJobCRDVersions)
|
|
@@ -155,7 +160,7 @@ default_config = {
|
|
|
155
160
|
# default security context to be applied to all functions - json string base64 encoded format
|
|
156
161
|
# in camelCase format: {"runAsUser": 1000, "runAsGroup": 3000}
|
|
157
162
|
"default": "e30=", # encoded empty dict
|
|
158
|
-
# see mlrun.
|
|
163
|
+
# see mlrun.common.schemas.function.SecurityContextEnrichmentModes for available options
|
|
159
164
|
"enrichment_mode": "disabled",
|
|
160
165
|
# default 65534 (nogroup), set to -1 to use the user unix id or
|
|
161
166
|
# function.spec.security_context.pipelines.kfp_pod_user_unix_id for kfp pods
|
|
@@ -178,7 +183,7 @@ default_config = {
|
|
|
178
183
|
"mpijob": "mlrun/ml-models",
|
|
179
184
|
},
|
|
180
185
|
# see enrich_function_preemption_spec for more info,
|
|
181
|
-
# and mlrun.
|
|
186
|
+
# and mlrun.common.schemas.function.PreemptionModes for available options
|
|
182
187
|
"preemption_mode": "prevent",
|
|
183
188
|
},
|
|
184
189
|
"httpdb": {
|
|
@@ -219,7 +224,7 @@ default_config = {
|
|
|
219
224
|
"allowed_file_paths": "s3://,gcs://,gs://,az://",
|
|
220
225
|
"db_type": "sqldb",
|
|
221
226
|
"max_workers": 64,
|
|
222
|
-
# See mlrun.
|
|
227
|
+
# See mlrun.common.schemas.APIStates for options
|
|
223
228
|
"state": "online",
|
|
224
229
|
"retry_api_call_on_exception": "enabled",
|
|
225
230
|
"http_connection_timeout_keep_alive": 11,
|
|
@@ -230,10 +235,10 @@ default_config = {
|
|
|
230
235
|
"conflict_retry_interval": None,
|
|
231
236
|
# Whether to perform data migrations on initialization. enabled or disabled
|
|
232
237
|
"data_migrations_mode": "enabled",
|
|
233
|
-
# Whether
|
|
238
|
+
# Whether to perform database migration from sqlite to mysql on initialization
|
|
234
239
|
"database_migration_mode": "enabled",
|
|
235
240
|
"backup": {
|
|
236
|
-
# Whether
|
|
241
|
+
# Whether to use db backups on initialization
|
|
237
242
|
"mode": "enabled",
|
|
238
243
|
"file_format": "db_backup_%Y%m%d%H%M.db",
|
|
239
244
|
"use_rotation": True,
|
|
@@ -244,6 +249,14 @@ default_config = {
|
|
|
244
249
|
# None will set this to be equal to the httpdb.max_workers
|
|
245
250
|
"connections_pool_size": None,
|
|
246
251
|
"connections_pool_max_overflow": None,
|
|
252
|
+
# below is a db-specific configuration
|
|
253
|
+
"mysql": {
|
|
254
|
+
# comma separated mysql modes (globally) to set on runtime
|
|
255
|
+
# optional values (as per https://dev.mysql.com/doc/refman/8.0/en/sql-mode.html#sql-mode-full):
|
|
256
|
+
#
|
|
257
|
+
# if set to "nil" or "none", nothing would be set
|
|
258
|
+
"modes": "STRICT_TRANS_TABLES",
|
|
259
|
+
},
|
|
247
260
|
},
|
|
248
261
|
"jobs": {
|
|
249
262
|
# whether to allow to run local runtimes in the API - configurable to allow the scheduler testing to work
|
|
@@ -357,9 +370,12 @@ default_config = {
|
|
|
357
370
|
# git+https://github.com/mlrun/mlrun@development. by default uses the version
|
|
358
371
|
"mlrun_version_specifier": "",
|
|
359
372
|
"kaniko_image": "gcr.io/kaniko-project/executor:v1.8.0", # kaniko builder image
|
|
360
|
-
"kaniko_init_container_image": "alpine:3.
|
|
373
|
+
"kaniko_init_container_image": "alpine:3.18",
|
|
361
374
|
# image for kaniko init container when docker registry is ECR
|
|
362
375
|
"kaniko_aws_cli_image": "amazon/aws-cli:2.7.10",
|
|
376
|
+
# kaniko sometimes fails to get filesystem from image, this is a workaround to retry the process
|
|
377
|
+
# a known issue in Kaniko - https://github.com/GoogleContainerTools/kaniko/issues/1717
|
|
378
|
+
"kaniko_image_fs_extraction_retries": "3",
|
|
363
379
|
# additional docker build args in json encoded base64 format
|
|
364
380
|
"build_args": "",
|
|
365
381
|
"pip_ca_secret_name": "",
|
|
@@ -372,18 +388,37 @@ default_config = {
|
|
|
372
388
|
},
|
|
373
389
|
"v3io_api": "",
|
|
374
390
|
"v3io_framesd": "",
|
|
391
|
+
# If running from sdk and MLRUN_DBPATH is not set, the db will fall back to a nop db which will not perform any
|
|
392
|
+
# run db operations.
|
|
393
|
+
"nop_db": {
|
|
394
|
+
# if set to true, will raise an error for trying to use run db functionality
|
|
395
|
+
# if set to false, will use a nop db which will not perform any run db operations
|
|
396
|
+
"raise_error": False,
|
|
397
|
+
# if set to true, will log a warning for trying to use run db functionality while in nop db mode
|
|
398
|
+
"verbose": True,
|
|
399
|
+
},
|
|
375
400
|
},
|
|
376
401
|
"model_endpoint_monitoring": {
|
|
377
402
|
"serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
|
|
378
403
|
"drift_thresholds": {"default": {"possible_drift": 0.5, "drift_detected": 0.7}},
|
|
404
|
+
# Store prefixes are used to handle model monitoring storing policies based on project and kind, such as events,
|
|
405
|
+
# stream, and endpoints.
|
|
379
406
|
"store_prefixes": {
|
|
380
407
|
"default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
|
|
381
408
|
"user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
|
|
409
|
+
"stream": "",
|
|
382
410
|
},
|
|
411
|
+
# Offline storage path can be either relative or a full path. This path is used for general offline data
|
|
412
|
+
# storage such as the parquet file which is generated from the monitoring stream function for the drift analysis
|
|
413
|
+
"offline_storage_path": "model-endpoints/{kind}",
|
|
414
|
+
# Default http path that points to the monitoring stream nuclio function. Will be used as a stream path
|
|
415
|
+
# when the user is working in CE environment and has not provided any stream path.
|
|
416
|
+
"default_http_sink": "http://nuclio-{project}-model-monitoring-stream.mlrun.svc.cluster.local:8080",
|
|
383
417
|
"batch_processing_function_branch": "master",
|
|
384
418
|
"parquet_batching_max_events": 10000,
|
|
385
|
-
# See mlrun.
|
|
386
|
-
"store_type": "
|
|
419
|
+
# See mlrun.common.schemas.ModelEndpointStoreType for available options
|
|
420
|
+
"store_type": "v3io-nosql",
|
|
421
|
+
"endpoint_store_connection": "",
|
|
387
422
|
},
|
|
388
423
|
"secret_stores": {
|
|
389
424
|
"vault": {
|
|
@@ -427,15 +462,16 @@ default_config = {
|
|
|
427
462
|
"projects_prefix": "projects", # The UI link prefix for projects
|
|
428
463
|
"url": "", # remote/external mlrun UI url (for hyperlinks)
|
|
429
464
|
},
|
|
430
|
-
"
|
|
431
|
-
"k8s_secrets_project_name": "-
|
|
465
|
+
"hub": {
|
|
466
|
+
"k8s_secrets_project_name": "-hub-secrets",
|
|
432
467
|
"catalog_filename": "catalog.json",
|
|
433
468
|
"default_source": {
|
|
434
|
-
# Set
|
|
469
|
+
# Set false to avoid creating a global source (for example in a dark site)
|
|
435
470
|
"create": True,
|
|
436
471
|
"name": "mlrun_global_hub",
|
|
437
472
|
"description": "MLRun global function hub",
|
|
438
|
-
"url": "https://raw.githubusercontent.com/mlrun/marketplace",
|
|
473
|
+
"url": "https://raw.githubusercontent.com/mlrun/marketplace/master",
|
|
474
|
+
"object_type": "functions",
|
|
439
475
|
"channel": "master",
|
|
440
476
|
},
|
|
441
477
|
},
|
|
@@ -507,6 +543,27 @@ default_config = {
|
|
|
507
543
|
# interval for stopping log collection for runs which are in a terminal state
|
|
508
544
|
"stop_logs_interval": 3600,
|
|
509
545
|
},
|
|
546
|
+
# Configurations for the `mlrun.package` sub-package involving packagers - logging returned outputs and parsing
|
|
547
|
+
# inputs data items:
|
|
548
|
+
"packagers": {
|
|
549
|
+
# Whether to enable packagers. True will wrap each run in the `mlrun.package.handler` decorator to log and parse
|
|
550
|
+
# using packagers.
|
|
551
|
+
"enabled": True,
|
|
552
|
+
# Whether to treat returned tuples from functions as a tuple and not as multiple returned items. If True, all
|
|
553
|
+
# returned values will be packaged together as the tuple they are returned in. Default is False to enable
|
|
554
|
+
# logging multiple returned items.
|
|
555
|
+
"pack_tuples": False,
|
|
556
|
+
},
|
|
557
|
+
# Events are currently (and only) used to audit changes and record access to MLRun entities (such as secrets)
|
|
558
|
+
"events": {
|
|
559
|
+
# supported modes "enabled", "disabled".
|
|
560
|
+
# "enabled" - events are emitted.
|
|
561
|
+
# "disabled" - a nop client is used (aka doing nothing).
|
|
562
|
+
"mode": "disabled",
|
|
563
|
+
"verbose": False,
|
|
564
|
+
# used for igz client when emitting events
|
|
565
|
+
"access_key": "",
|
|
566
|
+
},
|
|
510
567
|
}
|
|
511
568
|
|
|
512
569
|
_is_running_as_api = None
|
|
@@ -517,8 +574,7 @@ def is_running_as_api():
|
|
|
517
574
|
global _is_running_as_api
|
|
518
575
|
|
|
519
576
|
if _is_running_as_api is None:
|
|
520
|
-
|
|
521
|
-
_is_running_as_api = json.loads(os.getenv("MLRUN_IS_API_SERVER", "false"))
|
|
577
|
+
_is_running_as_api = os.getenv("MLRUN_IS_API_SERVER", "false").lower() == "true"
|
|
522
578
|
|
|
523
579
|
return _is_running_as_api
|
|
524
580
|
|
|
@@ -926,6 +982,68 @@ class Config:
|
|
|
926
982
|
# Get v3io access key from the environment
|
|
927
983
|
return os.environ.get("V3IO_ACCESS_KEY")
|
|
928
984
|
|
|
985
|
+
def get_model_monitoring_file_target_path(
|
|
986
|
+
self,
|
|
987
|
+
project: str = "",
|
|
988
|
+
kind: str = "",
|
|
989
|
+
target: str = "online",
|
|
990
|
+
artifact_path: str = None,
|
|
991
|
+
) -> str:
|
|
992
|
+
"""Get the full path from the configuration based on the provided project and kind.
|
|
993
|
+
|
|
994
|
+
:param project: Project name.
|
|
995
|
+
:param kind: Kind of target path (e.g. events, log_stream, endpoints, etc.)
|
|
996
|
+
:param target: Can be either online or offline. If the target is online, then we try to get a specific
|
|
997
|
+
path for the provided kind. If it doesn't exist, use the default path.
|
|
998
|
+
If the target path is offline and the offline path is already a full path in the
|
|
999
|
+
configuration, then the result will be that path as-is. If the offline path is a
|
|
1000
|
+
relative path, then the result will be based on the project artifact path and the offline
|
|
1001
|
+
relative path. If project artifact path wasn't provided, then we use MLRun artifact
|
|
1002
|
+
path instead.
|
|
1003
|
+
:param artifact_path: Optional artifact path that will be used as a relative path. If not provided, the
|
|
1004
|
+
relative artifact path will be taken from the global MLRun artifact path.
|
|
1005
|
+
|
|
1006
|
+
:return: Full configured path for the provided kind.
|
|
1007
|
+
"""
|
|
1008
|
+
|
|
1009
|
+
if target != "offline":
|
|
1010
|
+
store_prefix_dict = (
|
|
1011
|
+
mlrun.mlconf.model_endpoint_monitoring.store_prefixes.to_dict()
|
|
1012
|
+
)
|
|
1013
|
+
if store_prefix_dict.get(kind):
|
|
1014
|
+
# Target exist in store prefix and has a valid string value
|
|
1015
|
+
return store_prefix_dict[kind].format(project=project)
|
|
1016
|
+
return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
|
|
1017
|
+
project=project, kind=kind
|
|
1018
|
+
)
|
|
1019
|
+
|
|
1020
|
+
# Get the current offline path from the configuration
|
|
1021
|
+
file_path = mlrun.mlconf.model_endpoint_monitoring.offline_storage_path.format(
|
|
1022
|
+
project=project, kind=kind
|
|
1023
|
+
)
|
|
1024
|
+
|
|
1025
|
+
# Absolute path
|
|
1026
|
+
if any(value in file_path for value in ["://", ":///"]) or os.path.isabs(
|
|
1027
|
+
file_path
|
|
1028
|
+
):
|
|
1029
|
+
return file_path
|
|
1030
|
+
|
|
1031
|
+
# Relative path
|
|
1032
|
+
else:
|
|
1033
|
+
artifact_path = artifact_path or config.artifact_path
|
|
1034
|
+
if artifact_path[-1] != "/":
|
|
1035
|
+
artifact_path += "/"
|
|
1036
|
+
|
|
1037
|
+
return mlrun.utils.helpers.fill_artifact_path_template(
|
|
1038
|
+
artifact_path=artifact_path + file_path, project=project
|
|
1039
|
+
)
|
|
1040
|
+
|
|
1041
|
+
def is_ce_mode(self) -> bool:
|
|
1042
|
+
# True if the setup is in CE environment
|
|
1043
|
+
return isinstance(mlrun.mlconf.ce, mlrun.config.Config) and any(
|
|
1044
|
+
ver in mlrun.mlconf.ce.mode for ver in ["lite", "full"]
|
|
1045
|
+
)
|
|
1046
|
+
|
|
929
1047
|
|
|
930
1048
|
# Global configuration
|
|
931
1049
|
config = Config.from_dict(default_config)
|
|
@@ -946,7 +1064,7 @@ def _populate(skip_errors=False):
|
|
|
946
1064
|
def _do_populate(env=None, skip_errors=False):
|
|
947
1065
|
global config
|
|
948
1066
|
|
|
949
|
-
if not os.environ.get("MLRUN_IGNORE_ENV_FILE")
|
|
1067
|
+
if not os.environ.get("MLRUN_IGNORE_ENV_FILE"):
|
|
950
1068
|
if "MLRUN_ENV_FILE" in os.environ:
|
|
951
1069
|
env_file = os.path.expanduser(os.environ["MLRUN_ENV_FILE"])
|
|
952
1070
|
dotenv.load_dotenv(env_file, override=True)
|
|
@@ -983,12 +1101,10 @@ def _do_populate(env=None, skip_errors=False):
|
|
|
983
1101
|
|
|
984
1102
|
|
|
985
1103
|
def _validate_config(config):
|
|
986
|
-
import mlrun.k8s_utils
|
|
987
|
-
|
|
988
1104
|
try:
|
|
989
1105
|
limits_gpu = config.default_function_pod_resources.limits.gpu
|
|
990
1106
|
requests_gpu = config.default_function_pod_resources.requests.gpu
|
|
991
|
-
|
|
1107
|
+
_verify_gpu_requests_and_limits(
|
|
992
1108
|
requests_gpu=requests_gpu,
|
|
993
1109
|
limits_gpu=limits_gpu,
|
|
994
1110
|
)
|
|
@@ -998,6 +1114,19 @@ def _validate_config(config):
|
|
|
998
1114
|
config.verify_security_context_enrichment_mode_is_allowed()
|
|
999
1115
|
|
|
1000
1116
|
|
|
1117
|
+
def _verify_gpu_requests_and_limits(requests_gpu: str = None, limits_gpu: str = None):
|
|
1118
|
+
# https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/
|
|
1119
|
+
if requests_gpu and not limits_gpu:
|
|
1120
|
+
raise mlrun.errors.MLRunConflictError(
|
|
1121
|
+
"You cannot specify GPU requests without specifying limits"
|
|
1122
|
+
)
|
|
1123
|
+
if requests_gpu and limits_gpu and requests_gpu != limits_gpu:
|
|
1124
|
+
raise mlrun.errors.MLRunConflictError(
|
|
1125
|
+
f"When specifying both GPU requests and limits these two values must be equal, "
|
|
1126
|
+
f"requests_gpu={requests_gpu}, limits_gpu={limits_gpu}"
|
|
1127
|
+
)
|
|
1128
|
+
|
|
1129
|
+
|
|
1001
1130
|
def _convert_resources_to_str(config: dict = None):
|
|
1002
1131
|
resources_types = ["cpu", "memory", "gpu"]
|
|
1003
1132
|
resource_requirements = ["requests", "limits"]
|
|
@@ -1048,15 +1177,18 @@ def read_env(env=None, prefix=env_prefix):
|
|
|
1048
1177
|
cfg[path[0]] = value
|
|
1049
1178
|
|
|
1050
1179
|
env_dbpath = env.get("MLRUN_DBPATH", "")
|
|
1180
|
+
# expected format: https://mlrun-api.tenant.default-tenant.app.some-system.some-namespace.com
|
|
1051
1181
|
is_remote_mlrun = (
|
|
1052
1182
|
env_dbpath.startswith("https://mlrun-api.") and "tenant." in env_dbpath
|
|
1053
1183
|
)
|
|
1184
|
+
|
|
1054
1185
|
# It's already a standard to set this env var to configure the v3io api, so we're supporting it (instead
|
|
1055
1186
|
# of MLRUN_V3IO_API), in remote usage this can be auto detected from the DBPATH
|
|
1056
1187
|
v3io_api = env.get("V3IO_API")
|
|
1057
1188
|
if v3io_api:
|
|
1058
1189
|
config["v3io_api"] = v3io_api
|
|
1059
1190
|
elif is_remote_mlrun:
|
|
1191
|
+
# in remote mlrun we can't use http, so we'll use https
|
|
1060
1192
|
config["v3io_api"] = env_dbpath.replace("https://mlrun-api.", "https://webapi.")
|
|
1061
1193
|
|
|
1062
1194
|
# It's already a standard to set this env var to configure the v3io framesd, so we're supporting it (instead
|
mlrun/data_types/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -14,7 +14,12 @@
|
|
|
14
14
|
#
|
|
15
15
|
# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
|
|
16
16
|
|
|
17
|
-
from .data_types import
|
|
17
|
+
from .data_types import (
|
|
18
|
+
InferOptions,
|
|
19
|
+
ValueType,
|
|
20
|
+
pd_schema_to_value_type,
|
|
21
|
+
python_type_to_value_type,
|
|
22
|
+
)
|
|
18
23
|
from .infer import DFDataInfer
|
|
19
24
|
|
|
20
25
|
|
mlrun/data_types/data_types.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -79,7 +79,9 @@ def pa_type_to_value_type(type_):
|
|
|
79
79
|
|
|
80
80
|
|
|
81
81
|
def python_type_to_value_type(value_type):
|
|
82
|
-
type_name =
|
|
82
|
+
type_name = (
|
|
83
|
+
value_type.__name__ if hasattr(value_type, "__name__") else str(value_type)
|
|
84
|
+
)
|
|
83
85
|
type_map = {
|
|
84
86
|
"int": ValueType.INT64,
|
|
85
87
|
"str": ValueType.STRING,
|
mlrun/data_types/infer.py
CHANGED
mlrun/data_types/spark.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -16,6 +16,8 @@ from datetime import datetime
|
|
|
16
16
|
from os import environ
|
|
17
17
|
|
|
18
18
|
import numpy as np
|
|
19
|
+
import pytz
|
|
20
|
+
from pyspark.sql.functions import to_utc_timestamp
|
|
19
21
|
from pyspark.sql.types import BooleanType, DoubleType, TimestampType
|
|
20
22
|
|
|
21
23
|
from mlrun.utils import logger
|
|
@@ -143,6 +145,9 @@ def get_df_stats_spark(df, options, num_bins=20, sample_size=None):
|
|
|
143
145
|
is_timestamp = isinstance(field.dataType, TimestampType)
|
|
144
146
|
is_boolean = isinstance(field.dataType, BooleanType)
|
|
145
147
|
if is_timestamp:
|
|
148
|
+
df_after_type_casts = df_after_type_casts.withColumn(
|
|
149
|
+
field.name, to_utc_timestamp(df_after_type_casts[field.name], "UTC")
|
|
150
|
+
)
|
|
146
151
|
timestamp_columns.add(field.name)
|
|
147
152
|
if is_boolean:
|
|
148
153
|
boolean_columns.add(field.name)
|
|
@@ -210,11 +215,13 @@ def get_df_stats_spark(df, options, num_bins=20, sample_size=None):
|
|
|
210
215
|
if col in timestamp_columns:
|
|
211
216
|
for stat, val in stats.items():
|
|
212
217
|
if stat == "mean" or stat in original_type_stats:
|
|
213
|
-
stats[stat] = datetime.fromtimestamp(val).isoformat()
|
|
218
|
+
stats[stat] = datetime.fromtimestamp(val, tz=pytz.UTC).isoformat()
|
|
214
219
|
elif stat == "hist":
|
|
215
220
|
values = stats[stat][1]
|
|
216
221
|
for i in range(len(values)):
|
|
217
|
-
values[i] = datetime.fromtimestamp(
|
|
222
|
+
values[i] = datetime.fromtimestamp(
|
|
223
|
+
values[i], tz=pytz.UTC
|
|
224
|
+
).isoformat()
|
|
218
225
|
# for boolean values, keep mean and histogram values numeric (0 to 1 representation)
|
|
219
226
|
if col in boolean_columns:
|
|
220
227
|
for stat, val in stats.items():
|
mlrun/datastore/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Iguazio
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -33,7 +33,12 @@ __all__ = [
|
|
|
33
33
|
|
|
34
34
|
import mlrun.datastore.wasbfs
|
|
35
35
|
|
|
36
|
-
from ..platforms.iguazio import
|
|
36
|
+
from ..platforms.iguazio import (
|
|
37
|
+
HTTPOutputStream,
|
|
38
|
+
KafkaOutputStream,
|
|
39
|
+
OutputStream,
|
|
40
|
+
parse_path,
|
|
41
|
+
)
|
|
37
42
|
from ..utils import logger
|
|
38
43
|
from .base import DataItem
|
|
39
44
|
from .datastore import StoreManager, in_memory_store, uri_to_ipython
|
|
@@ -69,7 +74,7 @@ def get_in_memory_items():
|
|
|
69
74
|
|
|
70
75
|
|
|
71
76
|
def get_stream_pusher(stream_path: str, **kwargs):
|
|
72
|
-
"""get a stream pusher object from URL
|
|
77
|
+
"""get a stream pusher object from URL.
|
|
73
78
|
|
|
74
79
|
common kwargs::
|
|
75
80
|
|
|
@@ -87,6 +92,8 @@ def get_stream_pusher(stream_path: str, **kwargs):
|
|
|
87
92
|
return KafkaOutputStream(
|
|
88
93
|
topic, bootstrap_servers, kwargs.get("kafka_producer_options")
|
|
89
94
|
)
|
|
95
|
+
elif stream_path.startswith("http://") or stream_path.startswith("https://"):
|
|
96
|
+
return HTTPOutputStream(stream_path=stream_path)
|
|
90
97
|
elif "://" not in stream_path:
|
|
91
98
|
return OutputStream(stream_path, **kwargs)
|
|
92
99
|
elif stream_path.startswith("v3io"):
|
mlrun/datastore/azure_blob.py
CHANGED