genesis-flow 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genesis_flow-1.0.0.dist-info/METADATA +822 -0
- genesis_flow-1.0.0.dist-info/RECORD +645 -0
- genesis_flow-1.0.0.dist-info/WHEEL +5 -0
- genesis_flow-1.0.0.dist-info/entry_points.txt +19 -0
- genesis_flow-1.0.0.dist-info/licenses/LICENSE.txt +202 -0
- genesis_flow-1.0.0.dist-info/top_level.txt +1 -0
- mlflow/__init__.py +367 -0
- mlflow/__main__.py +3 -0
- mlflow/ag2/__init__.py +56 -0
- mlflow/ag2/ag2_logger.py +294 -0
- mlflow/anthropic/__init__.py +40 -0
- mlflow/anthropic/autolog.py +129 -0
- mlflow/anthropic/chat.py +144 -0
- mlflow/artifacts/__init__.py +268 -0
- mlflow/autogen/__init__.py +144 -0
- mlflow/autogen/chat.py +142 -0
- mlflow/azure/__init__.py +26 -0
- mlflow/azure/auth_handler.py +257 -0
- mlflow/azure/client.py +319 -0
- mlflow/azure/config.py +120 -0
- mlflow/azure/connection_factory.py +340 -0
- mlflow/azure/exceptions.py +27 -0
- mlflow/azure/stores.py +327 -0
- mlflow/azure/utils.py +183 -0
- mlflow/bedrock/__init__.py +45 -0
- mlflow/bedrock/_autolog.py +202 -0
- mlflow/bedrock/chat.py +122 -0
- mlflow/bedrock/stream.py +160 -0
- mlflow/bedrock/utils.py +43 -0
- mlflow/cli.py +707 -0
- mlflow/client.py +12 -0
- mlflow/config/__init__.py +56 -0
- mlflow/crewai/__init__.py +79 -0
- mlflow/crewai/autolog.py +253 -0
- mlflow/crewai/chat.py +29 -0
- mlflow/data/__init__.py +75 -0
- mlflow/data/artifact_dataset_sources.py +170 -0
- mlflow/data/code_dataset_source.py +40 -0
- mlflow/data/dataset.py +123 -0
- mlflow/data/dataset_registry.py +168 -0
- mlflow/data/dataset_source.py +110 -0
- mlflow/data/dataset_source_registry.py +219 -0
- mlflow/data/delta_dataset_source.py +167 -0
- mlflow/data/digest_utils.py +108 -0
- mlflow/data/evaluation_dataset.py +562 -0
- mlflow/data/filesystem_dataset_source.py +81 -0
- mlflow/data/http_dataset_source.py +145 -0
- mlflow/data/huggingface_dataset.py +258 -0
- mlflow/data/huggingface_dataset_source.py +118 -0
- mlflow/data/meta_dataset.py +104 -0
- mlflow/data/numpy_dataset.py +223 -0
- mlflow/data/pandas_dataset.py +231 -0
- mlflow/data/polars_dataset.py +352 -0
- mlflow/data/pyfunc_dataset_mixin.py +31 -0
- mlflow/data/schema.py +76 -0
- mlflow/data/sources.py +1 -0
- mlflow/data/spark_dataset.py +406 -0
- mlflow/data/spark_dataset_source.py +74 -0
- mlflow/data/spark_delta_utils.py +118 -0
- mlflow/data/tensorflow_dataset.py +350 -0
- mlflow/data/uc_volume_dataset_source.py +81 -0
- mlflow/db.py +27 -0
- mlflow/dspy/__init__.py +17 -0
- mlflow/dspy/autolog.py +197 -0
- mlflow/dspy/callback.py +398 -0
- mlflow/dspy/constant.py +1 -0
- mlflow/dspy/load.py +93 -0
- mlflow/dspy/save.py +393 -0
- mlflow/dspy/util.py +109 -0
- mlflow/dspy/wrapper.py +226 -0
- mlflow/entities/__init__.py +104 -0
- mlflow/entities/_mlflow_object.py +52 -0
- mlflow/entities/assessment.py +545 -0
- mlflow/entities/assessment_error.py +80 -0
- mlflow/entities/assessment_source.py +141 -0
- mlflow/entities/dataset.py +92 -0
- mlflow/entities/dataset_input.py +51 -0
- mlflow/entities/dataset_summary.py +62 -0
- mlflow/entities/document.py +48 -0
- mlflow/entities/experiment.py +109 -0
- mlflow/entities/experiment_tag.py +35 -0
- mlflow/entities/file_info.py +45 -0
- mlflow/entities/input_tag.py +35 -0
- mlflow/entities/lifecycle_stage.py +35 -0
- mlflow/entities/logged_model.py +228 -0
- mlflow/entities/logged_model_input.py +26 -0
- mlflow/entities/logged_model_output.py +32 -0
- mlflow/entities/logged_model_parameter.py +46 -0
- mlflow/entities/logged_model_status.py +74 -0
- mlflow/entities/logged_model_tag.py +33 -0
- mlflow/entities/metric.py +200 -0
- mlflow/entities/model_registry/__init__.py +29 -0
- mlflow/entities/model_registry/_model_registry_entity.py +13 -0
- mlflow/entities/model_registry/model_version.py +243 -0
- mlflow/entities/model_registry/model_version_deployment_job_run_state.py +44 -0
- mlflow/entities/model_registry/model_version_deployment_job_state.py +70 -0
- mlflow/entities/model_registry/model_version_search.py +25 -0
- mlflow/entities/model_registry/model_version_stages.py +25 -0
- mlflow/entities/model_registry/model_version_status.py +35 -0
- mlflow/entities/model_registry/model_version_tag.py +35 -0
- mlflow/entities/model_registry/prompt.py +73 -0
- mlflow/entities/model_registry/prompt_version.py +244 -0
- mlflow/entities/model_registry/registered_model.py +175 -0
- mlflow/entities/model_registry/registered_model_alias.py +35 -0
- mlflow/entities/model_registry/registered_model_deployment_job_state.py +39 -0
- mlflow/entities/model_registry/registered_model_search.py +25 -0
- mlflow/entities/model_registry/registered_model_tag.py +35 -0
- mlflow/entities/multipart_upload.py +74 -0
- mlflow/entities/param.py +49 -0
- mlflow/entities/run.py +97 -0
- mlflow/entities/run_data.py +84 -0
- mlflow/entities/run_info.py +188 -0
- mlflow/entities/run_inputs.py +59 -0
- mlflow/entities/run_outputs.py +43 -0
- mlflow/entities/run_status.py +41 -0
- mlflow/entities/run_tag.py +36 -0
- mlflow/entities/source_type.py +31 -0
- mlflow/entities/span.py +774 -0
- mlflow/entities/span_event.py +96 -0
- mlflow/entities/span_status.py +102 -0
- mlflow/entities/trace.py +317 -0
- mlflow/entities/trace_data.py +71 -0
- mlflow/entities/trace_info.py +220 -0
- mlflow/entities/trace_info_v2.py +162 -0
- mlflow/entities/trace_location.py +173 -0
- mlflow/entities/trace_state.py +39 -0
- mlflow/entities/trace_status.py +68 -0
- mlflow/entities/view_type.py +51 -0
- mlflow/environment_variables.py +866 -0
- mlflow/evaluation/__init__.py +16 -0
- mlflow/evaluation/assessment.py +369 -0
- mlflow/evaluation/evaluation.py +411 -0
- mlflow/evaluation/evaluation_tag.py +61 -0
- mlflow/evaluation/fluent.py +48 -0
- mlflow/evaluation/utils.py +201 -0
- mlflow/exceptions.py +213 -0
- mlflow/experiments.py +140 -0
- mlflow/gemini/__init__.py +81 -0
- mlflow/gemini/autolog.py +186 -0
- mlflow/gemini/chat.py +261 -0
- mlflow/genai/__init__.py +71 -0
- mlflow/genai/datasets/__init__.py +67 -0
- mlflow/genai/datasets/evaluation_dataset.py +131 -0
- mlflow/genai/evaluation/__init__.py +3 -0
- mlflow/genai/evaluation/base.py +411 -0
- mlflow/genai/evaluation/constant.py +23 -0
- mlflow/genai/evaluation/utils.py +244 -0
- mlflow/genai/judges/__init__.py +21 -0
- mlflow/genai/judges/databricks.py +404 -0
- mlflow/genai/label_schemas/__init__.py +153 -0
- mlflow/genai/label_schemas/label_schemas.py +209 -0
- mlflow/genai/labeling/__init__.py +159 -0
- mlflow/genai/labeling/labeling.py +250 -0
- mlflow/genai/optimize/__init__.py +13 -0
- mlflow/genai/optimize/base.py +198 -0
- mlflow/genai/optimize/optimizers/__init__.py +4 -0
- mlflow/genai/optimize/optimizers/base_optimizer.py +38 -0
- mlflow/genai/optimize/optimizers/dspy_mipro_optimizer.py +221 -0
- mlflow/genai/optimize/optimizers/dspy_optimizer.py +91 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_callback.py +76 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_utils.py +18 -0
- mlflow/genai/optimize/types.py +75 -0
- mlflow/genai/optimize/util.py +30 -0
- mlflow/genai/prompts/__init__.py +206 -0
- mlflow/genai/scheduled_scorers.py +431 -0
- mlflow/genai/scorers/__init__.py +26 -0
- mlflow/genai/scorers/base.py +492 -0
- mlflow/genai/scorers/builtin_scorers.py +765 -0
- mlflow/genai/scorers/scorer_utils.py +138 -0
- mlflow/genai/scorers/validation.py +165 -0
- mlflow/genai/utils/data_validation.py +146 -0
- mlflow/genai/utils/enum_utils.py +23 -0
- mlflow/genai/utils/trace_utils.py +211 -0
- mlflow/groq/__init__.py +42 -0
- mlflow/groq/_groq_autolog.py +74 -0
- mlflow/johnsnowlabs/__init__.py +888 -0
- mlflow/langchain/__init__.py +24 -0
- mlflow/langchain/api_request_parallel_processor.py +330 -0
- mlflow/langchain/autolog.py +147 -0
- mlflow/langchain/chat_agent_langgraph.py +340 -0
- mlflow/langchain/constant.py +1 -0
- mlflow/langchain/constants.py +1 -0
- mlflow/langchain/databricks_dependencies.py +444 -0
- mlflow/langchain/langchain_tracer.py +597 -0
- mlflow/langchain/model.py +919 -0
- mlflow/langchain/output_parsers.py +142 -0
- mlflow/langchain/retriever_chain.py +153 -0
- mlflow/langchain/runnables.py +527 -0
- mlflow/langchain/utils/chat.py +402 -0
- mlflow/langchain/utils/logging.py +671 -0
- mlflow/langchain/utils/serialization.py +36 -0
- mlflow/legacy_databricks_cli/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/provider.py +482 -0
- mlflow/litellm/__init__.py +175 -0
- mlflow/llama_index/__init__.py +22 -0
- mlflow/llama_index/autolog.py +55 -0
- mlflow/llama_index/chat.py +43 -0
- mlflow/llama_index/constant.py +1 -0
- mlflow/llama_index/model.py +577 -0
- mlflow/llama_index/pyfunc_wrapper.py +332 -0
- mlflow/llama_index/serialize_objects.py +188 -0
- mlflow/llama_index/tracer.py +561 -0
- mlflow/metrics/__init__.py +479 -0
- mlflow/metrics/base.py +39 -0
- mlflow/metrics/genai/__init__.py +25 -0
- mlflow/metrics/genai/base.py +101 -0
- mlflow/metrics/genai/genai_metric.py +771 -0
- mlflow/metrics/genai/metric_definitions.py +450 -0
- mlflow/metrics/genai/model_utils.py +371 -0
- mlflow/metrics/genai/prompt_template.py +68 -0
- mlflow/metrics/genai/prompts/__init__.py +0 -0
- mlflow/metrics/genai/prompts/v1.py +422 -0
- mlflow/metrics/genai/utils.py +6 -0
- mlflow/metrics/metric_definitions.py +619 -0
- mlflow/mismatch.py +34 -0
- mlflow/mistral/__init__.py +34 -0
- mlflow/mistral/autolog.py +71 -0
- mlflow/mistral/chat.py +135 -0
- mlflow/ml_package_versions.py +452 -0
- mlflow/models/__init__.py +97 -0
- mlflow/models/auth_policy.py +83 -0
- mlflow/models/cli.py +354 -0
- mlflow/models/container/__init__.py +294 -0
- mlflow/models/container/scoring_server/__init__.py +0 -0
- mlflow/models/container/scoring_server/nginx.conf +39 -0
- mlflow/models/dependencies_schemas.py +287 -0
- mlflow/models/display_utils.py +158 -0
- mlflow/models/docker_utils.py +211 -0
- mlflow/models/evaluation/__init__.py +23 -0
- mlflow/models/evaluation/_shap_patch.py +64 -0
- mlflow/models/evaluation/artifacts.py +194 -0
- mlflow/models/evaluation/base.py +1811 -0
- mlflow/models/evaluation/calibration_curve.py +109 -0
- mlflow/models/evaluation/default_evaluator.py +996 -0
- mlflow/models/evaluation/deprecated.py +23 -0
- mlflow/models/evaluation/evaluator_registry.py +80 -0
- mlflow/models/evaluation/evaluators/classifier.py +704 -0
- mlflow/models/evaluation/evaluators/default.py +233 -0
- mlflow/models/evaluation/evaluators/regressor.py +96 -0
- mlflow/models/evaluation/evaluators/shap.py +296 -0
- mlflow/models/evaluation/lift_curve.py +178 -0
- mlflow/models/evaluation/utils/metric.py +123 -0
- mlflow/models/evaluation/utils/trace.py +179 -0
- mlflow/models/evaluation/validation.py +434 -0
- mlflow/models/flavor_backend.py +93 -0
- mlflow/models/flavor_backend_registry.py +53 -0
- mlflow/models/model.py +1639 -0
- mlflow/models/model_config.py +150 -0
- mlflow/models/notebook_resources/agent_evaluation_template.html +235 -0
- mlflow/models/notebook_resources/eval_with_dataset_example.py +22 -0
- mlflow/models/notebook_resources/eval_with_synthetic_example.py +22 -0
- mlflow/models/python_api.py +369 -0
- mlflow/models/rag_signatures.py +128 -0
- mlflow/models/resources.py +321 -0
- mlflow/models/signature.py +662 -0
- mlflow/models/utils.py +2054 -0
- mlflow/models/wheeled_model.py +280 -0
- mlflow/openai/__init__.py +57 -0
- mlflow/openai/_agent_tracer.py +364 -0
- mlflow/openai/api_request_parallel_processor.py +131 -0
- mlflow/openai/autolog.py +509 -0
- mlflow/openai/constant.py +1 -0
- mlflow/openai/model.py +824 -0
- mlflow/openai/utils/chat_schema.py +367 -0
- mlflow/optuna/__init__.py +3 -0
- mlflow/optuna/storage.py +646 -0
- mlflow/plugins/__init__.py +72 -0
- mlflow/plugins/base.py +358 -0
- mlflow/plugins/builtin/__init__.py +24 -0
- mlflow/plugins/builtin/pytorch_plugin.py +150 -0
- mlflow/plugins/builtin/sklearn_plugin.py +158 -0
- mlflow/plugins/builtin/transformers_plugin.py +187 -0
- mlflow/plugins/cli.py +321 -0
- mlflow/plugins/discovery.py +340 -0
- mlflow/plugins/manager.py +465 -0
- mlflow/plugins/registry.py +316 -0
- mlflow/plugins/templates/framework_plugin_template.py +329 -0
- mlflow/prompt/constants.py +20 -0
- mlflow/prompt/promptlab_model.py +197 -0
- mlflow/prompt/registry_utils.py +248 -0
- mlflow/promptflow/__init__.py +495 -0
- mlflow/protos/__init__.py +0 -0
- mlflow/protos/assessments_pb2.py +174 -0
- mlflow/protos/databricks_artifacts_pb2.py +489 -0
- mlflow/protos/databricks_filesystem_service_pb2.py +196 -0
- mlflow/protos/databricks_managed_catalog_messages_pb2.py +95 -0
- mlflow/protos/databricks_managed_catalog_service_pb2.py +86 -0
- mlflow/protos/databricks_pb2.py +267 -0
- mlflow/protos/databricks_trace_server_pb2.py +374 -0
- mlflow/protos/databricks_uc_registry_messages_pb2.py +1249 -0
- mlflow/protos/databricks_uc_registry_service_pb2.py +170 -0
- mlflow/protos/facet_feature_statistics_pb2.py +296 -0
- mlflow/protos/internal_pb2.py +77 -0
- mlflow/protos/mlflow_artifacts_pb2.py +336 -0
- mlflow/protos/model_registry_pb2.py +1073 -0
- mlflow/protos/scalapb/__init__.py +0 -0
- mlflow/protos/scalapb/scalapb_pb2.py +104 -0
- mlflow/protos/service_pb2.py +2600 -0
- mlflow/protos/unity_catalog_oss_messages_pb2.py +457 -0
- mlflow/protos/unity_catalog_oss_service_pb2.py +130 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2.py +447 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2_grpc.py +24 -0
- mlflow/protos/unity_catalog_prompt_service_pb2.py +164 -0
- mlflow/protos/unity_catalog_prompt_service_pb2_grpc.py +785 -0
- mlflow/py.typed +0 -0
- mlflow/pydantic_ai/__init__.py +57 -0
- mlflow/pydantic_ai/autolog.py +173 -0
- mlflow/pyfunc/__init__.py +3844 -0
- mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py +61 -0
- mlflow/pyfunc/backend.py +523 -0
- mlflow/pyfunc/context.py +78 -0
- mlflow/pyfunc/dbconnect_artifact_cache.py +144 -0
- mlflow/pyfunc/loaders/__init__.py +7 -0
- mlflow/pyfunc/loaders/chat_agent.py +117 -0
- mlflow/pyfunc/loaders/chat_model.py +125 -0
- mlflow/pyfunc/loaders/code_model.py +31 -0
- mlflow/pyfunc/loaders/responses_agent.py +112 -0
- mlflow/pyfunc/mlserver.py +46 -0
- mlflow/pyfunc/model.py +1473 -0
- mlflow/pyfunc/scoring_server/__init__.py +604 -0
- mlflow/pyfunc/scoring_server/app.py +7 -0
- mlflow/pyfunc/scoring_server/client.py +146 -0
- mlflow/pyfunc/spark_model_cache.py +48 -0
- mlflow/pyfunc/stdin_server.py +44 -0
- mlflow/pyfunc/utils/__init__.py +3 -0
- mlflow/pyfunc/utils/data_validation.py +224 -0
- mlflow/pyfunc/utils/environment.py +22 -0
- mlflow/pyfunc/utils/input_converter.py +47 -0
- mlflow/pyfunc/utils/serving_data_parser.py +11 -0
- mlflow/pytorch/__init__.py +1171 -0
- mlflow/pytorch/_lightning_autolog.py +580 -0
- mlflow/pytorch/_pytorch_autolog.py +50 -0
- mlflow/pytorch/pickle_module.py +35 -0
- mlflow/rfunc/__init__.py +42 -0
- mlflow/rfunc/backend.py +134 -0
- mlflow/runs.py +89 -0
- mlflow/server/__init__.py +302 -0
- mlflow/server/auth/__init__.py +1224 -0
- mlflow/server/auth/__main__.py +4 -0
- mlflow/server/auth/basic_auth.ini +6 -0
- mlflow/server/auth/cli.py +11 -0
- mlflow/server/auth/client.py +537 -0
- mlflow/server/auth/config.py +34 -0
- mlflow/server/auth/db/__init__.py +0 -0
- mlflow/server/auth/db/cli.py +18 -0
- mlflow/server/auth/db/migrations/__init__.py +0 -0
- mlflow/server/auth/db/migrations/alembic.ini +110 -0
- mlflow/server/auth/db/migrations/env.py +76 -0
- mlflow/server/auth/db/migrations/versions/8606fa83a998_initial_migration.py +51 -0
- mlflow/server/auth/db/migrations/versions/__init__.py +0 -0
- mlflow/server/auth/db/models.py +67 -0
- mlflow/server/auth/db/utils.py +37 -0
- mlflow/server/auth/entities.py +165 -0
- mlflow/server/auth/logo.py +14 -0
- mlflow/server/auth/permissions.py +65 -0
- mlflow/server/auth/routes.py +18 -0
- mlflow/server/auth/sqlalchemy_store.py +263 -0
- mlflow/server/graphql/__init__.py +0 -0
- mlflow/server/graphql/autogenerated_graphql_schema.py +353 -0
- mlflow/server/graphql/graphql_custom_scalars.py +24 -0
- mlflow/server/graphql/graphql_errors.py +15 -0
- mlflow/server/graphql/graphql_no_batching.py +89 -0
- mlflow/server/graphql/graphql_schema_extensions.py +74 -0
- mlflow/server/handlers.py +3217 -0
- mlflow/server/prometheus_exporter.py +17 -0
- mlflow/server/validation.py +30 -0
- mlflow/shap/__init__.py +691 -0
- mlflow/sklearn/__init__.py +1994 -0
- mlflow/sklearn/utils.py +1041 -0
- mlflow/smolagents/__init__.py +66 -0
- mlflow/smolagents/autolog.py +139 -0
- mlflow/smolagents/chat.py +29 -0
- mlflow/store/__init__.py +10 -0
- mlflow/store/_unity_catalog/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/constants.py +2 -0
- mlflow/store/_unity_catalog/registry/__init__.py +6 -0
- mlflow/store/_unity_catalog/registry/prompt_info.py +75 -0
- mlflow/store/_unity_catalog/registry/rest_store.py +1740 -0
- mlflow/store/_unity_catalog/registry/uc_oss_rest_store.py +507 -0
- mlflow/store/_unity_catalog/registry/utils.py +121 -0
- mlflow/store/artifact/__init__.py +0 -0
- mlflow/store/artifact/artifact_repo.py +472 -0
- mlflow/store/artifact/artifact_repository_registry.py +154 -0
- mlflow/store/artifact/azure_blob_artifact_repo.py +275 -0
- mlflow/store/artifact/azure_data_lake_artifact_repo.py +295 -0
- mlflow/store/artifact/cli.py +141 -0
- mlflow/store/artifact/cloud_artifact_repo.py +332 -0
- mlflow/store/artifact/databricks_artifact_repo.py +729 -0
- mlflow/store/artifact/databricks_artifact_repo_resources.py +301 -0
- mlflow/store/artifact/databricks_logged_model_artifact_repo.py +93 -0
- mlflow/store/artifact/databricks_models_artifact_repo.py +216 -0
- mlflow/store/artifact/databricks_sdk_artifact_repo.py +134 -0
- mlflow/store/artifact/databricks_sdk_models_artifact_repo.py +97 -0
- mlflow/store/artifact/dbfs_artifact_repo.py +240 -0
- mlflow/store/artifact/ftp_artifact_repo.py +132 -0
- mlflow/store/artifact/gcs_artifact_repo.py +296 -0
- mlflow/store/artifact/hdfs_artifact_repo.py +209 -0
- mlflow/store/artifact/http_artifact_repo.py +218 -0
- mlflow/store/artifact/local_artifact_repo.py +142 -0
- mlflow/store/artifact/mlflow_artifacts_repo.py +94 -0
- mlflow/store/artifact/models_artifact_repo.py +259 -0
- mlflow/store/artifact/optimized_s3_artifact_repo.py +356 -0
- mlflow/store/artifact/presigned_url_artifact_repo.py +173 -0
- mlflow/store/artifact/r2_artifact_repo.py +70 -0
- mlflow/store/artifact/runs_artifact_repo.py +265 -0
- mlflow/store/artifact/s3_artifact_repo.py +330 -0
- mlflow/store/artifact/sftp_artifact_repo.py +141 -0
- mlflow/store/artifact/uc_volume_artifact_repo.py +76 -0
- mlflow/store/artifact/unity_catalog_models_artifact_repo.py +168 -0
- mlflow/store/artifact/unity_catalog_oss_models_artifact_repo.py +168 -0
- mlflow/store/artifact/utils/__init__.py +0 -0
- mlflow/store/artifact/utils/models.py +148 -0
- mlflow/store/db/__init__.py +0 -0
- mlflow/store/db/base_sql_model.py +3 -0
- mlflow/store/db/db_types.py +10 -0
- mlflow/store/db/utils.py +314 -0
- mlflow/store/db_migrations/__init__.py +0 -0
- mlflow/store/db_migrations/alembic.ini +74 -0
- mlflow/store/db_migrations/env.py +84 -0
- mlflow/store/db_migrations/versions/0584bdc529eb_add_cascading_deletion_to_datasets_from_experiments.py +88 -0
- mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +49 -0
- mlflow/store/db_migrations/versions/0c779009ac13_add_deleted_time_field_to_runs_table.py +24 -0
- mlflow/store/db_migrations/versions/181f10493468_allow_nulls_for_metric_values.py +35 -0
- mlflow/store/db_migrations/versions/27a6a02d2cf1_add_model_version_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/2b4d017a5e9b_add_model_registry_tables_to_db.py +77 -0
- mlflow/store/db_migrations/versions/2d6e25af4d3e_increase_max_param_val_length.py +33 -0
- mlflow/store/db_migrations/versions/3500859a5d39_add_model_aliases_table.py +50 -0
- mlflow/store/db_migrations/versions/39d1c3be5f05_add_is_nan_constraint_for_metrics_tables_if_necessary.py +41 -0
- mlflow/store/db_migrations/versions/400f98739977_add_logged_model_tables.py +123 -0
- mlflow/store/db_migrations/versions/4465047574b1_increase_max_dataset_schema_size.py +38 -0
- mlflow/store/db_migrations/versions/451aebb31d03_add_metric_step.py +35 -0
- mlflow/store/db_migrations/versions/5b0e9adcef9c_add_cascade_deletion_to_trace_tables_fk.py +40 -0
- mlflow/store/db_migrations/versions/6953534de441_add_step_to_inputs_table.py +25 -0
- mlflow/store/db_migrations/versions/728d730b5ebd_add_registered_model_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/7ac759974ad8_update_run_tags_with_larger_limit.py +36 -0
- mlflow/store/db_migrations/versions/7f2a7d5fae7d_add_datasets_inputs_input_tags_tables.py +82 -0
- mlflow/store/db_migrations/versions/84291f40a231_add_run_link_to_model_version.py +26 -0
- mlflow/store/db_migrations/versions/867495a8f9d4_add_trace_tables.py +90 -0
- mlflow/store/db_migrations/versions/89d4b8295536_create_latest_metrics_table.py +169 -0
- mlflow/store/db_migrations/versions/90e64c465722_migrate_user_column_to_tags.py +64 -0
- mlflow/store/db_migrations/versions/97727af70f4d_creation_time_last_update_time_experiments.py +25 -0
- mlflow/store/db_migrations/versions/__init__.py +0 -0
- mlflow/store/db_migrations/versions/a8c4a736bde6_allow_nulls_for_run_id.py +27 -0
- mlflow/store/db_migrations/versions/acf3f17fdcc7_add_storage_location_field_to_model_.py +29 -0
- mlflow/store/db_migrations/versions/bd07f7e963c5_create_index_on_run_uuid.py +26 -0
- mlflow/store/db_migrations/versions/bda7b8c39065_increase_model_version_tag_value_limit.py +38 -0
- mlflow/store/db_migrations/versions/c48cb773bb87_reset_default_value_for_is_nan_in_metrics_table_for_mysql.py +41 -0
- mlflow/store/db_migrations/versions/cbc13b556ace_add_v3_trace_schema_columns.py +31 -0
- mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py +34 -0
- mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +78 -0
- mlflow/store/db_migrations/versions/df50e92ffc5e_add_experiment_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/f5a4f2784254_increase_run_tag_value_limit.py +36 -0
- mlflow/store/entities/__init__.py +3 -0
- mlflow/store/entities/paged_list.py +18 -0
- mlflow/store/model_registry/__init__.py +10 -0
- mlflow/store/model_registry/abstract_store.py +1081 -0
- mlflow/store/model_registry/base_rest_store.py +44 -0
- mlflow/store/model_registry/databricks_workspace_model_registry_rest_store.py +37 -0
- mlflow/store/model_registry/dbmodels/__init__.py +0 -0
- mlflow/store/model_registry/dbmodels/models.py +206 -0
- mlflow/store/model_registry/file_store.py +1091 -0
- mlflow/store/model_registry/rest_store.py +481 -0
- mlflow/store/model_registry/sqlalchemy_store.py +1286 -0
- mlflow/store/tracking/__init__.py +23 -0
- mlflow/store/tracking/abstract_store.py +816 -0
- mlflow/store/tracking/dbmodels/__init__.py +0 -0
- mlflow/store/tracking/dbmodels/initial_models.py +243 -0
- mlflow/store/tracking/dbmodels/models.py +1073 -0
- mlflow/store/tracking/file_store.py +2438 -0
- mlflow/store/tracking/postgres_managed_identity.py +146 -0
- mlflow/store/tracking/rest_store.py +1131 -0
- mlflow/store/tracking/sqlalchemy_store.py +2785 -0
- mlflow/system_metrics/__init__.py +61 -0
- mlflow/system_metrics/metrics/__init__.py +0 -0
- mlflow/system_metrics/metrics/base_metrics_monitor.py +32 -0
- mlflow/system_metrics/metrics/cpu_monitor.py +23 -0
- mlflow/system_metrics/metrics/disk_monitor.py +21 -0
- mlflow/system_metrics/metrics/gpu_monitor.py +71 -0
- mlflow/system_metrics/metrics/network_monitor.py +34 -0
- mlflow/system_metrics/metrics/rocm_monitor.py +123 -0
- mlflow/system_metrics/system_metrics_monitor.py +198 -0
- mlflow/tracing/__init__.py +16 -0
- mlflow/tracing/assessment.py +356 -0
- mlflow/tracing/client.py +531 -0
- mlflow/tracing/config.py +125 -0
- mlflow/tracing/constant.py +105 -0
- mlflow/tracing/destination.py +81 -0
- mlflow/tracing/display/__init__.py +40 -0
- mlflow/tracing/display/display_handler.py +196 -0
- mlflow/tracing/export/async_export_queue.py +186 -0
- mlflow/tracing/export/inference_table.py +138 -0
- mlflow/tracing/export/mlflow_v3.py +137 -0
- mlflow/tracing/export/utils.py +70 -0
- mlflow/tracing/fluent.py +1417 -0
- mlflow/tracing/processor/base_mlflow.py +199 -0
- mlflow/tracing/processor/inference_table.py +175 -0
- mlflow/tracing/processor/mlflow_v3.py +47 -0
- mlflow/tracing/processor/otel.py +73 -0
- mlflow/tracing/provider.py +487 -0
- mlflow/tracing/trace_manager.py +200 -0
- mlflow/tracing/utils/__init__.py +616 -0
- mlflow/tracing/utils/artifact_utils.py +28 -0
- mlflow/tracing/utils/copy.py +55 -0
- mlflow/tracing/utils/environment.py +55 -0
- mlflow/tracing/utils/exception.py +21 -0
- mlflow/tracing/utils/once.py +35 -0
- mlflow/tracing/utils/otlp.py +63 -0
- mlflow/tracing/utils/processor.py +54 -0
- mlflow/tracing/utils/search.py +292 -0
- mlflow/tracing/utils/timeout.py +250 -0
- mlflow/tracing/utils/token.py +19 -0
- mlflow/tracing/utils/truncation.py +124 -0
- mlflow/tracing/utils/warning.py +76 -0
- mlflow/tracking/__init__.py +39 -0
- mlflow/tracking/_model_registry/__init__.py +1 -0
- mlflow/tracking/_model_registry/client.py +764 -0
- mlflow/tracking/_model_registry/fluent.py +853 -0
- mlflow/tracking/_model_registry/registry.py +67 -0
- mlflow/tracking/_model_registry/utils.py +251 -0
- mlflow/tracking/_tracking_service/__init__.py +0 -0
- mlflow/tracking/_tracking_service/client.py +883 -0
- mlflow/tracking/_tracking_service/registry.py +56 -0
- mlflow/tracking/_tracking_service/utils.py +275 -0
- mlflow/tracking/artifact_utils.py +179 -0
- mlflow/tracking/client.py +5900 -0
- mlflow/tracking/context/__init__.py +0 -0
- mlflow/tracking/context/abstract_context.py +35 -0
- mlflow/tracking/context/databricks_cluster_context.py +15 -0
- mlflow/tracking/context/databricks_command_context.py +15 -0
- mlflow/tracking/context/databricks_job_context.py +49 -0
- mlflow/tracking/context/databricks_notebook_context.py +41 -0
- mlflow/tracking/context/databricks_repo_context.py +43 -0
- mlflow/tracking/context/default_context.py +51 -0
- mlflow/tracking/context/git_context.py +32 -0
- mlflow/tracking/context/registry.py +98 -0
- mlflow/tracking/context/system_environment_context.py +15 -0
- mlflow/tracking/default_experiment/__init__.py +1 -0
- mlflow/tracking/default_experiment/abstract_context.py +43 -0
- mlflow/tracking/default_experiment/databricks_notebook_experiment_provider.py +44 -0
- mlflow/tracking/default_experiment/registry.py +75 -0
- mlflow/tracking/fluent.py +3595 -0
- mlflow/tracking/metric_value_conversion_utils.py +93 -0
- mlflow/tracking/multimedia.py +206 -0
- mlflow/tracking/registry.py +86 -0
- mlflow/tracking/request_auth/__init__.py +0 -0
- mlflow/tracking/request_auth/abstract_request_auth_provider.py +34 -0
- mlflow/tracking/request_auth/registry.py +60 -0
- mlflow/tracking/request_header/__init__.py +0 -0
- mlflow/tracking/request_header/abstract_request_header_provider.py +36 -0
- mlflow/tracking/request_header/databricks_request_header_provider.py +38 -0
- mlflow/tracking/request_header/default_request_header_provider.py +17 -0
- mlflow/tracking/request_header/registry.py +79 -0
- mlflow/transformers/__init__.py +2982 -0
- mlflow/transformers/flavor_config.py +258 -0
- mlflow/transformers/hub_utils.py +83 -0
- mlflow/transformers/llm_inference_utils.py +468 -0
- mlflow/transformers/model_io.py +301 -0
- mlflow/transformers/peft.py +51 -0
- mlflow/transformers/signature.py +183 -0
- mlflow/transformers/torch_utils.py +55 -0
- mlflow/types/__init__.py +21 -0
- mlflow/types/agent.py +270 -0
- mlflow/types/chat.py +240 -0
- mlflow/types/llm.py +935 -0
- mlflow/types/responses.py +139 -0
- mlflow/types/responses_helpers.py +416 -0
- mlflow/types/schema.py +1505 -0
- mlflow/types/type_hints.py +647 -0
- mlflow/types/utils.py +753 -0
- mlflow/utils/__init__.py +283 -0
- mlflow/utils/_capture_modules.py +256 -0
- mlflow/utils/_capture_transformers_modules.py +75 -0
- mlflow/utils/_spark_utils.py +201 -0
- mlflow/utils/_unity_catalog_oss_utils.py +97 -0
- mlflow/utils/_unity_catalog_utils.py +479 -0
- mlflow/utils/annotations.py +218 -0
- mlflow/utils/arguments_utils.py +16 -0
- mlflow/utils/async_logging/__init__.py +1 -0
- mlflow/utils/async_logging/async_artifacts_logging_queue.py +258 -0
- mlflow/utils/async_logging/async_logging_queue.py +366 -0
- mlflow/utils/async_logging/run_artifact.py +38 -0
- mlflow/utils/async_logging/run_batch.py +58 -0
- mlflow/utils/async_logging/run_operations.py +49 -0
- mlflow/utils/autologging_utils/__init__.py +737 -0
- mlflow/utils/autologging_utils/client.py +432 -0
- mlflow/utils/autologging_utils/config.py +33 -0
- mlflow/utils/autologging_utils/events.py +294 -0
- mlflow/utils/autologging_utils/logging_and_warnings.py +328 -0
- mlflow/utils/autologging_utils/metrics_queue.py +71 -0
- mlflow/utils/autologging_utils/safety.py +1104 -0
- mlflow/utils/autologging_utils/versioning.py +95 -0
- mlflow/utils/checkpoint_utils.py +206 -0
- mlflow/utils/class_utils.py +6 -0
- mlflow/utils/cli_args.py +257 -0
- mlflow/utils/conda.py +354 -0
- mlflow/utils/credentials.py +231 -0
- mlflow/utils/data_utils.py +17 -0
- mlflow/utils/databricks_utils.py +1436 -0
- mlflow/utils/docstring_utils.py +477 -0
- mlflow/utils/doctor.py +133 -0
- mlflow/utils/download_cloud_file_chunk.py +43 -0
- mlflow/utils/env_manager.py +16 -0
- mlflow/utils/env_pack.py +131 -0
- mlflow/utils/environment.py +1009 -0
- mlflow/utils/exception_utils.py +14 -0
- mlflow/utils/file_utils.py +978 -0
- mlflow/utils/git_utils.py +77 -0
- mlflow/utils/gorilla.py +797 -0
- mlflow/utils/import_hooks/__init__.py +363 -0
- mlflow/utils/lazy_load.py +51 -0
- mlflow/utils/logging_utils.py +168 -0
- mlflow/utils/mime_type_utils.py +58 -0
- mlflow/utils/mlflow_tags.py +103 -0
- mlflow/utils/model_utils.py +486 -0
- mlflow/utils/name_utils.py +346 -0
- mlflow/utils/nfs_on_spark.py +62 -0
- mlflow/utils/openai_utils.py +164 -0
- mlflow/utils/os.py +12 -0
- mlflow/utils/oss_registry_utils.py +29 -0
- mlflow/utils/plugins.py +17 -0
- mlflow/utils/process.py +182 -0
- mlflow/utils/promptlab_utils.py +146 -0
- mlflow/utils/proto_json_utils.py +743 -0
- mlflow/utils/pydantic_utils.py +54 -0
- mlflow/utils/request_utils.py +279 -0
- mlflow/utils/requirements_utils.py +704 -0
- mlflow/utils/rest_utils.py +673 -0
- mlflow/utils/search_logged_model_utils.py +127 -0
- mlflow/utils/search_utils.py +2111 -0
- mlflow/utils/secure_loading.py +221 -0
- mlflow/utils/security_validation.py +384 -0
- mlflow/utils/server_cli_utils.py +61 -0
- mlflow/utils/spark_utils.py +15 -0
- mlflow/utils/string_utils.py +138 -0
- mlflow/utils/thread_utils.py +63 -0
- mlflow/utils/time.py +54 -0
- mlflow/utils/timeout.py +42 -0
- mlflow/utils/uri.py +572 -0
- mlflow/utils/validation.py +662 -0
- mlflow/utils/virtualenv.py +458 -0
- mlflow/utils/warnings_utils.py +25 -0
- mlflow/utils/yaml_utils.py +179 -0
- mlflow/version.py +24 -0
@@ -0,0 +1,743 @@
|
|
1
|
+
import base64
|
2
|
+
import datetime
|
3
|
+
import importlib
|
4
|
+
import json
|
5
|
+
import os
|
6
|
+
from collections import defaultdict
|
7
|
+
from copy import deepcopy
|
8
|
+
from functools import partial
|
9
|
+
from json import JSONEncoder
|
10
|
+
from typing import Any, Optional
|
11
|
+
|
12
|
+
import pydantic
|
13
|
+
from google.protobuf.descriptor import FieldDescriptor
|
14
|
+
from google.protobuf.duration_pb2 import Duration
|
15
|
+
from google.protobuf.json_format import MessageToJson, ParseDict
|
16
|
+
from google.protobuf.struct_pb2 import NULL_VALUE, Value
|
17
|
+
from google.protobuf.timestamp_pb2 import Timestamp
|
18
|
+
|
19
|
+
from mlflow.exceptions import MlflowException
|
20
|
+
from mlflow.utils import IS_PYDANTIC_V2_OR_NEWER
|
21
|
+
|
22
|
+
_PROTOBUF_INT64_FIELDS = [
|
23
|
+
FieldDescriptor.TYPE_INT64,
|
24
|
+
FieldDescriptor.TYPE_UINT64,
|
25
|
+
FieldDescriptor.TYPE_FIXED64,
|
26
|
+
FieldDescriptor.TYPE_SFIXED64,
|
27
|
+
FieldDescriptor.TYPE_SINT64,
|
28
|
+
]
|
29
|
+
|
30
|
+
from mlflow.protos.databricks_pb2 import BAD_REQUEST
|
31
|
+
|
32
|
+
|
33
|
+
def _mark_int64_fields_for_proto_maps(proto_map, value_field_type):
    """Converts a proto map to JSON, preserving only int64-related fields.

    Args:
        proto_map: A protobuf map field (scalar- or message-valued).
        value_field_type: The ``FieldDescriptor`` type constant of the map's values.

    Returns:
        A dict containing message-valued entries (recursed via ``_mark_int64_fields``),
        int64-like values coerced to ``int``, and int-keyed entries passed through.
    """
    json_dict = {}
    for key, value in proto_map.items():
        # The value of a protobuf map can only be a scalar or a message (not a map or repeated
        # field).
        if value_field_type == FieldDescriptor.TYPE_MESSAGE:
            json_dict[key] = _mark_int64_fields(value)
        elif value_field_type in _PROTOBUF_INT64_FIELDS:
            json_dict[key] = int(value)
        elif isinstance(key, int):
            # Keep int-keyed entries so the integer key itself survives the later
            # merge step (JSON stringifies int keys; see _merge_json_dicts).
            json_dict[key] = value
    return json_dict
|
46
|
+
|
47
|
+
|
48
|
+
def _mark_int64_fields(proto_message):
    """Converts a proto message to JSON, preserving only int64-related fields.

    Args:
        proto_message: A protobuf message instance.

    Returns:
        A dict of field name -> value containing only int64-like fields (as ints),
        nested messages (recursed), and proto maps (delegated to
        ``_mark_int64_fields_for_proto_maps``). All other fields are dropped.
    """
    json_dict = {}
    for field, value in proto_message.ListFields():
        if (
            # These three conditions check if this field is a protobuf map.
            # See the official implementation: https://bit.ly/3EMx1rl
            field.type == FieldDescriptor.TYPE_MESSAGE
            and field.message_type.has_options
            and field.message_type.GetOptions().map_entry
        ):
            # Deal with proto map fields separately in another function.
            json_dict[field.name] = _mark_int64_fields_for_proto_maps(
                value, field.message_type.fields_by_name["value"].type
            )
            continue

        if field.type == FieldDescriptor.TYPE_MESSAGE:
            # Nested messages are recursed with the same filtering.
            ftype = partial(_mark_int64_fields)
        elif field.type in _PROTOBUF_INT64_FIELDS:
            ftype = int
        else:
            # Skip all non-int64 fields.
            continue

        # Repeated fields apply the converter element-wise.
        json_dict[field.name] = (
            [ftype(v) for v in value]
            if field.label == FieldDescriptor.LABEL_REPEATED
            else ftype(value)
        )
    return json_dict
|
79
|
+
|
80
|
+
|
81
|
+
def _merge_json_dicts(from_dict, to_dict):
|
82
|
+
"""Merges the json elements of from_dict into to_dict. Only works for json dicts
|
83
|
+
converted from proto messages
|
84
|
+
"""
|
85
|
+
for key, value in from_dict.items():
|
86
|
+
if isinstance(key, int) and str(key) in to_dict:
|
87
|
+
# When the key (i.e. the proto field name) is an integer, it must be a proto map field
|
88
|
+
# with integer as the key. For example:
|
89
|
+
# from_dict is {'field_map': {1: '2', 3: '4'}}
|
90
|
+
# to_dict is {'field_map': {'1': '2', '3': '4'}}
|
91
|
+
# So we need to replace the str keys with int keys in to_dict.
|
92
|
+
to_dict[key] = to_dict[str(key)]
|
93
|
+
del to_dict[str(key)]
|
94
|
+
|
95
|
+
if key not in to_dict:
|
96
|
+
continue
|
97
|
+
|
98
|
+
if isinstance(value, dict):
|
99
|
+
_merge_json_dicts(from_dict[key], to_dict[key])
|
100
|
+
elif isinstance(value, list):
|
101
|
+
for i, v in enumerate(value):
|
102
|
+
if isinstance(v, dict):
|
103
|
+
_merge_json_dicts(v, to_dict[key][i])
|
104
|
+
else:
|
105
|
+
to_dict[key][i] = v
|
106
|
+
else:
|
107
|
+
to_dict[key] = from_dict[key]
|
108
|
+
return to_dict
|
109
|
+
|
110
|
+
|
111
|
+
def message_to_json(message):
    """Converts a message to JSON, using snake_case for field names.

    Args:
        message: A protobuf message instance.

    Returns:
        A JSON string (indented by 2) in which int64-typed proto fields appear
        as JSON numbers rather than strings.
    """

    # Google's MessageToJson API converts int64 proto fields to JSON strings.
    # For more info, see https://github.com/protocolbuffers/protobuf/issues/2954
    json_dict_with_int64_as_str = json.loads(
        MessageToJson(message, preserving_proto_field_name=True)
    )
    # We convert this proto message into a JSON dict where only int64 proto fields
    # are preserved, and they are treated as JSON numbers, not strings.
    json_dict_with_int64_fields_only = _mark_int64_fields(message)
    # By merging these two JSON dicts, we end up with a JSON dict where int64 proto fields are not
    # converted to JSON strings. Int64 keys in proto maps will always be converted to JSON strings
    # because JSON doesn't support non-string keys.
    json_dict_with_int64_as_numbers = _merge_json_dicts(
        json_dict_with_int64_fields_only, json_dict_with_int64_as_str
    )
    return json.dumps(json_dict_with_int64_as_numbers, indent=2)
|
129
|
+
|
130
|
+
|
131
|
+
def proto_timestamp_to_milliseconds(timestamp: str) -> int:
    """Convert an RFC 3339 timestamp string to epoch milliseconds.

    Args:
        timestamp: Timestamp string such as ``"2025-04-15T08:49:18.699Z"``.

    Returns:
        Milliseconds since the Unix epoch.
    """
    ts = Timestamp()
    ts.FromJsonString(timestamp)
    return ts.ToMilliseconds()
|
138
|
+
|
139
|
+
|
140
|
+
def milliseconds_to_proto_timestamp(milliseconds: int) -> str:
    """Convert epoch milliseconds to an RFC 3339 timestamp string.

    Args:
        milliseconds: Milliseconds since the Unix epoch.

    Returns:
        Timestamp string such as ``"2025-04-15T08:49:18.699Z"``.
    """
    ts = Timestamp()
    ts.FromMilliseconds(milliseconds)
    return ts.ToJsonString()
|
147
|
+
|
148
|
+
|
149
|
+
def proto_duration_to_milliseconds(duration: str) -> int:
    """Convert a proto JSON duration string to milliseconds.

    Args:
        duration: Duration string such as ``"1.5s"``.

    Returns:
        The duration expressed in milliseconds.
    """
    dur = Duration()
    dur.FromJsonString(duration)
    return dur.ToMilliseconds()
|
156
|
+
|
157
|
+
|
158
|
+
def milliseconds_to_proto_duration(milliseconds: int) -> str:
    """Convert milliseconds to a proto JSON duration string.

    Args:
        milliseconds: Duration in milliseconds.

    Returns:
        Duration string such as ``"1.5s"``.
    """
    dur = Duration()
    dur.FromMilliseconds(milliseconds)
    return dur.ToJsonString()
|
165
|
+
|
166
|
+
|
167
|
+
def _stringify_all_experiment_ids(x):
|
168
|
+
"""Converts experiment_id fields which are defined as ints into strings in the given json.
|
169
|
+
This is necessary for backwards- and forwards-compatibility with MLflow clients/servers
|
170
|
+
running MLflow 0.9.0 and below, as experiment_id was changed from an int to a string.
|
171
|
+
To note, the Python JSON serializer is happy to auto-convert strings into ints (so a
|
172
|
+
server or client that sees the new format is fine), but is unwilling to convert ints
|
173
|
+
to strings. Therefore, we need to manually perform this conversion.
|
174
|
+
|
175
|
+
This code can be removed after MLflow 1.0, after users have given reasonable time to
|
176
|
+
upgrade clients and servers to MLflow 0.9.1+.
|
177
|
+
"""
|
178
|
+
if isinstance(x, dict):
|
179
|
+
items = x.items()
|
180
|
+
for k, v in items:
|
181
|
+
if k == "experiment_id":
|
182
|
+
x[k] = str(v)
|
183
|
+
elif k == "experiment_ids":
|
184
|
+
x[k] = [str(w) for w in v]
|
185
|
+
elif k == "info" and isinstance(v, dict) and "experiment_id" in v and "run_uuid" in v:
|
186
|
+
# shortcut for run info
|
187
|
+
v["experiment_id"] = str(v["experiment_id"])
|
188
|
+
elif k not in ("params", "tags", "metrics"): # skip run data
|
189
|
+
_stringify_all_experiment_ids(v)
|
190
|
+
elif isinstance(x, list):
|
191
|
+
for y in x:
|
192
|
+
_stringify_all_experiment_ids(y)
|
193
|
+
|
194
|
+
|
195
|
+
def parse_dict(js_dict, message):
    """Parses a JSON dictionary into a message proto, ignoring unknown fields in the JSON.

    Args:
        js_dict: Parsed JSON dict, normalized in place (experiment IDs stringified).
        message: Target protobuf message, populated in place.
    """
    # Legacy compat: pre-0.9.1 clients send experiment IDs as ints, while the
    # proto expects strings — normalize before parsing.
    _stringify_all_experiment_ids(js_dict)
    ParseDict(js_dict=js_dict, message=message, ignore_unknown_fields=True)
|
199
|
+
|
200
|
+
|
201
|
+
def set_pb_value(proto: Value, value: Any):
    """
    DO NOT USE THIS FUNCTION. Preserved for backwards compatibility.

    Set a value to the google.protobuf.Value object.

    Args:
        proto: Target ``google.protobuf.Value``, mutated in place.
        value: dict / list / bool / int / float / str / None to store.

    Raises:
        ValueError: for any other value type.
    """
    if isinstance(value, dict):
        # Dicts map onto Struct: recurse into each field.
        for key, val in value.items():
            set_pb_value(proto.struct_value.fields[key], val)
    elif isinstance(value, list):
        # Lists map onto ListValue: build each element, then append.
        for idx, val in enumerate(value):
            pb = Value()
            set_pb_value(pb, val)
            proto.list_value.values.append(pb)
    elif isinstance(value, bool):
        # NB: bool must be tested before int/float — bool is a subclass of int.
        proto.bool_value = value
    elif isinstance(value, (int, float)):
        proto.number_value = value
    elif isinstance(value, str):
        proto.string_value = value
    elif value is None:
        proto.null_value = NULL_VALUE

    else:
        raise ValueError(f"Unsupported value type: {type(value)}")
|
226
|
+
|
227
|
+
|
228
|
+
def parse_pb_value(proto: Value) -> Optional[Any]:
    """
    DO NOT USE THIS FUNCTION. Preserved for backwards compatibility.

    Extract a value from the google.protobuf.Value object.

    Args:
        proto: A ``google.protobuf.Value`` instance.

    Returns:
        The equivalent Python object (dict / list / bool / float / str), or
        ``None`` when no recognized field is set (including proto null).
    """
    if proto.HasField("struct_value"):
        return {key: parse_pb_value(val) for key, val in proto.struct_value.fields.items()}
    elif proto.HasField("list_value"):
        return [parse_pb_value(val) for val in proto.list_value.values]
    elif proto.HasField("bool_value"):
        return proto.bool_value
    elif proto.HasField("number_value"):
        return proto.number_value
    elif proto.HasField("string_value"):
        return proto.string_value

    # null_value and unset Values fall through to None.
    return None
|
246
|
+
|
247
|
+
|
248
|
+
class NumpyEncoder(JSONEncoder):
    """JSON encoder that handles numpy, pandas, datetime, binary and pydantic values.

    Some numpy types have no native Python equivalent, so ``json.dumps`` raises
    ``TypeError`` on them; this encoder converts such objects to their closest
    JSON-serializable form first.
    """

    def try_convert(self, o):
        """Return ``(converted, True)`` when ``o`` was handled, else ``(o, False)``."""
        import numpy as np
        import pandas as pd

        def encode_binary(x):
            # Raw bytes cannot be placed in JSON; base64-encode to ASCII.
            return base64.encodebytes(x).decode("ascii")

        if isinstance(o, np.ndarray):
            if o.dtype == object:
                # Heterogeneous array: convert each element individually.
                return [self.try_convert(item)[0] for item in o.tolist()], True
            if o.dtype == np.bytes_:
                return np.vectorize(encode_binary)(o), True
            return o.tolist(), True

        if isinstance(o, np.generic):
            # numpy scalar -> native Python scalar.
            return o.item(), True
        if isinstance(o, (bytes, bytearray)):
            return encode_binary(o), True
        if isinstance(o, np.datetime64):
            return np.datetime_as_string(o), True
        if isinstance(o, (pd.Timestamp, datetime.date, datetime.datetime, datetime.time)):
            return o.isoformat(), True
        if isinstance(o, pydantic.BaseModel):
            # pydantic v1 exposes .dict(), v2 exposes .model_dump().
            return o.model_dump() if IS_PYDANTIC_V2_OR_NEWER else o.dict(), True
        return o, False

    def default(self, o):
        converted, handled = self.try_convert(o)
        return converted if handled else super().default(o)
|
288
|
+
|
289
|
+
|
290
|
+
class MlflowInvalidInputException(MlflowException):
    # Client-side input error; carries the BAD_REQUEST error code so the
    # scoring server maps it to an HTTP 400 response.
    def __init__(self, message):
        super().__init__(f"Invalid input. {message}", error_code=BAD_REQUEST)
|
293
|
+
|
294
|
+
|
295
|
+
class MlflowFailedTypeConversion(MlflowInvalidInputException):
    # Raised when a DataFrame column cannot be cast to the type declared in
    # the model signature (see cast_df_types_according_to_schema).
    def __init__(self, col_name, col_type, ex):
        super().__init__(
            message=f"Data is not compatible with model signature. "
            f"Failed to convert column {col_name} to type '{col_type}'. Error: '{ex!r}'"
        )
|
301
|
+
|
302
|
+
|
303
|
+
def cast_df_types_according_to_schema(pdf, schema):
    """Cast the columns of a pandas DataFrame to the types declared in an MLflow schema.

    Args:
        pdf: pandas.DataFrame to cast (mutated in place and returned).
        schema: MLflow ``Schema`` describing the expected input types.

    Returns:
        The same DataFrame with column dtypes converted where possible.

    Raises:
        MlflowFailedTypeConversion: when a column's conversion fails.
    """
    import numpy as np

    from mlflow.models.utils import _enforce_array, _enforce_map, _enforce_object
    from mlflow.types.schema import AnyType, Array, DataType, Map, Object

    actual_cols = set(pdf.columns)
    # Build (column name, declared type) pairs depending on what the schema carries.
    if schema.has_input_names():
        dtype_list = zip(schema.input_names(), schema.input_types())
    elif schema.is_tensor_spec() and len(schema.input_types()) == 1:
        # A single un-named tensor spec applies to every column.
        dtype_list = zip(actual_cols, [schema.input_types()[0] for _ in actual_cols])
    else:
        # Positional matching: pair columns with types up to the shorter length.
        n = min(len(schema.input_types()), len(pdf.columns))
        dtype_list = zip(pdf.columns[:n], schema.input_types()[:n])
    required_input_names = set(schema.required_input_names())

    for col_name, col_type_spec in dtype_list:
        if isinstance(col_type_spec, DataType):
            col_type = col_type_spec.to_pandas()
        else:
            col_type = col_type_spec
        if col_name in actual_cols:
            required = col_name in required_input_names
            try:
                if isinstance(col_type_spec, DataType) and col_type_spec == DataType.binary:
                    # NB: We expect binary data to be passed base64 encoded
                    pdf[col_name] = pdf[col_name].map(
                        lambda x: base64.decodebytes(bytes(x, "utf8"))
                    )
                elif col_type == np.dtype(bytes):
                    pdf[col_name] = pdf[col_name].map(lambda x: bytes(x, "utf8"))
                elif schema.is_tensor_spec() and isinstance(pdf[col_name].iloc[0], list):
                    # For dataframe with multidimensional column, it contains
                    # list type values, we cannot convert
                    # its type by `astype`, skip conversion.
                    # The conversion will be done in `_enforce_schema` while
                    # `PyFuncModel.predict` being called.
                    pass
                elif isinstance(col_type_spec, Array):
                    pdf[col_name] = pdf[col_name].map(
                        lambda x: _enforce_array(x, col_type_spec, required=required)
                    )
                elif isinstance(col_type_spec, Object):
                    pdf[col_name] = pdf[col_name].map(
                        lambda x: _enforce_object(x, col_type_spec, required=required)
                    )
                elif isinstance(col_type_spec, Map):
                    pdf[col_name] = pdf[col_name].map(
                        lambda x: _enforce_map(x, col_type_spec, required=required)
                    )
                elif isinstance(col_type_spec, AnyType):
                    # AnyType accepts the data as-is; no conversion needed.
                    pass
                else:
                    pdf[col_name] = pdf[col_name].astype(col_type, copy=False)
            except Exception as ex:
                raise MlflowFailedTypeConversion(col_name, col_type, ex)
    return pdf
|
360
|
+
|
361
|
+
|
362
|
+
def dataframe_from_parsed_json(decoded_input, pandas_orient, schema=None):
    """Convert parsed json into pandas.DataFrame. If schema is provided this methods will attempt to
    cast data types according to the schema. This include base64 decoding for binary columns.

    Args:
        decoded_input: Parsed json - either a list or a dictionary.
        pandas_orient: pandas data frame convention used to store the data; one of
            "records" or "split".
        schema: MLflow schema used when parsing the data.

    Returns:
        pandas.DataFrame.

    Raises:
        MlflowInvalidInputException: if the payload shape does not match the orient,
            or the orient itself is unsupported.
    """
    import pandas as pd

    if pandas_orient == "records":
        if not isinstance(decoded_input, list):
            if isinstance(decoded_input, dict):
                typemessage = "dictionary"
            else:
                typemessage = f"type {type(decoded_input)}"
            raise MlflowInvalidInputException(
                f"Dataframe records format must be a list of records. Got {typemessage}."
            )
        try:
            pdf = pd.DataFrame(data=decoded_input)
        except Exception as ex:
            raise MlflowInvalidInputException(
                f"Provided dataframe_records field is not a valid dataframe representation in "
                f"'records' format. Error: '{ex}'"
            )
    elif pandas_orient == "split":
        if not isinstance(decoded_input, dict):
            if isinstance(decoded_input, list):
                typemessage = "list"
            else:
                typemessage = f"type {type(decoded_input)}"
            raise MlflowInvalidInputException(
                f"Dataframe split format must be a dictionary. Got {typemessage}."
            )
        keys = set(decoded_input.keys())
        missing_data = "data" not in keys
        extra_keys = keys.difference({"columns", "data", "index"})
        if missing_data or extra_keys:
            raise MlflowInvalidInputException(
                f"Dataframe split format must have 'data' field and optionally 'columns' "
                f"and 'index' fields. Got {keys}.'"
            )
        try:
            pdf = pd.DataFrame(
                index=decoded_input.get("index"),
                columns=decoded_input.get("columns"),
                data=decoded_input["data"],
            )
        except Exception as ex:
            raise MlflowInvalidInputException(
                f"Provided dataframe_split field is not a valid dataframe representation in "
                f"'split' format. Error: '{ex}'"
            )
    else:
        # BUG FIX: an unrecognized orient previously fell through both branches and
        # crashed with UnboundLocalError on `pdf`; fail with a clear client error.
        raise MlflowInvalidInputException(
            f"Unsupported pandas orient '{pandas_orient}'. Expected 'records' or 'split'."
        )
    if schema is not None:
        pdf = cast_df_types_according_to_schema(pdf, schema)
    return pdf
|
423
|
+
|
424
|
+
|
425
|
+
def dataframe_from_raw_json(path_or_str, schema=None, pandas_orient: str = "split"):
    """Parse raw json into a pandas.Dataframe.

    If schema is provided this method will attempt to cast data types according
    to the schema; this includes base64 decoding for binary columns.

    Args:
        path_or_str: Path to a json file or a json string.
        schema: MLflow schema used when parsing the data.
        pandas_orient: pandas data frame convention used to store the data.

    Returns:
        pandas.DataFrame.
    """
    # Accept either an on-disk JSON file or an inline JSON string.
    if os.path.exists(path_or_str):
        with open(path_or_str) as handle:
            parsed = json.load(handle)
    else:
        parsed = json.loads(path_or_str)

    return dataframe_from_parsed_json(parsed, pandas_orient, schema)
|
446
|
+
|
447
|
+
|
448
|
+
def _get_jsonable_obj(data, pandas_orient="records"):
|
449
|
+
"""Attempt to make the data json-able via standard library.
|
450
|
+
|
451
|
+
Look for some commonly used types that are not jsonable and convert them into json-able ones.
|
452
|
+
Unknown data types are returned as is.
|
453
|
+
|
454
|
+
Args:
|
455
|
+
data: Data to be converted, works with pandas and numpy, rest will be returned as is.
|
456
|
+
pandas_orient: If `data` is a Pandas DataFrame, it will be converted to a JSON
|
457
|
+
dictionary using this Pandas serialization orientation.
|
458
|
+
"""
|
459
|
+
import numpy as np
|
460
|
+
import pandas as pd
|
461
|
+
|
462
|
+
if isinstance(data, np.ndarray):
|
463
|
+
return data.tolist()
|
464
|
+
if isinstance(data, pd.DataFrame):
|
465
|
+
return data.to_dict(orient=pandas_orient)
|
466
|
+
if isinstance(data, pd.Series):
|
467
|
+
return pd.DataFrame(data).to_dict(orient=pandas_orient)
|
468
|
+
else: # by default just return whatever this is and hope for the best
|
469
|
+
return data
|
470
|
+
|
471
|
+
|
472
|
+
def convert_data_type(data, spec):
    """
    Convert input data to the type specified in the spec.

    Args:
        data: Input data.
        spec: ColSpec or TensorSpec.

    Returns:
        The converted data: a numpy array for tensor/DataType/Array specs, the
        enforced object/map for Object/Map specs, or the data unchanged for AnyType.

    Raises:
        MlflowInvalidInputException: if conversion fails or the spec type is unsupported.
    """
    import numpy as np

    from mlflow.models.utils import _enforce_array, _enforce_map, _enforce_object
    from mlflow.types.schema import AnyType, Array, ColSpec, DataType, Map, Object, TensorSpec

    try:
        if spec is None:
            # No spec available: let numpy infer the dtype.
            return np.array(data)
        if isinstance(spec, TensorSpec):
            return np.array(data, dtype=spec.type)
        if isinstance(spec, ColSpec):
            if isinstance(spec.type, DataType):
                # Scalars are wrapped into a 1-element array for the dtype cast,
                # then unwrapped so a scalar input yields a scalar output.
                return (
                    np.array(data, spec.type.to_numpy())
                    if isinstance(data, (list, np.ndarray))
                    else np.array([data], spec.type.to_numpy())[0]
                )
            elif isinstance(spec.type, Array):
                # convert to numpy array for backwards compatibility
                return np.array(_enforce_array(data, spec.type, required=spec.required))
            elif isinstance(spec.type, Object):
                return _enforce_object(data, spec.type, required=spec.required)
            elif isinstance(spec.type, Map):
                return _enforce_map(data, spec.type, required=spec.required)
            elif isinstance(spec.type, AnyType):
                return data
    except MlflowException as e:
        raise MlflowInvalidInputException(e.message)
    except Exception as ex:
        raise MlflowInvalidInputException(f"{ex}")

    # Reached only when the spec matched none of the handled cases above.
    raise MlflowInvalidInputException(
        f"Failed to convert data type for data `{data}` with spec `{spec}`."
    )
|
514
|
+
|
515
|
+
|
516
|
+
def _cast_schema_type(input_data, schema=None):
    """Cast parsed JSON input to the types declared in the schema.

    Args:
        input_data: Parsed JSON payload (dict, list, or scalar). A deep copy is
            made so the caller's object is never mutated.
        schema: Optional MLflow ``Schema`` used to resolve per-column specs.

    Returns:
        The converted payload; dict values / list items are converted per their
        column spec via ``convert_data_type``.

    Raises:
        MlflowInvalidInputException: if the payload shape is incompatible with
            the schema or a conversion fails.
    """
    import numpy as np

    input_data = deepcopy(input_data)
    # spec_name -> spec mapping
    types_dict = schema.input_dict() if schema and schema.has_input_names() else {}
    if schema is not None:
        if (
            len(types_dict) == 1
            and isinstance(input_data, list)
            and not any(isinstance(x, dict) for x in input_data)
        ):
            # for data with a single column (not List[Dict]), match input with column
            input_data = {next(iter(types_dict)): input_data}
        # Un-named schema should only contain a single column or a single value
        elif not schema.has_input_names() and not (
            isinstance(input_data, list) or np.isscalar(input_data)
        ):
            raise MlflowInvalidInputException(
                "Failed to parse input data. This model contains an un-named "
                " model signature which expects a single n-dimensional array or "
                "a single value as input, however, an input of type "
                f"{type(input_data)} was found."
            )
    if isinstance(input_data, dict):
        # each key corresponds to a column, values should be
        # checked against the schema
        input_data = {
            col: convert_data_type(data, types_dict.get(col)) for col, data in input_data.items()
        }
    elif isinstance(input_data, list):
        # List of dictionaries of column_name -> value mapping
        # List[Dict] must correspond to a schema with named columns
        if all(isinstance(x, dict) for x in input_data):
            input_data = [
                {col: convert_data_type(value, types_dict.get(col)) for col, value in data.items()}
                for data in input_data
            ]
        # List of values
        else:
            spec = schema.inputs[0] if schema else None
            input_data = convert_data_type(input_data, spec)
    else:
        # Scalar input: cast against the first (only) spec, if any.
        spec = schema.inputs[0] if schema else None
        try:
            input_data = convert_data_type(input_data, spec)
        except Exception as e:
            raise MlflowInvalidInputException(
                f"Failed to convert data `{input_data}` to type `{spec}` defined "
                "in the model signature."
            ) from e
    return input_data
|
568
|
+
|
569
|
+
|
570
|
+
def parse_instances_data(data, schema=None):
    """Parse TF-serving row-format ("instances") data into column format.

    Args:
        data: Dict containing an "instances" key, per the TF serving REST API.
        schema: Optional MLflow ``Schema`` used to cast values.

    Returns:
        The parsed data — for List[Dict] input, a dict mapping column name to
        numpy array; otherwise the result of ``_cast_schema_type``.

    Raises:
        MlflowInvalidInputException: if "instances" is missing or the columns
            have mismatched lengths.
    """
    import numpy as np

    from mlflow.types.schema import Array

    if "instances" not in data:
        raise MlflowInvalidInputException("Expecting data to have `instances` as key.")
    data = data["instances"]
    # List[Dict]
    if isinstance(data, list) and len(data) > 0 and isinstance(data[0], dict):
        # convert items to column format (map column/input name to tensor)
        data_dict = defaultdict(list)
        types_dict = schema.input_dict() if schema and schema.has_input_names() else {}
        for item in data:
            for col, v in item.items():
                data_dict[col].append(convert_data_type(v, types_dict.get(col)))
        # convert to numpy array for backwards compatibility
        data = {col: np.array(v) for col, v in data_dict.items()}
    else:
        data = _cast_schema_type(data, schema)

    # Sanity check inputted data. This check will only be applied
    # when the row-format `instances` is used since it requires
    # same 0-th dimension for all items.
    if isinstance(data, dict):
        # ensure all columns have the same number of items
        # Only check the data when it's a list or numpy array
        check_data = {k: v for k, v in data.items() if isinstance(v, (list, np.ndarray))}
        if schema and schema.has_input_names():
            # Only check required columns
            required_cols = schema.required_input_names()
            # For Array schema we should not check the length of the data matching
            check_cols = {
                col for col, spec in schema.input_dict().items() if not isinstance(spec.type, Array)
            }
            check_cols = list(set(required_cols) & check_cols & set(check_data.keys()))
        else:
            check_cols = list(check_data.keys())

        if check_cols:
            expected_len = len(check_data[check_cols[0]])
            if not all(len(check_data[col]) == expected_len for col in check_cols[1:]):
                raise MlflowInvalidInputException(
                    "The length of values for each input/column name are not the same"
                )
    return data
|
616
|
+
|
617
|
+
|
618
|
+
# TODO: Reuse this function for `inputs` key data parsing in serving, and
|
619
|
+
# add `convert_to_numpy` param to avoid converting data to numpy arrays for
|
620
|
+
# genAI flavors.
|
621
|
+
def parse_inputs_data(inputs_data_or_path, schema=None):
    """
    Helper function to cast inputs_data based on the schema.
    Inputs data must be able to pass to the model for pyfunc predict directly.

    Args:
        inputs_data_or_path: A json-serializable object or path to a json file.
        schema: data schema to cast to. Be of type `mlflow.types.Schema`.
    """
    inputs_data = inputs_data_or_path
    # A string that names an existing file is treated as a JSON file path.
    if isinstance(inputs_data, str) and os.path.exists(inputs_data):
        with open(inputs_data) as fp:
            inputs_data = json.load(fp)
    return _cast_schema_type(inputs_data, schema)
|
636
|
+
|
637
|
+
|
638
|
+
def parse_tf_serving_input(inp_dict, schema=None):
    """
    Args:
        inp_dict: A dict deserialized from a JSON string formatted as described in TF's
            serving API doc
            (https://www.tensorflow.org/tfx/serving/api_rest#request_format_2)
        schema: MLflow schema used when parsing the data.

    Returns:
        The parsed payload: ``parse_instances_data`` output for "instances",
        or ``_cast_schema_type`` output for "inputs".

    Raises:
        MlflowInvalidInputException: for unsupported keys or malformed payloads.
    """

    if "signature_name" in inp_dict:
        raise MlflowInvalidInputException('"signature_name" parameter is currently not supported')

    # Exactly one of "instances" / "inputs" must be the sole key.
    if not (list(inp_dict.keys()) == ["instances"] or list(inp_dict.keys()) == ["inputs"]):
        raise MlflowInvalidInputException(
            'One of "instances" and "inputs" must be specified (not both or any other keys).'
            f"Received: {list(inp_dict.keys())}"
        )

    # Read the JSON
    try:
        # objects & arrays schema for List[Dict] and Dict[List] are different
        # so the conversion for `instances` below changes the schema.
        # e.g.
        # [{"col1": 1, "col2": 2}, {"col1": 3, "col2": 4}] -> {"col1": [1, 3], "col2": [2, 4]}
        # Schema([ColSpec(long, "col1"), ColSpec(long, "col2")]) ->
        # Schema([ColSpec(Array(long), "col1"), ColSpec(Array(long), "col2")])
        # To avoid this, we shouldn't use `instances` for such data.
        if "instances" in inp_dict:
            return parse_instances_data(inp_dict, schema)
        else:
            # items already in column format, convert values to tensor
            return _cast_schema_type(inp_dict["inputs"], schema)
    except MlflowException as e:
        # Already a well-formed MLflow error; propagate unchanged.
        raise e
    except Exception as e:
        # Add error into message to provide details for serving usage
        raise MlflowInvalidInputException(
            f"Ensure that the input is a valid JSON-formatted string.\nError: {e!r}"
        ) from e
|
677
|
+
|
678
|
+
|
679
|
+
# Reference: https://stackoverflow.com/a/12126976
|
680
|
+
class _CustomJsonEncoder(json.JSONEncoder):
|
681
|
+
def default(self, o):
|
682
|
+
import numpy as np
|
683
|
+
import pandas as pd
|
684
|
+
|
685
|
+
if isinstance(o, (datetime.datetime, datetime.date, datetime.time, pd.Timestamp)):
|
686
|
+
return o.isoformat()
|
687
|
+
|
688
|
+
if isinstance(o, np.ndarray):
|
689
|
+
return o.tolist()
|
690
|
+
|
691
|
+
return super().default(o)
|
692
|
+
|
693
|
+
|
694
|
+
def get_jsonable_input(name, data):
    """Convert one named model input to a JSON-serializable form.

    Args:
        name: Input name, used only for the error message.
        data: The input value; currently only numpy arrays are supported.

    Returns:
        A nested list equivalent of the array.

    Raises:
        MlflowException: when ``data`` is not a numpy array.
    """
    import numpy as np

    if not isinstance(data, np.ndarray):
        raise MlflowException(f"Incompatible input type:{type(data)} for input {name}.")
    return data.tolist()
|
701
|
+
|
702
|
+
|
703
|
+
def dump_input_data(data, inputs_key="inputs", params: Optional[dict[str, Any]] = None):
    """Serialize model input data into the JSON payload expected by the scoring server.

    Args:
        data: Input data.
        inputs_key: Key to represent data in the request payload.
        params: Additional parameters to pass to the model for inference.

    Returns:
        A JSON string, or the input unchanged if it is already a string.

    Raises:
        MlflowException: if ``params`` is provided but is not a dictionary.
    """
    import numpy as np
    import pandas as pd

    # Convert scipy data to numpy array
    if importlib.util.find_spec("scipy.sparse"):
        from scipy.sparse import csc_matrix, csr_matrix

        if isinstance(data, (csc_matrix, csr_matrix)):
            data = data.toarray()

    if isinstance(data, pd.DataFrame):
        post_data = {"dataframe_split": data.to_dict(orient="split")}
    elif isinstance(data, dict):
        # BUG FIX: iterate `.items()` — iterating the dict directly yields only
        # keys, so `for k, v in data` failed to unpack (or mis-unpacked) at runtime.
        post_data = {inputs_key: {k: get_jsonable_input(k, v) for k, v in data.items()}}
    elif isinstance(data, np.ndarray):
        post_data = {inputs_key: data.tolist()}
    elif isinstance(data, list):
        post_data = {inputs_key: data}
    else:
        post_data = data

    if params is not None:
        if not isinstance(params, dict):
            raise MlflowException(
                f"Params must be a dictionary. Got type '{type(params).__name__}'."
            )
        # if post_data is not dictionary, params should be included in post_data directly
        if isinstance(post_data, dict):
            post_data["params"] = params

    if not isinstance(post_data, str):
        post_data = json.dumps(post_data, cls=_CustomJsonEncoder)

    return post_data
|