genesis-flow 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genesis_flow-1.0.0.dist-info/METADATA +822 -0
- genesis_flow-1.0.0.dist-info/RECORD +645 -0
- genesis_flow-1.0.0.dist-info/WHEEL +5 -0
- genesis_flow-1.0.0.dist-info/entry_points.txt +19 -0
- genesis_flow-1.0.0.dist-info/licenses/LICENSE.txt +202 -0
- genesis_flow-1.0.0.dist-info/top_level.txt +1 -0
- mlflow/__init__.py +367 -0
- mlflow/__main__.py +3 -0
- mlflow/ag2/__init__.py +56 -0
- mlflow/ag2/ag2_logger.py +294 -0
- mlflow/anthropic/__init__.py +40 -0
- mlflow/anthropic/autolog.py +129 -0
- mlflow/anthropic/chat.py +144 -0
- mlflow/artifacts/__init__.py +268 -0
- mlflow/autogen/__init__.py +144 -0
- mlflow/autogen/chat.py +142 -0
- mlflow/azure/__init__.py +26 -0
- mlflow/azure/auth_handler.py +257 -0
- mlflow/azure/client.py +319 -0
- mlflow/azure/config.py +120 -0
- mlflow/azure/connection_factory.py +340 -0
- mlflow/azure/exceptions.py +27 -0
- mlflow/azure/stores.py +327 -0
- mlflow/azure/utils.py +183 -0
- mlflow/bedrock/__init__.py +45 -0
- mlflow/bedrock/_autolog.py +202 -0
- mlflow/bedrock/chat.py +122 -0
- mlflow/bedrock/stream.py +160 -0
- mlflow/bedrock/utils.py +43 -0
- mlflow/cli.py +707 -0
- mlflow/client.py +12 -0
- mlflow/config/__init__.py +56 -0
- mlflow/crewai/__init__.py +79 -0
- mlflow/crewai/autolog.py +253 -0
- mlflow/crewai/chat.py +29 -0
- mlflow/data/__init__.py +75 -0
- mlflow/data/artifact_dataset_sources.py +170 -0
- mlflow/data/code_dataset_source.py +40 -0
- mlflow/data/dataset.py +123 -0
- mlflow/data/dataset_registry.py +168 -0
- mlflow/data/dataset_source.py +110 -0
- mlflow/data/dataset_source_registry.py +219 -0
- mlflow/data/delta_dataset_source.py +167 -0
- mlflow/data/digest_utils.py +108 -0
- mlflow/data/evaluation_dataset.py +562 -0
- mlflow/data/filesystem_dataset_source.py +81 -0
- mlflow/data/http_dataset_source.py +145 -0
- mlflow/data/huggingface_dataset.py +258 -0
- mlflow/data/huggingface_dataset_source.py +118 -0
- mlflow/data/meta_dataset.py +104 -0
- mlflow/data/numpy_dataset.py +223 -0
- mlflow/data/pandas_dataset.py +231 -0
- mlflow/data/polars_dataset.py +352 -0
- mlflow/data/pyfunc_dataset_mixin.py +31 -0
- mlflow/data/schema.py +76 -0
- mlflow/data/sources.py +1 -0
- mlflow/data/spark_dataset.py +406 -0
- mlflow/data/spark_dataset_source.py +74 -0
- mlflow/data/spark_delta_utils.py +118 -0
- mlflow/data/tensorflow_dataset.py +350 -0
- mlflow/data/uc_volume_dataset_source.py +81 -0
- mlflow/db.py +27 -0
- mlflow/dspy/__init__.py +17 -0
- mlflow/dspy/autolog.py +197 -0
- mlflow/dspy/callback.py +398 -0
- mlflow/dspy/constant.py +1 -0
- mlflow/dspy/load.py +93 -0
- mlflow/dspy/save.py +393 -0
- mlflow/dspy/util.py +109 -0
- mlflow/dspy/wrapper.py +226 -0
- mlflow/entities/__init__.py +104 -0
- mlflow/entities/_mlflow_object.py +52 -0
- mlflow/entities/assessment.py +545 -0
- mlflow/entities/assessment_error.py +80 -0
- mlflow/entities/assessment_source.py +141 -0
- mlflow/entities/dataset.py +92 -0
- mlflow/entities/dataset_input.py +51 -0
- mlflow/entities/dataset_summary.py +62 -0
- mlflow/entities/document.py +48 -0
- mlflow/entities/experiment.py +109 -0
- mlflow/entities/experiment_tag.py +35 -0
- mlflow/entities/file_info.py +45 -0
- mlflow/entities/input_tag.py +35 -0
- mlflow/entities/lifecycle_stage.py +35 -0
- mlflow/entities/logged_model.py +228 -0
- mlflow/entities/logged_model_input.py +26 -0
- mlflow/entities/logged_model_output.py +32 -0
- mlflow/entities/logged_model_parameter.py +46 -0
- mlflow/entities/logged_model_status.py +74 -0
- mlflow/entities/logged_model_tag.py +33 -0
- mlflow/entities/metric.py +200 -0
- mlflow/entities/model_registry/__init__.py +29 -0
- mlflow/entities/model_registry/_model_registry_entity.py +13 -0
- mlflow/entities/model_registry/model_version.py +243 -0
- mlflow/entities/model_registry/model_version_deployment_job_run_state.py +44 -0
- mlflow/entities/model_registry/model_version_deployment_job_state.py +70 -0
- mlflow/entities/model_registry/model_version_search.py +25 -0
- mlflow/entities/model_registry/model_version_stages.py +25 -0
- mlflow/entities/model_registry/model_version_status.py +35 -0
- mlflow/entities/model_registry/model_version_tag.py +35 -0
- mlflow/entities/model_registry/prompt.py +73 -0
- mlflow/entities/model_registry/prompt_version.py +244 -0
- mlflow/entities/model_registry/registered_model.py +175 -0
- mlflow/entities/model_registry/registered_model_alias.py +35 -0
- mlflow/entities/model_registry/registered_model_deployment_job_state.py +39 -0
- mlflow/entities/model_registry/registered_model_search.py +25 -0
- mlflow/entities/model_registry/registered_model_tag.py +35 -0
- mlflow/entities/multipart_upload.py +74 -0
- mlflow/entities/param.py +49 -0
- mlflow/entities/run.py +97 -0
- mlflow/entities/run_data.py +84 -0
- mlflow/entities/run_info.py +188 -0
- mlflow/entities/run_inputs.py +59 -0
- mlflow/entities/run_outputs.py +43 -0
- mlflow/entities/run_status.py +41 -0
- mlflow/entities/run_tag.py +36 -0
- mlflow/entities/source_type.py +31 -0
- mlflow/entities/span.py +774 -0
- mlflow/entities/span_event.py +96 -0
- mlflow/entities/span_status.py +102 -0
- mlflow/entities/trace.py +317 -0
- mlflow/entities/trace_data.py +71 -0
- mlflow/entities/trace_info.py +220 -0
- mlflow/entities/trace_info_v2.py +162 -0
- mlflow/entities/trace_location.py +173 -0
- mlflow/entities/trace_state.py +39 -0
- mlflow/entities/trace_status.py +68 -0
- mlflow/entities/view_type.py +51 -0
- mlflow/environment_variables.py +866 -0
- mlflow/evaluation/__init__.py +16 -0
- mlflow/evaluation/assessment.py +369 -0
- mlflow/evaluation/evaluation.py +411 -0
- mlflow/evaluation/evaluation_tag.py +61 -0
- mlflow/evaluation/fluent.py +48 -0
- mlflow/evaluation/utils.py +201 -0
- mlflow/exceptions.py +213 -0
- mlflow/experiments.py +140 -0
- mlflow/gemini/__init__.py +81 -0
- mlflow/gemini/autolog.py +186 -0
- mlflow/gemini/chat.py +261 -0
- mlflow/genai/__init__.py +71 -0
- mlflow/genai/datasets/__init__.py +67 -0
- mlflow/genai/datasets/evaluation_dataset.py +131 -0
- mlflow/genai/evaluation/__init__.py +3 -0
- mlflow/genai/evaluation/base.py +411 -0
- mlflow/genai/evaluation/constant.py +23 -0
- mlflow/genai/evaluation/utils.py +244 -0
- mlflow/genai/judges/__init__.py +21 -0
- mlflow/genai/judges/databricks.py +404 -0
- mlflow/genai/label_schemas/__init__.py +153 -0
- mlflow/genai/label_schemas/label_schemas.py +209 -0
- mlflow/genai/labeling/__init__.py +159 -0
- mlflow/genai/labeling/labeling.py +250 -0
- mlflow/genai/optimize/__init__.py +13 -0
- mlflow/genai/optimize/base.py +198 -0
- mlflow/genai/optimize/optimizers/__init__.py +4 -0
- mlflow/genai/optimize/optimizers/base_optimizer.py +38 -0
- mlflow/genai/optimize/optimizers/dspy_mipro_optimizer.py +221 -0
- mlflow/genai/optimize/optimizers/dspy_optimizer.py +91 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_callback.py +76 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_utils.py +18 -0
- mlflow/genai/optimize/types.py +75 -0
- mlflow/genai/optimize/util.py +30 -0
- mlflow/genai/prompts/__init__.py +206 -0
- mlflow/genai/scheduled_scorers.py +431 -0
- mlflow/genai/scorers/__init__.py +26 -0
- mlflow/genai/scorers/base.py +492 -0
- mlflow/genai/scorers/builtin_scorers.py +765 -0
- mlflow/genai/scorers/scorer_utils.py +138 -0
- mlflow/genai/scorers/validation.py +165 -0
- mlflow/genai/utils/data_validation.py +146 -0
- mlflow/genai/utils/enum_utils.py +23 -0
- mlflow/genai/utils/trace_utils.py +211 -0
- mlflow/groq/__init__.py +42 -0
- mlflow/groq/_groq_autolog.py +74 -0
- mlflow/johnsnowlabs/__init__.py +888 -0
- mlflow/langchain/__init__.py +24 -0
- mlflow/langchain/api_request_parallel_processor.py +330 -0
- mlflow/langchain/autolog.py +147 -0
- mlflow/langchain/chat_agent_langgraph.py +340 -0
- mlflow/langchain/constant.py +1 -0
- mlflow/langchain/constants.py +1 -0
- mlflow/langchain/databricks_dependencies.py +444 -0
- mlflow/langchain/langchain_tracer.py +597 -0
- mlflow/langchain/model.py +919 -0
- mlflow/langchain/output_parsers.py +142 -0
- mlflow/langchain/retriever_chain.py +153 -0
- mlflow/langchain/runnables.py +527 -0
- mlflow/langchain/utils/chat.py +402 -0
- mlflow/langchain/utils/logging.py +671 -0
- mlflow/langchain/utils/serialization.py +36 -0
- mlflow/legacy_databricks_cli/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/provider.py +482 -0
- mlflow/litellm/__init__.py +175 -0
- mlflow/llama_index/__init__.py +22 -0
- mlflow/llama_index/autolog.py +55 -0
- mlflow/llama_index/chat.py +43 -0
- mlflow/llama_index/constant.py +1 -0
- mlflow/llama_index/model.py +577 -0
- mlflow/llama_index/pyfunc_wrapper.py +332 -0
- mlflow/llama_index/serialize_objects.py +188 -0
- mlflow/llama_index/tracer.py +561 -0
- mlflow/metrics/__init__.py +479 -0
- mlflow/metrics/base.py +39 -0
- mlflow/metrics/genai/__init__.py +25 -0
- mlflow/metrics/genai/base.py +101 -0
- mlflow/metrics/genai/genai_metric.py +771 -0
- mlflow/metrics/genai/metric_definitions.py +450 -0
- mlflow/metrics/genai/model_utils.py +371 -0
- mlflow/metrics/genai/prompt_template.py +68 -0
- mlflow/metrics/genai/prompts/__init__.py +0 -0
- mlflow/metrics/genai/prompts/v1.py +422 -0
- mlflow/metrics/genai/utils.py +6 -0
- mlflow/metrics/metric_definitions.py +619 -0
- mlflow/mismatch.py +34 -0
- mlflow/mistral/__init__.py +34 -0
- mlflow/mistral/autolog.py +71 -0
- mlflow/mistral/chat.py +135 -0
- mlflow/ml_package_versions.py +452 -0
- mlflow/models/__init__.py +97 -0
- mlflow/models/auth_policy.py +83 -0
- mlflow/models/cli.py +354 -0
- mlflow/models/container/__init__.py +294 -0
- mlflow/models/container/scoring_server/__init__.py +0 -0
- mlflow/models/container/scoring_server/nginx.conf +39 -0
- mlflow/models/dependencies_schemas.py +287 -0
- mlflow/models/display_utils.py +158 -0
- mlflow/models/docker_utils.py +211 -0
- mlflow/models/evaluation/__init__.py +23 -0
- mlflow/models/evaluation/_shap_patch.py +64 -0
- mlflow/models/evaluation/artifacts.py +194 -0
- mlflow/models/evaluation/base.py +1811 -0
- mlflow/models/evaluation/calibration_curve.py +109 -0
- mlflow/models/evaluation/default_evaluator.py +996 -0
- mlflow/models/evaluation/deprecated.py +23 -0
- mlflow/models/evaluation/evaluator_registry.py +80 -0
- mlflow/models/evaluation/evaluators/classifier.py +704 -0
- mlflow/models/evaluation/evaluators/default.py +233 -0
- mlflow/models/evaluation/evaluators/regressor.py +96 -0
- mlflow/models/evaluation/evaluators/shap.py +296 -0
- mlflow/models/evaluation/lift_curve.py +178 -0
- mlflow/models/evaluation/utils/metric.py +123 -0
- mlflow/models/evaluation/utils/trace.py +179 -0
- mlflow/models/evaluation/validation.py +434 -0
- mlflow/models/flavor_backend.py +93 -0
- mlflow/models/flavor_backend_registry.py +53 -0
- mlflow/models/model.py +1639 -0
- mlflow/models/model_config.py +150 -0
- mlflow/models/notebook_resources/agent_evaluation_template.html +235 -0
- mlflow/models/notebook_resources/eval_with_dataset_example.py +22 -0
- mlflow/models/notebook_resources/eval_with_synthetic_example.py +22 -0
- mlflow/models/python_api.py +369 -0
- mlflow/models/rag_signatures.py +128 -0
- mlflow/models/resources.py +321 -0
- mlflow/models/signature.py +662 -0
- mlflow/models/utils.py +2054 -0
- mlflow/models/wheeled_model.py +280 -0
- mlflow/openai/__init__.py +57 -0
- mlflow/openai/_agent_tracer.py +364 -0
- mlflow/openai/api_request_parallel_processor.py +131 -0
- mlflow/openai/autolog.py +509 -0
- mlflow/openai/constant.py +1 -0
- mlflow/openai/model.py +824 -0
- mlflow/openai/utils/chat_schema.py +367 -0
- mlflow/optuna/__init__.py +3 -0
- mlflow/optuna/storage.py +646 -0
- mlflow/plugins/__init__.py +72 -0
- mlflow/plugins/base.py +358 -0
- mlflow/plugins/builtin/__init__.py +24 -0
- mlflow/plugins/builtin/pytorch_plugin.py +150 -0
- mlflow/plugins/builtin/sklearn_plugin.py +158 -0
- mlflow/plugins/builtin/transformers_plugin.py +187 -0
- mlflow/plugins/cli.py +321 -0
- mlflow/plugins/discovery.py +340 -0
- mlflow/plugins/manager.py +465 -0
- mlflow/plugins/registry.py +316 -0
- mlflow/plugins/templates/framework_plugin_template.py +329 -0
- mlflow/prompt/constants.py +20 -0
- mlflow/prompt/promptlab_model.py +197 -0
- mlflow/prompt/registry_utils.py +248 -0
- mlflow/promptflow/__init__.py +495 -0
- mlflow/protos/__init__.py +0 -0
- mlflow/protos/assessments_pb2.py +174 -0
- mlflow/protos/databricks_artifacts_pb2.py +489 -0
- mlflow/protos/databricks_filesystem_service_pb2.py +196 -0
- mlflow/protos/databricks_managed_catalog_messages_pb2.py +95 -0
- mlflow/protos/databricks_managed_catalog_service_pb2.py +86 -0
- mlflow/protos/databricks_pb2.py +267 -0
- mlflow/protos/databricks_trace_server_pb2.py +374 -0
- mlflow/protos/databricks_uc_registry_messages_pb2.py +1249 -0
- mlflow/protos/databricks_uc_registry_service_pb2.py +170 -0
- mlflow/protos/facet_feature_statistics_pb2.py +296 -0
- mlflow/protos/internal_pb2.py +77 -0
- mlflow/protos/mlflow_artifacts_pb2.py +336 -0
- mlflow/protos/model_registry_pb2.py +1073 -0
- mlflow/protos/scalapb/__init__.py +0 -0
- mlflow/protos/scalapb/scalapb_pb2.py +104 -0
- mlflow/protos/service_pb2.py +2600 -0
- mlflow/protos/unity_catalog_oss_messages_pb2.py +457 -0
- mlflow/protos/unity_catalog_oss_service_pb2.py +130 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2.py +447 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2_grpc.py +24 -0
- mlflow/protos/unity_catalog_prompt_service_pb2.py +164 -0
- mlflow/protos/unity_catalog_prompt_service_pb2_grpc.py +785 -0
- mlflow/py.typed +0 -0
- mlflow/pydantic_ai/__init__.py +57 -0
- mlflow/pydantic_ai/autolog.py +173 -0
- mlflow/pyfunc/__init__.py +3844 -0
- mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py +61 -0
- mlflow/pyfunc/backend.py +523 -0
- mlflow/pyfunc/context.py +78 -0
- mlflow/pyfunc/dbconnect_artifact_cache.py +144 -0
- mlflow/pyfunc/loaders/__init__.py +7 -0
- mlflow/pyfunc/loaders/chat_agent.py +117 -0
- mlflow/pyfunc/loaders/chat_model.py +125 -0
- mlflow/pyfunc/loaders/code_model.py +31 -0
- mlflow/pyfunc/loaders/responses_agent.py +112 -0
- mlflow/pyfunc/mlserver.py +46 -0
- mlflow/pyfunc/model.py +1473 -0
- mlflow/pyfunc/scoring_server/__init__.py +604 -0
- mlflow/pyfunc/scoring_server/app.py +7 -0
- mlflow/pyfunc/scoring_server/client.py +146 -0
- mlflow/pyfunc/spark_model_cache.py +48 -0
- mlflow/pyfunc/stdin_server.py +44 -0
- mlflow/pyfunc/utils/__init__.py +3 -0
- mlflow/pyfunc/utils/data_validation.py +224 -0
- mlflow/pyfunc/utils/environment.py +22 -0
- mlflow/pyfunc/utils/input_converter.py +47 -0
- mlflow/pyfunc/utils/serving_data_parser.py +11 -0
- mlflow/pytorch/__init__.py +1171 -0
- mlflow/pytorch/_lightning_autolog.py +580 -0
- mlflow/pytorch/_pytorch_autolog.py +50 -0
- mlflow/pytorch/pickle_module.py +35 -0
- mlflow/rfunc/__init__.py +42 -0
- mlflow/rfunc/backend.py +134 -0
- mlflow/runs.py +89 -0
- mlflow/server/__init__.py +302 -0
- mlflow/server/auth/__init__.py +1224 -0
- mlflow/server/auth/__main__.py +4 -0
- mlflow/server/auth/basic_auth.ini +6 -0
- mlflow/server/auth/cli.py +11 -0
- mlflow/server/auth/client.py +537 -0
- mlflow/server/auth/config.py +34 -0
- mlflow/server/auth/db/__init__.py +0 -0
- mlflow/server/auth/db/cli.py +18 -0
- mlflow/server/auth/db/migrations/__init__.py +0 -0
- mlflow/server/auth/db/migrations/alembic.ini +110 -0
- mlflow/server/auth/db/migrations/env.py +76 -0
- mlflow/server/auth/db/migrations/versions/8606fa83a998_initial_migration.py +51 -0
- mlflow/server/auth/db/migrations/versions/__init__.py +0 -0
- mlflow/server/auth/db/models.py +67 -0
- mlflow/server/auth/db/utils.py +37 -0
- mlflow/server/auth/entities.py +165 -0
- mlflow/server/auth/logo.py +14 -0
- mlflow/server/auth/permissions.py +65 -0
- mlflow/server/auth/routes.py +18 -0
- mlflow/server/auth/sqlalchemy_store.py +263 -0
- mlflow/server/graphql/__init__.py +0 -0
- mlflow/server/graphql/autogenerated_graphql_schema.py +353 -0
- mlflow/server/graphql/graphql_custom_scalars.py +24 -0
- mlflow/server/graphql/graphql_errors.py +15 -0
- mlflow/server/graphql/graphql_no_batching.py +89 -0
- mlflow/server/graphql/graphql_schema_extensions.py +74 -0
- mlflow/server/handlers.py +3217 -0
- mlflow/server/prometheus_exporter.py +17 -0
- mlflow/server/validation.py +30 -0
- mlflow/shap/__init__.py +691 -0
- mlflow/sklearn/__init__.py +1994 -0
- mlflow/sklearn/utils.py +1041 -0
- mlflow/smolagents/__init__.py +66 -0
- mlflow/smolagents/autolog.py +139 -0
- mlflow/smolagents/chat.py +29 -0
- mlflow/store/__init__.py +10 -0
- mlflow/store/_unity_catalog/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/constants.py +2 -0
- mlflow/store/_unity_catalog/registry/__init__.py +6 -0
- mlflow/store/_unity_catalog/registry/prompt_info.py +75 -0
- mlflow/store/_unity_catalog/registry/rest_store.py +1740 -0
- mlflow/store/_unity_catalog/registry/uc_oss_rest_store.py +507 -0
- mlflow/store/_unity_catalog/registry/utils.py +121 -0
- mlflow/store/artifact/__init__.py +0 -0
- mlflow/store/artifact/artifact_repo.py +472 -0
- mlflow/store/artifact/artifact_repository_registry.py +154 -0
- mlflow/store/artifact/azure_blob_artifact_repo.py +275 -0
- mlflow/store/artifact/azure_data_lake_artifact_repo.py +295 -0
- mlflow/store/artifact/cli.py +141 -0
- mlflow/store/artifact/cloud_artifact_repo.py +332 -0
- mlflow/store/artifact/databricks_artifact_repo.py +729 -0
- mlflow/store/artifact/databricks_artifact_repo_resources.py +301 -0
- mlflow/store/artifact/databricks_logged_model_artifact_repo.py +93 -0
- mlflow/store/artifact/databricks_models_artifact_repo.py +216 -0
- mlflow/store/artifact/databricks_sdk_artifact_repo.py +134 -0
- mlflow/store/artifact/databricks_sdk_models_artifact_repo.py +97 -0
- mlflow/store/artifact/dbfs_artifact_repo.py +240 -0
- mlflow/store/artifact/ftp_artifact_repo.py +132 -0
- mlflow/store/artifact/gcs_artifact_repo.py +296 -0
- mlflow/store/artifact/hdfs_artifact_repo.py +209 -0
- mlflow/store/artifact/http_artifact_repo.py +218 -0
- mlflow/store/artifact/local_artifact_repo.py +142 -0
- mlflow/store/artifact/mlflow_artifacts_repo.py +94 -0
- mlflow/store/artifact/models_artifact_repo.py +259 -0
- mlflow/store/artifact/optimized_s3_artifact_repo.py +356 -0
- mlflow/store/artifact/presigned_url_artifact_repo.py +173 -0
- mlflow/store/artifact/r2_artifact_repo.py +70 -0
- mlflow/store/artifact/runs_artifact_repo.py +265 -0
- mlflow/store/artifact/s3_artifact_repo.py +330 -0
- mlflow/store/artifact/sftp_artifact_repo.py +141 -0
- mlflow/store/artifact/uc_volume_artifact_repo.py +76 -0
- mlflow/store/artifact/unity_catalog_models_artifact_repo.py +168 -0
- mlflow/store/artifact/unity_catalog_oss_models_artifact_repo.py +168 -0
- mlflow/store/artifact/utils/__init__.py +0 -0
- mlflow/store/artifact/utils/models.py +148 -0
- mlflow/store/db/__init__.py +0 -0
- mlflow/store/db/base_sql_model.py +3 -0
- mlflow/store/db/db_types.py +10 -0
- mlflow/store/db/utils.py +314 -0
- mlflow/store/db_migrations/__init__.py +0 -0
- mlflow/store/db_migrations/alembic.ini +74 -0
- mlflow/store/db_migrations/env.py +84 -0
- mlflow/store/db_migrations/versions/0584bdc529eb_add_cascading_deletion_to_datasets_from_experiments.py +88 -0
- mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +49 -0
- mlflow/store/db_migrations/versions/0c779009ac13_add_deleted_time_field_to_runs_table.py +24 -0
- mlflow/store/db_migrations/versions/181f10493468_allow_nulls_for_metric_values.py +35 -0
- mlflow/store/db_migrations/versions/27a6a02d2cf1_add_model_version_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/2b4d017a5e9b_add_model_registry_tables_to_db.py +77 -0
- mlflow/store/db_migrations/versions/2d6e25af4d3e_increase_max_param_val_length.py +33 -0
- mlflow/store/db_migrations/versions/3500859a5d39_add_model_aliases_table.py +50 -0
- mlflow/store/db_migrations/versions/39d1c3be5f05_add_is_nan_constraint_for_metrics_tables_if_necessary.py +41 -0
- mlflow/store/db_migrations/versions/400f98739977_add_logged_model_tables.py +123 -0
- mlflow/store/db_migrations/versions/4465047574b1_increase_max_dataset_schema_size.py +38 -0
- mlflow/store/db_migrations/versions/451aebb31d03_add_metric_step.py +35 -0
- mlflow/store/db_migrations/versions/5b0e9adcef9c_add_cascade_deletion_to_trace_tables_fk.py +40 -0
- mlflow/store/db_migrations/versions/6953534de441_add_step_to_inputs_table.py +25 -0
- mlflow/store/db_migrations/versions/728d730b5ebd_add_registered_model_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/7ac759974ad8_update_run_tags_with_larger_limit.py +36 -0
- mlflow/store/db_migrations/versions/7f2a7d5fae7d_add_datasets_inputs_input_tags_tables.py +82 -0
- mlflow/store/db_migrations/versions/84291f40a231_add_run_link_to_model_version.py +26 -0
- mlflow/store/db_migrations/versions/867495a8f9d4_add_trace_tables.py +90 -0
- mlflow/store/db_migrations/versions/89d4b8295536_create_latest_metrics_table.py +169 -0
- mlflow/store/db_migrations/versions/90e64c465722_migrate_user_column_to_tags.py +64 -0
- mlflow/store/db_migrations/versions/97727af70f4d_creation_time_last_update_time_experiments.py +25 -0
- mlflow/store/db_migrations/versions/__init__.py +0 -0
- mlflow/store/db_migrations/versions/a8c4a736bde6_allow_nulls_for_run_id.py +27 -0
- mlflow/store/db_migrations/versions/acf3f17fdcc7_add_storage_location_field_to_model_.py +29 -0
- mlflow/store/db_migrations/versions/bd07f7e963c5_create_index_on_run_uuid.py +26 -0
- mlflow/store/db_migrations/versions/bda7b8c39065_increase_model_version_tag_value_limit.py +38 -0
- mlflow/store/db_migrations/versions/c48cb773bb87_reset_default_value_for_is_nan_in_metrics_table_for_mysql.py +41 -0
- mlflow/store/db_migrations/versions/cbc13b556ace_add_v3_trace_schema_columns.py +31 -0
- mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py +34 -0
- mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +78 -0
- mlflow/store/db_migrations/versions/df50e92ffc5e_add_experiment_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/f5a4f2784254_increase_run_tag_value_limit.py +36 -0
- mlflow/store/entities/__init__.py +3 -0
- mlflow/store/entities/paged_list.py +18 -0
- mlflow/store/model_registry/__init__.py +10 -0
- mlflow/store/model_registry/abstract_store.py +1081 -0
- mlflow/store/model_registry/base_rest_store.py +44 -0
- mlflow/store/model_registry/databricks_workspace_model_registry_rest_store.py +37 -0
- mlflow/store/model_registry/dbmodels/__init__.py +0 -0
- mlflow/store/model_registry/dbmodels/models.py +206 -0
- mlflow/store/model_registry/file_store.py +1091 -0
- mlflow/store/model_registry/rest_store.py +481 -0
- mlflow/store/model_registry/sqlalchemy_store.py +1286 -0
- mlflow/store/tracking/__init__.py +23 -0
- mlflow/store/tracking/abstract_store.py +816 -0
- mlflow/store/tracking/dbmodels/__init__.py +0 -0
- mlflow/store/tracking/dbmodels/initial_models.py +243 -0
- mlflow/store/tracking/dbmodels/models.py +1073 -0
- mlflow/store/tracking/file_store.py +2438 -0
- mlflow/store/tracking/postgres_managed_identity.py +146 -0
- mlflow/store/tracking/rest_store.py +1131 -0
- mlflow/store/tracking/sqlalchemy_store.py +2785 -0
- mlflow/system_metrics/__init__.py +61 -0
- mlflow/system_metrics/metrics/__init__.py +0 -0
- mlflow/system_metrics/metrics/base_metrics_monitor.py +32 -0
- mlflow/system_metrics/metrics/cpu_monitor.py +23 -0
- mlflow/system_metrics/metrics/disk_monitor.py +21 -0
- mlflow/system_metrics/metrics/gpu_monitor.py +71 -0
- mlflow/system_metrics/metrics/network_monitor.py +34 -0
- mlflow/system_metrics/metrics/rocm_monitor.py +123 -0
- mlflow/system_metrics/system_metrics_monitor.py +198 -0
- mlflow/tracing/__init__.py +16 -0
- mlflow/tracing/assessment.py +356 -0
- mlflow/tracing/client.py +531 -0
- mlflow/tracing/config.py +125 -0
- mlflow/tracing/constant.py +105 -0
- mlflow/tracing/destination.py +81 -0
- mlflow/tracing/display/__init__.py +40 -0
- mlflow/tracing/display/display_handler.py +196 -0
- mlflow/tracing/export/async_export_queue.py +186 -0
- mlflow/tracing/export/inference_table.py +138 -0
- mlflow/tracing/export/mlflow_v3.py +137 -0
- mlflow/tracing/export/utils.py +70 -0
- mlflow/tracing/fluent.py +1417 -0
- mlflow/tracing/processor/base_mlflow.py +199 -0
- mlflow/tracing/processor/inference_table.py +175 -0
- mlflow/tracing/processor/mlflow_v3.py +47 -0
- mlflow/tracing/processor/otel.py +73 -0
- mlflow/tracing/provider.py +487 -0
- mlflow/tracing/trace_manager.py +200 -0
- mlflow/tracing/utils/__init__.py +616 -0
- mlflow/tracing/utils/artifact_utils.py +28 -0
- mlflow/tracing/utils/copy.py +55 -0
- mlflow/tracing/utils/environment.py +55 -0
- mlflow/tracing/utils/exception.py +21 -0
- mlflow/tracing/utils/once.py +35 -0
- mlflow/tracing/utils/otlp.py +63 -0
- mlflow/tracing/utils/processor.py +54 -0
- mlflow/tracing/utils/search.py +292 -0
- mlflow/tracing/utils/timeout.py +250 -0
- mlflow/tracing/utils/token.py +19 -0
- mlflow/tracing/utils/truncation.py +124 -0
- mlflow/tracing/utils/warning.py +76 -0
- mlflow/tracking/__init__.py +39 -0
- mlflow/tracking/_model_registry/__init__.py +1 -0
- mlflow/tracking/_model_registry/client.py +764 -0
- mlflow/tracking/_model_registry/fluent.py +853 -0
- mlflow/tracking/_model_registry/registry.py +67 -0
- mlflow/tracking/_model_registry/utils.py +251 -0
- mlflow/tracking/_tracking_service/__init__.py +0 -0
- mlflow/tracking/_tracking_service/client.py +883 -0
- mlflow/tracking/_tracking_service/registry.py +56 -0
- mlflow/tracking/_tracking_service/utils.py +275 -0
- mlflow/tracking/artifact_utils.py +179 -0
- mlflow/tracking/client.py +5900 -0
- mlflow/tracking/context/__init__.py +0 -0
- mlflow/tracking/context/abstract_context.py +35 -0
- mlflow/tracking/context/databricks_cluster_context.py +15 -0
- mlflow/tracking/context/databricks_command_context.py +15 -0
- mlflow/tracking/context/databricks_job_context.py +49 -0
- mlflow/tracking/context/databricks_notebook_context.py +41 -0
- mlflow/tracking/context/databricks_repo_context.py +43 -0
- mlflow/tracking/context/default_context.py +51 -0
- mlflow/tracking/context/git_context.py +32 -0
- mlflow/tracking/context/registry.py +98 -0
- mlflow/tracking/context/system_environment_context.py +15 -0
- mlflow/tracking/default_experiment/__init__.py +1 -0
- mlflow/tracking/default_experiment/abstract_context.py +43 -0
- mlflow/tracking/default_experiment/databricks_notebook_experiment_provider.py +44 -0
- mlflow/tracking/default_experiment/registry.py +75 -0
- mlflow/tracking/fluent.py +3595 -0
- mlflow/tracking/metric_value_conversion_utils.py +93 -0
- mlflow/tracking/multimedia.py +206 -0
- mlflow/tracking/registry.py +86 -0
- mlflow/tracking/request_auth/__init__.py +0 -0
- mlflow/tracking/request_auth/abstract_request_auth_provider.py +34 -0
- mlflow/tracking/request_auth/registry.py +60 -0
- mlflow/tracking/request_header/__init__.py +0 -0
- mlflow/tracking/request_header/abstract_request_header_provider.py +36 -0
- mlflow/tracking/request_header/databricks_request_header_provider.py +38 -0
- mlflow/tracking/request_header/default_request_header_provider.py +17 -0
- mlflow/tracking/request_header/registry.py +79 -0
- mlflow/transformers/__init__.py +2982 -0
- mlflow/transformers/flavor_config.py +258 -0
- mlflow/transformers/hub_utils.py +83 -0
- mlflow/transformers/llm_inference_utils.py +468 -0
- mlflow/transformers/model_io.py +301 -0
- mlflow/transformers/peft.py +51 -0
- mlflow/transformers/signature.py +183 -0
- mlflow/transformers/torch_utils.py +55 -0
- mlflow/types/__init__.py +21 -0
- mlflow/types/agent.py +270 -0
- mlflow/types/chat.py +240 -0
- mlflow/types/llm.py +935 -0
- mlflow/types/responses.py +139 -0
- mlflow/types/responses_helpers.py +416 -0
- mlflow/types/schema.py +1505 -0
- mlflow/types/type_hints.py +647 -0
- mlflow/types/utils.py +753 -0
- mlflow/utils/__init__.py +283 -0
- mlflow/utils/_capture_modules.py +256 -0
- mlflow/utils/_capture_transformers_modules.py +75 -0
- mlflow/utils/_spark_utils.py +201 -0
- mlflow/utils/_unity_catalog_oss_utils.py +97 -0
- mlflow/utils/_unity_catalog_utils.py +479 -0
- mlflow/utils/annotations.py +218 -0
- mlflow/utils/arguments_utils.py +16 -0
- mlflow/utils/async_logging/__init__.py +1 -0
- mlflow/utils/async_logging/async_artifacts_logging_queue.py +258 -0
- mlflow/utils/async_logging/async_logging_queue.py +366 -0
- mlflow/utils/async_logging/run_artifact.py +38 -0
- mlflow/utils/async_logging/run_batch.py +58 -0
- mlflow/utils/async_logging/run_operations.py +49 -0
- mlflow/utils/autologging_utils/__init__.py +737 -0
- mlflow/utils/autologging_utils/client.py +432 -0
- mlflow/utils/autologging_utils/config.py +33 -0
- mlflow/utils/autologging_utils/events.py +294 -0
- mlflow/utils/autologging_utils/logging_and_warnings.py +328 -0
- mlflow/utils/autologging_utils/metrics_queue.py +71 -0
- mlflow/utils/autologging_utils/safety.py +1104 -0
- mlflow/utils/autologging_utils/versioning.py +95 -0
- mlflow/utils/checkpoint_utils.py +206 -0
- mlflow/utils/class_utils.py +6 -0
- mlflow/utils/cli_args.py +257 -0
- mlflow/utils/conda.py +354 -0
- mlflow/utils/credentials.py +231 -0
- mlflow/utils/data_utils.py +17 -0
- mlflow/utils/databricks_utils.py +1436 -0
- mlflow/utils/docstring_utils.py +477 -0
- mlflow/utils/doctor.py +133 -0
- mlflow/utils/download_cloud_file_chunk.py +43 -0
- mlflow/utils/env_manager.py +16 -0
- mlflow/utils/env_pack.py +131 -0
- mlflow/utils/environment.py +1009 -0
- mlflow/utils/exception_utils.py +14 -0
- mlflow/utils/file_utils.py +978 -0
- mlflow/utils/git_utils.py +77 -0
- mlflow/utils/gorilla.py +797 -0
- mlflow/utils/import_hooks/__init__.py +363 -0
- mlflow/utils/lazy_load.py +51 -0
- mlflow/utils/logging_utils.py +168 -0
- mlflow/utils/mime_type_utils.py +58 -0
- mlflow/utils/mlflow_tags.py +103 -0
- mlflow/utils/model_utils.py +486 -0
- mlflow/utils/name_utils.py +346 -0
- mlflow/utils/nfs_on_spark.py +62 -0
- mlflow/utils/openai_utils.py +164 -0
- mlflow/utils/os.py +12 -0
- mlflow/utils/oss_registry_utils.py +29 -0
- mlflow/utils/plugins.py +17 -0
- mlflow/utils/process.py +182 -0
- mlflow/utils/promptlab_utils.py +146 -0
- mlflow/utils/proto_json_utils.py +743 -0
- mlflow/utils/pydantic_utils.py +54 -0
- mlflow/utils/request_utils.py +279 -0
- mlflow/utils/requirements_utils.py +704 -0
- mlflow/utils/rest_utils.py +673 -0
- mlflow/utils/search_logged_model_utils.py +127 -0
- mlflow/utils/search_utils.py +2111 -0
- mlflow/utils/secure_loading.py +221 -0
- mlflow/utils/security_validation.py +384 -0
- mlflow/utils/server_cli_utils.py +61 -0
- mlflow/utils/spark_utils.py +15 -0
- mlflow/utils/string_utils.py +138 -0
- mlflow/utils/thread_utils.py +63 -0
- mlflow/utils/time.py +54 -0
- mlflow/utils/timeout.py +42 -0
- mlflow/utils/uri.py +572 -0
- mlflow/utils/validation.py +662 -0
- mlflow/utils/virtualenv.py +458 -0
- mlflow/utils/warnings_utils.py +25 -0
- mlflow/utils/yaml_utils.py +179 -0
- mlflow/version.py +24 -0
@@ -0,0 +1,223 @@
|
|
1
|
+
import json
|
2
|
+
import logging
|
3
|
+
from functools import cached_property
|
4
|
+
from typing import Any, Optional, Union
|
5
|
+
|
6
|
+
import numpy as np
|
7
|
+
|
8
|
+
from mlflow.data.dataset import Dataset
|
9
|
+
from mlflow.data.dataset_source import DatasetSource
|
10
|
+
from mlflow.data.digest_utils import compute_numpy_digest
|
11
|
+
from mlflow.data.evaluation_dataset import EvaluationDataset
|
12
|
+
from mlflow.data.pyfunc_dataset_mixin import PyFuncConvertibleDatasetMixin, PyFuncInputsOutputs
|
13
|
+
from mlflow.data.schema import TensorDatasetSchema
|
14
|
+
from mlflow.types.utils import _infer_schema
|
15
|
+
|
16
|
+
_logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
|
19
|
+
class NumpyDataset(Dataset, PyFuncConvertibleDatasetMixin):
    """
    A dataset backed by NumPy arrays, for use with MLflow Tracking.

    Features and (optional) targets are each held either as a single ``np.ndarray`` or as a
    dictionary mapping names to ``np.ndarray`` objects.
    """

    def __init__(
        self,
        features: Union[np.ndarray, dict[str, np.ndarray]],
        source: DatasetSource,
        targets: Union[np.ndarray, dict[str, np.ndarray]] = None,
        name: Optional[str] = None,
        digest: Optional[str] = None,
    ):
        """
        Args:
            features: The dataset features, as a numpy array or a dictionary of numpy arrays.
            source: The source of the numpy dataset.
            targets: The dataset targets, as a numpy array or a dictionary of numpy arrays.
                Optional.
            name: The name of the dataset, e.g. "wiki_train". If unspecified, a name is
                automatically generated.
            digest: The digest (hash, fingerprint) of the dataset. If unspecified, a digest
                is automatically computed.
        """
        # The arrays are stored before delegating to the base initializer -- presumably
        # because it may call ``_compute_digest``, which reads them (confirm in ``Dataset``).
        self._features = features
        self._targets = targets
        super().__init__(source=source, name=name, digest=digest)

    def _compute_digest(self) -> str:
        """
        Return a digest derived from the features and targets. Used when the caller did
        not pass an explicit digest at construction time.
        """
        return compute_numpy_digest(self._features, self._targets)

    def to_dict(self) -> dict[str, str]:
        """Create config dictionary for the dataset.

        Extends the base class dictionary with two JSON-encoded entries: ``schema`` (``None``
        when no schema is available) and ``profile``.
        """
        inferred = self.schema
        serialized_schema = json.dumps(inferred.to_dict()) if inferred else None
        config = super().to_dict()
        config["schema"] = serialized_schema
        config["profile"] = json.dumps(self.profile)
        return config

    @property
    def source(self) -> DatasetSource:
        """
        The source this dataset was constructed with.
        """
        return self._source

    @property
    def features(self) -> Union[np.ndarray, dict[str, np.ndarray]]:
        """
        The feature array, or dictionary of feature arrays, held by the dataset.
        """
        return self._features

    @property
    def targets(self) -> Optional[Union[np.ndarray, dict[str, np.ndarray]]]:
        """
        The target array, or dictionary of target arrays; ``None`` when no targets were given.
        """
        return self._targets

    @property
    def profile(self) -> Optional[Any]:
        """
        A dictionary with the ``shape``, ``size`` and ``nbytes`` of the features and, when
        targets are present, of the targets. For dictionary inputs each entry is itself a
        dictionary keyed like the input.
        """

        def describe(data, attribute):
            # A single array is queried directly; a dict is queried array by array.
            if not isinstance(data, dict):
                return getattr(data, attribute)
            return {key: getattr(arr, attribute) for key, arr in data.items()}

        summary = {
            "features_shape": describe(self._features, "shape"),
            "features_size": describe(self._features, "size"),
            "features_nbytes": describe(self._features, "nbytes"),
        }
        if self._targets is None:
            return summary

        summary["targets_shape"] = describe(self._targets, "shape")
        summary["targets_size"] = describe(self._targets, "size")
        summary["targets_nbytes"] = describe(self._targets, "nbytes")
        return summary

    @cached_property
    def schema(self) -> Optional[TensorDatasetSchema]:
        """
        The :py:class:`TensorDatasetSchema` inferred from the features and (if present) the
        targets. ``None`` when inference raises; the failure is logged as a warning.
        """
        try:
            feature_spec = _infer_schema(self._features)
            target_spec = _infer_schema(self._targets) if self._targets is not None else None
            return TensorDatasetSchema(features=feature_spec, targets=target_spec)
        except Exception as exc:
            # Best effort: a dataset without an inferable schema is still usable.
            _logger.warning("Failed to infer schema for NumPy dataset. Exception: %s", exc)
            return None

    def to_pyfunc(self) -> PyFuncInputsOutputs:
        """
        Wrap the features and targets in a :py:class:`PyFuncInputsOutputs` for model
        evaluation.
        """
        return PyFuncInputsOutputs(self._features, self._targets)

    def to_evaluation_dataset(self, path=None, feature_names=None) -> EvaluationDataset:
        """
        Build an :py:class:`EvaluationDataset` from the features and targets, forwarding
        ``path`` and ``feature_names`` together with this dataset's name and digest.
        """
        return EvaluationDataset(
            data=self._features,
            targets=self._targets,
            path=path,
            feature_names=feature_names,
            name=self.name,
            digest=self.digest,
        )
|
154
|
+
|
155
|
+
|
156
|
+
def from_numpy(
    features: Union[np.ndarray, dict[str, np.ndarray]],
    source: Union[str, DatasetSource] = None,
    targets: Union[np.ndarray, dict[str, np.ndarray]] = None,
    name: Optional[str] = None,
    digest: Optional[str] = None,
) -> NumpyDataset:
    """
    Build a :py:class:`NumpyDataset <mlflow.data.numpy_dataset.NumpyDataset>` from NumPy
    features, optional targets, and a source. A ``source`` that is already a
    :py:class:`DatasetSource <mlflow.data.dataset_source.DatasetSource>` is used as is; any
    other non-``None`` value is resolved into one via ``resolve_dataset_source``.

    Args:
        features: NumPy features, represented as an np.ndarray or dictionary of named np.ndarrays.
        source: The source from which the numpy data was derived, e.g. a filesystem path, an S3 URI,
            an HTTPS URL, a delta table name with version, or spark table etc. ``source`` may be
            specified as a URI, a path-like string, or an instance of
            :py:class:`DatasetSource <mlflow.data.dataset_source.DatasetSource>`. If unspecified,
            the source is assumed to be the code location (e.g. notebook cell, script, etc.) where
            :py:func:`from_numpy <mlflow.data.from_numpy>` is being called.
        targets: Optional NumPy targets, represented as an np.ndarray or dictionary of named
            np.ndarrays.
        name: The name of the dataset. If unspecified, a name is generated.
        digest: The dataset digest (hash). If unspecified, a digest is computed automatically.

    .. code-block:: python
        :test:
        :caption: Basic Example

        import mlflow
        import numpy as np

        x = np.random.uniform(size=[2, 5, 4])
        y = np.random.randint(2, size=[2])
        dataset = mlflow.data.from_numpy(x, targets=y)

    .. code-block:: python
        :test:
        :caption: Dict Example

        import mlflow
        import numpy as np

        x = {
            "feature_1": np.random.uniform(size=[2, 5, 4]),
            "feature_2": np.random.uniform(size=[2, 5, 4]),
        }
        y = np.random.randint(2, size=[2])
        dataset = mlflow.data.from_numpy(x, targets=y)
    """
    # Imported at call time, as in the original module layout.
    from mlflow.data.code_dataset_source import CodeDatasetSource
    from mlflow.data.dataset_source_registry import resolve_dataset_source
    from mlflow.tracking.context import registry

    if source is None:
        # No source given: describe the calling code via the resolved context tags.
        resolved_source = CodeDatasetSource(tags=registry.resolve_tags())
    elif isinstance(source, DatasetSource):
        resolved_source = source
    else:
        resolved_source = resolve_dataset_source(source)

    return NumpyDataset(
        features=features,
        source=resolved_source,
        targets=targets,
        name=name,
        digest=digest,
    )
|
@@ -0,0 +1,231 @@
|
|
1
|
+
import json
|
2
|
+
import logging
|
3
|
+
from functools import cached_property
|
4
|
+
from typing import Any, Optional, Union
|
5
|
+
|
6
|
+
import pandas as pd
|
7
|
+
|
8
|
+
from mlflow.data.dataset import Dataset
|
9
|
+
from mlflow.data.dataset_source import DatasetSource
|
10
|
+
from mlflow.data.digest_utils import compute_pandas_digest
|
11
|
+
from mlflow.data.evaluation_dataset import EvaluationDataset
|
12
|
+
from mlflow.data.pyfunc_dataset_mixin import PyFuncConvertibleDatasetMixin, PyFuncInputsOutputs
|
13
|
+
from mlflow.exceptions import MlflowException
|
14
|
+
from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
|
15
|
+
from mlflow.types import Schema
|
16
|
+
from mlflow.types.utils import _infer_schema
|
17
|
+
|
18
|
+
_logger = logging.getLogger(__name__)
|
19
|
+
|
20
|
+
|
21
|
+
class PandasDataset(Dataset, PyFuncConvertibleDatasetMixin):
    """
    Represents a Pandas DataFrame for use with MLflow Tracking.
    """

    def __init__(
        self,
        df: pd.DataFrame,
        source: DatasetSource,
        targets: Optional[str] = None,
        name: Optional[str] = None,
        digest: Optional[str] = None,
        predictions: Optional[str] = None,
    ):
        """
        Args:
            df: A pandas DataFrame.
            source: The source of the pandas DataFrame.
            targets: The name of the target column. Optional.
            name: The name of the dataset. E.g. "wiki_train". If unspecified, a name is
                automatically generated.
            digest: The digest (hash, fingerprint) of the dataset. If unspecified, a digest
                is automatically computed.
            predictions: Optional. The name of the column containing model predictions,
                if the dataset contains model predictions. If specified, this column
                must be present in the dataframe (``df``).

        Raises:
            MlflowException: With ``INVALID_PARAMETER_VALUE`` if ``targets`` or
                ``predictions`` is given but is not a column of ``df``.
        """
        if targets is not None and targets not in df.columns:
            raise MlflowException(
                "The specified pandas DataFrame does not contain the specified targets column"
                f" '{targets}'.",
                INVALID_PARAMETER_VALUE,
            )
        if predictions is not None and predictions not in df.columns:
            raise MlflowException(
                "The specified pandas DataFrame does not contain the specified predictions column"
                f" '{predictions}'.",
                INVALID_PARAMETER_VALUE,
            )
        # State is stored before the base initializer runs -- presumably because it may
        # call ``_compute_digest``, which reads ``self._df`` (confirm in ``Dataset``).
        self._df = df
        self._targets = targets
        self._predictions = predictions
        super().__init__(source=source, name=name, digest=digest)

    def _compute_digest(self) -> str:
        """
        Computes a digest for the dataset. Called if the user doesn't supply
        a digest when constructing the dataset.
        """
        return compute_pandas_digest(self._df)

    def to_dict(self) -> dict[str, str]:
        """Create config dictionary for the dataset.

        Returns the base class dictionary extended with two JSON-encoded entries:
        ``schema`` (wrapped under the ``mlflow_colspec`` key; ``None`` when no schema is
        available) and ``profile``.
        """
        schema = json.dumps({"mlflow_colspec": self.schema.to_dict()}) if self.schema else None
        config = super().to_dict()
        config.update(
            {
                "schema": schema,
                "profile": json.dumps(self.profile),
            }
        )
        return config

    @property
    def df(self) -> pd.DataFrame:
        """
        The underlying pandas DataFrame.
        """
        return self._df

    @property
    def source(self) -> DatasetSource:
        """
        The source of the dataset.
        """
        return self._source

    @property
    def targets(self) -> Optional[str]:
        """
        The name of the target column. May be ``None`` if no target column is available.
        """
        return self._targets

    @property
    def predictions(self) -> Optional[str]:
        """
        The name of the predictions column. May be ``None`` if no predictions column is available.
        """
        return self._predictions

    @property
    def profile(self) -> Optional[Any]:
        """
        A profile of the dataset: the number of rows (``num_rows``) and the total number of
        elements (``num_elements``) of the DataFrame.
        """
        return {
            "num_rows": len(self._df),
            # ``DataFrame.size`` is cast to a plain ``int`` so the profile is JSON-serializable.
            "num_elements": int(self._df.size),
        }

    @cached_property
    def schema(self) -> Optional[Schema]:
        """
        An instance of :py:class:`mlflow.types.Schema` representing the tabular dataset. May be
        ``None`` if the schema cannot be inferred from the dataset.
        """
        try:
            return _infer_schema(self._df)
        except Exception as e:
            # Best effort: a dataset without an inferable schema is still usable.
            _logger.debug("Failed to infer schema for Pandas dataset. Exception: %s", e)
            return None

    def to_pyfunc(self) -> PyFuncInputsOutputs:
        """
        Converts the dataset to a collection of pyfunc inputs and outputs for model
        evaluation. Required for use with mlflow.evaluate().

        When a targets column was specified, it is removed from the inputs and returned as
        the outputs; otherwise the whole DataFrame is returned as inputs.
        """
        # Compare against ``None`` rather than relying on truthiness: ``__init__`` accepts
        # any column label that is present in ``df``, including falsy ones such as "".
        if self._targets is None:
            return PyFuncInputsOutputs(self._df)

        inputs = self._df.drop(columns=[self._targets])
        outputs = self._df[self._targets]
        return PyFuncInputsOutputs(inputs, outputs)

    def to_evaluation_dataset(self, path=None, feature_names=None) -> EvaluationDataset:
        """
        Converts the dataset to an EvaluationDataset for model evaluation. Required
        for use with mlflow.evaluate().
        """
        return EvaluationDataset(
            data=self._df,
            targets=self._targets,
            path=path,
            feature_names=feature_names,
            predictions=self._predictions,
            name=self.name,
            digest=self.digest,
        )
|
164
|
+
|
165
|
+
|
166
|
+
def from_pandas(
    df: pd.DataFrame,
    source: Union[str, DatasetSource] = None,
    targets: Optional[str] = None,
    name: Optional[str] = None,
    digest: Optional[str] = None,
    predictions: Optional[str] = None,
) -> PandasDataset:
    """
    Build a :py:class:`PandasDataset <mlflow.data.pandas_dataset.PandasDataset>` from a Pandas
    DataFrame, an optional targets column, an optional predictions column, and a source.

    Args:
        df: A Pandas DataFrame.
        source: The source from which the DataFrame was derived, e.g. a filesystem
            path, an S3 URI, an HTTPS URL, a delta table name with version, or
            spark table etc. ``source`` may be specified as a URI, a path-like string,
            or an instance of
            :py:class:`DatasetSource <mlflow.data.dataset_source.DatasetSource>`.
            If unspecified, the source is assumed to be the code location
            (e.g. notebook cell, script, etc.) where
            :py:func:`from_pandas <mlflow.data.from_pandas>` is being called.
        targets: An optional target column name for supervised training. This column
            must be present in the dataframe (``df``).
        name: The name of the dataset. If unspecified, a name is generated.
        digest: The dataset digest (hash). If unspecified, a digest is computed
            automatically.
        predictions: An optional predictions column name for model evaluation. This column
            must be present in the dataframe (``df``).

    .. code-block:: python
        :test:
        :caption: Example

        import mlflow
        import pandas as pd

        x = pd.DataFrame(
            [["tom", 10, 1, 1], ["nick", 15, 0, 1], ["july", 14, 1, 1]],
            columns=["Name", "Age", "Label", "ModelOutput"],
        )
        dataset = mlflow.data.from_pandas(x, targets="Label", predictions="ModelOutput")
    """
    # Imported at call time, as in the original module layout.
    from mlflow.data.code_dataset_source import CodeDatasetSource
    from mlflow.data.dataset_source_registry import resolve_dataset_source
    from mlflow.tracking.context import registry

    if source is None:
        # No source given: describe the calling code via the resolved context tags.
        resolved_source = CodeDatasetSource(tags=registry.resolve_tags())
    elif isinstance(source, DatasetSource):
        resolved_source = source
    else:
        resolved_source = resolve_dataset_source(source)

    return PandasDataset(
        df=df,
        source=resolved_source,
        targets=targets,
        name=name,
        digest=digest,
        predictions=predictions,
    )
|