llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff compares the published contents of two publicly available versions of this package, as released to a supported registry. It is provided for informational purposes only and reflects each version exactly as it appears in its public registry.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack_api/inspect_api/fastapi_routes.py (new file, +76):

```diff
@@ -0,0 +1,76 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""FastAPI router for the Inspect API.
+
+This module defines the FastAPI router for the Inspect API using standard
+FastAPI route decorators.
+"""
+
+from typing import Annotated
+
+from fastapi import APIRouter, Query
+
+from llama_stack_api.router_utils import PUBLIC_ROUTE_KEY, standard_responses
+from llama_stack_api.version import LLAMA_STACK_API_V1
+
+from .api import Inspect
+from .models import (
+    ApiFilter,
+    HealthInfo,
+    ListRoutesResponse,
+    VersionInfo,
+)
+
+
+def create_router(impl: Inspect) -> APIRouter:
+    """Create a FastAPI router for the Inspect API."""
+    router = APIRouter(
+        prefix=f"/{LLAMA_STACK_API_V1}",
+        tags=["Inspect"],
+        responses=standard_responses,
+    )
+
+    @router.get(
+        "/inspect/routes",
+        response_model=ListRoutesResponse,
+        summary="List routes.",
+        description="List all available API routes with their methods and implementing providers.",
+        responses={200: {"description": "Response containing information about all available routes."}},
+    )
+    async def list_routes(
+        api_filter: Annotated[
+            ApiFilter | None,
+            Query(
+                description="Optional filter to control which routes are returned. Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or 'deprecated' to show deprecated routes across all levels. If not specified, returns all non-deprecated routes."
+            ),
+        ] = None,
+    ) -> ListRoutesResponse:
+        return await impl.list_routes(api_filter)
+
+    @router.get(
+        "/health",
+        response_model=HealthInfo,
+        summary="Get health status.",
+        description="Get the current health status of the service.",
+        responses={200: {"description": "Health information indicating if the service is operational."}},
+        openapi_extra={PUBLIC_ROUTE_KEY: True},
+    )
+    async def health() -> HealthInfo:
+        return await impl.health()
+
+    @router.get(
+        "/version",
+        response_model=VersionInfo,
+        summary="Get version.",
+        description="Get the version of the service.",
+        responses={200: {"description": "Version information containing the service version number."}},
+        openapi_extra={PUBLIC_ROUTE_KEY: True},
+    )
+    async def version() -> VersionInfo:
+        return await impl.version()
+
+    return router
```
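The factory pattern above keeps the router decoupled from any concrete implementation. A hypothetical mounting sketch (not from the diff; `MyInspect` is a stand-in for a concrete `Inspect` implementation, and the route prefix assumes `LLAMA_STACK_API_V1 == "v1"`):

```python
# Illustrative sketch only. MyInspect is a hypothetical stand-in for a
# concrete implementation of the Inspect protocol.
from fastapi import FastAPI

from llama_stack_api.inspect_api.fastapi_routes import create_router

app = FastAPI()
app.include_router(create_router(MyInspect()))
# Assuming LLAMA_STACK_API_V1 == "v1", this serves:
#   GET /v1/inspect/routes, GET /v1/health, GET /v1/version
```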
llama_stack_api/inspect_api/models.py (new file, +28):

```diff
@@ -0,0 +1,28 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Pydantic models for Inspect API requests and responses.
+
+This module re-exports models from llama_stack_api.admin.models to ensure
+a single source of truth and avoid type conflicts.
+"""
+
+# Import and re-export shared models from admin
+from llama_stack_api.admin.models import (
+    ApiFilter,
+    HealthInfo,
+    ListRoutesResponse,
+    RouteInfo,
+    VersionInfo,
+)
+
+__all__ = [
+    "ApiFilter",
+    "RouteInfo",
+    "HealthInfo",
+    "VersionInfo",
+    "ListRoutesResponse",
+]
```
llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py:

```diff
@@ -9,6 +9,8 @@ from typing import Protocol


 class KVStore(Protocol):
+    """Protocol for simple key/value storage backends."""
+
     # TODO: make the value type bytes instead of str
     async def set(self, key: str, value: str, expiration: datetime | None = None) -> None: ...

@@ -19,3 +21,6 @@ class KVStore(Protocol):
     async def values_in_range(self, start_key: str, end_key: str) -> list[str]: ...

     async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: ...
+
+
+__all__ = ["KVStore"]
```
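Because `KVStore` is a structural `Protocol`, any object with matching async methods satisfies it. A toy in-memory backend, illustrative only (the `get`/`delete` signatures are assumed, since the hunks above show only three of the protocol's methods):

```python
# Illustrative only: a minimal in-memory KVStore. get/delete signatures
# are assumed; expiration is enforced lazily on read.
from datetime import datetime


class InMemoryKVStore:
    def __init__(self) -> None:
        self._data: dict[str, tuple[str, datetime | None]] = {}

    async def set(self, key: str, value: str, expiration: datetime | None = None) -> None:
        self._data[key] = (value, expiration)

    async def get(self, key: str) -> str | None:  # assumed signature
        entry = self._data.get(key)
        if entry is None:
            return None
        value, expires = entry
        if expires is not None and datetime.now() >= expires:
            del self._data[key]
            return None
        return value

    async def delete(self, key: str) -> None:  # assumed signature
        self._data.pop(key, None)

    async def values_in_range(self, start_key: str, end_key: str) -> list[str]:
        return [v for k, (v, _) in sorted(self._data.items()) if start_key <= k <= end_key]

    async def keys_in_range(self, start_key: str, end_key: str) -> list[str]:
        return [k for k in sorted(self._data) if start_key <= k <= end_key]
```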
llama_stack_api/internal/sqlstore.py (new file, +79):

```diff
@@ -0,0 +1,79 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from collections.abc import Mapping, Sequence
+from enum import Enum
+from typing import Any, Literal, Protocol
+
+from pydantic import BaseModel
+
+from llama_stack_api import PaginatedResponse
+
+
+class ColumnType(Enum):
+    INTEGER = "INTEGER"
+    STRING = "STRING"
+    TEXT = "TEXT"
+    FLOAT = "FLOAT"
+    BOOLEAN = "BOOLEAN"
+    JSON = "JSON"
+    DATETIME = "DATETIME"
+
+
+class ColumnDefinition(BaseModel):
+    type: ColumnType
+    primary_key: bool = False
+    nullable: bool = True
+    default: Any = None
+
+
+class SqlStore(Protocol):
+    """Protocol for common SQL-store functionality."""
+
+    async def create_table(self, table: str, schema: Mapping[str, ColumnType | ColumnDefinition]) -> None: ...
+
+    async def insert(self, table: str, data: Mapping[str, Any] | Sequence[Mapping[str, Any]]) -> None: ...
+
+    async def upsert(
+        self,
+        table: str,
+        data: Mapping[str, Any],
+        conflict_columns: list[str],
+        update_columns: list[str] | None = None,
+    ) -> None: ...
+
+    async def fetch_all(
+        self,
+        table: str,
+        where: Mapping[str, Any] | None = None,
+        where_sql: str | None = None,
+        limit: int | None = None,
+        order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None,
+        cursor: tuple[str, str] | None = None,
+    ) -> PaginatedResponse: ...
+
+    async def fetch_one(
+        self,
+        table: str,
+        where: Mapping[str, Any] | None = None,
+        where_sql: str | None = None,
+        order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None,
+    ) -> dict[str, Any] | None: ...
+
+    async def update(self, table: str, data: Mapping[str, Any], where: Mapping[str, Any]) -> None: ...
+
+    async def delete(self, table: str, where: Mapping[str, Any]) -> None: ...
+
+    async def add_column_if_not_exists(
+        self,
+        table: str,
+        column_name: str,
+        column_type: ColumnType,
+        nullable: bool = True,
+    ) -> None: ...
+
+
+__all__ = ["ColumnDefinition", "ColumnType", "SqlStore"]
```
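A hypothetical caller-side sketch of the protocol above; `store` stands in for a concrete backend such as the SQLAlchemy store that now lives under `llama_stack/core/storage/sqlstore/`:

```python
# Illustrative only: driving any SqlStore-conforming backend.
from typing import Any

from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType, SqlStore


async def record_score(store: SqlStore, run_id: str, score: float) -> dict[str, Any] | None:
    await store.create_table(
        "runs",
        {
            "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
            "score": ColumnType.FLOAT,
        },
    )
    # Insert-or-update keyed on the primary key column.
    await store.upsert("runs", {"id": run_id, "score": score}, conflict_columns=["id"])
    return await store.fetch_one("runs", where={"id": run_id})
```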
llama_stack/apis/models/models.py → llama_stack_api/models.py (removed lines ending in … were truncated in the source):

```diff
@@ -9,10 +9,9 @@ from typing import Any, Literal, Protocol, runtime_checkable

 from pydantic import BaseModel, ConfigDict, Field, field_validator

-from …
-from …
-from …
-from llama_stack.schema_utils import json_schema_type, webmethod
+from llama_stack_api.resource import Resource, ResourceType
+from llama_stack_api.schema_utils import json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1


 class CommonModelFields(BaseModel):
@@ -27,10 +26,12 @@ class ModelType(StrEnum):
     """Enumeration of supported model types in Llama Stack.
     :cvar llm: Large language model for text generation and completion
     :cvar embedding: Embedding model for converting text to vector representations
+    :cvar rerank: Reranking model for reordering documents based on their relevance to a query
     """

     llm = "llm"
     embedding = "embedding"
+    rerank = "rerank"


 @json_schema_type
@@ -88,22 +89,23 @@ class OpenAIModel(BaseModel):
     :object: The object type, which will be "model"
     :created: The Unix timestamp in seconds when the model was created
     :owned_by: The owner of the model
+    :custom_metadata: Llama Stack-specific metadata including model_type, provider info, and additional metadata
     """

     id: str
     object: Literal["model"] = "model"
     created: int
     owned_by: str
+    custom_metadata: dict[str, Any] | None = None


+@json_schema_type
 class OpenAIListModelsResponse(BaseModel):
     data: list[OpenAIModel]


 @runtime_checkable
-@trace_protocol
 class Models(Protocol):
-    @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
     async def list_models(self) -> ListModelsResponse:
         """List all models.

@@ -111,7 +113,7 @@ class Models(Protocol):
         """
         ...

-    @webmethod(route="/…
+    @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_models(self) -> OpenAIListModelsResponse:
         """List models using the OpenAI API.

@@ -133,7 +135,7 @@ class Models(Protocol):
         """
         ...

-    @webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     async def register_model(
         self,
         model_id: str,
@@ -155,7 +157,7 @@ class Models(Protocol):
         """
         ...

-    @webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
     async def unregister_model(
         self,
         model_id: str,
```
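Net effect for clients: `ModelType` gains a third variant, and model registration via the raw `POST /models` route is now flagged deprecated in the generated spec. Illustrative only (import path assumed from the file's new `llama_stack_api` location):

```python
# Illustrative only: StrEnum members iterate in definition order.
from llama_stack_api.models import ModelType  # assumed import path

assert [m.value for m in ModelType] == ["llm", "embedding", "rerank"]
```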
llama_stack/apis/agents/openai_responses.py → llama_stack_api/openai_responses.py:

```diff
@@ -4,13 +4,16 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from collections.abc import Sequence
+from enum import Enum
 from typing import Annotated, Any, Literal

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator
 from typing_extensions import TypedDict

-from …
-from …
+from llama_stack_api.inference import OpenAITokenLogProb
+from llama_stack_api.schema_utils import json_schema_type, register_schema
+from llama_stack_api.vector_io import SearchRankingOptions as FileSearchRankingOptions

 # NOTE(ashwin): this file is literally a copy of the OpenAI responses API schema. We should probably
 # take their YAML and generate this file automatically. Their YAML is available.
```
```diff
@@ -46,23 +49,66 @@ class OpenAIResponseInputMessageContentImage(BaseModel):

     :param detail: Level of detail for image processing, can be "low", "high", or "auto"
     :param type: Content type identifier, always "input_image"
+    :param file_id: (Optional) The ID of the file to be sent to the model.
     :param image_url: (Optional) URL of the image content
     """

     detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
     type: Literal["input_image"] = "input_image"
-
+    file_id: str | None = None
     image_url: str | None = None


-
+@json_schema_type
+class OpenAIResponseInputMessageContentFile(BaseModel):
+    """File content for input messages in OpenAI response format.
+
+    :param type: The type of the input item. Always `input_file`.
+    :param file_data: The data of the file to be sent to the model.
+    :param file_id: (Optional) The ID of the file to be sent to the model.
+    :param file_url: The URL of the file to be sent to the model.
+    :param filename: The name of the file to be sent to the model.
+    """
+
+    type: Literal["input_file"] = "input_file"
+    file_data: str | None = None
+    file_id: str | None = None
+    file_url: str | None = None
+    filename: str | None = None
+
+    @model_validator(mode="after")
+    def validate_file_source(self) -> "OpenAIResponseInputMessageContentFile":
+        if not any([self.file_data, self.file_id, self.file_url, self.filename]):
+            raise ValueError(
+                "At least one of 'file_data', 'file_id', 'file_url', or 'filename' must be provided for file content"
+            )
+        return self
+
+
 OpenAIResponseInputMessageContent = Annotated[
-    OpenAIResponseInputMessageContentText…
+    OpenAIResponseInputMessageContentText
+    | OpenAIResponseInputMessageContentImage
+    | OpenAIResponseInputMessageContentFile,
     Field(discriminator="type"),
 ]
 register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")


+@json_schema_type
+class OpenAIResponsePrompt(BaseModel):
+    """OpenAI compatible Prompt object that is used in OpenAI responses.
+
+    :param id: Unique identifier of the prompt template
+    :param variables: Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. The substitution values can either be strings, or other Response input types
+        like images or files.
+    :param version: Version number of the prompt to use (defaults to latest if not specified)
+    """
+
+    id: str
+    variables: dict[str, OpenAIResponseInputMessageContent] | None = None
+    version: str | None = None
+
+
 @json_schema_type
 class OpenAIResponseAnnotationFileCitation(BaseModel):
     """File citation annotation for referencing specific files in response content.
```
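The `after`-mode validator means an empty file part fails at construction time rather than at request time. A hypothetical sketch (import path assumed from the file's new location):

```python
# Illustrative only: at least one source field must be set.
from pydantic import ValidationError

from llama_stack_api.openai_responses import OpenAIResponseInputMessageContentFile

part = OpenAIResponseInputMessageContentFile(file_id="file-abc123", filename="report.pdf")

try:
    OpenAIResponseInputMessageContentFile()  # no file_data / file_id / file_url / filename
except ValidationError:
    pass  # pydantic wraps the ValueError raised by validate_file_source
```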
```diff
@@ -129,6 +175,7 @@ class OpenAIResponseOutputMessageContentOutputText(BaseModel):
     text: str
     type: Literal["output_text"] = "output_text"
     annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list)
+    logprobs: list[OpenAITokenLogProb] | None = None


 @json_schema_type
```
```diff
@@ -159,7 +206,7 @@ class OpenAIResponseMessage(BaseModel):
     scenarios.
     """

-    content: str | …
+    content: str | Sequence[OpenAIResponseInputMessageContent] | Sequence[OpenAIResponseOutputMessageContent]
     role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
     type: Literal["message"] = "message"
```
```diff
@@ -211,10 +258,10 @@ class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
     """

     id: str
-    queries: …
+    queries: Sequence[str]
     status: str
     type: Literal["file_search_call"] = "file_search_call"
-    results: …
+    results: Sequence[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None


 @json_schema_type
```
```diff
@@ -359,7 +406,7 @@ class OpenAIResponseText(BaseModel):


 # Must match type Literals of OpenAIResponseInputToolWebSearch below
-WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
+WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11", "web_search_2025_08_26"]


 @json_schema_type
```
```diff
@@ -371,9 +418,12 @@ class OpenAIResponseInputToolWebSearch(BaseModel):
     """

     # Must match values of WebSearchToolTypes above
-    type: …
-        "web_search" …
-    …
+    type: (
+        Literal["web_search"]
+        | Literal["web_search_preview"]
+        | Literal["web_search_preview_2025_03_11"]
+        | Literal["web_search_2025_08_26"]
+    ) = "web_search"
     # TODO: actually use search_context_size somewhere...
     search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$")
     # TODO: add user_location
```
```diff
@@ -443,6 +493,7 @@ class OpenAIResponseInputToolMCP(BaseModel):
     :param server_label: Label to identify this MCP server
     :param server_url: URL endpoint of the MCP server
     :param headers: (Optional) HTTP headers to include when connecting to the server
+    :param authorization: (Optional) OAuth access token for authenticating with the MCP server
     :param require_approval: Approval requirement for tool calls ("always", "never", or filter)
     :param allowed_tools: (Optional) Restriction on which tools can be used from this server
     """
@@ -451,6 +502,7 @@ class OpenAIResponseInputToolMCP(BaseModel):
     server_label: str
     server_url: str
     headers: dict[str, Any] | None = None
+    authorization: str | None = Field(default=None, exclude=True)

     require_approval: Literal["always"] | Literal["never"] | ApprovalFilter = "never"
     allowed_tools: list[str] | AllowedToolsFilter | None = None
```
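Worth noting: `authorization` is declared with `exclude=True`, so the token can be used when contacting the MCP server but is omitted whenever the tool definition is serialized, e.g. into a stored response. A hypothetical sketch:

```python
# Illustrative only: excluded fields never appear in serialized output.
from llama_stack_api.openai_responses import OpenAIResponseInputToolMCP  # assumed path

tool = OpenAIResponseInputToolMCP(
    server_label="docs",
    server_url="https://mcp.example.com",
    authorization="secret-token",
)
assert "authorization" not in tool.model_dump()
assert tool.authorization is not None  # still available in memory
```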
```diff
@@ -490,6 +542,105 @@ OpenAIResponseTool = Annotated[
 register_schema(OpenAIResponseTool, name="OpenAIResponseTool")


+@json_schema_type
+class OpenAIResponseInputToolChoiceAllowedTools(BaseModel):
+    """Constrains the tools available to the model to a pre-defined set.
+
+    :param mode: Constrains the tools available to the model to a pre-defined set
+    :param tools: A list of tool definitions that the model should be allowed to call
+    :param type: Tool choice type identifier, always "allowed_tools"
+    """
+
+    mode: Literal["auto", "required"] = "auto"
+    tools: list[dict[str, str]]
+    type: Literal["allowed_tools"] = "allowed_tools"
+
+
+@json_schema_type
+class OpenAIResponseInputToolChoiceFileSearch(BaseModel):
+    """Indicates that the model should use file search to generate a response.
+
+    :param type: Tool choice type identifier, always "file_search"
+    """
+
+    type: Literal["file_search"] = "file_search"
+
+
+@json_schema_type
+class OpenAIResponseInputToolChoiceWebSearch(BaseModel):
+    """Indicates that the model should use web search to generate a response
+
+    :param type: Web search tool type variant to use
+    """
+
+    type: (
+        Literal["web_search"]
+        | Literal["web_search_preview"]
+        | Literal["web_search_preview_2025_03_11"]
+        | Literal["web_search_2025_08_26"]
+    ) = "web_search"
+
+
+@json_schema_type
+class OpenAIResponseInputToolChoiceFunctionTool(BaseModel):
+    """Forces the model to call a specific function.
+
+    :param name: The name of the function to call
+    :param type: Tool choice type identifier, always "function"
+    """
+
+    name: str
+    type: Literal["function"] = "function"
+
+
+@json_schema_type
+class OpenAIResponseInputToolChoiceMCPTool(BaseModel):
+    """Forces the model to call a specific tool on a remote MCP server
+
+    :param server_label: The label of the MCP server to use.
+    :param type: Tool choice type identifier, always "mcp"
+    :param name: (Optional) The name of the tool to call on the server.
+    """
+
+    server_label: str
+    type: Literal["mcp"] = "mcp"
+    name: str | None = None
+
+
+@json_schema_type
+class OpenAIResponseInputToolChoiceCustomTool(BaseModel):
+    """Forces the model to call a custom tool.
+
+    :param type: Tool choice type identifier, always "custom"
+    :param name: The name of the custom tool to call.
+    """
+
+    type: Literal["custom"] = "custom"
+    name: str
+
+
+class OpenAIResponseInputToolChoiceMode(str, Enum):
+    auto = "auto"
+    required = "required"
+    none = "none"
+
+
+OpenAIResponseInputToolChoiceObject = Annotated[
+    OpenAIResponseInputToolChoiceAllowedTools
+    | OpenAIResponseInputToolChoiceFileSearch
+    | OpenAIResponseInputToolChoiceWebSearch
+    | OpenAIResponseInputToolChoiceFunctionTool
+    | OpenAIResponseInputToolChoiceMCPTool
+    | OpenAIResponseInputToolChoiceCustomTool,
+    Field(discriminator="type"),
+]
+
+# 3. Final Union without registration or None (Keep it clean)
+OpenAIResponseInputToolChoice = OpenAIResponseInputToolChoiceMode | OpenAIResponseInputToolChoiceObject
+
+register_schema(OpenAIResponseInputToolChoice, name="OpenAIResponseInputToolChoice")
+
+
 class OpenAIResponseUsageOutputTokensDetails(BaseModel):
     """Token details for output tokens in OpenAI response usage.

```
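With the union in place, `tool_choice` accepts either a bare mode or a typed object discriminated on `type`, mirroring the OpenAI Responses API. A hypothetical sketch (assumed import path):

```python
# Illustrative only: both forms are valid OpenAIResponseInputToolChoice values.
from llama_stack_api.openai_responses import (
    OpenAIResponseInputToolChoiceFunctionTool,
    OpenAIResponseInputToolChoiceMCPTool,
    OpenAIResponseInputToolChoiceMode,
)

mode = OpenAIResponseInputToolChoiceMode.required  # plain mode
forced_fn = OpenAIResponseInputToolChoiceFunctionTool(name="get_weather")
forced_mcp = OpenAIResponseInputToolChoiceMCPTool(server_label="docs", name="search")
```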
```diff
@@ -536,16 +687,20 @@ class OpenAIResponseObject(BaseModel):
     :param model: Model identifier used for generation
     :param object: Object type identifier, always "response"
     :param output: List of generated output items (messages, tool calls, etc.)
-    :param parallel_tool_calls: Whether …
+    :param parallel_tool_calls: (Optional) Whether to allow more than one function tool call generated per turn.
     :param previous_response_id: (Optional) ID of the previous response in a conversation
+    :param prompt: (Optional) Reference to a prompt template and its variables.
     :param status: Current status of the response generation
     :param temperature: (Optional) Sampling temperature used for generation
     :param text: Text formatting configuration for the response
     :param top_p: (Optional) Nucleus sampling parameter used for generation
     :param tools: (Optional) An array of tools the model may call while generating a response.
+    :param tool_choice: (Optional) Tool choice configuration for the response.
     :param truncation: (Optional) Truncation strategy applied to the response
     :param usage: (Optional) Token usage information for the response
     :param instructions: (Optional) System message inserted into the model's context
+    :param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response
+    :param metadata: (Optional) Dictionary of metadata key-value pairs
     """

     created_at: int
@@ -553,19 +708,23 @@ class OpenAIResponseObject(BaseModel):
     id: str
     model: str
     object: Literal["response"] = "response"
-    output: …
-    parallel_tool_calls: bool = …
+    output: Sequence[OpenAIResponseOutput]
+    parallel_tool_calls: bool | None = True
     previous_response_id: str | None = None
+    prompt: OpenAIResponsePrompt | None = None
     status: str
     temperature: float | None = None
     # Default to text format to avoid breaking the loading of old responses
     # before the field was added. New responses will have this set always.
     text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
     top_p: float | None = None
-    tools: …
+    tools: Sequence[OpenAIResponseTool] | None = None
+    tool_choice: OpenAIResponseInputToolChoice | None = None
     truncation: str | None = None
     usage: OpenAIResponseUsage | None = None
     instructions: str | None = None
+    max_tool_calls: int | None = None
+    metadata: dict[str, str] | None = None


 @json_schema_type
```
```diff
@@ -691,6 +850,7 @@ class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel):
     :param content_index: Index position within the text content
     :param delta: Incremental text content being added
     :param item_id: Unique identifier of the output item being updated
+    :param logprobs: (Optional) Token log probability details
     :param output_index: Index position of the item in the output list
     :param sequence_number: Sequential number for ordering streaming events
     :param type: Event type identifier, always "response.output_text.delta"
@@ -699,6 +859,7 @@ class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel):
     content_index: int
     delta: str
     item_id: str
+    logprobs: list[OpenAITokenLogProb] | None = None
     output_index: int
     sequence_number: int
     type: Literal["response.output_text.delta"] = "response.output_text.delta"
```
```diff
@@ -889,7 +1050,7 @@ class OpenAIResponseContentPartOutputText(BaseModel):
     type: Literal["output_text"] = "output_text"
     text: str
     annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list)
-    logprobs: list[…
+    logprobs: list[OpenAITokenLogProb] | None = None


 @json_schema_type
```
```diff
@@ -1254,20 +1415,16 @@ class OpenAIResponseInputFunctionToolCallOutput(BaseModel):

 OpenAIResponseInput = Annotated[
     # Responses API allows output messages to be passed in as input
-    …
-    | OpenAIResponseOutputMessageFileSearchToolCall
-    | OpenAIResponseOutputMessageFunctionToolCall
+    OpenAIResponseOutput
     | OpenAIResponseInputFunctionToolCallOutput
-    | OpenAIResponseMCPApprovalRequest
     | OpenAIResponseMCPApprovalResponse
-    | OpenAIResponseOutputMessageMCPCall
-    | OpenAIResponseOutputMessageMCPListTools
     | OpenAIResponseMessage,
     Field(union_mode="left_to_right"),
 ]
 register_schema(OpenAIResponseInput, name="OpenAIResponseInput")


+@json_schema_type
 class ListOpenAIResponseInputItem(BaseModel):
     """List container for OpenAI response input items.

```
```diff
@@ -1275,7 +1432,7 @@ class ListOpenAIResponseInputItem(BaseModel):
     :param object: Object type identifier, always "list"
     """

-    data: …
+    data: Sequence[OpenAIResponseInput]
     object: Literal["list"] = "list"


@@ -1286,7 +1443,7 @@ class OpenAIResponseObjectWithInput(OpenAIResponseObject):
     :param input: List of input items that led to this response
     """

-    input: …
+    input: Sequence[OpenAIResponseInput]

     def to_response_object(self) -> OpenAIResponseObject:
         """Convert to OpenAIResponseObject by excluding input field."""
@@ -1304,7 +1461,7 @@ class ListOpenAIResponseObject(BaseModel):
     :param object: Object type identifier, always "list"
     """

-    data: …
+    data: Sequence[OpenAIResponseObjectWithInput]
     has_more: bool
     first_id: str
     last_id: str
```