llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in that registry.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack_api/files/fastapi_routes.py (new file)
@@ -0,0 +1,124 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Annotated
+
+from fastapi import APIRouter, Depends, UploadFile
+from fastapi.param_functions import File, Form
+from fastapi.responses import Response
+
+from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses
+from llama_stack_api.version import LLAMA_STACK_API_V1
+
+from .api import Files
+from .models import (
+    DeleteFileRequest,
+    ExpiresAfter,
+    ListFilesRequest,
+    ListOpenAIFileResponse,
+    OpenAIFileDeleteResponse,
+    OpenAIFileObject,
+    OpenAIFilePurpose,
+    RetrieveFileContentRequest,
+    RetrieveFileRequest,
+    UploadFileRequest,
+)
+
+# Automatically generate dependency functions from Pydantic models
+# This ensures the models are the single source of truth for descriptions
+get_list_files_request = create_query_dependency(ListFilesRequest)
+get_get_files_request = create_path_dependency(RetrieveFileRequest)
+get_delete_files_request = create_path_dependency(DeleteFileRequest)
+get_retrieve_file_content_request = create_path_dependency(RetrieveFileContentRequest)
+
+
+def create_router(impl: Files) -> APIRouter:
+    router = APIRouter(
+        prefix=f"/{LLAMA_STACK_API_V1}",
+        tags=["Files"],
+        responses=standard_responses,
+    )
+
+    @router.get(
+        "/files",
+        response_model=ListOpenAIFileResponse,
+        summary="List files",
+        description="List files",
+        responses={
+            200: {"description": "The list of files."},
+        },
+    )
+    async def list_files(
+        request: Annotated[ListFilesRequest, Depends(get_list_files_request)],
+    ) -> ListOpenAIFileResponse:
+        return await impl.openai_list_files(request)
+
+    @router.get(
+        "/files/{file_id}",
+        response_model=OpenAIFileObject,
+        summary="Get file",
+        description="Get file",
+        responses={
+            200: {"description": "The file."},
+        },
+    )
+    async def get_file(
+        request: Annotated[RetrieveFileRequest, Depends(get_get_files_request)],
+    ) -> OpenAIFileObject:
+        return await impl.openai_retrieve_file(request)
+
+    @router.delete(
+        "/files/{file_id}",
+        response_model=OpenAIFileDeleteResponse,
+        summary="Delete file",
+        description="Delete file",
+        responses={
+            200: {"description": "The file was deleted."},
+        },
+    )
+    async def delete_file(
+        request: Annotated[DeleteFileRequest, Depends(get_delete_files_request)],
+    ) -> OpenAIFileDeleteResponse:
+        return await impl.openai_delete_file(request)
+
+    @router.get(
+        "/files/{file_id}/content",
+        status_code=200,
+        summary="Retrieve file content",
+        description="Retrieve file content",
+        responses={
+            200: {
+                "description": "The raw file content as a binary response.",
+                "content": {"application/json": {"schema": {"$ref": "#/components/schemas/Response"}}},
+            },
+        },
+    )
+    async def retrieve_file_content(
+        request: Annotated[RetrieveFileContentRequest, Depends(get_retrieve_file_content_request)],
+    ) -> Response:
+        return await impl.openai_retrieve_file_content(request)
+
+    @router.post(
+        "/files",
+        response_model=OpenAIFileObject,
+        summary="Upload file",
+        description="Upload a file.",
+        responses={
+            200: {"description": "The uploaded file."},
+        },
+    )
+    async def upload_file(
+        file: Annotated[UploadFile, File(description="The file to upload.")],
+        purpose: Annotated[OpenAIFilePurpose, Form(description="The intended purpose of the uploaded file.")],
+        expires_after: Annotated[ExpiresAfter | None, Form(description="Optional expiration settings.")] = None,
+    ) -> OpenAIFileObject:
+        request = UploadFileRequest(
+            purpose=purpose,
+            expires_after=expires_after,
+        )
+        return await impl.openai_upload_file(request, file)
+
+    return router
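A note on how this factory style is consumed: create_router takes a Files implementation and returns a mountable APIRouter. The sketch below is illustrative only and not part of the package; InMemoryFiles and its canned return values are hypothetical stand-ins, and only create_router and the model names come from the diff above.

from fastapi import FastAPI

from llama_stack_api.files.fastapi_routes import create_router
from llama_stack_api.files.models import ListFilesRequest, ListOpenAIFileResponse


class InMemoryFiles:
    # Hypothetical stub; a real provider backs these methods with storage.
    async def openai_list_files(self, request: ListFilesRequest) -> ListOpenAIFileResponse:
        return ListOpenAIFileResponse(data=[], has_more=False, first_id="", last_id="")

    # ... openai_retrieve_file, openai_delete_file, openai_retrieve_file_content,
    # and openai_upload_file would be implemented the same way.


app = FastAPI()
app.include_router(create_router(InMemoryFiles()))  # routes mount under the LLAMA_STACK_API_V1 prefix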
llama_stack_api/files/models.py (new file)
@@ -0,0 +1,107 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from enum import StrEnum
+from typing import ClassVar, Literal
+
+from pydantic import BaseModel, Field
+
+from llama_stack_api.common.responses import Order
+from llama_stack_api.schema_utils import json_schema_type
+
+
+class OpenAIFilePurpose(StrEnum):
+    """
+    Valid purpose values for OpenAI Files API.
+    """
+
+    ASSISTANTS = "assistants"
+    BATCH = "batch"
+
+
+@json_schema_type
+class OpenAIFileObject(BaseModel):
+    """OpenAI File object as defined in the OpenAI Files API."""
+
+    object: Literal["file"] = Field(default="file", description="The object type, which is always 'file'.")
+    id: str = Field(..., description="The file identifier, which can be referenced in the API endpoints.")
+    bytes: int = Field(..., description="The size of the file, in bytes.")
+    created_at: int = Field(..., description="The Unix timestamp (in seconds) for when the file was created.")
+    expires_at: int = Field(..., description="The Unix timestamp (in seconds) for when the file expires.")
+    filename: str = Field(..., description="The name of the file.")
+    purpose: OpenAIFilePurpose = Field(..., description="The intended purpose of the file.")
+
+
+@json_schema_type
+class ExpiresAfter(BaseModel):
+    """Control expiration of uploaded files."""
+
+    MIN: ClassVar[int] = 3600  # 1 hour
+    MAX: ClassVar[int] = 2592000  # 30 days
+
+    anchor: Literal["created_at"] = Field(..., description="The anchor point for expiration, must be 'created_at'.")
+    seconds: int = Field(
+        ..., ge=MIN, le=MAX, description="Seconds until expiration, between 3600 (1 hour) and 2592000 (30 days)."
+    )
+
+
+@json_schema_type
+class ListOpenAIFileResponse(BaseModel):
+    """Response for listing files in OpenAI Files API."""
+
+    data: list[OpenAIFileObject] = Field(..., description="The list of files.")
+    has_more: bool = Field(..., description="Whether there are more files available beyond this page.")
+    first_id: str = Field(..., description="The ID of the first file in the list for pagination.")
+    last_id: str = Field(..., description="The ID of the last file in the list for pagination.")
+    object: Literal["list"] = Field(default="list", description="The object type, which is always 'list'.")
+
+
+@json_schema_type
+class OpenAIFileDeleteResponse(BaseModel):
+    """Response for deleting a file in OpenAI Files API."""
+
+    id: str = Field(..., description="The file identifier that was deleted.")
+    object: Literal["file"] = Field(default="file", description="The object type, which is always 'file'.")
+    deleted: bool = Field(..., description="Whether the file was successfully deleted.")
+
+
+@json_schema_type
+class ListFilesRequest(BaseModel):
+    """Request model for listing files."""
+
+    after: str | None = Field(default=None, description="A cursor for pagination. Returns files after this ID.")
+    limit: int | None = Field(default=10000, description="Maximum number of files to return (1-10,000).")
+    order: Order | None = Field(default=Order.desc, description="Sort order by created_at timestamp ('asc' or 'desc').")
+    purpose: OpenAIFilePurpose | None = Field(default=None, description="Filter files by purpose.")
+
+
+@json_schema_type
+class RetrieveFileRequest(BaseModel):
+    """Request model for retrieving a file."""
+
+    file_id: str = Field(..., description="The ID of the file to retrieve.")
+
+
+@json_schema_type
+class DeleteFileRequest(BaseModel):
+    """Request model for deleting a file."""
+
+    file_id: str = Field(..., description="The ID of the file to delete.")
+
+
+@json_schema_type
+class RetrieveFileContentRequest(BaseModel):
+    """Request model for retrieving file content."""
+
+    file_id: str = Field(..., description="The ID of the file to retrieve content from.")
+
+
+@json_schema_type
+class UploadFileRequest(BaseModel):
+    """Request model for uploading a file."""
+
+    purpose: OpenAIFilePurpose = Field(..., description="The intended purpose of the uploaded file.")
+    expires_after: ExpiresAfter | None = Field(default=None, description="Optional expiration settings for the file.")
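Because ExpiresAfter.seconds is declared with ge=3600 and le=2592000, out-of-range expirations are rejected at model-construction time rather than in handler code. A small illustration of that behavior (standard pydantic v2; nothing assumed beyond the models above):

from pydantic import ValidationError

from llama_stack_api.files.models import ExpiresAfter, OpenAIFilePurpose, UploadFileRequest

# In range: expire one day after creation.
req = UploadFileRequest(
    purpose=OpenAIFilePurpose.ASSISTANTS,
    expires_after=ExpiresAfter(anchor="created_at", seconds=86400),
)

# Out of range: below the 3600-second minimum, so pydantic raises.
try:
    ExpiresAfter(anchor="created_at", seconds=60)
except ValidationError as err:
    print(err.errors()[0]["type"])  # greater_than_equal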
{llama_stack/apis/inference → llama_stack_api}/inference.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 
 from collections.abc import AsyncIterator
-from enum import Enum
+from enum import Enum, StrEnum
 from typing import (
     Annotated,
     Any,
@@ -15,28 +15,16 @@ from typing import (
 )
 
 from fastapi import Body
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field
 from typing_extensions import TypedDict
 
-from
-from
-
-from llama_stack.apis.telemetry import MetricResponseMixin
-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
-from llama_stack.models.llama.datatypes import (
-    BuiltinTool,
-    StopReason,
-    ToolCall,
-    ToolDefinition,
-    ToolPromptFormat,
+from llama_stack_api.common.content_types import InterleavedContent
+from llama_stack_api.common.responses import (
+    Order,
 )
-from
-from
-
-register_schema(ToolCall)
-register_schema(ToolDefinition)
-
-from enum import StrEnum
+from llama_stack_api.models import Model
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
 
 
 @json_schema_type
@@ -97,7 +85,7 @@ class SamplingParams(BaseModel):
 
     strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)
 
-    max_tokens: int | None =
+    max_tokens: int | None = None
     repetition_penalty: float | None = 1.0
     stop: list[str] | None = None
 
@@ -202,58 +190,6 @@ class ToolResponseMessage(BaseModel):
     content: InterleavedContent
 
 
-@json_schema_type
-class CompletionMessage(BaseModel):
-    """A message containing the model's (assistant) response in a chat conversation.
-
-    :param role: Must be "assistant" to identify this as the model's response
-    :param content: The content of the model's response
-    :param stop_reason: Reason why the model stopped generating. Options are:
-        - `StopReason.end_of_turn`: The model finished generating the entire response.
-        - `StopReason.end_of_message`: The model finished generating but generated a partial response -- usually, a tool call. The user may call the tool and continue the conversation with the tool's response.
-        - `StopReason.out_of_tokens`: The model ran out of token budget.
-    :param tool_calls: List of tool calls. Each tool call is a ToolCall object.
-    """
-
-    role: Literal["assistant"] = "assistant"
-    content: InterleavedContent
-    stop_reason: StopReason
-    tool_calls: list[ToolCall] | None = Field(default_factory=lambda: [])
-
-
-Message = Annotated[
-    UserMessage | SystemMessage | ToolResponseMessage | CompletionMessage,
-    Field(discriminator="role"),
-]
-register_schema(Message, name="Message")
-
-
-@json_schema_type
-class ToolResponse(BaseModel):
-    """Response from a tool invocation.
-
-    :param call_id: Unique identifier for the tool call this response is for
-    :param tool_name: Name of the tool that was invoked
-    :param content: The response content from the tool
-    :param metadata: (Optional) Additional metadata about the tool response
-    """
-
-    call_id: str
-    tool_name: BuiltinTool | str
-    content: InterleavedContent
-    metadata: dict[str, Any] | None = None
-
-    @field_validator("tool_name", mode="before")
-    @classmethod
-    def validate_field(cls, v):
-        if isinstance(v, str):
-            try:
-                return BuiltinTool(v)
-            except ValueError:
-                return v
-        return v
-
-
 class ToolChoice(Enum):
     """Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model.
 
@@ -290,22 +226,6 @@ class ChatCompletionResponseEventType(Enum):
     progress = "progress"
 
 
-@json_schema_type
-class ChatCompletionResponseEvent(BaseModel):
-    """An event during chat completion generation.
-
-    :param event_type: Type of the event
-    :param delta: Content generated since last event. This can be one or more tokens, or a tool call.
-    :param logprobs: Optional log probabilities for generated tokens
-    :param stop_reason: Optional reason why generation stopped, if complete
-    """
-
-    event_type: ChatCompletionResponseEventType
-    delta: ContentDelta
-    logprobs: list[TokenLogProbs] | None = None
-    stop_reason: StopReason | None = None
-
-
 class ResponseFormatType(StrEnum):
     """Types of formats for structured (guided) decoding.
 
@@ -358,34 +278,6 @@ class CompletionRequest(BaseModel):
     logprobs: LogProbConfig | None = None
 
 
-@json_schema_type
-class CompletionResponse(MetricResponseMixin):
-    """Response from a completion request.
-
-    :param content: The generated completion text
-    :param stop_reason: Reason why generation stopped
-    :param logprobs: Optional log probabilities for generated tokens
-    """
-
-    content: str
-    stop_reason: StopReason
-    logprobs: list[TokenLogProbs] | None = None
-
-
-@json_schema_type
-class CompletionResponseStreamChunk(MetricResponseMixin):
-    """A chunk of a streamed completion response.
-
-    :param delta: New content generated since last chunk. This can be one or more tokens.
-    :param stop_reason: Optional reason why generation stopped, if complete
-    :param logprobs: Optional log probabilities for generated tokens
-    """
-
-    delta: str
-    stop_reason: StopReason | None = None
-    logprobs: list[TokenLogProbs] | None = None
-
-
 class SystemMessageBehavior(Enum):
     """Config for how to override the default system prompt.
 
@@ -399,70 +291,6 @@ class SystemMessageBehavior(Enum):
     replace = "replace"
 
 
-@json_schema_type
-class ToolConfig(BaseModel):
-    """Configuration for tool use.
-
-    :param tool_choice: (Optional) Whether tool use is automatic, required, or none. Can also specify a tool name to use a specific tool. Defaults to ToolChoice.auto.
-    :param tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model.
-        - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object.
-        - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name> tag.
-        - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls.
-    :param system_message_behavior: (Optional) Config for how to override the default system prompt.
-        - `SystemMessageBehavior.append`: Appends the provided system message to the default system prompt.
-        - `SystemMessageBehavior.replace`: Replaces the default system prompt with the provided system message. The system message can include the string
-          '{{function_definitions}}' to indicate where the function definitions should be inserted.
-    """
-
-    tool_choice: ToolChoice | str | None = Field(default=ToolChoice.auto)
-    tool_prompt_format: ToolPromptFormat | None = Field(default=None)
-    system_message_behavior: SystemMessageBehavior | None = Field(default=SystemMessageBehavior.append)
-
-    def model_post_init(self, __context: Any) -> None:
-        if isinstance(self.tool_choice, str):
-            try:
-                self.tool_choice = ToolChoice[self.tool_choice]
-            except KeyError:
-                pass
-
-
-# This is an internally used class
-@json_schema_type
-class ChatCompletionRequest(BaseModel):
-    model: str
-    messages: list[Message]
-    sampling_params: SamplingParams | None = Field(default_factory=SamplingParams)
-
-    tools: list[ToolDefinition] | None = Field(default_factory=lambda: [])
-    tool_config: ToolConfig | None = Field(default_factory=ToolConfig)
-
-    response_format: ResponseFormat | None = None
-    stream: bool | None = False
-    logprobs: LogProbConfig | None = None
-
-
-@json_schema_type
-class ChatCompletionResponseStreamChunk(MetricResponseMixin):
-    """A chunk of a streamed chat completion response.
-
-    :param event: The event containing the new content
-    """
-
-    event: ChatCompletionResponseEvent
-
-
-@json_schema_type
-class ChatCompletionResponse(MetricResponseMixin):
-    """Response from a chat completion request.
-
-    :param completion_message: The complete response message
-    :param logprobs: Optional log probabilities for generated tokens
-    """
-
-    completion_message: CompletionMessage
-    logprobs: list[TokenLogProbs] | None = None
-
-
 @json_schema_type
 class EmbeddingsResponse(BaseModel):
     """Response containing generated embeddings.
@@ -727,6 +555,81 @@ OpenAIResponseFormatParam = Annotated[
 register_schema(OpenAIResponseFormatParam, name="OpenAIResponseFormatParam")
 
 
+@json_schema_type
+class FunctionToolConfig(BaseModel):
+    name: str
+
+
+@json_schema_type
+class OpenAIChatCompletionToolChoiceFunctionTool(BaseModel):
+    """Function tool choice for OpenAI-compatible chat completion requests.
+
+    :param type: Must be "function" to indicate function tool choice
+    :param function: The function tool configuration
+    """
+
+    type: Literal["function"] = "function"
+    function: FunctionToolConfig
+
+    def __init__(self, name: str):
+        super().__init__(type="function", function=FunctionToolConfig(name=name))
+
+
+@json_schema_type
+class CustomToolConfig(BaseModel):
+    """Custom tool configuration for OpenAI-compatible chat completion requests.
+
+    :param name: Name of the custom tool
+    """
+
+    name: str
+
+
+@json_schema_type
+class OpenAIChatCompletionToolChoiceCustomTool(BaseModel):
+    """Custom tool choice for OpenAI-compatible chat completion requests.
+
+    :param type: Must be "custom" to indicate custom tool choice
+    """
+
+    type: Literal["custom"] = "custom"
+    custom: CustomToolConfig
+
+    def __init__(self, name: str):
+        super().__init__(type="custom", custom=CustomToolConfig(name=name))
+
+
+@json_schema_type
+class AllowedToolsConfig(BaseModel):
+    tools: list[dict[str, Any]]
+    mode: Literal["auto", "required"]
+
+
+@json_schema_type
+class OpenAIChatCompletionToolChoiceAllowedTools(BaseModel):
+    """Allowed tools response format for OpenAI-compatible chat completion requests.
+
+    :param type: Must be "allowed_tools" to indicate allowed tools response format
+    """
+
+    type: Literal["allowed_tools"] = "allowed_tools"
+    allowed_tools: AllowedToolsConfig
+
+    def __init__(self, tools: list[dict[str, Any]], mode: Literal["auto", "required"]):
+        super().__init__(type="allowed_tools", allowed_tools=AllowedToolsConfig(tools=tools, mode=mode))
+
+
+# Define the object-level union with discriminator
+OpenAIChatCompletionToolChoice = Annotated[
+    OpenAIChatCompletionToolChoiceAllowedTools
+    | OpenAIChatCompletionToolChoiceFunctionTool
+    | OpenAIChatCompletionToolChoiceCustomTool,
+    Field(discriminator="type"),
+]
+
+register_schema(OpenAIChatCompletionToolChoice, name="OpenAIChatCompletionToolChoice")
+
+
 @json_schema_type
 class OpenAITopLogProb(BaseModel):
     """The top log probability for a token from an OpenAI-compatible chat completion response.
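The OpenAIChatCompletionToolChoice union added here replaces free-form tool-choice payloads with three typed variants discriminated on the "type" field, so pydantic can pick the right class during validation. A sketch of round-tripping one variant (only the classes from this hunk are assumed; TypeAdapter is stock pydantic v2):

from pydantic import TypeAdapter

from llama_stack_api.inference import (
    OpenAIChatCompletionToolChoice,
    OpenAIChatCompletionToolChoiceFunctionTool,
)

choice = OpenAIChatCompletionToolChoiceFunctionTool(name="get_weather")
payload = choice.model_dump()
# payload == {"type": "function", "function": {"name": "get_weather"}}

# Validating raw data against the union dispatches on the "type" discriminator.
adapter = TypeAdapter(OpenAIChatCompletionToolChoice)
restored = adapter.validate_python(payload)
assert isinstance(restored, OpenAIChatCompletionToolChoiceFunctionTool)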
@@ -754,7 +657,7 @@ class OpenAITokenLogProb(BaseModel):
     token: str
     bytes: list[int] | None = None
     logprob: float
-    top_logprobs: list[OpenAITopLogProb]
+    top_logprobs: list[OpenAITopLogProb] | None = None
 
 
 @json_schema_type
@@ -1160,7 +1063,6 @@ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"):
 
 
 @runtime_checkable
-@trace_protocol
 class InferenceProvider(Protocol):
     """
     This protocol defines the interface that should be implemented by all inference providers.
@@ -1189,20 +1091,18 @@ class InferenceProvider(Protocol):
         raise NotImplementedError("Reranking is not implemented")
         return  # this is so mypy's safe-super rule will consider the method concrete
 
-    @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_completion(
         self,
         params: Annotated[OpenAICompletionRequestWithExtraBody, Body(...)],
-    ) -> OpenAICompletion:
+    ) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
         """Create completion.
 
         Generate an OpenAI-compatible completion for the given prompt using the specified model.
-        :returns: An OpenAICompletion.
+        :returns: An OpenAICompletion. When streaming, returns Server-Sent Events (SSE) with OpenAICompletion chunks.
         """
         ...
 
-    @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_chat_completion(
         self,
@@ -1211,11 +1111,10 @@ class InferenceProvider(Protocol):
         """Create chat completions.
 
         Generate an OpenAI-compatible chat completion for the given messages using the specified model.
-        :returns: An OpenAIChatCompletion.
+        :returns: An OpenAIChatCompletion. When streaming, returns Server-Sent Events (SSE) with OpenAIChatCompletionChunk objects.
         """
         ...
 
-    @webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_embeddings(
         self,
@@ -1234,12 +1133,12 @@ class Inference(InferenceProvider):
 
     Llama Stack Inference API for generating completions, chat completions, and embeddings.
 
-    This API provides the raw interface to the underlying models.
+    This API provides the raw interface to the underlying models. Three kinds of models are supported:
     - LLM models: these models generate "raw" and "chat" (conversational) completions.
    - Embedding models: these models generate embeddings to be used for semantic search.
+    - Rerank models: these models reorder the documents based on their relevance to a query.
     """
 
-    @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
     async def list_chat_completions(
         self,
@@ -1258,9 +1157,6 @@ class Inference(InferenceProvider):
         """
         raise NotImplementedError("List chat completions is not implemented")
 
-    @webmethod(
-        route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
-    )
     @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
         """Get chat completion.
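With the return type widened to OpenAICompletion | AsyncIterator[OpenAICompletion], callers now branch on the runtime shape instead of relying on a separate streaming endpoint. A hedged caller sketch (assumes the usual OpenAI-compatible layout where choices[0].text carries the generated text, and that client is some InferenceProvider implementation; neither is spelled out in this diff):

from collections.abc import AsyncIterator


async def collect_completion(client, params) -> str:
    result = await client.openai_completion(params)
    if isinstance(result, AsyncIterator):
        # Streaming: each SSE chunk is an OpenAICompletion carrying a text delta.
        return "".join([chunk.choices[0].text async for chunk in result])
    # Non-streaming: a single OpenAICompletion.
    return result.choices[0].text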
llama_stack_api/inspect_api/__init__.py (new file)
@@ -0,0 +1,37 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Inspect API protocol and models.
+
+This module contains the Inspect protocol definition.
+Pydantic models are defined in llama_stack_api.inspect.models.
+The FastAPI router is defined in llama_stack_api.inspect.fastapi_routes.
+"""
+
+# Import fastapi_routes for router factory access
+from . import fastapi_routes
+
+# Import protocol for re-export
+from .api import Inspect
+
+# Import models for re-export
+from .models import (
+    ApiFilter,
+    HealthInfo,
+    ListRoutesResponse,
+    RouteInfo,
+    VersionInfo,
+)
+
+__all__ = [
+    "Inspect",
+    "ApiFilter",
+    "HealthInfo",
+    "ListRoutesResponse",
+    "RouteInfo",
+    "VersionInfo",
+    "fastapi_routes",
+]
llama_stack_api/inspect_api/api.py (new file)
@@ -0,0 +1,25 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Protocol, runtime_checkable
+
+from .models import (
+    ApiFilter,
+    HealthInfo,
+    ListRoutesResponse,
+    VersionInfo,
+)
+
+
+@runtime_checkable
+class Inspect(Protocol):
+    """APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers."""
+
+    async def list_routes(self, api_filter: ApiFilter | None = None) -> ListRoutesResponse: ...
+
+    async def health(self) -> HealthInfo: ...
+
+    async def version(self) -> VersionInfo: ...