llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +12 -21
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.4.dist-info/RECORD +0 -625
- llama_stack-0.3.4.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
|
@@ -10,11 +10,11 @@ from typing import Annotated, Any, Literal, Protocol
|
|
|
10
10
|
|
|
11
11
|
from pydantic import BaseModel, Field
|
|
12
12
|
|
|
13
|
-
from
|
|
14
|
-
from
|
|
15
|
-
from
|
|
16
|
-
from
|
|
17
|
-
from
|
|
13
|
+
from llama_stack_api.common.content_types import URL
|
|
14
|
+
from llama_stack_api.common.job_types import JobStatus
|
|
15
|
+
from llama_stack_api.common.training_types import Checkpoint
|
|
16
|
+
from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
|
|
17
|
+
from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
@json_schema_type
|
|
@@ -236,6 +236,7 @@ class PostTrainingRLHFRequest(BaseModel):
|
|
|
236
236
|
logger_config: dict[str, Any]
|
|
237
237
|
|
|
238
238
|
|
|
239
|
+
@json_schema_type
|
|
239
240
|
class PostTrainingJob(BaseModel):
|
|
240
241
|
job_uuid: str
|
|
241
242
|
|
|
@@ -265,6 +266,7 @@ class PostTrainingJobStatusResponse(BaseModel):
|
|
|
265
266
|
checkpoints: list[Checkpoint] = Field(default_factory=list)
|
|
266
267
|
|
|
267
268
|
|
|
269
|
+
@json_schema_type
|
|
268
270
|
class ListPostTrainingJobsResponse(BaseModel):
|
|
269
271
|
data: list[PostTrainingJob]
|
|
270
272
|
|
|
@@ -284,7 +286,6 @@ class PostTrainingJobArtifactsResponse(BaseModel):
|
|
|
284
286
|
|
|
285
287
|
|
|
286
288
|
class PostTraining(Protocol):
|
|
287
|
-
@webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
|
288
289
|
@webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
|
289
290
|
async def supervised_fine_tune(
|
|
290
291
|
self,
|
|
@@ -312,7 +313,6 @@ class PostTraining(Protocol):
|
|
|
312
313
|
"""
|
|
313
314
|
...
|
|
314
315
|
|
|
315
|
-
@webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
|
316
316
|
@webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
|
317
317
|
async def preference_optimize(
|
|
318
318
|
self,
|
|
@@ -335,7 +335,6 @@ class PostTraining(Protocol):
|
|
|
335
335
|
"""
|
|
336
336
|
...
|
|
337
337
|
|
|
338
|
-
@webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
|
339
338
|
@webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
|
340
339
|
async def get_training_jobs(self) -> ListPostTrainingJobsResponse:
|
|
341
340
|
"""Get all training jobs.
|
|
@@ -344,7 +343,6 @@ class PostTraining(Protocol):
|
|
|
344
343
|
"""
|
|
345
344
|
...
|
|
346
345
|
|
|
347
|
-
@webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
|
348
346
|
@webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
|
349
347
|
async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse:
|
|
350
348
|
"""Get the status of a training job.
|
|
@@ -354,7 +352,6 @@ class PostTraining(Protocol):
|
|
|
354
352
|
"""
|
|
355
353
|
...
|
|
356
354
|
|
|
357
|
-
@webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
|
358
355
|
@webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
|
359
356
|
async def cancel_training_job(self, job_uuid: str) -> None:
|
|
360
357
|
"""Cancel a training job.
|
|
@@ -363,7 +360,6 @@ class PostTraining(Protocol):
|
|
|
363
360
|
"""
|
|
364
361
|
...
|
|
365
362
|
|
|
366
|
-
@webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
|
|
367
363
|
@webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
|
368
364
|
async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
|
|
369
365
|
"""Get the artifacts of a training job.
|
|
@@ -10,9 +10,8 @@ from typing import Protocol, runtime_checkable
|
|
|
10
10
|
|
|
11
11
|
from pydantic import BaseModel, Field, field_validator, model_validator
|
|
12
12
|
|
|
13
|
-
from
|
|
14
|
-
from
|
|
15
|
-
from llama_stack.schema_utils import json_schema_type, webmethod
|
|
13
|
+
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
|
14
|
+
from llama_stack_api.version import LLAMA_STACK_API_V1
|
|
16
15
|
|
|
17
16
|
|
|
18
17
|
@json_schema_type
|
|
@@ -85,6 +84,7 @@ class Prompt(BaseModel):
|
|
|
85
84
|
return f"pmpt_{hex_string}"
|
|
86
85
|
|
|
87
86
|
|
|
87
|
+
@json_schema_type
|
|
88
88
|
class ListPromptsResponse(BaseModel):
|
|
89
89
|
"""Response model to list prompts."""
|
|
90
90
|
|
|
@@ -92,7 +92,6 @@ class ListPromptsResponse(BaseModel):
|
|
|
92
92
|
|
|
93
93
|
|
|
94
94
|
@runtime_checkable
|
|
95
|
-
@trace_protocol
|
|
96
95
|
class Prompts(Protocol):
|
|
97
96
|
"""Prompts
|
|
98
97
|
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
"""Providers API protocol and models.
|
|
8
|
+
|
|
9
|
+
This module contains the Providers protocol definition.
|
|
10
|
+
Pydantic models are defined in llama_stack_api.providers.models.
|
|
11
|
+
The FastAPI router is defined in llama_stack_api.providers.fastapi_routes.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
# Import fastapi_routes for router factory access
|
|
15
|
+
from . import fastapi_routes
|
|
16
|
+
|
|
17
|
+
# Import protocol for re-export
|
|
18
|
+
from .api import Providers
|
|
19
|
+
|
|
20
|
+
# Import models for re-export
|
|
21
|
+
from .models import (
|
|
22
|
+
InspectProviderRequest,
|
|
23
|
+
ListProvidersResponse,
|
|
24
|
+
ProviderInfo,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
"Providers",
|
|
29
|
+
"ProviderInfo",
|
|
30
|
+
"ListProvidersResponse",
|
|
31
|
+
"InspectProviderRequest",
|
|
32
|
+
"fastapi_routes",
|
|
33
|
+
]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Protocol, runtime_checkable
|
|
8
|
+
|
|
9
|
+
from .models import InspectProviderRequest, ListProvidersResponse, ProviderInfo
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@runtime_checkable
class Providers(Protocol):
    """Structural interface for the Providers API.

    Both operations are coroutines. The bodies are placeholders because this
    class only declares the shape an implementation must have.
    """

    async def list_providers(self) -> ListProvidersResponse:
        """Return a ``ListProvidersResponse``."""
        ...

    async def inspect_provider(self, request: InspectProviderRequest) -> ProviderInfo:
        """Return the ``ProviderInfo`` for the given ``InspectProviderRequest``."""
        ...
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
"""FastAPI router for the Providers API.
|
|
8
|
+
|
|
9
|
+
This module defines the FastAPI router for the Providers API using standard
|
|
10
|
+
FastAPI route decorators.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from typing import Annotated
|
|
14
|
+
|
|
15
|
+
from fastapi import APIRouter, Depends
|
|
16
|
+
|
|
17
|
+
from llama_stack_api.router_utils import create_path_dependency, standard_responses
|
|
18
|
+
from llama_stack_api.version import LLAMA_STACK_API_V1
|
|
19
|
+
|
|
20
|
+
from .api import Providers
|
|
21
|
+
from .models import InspectProviderRequest, ListProvidersResponse, ProviderInfo
|
|
22
|
+
|
|
23
|
+
# Path parameter dependencies for single-field models
|
|
24
|
+
# Created once at module level; create_router hands this callable to Depends()
# so the inspect endpoint receives an already-built InspectProviderRequest.
get_inspect_provider_request = create_path_dependency(InspectProviderRequest)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def create_router(impl: Providers) -> APIRouter:
    """Build the APIRouter that exposes a Providers implementation.

    Two GET routes are registered under the ``/{LLAMA_STACK_API_V1}`` prefix;
    each one awaits the coroutine of the same name on ``impl`` and returns its
    result unchanged.

    Args:
        impl: Object implementing the Providers protocol that serves the requests.

    Returns:
        The configured router, tagged "Providers".
    """
    providers_router = APIRouter(
        prefix=f"/{LLAMA_STACK_API_V1}",
        tags=["Providers"],
        responses=standard_responses,
    )

    # Collection route: takes no request parameters.
    @providers_router.get(
        "/providers",
        response_model=ListProvidersResponse,
        summary="List providers.",
        description="List all available providers.",
        responses={200: {"description": "A ListProvidersResponse containing information about all providers."}},
    )
    async def list_providers() -> ListProvidersResponse:
        listing = await impl.list_providers()
        return listing

    # Single-item route: the request model is produced by the
    # get_inspect_provider_request dependency.
    @providers_router.get(
        "/providers/{provider_id}",
        response_model=ProviderInfo,
        summary="Get provider.",
        description="Get detailed information about a specific provider.",
        responses={200: {"description": "A ProviderInfo object containing the provider's details."}},
    )
    async def inspect_provider(
        request: Annotated[InspectProviderRequest, Depends(get_inspect_provider_request)],
    ) -> ProviderInfo:
        provider_info = await impl.inspect_provider(request)
        return provider_info

    return providers_router
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
"""Pydantic models for Providers API requests and responses.
|
|
8
|
+
|
|
9
|
+
This module re-exports models from llama_stack_api.admin.models to ensure
|
|
10
|
+
a single source of truth and avoid type conflicts.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
# Import and re-export shared models from admin
|
|
14
|
+
from llama_stack_api.admin.models import (
|
|
15
|
+
InspectProviderRequest,
|
|
16
|
+
ListProvidersResponse,
|
|
17
|
+
ProviderInfo,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"ProviderInfo",
|
|
22
|
+
"ListProvidersResponse",
|
|
23
|
+
"InspectProviderRequest",
|
|
24
|
+
]
|
|
@@ -5,18 +5,13 @@
|
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
7
|
from enum import Enum, StrEnum
|
|
8
|
-
from typing import Annotated, Any, Literal
|
|
8
|
+
from typing import Annotated, Any, Literal
|
|
9
9
|
|
|
10
10
|
from pydantic import BaseModel, Field, field_validator
|
|
11
|
-
from typing_extensions import runtime_checkable
|
|
12
11
|
|
|
13
|
-
from
|
|
14
|
-
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
|
15
|
-
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
|
16
|
-
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
|
12
|
+
from llama_stack_api.common.content_types import URL, InterleavedContent
|
|
17
13
|
|
|
18
14
|
|
|
19
|
-
@json_schema_type
|
|
20
15
|
class RRFRanker(BaseModel):
|
|
21
16
|
"""
|
|
22
17
|
Reciprocal Rank Fusion (RRF) ranker configuration.
|
|
@@ -30,7 +25,6 @@ class RRFRanker(BaseModel):
|
|
|
30
25
|
impact_factor: float = Field(default=60.0, gt=0.0) # default of 60 for optimal performance
|
|
31
26
|
|
|
32
27
|
|
|
33
|
-
@json_schema_type
|
|
34
28
|
class WeightedRanker(BaseModel):
|
|
35
29
|
"""
|
|
36
30
|
Weighted ranker configuration that combines vector and keyword scores.
|
|
@@ -55,10 +49,8 @@ Ranker = Annotated[
|
|
|
55
49
|
RRFRanker | WeightedRanker,
|
|
56
50
|
Field(discriminator="type"),
|
|
57
51
|
]
|
|
58
|
-
register_schema(Ranker, name="Ranker")
|
|
59
52
|
|
|
60
53
|
|
|
61
|
-
@json_schema_type
|
|
62
54
|
class RAGDocument(BaseModel):
|
|
63
55
|
"""
|
|
64
56
|
A document to be used for document ingestion in the RAG Tool.
|
|
@@ -75,7 +67,6 @@ class RAGDocument(BaseModel):
|
|
|
75
67
|
metadata: dict[str, Any] = Field(default_factory=dict)
|
|
76
68
|
|
|
77
69
|
|
|
78
|
-
@json_schema_type
|
|
79
70
|
class RAGQueryResult(BaseModel):
|
|
80
71
|
"""Result of a RAG query containing retrieved content and metadata.
|
|
81
72
|
|
|
@@ -87,7 +78,6 @@ class RAGQueryResult(BaseModel):
|
|
|
87
78
|
metadata: dict[str, Any] = Field(default_factory=dict)
|
|
88
79
|
|
|
89
80
|
|
|
90
|
-
@json_schema_type
|
|
91
81
|
class RAGQueryGenerator(Enum):
|
|
92
82
|
"""Types of query generators for RAG systems.
|
|
93
83
|
|
|
@@ -101,7 +91,6 @@ class RAGQueryGenerator(Enum):
|
|
|
101
91
|
custom = "custom"
|
|
102
92
|
|
|
103
93
|
|
|
104
|
-
@json_schema_type
|
|
105
94
|
class RAGSearchMode(StrEnum):
|
|
106
95
|
"""
|
|
107
96
|
Search modes for RAG query retrieval:
|
|
@@ -115,7 +104,6 @@ class RAGSearchMode(StrEnum):
|
|
|
115
104
|
HYBRID = "hybrid"
|
|
116
105
|
|
|
117
106
|
|
|
118
|
-
@json_schema_type
|
|
119
107
|
class DefaultRAGQueryGeneratorConfig(BaseModel):
|
|
120
108
|
"""Configuration for the default RAG query generator.
|
|
121
109
|
|
|
@@ -127,7 +115,6 @@ class DefaultRAGQueryGeneratorConfig(BaseModel):
|
|
|
127
115
|
separator: str = " "
|
|
128
116
|
|
|
129
117
|
|
|
130
|
-
@json_schema_type
|
|
131
118
|
class LLMRAGQueryGeneratorConfig(BaseModel):
|
|
132
119
|
"""Configuration for the LLM-based RAG query generator.
|
|
133
120
|
|
|
@@ -145,10 +132,8 @@ RAGQueryGeneratorConfig = Annotated[
|
|
|
145
132
|
DefaultRAGQueryGeneratorConfig | LLMRAGQueryGeneratorConfig,
|
|
146
133
|
Field(discriminator="type"),
|
|
147
134
|
]
|
|
148
|
-
register_schema(RAGQueryGeneratorConfig, name="RAGQueryGeneratorConfig")
|
|
149
135
|
|
|
150
136
|
|
|
151
|
-
@json_schema_type
|
|
152
137
|
class RAGQueryConfig(BaseModel):
|
|
153
138
|
"""
|
|
154
139
|
Configuration for the RAG query generation.
|
|
@@ -181,38 +166,3 @@ class RAGQueryConfig(BaseModel):
|
|
|
181
166
|
if len(v) == 0:
|
|
182
167
|
raise ValueError("chunk_template must not be empty")
|
|
183
168
|
return v
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
@runtime_checkable
|
|
187
|
-
@trace_protocol
|
|
188
|
-
class RAGToolRuntime(Protocol):
|
|
189
|
-
@webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1)
|
|
190
|
-
async def insert(
|
|
191
|
-
self,
|
|
192
|
-
documents: list[RAGDocument],
|
|
193
|
-
vector_db_id: str,
|
|
194
|
-
chunk_size_in_tokens: int = 512,
|
|
195
|
-
) -> None:
|
|
196
|
-
"""Index documents so they can be used by the RAG system.
|
|
197
|
-
|
|
198
|
-
:param documents: List of documents to index in the RAG system
|
|
199
|
-
:param vector_db_id: ID of the vector database to store the document embeddings
|
|
200
|
-
:param chunk_size_in_tokens: (Optional) Size in tokens for document chunking during indexing
|
|
201
|
-
"""
|
|
202
|
-
...
|
|
203
|
-
|
|
204
|
-
@webmethod(route="/tool-runtime/rag-tool/query", method="POST", level=LLAMA_STACK_API_V1)
|
|
205
|
-
async def query(
|
|
206
|
-
self,
|
|
207
|
-
content: InterleavedContent,
|
|
208
|
-
vector_db_ids: list[str],
|
|
209
|
-
query_config: RAGQueryConfig | None = None,
|
|
210
|
-
) -> RAGQueryResult:
|
|
211
|
-
"""Query the RAG system for context; typically invoked by the agent.
|
|
212
|
-
|
|
213
|
-
:param content: The query content to search for in the indexed documents
|
|
214
|
-
:param vector_db_ids: List of vector database IDs to search within
|
|
215
|
-
:param query_config: (Optional) Configuration parameters for the query operation
|
|
216
|
-
:returns: RAGQueryResult containing the retrieved content and metadata
|
|
217
|
-
"""
|
|
218
|
-
...
|
|
@@ -4,7 +4,6 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
|
|
8
7
|
from enum import StrEnum
|
|
9
8
|
|
|
10
9
|
from pydantic import BaseModel, Field
|
|
@@ -20,6 +19,7 @@ class ResourceType(StrEnum):
|
|
|
20
19
|
tool = "tool"
|
|
21
20
|
tool_group = "tool_group"
|
|
22
21
|
prompt = "prompt"
|
|
22
|
+
connector = "connector"
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class Resource(BaseModel):
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
"""Utilities for creating FastAPI routers with standard error responses.
|
|
8
|
+
|
|
9
|
+
This module provides standard error response definitions for FastAPI routers.
|
|
10
|
+
These responses use OpenAPI $ref references to component responses defined
|
|
11
|
+
in the OpenAPI specification.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import inspect
|
|
15
|
+
from collections.abc import Callable
|
|
16
|
+
from typing import Annotated, Any, TypeVar
|
|
17
|
+
|
|
18
|
+
from fastapi import Path, Query
|
|
19
|
+
from pydantic import BaseModel
|
|
20
|
+
|
|
21
|
+
# OpenAPI extension key to mark routes that don't require authentication.
|
|
22
|
+
# Use this in FastAPI route decorators: @router.get("/health", openapi_extra={PUBLIC_ROUTE_KEY: True})
|
|
23
|
+
PUBLIC_ROUTE_KEY = "x-public"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Error responses shared by routers. Each entry is an OpenAPI "$ref" pointing at a
# named response under "#/components/responses/" rather than an inline definition;
# keys are HTTP status codes plus the "default" catch-all.
standard_responses: dict[int | str, dict[str, Any]] = {
    400: {"$ref": "#/components/responses/BadRequest400"},
    429: {"$ref": "#/components/responses/TooManyRequests429"},
    500: {"$ref": "#/components/responses/InternalServerError500"},
    "default": {"$ref": "#/components/responses/DefaultError"},
}
|
|
32
|
+
|
|
33
|
+
# NOTE(review): create_query_dependency and create_path_dependency each declare their
# own PEP 695 type parameter named T, which shadows this TypeVar inside them, so this
# module-level binding looks unused within this module — confirm before removing.
T = TypeVar("T", bound=BaseModel)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def create_query_dependency[T: BaseModel](model_class: type[T]) -> Callable[..., T]:
|
|
37
|
+
"""Create a FastAPI dependency function from a Pydantic model for query parameters.
|
|
38
|
+
|
|
39
|
+
FastAPI does not natively support using Pydantic models as query parameters
|
|
40
|
+
without a dependency function. Using a dependency function typically leads to
|
|
41
|
+
duplication: field types, default values, and descriptions must be repeated in
|
|
42
|
+
`Query(...)` annotations even though they already exist in the Pydantic model.
|
|
43
|
+
|
|
44
|
+
This function automatically generates a dependency function that extracts query parameters
|
|
45
|
+
from the request and constructs an instance of the Pydantic model. The descriptions and
|
|
46
|
+
defaults are automatically extracted from the model's Field definitions, making the model
|
|
47
|
+
the single source of truth.
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
model_class: The Pydantic model class to create a dependency for
|
|
51
|
+
|
|
52
|
+
Returns:
|
|
53
|
+
A dependency function that can be used with FastAPI's Depends()
|
|
54
|
+
```
|
|
55
|
+
"""
|
|
56
|
+
# Build function signature dynamically from model fields
|
|
57
|
+
annotations: dict[str, Any] = {}
|
|
58
|
+
defaults: dict[str, Any] = {}
|
|
59
|
+
|
|
60
|
+
for field_name, field_info in model_class.model_fields.items():
|
|
61
|
+
# Extract description from Field
|
|
62
|
+
description = field_info.description
|
|
63
|
+
|
|
64
|
+
# Create Query annotation with description from model
|
|
65
|
+
query_annotation = Query(description=description) if description else Query()
|
|
66
|
+
|
|
67
|
+
# Create Annotated type with Query
|
|
68
|
+
field_type = field_info.annotation
|
|
69
|
+
annotations[field_name] = Annotated[field_type, query_annotation]
|
|
70
|
+
|
|
71
|
+
# Set default value from model
|
|
72
|
+
if field_info.default is not inspect.Parameter.empty:
|
|
73
|
+
defaults[field_name] = field_info.default
|
|
74
|
+
|
|
75
|
+
# Create the dependency function dynamically
|
|
76
|
+
def dependency_func(**kwargs: Any) -> T:
|
|
77
|
+
return model_class(**kwargs)
|
|
78
|
+
|
|
79
|
+
# Set function signature
|
|
80
|
+
sig_params = []
|
|
81
|
+
for field_name, field_type in annotations.items():
|
|
82
|
+
default = defaults.get(field_name, inspect.Parameter.empty)
|
|
83
|
+
param = inspect.Parameter(
|
|
84
|
+
field_name,
|
|
85
|
+
inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
|
86
|
+
default=default,
|
|
87
|
+
annotation=field_type,
|
|
88
|
+
)
|
|
89
|
+
sig_params.append(param)
|
|
90
|
+
|
|
91
|
+
# These attributes are set dynamically at runtime. While mypy can't verify them statically,
|
|
92
|
+
# they are standard Python function attributes that exist on all callable objects at runtime.
|
|
93
|
+
# Setting them allows FastAPI to properly introspect the function signature for dependency injection.
|
|
94
|
+
dependency_func.__signature__ = inspect.Signature(sig_params) # type: ignore[attr-defined]
|
|
95
|
+
dependency_func.__annotations__ = annotations # type: ignore[attr-defined]
|
|
96
|
+
dependency_func.__name__ = f"get_{model_class.__name__.lower()}_request" # type: ignore[attr-defined]
|
|
97
|
+
|
|
98
|
+
return dependency_func
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def create_path_dependency[T: BaseModel](model_class: type[T]) -> Callable[..., T]:
|
|
102
|
+
"""Create a FastAPI dependency function from a Pydantic model for path parameters.
|
|
103
|
+
|
|
104
|
+
FastAPI requires path parameters to be explicitly annotated with `Path()`. When using
|
|
105
|
+
a Pydantic model that contains path parameters, you typically need a dependency function
|
|
106
|
+
that extracts the path parameter and constructs the model. This leads to duplication:
|
|
107
|
+
the parameter name, type, and description must be repeated in `Path(...)` annotations
|
|
108
|
+
even though they already exist in the Pydantic model.
|
|
109
|
+
|
|
110
|
+
This function automatically generates a dependency function that extracts path parameters
|
|
111
|
+
from the request and constructs an instance of the Pydantic model. The descriptions are
|
|
112
|
+
automatically extracted from the model's Field definitions, making the model the single
|
|
113
|
+
source of truth.
|
|
114
|
+
|
|
115
|
+
Args:
|
|
116
|
+
model_class: The Pydantic model class to create a dependency for. The model should
|
|
117
|
+
have exactly one field that represents the path parameter.
|
|
118
|
+
|
|
119
|
+
Returns:
|
|
120
|
+
A dependency function that can be used with FastAPI's Depends()
|
|
121
|
+
```
|
|
122
|
+
"""
|
|
123
|
+
# Get the single field from the model (path parameter models typically have one field)
|
|
124
|
+
if len(model_class.model_fields) != 1:
|
|
125
|
+
raise ValueError(
|
|
126
|
+
f"Path parameter model {model_class.__name__} must have exactly one field, "
|
|
127
|
+
f"but has {len(model_class.model_fields)} fields"
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
field_name, field_info = next(iter(model_class.model_fields.items()))
|
|
131
|
+
|
|
132
|
+
# Extract description from Field
|
|
133
|
+
description = field_info.description
|
|
134
|
+
|
|
135
|
+
# Create Path annotation with description from model
|
|
136
|
+
path_annotation = Path(description=description) if description else Path()
|
|
137
|
+
|
|
138
|
+
# Create Annotated type with Path
|
|
139
|
+
field_type = field_info.annotation
|
|
140
|
+
annotations: dict[str, Any] = {field_name: Annotated[field_type, path_annotation]}
|
|
141
|
+
|
|
142
|
+
# Create the dependency function dynamically
|
|
143
|
+
def dependency_func(**kwargs: Any) -> T:
|
|
144
|
+
return model_class(**kwargs)
|
|
145
|
+
|
|
146
|
+
# Set function signature
|
|
147
|
+
param = inspect.Parameter(
|
|
148
|
+
field_name,
|
|
149
|
+
inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
|
150
|
+
annotation=annotations[field_name],
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
# These attributes are set dynamically at runtime. While mypy can't verify them statically,
|
|
154
|
+
# they are standard Python function attributes that exist on all callable objects at runtime.
|
|
155
|
+
# Setting them allows FastAPI to properly introspect the function signature for dependency injection.
|
|
156
|
+
dependency_func.__signature__ = inspect.Signature([param]) # type: ignore[attr-defined]
|
|
157
|
+
dependency_func.__annotations__ = annotations # type: ignore[attr-defined]
|
|
158
|
+
dependency_func.__name__ = f"get_{model_class.__name__.lower()}_request" # type: ignore[attr-defined]
|
|
159
|
+
|
|
160
|
+
return dependency_func
|
|
@@ -9,11 +9,10 @@ from typing import Any, Protocol, runtime_checkable
|
|
|
9
9
|
|
|
10
10
|
from pydantic import BaseModel, Field
|
|
11
11
|
|
|
12
|
-
from
|
|
13
|
-
from
|
|
14
|
-
from
|
|
15
|
-
from
|
|
16
|
-
from llama_stack.schema_utils import json_schema_type, webmethod
|
|
12
|
+
from llama_stack_api.inference import OpenAIMessageParam
|
|
13
|
+
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
|
14
|
+
from llama_stack_api.shields import Shield
|
|
15
|
+
from llama_stack_api.version import LLAMA_STACK_API_V1
|
|
17
16
|
|
|
18
17
|
|
|
19
18
|
@json_schema_type
|
|
@@ -94,7 +93,6 @@ class ShieldStore(Protocol):
|
|
|
94
93
|
|
|
95
94
|
|
|
96
95
|
@runtime_checkable
|
|
97
|
-
@trace_protocol
|
|
98
96
|
class Safety(Protocol):
|
|
99
97
|
"""Safety
|
|
100
98
|
|
|
@@ -121,15 +119,14 @@ class Safety(Protocol):
|
|
|
121
119
|
"""
|
|
122
120
|
...
|
|
123
121
|
|
|
124
|
-
@webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
|
125
122
|
@webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
|
|
126
|
-
async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
|
|
123
|
+
async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject:
|
|
127
124
|
"""Create moderation.
|
|
128
125
|
|
|
129
126
|
Classifies if text and/or image inputs are potentially harmful.
|
|
130
127
|
:param input: Input (or inputs) to classify.
|
|
131
128
|
Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models.
|
|
132
|
-
:param model: The content moderation model you would like to use.
|
|
129
|
+
:param model: (Optional) The content moderation model you would like to use.
|
|
133
130
|
:returns: A moderation object.
|
|
134
131
|
"""
|
|
135
132
|
...
|