llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
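
The bulk of this release is a package restructuring, visible in the file list above: the llama_stack.apis.* modules and llama_stack.strong_typing are deleted, API types move into the new top-level llama_stack_api package (note the new top_level.txt +2), and the kvstore/sqlstore utilities move from llama_stack.providers.utils into llama_stack.core.storage. A minimal import-migration sketch, using only paths that appear in this diff (illustrative, not exhaustive):

    # 0.3.5 -- these module paths are removed in 0.4.0
    # from llama_stack.apis.inference import Inference
    # from llama_stack.apis.models import Models
    # from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl

    # 0.4.0 -- API types are re-exported flat from the new package,
    # and storage utilities live under llama_stack.core.storage
    from llama_stack_api import Inference, Models
    from llama_stack.core.storage.kvstore import KVStore, kvstore_impl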
llama_stack/providers/inline/batches/reference/batches.py

@@ -11,29 +11,44 @@ import json
 import time
 import uuid
 from io import BytesIO
-from typing import Any, Literal
+from typing import Any

 from openai.types.batch import BatchError, Errors
 from pydantic import BaseModel

-from llama_stack.
-from llama_stack.
-from
-
+from llama_stack.core.storage.kvstore import KVStore
+from llama_stack.log import get_logger
+from llama_stack_api import (
+    Batches,
+    BatchObject,
+    ConflictError,
+    Files,
     Inference,
+    ListBatchesResponse,
+    Models,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAICompletionRequestWithExtraBody,
     OpenAIDeveloperMessageParam,
     OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIFilePurpose,
     OpenAIMessageParam,
     OpenAISystemMessageParam,
     OpenAIToolMessageParam,
     OpenAIUserMessageParam,
+    ResourceNotFoundError,
+)
+from llama_stack_api.batches.models import (
+    CancelBatchRequest,
+    CreateBatchRequest,
+    ListBatchesRequest,
+    RetrieveBatchRequest,
+)
+from llama_stack_api.files.models import (
+    RetrieveFileContentRequest,
+    RetrieveFileRequest,
+    UploadFileRequest,
 )
-from llama_stack.apis.models import Models
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore import KVStore

 from .config import ReferenceBatchesImplConfig

@@ -136,11 +151,7 @@ class ReferenceBatchesImpl(Batches):
     # TODO (SECURITY): this currently works w/ configured api keys, not with x-llamastack-provider-data or with user policy restrictions
     async def create_batch(
         self,
-        input_file_id: str,
-        endpoint: str,
-        completion_window: Literal["24h"],
-        metadata: dict[str, str] | None = None,
-        idempotency_key: str | None = None,
+        request: CreateBatchRequest,
     ) -> BatchObject:
         """
         Create a new batch for processing multiple API requests.
@@ -181,14 +192,14 @@

         # TODO: set expiration time for garbage collection

-        if endpoint not in ["/v1/chat/completions", "/v1/completions", "/v1/embeddings"]:
+        if request.endpoint not in ["/v1/chat/completions", "/v1/completions", "/v1/embeddings"]:
             raise ValueError(
-                f"Invalid endpoint: {endpoint}. Supported values: /v1/chat/completions, /v1/completions, /v1/embeddings. Code: invalid_value. Param: endpoint",
+                f"Invalid endpoint: {request.endpoint}. Supported values: /v1/chat/completions, /v1/completions, /v1/embeddings. Code: invalid_value. Param: endpoint",
             )

-        if completion_window != "24h":
+        if request.completion_window != "24h":
             raise ValueError(
-                f"Invalid completion_window: {completion_window}. Supported values are: 24h. Code: invalid_value. Param: completion_window",
+                f"Invalid completion_window: {request.completion_window}. Supported values are: 24h. Code: invalid_value. Param: completion_window",
             )

         batch_id = f"batch_{uuid.uuid4().hex[:16]}"
@@ -196,22 +207,22 @@
         # For idempotent requests, use the idempotency key for the batch ID
         # This ensures the same key always maps to the same batch ID,
         # allowing us to detect parameter conflicts
-        if idempotency_key is not None:
-            hash_input = idempotency_key.encode("utf-8")
+        if request.idempotency_key is not None:
+            hash_input = request.idempotency_key.encode("utf-8")
             hash_digest = hashlib.sha256(hash_input).hexdigest()[:24]
             batch_id = f"batch_{hash_digest}"

             try:
-                existing_batch = await self.retrieve_batch(batch_id)
+                existing_batch = await self.retrieve_batch(RetrieveBatchRequest(batch_id=batch_id))

                 if (
-                    existing_batch.input_file_id != input_file_id
-                    or existing_batch.endpoint != endpoint
-                    or existing_batch.completion_window != completion_window
-                    or existing_batch.metadata != metadata
+                    existing_batch.input_file_id != request.input_file_id
+                    or existing_batch.endpoint != request.endpoint
+                    or existing_batch.completion_window != request.completion_window
+                    or existing_batch.metadata != request.metadata
                 ):
                     raise ConflictError(
-                        f"Idempotency key '{idempotency_key}' was previously used with different parameters. "
+                        f"Idempotency key '{request.idempotency_key}' was previously used with different parameters. "
                         "Either use a new idempotency key or ensure all parameters match the original request."
                     )

@@ -226,12 +237,12 @@
         batch = BatchObject(
             id=batch_id,
             object="batch",
-            endpoint=endpoint,
-            input_file_id=input_file_id,
-            completion_window=completion_window,
+            endpoint=request.endpoint,
+            input_file_id=request.input_file_id,
+            completion_window=request.completion_window,
             status="validating",
             created_at=current_time,
-            metadata=metadata,
+            metadata=request.metadata,
         )

         await self.kvstore.set(f"batch:{batch_id}", batch.to_json())
@@ -243,28 +254,27 @@

         return batch

-    async def cancel_batch(self,
+    async def cancel_batch(self, request: CancelBatchRequest) -> BatchObject:
         """Cancel a batch that is in progress."""
-        batch = await self.retrieve_batch(batch_id)
+        batch = await self.retrieve_batch(RetrieveBatchRequest(batch_id=request.batch_id))

         if batch.status in ["cancelled", "cancelling"]:
             return batch

         if batch.status in ["completed", "failed", "expired"]:
-            raise ConflictError(f"Cannot cancel batch '{batch_id}' with status '{batch.status}'")
+            raise ConflictError(f"Cannot cancel batch '{request.batch_id}' with status '{batch.status}'")

-        await self._update_batch(batch_id, status="cancelling", cancelling_at=int(time.time()))
+        await self._update_batch(request.batch_id, status="cancelling", cancelling_at=int(time.time()))

-        if batch_id in self._processing_tasks:
-            self._processing_tasks[batch_id].cancel()
+        if request.batch_id in self._processing_tasks:
+            self._processing_tasks[request.batch_id].cancel()
             # note: task removal and status="cancelled" handled in finally block of _process_batch

-        return await self.retrieve_batch(batch_id)
+        return await self.retrieve_batch(RetrieveBatchRequest(batch_id=request.batch_id))

     async def list_batches(
         self,
-        after: str | None = None,
-        limit: int = 20,
+        request: ListBatchesRequest,
     ) -> ListBatchesResponse:
         """
         List all batches, eventually only for the current user.
@@ -281,14 +291,14 @@
         batches.sort(key=lambda b: b.created_at, reverse=True)

         start_idx = 0
-        if after:
+        if request.after:
             for i, batch in enumerate(batches):
-                if batch.id == after:
+                if batch.id == request.after:
                     start_idx = i + 1
                     break

-        page_batches = batches[start_idx : start_idx + limit]
-        has_more = (start_idx + limit) < len(batches)
+        page_batches = batches[start_idx : start_idx + request.limit]
+        has_more = (start_idx + request.limit) < len(batches)

         first_id = page_batches[0].id if page_batches else None
         last_id = page_batches[-1].id if page_batches else None
@@ -300,11 +310,11 @@
             has_more=has_more,
         )

-    async def retrieve_batch(self,
+    async def retrieve_batch(self, request: RetrieveBatchRequest) -> BatchObject:
         """Retrieve information about a specific batch."""
-        batch_data = await self.kvstore.get(f"batch:{batch_id}")
+        batch_data = await self.kvstore.get(f"batch:{request.batch_id}")
         if not batch_data:
-            raise ResourceNotFoundError(batch_id, "Batch", "batches.list()")
+            raise ResourceNotFoundError(request.batch_id, "Batch", "batches.list()")

         return BatchObject.model_validate_json(batch_data)

@@ -312,7 +322,7 @@
         """Update batch fields in kvstore."""
         async with self._update_batch_lock:
             try:
-                batch = await self.retrieve_batch(batch_id)
+                batch = await self.retrieve_batch(RetrieveBatchRequest(batch_id=batch_id))

                 # batch processing is async. once cancelling, only allow "cancelled" status updates
                 if batch.status == "cancelling" and updates.get("status") != "cancelled":
@@ -344,7 +354,7 @@
         requests: list[BatchRequest] = []
         errors: list[BatchError] = []
         try:
-            await self.files_api.openai_retrieve_file(batch.input_file_id)
+            await self.files_api.openai_retrieve_file(RetrieveFileRequest(file_id=batch.input_file_id))
         except Exception:
             errors.append(
                 BatchError(
@@ -357,8 +367,13 @@
             return errors, requests

         # TODO(SECURITY): do something about large files
-        file_content_response = await self.files_api.openai_retrieve_file_content(
-
+        file_content_response = await self.files_api.openai_retrieve_file_content(
+            RetrieveFileContentRequest(file_id=batch.input_file_id)
+        )
+        # Handle both bytes and memoryview types - convert to bytes unconditionally
+        # (bytes(x) returns x if already bytes, creates new bytes from memoryview otherwise)
+        body_bytes = bytes(file_content_response.body)
+        file_content = body_bytes.decode("utf-8")
         for line_num, line in enumerate(file_content.strip().split("\n"), 1):
             if line.strip():  # skip empty lines
                 try:
@@ -415,8 +430,8 @@
                         )
                         valid = False

-                    if (
-                        if
+                    if (request_body := request.get("body")) and isinstance(request_body, dict):
+                        if request_body.get("stream", False):
                             errors.append(
                                 BatchError(
                                     code="streaming_unsupported",
@@ -447,7 +462,7 @@
                 ]

                 for param, expected_type, type_string in required_params:
-                    if param not in
+                    if param not in request_body:
                         errors.append(
                             BatchError(
                                 code="invalid_request",
@@ -457,7 +472,7 @@
                             )
                         )
                         valid = False
-                    elif not isinstance(
+                    elif not isinstance(request_body[param], expected_type):
                         errors.append(
                             BatchError(
                                 code="invalid_request",
@@ -468,15 +483,15 @@
                             )
                         )
                         valid = False

-                if "model" in
+                if "model" in request_body and isinstance(request_body["model"], str):
                     try:
-                        await self.models_api.get_model(
+                        await self.models_api.get_model(request_body["model"])
                     except Exception:
                         errors.append(
                             BatchError(
                                 code="model_not_found",
                                 line=line_num,
-                                message=f"Model '{
+                                message=f"Model '{request_body['model']}' does not exist or is not supported",
                                 param="body.model",
                             )
                         )
@@ -484,14 +499,14 @@

                 if valid:
                     assert isinstance(url, str), "URL must be a string"  # for mypy
-                    assert isinstance(
+                    assert isinstance(request_body, dict), "Body must be a dictionary"  # for mypy
                     requests.append(
                         BatchRequest(
                             line_num=line_num,
                             url=url,
                             method=request["method"],
                             custom_id=request["custom_id"],
-                            body=
+                            body=request_body,
                         ),
                     )
             except json.JSONDecodeError:
@@ -529,7 +544,7 @@
     async def _process_batch_impl(self, batch_id: str) -> None:
        """Implementation of batch processing logic."""
        errors: list[BatchError] = []
-        batch = await self.retrieve_batch(batch_id)
+        batch = await self.retrieve_batch(RetrieveBatchRequest(batch_id=batch_id))

        errors, requests = await self._validate_input(batch)
        if errors:
@@ -675,5 +690,8 @@

         with AsyncBytesIO("\n".join(output_lines).encode("utf-8")) as file_buffer:
             file_buffer.filename = f"{batch_id}_{file_type}.jsonl"
-            uploaded_file = await self.files_api.openai_upload_file(
+            uploaded_file = await self.files_api.openai_upload_file(
+                request=UploadFileRequest(purpose=OpenAIFilePurpose.BATCH),
+                file=file_buffer,
+            )
             return uploaded_file.id
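
Every signature change above follows the same pattern: Batches methods now take a single request model instead of loose parameters. A caller-side sketch, assuming batches is a Batches implementation and that the request-model fields mirror the old keyword arguments (as the method bodies above indicate); the file id is a hypothetical placeholder:

    from llama_stack_api.batches.models import CreateBatchRequest, RetrieveBatchRequest

    # 0.3.5: batch = await batches.create_batch(input_file_id, endpoint, "24h", metadata, idempotency_key)
    # 0.4.0: one pydantic request model per call
    batch = await batches.create_batch(
        CreateBatchRequest(
            input_file_id="file-abc123",      # hypothetical file id
            endpoint="/v1/chat/completions",
            completion_window="24h",
            idempotency_key="nightly-run",    # optional; reuse with different params raises ConflictError
        )
    )
    batch = await batches.retrieve_batch(RetrieveBatchRequest(batch_id=batch.id))

The idempotency semantics carry over unchanged from 0.3.5: the key is hashed into the batch id, so the same key with identical parameters returns the existing batch.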
llama_stack/providers/inline/datasetio/localfs/datasetio.py

@@ -5,13 +5,10 @@
 # the root directory of this source tree.
 from typing import Any

-from llama_stack.
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Dataset
-from llama_stack.providers.datatypes import DatasetsProtocolPrivate
+from llama_stack.core.storage.kvstore import kvstore_impl
 from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_uri
-from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.pagination import paginate_records
+from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse

 from .config import LocalFSDatasetIOConfig

llama_stack/providers/inline/eval/meta_reference/eval.py

@@ -8,28 +8,27 @@ from typing import Any

 from tqdm import tqdm

-from llama_stack.
-from llama_stack.
-from
-
-
+from llama_stack.core.storage.kvstore import kvstore_impl
+from llama_stack.providers.utils.common.data_schema_validator import ColumnName
+from llama_stack_api import (
+    Agents,
+    Benchmark,
+    BenchmarkConfig,
+    BenchmarksProtocolPrivate,
+    DatasetIO,
+    Datasets,
+    Eval,
+    EvaluateResponse,
     Inference,
+    Job,
+    JobStatus,
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAICompletionRequestWithExtraBody,
     OpenAISystemMessageParam,
     OpenAIUserMessageParam,
-
-)
-from llama_stack.apis.scoring import Scoring
-from llama_stack.providers.datatypes import BenchmarksProtocolPrivate
-from llama_stack.providers.inline.agents.meta_reference.agent_instance import (
-    MEMORY_QUERY_TOOL,
+    Scoring,
 )
-from llama_stack.providers.utils.common.data_schema_validator import ColumnName
-from llama_stack.providers.utils.kvstore import kvstore_impl

-from .....apis.common.job_types import Job, JobStatus
-from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse
 from .config import MetaReferenceEvalConfig

 EVAL_TASKS_PREFIX = "benchmarks:"
@@ -118,49 +117,6 @@ class MetaReferenceEvalImpl(
         self.jobs[job_id] = res
         return Job(job_id=job_id, status=JobStatus.completed)

-    async def _run_agent_generation(
-        self, input_rows: list[dict[str, Any]], benchmark_config: BenchmarkConfig
-    ) -> list[dict[str, Any]]:
-        candidate = benchmark_config.eval_candidate
-        create_response = await self.agents_api.create_agent(candidate.config)
-        agent_id = create_response.agent_id
-
-        generations = []
-        for i, x in tqdm(enumerate(input_rows)):
-            assert ColumnName.chat_completion_input.value in x, "Invalid input row"
-            input_messages = json.loads(x[ColumnName.chat_completion_input.value])
-            input_messages = [UserMessage(**x) for x in input_messages if x["role"] == "user"]
-
-            # NOTE: only single-turn agent generation is supported. Create a new session for each input row
-            session_create_response = await self.agents_api.create_agent_session(agent_id, f"session-{i}")
-            session_id = session_create_response.session_id
-
-            turn_request = dict(
-                agent_id=agent_id,
-                session_id=session_id,
-                messages=input_messages,
-                stream=True,
-            )
-            turn_response = [chunk async for chunk in await self.agents_api.create_agent_turn(**turn_request)]
-            final_event = turn_response[-1].event.payload
-
-            # check if there's a memory retrieval step and extract the context
-            memory_rag_context = None
-            for step in final_event.turn.steps:
-                if step.step_type == StepType.tool_execution.value:
-                    for tool_response in step.tool_responses:
-                        if tool_response.tool_name == MEMORY_QUERY_TOOL:
-                            memory_rag_context = " ".join(x.text for x in tool_response.content)
-
-            agent_generation = {}
-            agent_generation[ColumnName.generated_answer.value] = final_event.turn.output_message.content
-            if memory_rag_context:
-                agent_generation[ColumnName.context.value] = memory_rag_context
-
-            generations.append(agent_generation)
-
-        return generations
-
     async def _run_model_generation(
         self, input_rows: list[dict[str, Any]], benchmark_config: BenchmarkConfig
     ) -> list[dict[str, Any]]:
@@ -215,9 +171,8 @@ class MetaReferenceEvalImpl(
         benchmark_config: BenchmarkConfig,
     ) -> EvaluateResponse:
         candidate = benchmark_config.eval_candidate
-        if candidate.type == "agent":
-            generations = await self._run_agent_generation(input_rows, benchmark_config)
-        elif candidate.type == "model":
+        # Agent evaluation removed
+        if candidate.type == "model":
             generations = await self._run_model_generation(input_rows, benchmark_config)
         else:
             raise ValueError(f"Invalid candidate type: {candidate.type}")
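
Two changes land here: eval types now import flat from llama_stack_api, and the agent generation path (_run_agent_generation, along with its MEMORY_QUERY_TOOL dependency) is deleted outright rather than deprecated. Based on the final hunk above, an eval_candidate of any type other than "model" now raises; a short sketch of the surviving surface, using only names visible in this diff:

    from llama_stack_api import BenchmarkConfig, Eval, EvaluateResponse, Job, JobStatus

    # 0.4.0: only model candidates are generated;
    # candidate.type == "agent" -> ValueError: Invalid candidate type: agent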
llama_stack/providers/inline/files/localfs/files.py

@@ -7,27 +7,30 @@
 import time
 import uuid
 from pathlib import Path
-from typing import Annotated

-from fastapi import
+from fastapi import Response, UploadFile

-from llama_stack.
-from llama_stack.
-from llama_stack.
-
+from llama_stack.core.access_control.datatypes import Action
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.id_generation import generate_object_id
+from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl
+from llama_stack.log import get_logger
+from llama_stack_api import (
+    DeleteFileRequest,
     Files,
+    ListFilesRequest,
     ListOpenAIFileResponse,
     OpenAIFileDeleteResponse,
     OpenAIFileObject,
     OpenAIFilePurpose,
+    Order,
+    ResourceNotFoundError,
+    RetrieveFileContentRequest,
+    RetrieveFileRequest,
+    UploadFileRequest,
 )
-from
-from llama_stack.core.id_generation import generate_object_id
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.files.form_data import parse_expires_after
-from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
-from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
+from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType

 from .config import LocalfsFilesImplConfig

@@ -72,12 +75,12 @@ class LocalfsFilesImpl(Files):
         """Get the filesystem path for a file ID."""
         return Path(self.config.storage_dir) / file_id

-    async def _lookup_file_id(self, file_id: str) -> tuple[OpenAIFileObject, Path]:
+    async def _lookup_file_id(self, file_id: str, action: Action = Action.READ) -> tuple[OpenAIFileObject, Path]:
         """Look up a OpenAIFileObject and filesystem path from its ID."""
         if not self.sql_store:
             raise RuntimeError("Files provider not initialized")

-        row = await self.sql_store.fetch_one("openai_files", where={"id": file_id})
+        row = await self.sql_store.fetch_one("openai_files", where={"id": file_id}, action=action)
         if not row:
             raise ResourceNotFoundError(file_id, "File", "client.files.list()")

@@ -87,14 +90,16 @@ class LocalfsFilesImpl(Files):
     # OpenAI Files API Implementation
     async def openai_upload_file(
         self,
-
-
-        expires_after: Annotated[ExpiresAfter | None, Depends(parse_expires_after)] = None,
+        request: UploadFileRequest,
+        file: UploadFile,
     ) -> OpenAIFileObject:
         """Upload a file that can be used across various endpoints."""
         if not self.sql_store:
             raise RuntimeError("Files provider not initialized")

+        purpose = request.purpose
+        expires_after = request.expires_after
+
         if expires_after is not None:
             logger.warning(
                 f"File expiration is not supported by this provider, ignoring expires_after: {expires_after}"
@@ -136,15 +141,17 @@ class LocalfsFilesImpl(Files):

     async def openai_list_files(
         self,
-        after: str | None = None,
-        limit: int | None = 10000,
-        order: Order | None = Order.desc,
-        purpose: OpenAIFilePurpose | None = None,
+        request: ListFilesRequest,
     ) -> ListOpenAIFileResponse:
         """Returns a list of files that belong to the user's organization."""
         if not self.sql_store:
             raise RuntimeError("Files provider not initialized")

+        after = request.after
+        limit = request.limit
+        order = request.order
+        purpose = request.purpose
+
         if not order:
             order = Order.desc

@@ -179,16 +186,17 @@ class LocalfsFilesImpl(Files):
             last_id=files[-1].id if files else "",
         )

-    async def openai_retrieve_file(self,
+    async def openai_retrieve_file(self, request: RetrieveFileRequest) -> OpenAIFileObject:
         """Returns information about a specific file."""
-        file_obj, _ = await self._lookup_file_id(file_id)
+        file_obj, _ = await self._lookup_file_id(request.file_id)

         return file_obj

-    async def openai_delete_file(self,
+    async def openai_delete_file(self, request: DeleteFileRequest) -> OpenAIFileDeleteResponse:
         """Delete a file."""
+        file_id = request.file_id
         # Delete physical file
-        _, file_path = await self._lookup_file_id(file_id)
+        _, file_path = await self._lookup_file_id(file_id, action=Action.DELETE)
         if file_path.exists():
             file_path.unlink()

@@ -201,14 +209,15 @@ class LocalfsFilesImpl(Files):
             deleted=True,
         )

-    async def openai_retrieve_file_content(self,
+    async def openai_retrieve_file_content(self, request: RetrieveFileContentRequest) -> Response:
         """Returns the contents of the specified file."""
+        file_id = request.file_id
         # Read file content
         file_obj, file_path = await self._lookup_file_id(file_id)

         if not file_path.exists():
             logger.warning(f"File '{file_id}'s underlying '{file_path}' is missing, deleting metadata.")
-            await self.openai_delete_file(file_id)
+            await self.openai_delete_file(DeleteFileRequest(file_id=file_id))
             raise ResourceNotFoundError(file_id, "File", "client.files.list()")

         # Return as binary response with appropriate content type
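
The Files provider follows the same request-model convention and additionally threads access control through lookups: reads use Action.READ by default, and deletes pass Action.DELETE into AuthorizedSqlStore.fetch_one. A caller-side sketch, assuming files is a Files implementation, upload is an UploadFile the caller already constructed, and the request models default their optional fields (mirroring the old keyword defaults):

    from llama_stack_api import (
        DeleteFileRequest,
        ListFilesRequest,
        OpenAIFilePurpose,
        RetrieveFileContentRequest,
        UploadFileRequest,
    )

    # Upload: metadata rides in the request model, bytes in the UploadFile.
    uploaded = await files.openai_upload_file(
        request=UploadFileRequest(purpose=OpenAIFilePurpose.BATCH),
        file=upload,
    )
    listed = await files.openai_list_files(ListFilesRequest())
    content = await files.openai_retrieve_file_content(
        RetrieveFileContentRequest(file_id=uploaded.id)
    )
    await files.openai_delete_file(DeleteFileRequest(file_id=uploaded.id))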
llama_stack/providers/inline/inference/meta_reference/config.py

@@ -8,15 +8,15 @@ from typing import Any

 from pydantic import BaseModel, field_validator

-from llama_stack.apis.inference import QuantizationConfig
 from llama_stack.providers.utils.inference import supported_inference_models
+from llama_stack_api import QuantizationConfig


 class MetaReferenceInferenceConfig(BaseModel):
     # this is a placeholder to indicate inference model id
     # the actual inference model id is dtermined by the moddel id in the request
     # Note: you need to register the model before using it for inference
-    # models in the resouce list in the
+    # models in the resouce list in the config.yaml config will be registered automatically
     model: str | None = None
     torch_seed: int | None = None
     max_seq_len: int = 4096