llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
|
@@ -5,12 +5,12 @@
|
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
from llama_stack.
|
|
8
|
+
from llama_stack.core.storage.kvstore import kvstore_dependencies
|
|
9
|
+
from llama_stack_api import (
|
|
9
10
|
Api,
|
|
10
11
|
InlineProviderSpec,
|
|
11
12
|
ProviderSpec,
|
|
12
13
|
)
|
|
13
|
-
from llama_stack.providers.utils.kvstore import kvstore_dependencies
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def available_providers() -> list[ProviderSpec]:
|
|
@@ -20,6 +20,7 @@ def available_providers() -> list[ProviderSpec]:
|
|
|
20
20
|
provider_type="inline::meta-reference",
|
|
21
21
|
pip_packages=[
|
|
22
22
|
"matplotlib",
|
|
23
|
+
"fonttools>=4.60.2",
|
|
23
24
|
"pillow",
|
|
24
25
|
"pandas",
|
|
25
26
|
"scikit-learn",
|
|
@@ -30,11 +31,15 @@ def available_providers() -> list[ProviderSpec]:
|
|
|
30
31
|
config_class="llama_stack.providers.inline.agents.meta_reference.MetaReferenceAgentsImplConfig",
|
|
31
32
|
api_dependencies=[
|
|
32
33
|
Api.inference,
|
|
33
|
-
Api.safety,
|
|
34
34
|
Api.vector_io,
|
|
35
35
|
Api.tool_runtime,
|
|
36
36
|
Api.tool_groups,
|
|
37
37
|
Api.conversations,
|
|
38
|
+
Api.prompts,
|
|
39
|
+
Api.files,
|
|
40
|
+
],
|
|
41
|
+
optional_api_dependencies=[
|
|
42
|
+
Api.safety,
|
|
38
43
|
],
|
|
39
44
|
description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.",
|
|
40
45
|
),
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
from
|
|
8
|
+
from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def available_providers() -> list[ProviderSpec]:
|
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from llama_stack.
|
|
8
|
-
from
|
|
7
|
+
from llama_stack.core.storage.sqlstore.sqlstore import sql_store_pip_packages
|
|
8
|
+
from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def available_providers() -> list[ProviderSpec]:
|
|
@@ -28,4 +28,13 @@ def available_providers() -> list[ProviderSpec]:
|
|
|
28
28
|
config_class="llama_stack.providers.remote.files.s3.config.S3FilesImplConfig",
|
|
29
29
|
description="AWS S3-based file storage provider for scalable cloud file management with metadata persistence.",
|
|
30
30
|
),
|
|
31
|
+
RemoteProviderSpec(
|
|
32
|
+
api=Api.files,
|
|
33
|
+
provider_type="remote::openai",
|
|
34
|
+
adapter_type="openai",
|
|
35
|
+
pip_packages=["openai"] + sql_store_pip_packages,
|
|
36
|
+
module="llama_stack.providers.remote.files.openai",
|
|
37
|
+
config_class="llama_stack.providers.remote.files.openai.config.OpenAIFilesImplConfig",
|
|
38
|
+
description="OpenAI Files API provider for managing files through OpenAI's native file storage service.",
|
|
39
|
+
),
|
|
31
40
|
]
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
from
|
|
8
|
+
from llama_stack_api import (
|
|
9
9
|
Api,
|
|
10
10
|
InlineProviderSpec,
|
|
11
11
|
ProviderSpec,
|
|
@@ -61,6 +61,7 @@ def available_providers() -> list[ProviderSpec]:
|
|
|
61
61
|
pip_packages=[],
|
|
62
62
|
module="llama_stack.providers.remote.inference.cerebras",
|
|
63
63
|
config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig",
|
|
64
|
+
provider_data_validator="llama_stack.providers.remote.inference.cerebras.config.CerebrasProviderDataValidator",
|
|
64
65
|
description="Cerebras inference provider for running models on Cerebras Cloud platform.",
|
|
65
66
|
),
|
|
66
67
|
RemoteProviderSpec(
|
|
@@ -137,10 +138,11 @@ def available_providers() -> list[ProviderSpec]:
|
|
|
137
138
|
api=Api.inference,
|
|
138
139
|
adapter_type="bedrock",
|
|
139
140
|
provider_type="remote::bedrock",
|
|
140
|
-
pip_packages=[
|
|
141
|
+
pip_packages=[],
|
|
141
142
|
module="llama_stack.providers.remote.inference.bedrock",
|
|
142
143
|
config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig",
|
|
143
|
-
|
|
144
|
+
provider_data_validator="llama_stack.providers.remote.inference.bedrock.config.BedrockProviderDataValidator",
|
|
145
|
+
description="AWS Bedrock inference provider using OpenAI compatible endpoint.",
|
|
144
146
|
),
|
|
145
147
|
RemoteProviderSpec(
|
|
146
148
|
api=Api.inference,
|
|
@@ -149,6 +151,7 @@ def available_providers() -> list[ProviderSpec]:
|
|
|
149
151
|
pip_packages=["databricks-sdk"],
|
|
150
152
|
module="llama_stack.providers.remote.inference.databricks",
|
|
151
153
|
config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
|
|
154
|
+
provider_data_validator="llama_stack.providers.remote.inference.databricks.config.DatabricksProviderDataValidator",
|
|
152
155
|
description="Databricks inference provider for running models on Databricks' unified analytics platform.",
|
|
153
156
|
),
|
|
154
157
|
RemoteProviderSpec(
|
|
@@ -158,6 +161,7 @@ def available_providers() -> list[ProviderSpec]:
|
|
|
158
161
|
pip_packages=[],
|
|
159
162
|
module="llama_stack.providers.remote.inference.nvidia",
|
|
160
163
|
config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig",
|
|
164
|
+
provider_data_validator="llama_stack.providers.remote.inference.nvidia.config.NVIDIAProviderDataValidator",
|
|
161
165
|
description="NVIDIA inference provider for accessing NVIDIA NIM models and AI services.",
|
|
162
166
|
),
|
|
163
167
|
RemoteProviderSpec(
|
|
@@ -167,6 +171,7 @@ def available_providers() -> list[ProviderSpec]:
|
|
|
167
171
|
pip_packages=[],
|
|
168
172
|
module="llama_stack.providers.remote.inference.runpod",
|
|
169
173
|
config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig",
|
|
174
|
+
provider_data_validator="llama_stack.providers.remote.inference.runpod.config.RunpodProviderDataValidator",
|
|
170
175
|
description="RunPod inference provider for running models on RunPod's cloud GPU platform.",
|
|
171
176
|
),
|
|
172
177
|
RemoteProviderSpec(
|
|
@@ -292,6 +297,20 @@ Available Models:
|
|
|
292
297
|
Azure OpenAI inference provider for accessing GPT models and other Azure services.
|
|
293
298
|
Provider documentation
|
|
294
299
|
https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
|
|
300
|
+
""",
|
|
301
|
+
),
|
|
302
|
+
RemoteProviderSpec(
|
|
303
|
+
api=Api.inference,
|
|
304
|
+
provider_type="remote::oci",
|
|
305
|
+
adapter_type="oci",
|
|
306
|
+
pip_packages=["oci"],
|
|
307
|
+
module="llama_stack.providers.remote.inference.oci",
|
|
308
|
+
config_class="llama_stack.providers.remote.inference.oci.config.OCIConfig",
|
|
309
|
+
provider_data_validator="llama_stack.providers.remote.inference.oci.config.OCIProviderDataValidator",
|
|
310
|
+
description="""
|
|
311
|
+
Oracle Cloud Infrastructure (OCI) Generative AI inference provider for accessing OCI's Generative AI Platform-as-a-Service models.
|
|
312
|
+
Provider documentation
|
|
313
|
+
https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm
|
|
295
314
|
""",
|
|
296
315
|
),
|
|
297
316
|
]
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
|
|
8
8
|
from typing import cast
|
|
9
9
|
|
|
10
|
-
from
|
|
10
|
+
from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
|
|
11
11
|
|
|
12
12
|
# We provide two versions of these providers so that distributions can package the appropriate version of torch.
|
|
13
13
|
# The CPU version is used for distributions that don't have GPU support -- they result in smaller container images.
|
|
@@ -5,13 +5,13 @@
|
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
from llama_stack.providers.
|
|
8
|
+
from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS
|
|
9
|
+
from llama_stack_api import (
|
|
9
10
|
Api,
|
|
10
11
|
InlineProviderSpec,
|
|
11
12
|
ProviderSpec,
|
|
12
13
|
RemoteProviderSpec,
|
|
13
14
|
)
|
|
14
|
-
from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def available_providers() -> list[ProviderSpec]:
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
from
|
|
8
|
+
from llama_stack_api import (
|
|
9
9
|
Api,
|
|
10
10
|
InlineProviderSpec,
|
|
11
11
|
ProviderSpec,
|
|
@@ -163,14 +163,14 @@ The SQLite-vec provider supports three search modes:
|
|
|
163
163
|
Example with hybrid search:
|
|
164
164
|
```python
|
|
165
165
|
response = await vector_io.query_chunks(
|
|
166
|
-
|
|
166
|
+
vector_store_id="my_db",
|
|
167
167
|
query="your query here",
|
|
168
168
|
params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
|
|
169
169
|
)
|
|
170
170
|
|
|
171
171
|
# Using RRF ranker
|
|
172
172
|
response = await vector_io.query_chunks(
|
|
173
|
-
|
|
173
|
+
vector_store_id="my_db",
|
|
174
174
|
query="your query here",
|
|
175
175
|
params={
|
|
176
176
|
"mode": "hybrid",
|
|
@@ -182,7 +182,7 @@ response = await vector_io.query_chunks(
|
|
|
182
182
|
|
|
183
183
|
# Using weighted ranker
|
|
184
184
|
response = await vector_io.query_chunks(
|
|
185
|
-
|
|
185
|
+
vector_store_id="my_db",
|
|
186
186
|
query="your query here",
|
|
187
187
|
params={
|
|
188
188
|
"mode": "hybrid",
|
|
@@ -196,7 +196,7 @@ response = await vector_io.query_chunks(
|
|
|
196
196
|
Example with explicit vector search:
|
|
197
197
|
```python
|
|
198
198
|
response = await vector_io.query_chunks(
|
|
199
|
-
|
|
199
|
+
vector_store_id="my_db",
|
|
200
200
|
query="your query here",
|
|
201
201
|
params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
|
|
202
202
|
)
|
|
@@ -205,7 +205,7 @@ response = await vector_io.query_chunks(
|
|
|
205
205
|
Example with keyword search:
|
|
206
206
|
```python
|
|
207
207
|
response = await vector_io.query_chunks(
|
|
208
|
-
|
|
208
|
+
vector_store_id="my_db",
|
|
209
209
|
query="your query here",
|
|
210
210
|
params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
|
|
211
211
|
)
|
|
@@ -244,7 +244,7 @@ Two ranker types are supported:
|
|
|
244
244
|
Example using RAGQueryConfig with different search modes:
|
|
245
245
|
|
|
246
246
|
```python
|
|
247
|
-
from
|
|
247
|
+
from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker
|
|
248
248
|
|
|
249
249
|
# Vector search
|
|
250
250
|
config = RAGQueryConfig(mode="vector", max_chunks=5)
|
|
@@ -6,12 +6,9 @@
|
|
|
6
6
|
from typing import Any
|
|
7
7
|
from urllib.parse import parse_qs, urlparse
|
|
8
8
|
|
|
9
|
-
from llama_stack.
|
|
10
|
-
from llama_stack.apis.datasetio import DatasetIO
|
|
11
|
-
from llama_stack.apis.datasets import Dataset
|
|
12
|
-
from llama_stack.providers.datatypes import DatasetsProtocolPrivate
|
|
13
|
-
from llama_stack.providers.utils.kvstore import kvstore_impl
|
|
9
|
+
from llama_stack.core.storage.kvstore import kvstore_impl
|
|
14
10
|
from llama_stack.providers.utils.pagination import paginate_records
|
|
11
|
+
from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
|
|
15
12
|
|
|
16
13
|
from .config import HuggingfaceDatasetIOConfig
|
|
17
14
|
|
|
@@ -8,10 +8,7 @@ from typing import Any
|
|
|
8
8
|
|
|
9
9
|
import aiohttp
|
|
10
10
|
|
|
11
|
-
from
|
|
12
|
-
from llama_stack.apis.common.responses import PaginatedResponse
|
|
13
|
-
from llama_stack.apis.common.type_system import ParamType
|
|
14
|
-
from llama_stack.apis.datasets import Dataset
|
|
11
|
+
from llama_stack_api import URL, Dataset, PaginatedResponse, ParamType
|
|
15
12
|
|
|
16
13
|
from .config import NvidiaDatasetIOConfig
|
|
17
14
|
|
|
@@ -7,17 +7,23 @@ from typing import Any
|
|
|
7
7
|
|
|
8
8
|
import requests
|
|
9
9
|
|
|
10
|
-
from llama_stack.apis.agents import Agents
|
|
11
|
-
from llama_stack.apis.benchmarks import Benchmark
|
|
12
|
-
from llama_stack.apis.datasetio import DatasetIO
|
|
13
|
-
from llama_stack.apis.datasets import Datasets
|
|
14
|
-
from llama_stack.apis.inference import Inference
|
|
15
|
-
from llama_stack.apis.scoring import Scoring, ScoringResult
|
|
16
|
-
from llama_stack.providers.datatypes import BenchmarksProtocolPrivate
|
|
17
10
|
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
|
|
11
|
+
from llama_stack_api import (
|
|
12
|
+
Agents,
|
|
13
|
+
Benchmark,
|
|
14
|
+
BenchmarkConfig,
|
|
15
|
+
BenchmarksProtocolPrivate,
|
|
16
|
+
DatasetIO,
|
|
17
|
+
Datasets,
|
|
18
|
+
Eval,
|
|
19
|
+
EvaluateResponse,
|
|
20
|
+
Inference,
|
|
21
|
+
Job,
|
|
22
|
+
JobStatus,
|
|
23
|
+
Scoring,
|
|
24
|
+
ScoringResult,
|
|
25
|
+
)
|
|
18
26
|
|
|
19
|
-
from .....apis.common.job_types import Job, JobStatus
|
|
20
|
-
from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse
|
|
21
27
|
from .config import NVIDIAEvalConfig
|
|
22
28
|
|
|
23
29
|
DEFAULT_NAMESPACE = "nvidia"
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from llama_stack.core.datatypes import AccessRule, Api
|
|
10
|
+
|
|
11
|
+
from .config import OpenAIFilesImplConfig
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
async def get_adapter_impl(config: OpenAIFilesImplConfig, deps: dict[Api, Any], policy: list[AccessRule] | None = None):
    """Build and initialize the OpenAI Files provider implementation.

    The implementation module is imported lazily inside the factory so that
    registering the provider does not require the `openai` package to be
    importable until the adapter is actually instantiated.

    :param config: provider configuration (API key + metadata store reference)
    :param deps: resolved API dependencies (unused by this provider)
    :param policy: access-control rules; an empty policy is used when None
    :returns: an initialized OpenAIFilesImpl instance
    """
    from .files import OpenAIFilesImpl

    adapter = OpenAIFilesImpl(config, [] if policy is None else policy)
    await adapter.initialize()
    return adapter
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
|
|
11
|
+
from llama_stack.core.storage.datatypes import SqlStoreReference
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class OpenAIFilesImplConfig(BaseModel):
    """Configuration for OpenAI Files API provider."""

    # Secret used to authenticate against the OpenAI API; in deployments it is
    # normally supplied via the OPENAI_API_KEY env var (see sample_run_config).
    api_key: str = Field(description="OpenAI API key for authentication")
    # Reference to the SQL backend/table where per-file metadata (id, filename,
    # purpose, size, timestamps) is persisted alongside the remote storage.
    metadata_store: SqlStoreReference = Field(description="SQL store configuration for file metadata")

    @classmethod
    def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
        """Return a sample run configuration dict for this provider.

        ``__distro_dir__`` is part of the sample-config calling convention and
        is unused here; the API key is emitted as an environment-variable
        placeholder rather than a literal secret.
        """
        return {
            "api_key": "${env.OPENAI_API_KEY}",
            "metadata_store": SqlStoreReference(
                backend="sql_default",
                table_name="openai_files_metadata",
            ).model_dump(exclude_none=True),
        }
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from datetime import UTC, datetime
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from fastapi import Response, UploadFile
|
|
11
|
+
|
|
12
|
+
from llama_stack.core.access_control.datatypes import Action
|
|
13
|
+
from llama_stack.core.datatypes import AccessRule
|
|
14
|
+
from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
|
|
15
|
+
from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl
|
|
16
|
+
from llama_stack_api import (
|
|
17
|
+
DeleteFileRequest,
|
|
18
|
+
ExpiresAfter,
|
|
19
|
+
Files,
|
|
20
|
+
ListFilesRequest,
|
|
21
|
+
ListOpenAIFileResponse,
|
|
22
|
+
OpenAIFileDeleteResponse,
|
|
23
|
+
OpenAIFileObject,
|
|
24
|
+
OpenAIFilePurpose,
|
|
25
|
+
Order,
|
|
26
|
+
ResourceNotFoundError,
|
|
27
|
+
RetrieveFileContentRequest,
|
|
28
|
+
RetrieveFileRequest,
|
|
29
|
+
UploadFileRequest,
|
|
30
|
+
)
|
|
31
|
+
from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType
|
|
32
|
+
from openai import OpenAI
|
|
33
|
+
|
|
34
|
+
from .config import OpenAIFilesImplConfig
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _make_file_object(
    *,
    id: str,
    filename: str,
    purpose: str,
    bytes: int,
    created_at: int,
    expires_at: int,
    **kwargs: Any,
) -> OpenAIFileObject:
    """Build an OpenAIFileObject from stored metadata, normalizing expiry.

    An ``expires_at`` beyond ``created_at + ExpiresAfter.MAX`` is treated as
    the internal "never expires" sentinel, so the returned object carries
    ``expires_at = None`` in that case. Extra keyword arguments (e.g. columns
    the caller passes through from a DB row) are accepted and ignored.
    """
    file_object = OpenAIFileObject(
        id=id,
        filename=filename,
        purpose=OpenAIFilePurpose(purpose),
        bytes=bytes,
        created_at=created_at,
        expires_at=expires_at,
    )

    # Anything later than created_at + MAX cannot be a real user-requested
    # expiration, so surface it as "no expiration" to callers.
    no_expiration_cutoff = file_object.created_at + ExpiresAfter.MAX
    if file_object.expires_at is not None and file_object.expires_at > no_expiration_cutoff:
        file_object.expires_at = None  # type: ignore

    return file_object
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class OpenAIFilesImpl(Files):
    """OpenAI Files API implementation.

    Proxies file content to OpenAI's native Files service while keeping a
    local, access-controlled metadata record (id, filename, purpose, size,
    created_at, expires_at) in an AuthorizedSqlStore. Expiration is enforced
    lazily: expired rows are filtered from queries and physically deleted on
    access.
    """

    def __init__(self, config: OpenAIFilesImplConfig, policy: list[AccessRule]) -> None:
        # Client and store are created in initialize(); until then the
        # `client` / `sql_store` properties assert against use.
        self._config = config
        self.policy = policy
        self._client: OpenAI | None = None
        self._sql_store: AuthorizedSqlStore | None = None

    def _now(self) -> int:
        """Return current UTC timestamp as int seconds."""
        return int(datetime.now(UTC).timestamp())

    async def _get_file(
        self, file_id: str, return_expired: bool = False, action: Action = Action.READ
    ) -> dict[str, Any]:
        """Fetch the metadata row for `file_id`, or raise ResourceNotFoundError.

        Unless `return_expired` is set, rows whose expires_at has passed are
        treated as nonexistent. `action` is the access-control action checked
        by the authorized store.
        """
        where: dict[str, str | dict] = {"id": file_id}
        if not return_expired:
            where["expires_at"] = {">": self._now()}
        if not (row := await self.sql_store.fetch_one("openai_files", where=where, action=action)):
            raise ResourceNotFoundError(file_id, "File", "files.list()")
        return row

    async def _delete_file(self, file_id: str) -> None:
        """Delete a file from OpenAI and the database."""
        try:
            # NOTE(review): sync client call inside an async method blocks the
            # event loop for the duration of the HTTP request — confirm whether
            # AsyncOpenAI should be used here.
            self.client.files.delete(file_id)
        except Exception as e:
            # If file doesn't exist on OpenAI side, just remove from metadata store.
            # String matching on the message is a heuristic; any other failure
            # is surfaced and the local row is kept.
            if "not found" not in str(e).lower():
                raise RuntimeError(f"Failed to delete file from OpenAI: {e}") from e

        await self.sql_store.delete("openai_files", where={"id": file_id})

    async def _delete_if_expired(self, file_id: str) -> None:
        """If the file exists and is expired, delete it."""
        # _get_file raises if the row is missing entirely, so the truthiness
        # guard only matters for the expiry check on an existing row.
        if row := await self._get_file(file_id, return_expired=True):
            if (expires_at := row.get("expires_at")) and expires_at <= self._now():
                await self._delete_file(file_id)

    async def initialize(self) -> None:
        """Create the OpenAI client and the metadata table (idempotent setup)."""
        self._client = OpenAI(api_key=self._config.api_key)

        self._sql_store = AuthorizedSqlStore(sqlstore_impl(self._config.metadata_store), self.policy)
        await self._sql_store.create_table(
            "openai_files",
            {
                "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
                "filename": ColumnType.STRING,
                "purpose": ColumnType.STRING,
                "bytes": ColumnType.INTEGER,
                "created_at": ColumnType.INTEGER,
                "expires_at": ColumnType.INTEGER,
            },
        )

    async def shutdown(self) -> None:
        # Nothing to release: the sync OpenAI client holds no resources that
        # require explicit teardown here.
        pass

    @property
    def client(self) -> OpenAI:
        assert self._client is not None, "Provider not initialized"
        return self._client

    @property
    def sql_store(self) -> AuthorizedSqlStore:
        assert self._sql_store is not None, "Provider not initialized"
        return self._sql_store

    async def openai_upload_file(
        self,
        request: UploadFileRequest,
        file: UploadFile,
    ) -> OpenAIFileObject:
        """Upload `file` to OpenAI and record its metadata locally.

        Raises RuntimeError (wrapping the cause) if the remote upload or the
        metadata insert fails. Note: if the insert fails after a successful
        remote upload, the remote file is left orphaned.
        """
        purpose = request.purpose
        expires_after = request.expires_after

        filename = getattr(file, "filename", None) or "uploaded_file"
        content = await file.read()
        file_size = len(content)

        created_at = self._now()

        # Default is "no expiration": store a sentinel far beyond
        # ExpiresAfter.MAX — _make_file_object maps anything past
        # created_at + MAX back to expires_at=None for callers.
        expires_at = created_at + ExpiresAfter.MAX * 42
        if purpose == OpenAIFilePurpose.BATCH:
            # Batch files always get the maximum finite lifetime.
            expires_at = created_at + ExpiresAfter.MAX

        if expires_after is not None:
            # An explicit caller-provided expiration overrides both defaults.
            expires_at = created_at + expires_after.seconds

        try:
            from io import BytesIO

            # The OpenAI SDK infers the filename from the file object's
            # `name` attribute, so attach it to the in-memory buffer.
            file_obj = BytesIO(content)
            file_obj.name = filename

            # NOTE(review): blocking sync call in an async method — confirm
            # whether this should move to AsyncOpenAI / a thread executor.
            response = self.client.files.create(
                file=file_obj,
                purpose=purpose.value,
            )

            file_id = response.id

            entry: dict[str, Any] = {
                "id": file_id,
                "filename": filename,
                "purpose": purpose.value,
                "bytes": file_size,
                "created_at": created_at,
                "expires_at": expires_at,
            }

            await self.sql_store.insert("openai_files", entry)

            return _make_file_object(**entry)

        except Exception as e:
            raise RuntimeError(f"Failed to upload file to OpenAI: {e}") from e

    async def openai_list_files(
        self,
        request: ListFilesRequest,
    ) -> ListOpenAIFileResponse:
        """List non-expired files from the local metadata store.

        Supports cursor pagination (`after` is an id cursor), an optional
        purpose filter, and created_at ordering (default: descending).
        """
        after = request.after
        limit = request.limit
        order = request.order
        purpose = request.purpose

        if not order:
            order = Order.desc

        # Expired rows are filtered out here but only physically deleted on
        # direct access (retrieve/delete/content paths).
        where_conditions: dict[str, Any] = {"expires_at": {">": self._now()}}
        if purpose:
            where_conditions["purpose"] = purpose.value

        paginated_result = await self.sql_store.fetch_all(
            table="openai_files",
            where=where_conditions,
            order_by=[("created_at", order.value)],
            cursor=("id", after) if after else None,
            limit=limit,
        )

        files = [_make_file_object(**row) for row in paginated_result.data]

        return ListOpenAIFileResponse(
            data=files,
            has_more=paginated_result.has_more,
            first_id=files[0].id if files else "",
            last_id=files[-1].id if files else "",
        )

    async def openai_retrieve_file(self, request: RetrieveFileRequest) -> OpenAIFileObject:
        """Return metadata for one file; expired files are purged then 404."""
        file_id = request.file_id
        await self._delete_if_expired(file_id)
        row = await self._get_file(file_id)
        return _make_file_object(**row)

    async def openai_delete_file(self, request: DeleteFileRequest) -> OpenAIFileDeleteResponse:
        """Delete a file remotely and locally after a DELETE access check."""
        file_id = request.file_id
        await self._delete_if_expired(file_id)
        # Re-fetch with the DELETE action so the access-control policy for
        # deletion is enforced before anything is removed.
        _ = await self._get_file(file_id, action=Action.DELETE)
        await self._delete_file(file_id)
        return OpenAIFileDeleteResponse(id=file_id, deleted=True)

    async def openai_retrieve_file_content(self, request: RetrieveFileContentRequest) -> Response:
        """Stream a file's bytes back as an octet-stream attachment.

        If OpenAI reports the file as missing, the stale local metadata row
        is removed and ResourceNotFoundError is raised.
        """
        file_id = request.file_id
        await self._delete_if_expired(file_id)

        row = await self._get_file(file_id)

        try:
            # NOTE(review): blocking sync download inside an async method.
            response = self.client.files.content(file_id)
            file_content = response.content

        except Exception as e:
            if "not found" in str(e).lower():
                # Remote copy is gone: drop the local metadata and report 404.
                await self._delete_file(file_id)
                raise ResourceNotFoundError(file_id, "File", "files.list()") from e
            raise RuntimeError(f"Failed to download file from OpenAI: {e}") from e

        return Response(
            content=file_content,
            media_type="application/octet-stream",
            headers={"Content-Disposition": f'attachment; filename="{row["filename"]}"'},
        )
|