llama-stack 0.4.3__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (307)
  1. llama_stack/cli/stack/_list_deps.py +11 -7
  2. llama_stack/cli/stack/run.py +3 -25
  3. llama_stack/core/access_control/datatypes.py +78 -0
  4. llama_stack/core/configure.py +2 -2
  5. {llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
  6. llama_stack/core/connectors/connectors.py +162 -0
  7. llama_stack/core/conversations/conversations.py +61 -58
  8. llama_stack/core/datatypes.py +54 -8
  9. llama_stack/core/library_client.py +60 -13
  10. llama_stack/core/prompts/prompts.py +43 -42
  11. llama_stack/core/routers/datasets.py +20 -17
  12. llama_stack/core/routers/eval_scoring.py +143 -53
  13. llama_stack/core/routers/inference.py +20 -9
  14. llama_stack/core/routers/safety.py +30 -42
  15. llama_stack/core/routers/vector_io.py +15 -7
  16. llama_stack/core/routing_tables/models.py +42 -3
  17. llama_stack/core/routing_tables/scoring_functions.py +19 -19
  18. llama_stack/core/routing_tables/shields.py +20 -17
  19. llama_stack/core/routing_tables/vector_stores.py +8 -5
  20. llama_stack/core/server/auth.py +192 -17
  21. llama_stack/core/server/fastapi_router_registry.py +40 -5
  22. llama_stack/core/server/server.py +24 -5
  23. llama_stack/core/stack.py +54 -10
  24. llama_stack/core/storage/datatypes.py +9 -0
  25. llama_stack/core/store/registry.py +1 -1
  26. llama_stack/core/utils/exec.py +2 -2
  27. llama_stack/core/utils/type_inspection.py +16 -2
  28. llama_stack/distributions/dell/config.yaml +4 -1
  29. llama_stack/distributions/dell/doc_template.md +209 -0
  30. llama_stack/distributions/dell/run-with-safety.yaml +4 -1
  31. llama_stack/distributions/nvidia/config.yaml +4 -1
  32. llama_stack/distributions/nvidia/doc_template.md +170 -0
  33. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
  34. llama_stack/distributions/oci/config.yaml +4 -1
  35. llama_stack/distributions/oci/doc_template.md +140 -0
  36. llama_stack/distributions/open-benchmark/config.yaml +9 -1
  37. llama_stack/distributions/postgres-demo/config.yaml +1 -1
  38. llama_stack/distributions/starter/build.yaml +62 -0
  39. llama_stack/distributions/starter/config.yaml +22 -3
  40. llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
  41. llama_stack/distributions/starter/starter.py +13 -1
  42. llama_stack/distributions/starter-gpu/build.yaml +62 -0
  43. llama_stack/distributions/starter-gpu/config.yaml +22 -3
  44. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
  45. llama_stack/distributions/template.py +10 -2
  46. llama_stack/distributions/watsonx/config.yaml +4 -1
  47. llama_stack/log.py +1 -0
  48. llama_stack/models/llama/resources/dog.jpg +0 -0
  49. llama_stack/models/llama/resources/pasta.jpeg +0 -0
  50. llama_stack/models/llama/resources/small_dog.jpg +0 -0
  51. llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
  52. llama_stack/providers/inline/agents/meta_reference/agents.py +57 -61
  53. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +183 -60
  54. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +94 -22
  55. llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
  56. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
  57. llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
  58. llama_stack/providers/inline/batches/reference/batches.py +2 -1
  59. llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
  60. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
  61. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
  62. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
  63. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
  64. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
  65. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
  66. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  67. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
  68. llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
  69. llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
  70. llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
  71. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
  72. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
  73. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +15 -18
  74. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
  75. llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
  76. llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
  77. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
  78. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
  79. llama_stack/providers/registry/agents.py +1 -0
  80. llama_stack/providers/registry/inference.py +1 -9
  81. llama_stack/providers/registry/vector_io.py +136 -16
  82. llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
  83. llama_stack/providers/remote/eval/nvidia/README.md +134 -0
  84. llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
  85. llama_stack/providers/remote/files/s3/README.md +266 -0
  86. llama_stack/providers/remote/files/s3/config.py +5 -3
  87. llama_stack/providers/remote/files/s3/files.py +2 -2
  88. llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
  89. llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
  90. llama_stack/providers/remote/inference/openai/openai.py +2 -0
  91. llama_stack/providers/remote/inference/together/together.py +4 -0
  92. llama_stack/providers/remote/inference/vertexai/config.py +3 -3
  93. llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
  94. llama_stack/providers/remote/inference/vllm/config.py +37 -18
  95. llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
  96. llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
  97. llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
  98. llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
  99. llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
  100. llama_stack/providers/remote/safety/nvidia/README.md +78 -0
  101. llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
  102. llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
  103. llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
  104. llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
  105. llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
  106. llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
  107. llama_stack/providers/remote/vector_io/oci/config.py +41 -0
  108. llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
  109. llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
  110. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
  111. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
  112. llama_stack/providers/utils/bedrock/client.py +3 -3
  113. llama_stack/providers/utils/bedrock/config.py +7 -7
  114. llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
  115. llama_stack/providers/utils/inference/http_client.py +239 -0
  116. llama_stack/providers/utils/inference/litellm_openai_mixin.py +5 -0
  117. llama_stack/providers/utils/inference/model_registry.py +148 -2
  118. llama_stack/providers/utils/inference/openai_compat.py +2 -1
  119. llama_stack/providers/utils/inference/openai_mixin.py +41 -2
  120. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
  121. llama_stack/providers/utils/memory/vector_store.py +46 -19
  122. llama_stack/providers/utils/responses/responses_store.py +40 -6
  123. llama_stack/providers/utils/safety.py +114 -0
  124. llama_stack/providers/utils/tools/mcp.py +44 -3
  125. llama_stack/testing/api_recorder.py +9 -3
  126. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/METADATA +14 -2
  127. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/RECORD +131 -275
  128. llama_stack-0.5.0rc1.dist-info/top_level.txt +1 -0
  129. llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
  130. llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
  131. llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
  132. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
  133. llama_stack/models/llama/hadamard_utils.py +0 -88
  134. llama_stack/models/llama/llama3/args.py +0 -74
  135. llama_stack/models/llama/llama3/generation.py +0 -378
  136. llama_stack/models/llama/llama3/model.py +0 -304
  137. llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
  138. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
  139. llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
  140. llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
  141. llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
  142. llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
  143. llama_stack/models/llama/llama3/quantization/loader.py +0 -316
  144. llama_stack/models/llama/llama3_1/__init__.py +0 -12
  145. llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
  146. llama_stack/models/llama/llama3_1/prompts.py +0 -258
  147. llama_stack/models/llama/llama3_2/__init__.py +0 -5
  148. llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
  149. llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
  150. llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
  151. llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
  152. llama_stack/models/llama/llama3_3/__init__.py +0 -5
  153. llama_stack/models/llama/llama3_3/prompts.py +0 -259
  154. llama_stack/models/llama/llama4/args.py +0 -107
  155. llama_stack/models/llama/llama4/ffn.py +0 -58
  156. llama_stack/models/llama/llama4/moe.py +0 -214
  157. llama_stack/models/llama/llama4/preprocess.py +0 -435
  158. llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
  159. llama_stack/models/llama/llama4/quantization/loader.py +0 -226
  160. llama_stack/models/llama/llama4/vision/__init__.py +0 -5
  161. llama_stack/models/llama/llama4/vision/embedding.py +0 -210
  162. llama_stack/models/llama/llama4/vision/encoder.py +0 -412
  163. llama_stack/models/llama/quantize_impls.py +0 -316
  164. llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
  165. llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
  166. llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
  167. llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
  168. llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
  169. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
  170. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
  171. llama_stack-0.4.3.dist-info/top_level.txt +0 -2
  172. llama_stack_api/__init__.py +0 -945
  173. llama_stack_api/admin/__init__.py +0 -45
  174. llama_stack_api/admin/api.py +0 -72
  175. llama_stack_api/admin/fastapi_routes.py +0 -117
  176. llama_stack_api/admin/models.py +0 -113
  177. llama_stack_api/agents.py +0 -173
  178. llama_stack_api/batches/__init__.py +0 -40
  179. llama_stack_api/batches/api.py +0 -53
  180. llama_stack_api/batches/fastapi_routes.py +0 -113
  181. llama_stack_api/batches/models.py +0 -78
  182. llama_stack_api/benchmarks/__init__.py +0 -43
  183. llama_stack_api/benchmarks/api.py +0 -39
  184. llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  185. llama_stack_api/benchmarks/models.py +0 -109
  186. llama_stack_api/common/__init__.py +0 -5
  187. llama_stack_api/common/content_types.py +0 -101
  188. llama_stack_api/common/errors.py +0 -95
  189. llama_stack_api/common/job_types.py +0 -38
  190. llama_stack_api/common/responses.py +0 -77
  191. llama_stack_api/common/training_types.py +0 -47
  192. llama_stack_api/common/type_system.py +0 -146
  193. llama_stack_api/connectors.py +0 -146
  194. llama_stack_api/conversations.py +0 -270
  195. llama_stack_api/datasetio.py +0 -55
  196. llama_stack_api/datasets/__init__.py +0 -61
  197. llama_stack_api/datasets/api.py +0 -35
  198. llama_stack_api/datasets/fastapi_routes.py +0 -104
  199. llama_stack_api/datasets/models.py +0 -152
  200. llama_stack_api/datatypes.py +0 -373
  201. llama_stack_api/eval.py +0 -137
  202. llama_stack_api/file_processors/__init__.py +0 -27
  203. llama_stack_api/file_processors/api.py +0 -64
  204. llama_stack_api/file_processors/fastapi_routes.py +0 -78
  205. llama_stack_api/file_processors/models.py +0 -42
  206. llama_stack_api/files/__init__.py +0 -35
  207. llama_stack_api/files/api.py +0 -51
  208. llama_stack_api/files/fastapi_routes.py +0 -124
  209. llama_stack_api/files/models.py +0 -107
  210. llama_stack_api/inference.py +0 -1169
  211. llama_stack_api/inspect_api/__init__.py +0 -37
  212. llama_stack_api/inspect_api/api.py +0 -25
  213. llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  214. llama_stack_api/inspect_api/models.py +0 -28
  215. llama_stack_api/internal/kvstore.py +0 -28
  216. llama_stack_api/internal/sqlstore.py +0 -81
  217. llama_stack_api/llama_stack_api/__init__.py +0 -945
  218. llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
  219. llama_stack_api/llama_stack_api/admin/api.py +0 -72
  220. llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
  221. llama_stack_api/llama_stack_api/admin/models.py +0 -113
  222. llama_stack_api/llama_stack_api/agents.py +0 -173
  223. llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
  224. llama_stack_api/llama_stack_api/batches/api.py +0 -53
  225. llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
  226. llama_stack_api/llama_stack_api/batches/models.py +0 -78
  227. llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
  228. llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
  229. llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  230. llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
  231. llama_stack_api/llama_stack_api/common/__init__.py +0 -5
  232. llama_stack_api/llama_stack_api/common/content_types.py +0 -101
  233. llama_stack_api/llama_stack_api/common/errors.py +0 -95
  234. llama_stack_api/llama_stack_api/common/job_types.py +0 -38
  235. llama_stack_api/llama_stack_api/common/responses.py +0 -77
  236. llama_stack_api/llama_stack_api/common/training_types.py +0 -47
  237. llama_stack_api/llama_stack_api/common/type_system.py +0 -146
  238. llama_stack_api/llama_stack_api/connectors.py +0 -146
  239. llama_stack_api/llama_stack_api/conversations.py +0 -270
  240. llama_stack_api/llama_stack_api/datasetio.py +0 -55
  241. llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
  242. llama_stack_api/llama_stack_api/datasets/api.py +0 -35
  243. llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
  244. llama_stack_api/llama_stack_api/datasets/models.py +0 -152
  245. llama_stack_api/llama_stack_api/datatypes.py +0 -373
  246. llama_stack_api/llama_stack_api/eval.py +0 -137
  247. llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
  248. llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
  249. llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
  250. llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
  251. llama_stack_api/llama_stack_api/files/__init__.py +0 -35
  252. llama_stack_api/llama_stack_api/files/api.py +0 -51
  253. llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
  254. llama_stack_api/llama_stack_api/files/models.py +0 -107
  255. llama_stack_api/llama_stack_api/inference.py +0 -1169
  256. llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
  257. llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
  258. llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  259. llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
  260. llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
  261. llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
  262. llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
  263. llama_stack_api/llama_stack_api/models.py +0 -171
  264. llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
  265. llama_stack_api/llama_stack_api/post_training.py +0 -370
  266. llama_stack_api/llama_stack_api/prompts.py +0 -203
  267. llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
  268. llama_stack_api/llama_stack_api/providers/api.py +0 -16
  269. llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
  270. llama_stack_api/llama_stack_api/providers/models.py +0 -24
  271. llama_stack_api/llama_stack_api/py.typed +0 -0
  272. llama_stack_api/llama_stack_api/rag_tool.py +0 -168
  273. llama_stack_api/llama_stack_api/resource.py +0 -37
  274. llama_stack_api/llama_stack_api/router_utils.py +0 -160
  275. llama_stack_api/llama_stack_api/safety.py +0 -132
  276. llama_stack_api/llama_stack_api/schema_utils.py +0 -208
  277. llama_stack_api/llama_stack_api/scoring.py +0 -93
  278. llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
  279. llama_stack_api/llama_stack_api/shields.py +0 -93
  280. llama_stack_api/llama_stack_api/tools.py +0 -226
  281. llama_stack_api/llama_stack_api/vector_io.py +0 -941
  282. llama_stack_api/llama_stack_api/vector_stores.py +0 -53
  283. llama_stack_api/llama_stack_api/version.py +0 -9
  284. llama_stack_api/models.py +0 -171
  285. llama_stack_api/openai_responses.py +0 -1468
  286. llama_stack_api/post_training.py +0 -370
  287. llama_stack_api/prompts.py +0 -203
  288. llama_stack_api/providers/__init__.py +0 -33
  289. llama_stack_api/providers/api.py +0 -16
  290. llama_stack_api/providers/fastapi_routes.py +0 -57
  291. llama_stack_api/providers/models.py +0 -24
  292. llama_stack_api/py.typed +0 -0
  293. llama_stack_api/rag_tool.py +0 -168
  294. llama_stack_api/resource.py +0 -37
  295. llama_stack_api/router_utils.py +0 -160
  296. llama_stack_api/safety.py +0 -132
  297. llama_stack_api/schema_utils.py +0 -208
  298. llama_stack_api/scoring.py +0 -93
  299. llama_stack_api/scoring_functions.py +0 -211
  300. llama_stack_api/shields.py +0 -93
  301. llama_stack_api/tools.py +0 -226
  302. llama_stack_api/vector_io.py +0 -941
  303. llama_stack_api/vector_stores.py +0 -53
  304. llama_stack_api/version.py +0 -9
  305. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/WHEEL +0 -0
  306. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/entry_points.txt +0 -0
  307. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/licenses/LICENSE +0 -0
llama_stack/models/llama/llama3_2/prompts_text.py
@@ -1,229 +0,0 @@
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
- #
- # This source code is licensed under the terms described in the LICENSE file in
- # the root directory of this source tree.
-
- import json
- import textwrap
-
- from llama_stack.models.llama.datatypes import (
- RawMessage,
- StopReason,
- ToolCall,
- ToolPromptFormat,
- )
-
- from ..prompt_format import (
- TextCompletionContent,
- UseCase,
- llama3_1_builtin_code_interpreter_dialog,
- )
-
-
- def user_tool_call():
- content = textwrap.dedent(
- """
- Questions: Can you retrieve the details for the user with the ID 7890, who has black as their special request?
- Here is a list of functions in JSON format that you can invoke:
- [
- {
- "name": "get_user_info",
- "description": "Retrieve details for a specific user by their unique identifier. Note that the provided function is in Python 3 syntax.",
- "parameters": {
- "type": "dict",
- "required": [
- "user_id"
- ],
- "properties": {
- "user_id": {
- "type": "integer",
- "description": "The unique identifier of the user. It is used to fetch the specific user details from the database."
- },
- "special": {
- "type": "string",
- "description": "Any special information or parameters that need to be considered while fetching user details.",
- "default": "none"
- }
- }
- }
- }
- ]
-
- Should you decide to return the function call(s),Put it in the format of [func1(params_name=params_value, params_name2=params_value2...), func2(params)]
-
- NO other text MUST be included.
- """
- )
- return content.strip()
-
-
- def system_tool_call():
- content = textwrap.dedent(
- """
- You are an expert in composing functions. You are given a question and a set of possible functions.
- Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
- If none of the function can be used, point it out. If the given question lacks the parameters required by the function,
- also point it out. You should only return the function call in tools call sections.
-
- If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]
- You SHOULD NOT include any other text in the response.
-
- Here is a list of functions in JSON format that you can invoke.
-
- [
- {
- "name": "get_weather",
- "description": "Get weather info for places",
- "parameters": {
- "type": "dict",
- "required": [
- "city"
- ],
- "properties": {
- "city": {
- "type": "string",
- "description": "The name of the city to get the weather for"
- },
- "metric": {
- "type": "string",
- "description": "The metric for weather. Options are: celsius, fahrenheit",
- "default": "celsius"
- }
- }
- }
- }
- ]
- """
- )
- return content.strip()
-
-
- def usecases():
- return [
- UseCase(
- title="User and assistant conversation",
- description="Here is a regular multi-turn user assistant conversation and how its formatted.",
- dialogs=[
- [
- RawMessage(role="system", content="You are a helpful assistant"),
- RawMessage(role="user", content="Who are you?"),
- ]
- ],
- notes="This format is unchanged from Llama3.1",
- ),
- UseCase(
- title="Zero shot function calling",
- description=textwrap.dedent(
- """
- For Llama3.2 1B and 3B instruct models, we are introducing a new format for zero shot function calling.
- This new format is designed to be more flexible and powerful than the previous format.
- All available functions can be provided in the system message. A key difference is in the format of how the assistant responds with function calls.
- It is pythonic in the form of `[func1(params_name=params_value, params_name2=params_value2...), func2(params)]` instead of the `json` or `<function>` tag that were defined in Llama3.1.
- Here is an example for the same,
- """
- ),
- dialogs=[
- # Zero shot tool calls as system message
- [
- RawMessage(role="system", content=system_tool_call()),
- RawMessage(role="user", content="What is the weather in SF and Seattle?"),
- ],
- ],
- notes=textwrap.dedent(
- """
- - The output supports multiple tool calls natively
- - JSON format for defining the functions in the system prompt is similar to Llama3.1
- """
- ),
- ),
- UseCase(
- title="Zero shot function calling with user message",
- description=textwrap.dedent(
- """
- While the default is to provide all function calls in a system message, in Llama3.2 text models you can also provide information for all the available tools in a user message.
- """
- ),
- dialogs=[
- # Zero shot tool call as user message
- [
- RawMessage(role="user", content=user_tool_call()),
- ],
- ],
- notes=textwrap.dedent(
- """
- - The tool call format for the model is the same whether your function calls are provided in the system or user message.
- - While builtin tool calls end with a <|eom_id|>, notice the <|eot_id|> for zero shot tool calls.
- """
- ),
- ),
- UseCase(
- title="Code Interpreter",
- description=textwrap.dedent(
- """
- Code Interpreter continues to work in 3.2 text models similar to Llama 3.1 model family.
- Here is an example,
- """
- ),
- dialogs=[llama3_1_builtin_code_interpreter_dialog()],
- notes=textwrap.dedent(
- """
- - Note `Environment: ipython` in the system prompt.
- - Note that the response starts with `<|python_tag|>` and ends with `<|eom_id|>`
- """
- ),
- ),
- UseCase(
- title="Zero shot function calling E2E format",
- description=textwrap.dedent(
- """
- Here is an example of the e2e cycle of tool calls with the model in a muti-step way.
- """
- ),
- dialogs=[
- [
- RawMessage(role="system", content=system_tool_call()),
- RawMessage(role="user", content="What is the weather in SF?"),
- RawMessage(
- role="assistant",
- content="",
- stop_reason=StopReason.end_of_turn,
- tool_calls=[
- ToolCall(
- call_id="cc",
- tool_name="get_weather",
- arguments={
- "city": "San Francisco",
- "metric": "celsius",
- },
- )
- ],
- ),
- RawMessage(
- role="tool",
- content=json.dumps("25 C"),
- ),
- ],
- ],
- notes=textwrap.dedent(
- """
- - The output of the function call is provided back to the model as a tool response ( in json format ).
- - Notice `<|start_header_id|>ipython<|end_header_id|>` as the header message preceding the tool response.
- - The model finally summarizes the information from the tool response and returns the result to the user.
- """
- ),
- tool_prompt_format=ToolPromptFormat.python_list,
- ),
- UseCase(
- title="Prompt format for base models",
- description=textwrap.dedent(
- """
- For base models (Llama3.2-1B and Llama3.2-3B), the prompt format for a simple completion is as follows
- """
- ),
- dialogs=[
- TextCompletionContent(content="The color of the sky is blue but sometimes it can also be"),
- ],
- notes="Same as Llama3.1",
- ),
- ]
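
The deleted `prompts_text.py` above documents the pythonic zero-shot tool-call format (`[func1(param=value, ...), func2(...)]`) introduced for the Llama3.2 1B/3B instruct models. As a rough illustration of how a client might consume that format, here is a minimal, self-contained sketch (not llama-stack code, and assuming special tokens such as `<|eot_id|>` have already been stripped) that parses such a response with Python's `ast` module:

```python
import ast


def parse_pythonic_tool_calls(text: str) -> list[tuple[str, dict]]:
    """Parse a response like "[get_weather(city='SF', metric='celsius')]"
    into (function_name, keyword_arguments) pairs."""
    tree = ast.parse(text.strip(), mode="eval")
    if not isinstance(tree.body, ast.List):
        raise ValueError("expected a Python-style list of function calls")
    calls = []
    for node in tree.body.elts:
        if not (isinstance(node, ast.Call) and isinstance(node.func, ast.Name)):
            raise ValueError("expected simple function calls")
        # literal_eval keeps this safe: only constant keyword values are accepted.
        kwargs = {kw.arg: ast.literal_eval(kw.value) for kw in node.keywords}
        calls.append((node.func.id, kwargs))
    return calls


print(parse_pythonic_tool_calls(
    "[get_weather(city='San Francisco', metric='celsius'), get_weather(city='Seattle', metric='celsius')]"
))
# [('get_weather', {'city': 'San Francisco', 'metric': 'celsius'}),
#  ('get_weather', {'city': 'Seattle', 'metric': 'celsius'})]
```
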
llama_stack/models/llama/llama3_2/prompts_vision.py
@@ -1,126 +0,0 @@
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
- #
- # This source code is licensed under the terms described in the LICENSE file in
- # the root directory of this source tree.
-
- import textwrap
- from pathlib import Path
-
- from llama_stack.models.llama.datatypes import (
- RawMediaItem,
- RawMessage,
- RawTextItem,
- )
-
- from ..prompt_format import (
- TextCompletionContent,
- UseCase,
- llama3_1_builtin_tool_call_dialog,
- # llama3_1_builtin_tool_call_with_image_dialog,
- llama3_2_user_assistant_conversation,
- )
-
-
- def usecases():
- this_dir = Path(__file__).parent.parent.resolve()
- with open(this_dir / "scripts/resources/dog.jpg", "rb") as f:
- img = f.read()
-
- return [
- llama3_2_user_assistant_conversation(),
- UseCase(
- title="User and assistant conversation with Images",
- description="This example shows how to pass and image to the model as part of the messages.",
- dialogs=[
- [
- RawMessage(
- role="user",
- content=[
- RawMediaItem(data=img),
- RawTextItem(text="Describe this image in two sentences"),
- ],
- )
- ],
- ],
- notes=textwrap.dedent(
- """
- - The `<|image|>` tag is used to indicate presence of the image
- - The model isn't an early fusion model so doesn't actually translate an image into several tokens. Instead the cross-attention layers take input "on the side" from a vision encoder
- ![Image](mm-model.png)
- - Its important to postion the <|image|> tag appropriately in the prompt. Image will only attend to the subsequent text tokens
- - The <|image|> tag is part of the user message body, implying that it should only come after the header `<|start_header_id|>{role}<|end_header_id|>` in the message body
- - We recommend using a single image in one prompt
- """
- ),
- ),
- UseCase(
- title="Builtin and Zero Shot Tool Calling",
- description=textwrap.dedent(
- """
- Llama3.2 vision models follow the same tool calling format as Llama3.1 models when inputs are text only.
- Use `Environment: ipython` to enable tools.
- Add `Tools: {{tool_name1}},{{tool_name2}}` for each of the builtin tools.
- The same builtin tools as Llama3.1 are available,
- - code_interpreter (for executing python code)
- - brave_search (to search the web)
- - wolfram_alpha (for querying wolfram alpha for mathematical questions)
- """,
- ),
- dialogs=[llama3_1_builtin_tool_call_dialog()],
- notes=textwrap.dedent(
- """
- - Note the `<|python_tag|>` before `brave_search` function call.
- - The `<|eom_id|>` tag is used to indicate the end of the message.
- - Similar to Llama3.1, code_interpreter is not explicitly mentioned but is enabled via `Environment: ipython`.
- - Tool Calling does NOT work with images in the prompt as of now.
- """
- ),
- ),
- # UseCase(
- # title="Tool Calling for vision models",
- # description=textwrap.dedent(
- # """
- # While Llama3.2 vision models follow the same tool calling format as Llama3.1 models when inputs are text only,
- # they are not able to do tool calling when prompt contains image inputs (along with text).
- # The recommended way would be to separate out the image understanding from the tool calling in successive prompts.
- # Here is an example of how that could be done,
- # """,
- # ),
- # dialogs=[llama3_1_builtin_tool_call_with_image_dialog()],
- # notes=textwrap.dedent(
- # """
- # - Instead of a single prompt (image understanding + tool call), we split into two prompts to achieve the same result.
- # """
- # ),
- # ),
- UseCase(
- title="Prompt format for base models",
- description=textwrap.dedent(
- """
- For base models (Llama3.2-11B-Vision and Llama3.2-90B-Vision), the prompt format for a simple completion is as follows
- """
- ),
- dialogs=[
- TextCompletionContent(content="The color of the sky is blue but sometimes it can also be"),
- ],
- notes="- Same as Llama3.1",
- ),
- UseCase(
- title="Prompt format for base models with Image",
- description=textwrap.dedent(
- """
- For base models (Llama3.2-11B-Vision and Llama3.2-90B-Vision), here is an example of how the text completion format looks with an image,
- """
- ),
- dialogs=[
- TextCompletionContent(
- content=[
- RawMediaItem(data=img),
- RawTextItem(text="If I had to write a haiku for this one"),
- ]
- ),
- ],
- notes="- Note the placement of the special tags <|begin_of_text|> and <|image|>",
- ),
- ]
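
The deleted `prompts_vision.py` shows how a multimodal message is assembled for the Llama3.2 vision models. For reference, a minimal sketch of the same pattern outside the prompt-format scripts, assuming the `RawMessage`/`RawMediaItem`/`RawTextItem` datatypes in `llama_stack.models.llama.datatypes` remain available (this diff removes the prompt-format files, not that module) and using a placeholder image path:

```python
from pathlib import Path

from llama_stack.models.llama.datatypes import RawMediaItem, RawMessage, RawTextItem

# Placeholder path; any local image bytes work the same way.
img = Path("dog.jpg").read_bytes()

# Mirror the deleted use case: the media item precedes the text so that the
# <|image|> tag lands before the tokens that should attend to it.
message = RawMessage(
    role="user",
    content=[
        RawMediaItem(data=img),
        RawTextItem(text="Describe this image in two sentences"),
    ],
)
```
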
llama_stack/models/llama/llama3_2/text_prompt_format.md
@@ -1,286 +0,0 @@
- ## User and assistant conversation
-
- Here is a regular multi-turn user assistant conversation and how its formatted.
-
- ##### Input Prompt Format
- ```
- <|begin_of_text|><|start_header_id|>system<|end_header_id|>
-
- You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
-
- Who are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
-
- ```
-
- ##### Model Response Format
- ```
- I'm an AI assistant, which means I'm a computer program designed to simulate conversation and answer questions to the best of my ability. I'm here to help you with any questions or tasks you may have, and I'll do my best to provide you with accurate and helpful information.
-
- I don't have a personal name, but I'm often referred to as a "virtual assistant" or a "chatbot." I'm a machine learning model, which means I've been trained on a large dataset of text and can generate responses based on patterns and context.
-
- I can help with a wide range of topics, from general knowledge and trivia to more specialized subjects like science, history, and technology. I can also assist with tasks like language translation, text summarization, and even generating creative content like stories or poetry.
-
- So, what can I help you with today?<|eot_id|>
- ```
-
-
- ##### Notes
- This format is unchanged from Llama3.1
-
- ## Zero shot function calling
-
-
- For Llama3.2 1B and 3B instruct models, we are introducing a new format for zero shot function calling.
- This new format is designed to be more flexible and powerful than the previous format.
- All available functions can be provided in the system message. A key difference is in the format of how the assistant responds with function calls.
- It is pythonic in the form of `[func1(params_name=params_value, params_name2=params_value2...), func2(params)]` instead of the `json` or `<function>` tag that were defined in Llama3.1.
- Here is an example for the same,
-
-
- ##### Input Prompt Format
- ```
- <|begin_of_text|><|start_header_id|>system<|end_header_id|>
-
- You are an expert in composing functions. You are given a question and a set of possible functions.
- Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
- If none of the function can be used, point it out. If the given question lacks the parameters required by the function,
- also point it out. You should only return the function call in tools call sections.
-
- If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]
- You SHOULD NOT include any other text in the response.
-
- Here is a list of functions in JSON format that you can invoke.
-
- [
- {
- "name": "get_weather",
- "description": "Get weather info for places",
- "parameters": {
- "type": "dict",
- "required": [
- "city"
- ],
- "properties": {
- "city": {
- "type": "string",
- "description": "The name of the city to get the weather for"
- },
- "metric": {
- "type": "string",
- "description": "The metric for weather. Options are: celsius, fahrenheit",
- "default": "celsius"
- }
- }
- }
- }
- ]<|eot_id|><|start_header_id|>user<|end_header_id|>
-
- What is the weather in SF and Seattle?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
-
- ```
-
- ##### Model Response Format
- ```
- [get_weather(city='San Francisco', metric='celsius'), get_weather(city='Seattle', metric='celsius')]<|eot_id|>
- ```
-
-
- ##### Notes
-
- - The output supports multiple tool calls natively
- - JSON format for defining the functions in the system prompt is similar to Llama3.1
-
-
- ## Zero shot function calling with user message
-
-
- While the default is to provide all function calls in a system message, in Llama3.2 text models you can also provide information for all the available tools in a user message.
-
-
- ##### Input Prompt Format
- ```
- <|begin_of_text|><|start_header_id|>user<|end_header_id|>
-
- Questions: Can you retrieve the details for the user with the ID 7890, who has black as their special request?
- Here is a list of functions in JSON format that you can invoke:
- [
- {
- "name": "get_user_info",
- "description": "Retrieve details for a specific user by their unique identifier. Note that the provided function is in Python 3 syntax.",
- "parameters": {
- "type": "dict",
- "required": [
- "user_id"
- ],
- "properties": {
- "user_id": {
- "type": "integer",
- "description": "The unique identifier of the user. It is used to fetch the specific user details from the database."
- },
- "special": {
- "type": "string",
- "description": "Any special information or parameters that need to be considered while fetching user details.",
- "default": "none"
- }
- }
- }
- }
- ]
-
- Should you decide to return the function call(s),Put it in the format of [func1(params_name=params_value, params_name2=params_value2...), func2(params)]
-
- NO other text MUST be included.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
-
- ```
-
- ##### Model Response Format
- ```
- [get_user_info(user_id=7890, special='black')]<|eot_id|>
- ```
-
-
- ##### Notes
-
- - The tool call format for the model is the same whether your function calls are provided in the system or user message.
- - While builtin tool calls end with a <|eom_id|>, notice the <|eot_id|> for zero shot tool calls.
-
-
- ## Code Interpreter
-
-
- Code Interpreter continues to work in 3.2 text models similar to Llama 3.1 model family.
- Here is an example,
-
-
- ##### Input Prompt Format
- ```
- <|begin_of_text|><|start_header_id|>system<|end_header_id|>
-
- Environment: ipython
-
- Cutting Knowledge Date: December 2023
- Today Date: 24 September 2024
- <|eot_id|><|start_header_id|>user<|end_header_id|>
-
- Write code to check if number is prime. Use it to verify if number 7 is prime<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
-
- ```
-
- ##### Model Response Format
- ```
- <|python_tag|>def is_prime(n):
- if n <= 1:
- return False
- if n == 2:
- return True
- if n % 2 == 0:
- return False
- max_divisor = int(n**0.5) + 1
- for d in range(3, max_divisor, 2):
- if n % d == 0:
- return False
- return True
-
- print(is_prime(7)) # Output: True<|eom_id|>
- ```
-
-
- ##### Notes
-
- - Note `Environment: ipython` in the system prompt.
- - Note that the response starts with `<|python_tag|>` and ends with `<|eom_id|>`
-
-
- ## Zero shot function calling E2E format
-
-
- Here is an example of the e2e cycle of tool calls with the model in a muti-step way.
-
-
- ##### Input Prompt Format
- ```
- <|begin_of_text|><|start_header_id|>system<|end_header_id|>
-
- You are an expert in composing functions. You are given a question and a set of possible functions.
- Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
- If none of the function can be used, point it out. If the given question lacks the parameters required by the function,
- also point it out. You should only return the function call in tools call sections.
-
- If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]
- You SHOULD NOT include any other text in the response.
-
- Here is a list of functions in JSON format that you can invoke.
-
- [
- {
- "name": "get_weather",
- "description": "Get weather info for places",
- "parameters": {
- "type": "dict",
- "required": [
- "city"
- ],
- "properties": {
- "city": {
- "type": "string",
- "description": "The name of the city to get the weather for"
- },
- "metric": {
- "type": "string",
- "description": "The metric for weather. Options are: celsius, fahrenheit",
- "default": "celsius"
- }
- }
- }
- }
- ]<|eot_id|><|start_header_id|>user<|end_header_id|>
-
- What is the weather in SF?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
- <|python_tag|>[get_weather(city="San Francisco", metric="celsius")]<|eot_id|><|start_header_id|>ipython<|end_header_id|>
-
- "25 C"<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
-
- ```
-
- ##### Model Response Format
- ```
- The weather in San Francisco is 25 C.<|eot_id|>
- ```
-
-
- ##### Notes
-
- - The output of the function call is provided back to the model as a tool response ( in json format ).
- - Notice `<|start_header_id|>ipython<|end_header_id|>` as the header message preceding the tool response.
- - The model finally summarizes the information from the tool response and returns the result to the user.
-
-
- ## Prompt format for base models
-
-
- For base models (Llama3.2-1B and Llama3.2-3B), the prompt format for a simple completion is as follows
-
-
- ##### Input Prompt Format
- ```
- <|begin_of_text|>The color of the sky is blue but sometimes it can also be
- ```
-
- ##### Model Response Format
- ```
- gray or even purple. The color of the sky can change depending on the time of day, the weather, and the amount of pollution in the air. The color of the sky can also be affected by the presence of dust, smoke, and other particles in the air.
-
- ## Step 1: Identify the factors that
- ```
-
-
- ##### Notes
- Same as Llama3.1
-
- Thank You!
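
The removed `text_prompt_format.md` spells out the Llama3.2 text chat layout: each turn is a `<|start_header_id|>{role}<|end_header_id|>` header, a blank line, the message body, and an `<|eot_id|>` terminator, with generation primed by an empty assistant header. As a standalone illustration only (the real formatting lives in the model's chat-format code, which this diff does not show), here is a sketch that reproduces the first documented example:

```python
def render_chat(messages: list[dict[str, str]]) -> str:
    """Assemble the documented Llama3.2 text chat layout from role/content dicts."""
    prompt = "<|begin_of_text|>"
    for m in messages:
        prompt += f"<|start_header_id|>{m['role']}<|end_header_id|>\n\n{m['content']}<|eot_id|>"
    # Prime the model to answer as the assistant.
    return prompt + "<|start_header_id|>assistant<|end_header_id|>\n\n"


print(render_chat([
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Who are you?"},
]))
```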