llama-stack 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
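For readers who want to verify the listing below themselves: a minimal sketch of how a comparison like this can be reproduced locally with the standard library. It assumes the two wheels have already been fetched (e.g. `pip download llama_stack==0.4.3 --no-deps` and `pip download llama_stack==0.5.0 --no-deps`); the wheel filenames follow pip's usual naming convention and are not guaranteed by this page.

```python
# Sketch: list files added/removed between the two wheels.
# A .whl file is a zip archive, so the stdlib is enough.
import zipfile

old = set(zipfile.ZipFile("llama_stack-0.4.3-py3-none-any.whl").namelist())
new = set(zipfile.ZipFile("llama_stack-0.5.0-py3-none-any.whl").namelist())

print(f"added ({len(new - old)}):")
for name in sorted(new - old):
    print(" ", name)
print(f"removed ({len(old - new)}):")
for name in sorted(old - new):
    print(" ", name)
```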
Files changed (311)
  1. llama_stack/cli/stack/_list_deps.py +11 -7
  2. llama_stack/cli/stack/run.py +3 -25
  3. llama_stack/core/access_control/datatypes.py +78 -0
  4. llama_stack/core/configure.py +2 -2
  5. {llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
  6. llama_stack/core/connectors/connectors.py +162 -0
  7. llama_stack/core/conversations/conversations.py +61 -58
  8. llama_stack/core/datatypes.py +54 -8
  9. llama_stack/core/library_client.py +60 -13
  10. llama_stack/core/prompts/prompts.py +43 -42
  11. llama_stack/core/routers/datasets.py +20 -17
  12. llama_stack/core/routers/eval_scoring.py +143 -53
  13. llama_stack/core/routers/inference.py +20 -9
  14. llama_stack/core/routers/safety.py +30 -42
  15. llama_stack/core/routers/vector_io.py +15 -7
  16. llama_stack/core/routing_tables/models.py +42 -3
  17. llama_stack/core/routing_tables/scoring_functions.py +19 -19
  18. llama_stack/core/routing_tables/shields.py +20 -17
  19. llama_stack/core/routing_tables/vector_stores.py +8 -5
  20. llama_stack/core/server/auth.py +192 -17
  21. llama_stack/core/server/fastapi_router_registry.py +40 -5
  22. llama_stack/core/server/server.py +24 -5
  23. llama_stack/core/stack.py +54 -10
  24. llama_stack/core/storage/datatypes.py +9 -0
  25. llama_stack/core/store/registry.py +1 -1
  26. llama_stack/core/utils/exec.py +2 -2
  27. llama_stack/core/utils/type_inspection.py +16 -2
  28. llama_stack/distributions/dell/config.yaml +4 -1
  29. llama_stack/distributions/dell/doc_template.md +209 -0
  30. llama_stack/distributions/dell/run-with-safety.yaml +4 -1
  31. llama_stack/distributions/nvidia/config.yaml +4 -1
  32. llama_stack/distributions/nvidia/doc_template.md +170 -0
  33. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
  34. llama_stack/distributions/oci/config.yaml +4 -1
  35. llama_stack/distributions/oci/doc_template.md +140 -0
  36. llama_stack/distributions/open-benchmark/config.yaml +9 -1
  37. llama_stack/distributions/postgres-demo/config.yaml +1 -1
  38. llama_stack/distributions/starter/build.yaml +62 -0
  39. llama_stack/distributions/starter/config.yaml +22 -3
  40. llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
  41. llama_stack/distributions/starter/starter.py +13 -1
  42. llama_stack/distributions/starter-gpu/build.yaml +62 -0
  43. llama_stack/distributions/starter-gpu/config.yaml +22 -3
  44. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
  45. llama_stack/distributions/template.py +10 -2
  46. llama_stack/distributions/watsonx/config.yaml +4 -1
  47. llama_stack/log.py +1 -0
  48. llama_stack/models/llama/resources/dog.jpg +0 -0
  49. llama_stack/models/llama/resources/pasta.jpeg +0 -0
  50. llama_stack/models/llama/resources/small_dog.jpg +0 -0
  51. llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
  52. llama_stack/providers/inline/agents/meta_reference/agents.py +58 -61
  53. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +187 -60
  54. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +99 -22
  55. llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
  56. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
  57. llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
  58. llama_stack/providers/inline/batches/reference/batches.py +2 -1
  59. llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
  60. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
  61. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
  62. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
  63. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
  64. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
  65. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
  66. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  67. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
  68. llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
  69. llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
  70. llama_stack/providers/inline/post_training/torchtune/common/utils.py +5 -9
  71. llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
  72. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
  73. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
  74. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +20 -24
  75. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
  76. llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
  77. llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
  78. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
  79. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
  80. llama_stack/providers/registry/agents.py +1 -0
  81. llama_stack/providers/registry/inference.py +1 -9
  82. llama_stack/providers/registry/vector_io.py +136 -16
  83. llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
  84. llama_stack/providers/remote/eval/nvidia/README.md +134 -0
  85. llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
  86. llama_stack/providers/remote/files/s3/README.md +266 -0
  87. llama_stack/providers/remote/files/s3/config.py +5 -3
  88. llama_stack/providers/remote/files/s3/files.py +2 -2
  89. llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
  90. llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
  91. llama_stack/providers/remote/inference/openai/openai.py +2 -0
  92. llama_stack/providers/remote/inference/together/together.py +4 -0
  93. llama_stack/providers/remote/inference/vertexai/config.py +3 -3
  94. llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
  95. llama_stack/providers/remote/inference/vllm/config.py +37 -18
  96. llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
  97. llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
  98. llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
  99. llama_stack/providers/remote/post_training/nvidia/models.py +3 -11
  100. llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
  101. llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
  102. llama_stack/providers/remote/safety/nvidia/README.md +78 -0
  103. llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
  104. llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
  105. llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
  106. llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
  107. llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
  108. llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
  109. llama_stack/providers/remote/vector_io/oci/config.py +41 -0
  110. llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
  111. llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
  112. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
  113. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
  114. llama_stack/providers/utils/bedrock/client.py +3 -3
  115. llama_stack/providers/utils/bedrock/config.py +7 -7
  116. llama_stack/providers/utils/inference/__init__.py +0 -25
  117. llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
  118. llama_stack/providers/utils/inference/http_client.py +239 -0
  119. llama_stack/providers/utils/inference/litellm_openai_mixin.py +6 -0
  120. llama_stack/providers/utils/inference/model_registry.py +148 -2
  121. llama_stack/providers/utils/inference/openai_compat.py +1 -158
  122. llama_stack/providers/utils/inference/openai_mixin.py +42 -2
  123. llama_stack/providers/utils/inference/prompt_adapter.py +0 -209
  124. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
  125. llama_stack/providers/utils/memory/vector_store.py +46 -19
  126. llama_stack/providers/utils/responses/responses_store.py +40 -6
  127. llama_stack/providers/utils/safety.py +114 -0
  128. llama_stack/providers/utils/tools/mcp.py +44 -3
  129. llama_stack/testing/api_recorder.py +9 -3
  130. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/METADATA +14 -2
  131. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/RECORD +135 -279
  132. llama_stack-0.5.0.dist-info/top_level.txt +1 -0
  133. llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
  134. llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
  135. llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
  136. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
  137. llama_stack/models/llama/hadamard_utils.py +0 -88
  138. llama_stack/models/llama/llama3/args.py +0 -74
  139. llama_stack/models/llama/llama3/generation.py +0 -378
  140. llama_stack/models/llama/llama3/model.py +0 -304
  141. llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
  142. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
  143. llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
  144. llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
  145. llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
  146. llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
  147. llama_stack/models/llama/llama3/quantization/loader.py +0 -316
  148. llama_stack/models/llama/llama3_1/__init__.py +0 -12
  149. llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
  150. llama_stack/models/llama/llama3_1/prompts.py +0 -258
  151. llama_stack/models/llama/llama3_2/__init__.py +0 -5
  152. llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
  153. llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
  154. llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
  155. llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
  156. llama_stack/models/llama/llama3_3/__init__.py +0 -5
  157. llama_stack/models/llama/llama3_3/prompts.py +0 -259
  158. llama_stack/models/llama/llama4/args.py +0 -107
  159. llama_stack/models/llama/llama4/ffn.py +0 -58
  160. llama_stack/models/llama/llama4/moe.py +0 -214
  161. llama_stack/models/llama/llama4/preprocess.py +0 -435
  162. llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
  163. llama_stack/models/llama/llama4/quantization/loader.py +0 -226
  164. llama_stack/models/llama/llama4/vision/__init__.py +0 -5
  165. llama_stack/models/llama/llama4/vision/embedding.py +0 -210
  166. llama_stack/models/llama/llama4/vision/encoder.py +0 -412
  167. llama_stack/models/llama/quantize_impls.py +0 -316
  168. llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
  169. llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
  170. llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
  171. llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
  172. llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
  173. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
  174. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
  175. llama_stack-0.4.3.dist-info/top_level.txt +0 -2
  176. llama_stack_api/__init__.py +0 -945
  177. llama_stack_api/admin/__init__.py +0 -45
  178. llama_stack_api/admin/api.py +0 -72
  179. llama_stack_api/admin/fastapi_routes.py +0 -117
  180. llama_stack_api/admin/models.py +0 -113
  181. llama_stack_api/agents.py +0 -173
  182. llama_stack_api/batches/__init__.py +0 -40
  183. llama_stack_api/batches/api.py +0 -53
  184. llama_stack_api/batches/fastapi_routes.py +0 -113
  185. llama_stack_api/batches/models.py +0 -78
  186. llama_stack_api/benchmarks/__init__.py +0 -43
  187. llama_stack_api/benchmarks/api.py +0 -39
  188. llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  189. llama_stack_api/benchmarks/models.py +0 -109
  190. llama_stack_api/common/__init__.py +0 -5
  191. llama_stack_api/common/content_types.py +0 -101
  192. llama_stack_api/common/errors.py +0 -95
  193. llama_stack_api/common/job_types.py +0 -38
  194. llama_stack_api/common/responses.py +0 -77
  195. llama_stack_api/common/training_types.py +0 -47
  196. llama_stack_api/common/type_system.py +0 -146
  197. llama_stack_api/connectors.py +0 -146
  198. llama_stack_api/conversations.py +0 -270
  199. llama_stack_api/datasetio.py +0 -55
  200. llama_stack_api/datasets/__init__.py +0 -61
  201. llama_stack_api/datasets/api.py +0 -35
  202. llama_stack_api/datasets/fastapi_routes.py +0 -104
  203. llama_stack_api/datasets/models.py +0 -152
  204. llama_stack_api/datatypes.py +0 -373
  205. llama_stack_api/eval.py +0 -137
  206. llama_stack_api/file_processors/__init__.py +0 -27
  207. llama_stack_api/file_processors/api.py +0 -64
  208. llama_stack_api/file_processors/fastapi_routes.py +0 -78
  209. llama_stack_api/file_processors/models.py +0 -42
  210. llama_stack_api/files/__init__.py +0 -35
  211. llama_stack_api/files/api.py +0 -51
  212. llama_stack_api/files/fastapi_routes.py +0 -124
  213. llama_stack_api/files/models.py +0 -107
  214. llama_stack_api/inference.py +0 -1169
  215. llama_stack_api/inspect_api/__init__.py +0 -37
  216. llama_stack_api/inspect_api/api.py +0 -25
  217. llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  218. llama_stack_api/inspect_api/models.py +0 -28
  219. llama_stack_api/internal/kvstore.py +0 -28
  220. llama_stack_api/internal/sqlstore.py +0 -81
  221. llama_stack_api/llama_stack_api/__init__.py +0 -945
  222. llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
  223. llama_stack_api/llama_stack_api/admin/api.py +0 -72
  224. llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
  225. llama_stack_api/llama_stack_api/admin/models.py +0 -113
  226. llama_stack_api/llama_stack_api/agents.py +0 -173
  227. llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
  228. llama_stack_api/llama_stack_api/batches/api.py +0 -53
  229. llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
  230. llama_stack_api/llama_stack_api/batches/models.py +0 -78
  231. llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
  232. llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
  233. llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  234. llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
  235. llama_stack_api/llama_stack_api/common/__init__.py +0 -5
  236. llama_stack_api/llama_stack_api/common/content_types.py +0 -101
  237. llama_stack_api/llama_stack_api/common/errors.py +0 -95
  238. llama_stack_api/llama_stack_api/common/job_types.py +0 -38
  239. llama_stack_api/llama_stack_api/common/responses.py +0 -77
  240. llama_stack_api/llama_stack_api/common/training_types.py +0 -47
  241. llama_stack_api/llama_stack_api/common/type_system.py +0 -146
  242. llama_stack_api/llama_stack_api/connectors.py +0 -146
  243. llama_stack_api/llama_stack_api/conversations.py +0 -270
  244. llama_stack_api/llama_stack_api/datasetio.py +0 -55
  245. llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
  246. llama_stack_api/llama_stack_api/datasets/api.py +0 -35
  247. llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
  248. llama_stack_api/llama_stack_api/datasets/models.py +0 -152
  249. llama_stack_api/llama_stack_api/datatypes.py +0 -373
  250. llama_stack_api/llama_stack_api/eval.py +0 -137
  251. llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
  252. llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
  253. llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
  254. llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
  255. llama_stack_api/llama_stack_api/files/__init__.py +0 -35
  256. llama_stack_api/llama_stack_api/files/api.py +0 -51
  257. llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
  258. llama_stack_api/llama_stack_api/files/models.py +0 -107
  259. llama_stack_api/llama_stack_api/inference.py +0 -1169
  260. llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
  261. llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
  262. llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  263. llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
  264. llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
  265. llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
  266. llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
  267. llama_stack_api/llama_stack_api/models.py +0 -171
  268. llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
  269. llama_stack_api/llama_stack_api/post_training.py +0 -370
  270. llama_stack_api/llama_stack_api/prompts.py +0 -203
  271. llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
  272. llama_stack_api/llama_stack_api/providers/api.py +0 -16
  273. llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
  274. llama_stack_api/llama_stack_api/providers/models.py +0 -24
  275. llama_stack_api/llama_stack_api/py.typed +0 -0
  276. llama_stack_api/llama_stack_api/rag_tool.py +0 -168
  277. llama_stack_api/llama_stack_api/resource.py +0 -37
  278. llama_stack_api/llama_stack_api/router_utils.py +0 -160
  279. llama_stack_api/llama_stack_api/safety.py +0 -132
  280. llama_stack_api/llama_stack_api/schema_utils.py +0 -208
  281. llama_stack_api/llama_stack_api/scoring.py +0 -93
  282. llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
  283. llama_stack_api/llama_stack_api/shields.py +0 -93
  284. llama_stack_api/llama_stack_api/tools.py +0 -226
  285. llama_stack_api/llama_stack_api/vector_io.py +0 -941
  286. llama_stack_api/llama_stack_api/vector_stores.py +0 -53
  287. llama_stack_api/llama_stack_api/version.py +0 -9
  288. llama_stack_api/models.py +0 -171
  289. llama_stack_api/openai_responses.py +0 -1468
  290. llama_stack_api/post_training.py +0 -370
  291. llama_stack_api/prompts.py +0 -203
  292. llama_stack_api/providers/__init__.py +0 -33
  293. llama_stack_api/providers/api.py +0 -16
  294. llama_stack_api/providers/fastapi_routes.py +0 -57
  295. llama_stack_api/providers/models.py +0 -24
  296. llama_stack_api/py.typed +0 -0
  297. llama_stack_api/rag_tool.py +0 -168
  298. llama_stack_api/resource.py +0 -37
  299. llama_stack_api/router_utils.py +0 -160
  300. llama_stack_api/safety.py +0 -132
  301. llama_stack_api/schema_utils.py +0 -208
  302. llama_stack_api/scoring.py +0 -93
  303. llama_stack_api/scoring_functions.py +0 -211
  304. llama_stack_api/shields.py +0 -93
  305. llama_stack_api/tools.py +0 -226
  306. llama_stack_api/vector_io.py +0 -941
  307. llama_stack_api/vector_stores.py +0 -53
  308. llama_stack_api/version.py +0 -9
  309. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/WHEEL +0 -0
  310. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/entry_points.txt +0 -0
  311. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/licenses/LICENSE +0 -0
llama_stack/models/llama/llama3_1/prompt_format.md
@@ -1,358 +0,0 @@
-
-
- # Llama 3.1 - Prompt Formats
- ## Tokens
- Here is a list of special tokens that are supported by Llama 3.1:
- - `<|begin_of_text|>`: Specifies the start of the prompt
- - `<|end_of_text|>`: Model will cease to generate more tokens. This token is generated only by the base models.
- - `<|finetune_right_pad_id|>`: This token is used for padding text sequences to the same length in a batch.
- - `<|start_header_id|>` and `<|end_header_id|>`: These tokens enclose the role for a particular message. The possible roles are: [system, user, assistant and ipython]
- - `<|eom_id|>`: End of message. A message represents a possible stopping point for execution where the model can inform the executor that a tool call needs to be made. This is used for multi-step interactions between the model and any available tools. This token is emitted by the model when the Environment: ipython instruction is used in the system prompt, or if the model calls for a built-in tool.
- - `<|eot_id|>`: End of turn. Represents when the model has determined that it has finished interacting with the user message that initiated its response. This is used in two scenarios:
-     - at the end of a direct interaction between the model and the user
-     - at the end of multiple interactions between the model and any available tools
-     This token signals to the executor that the model has finished generating a response.
- - `<|python_tag|>`: Is a special tag used in the model's response to signify a tool call.
-
-
-
- There are 4 different roles that are supported by Llama 3.1
- - `system`: Sets the context in which to interact with the AI model. It typically includes rules, guidelines, or necessary information that helps the model respond effectively.
- - `user`: Represents the human interacting with the model. It includes the inputs, commands, and questions to the model.
- - `ipython`: A new role introduced in Llama 3.1. Semantically, this role means "tool". This role is used to mark messages with the output of a tool call when sent back to the model from the executor.
- - `assistant`: Represents the response generated by the AI model based on the context provided in the `system`, `ipython` and `user` prompts.
-
- ## Llama 3.1 Base Model
-
- Text completion for Llama 3.1 base model uses this format.
-
- ##### Input Prompt Format
- ```
- <|begin_of_text|>Color of sky is blue but sometimes can also be
- ```
-
- ##### Model Response Format
- ```
- red, orange, yellow, green, purple, pink, brown, gray, black, white, and even rainbow colors. The color of the sky can change due to various reasons such as time of day, weather conditions, pollution, and atmospheric phenomena.
- The color of the sky is primarily blue because of a phenomenon called
- ```
-
-
-
- Note start special tag
-
-
- ## Llama 3.1 Instruct Model
- ## User and assistant conversation
-
- Here is a regular multi-turn user assistant conversation and how its formatted.
-
- ##### Input Prompt Format
- ```
- <|begin_of_text|><|start_header_id|>system<|end_header_id|>
-
- You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
-
- Answer who are you in the form of jeopardy?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
-
- ```
-
- ##### Model Response Format
- ```
- Here's my response
-
- "What is a helpful assistant?"<|eot_id|>
- ```
-
-
-
-
-
-
- ## Tool Calling Formats
-
-
- The three built-in tools (brave_search, wolfram_alpha, and code interpreter) can be turned on using the system prompt:
- - Brave Search: Tool call to perform web searches.
- - Wolfram Alpha: Tool call to perform complex mathematical calculations.
- - Code Interpreter: Enables the model to output python code.
-
- ## Builtin Tool Calling
-
-
- Here is an example of a conversation using brave search
-
-
- ##### Input Prompt Format
- ```
- <|begin_of_text|><|start_header_id|>system<|end_header_id|>
-
- Environment: ipython
- Tools: brave_search, wolfram_alpha
- Cutting Knowledge Date: December 2023
- Today Date: 21 September 2024
-
- You are a helpful assistant.
- <|eot_id|><|start_header_id|>user<|end_header_id|>
-
- Search the web for the latest price of 1oz gold?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
-
- ```
-
- ##### Model Response Format
- ```
- <|python_tag|>brave_search.call(query="latest price of 1oz gold")<|eom_id|>
- ```
-
-
-
-
- - Just including Environment: ipython turns on code interpreter; therefore, you don't need to specify code interpretation on the Tools: line. The model can generate python code which is interpreted by the executor, with the result provided back to the model.
- - The message body of the assistant response starts with a special tag <|python_tag|>
- - As alluded to above, in such an environment, the model can generate <|eom_id|> instead of just the standard <|eot_id|> . The latter indicates the turn is finished, while the former indicates continued multi-step reasoning. That is, the model is expecting a continuation message with the output of the tool call.
- - The model tool call response is of the form `tool.call(query="...")` wher tool is `brave_search` or `wolfram_alpha`
-
-
- ## Builtin Code Interpreter
-
- Here is an actual example of model responding with code
-
- ##### Input Prompt Format
- ```
- <|begin_of_text|><|start_header_id|>system<|end_header_id|>
-
- Environment: ipython<|eot_id|><|start_header_id|>user<|end_header_id|>
-
- Write code to check if number is prime, use that to see if the number 7 is prime<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
-
- ```
-
- ##### Model Response Format
- ```
- <|python_tag|>def is_prime(n):
-     if n <= 1
-         return False
-     for i in range(2, int(n**0.5) + 1):
-         if n % i == 0:
-             return False
-     return True
-
- print(is_prime(7)) # Output: True<|eom_id|>
- ```
-
-
-
-
- - Model starts with <|python_tag|> and continues writing python code that it needs to be executed
- - No explicit mention of code_interpreter in system prompt. `Environment: ipython` implicitly enables it.
-
-
- ## Built-in tools full interaction
-
- Here is a full interaction with the built-in tools including the tool response and the final assistant response.
-
- ##### Input Prompt Format
- ```
- <|begin_of_text|><|start_header_id|>system<|end_header_id|>
-
- Environment: ipython
- Tools: brave_search, wolfram_alpha
- <|eot_id|><|start_header_id|>user<|end_header_id|>
-
- What is the 100th decimal of pi?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
- <|python_tag|>wolfram_alpha.call(query="100th decimal of pi")<|eom_id|><|start_header_id|>ipython<|end_header_id|>
-
-
- {
-     "queryresult": {
-         "success": true,
-         "inputstring": "100th decimal of pi",
-         "pods": [
-             {
-                 "title": "Input interpretation",
-                 "subpods": [
-                     {
-                         "title": "",
-                         "plaintext": "100th digit | π"
-                     }
-                 ]
-             },
-             {
-                 "title": "Nearby digits",
-                 "subpods": [
-                     {
-                         "title": "",
-                         "plaintext": "...86208998628034825342117067982148086513282306647093..."
-                     }
-                 ]
-             },
-             {
-                 "title": "Result",
-                 "primary": true,
-                 "subpods": [
-                     {
-                         "title": "",
-                         "plaintext": "7"
-                     }
-                 ]
-             }
-         ]
-     }
- }
- <|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
-
- ```
-
- ##### Model Response Format
- ```
- The 100th decimal of pi is 7.<|eot_id|>
- ```
-
-
-
-
- - Note the `<|python_tag|>` in the assistant response.
- - Role is `ipython` for the wolfram alpha response that is passed back to the model.
- - Final message from assistant has <|eot_id|> tag.
-
-
-
- ## Zero shot tool calling
- ## JSON based tool calling
-
-
- Llama models can now output custom tool calls from a single message to allow easier tool calling.
- The following prompts provide an example of how custom tools can be called from the output of the model.
- It's important to note that the model itself does not execute the calls; it provides structured output to facilitate calling by an executor.
-
-
- ##### Input Prompt Format
- ```
- <|begin_of_text|><|start_header_id|>system<|end_header_id|>
-
- Environment: ipython
-
- Cutting Knowledge Date: December 2023
- Today Date: 21 September 2024
-
- You are a helpful assistant.
- <|eot_id|><|start_header_id|>user<|end_header_id|>
-
- Answer the user's question by making use of the following functions if needed.
- If none of the function can be used, please say so.
- Here is a list of functions in JSON format:
- {
-     "type": "function",
-     "function": {
-         "name": "trending_songs",
-         "description": "Returns the trending songs on a Music site",
-         "parameters": {
-             "type": "object",
-             "properties": [
-                 {
-                     "n": {
-                         "type": "object",
-                         "description": "The number of songs to return"
-                     }
-                 },
-                 {
-                     "genre": {
-                         "type": "object",
-                         "description": "The genre of the songs to return"
-                     }
-                 }
-             ],
-             "required": ["n"]
-         }
-     }
- }
-
- Return function calls in JSON format.<|eot_id|><|start_header_id|>user<|end_header_id|>
-
- Use tools to get latest trending songs<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
-
- ```
-
- ##### Model Response Format
- ```
- <|python_tag|>{
-     "type": "function",
-     "name": "trending_songs",
-     "parameters": {
-         "n": "10",
-         "genre": "all"
-     }
- }<|eom_id|>
- ```
-
-
-
-
- - JSON format for providing tools needs name, description and parameters
- - Model responds with `<|python_tag|>` and `<|eom_id|>` as `Environment: ipython` was in the system prompt
- - Instructions for tools added as a user message
- - Only single tool calls are supported as of now
-
-
-
- ## Example of a user defined tool calling
- ## `<function>` based tool calling
-
-
- Here is an example of how you could also write custom instructions for model to do zero shot tool calling.
- In this example, we define a custom tool calling format using the `<function>` tag.
-
-
- ##### Input Prompt Format
- ```
- <|begin_of_text|><|start_header_id|>system<|end_header_id|>
-
- Environment: ipython
-
- Cutting Knowledge Date: December 2023
- Today Date: 21 September 2024
-
- You are a helpful assistant.
- <|eot_id|><|start_header_id|>user<|end_header_id|>
-
- You have access to the following functions:
-
- Use the function 'trending_songs' to 'Returns the trending songs on a Music site':
- {"name": "trending_songs", "description": "Returns the trending songs on a Music site", "parameters": {"genre": {"description": "The genre of the songs to return", "param_type": "str", "required": false}, "n": {"description": "The number of songs to return", "param_type": "int", "required": true}}}
-
- Think very carefully before calling functions.
- If you choose to call a function ONLY reply in the following format with no prefix or suffix:
-
- <function=example_function_name>{"example_name": "example_value"}</function>
-
- Reminder:
- - If looking for real time information use relevant functions before falling back to brave_search
- - Function calls MUST follow the specified format, start with <function= and end with </function>
- - Required parameters MUST be specified
- - Only call one function at a time
- - Put the entire function call reply on one line<|eot_id|><|start_header_id|>user<|end_header_id|>
-
- Use tools to get latest trending songs<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
-
- ```
-
- ##### Model Response Format
- ```
- <function=trending_songs>{"n": 10}</function><|eot_id|>
- ```
-
-
-
-
- - In this case, model does NOT respond with `<|python_tag|>` and ends with `<|eot_id|>`
- - Instructions for tools added as a user message
-
-
- Thank You!
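The removed document above fully specifies the chat layout, so the turn structure can be illustrated in a few lines. A minimal sketch based only on the special tokens listed in that document; it is illustrative, not the tokenizer's actual serialization code:

```python
# Sketch: assemble a Llama 3.1 chat prompt from the tokens documented above.
def format_prompt(messages: list[dict[str, str]]) -> str:
    parts = ["<|begin_of_text|>"]
    for m in messages:
        parts.append(f"<|start_header_id|>{m['role']}<|end_header_id|>\n\n{m['content']}<|eot_id|>")
    # End with an open assistant header so the model generates the next turn.
    parts.append("<|start_header_id|>assistant<|end_header_id|>\n\n")
    return "".join(parts)

print(format_prompt([
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Answer who are you in the form of jeopardy?"},
]))
```

Run against the "User and assistant conversation" example, this reproduces the documented input prompt byte for byte.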
llama_stack/models/llama/llama3_1/prompts.py
@@ -1,258 +0,0 @@
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
- #
- # This source code is licensed under the terms described in the LICENSE file in
- # the root directory of this source tree.
-
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
- #
- # This source code is licensed under the terms described in the LICENSE file in
- # top-level folder for each specific model found within the models/ directory at
- # the top-level of this source tree.
-
- import json
- import textwrap
-
- from llama_stack.models.llama.datatypes import (
-     BuiltinTool,
-     RawMessage,
-     StopReason,
-     ToolCall,
-     ToolPromptFormat,
- )
- from llama_stack.models.llama.prompt_format import (
-     # llama3_1_e2e_tool_call_dialog,
-     TextCompletionContent,
-     UseCase,
-     llama3_1_builtin_tool_call_dialog,
-     llama3_1_custom_tool_call_dialog,
- )
-
-
- def wolfram_alpha_response():
-     return textwrap.dedent(
-         """
-         {
-             "queryresult": {
-                 "success": true,
-                 "inputstring": "100th decimal of pi",
-                 "pods": [
-                     {
-                         "title": "Input interpretation",
-                         "subpods": [
-                             {
-                                 "title": "",
-                                 "plaintext": "100th digit | \u03c0"
-                             }
-                         ]
-                     },
-                     {
-                         "title": "Nearby digits",
-                         "subpods": [
-                             {
-                                 "title": "",
-                                 "plaintext": "...86208998628034825342117067982148086513282306647093..."
-                             }
-                         ]
-                     },
-                     {
-                         "title": "Result",
-                         "primary": true,
-                         "subpods": [
-                             {
-                                 "title": "",
-                                 "plaintext": "7"
-                             }
-                         ]
-                     }
-                 ]
-             }
-         }
-         """
-     )
-
-
- def usecases() -> list[UseCase | str]:
-     return [
-         textwrap.dedent(
-             """
-             # Llama 3.1 - Prompt Formats
-             ## Tokens
-             Here is a list of special tokens that are supported by Llama 3.1:
-             - `<|begin_of_text|>`: Specifies the start of the prompt
-             - `<|end_of_text|>`: Model will cease to generate more tokens. This token is generated only by the base models.
-             - `<|finetune_right_pad_id|>`: This token is used for padding text sequences to the same length in a batch.
-             - `<|start_header_id|>` and `<|end_header_id|>`: These tokens enclose the role for a particular message. The possible roles are: [system, user, assistant and tool]
-             - `<|eom_id|>`: End of message. A message represents a possible stopping point for execution where the model can inform the executor that a tool call needs to be made. This is used for multi-step interactions between the model and any available tools. This token is emitted by the model when the Environment: ipython instruction is used in the system prompt, or if the model calls for a built-in tool.
-             - `<|eot_id|>`: End of turn. Represents when the model has determined that it has finished interacting with the user message that initiated its response. This is used in two scenarios:
-                 - at the end of a direct interaction between the model and the user
-                 - at the end of multiple interactions between the model and any available tools
-                 This token signals to the executor that the model has finished generating a response.
-             - `<|python_tag|>`: Is a special tag used in the model's response to signify a tool call.
-             """
-         ),
-         textwrap.dedent(
-             """
-             There are 4 different roles that are supported by Llama 3.1
-             - `system`: Sets the context in which to interact with the AI model. It typically includes rules, guidelines, or necessary information that helps the model respond effectively.
-             - `user`: Represents the human interacting with the model. It includes the inputs, commands, and questions to the model.
-             - `tool`: A new role introduced in Llama 3.1. This role is used to mark messages with the output of a tool call when sent back to the model from the executor. (The actual token used by the model for this role is "ipython".)
-             - `assistant`: Represents the response generated by the AI model based on the context provided in the `system`, `tool` and `user` prompts.
-             """
-         ),
-         UseCase(
-             title="Llama 3.1 Base Model",
-             description="Text completion for Llama 3.1 base model uses this format.",
-             dialogs=[TextCompletionContent(content="Color of sky is blue but sometimes can also be")],
-             notes="Note start special tag",
-         ),
-         "## Llama 3.1 Instruct Model",
-         UseCase(
-             title="User and assistant conversation",
-             description="Here is a regular multi-turn user assistant conversation and how its formatted.",
-             dialogs=[
-                 [
-                     RawMessage(role="system", content="You are a helpful assistant"),
-                     RawMessage(
-                         role="user",
-                         content="Answer who are you in the form of jeopardy?",
-                     ),
-                 ]
-             ],
-             notes="",
-         ),
-         "## Tool Calling Formats",
-         textwrap.dedent(
-             """
-             The three built-in tools (brave_search, wolfram_alpha, and code interpreter) can be turned on using the system prompt:
-             - Brave Search: Tool call to perform web searches.
-             - Wolfram Alpha: Tool call to perform complex mathematical calculations.
-             - Code Interpreter: Enables the model to output python code.
-             """
-         ),
-         UseCase(
-             title="Builtin Tool Calling",
-             description=textwrap.dedent(
-                 """
-                 Here is an example of a conversation using brave search
-                 """
-             ),
-             dialogs=[llama3_1_builtin_tool_call_dialog()],
-             notes=textwrap.dedent(
-                 """
-                 - Just including Environment: ipython turns on code interpreter; therefore, you don't need to specify code interpretation on the Tools: line. The model can generate python code which is interpreted by the executor, with the result provided back to the model.
-                 - The message body of the assistant response starts with a special tag <|python_tag|>
-                 - As alluded to above, in such an environment, the model can generate <|eom_id|> instead of just the standard <|eot_id|> . The latter indicates the turn is finished, while the former indicates continued multi-step reasoning. That is, the model is expecting a continuation message with the output of the tool call.
-                 - The model tool call response is of the form `tool.call(query="...")` wher tool is `brave_search` or `wolfram_alpha`
-                 """
-             ),
-         ),
-         UseCase(
-             title="Builtin Code Interpreter",
-             description="Here is an actual example of model responding with code",
-             dialogs=[
-                 [
-                     RawMessage(role="system", content="Environment: ipython"),
-                     RawMessage(
-                         role="user",
-                         content="Write code to check if number is prime, use that to see if the number 7 is prime",
-                     ),
-                 ],
-             ],
-             notes=textwrap.dedent(
-                 """
-                 - Model starts with <|python_tag|> and continues writing python code that it needs to be executed
-                 - No explicit mention of code_interpreter in system prompt. `Environment: ipython` implicitly enables it.
-                 """
-             ),
-         ),
-         UseCase(
-             title="Built-in tools full interaction",
-             description="Here is a full interaction with the built-in tools including the tool response and the final assistant response.",
-             dialogs=[
-                 [
-                     RawMessage(
-                         role="system",
-                         content="Environment: ipython\nTools: brave_search, wolfram_alpha\n",
-                     ),
-                     RawMessage(role="user", content="What is the 100th decimal of pi?"),
-                     RawMessage(
-                         role="assistant",
-                         content="",
-                         stop_reason=StopReason.end_of_message,
-                         tool_calls=[
-                             ToolCall(
-                                 call_id="tool_call_id",
-                                 tool_name=BuiltinTool.wolfram_alpha,
-                                 arguments=json.dumps({"query": "100th decimal of pi"}),
-                             )
-                         ],
-                     ),
-                     RawMessage(
-                         role="tool",
-                         content=wolfram_alpha_response(),
-                     ),
-                 ],
-             ],
-             notes=textwrap.dedent(
-                 """
-                 - Note the `<|python_tag|>` in the assistant response.
-                 - Role is `tool` for the wolfram alpha response that is passed back to the model.
-                 - Final message from assistant has <|eot_id|> tag.
-                 """
-             ),
-         ),
-         "## Zero shot tool calling",
-         UseCase(
-             title="JSON based tool calling",
-             description=textwrap.dedent(
-                 """
-                 Llama models can now output custom tool calls from a single message to allow easier tool calling.
-                 The following prompts provide an example of how custom tools can be called from the output of the model.
-                 It's important to note that the model itself does not execute the calls; it provides structured output to facilitate calling by an executor.
-                 """
-             ),
-             dialogs=[llama3_1_custom_tool_call_dialog()],
-             notes=textwrap.dedent(
-                 """
-                 - JSON format for providing tools needs name, description and parameters
-                 - Model responds with `<|python_tag|>` and `<|eom_id|>` as `Environment: ipython` was in the system prompt
-                 - Instructions for tools added as a user message
-                 - Only single tool calls are supported as of now
-                 """
-             ),
-         ),
-         # FIXME: This is not working yet as expected
-         # UseCase(
-         #     title="E2E tool call example",
-         #     description=textwrap.dedent(
-         #         """
-         #         Here is an example showing the whole multi-step turn by taking custom tool outputs and passing back to the model.
-         #         """
-         #     ),
-         #     dialogs=[
-         #         llama3_1_e2e_tool_call_dialog(
-         #             tool_prompt_format=ToolPromptFormat.function_tag
-         #         )
-         #     ],
-         #     notes="",
-         # ),
-         "## Example of a user defined tool calling",
-         UseCase(
-             title="`<function>` based tool calling",
-             description=textwrap.dedent(
-                 """
-                 Here is an example of how you could also write custom instructions for model to do zero shot tool calling.
-                 In this example, we define a custom tool calling format using the `<function>` tag.
-                 """
-             ),
-             dialogs=[llama3_1_custom_tool_call_dialog(ToolPromptFormat.function_tag)],
-             notes=textwrap.dedent(
-                 """
-                 - In this case, model does NOT respond with `<|python_tag|>` and ends with `<|eot_id|>`
-                 - Instructions for tools added as a user message
-                 """
-             ),
-         ),
-     ]
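The JSON-based tool-calling format documented in the two removed files is straightforward to consume on the executor side. A minimal sketch, assuming only the `<|python_tag|>`/`<|eom_id|>` framing described above; `parse_tool_call` is a hypothetical helper for illustration, not part of the llama-stack API:

```python
# Sketch: extract a JSON tool call from a raw model turn framed as
# <|python_tag|>{...}<|eom_id|>, per the removed prompt-format docs.
import json

def parse_tool_call(raw: str) -> dict | None:  # hypothetical helper
    prefix, suffix = "<|python_tag|>", "<|eom_id|>"
    if raw.startswith(prefix) and raw.endswith(suffix):
        return json.loads(raw[len(prefix):-len(suffix)])
    return None  # ordinary text turn, no tool call

turn = '<|python_tag|>{"type": "function", "name": "trending_songs", "parameters": {"n": "10", "genre": "all"}}<|eom_id|>'
call = parse_tool_call(turn)
print(call["name"], call["parameters"])  # trending_songs {'n': '10', 'genre': 'all'}
```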
@@ -1,5 +0,0 @@
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
- #
- # This source code is licensed under the terms described in the LICENSE file in
- # the root directory of this source tree.