llama-stack 0.4.3__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (307)
  1. llama_stack/cli/stack/_list_deps.py +11 -7
  2. llama_stack/cli/stack/run.py +3 -25
  3. llama_stack/core/access_control/datatypes.py +78 -0
  4. llama_stack/core/configure.py +2 -2
  5. {llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
  6. llama_stack/core/connectors/connectors.py +162 -0
  7. llama_stack/core/conversations/conversations.py +61 -58
  8. llama_stack/core/datatypes.py +54 -8
  9. llama_stack/core/library_client.py +60 -13
  10. llama_stack/core/prompts/prompts.py +43 -42
  11. llama_stack/core/routers/datasets.py +20 -17
  12. llama_stack/core/routers/eval_scoring.py +143 -53
  13. llama_stack/core/routers/inference.py +20 -9
  14. llama_stack/core/routers/safety.py +30 -42
  15. llama_stack/core/routers/vector_io.py +15 -7
  16. llama_stack/core/routing_tables/models.py +42 -3
  17. llama_stack/core/routing_tables/scoring_functions.py +19 -19
  18. llama_stack/core/routing_tables/shields.py +20 -17
  19. llama_stack/core/routing_tables/vector_stores.py +8 -5
  20. llama_stack/core/server/auth.py +192 -17
  21. llama_stack/core/server/fastapi_router_registry.py +40 -5
  22. llama_stack/core/server/server.py +24 -5
  23. llama_stack/core/stack.py +54 -10
  24. llama_stack/core/storage/datatypes.py +9 -0
  25. llama_stack/core/store/registry.py +1 -1
  26. llama_stack/core/utils/exec.py +2 -2
  27. llama_stack/core/utils/type_inspection.py +16 -2
  28. llama_stack/distributions/dell/config.yaml +4 -1
  29. llama_stack/distributions/dell/doc_template.md +209 -0
  30. llama_stack/distributions/dell/run-with-safety.yaml +4 -1
  31. llama_stack/distributions/nvidia/config.yaml +4 -1
  32. llama_stack/distributions/nvidia/doc_template.md +170 -0
  33. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
  34. llama_stack/distributions/oci/config.yaml +4 -1
  35. llama_stack/distributions/oci/doc_template.md +140 -0
  36. llama_stack/distributions/open-benchmark/config.yaml +9 -1
  37. llama_stack/distributions/postgres-demo/config.yaml +1 -1
  38. llama_stack/distributions/starter/build.yaml +62 -0
  39. llama_stack/distributions/starter/config.yaml +22 -3
  40. llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
  41. llama_stack/distributions/starter/starter.py +13 -1
  42. llama_stack/distributions/starter-gpu/build.yaml +62 -0
  43. llama_stack/distributions/starter-gpu/config.yaml +22 -3
  44. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
  45. llama_stack/distributions/template.py +10 -2
  46. llama_stack/distributions/watsonx/config.yaml +4 -1
  47. llama_stack/log.py +1 -0
  48. llama_stack/models/llama/resources/dog.jpg +0 -0
  49. llama_stack/models/llama/resources/pasta.jpeg +0 -0
  50. llama_stack/models/llama/resources/small_dog.jpg +0 -0
  51. llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
  52. llama_stack/providers/inline/agents/meta_reference/agents.py +57 -61
  53. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +183 -60
  54. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +94 -22
  55. llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
  56. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
  57. llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
  58. llama_stack/providers/inline/batches/reference/batches.py +2 -1
  59. llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
  60. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
  61. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
  62. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
  63. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
  64. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
  65. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
  66. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  67. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
  68. llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
  69. llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
  70. llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
  71. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
  72. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
  73. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +15 -18
  74. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
  75. llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
  76. llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
  77. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
  78. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
  79. llama_stack/providers/registry/agents.py +1 -0
  80. llama_stack/providers/registry/inference.py +1 -9
  81. llama_stack/providers/registry/vector_io.py +136 -16
  82. llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
  83. llama_stack/providers/remote/eval/nvidia/README.md +134 -0
  84. llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
  85. llama_stack/providers/remote/files/s3/README.md +266 -0
  86. llama_stack/providers/remote/files/s3/config.py +5 -3
  87. llama_stack/providers/remote/files/s3/files.py +2 -2
  88. llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
  89. llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
  90. llama_stack/providers/remote/inference/openai/openai.py +2 -0
  91. llama_stack/providers/remote/inference/together/together.py +4 -0
  92. llama_stack/providers/remote/inference/vertexai/config.py +3 -3
  93. llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
  94. llama_stack/providers/remote/inference/vllm/config.py +37 -18
  95. llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
  96. llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
  97. llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
  98. llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
  99. llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
  100. llama_stack/providers/remote/safety/nvidia/README.md +78 -0
  101. llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
  102. llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
  103. llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
  104. llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
  105. llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
  106. llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
  107. llama_stack/providers/remote/vector_io/oci/config.py +41 -0
  108. llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
  109. llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
  110. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
  111. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
  112. llama_stack/providers/utils/bedrock/client.py +3 -3
  113. llama_stack/providers/utils/bedrock/config.py +7 -7
  114. llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
  115. llama_stack/providers/utils/inference/http_client.py +239 -0
  116. llama_stack/providers/utils/inference/litellm_openai_mixin.py +5 -0
  117. llama_stack/providers/utils/inference/model_registry.py +148 -2
  118. llama_stack/providers/utils/inference/openai_compat.py +2 -1
  119. llama_stack/providers/utils/inference/openai_mixin.py +41 -2
  120. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
  121. llama_stack/providers/utils/memory/vector_store.py +46 -19
  122. llama_stack/providers/utils/responses/responses_store.py +40 -6
  123. llama_stack/providers/utils/safety.py +114 -0
  124. llama_stack/providers/utils/tools/mcp.py +44 -3
  125. llama_stack/testing/api_recorder.py +9 -3
  126. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/METADATA +14 -2
  127. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/RECORD +131 -275
  128. llama_stack-0.5.0rc1.dist-info/top_level.txt +1 -0
  129. llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
  130. llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
  131. llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
  132. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
  133. llama_stack/models/llama/hadamard_utils.py +0 -88
  134. llama_stack/models/llama/llama3/args.py +0 -74
  135. llama_stack/models/llama/llama3/generation.py +0 -378
  136. llama_stack/models/llama/llama3/model.py +0 -304
  137. llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
  138. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
  139. llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
  140. llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
  141. llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
  142. llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
  143. llama_stack/models/llama/llama3/quantization/loader.py +0 -316
  144. llama_stack/models/llama/llama3_1/__init__.py +0 -12
  145. llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
  146. llama_stack/models/llama/llama3_1/prompts.py +0 -258
  147. llama_stack/models/llama/llama3_2/__init__.py +0 -5
  148. llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
  149. llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
  150. llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
  151. llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
  152. llama_stack/models/llama/llama3_3/__init__.py +0 -5
  153. llama_stack/models/llama/llama3_3/prompts.py +0 -259
  154. llama_stack/models/llama/llama4/args.py +0 -107
  155. llama_stack/models/llama/llama4/ffn.py +0 -58
  156. llama_stack/models/llama/llama4/moe.py +0 -214
  157. llama_stack/models/llama/llama4/preprocess.py +0 -435
  158. llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
  159. llama_stack/models/llama/llama4/quantization/loader.py +0 -226
  160. llama_stack/models/llama/llama4/vision/__init__.py +0 -5
  161. llama_stack/models/llama/llama4/vision/embedding.py +0 -210
  162. llama_stack/models/llama/llama4/vision/encoder.py +0 -412
  163. llama_stack/models/llama/quantize_impls.py +0 -316
  164. llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
  165. llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
  166. llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
  167. llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
  168. llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
  169. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
  170. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
  171. llama_stack-0.4.3.dist-info/top_level.txt +0 -2
  172. llama_stack_api/__init__.py +0 -945
  173. llama_stack_api/admin/__init__.py +0 -45
  174. llama_stack_api/admin/api.py +0 -72
  175. llama_stack_api/admin/fastapi_routes.py +0 -117
  176. llama_stack_api/admin/models.py +0 -113
  177. llama_stack_api/agents.py +0 -173
  178. llama_stack_api/batches/__init__.py +0 -40
  179. llama_stack_api/batches/api.py +0 -53
  180. llama_stack_api/batches/fastapi_routes.py +0 -113
  181. llama_stack_api/batches/models.py +0 -78
  182. llama_stack_api/benchmarks/__init__.py +0 -43
  183. llama_stack_api/benchmarks/api.py +0 -39
  184. llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  185. llama_stack_api/benchmarks/models.py +0 -109
  186. llama_stack_api/common/__init__.py +0 -5
  187. llama_stack_api/common/content_types.py +0 -101
  188. llama_stack_api/common/errors.py +0 -95
  189. llama_stack_api/common/job_types.py +0 -38
  190. llama_stack_api/common/responses.py +0 -77
  191. llama_stack_api/common/training_types.py +0 -47
  192. llama_stack_api/common/type_system.py +0 -146
  193. llama_stack_api/connectors.py +0 -146
  194. llama_stack_api/conversations.py +0 -270
  195. llama_stack_api/datasetio.py +0 -55
  196. llama_stack_api/datasets/__init__.py +0 -61
  197. llama_stack_api/datasets/api.py +0 -35
  198. llama_stack_api/datasets/fastapi_routes.py +0 -104
  199. llama_stack_api/datasets/models.py +0 -152
  200. llama_stack_api/datatypes.py +0 -373
  201. llama_stack_api/eval.py +0 -137
  202. llama_stack_api/file_processors/__init__.py +0 -27
  203. llama_stack_api/file_processors/api.py +0 -64
  204. llama_stack_api/file_processors/fastapi_routes.py +0 -78
  205. llama_stack_api/file_processors/models.py +0 -42
  206. llama_stack_api/files/__init__.py +0 -35
  207. llama_stack_api/files/api.py +0 -51
  208. llama_stack_api/files/fastapi_routes.py +0 -124
  209. llama_stack_api/files/models.py +0 -107
  210. llama_stack_api/inference.py +0 -1169
  211. llama_stack_api/inspect_api/__init__.py +0 -37
  212. llama_stack_api/inspect_api/api.py +0 -25
  213. llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  214. llama_stack_api/inspect_api/models.py +0 -28
  215. llama_stack_api/internal/kvstore.py +0 -28
  216. llama_stack_api/internal/sqlstore.py +0 -81
  217. llama_stack_api/llama_stack_api/__init__.py +0 -945
  218. llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
  219. llama_stack_api/llama_stack_api/admin/api.py +0 -72
  220. llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
  221. llama_stack_api/llama_stack_api/admin/models.py +0 -113
  222. llama_stack_api/llama_stack_api/agents.py +0 -173
  223. llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
  224. llama_stack_api/llama_stack_api/batches/api.py +0 -53
  225. llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
  226. llama_stack_api/llama_stack_api/batches/models.py +0 -78
  227. llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
  228. llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
  229. llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  230. llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
  231. llama_stack_api/llama_stack_api/common/__init__.py +0 -5
  232. llama_stack_api/llama_stack_api/common/content_types.py +0 -101
  233. llama_stack_api/llama_stack_api/common/errors.py +0 -95
  234. llama_stack_api/llama_stack_api/common/job_types.py +0 -38
  235. llama_stack_api/llama_stack_api/common/responses.py +0 -77
  236. llama_stack_api/llama_stack_api/common/training_types.py +0 -47
  237. llama_stack_api/llama_stack_api/common/type_system.py +0 -146
  238. llama_stack_api/llama_stack_api/connectors.py +0 -146
  239. llama_stack_api/llama_stack_api/conversations.py +0 -270
  240. llama_stack_api/llama_stack_api/datasetio.py +0 -55
  241. llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
  242. llama_stack_api/llama_stack_api/datasets/api.py +0 -35
  243. llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
  244. llama_stack_api/llama_stack_api/datasets/models.py +0 -152
  245. llama_stack_api/llama_stack_api/datatypes.py +0 -373
  246. llama_stack_api/llama_stack_api/eval.py +0 -137
  247. llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
  248. llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
  249. llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
  250. llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
  251. llama_stack_api/llama_stack_api/files/__init__.py +0 -35
  252. llama_stack_api/llama_stack_api/files/api.py +0 -51
  253. llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
  254. llama_stack_api/llama_stack_api/files/models.py +0 -107
  255. llama_stack_api/llama_stack_api/inference.py +0 -1169
  256. llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
  257. llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
  258. llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  259. llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
  260. llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
  261. llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
  262. llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
  263. llama_stack_api/llama_stack_api/models.py +0 -171
  264. llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
  265. llama_stack_api/llama_stack_api/post_training.py +0 -370
  266. llama_stack_api/llama_stack_api/prompts.py +0 -203
  267. llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
  268. llama_stack_api/llama_stack_api/providers/api.py +0 -16
  269. llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
  270. llama_stack_api/llama_stack_api/providers/models.py +0 -24
  271. llama_stack_api/llama_stack_api/py.typed +0 -0
  272. llama_stack_api/llama_stack_api/rag_tool.py +0 -168
  273. llama_stack_api/llama_stack_api/resource.py +0 -37
  274. llama_stack_api/llama_stack_api/router_utils.py +0 -160
  275. llama_stack_api/llama_stack_api/safety.py +0 -132
  276. llama_stack_api/llama_stack_api/schema_utils.py +0 -208
  277. llama_stack_api/llama_stack_api/scoring.py +0 -93
  278. llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
  279. llama_stack_api/llama_stack_api/shields.py +0 -93
  280. llama_stack_api/llama_stack_api/tools.py +0 -226
  281. llama_stack_api/llama_stack_api/vector_io.py +0 -941
  282. llama_stack_api/llama_stack_api/vector_stores.py +0 -53
  283. llama_stack_api/llama_stack_api/version.py +0 -9
  284. llama_stack_api/models.py +0 -171
  285. llama_stack_api/openai_responses.py +0 -1468
  286. llama_stack_api/post_training.py +0 -370
  287. llama_stack_api/prompts.py +0 -203
  288. llama_stack_api/providers/__init__.py +0 -33
  289. llama_stack_api/providers/api.py +0 -16
  290. llama_stack_api/providers/fastapi_routes.py +0 -57
  291. llama_stack_api/providers/models.py +0 -24
  292. llama_stack_api/py.typed +0 -0
  293. llama_stack_api/rag_tool.py +0 -168
  294. llama_stack_api/resource.py +0 -37
  295. llama_stack_api/router_utils.py +0 -160
  296. llama_stack_api/safety.py +0 -132
  297. llama_stack_api/schema_utils.py +0 -208
  298. llama_stack_api/scoring.py +0 -93
  299. llama_stack_api/scoring_functions.py +0 -211
  300. llama_stack_api/shields.py +0 -93
  301. llama_stack_api/tools.py +0 -226
  302. llama_stack_api/vector_io.py +0 -941
  303. llama_stack_api/vector_stores.py +0 -53
  304. llama_stack_api/version.py +0 -9
  305. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/WHEEL +0 -0
  306. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/entry_points.txt +0 -0
  307. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/licenses/LICENSE +0 -0
llama_stack/models/llama/llama3_2/prompts_text.py
@@ -1,229 +0,0 @@
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
- #
- # This source code is licensed under the terms described in the LICENSE file in
- # the root directory of this source tree.
-
- import json
- import textwrap
-
- from llama_stack.models.llama.datatypes import (
- RawMessage,
- StopReason,
- ToolCall,
- ToolPromptFormat,
- )
-
- from ..prompt_format import (
- TextCompletionContent,
- UseCase,
- llama3_1_builtin_code_interpreter_dialog,
- )
-
-
- def user_tool_call():
- content = textwrap.dedent(
- """
- Questions: Can you retrieve the details for the user with the ID 7890, who has black as their special request?
- Here is a list of functions in JSON format that you can invoke:
- [
- {
- "name": "get_user_info",
- "description": "Retrieve details for a specific user by their unique identifier. Note that the provided function is in Python 3 syntax.",
- "parameters": {
- "type": "dict",
- "required": [
- "user_id"
- ],
- "properties": {
- "user_id": {
- "type": "integer",
- "description": "The unique identifier of the user. It is used to fetch the specific user details from the database."
- },
- "special": {
- "type": "string",
- "description": "Any special information or parameters that need to be considered while fetching user details.",
- "default": "none"
- }
- }
- }
- }
- ]
-
- Should you decide to return the function call(s),Put it in the format of [func1(params_name=params_value, params_name2=params_value2...), func2(params)]
-
- NO other text MUST be included.
- """
- )
- return content.strip()
-
-
- def system_tool_call():
- content = textwrap.dedent(
- """
- You are an expert in composing functions. You are given a question and a set of possible functions.
- Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
- If none of the function can be used, point it out. If the given question lacks the parameters required by the function,
- also point it out. You should only return the function call in tools call sections.
-
- If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]
- You SHOULD NOT include any other text in the response.
-
- Here is a list of functions in JSON format that you can invoke.
-
- [
- {
- "name": "get_weather",
- "description": "Get weather info for places",
- "parameters": {
- "type": "dict",
- "required": [
- "city"
- ],
- "properties": {
- "city": {
- "type": "string",
- "description": "The name of the city to get the weather for"
- },
- "metric": {
- "type": "string",
- "description": "The metric for weather. Options are: celsius, fahrenheit",
- "default": "celsius"
- }
- }
- }
- }
- ]
- """
- )
- return content.strip()
-
-
- def usecases():
- return [
- UseCase(
- title="User and assistant conversation",
- description="Here is a regular multi-turn user assistant conversation and how its formatted.",
- dialogs=[
- [
- RawMessage(role="system", content="You are a helpful assistant"),
- RawMessage(role="user", content="Who are you?"),
- ]
- ],
- notes="This format is unchanged from Llama3.1",
- ),
- UseCase(
- title="Zero shot function calling",
- description=textwrap.dedent(
- """
- For Llama3.2 1B and 3B instruct models, we are introducing a new format for zero shot function calling.
- This new format is designed to be more flexible and powerful than the previous format.
- All available functions can be provided in the system message. A key difference is in the format of how the assistant responds with function calls.
- It is pythonic in the form of `[func1(params_name=params_value, params_name2=params_value2...), func2(params)]` instead of the `json` or `<function>` tag that were defined in Llama3.1.
- Here is an example for the same,
- """
- ),
- dialogs=[
- # Zero shot tool calls as system message
- [
- RawMessage(role="system", content=system_tool_call()),
- RawMessage(role="user", content="What is the weather in SF and Seattle?"),
- ],
- ],
- notes=textwrap.dedent(
- """
- - The output supports multiple tool calls natively
- - JSON format for defining the functions in the system prompt is similar to Llama3.1
- """
- ),
- ),
- UseCase(
- title="Zero shot function calling with user message",
- description=textwrap.dedent(
- """
- While the default is to provide all function calls in a system message, in Llama3.2 text models you can also provide information for all the available tools in a user message.
- """
- ),
- dialogs=[
- # Zero shot tool call as user message
- [
- RawMessage(role="user", content=user_tool_call()),
- ],
- ],
- notes=textwrap.dedent(
- """
- - The tool call format for the model is the same whether your function calls are provided in the system or user message.
- - While builtin tool calls end with a <|eom_id|>, notice the <|eot_id|> for zero shot tool calls.
- """
- ),
- ),
- UseCase(
- title="Code Interpreter",
- description=textwrap.dedent(
- """
- Code Interpreter continues to work in 3.2 text models similar to Llama 3.1 model family.
- Here is an example,
- """
- ),
- dialogs=[llama3_1_builtin_code_interpreter_dialog()],
- notes=textwrap.dedent(
- """
- - Note `Environment: ipython` in the system prompt.
- - Note that the response starts with `<|python_tag|>` and ends with `<|eom_id|>`
- """
- ),
- ),
- UseCase(
- title="Zero shot function calling E2E format",
- description=textwrap.dedent(
- """
- Here is an example of the e2e cycle of tool calls with the model in a muti-step way.
- """
- ),
- dialogs=[
- [
- RawMessage(role="system", content=system_tool_call()),
- RawMessage(role="user", content="What is the weather in SF?"),
- RawMessage(
- role="assistant",
- content="",
- stop_reason=StopReason.end_of_turn,
- tool_calls=[
- ToolCall(
- call_id="cc",
- tool_name="get_weather",
- arguments={
- "city": "San Francisco",
- "metric": "celsius",
- },
- )
- ],
- ),
- RawMessage(
- role="tool",
- content=json.dumps("25 C"),
- ),
- ],
- ],
- notes=textwrap.dedent(
- """
- - The output of the function call is provided back to the model as a tool response ( in json format ).
- - Notice `<|start_header_id|>ipython<|end_header_id|>` as the header message preceding the tool response.
- - The model finally summarizes the information from the tool response and returns the result to the user.
- """
- ),
- tool_prompt_format=ToolPromptFormat.python_list,
- ),
- UseCase(
- title="Prompt format for base models",
- description=textwrap.dedent(
- """
- For base models (Llama3.2-1B and Llama3.2-3B), the prompt format for a simple completion is as follows
- """
- ),
- dialogs=[
- TextCompletionContent(content="The color of the sky is blue but sometimes it can also be"),
- ],
- notes="Same as Llama3.1",
- ),
- ]
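
The deleted `prompts_text.py` above documents the pythonic zero-shot tool-call format (`[func1(param=value, ...), func2(...)]`) introduced for the Llama3.2 1B/3B instruct models. As a rough illustration of how a client might consume that format, here is a minimal, self-contained sketch (not llama-stack code, and assuming special tokens such as `<|eot_id|>` have already been stripped) that parses such a response with Python's `ast` module:

```python
import ast


def parse_pythonic_tool_calls(text: str) -> list[tuple[str, dict]]:
    """Parse a response like "[get_weather(city='SF', metric='celsius')]"
    into (function_name, keyword_arguments) pairs."""
    tree = ast.parse(text.strip(), mode="eval")
    if not isinstance(tree.body, ast.List):
        raise ValueError("expected a Python-style list of function calls")
    calls = []
    for node in tree.body.elts:
        if not (isinstance(node, ast.Call) and isinstance(node.func, ast.Name)):
            raise ValueError("expected simple function calls")
        # literal_eval keeps this safe: only constant keyword values are accepted.
        kwargs = {kw.arg: ast.literal_eval(kw.value) for kw in node.keywords}
        calls.append((node.func.id, kwargs))
    return calls


print(parse_pythonic_tool_calls(
    "[get_weather(city='San Francisco', metric='celsius'), get_weather(city='Seattle', metric='celsius')]"
))
# [('get_weather', {'city': 'San Francisco', 'metric': 'celsius'}),
#  ('get_weather', {'city': 'Seattle', 'metric': 'celsius'})]
```
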
llama_stack/models/llama/llama3_2/prompts_vision.py
@@ -1,126 +0,0 @@
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
- #
- # This source code is licensed under the terms described in the LICENSE file in
- # the root directory of this source tree.
-
- import textwrap
- from pathlib import Path
-
- from llama_stack.models.llama.datatypes import (
- RawMediaItem,
- RawMessage,
- RawTextItem,
- )
-
- from ..prompt_format import (
- TextCompletionContent,
- UseCase,
- llama3_1_builtin_tool_call_dialog,
- # llama3_1_builtin_tool_call_with_image_dialog,
- llama3_2_user_assistant_conversation,
- )
-
-
- def usecases():
- this_dir = Path(__file__).parent.parent.resolve()
- with open(this_dir / "scripts/resources/dog.jpg", "rb") as f:
- img = f.read()
-
- return [
- llama3_2_user_assistant_conversation(),
- UseCase(
- title="User and assistant conversation with Images",
- description="This example shows how to pass and image to the model as part of the messages.",
- dialogs=[
- [
- RawMessage(
- role="user",
- content=[
- RawMediaItem(data=img),
- RawTextItem(text="Describe this image in two sentences"),
- ],
- )
- ],
- ],
- notes=textwrap.dedent(
- """
- - The `<|image|>` tag is used to indicate presence of the image
- - The model isn't an early fusion model so doesn't actually translate an image into several tokens. Instead the cross-attention layers take input "on the side" from a vision encoder
- ![Image](mm-model.png)
- - Its important to postion the <|image|> tag appropriately in the prompt. Image will only attend to the subsequent text tokens
- - The <|image|> tag is part of the user message body, implying that it should only come after the header `<|start_header_id|>{role}<|end_header_id|>` in the message body
- - We recommend using a single image in one prompt
- """
- ),
- ),
- UseCase(
- title="Builtin and Zero Shot Tool Calling",
- description=textwrap.dedent(
- """
- Llama3.2 vision models follow the same tool calling format as Llama3.1 models when inputs are text only.
- Use `Environment: ipython` to enable tools.
- Add `Tools: {{tool_name1}},{{tool_name2}}` for each of the builtin tools.
- The same builtin tools as Llama3.1 are available,
- - code_interpreter (for executing python code)
- - brave_search (to search the web)
- - wolfram_alpha (for querying wolfram alpha for mathematical questions)
- """,
- ),
- dialogs=[llama3_1_builtin_tool_call_dialog()],
- notes=textwrap.dedent(
- """
- - Note the `<|python_tag|>` before `brave_search` function call.
- - The `<|eom_id|>` tag is used to indicate the end of the message.
- - Similar to Llama3.1, code_interpreter is not explicitly mentioned but is enabled via `Environment: ipython`.
- - Tool Calling does NOT work with images in the prompt as of now.
- """
- ),
- ),
- # UseCase(
- # title="Tool Calling for vision models",
- # description=textwrap.dedent(
- # """
- # While Llama3.2 vision models follow the same tool calling format as Llama3.1 models when inputs are text only,
- # they are not able to do tool calling when prompt contains image inputs (along with text).
- # The recommended way would be to separate out the image understanding from the tool calling in successive prompts.
- # Here is an example of how that could be done,
- # """,
- # ),
- # dialogs=[llama3_1_builtin_tool_call_with_image_dialog()],
- # notes=textwrap.dedent(
- # """
- # - Instead of a single prompt (image understanding + tool call), we split into two prompts to achieve the same result.
- # """
- # ),
- # ),
- UseCase(
- title="Prompt format for base models",
- description=textwrap.dedent(
- """
- For base models (Llama3.2-11B-Vision and Llama3.2-90B-Vision), the prompt format for a simple completion is as follows
- """
- ),
- dialogs=[
- TextCompletionContent(content="The color of the sky is blue but sometimes it can also be"),
- ],
- notes="- Same as Llama3.1",
- ),
- UseCase(
- title="Prompt format for base models with Image",
- description=textwrap.dedent(
- """
- For base models (Llama3.2-11B-Vision and Llama3.2-90B-Vision), here is an example of how the text completion format looks with an image,
- """
- ),
- dialogs=[
- TextCompletionContent(
- content=[
- RawMediaItem(data=img),
- RawTextItem(text="If I had to write a haiku for this one"),
- ]
- ),
- ],
- notes="- Note the placement of the special tags <|begin_of_text|> and <|image|>",
- ),
- ]
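
The deleted `prompts_vision.py` shows how a multimodal message is assembled for the Llama3.2 vision models. For reference, a minimal sketch of the same pattern outside the prompt-format scripts, assuming the `RawMessage`/`RawMediaItem`/`RawTextItem` datatypes in `llama_stack.models.llama.datatypes` remain available (this diff removes the prompt-format files, not that module) and using a placeholder image path:

```python
from pathlib import Path

from llama_stack.models.llama.datatypes import RawMediaItem, RawMessage, RawTextItem

# Placeholder path; any local image bytes work the same way.
img = Path("dog.jpg").read_bytes()

# Mirror the deleted use case: the media item precedes the text so that the
# <|image|> tag lands before the tokens that should attend to it.
message = RawMessage(
    role="user",
    content=[
        RawMediaItem(data=img),
        RawTextItem(text="Describe this image in two sentences"),
    ],
)
```
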
llama_stack/models/llama/llama3_2/text_prompt_format.md
@@ -1,286 +0,0 @@
- ## User and assistant conversation
-
- Here is a regular multi-turn user assistant conversation and how its formatted.
-
- ##### Input Prompt Format
- ```
- <|begin_of_text|><|start_header_id|>system<|end_header_id|>
-
- You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
-
- Who are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
-
- ```
-
- ##### Model Response Format
- ```
- I'm an AI assistant, which means I'm a computer program designed to simulate conversation and answer questions to the best of my ability. I'm here to help you with any questions or tasks you may have, and I'll do my best to provide you with accurate and helpful information.
-
- I don't have a personal name, but I'm often referred to as a "virtual assistant" or a "chatbot." I'm a machine learning model, which means I've been trained on a large dataset of text and can generate responses based on patterns and context.
-
- I can help with a wide range of topics, from general knowledge and trivia to more specialized subjects like science, history, and technology. I can also assist with tasks like language translation, text summarization, and even generating creative content like stories or poetry.
-
- So, what can I help you with today?<|eot_id|>
- ```
-
-
- ##### Notes
- This format is unchanged from Llama3.1
-
- ## Zero shot function calling
-
-
- For Llama3.2 1B and 3B instruct models, we are introducing a new format for zero shot function calling.
- This new format is designed to be more flexible and powerful than the previous format.
- All available functions can be provided in the system message. A key difference is in the format of how the assistant responds with function calls.
- It is pythonic in the form of `[func1(params_name=params_value, params_name2=params_value2...), func2(params)]` instead of the `json` or `<function>` tag that were defined in Llama3.1.
- Here is an example for the same,
-
-
- ##### Input Prompt Format
- ```
- <|begin_of_text|><|start_header_id|>system<|end_header_id|>
-
- You are an expert in composing functions. You are given a question and a set of possible functions.
- Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
- If none of the function can be used, point it out. If the given question lacks the parameters required by the function,
- also point it out. You should only return the function call in tools call sections.
-
- If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]
- You SHOULD NOT include any other text in the response.
-
- Here is a list of functions in JSON format that you can invoke.
-
- [
- {
- "name": "get_weather",
- "description": "Get weather info for places",
- "parameters": {
- "type": "dict",
- "required": [
- "city"
- ],
- "properties": {
- "city": {
- "type": "string",
- "description": "The name of the city to get the weather for"
- },
- "metric": {
- "type": "string",
- "description": "The metric for weather. Options are: celsius, fahrenheit",
- "default": "celsius"
- }
- }
- }
- }
- ]<|eot_id|><|start_header_id|>user<|end_header_id|>
-
- What is the weather in SF and Seattle?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
-
- ```
-
- ##### Model Response Format
- ```
- [get_weather(city='San Francisco', metric='celsius'), get_weather(city='Seattle', metric='celsius')]<|eot_id|>
- ```
-
-
- ##### Notes
-
- - The output supports multiple tool calls natively
- - JSON format for defining the functions in the system prompt is similar to Llama3.1
-
-
- ## Zero shot function calling with user message
-
-
- While the default is to provide all function calls in a system message, in Llama3.2 text models you can also provide information for all the available tools in a user message.
-
-
- ##### Input Prompt Format
- ```
- <|begin_of_text|><|start_header_id|>user<|end_header_id|>
-
- Questions: Can you retrieve the details for the user with the ID 7890, who has black as their special request?
- Here is a list of functions in JSON format that you can invoke:
- [
- {
- "name": "get_user_info",
- "description": "Retrieve details for a specific user by their unique identifier. Note that the provided function is in Python 3 syntax.",
- "parameters": {
- "type": "dict",
- "required": [
- "user_id"
- ],
- "properties": {
- "user_id": {
- "type": "integer",
- "description": "The unique identifier of the user. It is used to fetch the specific user details from the database."
- },
- "special": {
- "type": "string",
- "description": "Any special information or parameters that need to be considered while fetching user details.",
- "default": "none"
- }
- }
- }
- }
- ]
-
- Should you decide to return the function call(s),Put it in the format of [func1(params_name=params_value, params_name2=params_value2...), func2(params)]
-
- NO other text MUST be included.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
-
- ```
-
- ##### Model Response Format
- ```
- [get_user_info(user_id=7890, special='black')]<|eot_id|>
- ```
-
-
- ##### Notes
-
- - The tool call format for the model is the same whether your function calls are provided in the system or user message.
- - While builtin tool calls end with a <|eom_id|>, notice the <|eot_id|> for zero shot tool calls.
-
-
- ## Code Interpreter
-
-
- Code Interpreter continues to work in 3.2 text models similar to Llama 3.1 model family.
- Here is an example,
-
-
- ##### Input Prompt Format
- ```
- <|begin_of_text|><|start_header_id|>system<|end_header_id|>
-
- Environment: ipython
-
- Cutting Knowledge Date: December 2023
- Today Date: 24 September 2024
- <|eot_id|><|start_header_id|>user<|end_header_id|>
-
- Write code to check if number is prime. Use it to verify if number 7 is prime<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
-
- ```
-
- ##### Model Response Format
- ```
- <|python_tag|>def is_prime(n):
- if n <= 1:
- return False
- if n == 2:
- return True
- if n % 2 == 0:
- return False
- max_divisor = int(n**0.5) + 1
- for d in range(3, max_divisor, 2):
- if n % d == 0:
- return False
- return True
-
- print(is_prime(7)) # Output: True<|eom_id|>
- ```
-
-
- ##### Notes
-
- - Note `Environment: ipython` in the system prompt.
- - Note that the response starts with `<|python_tag|>` and ends with `<|eom_id|>`
-
-
- ## Zero shot function calling E2E format
-
-
- Here is an example of the e2e cycle of tool calls with the model in a muti-step way.
-
-
- ##### Input Prompt Format
- ```
- <|begin_of_text|><|start_header_id|>system<|end_header_id|>
-
- You are an expert in composing functions. You are given a question and a set of possible functions.
- Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
- If none of the function can be used, point it out. If the given question lacks the parameters required by the function,
- also point it out. You should only return the function call in tools call sections.
-
- If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]
- You SHOULD NOT include any other text in the response.
-
- Here is a list of functions in JSON format that you can invoke.
-
- [
- {
- "name": "get_weather",
- "description": "Get weather info for places",
- "parameters": {
- "type": "dict",
- "required": [
- "city"
- ],
- "properties": {
- "city": {
- "type": "string",
- "description": "The name of the city to get the weather for"
- },
- "metric": {
- "type": "string",
- "description": "The metric for weather. Options are: celsius, fahrenheit",
- "default": "celsius"
- }
- }
- }
- }
- ]<|eot_id|><|start_header_id|>user<|end_header_id|>
-
- What is the weather in SF?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
- <|python_tag|>[get_weather(city="San Francisco", metric="celsius")]<|eot_id|><|start_header_id|>ipython<|end_header_id|>
-
- "25 C"<|eot_id|><|start_header_id|>assistant<|end_header_id|>
-
-
- ```
-
- ##### Model Response Format
- ```
- The weather in San Francisco is 25 C.<|eot_id|>
- ```
-
-
- ##### Notes
-
- - The output of the function call is provided back to the model as a tool response ( in json format ).
- - Notice `<|start_header_id|>ipython<|end_header_id|>` as the header message preceding the tool response.
- - The model finally summarizes the information from the tool response and returns the result to the user.
-
-
- ## Prompt format for base models
-
-
- For base models (Llama3.2-1B and Llama3.2-3B), the prompt format for a simple completion is as follows
-
-
- ##### Input Prompt Format
- ```
- <|begin_of_text|>The color of the sky is blue but sometimes it can also be
- ```
-
- ##### Model Response Format
- ```
- gray or even purple. The color of the sky can change depending on the time of day, the weather, and the amount of pollution in the air. The color of the sky can also be affected by the presence of dust, smoke, and other particles in the air.
-
- ## Step 1: Identify the factors that
- ```
-
-
- ##### Notes
- Same as Llama3.1
-
- Thank You!
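
The removed `text_prompt_format.md` spells out the Llama3.2 text chat layout: each turn is a `<|start_header_id|>{role}<|end_header_id|>` header, a blank line, the message body, and an `<|eot_id|>` terminator, with generation primed by an empty assistant header. As a standalone illustration only (the real formatting lives in the model's chat-format code, which this diff does not show), here is a sketch that reproduces the first documented example:

```python
def render_chat(messages: list[dict[str, str]]) -> str:
    """Assemble the documented Llama3.2 text chat layout from role/content dicts."""
    prompt = "<|begin_of_text|>"
    for m in messages:
        prompt += f"<|start_header_id|>{m['role']}<|end_header_id|>\n\n{m['content']}<|eot_id|>"
    # Prime the model to answer as the assistant.
    return prompt + "<|start_header_id|>assistant<|end_header_id|>\n\n"


print(render_chat([
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Who are you?"},
]))
```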