llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +12 -21
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.4.dist-info/RECORD +0 -625
  445. llama_stack-0.3.4.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -5,12 +5,12 @@
 # the root directory of this source tree.
 
 
-from llama_stack.providers.datatypes import (
+from llama_stack.core.storage.kvstore import kvstore_dependencies
+from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
 )
-from llama_stack.providers.utils.kvstore import kvstore_dependencies
 
 
 def available_providers() -> list[ProviderSpec]:
@@ -30,11 +30,15 @@ def available_providers() -> list[ProviderSpec]:
             config_class="llama_stack.providers.inline.agents.meta_reference.MetaReferenceAgentsImplConfig",
             api_dependencies=[
                 Api.inference,
-                Api.safety,
                 Api.vector_io,
                 Api.tool_runtime,
                 Api.tool_groups,
                 Api.conversations,
+                Api.prompts,
+                Api.files,
+            ],
+            optional_api_dependencies=[
+                Api.safety,
             ],
             description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.",
         ),
@@ -5,7 +5,7 @@
5
5
  # the root directory of this source tree.
6
6
 
7
7
 
8
- from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
8
+ from llama_stack_api import Api, InlineProviderSpec, ProviderSpec
9
9
 
10
10
 
11
11
  def available_providers() -> list[ProviderSpec]:
@@ -5,7 +5,7 @@
5
5
  # the root directory of this source tree.
6
6
 
7
7
 
8
- from llama_stack.providers.datatypes import (
8
+ from llama_stack_api import (
9
9
  Api,
10
10
  InlineProviderSpec,
11
11
  ProviderSpec,
@@ -5,7 +5,7 @@
5
5
  # the root directory of this source tree.
6
6
 
7
7
 
8
- from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
8
+ from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
9
9
 
10
10
 
11
11
  def available_providers() -> list[ProviderSpec]:
@@ -4,4 +4,8 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from .datasets import *
7
+ from llama_stack_api import ProviderSpec
8
+
9
+
10
+ def available_providers() -> list[ProviderSpec]:
11
+ return []
@@ -4,8 +4,8 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
8
- from llama_stack.providers.utils.sqlstore.sqlstore import sql_store_pip_packages
7
+ from llama_stack.core.storage.sqlstore.sqlstore import sql_store_pip_packages
8
+ from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
9
9
 
10
10
 
11
11
  def available_providers() -> list[ProviderSpec]:
@@ -28,4 +28,13 @@ def available_providers() -> list[ProviderSpec]:
28
28
  config_class="llama_stack.providers.remote.files.s3.config.S3FilesImplConfig",
29
29
  description="AWS S3-based file storage provider for scalable cloud file management with metadata persistence.",
30
30
  ),
31
+ RemoteProviderSpec(
32
+ api=Api.files,
33
+ provider_type="remote::openai",
34
+ adapter_type="openai",
35
+ pip_packages=["openai"] + sql_store_pip_packages,
36
+ module="llama_stack.providers.remote.files.openai",
37
+ config_class="llama_stack.providers.remote.files.openai.config.OpenAIFilesImplConfig",
38
+ description="OpenAI Files API provider for managing files through OpenAI's native file storage service.",
39
+ ),
31
40
  ]
@@ -5,7 +5,7 @@
5
5
  # the root directory of this source tree.
6
6
 
7
7
 
8
- from llama_stack.providers.datatypes import (
8
+ from llama_stack_api import (
9
9
  Api,
10
10
  InlineProviderSpec,
11
11
  ProviderSpec,
@@ -61,6 +61,7 @@ def available_providers() -> list[ProviderSpec]:
61
61
  pip_packages=[],
62
62
  module="llama_stack.providers.remote.inference.cerebras",
63
63
  config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig",
64
+ provider_data_validator="llama_stack.providers.remote.inference.cerebras.config.CerebrasProviderDataValidator",
64
65
  description="Cerebras inference provider for running models on Cerebras Cloud platform.",
65
66
  ),
66
67
  RemoteProviderSpec(
@@ -137,10 +138,11 @@ def available_providers() -> list[ProviderSpec]:
137
138
  api=Api.inference,
138
139
  adapter_type="bedrock",
139
140
  provider_type="remote::bedrock",
140
- pip_packages=["boto3"],
141
+ pip_packages=[],
141
142
  module="llama_stack.providers.remote.inference.bedrock",
142
143
  config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig",
143
- description="AWS Bedrock inference provider for accessing various AI models through AWS's managed service.",
144
+ provider_data_validator="llama_stack.providers.remote.inference.bedrock.config.BedrockProviderDataValidator",
145
+ description="AWS Bedrock inference provider using OpenAI compatible endpoint.",
144
146
  ),
145
147
  RemoteProviderSpec(
146
148
  api=Api.inference,
@@ -149,6 +151,7 @@ def available_providers() -> list[ProviderSpec]:
149
151
  pip_packages=["databricks-sdk"],
150
152
  module="llama_stack.providers.remote.inference.databricks",
151
153
  config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
154
+ provider_data_validator="llama_stack.providers.remote.inference.databricks.config.DatabricksProviderDataValidator",
152
155
  description="Databricks inference provider for running models on Databricks' unified analytics platform.",
153
156
  ),
154
157
  RemoteProviderSpec(
@@ -158,6 +161,7 @@ def available_providers() -> list[ProviderSpec]:
158
161
  pip_packages=[],
159
162
  module="llama_stack.providers.remote.inference.nvidia",
160
163
  config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig",
164
+ provider_data_validator="llama_stack.providers.remote.inference.nvidia.config.NVIDIAProviderDataValidator",
161
165
  description="NVIDIA inference provider for accessing NVIDIA NIM models and AI services.",
162
166
  ),
163
167
  RemoteProviderSpec(
@@ -167,6 +171,7 @@ def available_providers() -> list[ProviderSpec]:
167
171
  pip_packages=[],
168
172
  module="llama_stack.providers.remote.inference.runpod",
169
173
  config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig",
174
+ provider_data_validator="llama_stack.providers.remote.inference.runpod.config.RunpodProviderDataValidator",
170
175
  description="RunPod inference provider for running models on RunPod's cloud GPU platform.",
171
176
  ),
172
177
  RemoteProviderSpec(
@@ -292,6 +297,20 @@ Available Models:
292
297
  Azure OpenAI inference provider for accessing GPT models and other Azure services.
293
298
  Provider documentation
294
299
  https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
300
+ """,
301
+ ),
302
+ RemoteProviderSpec(
303
+ api=Api.inference,
304
+ provider_type="remote::oci",
305
+ adapter_type="oci",
306
+ pip_packages=["oci"],
307
+ module="llama_stack.providers.remote.inference.oci",
308
+ config_class="llama_stack.providers.remote.inference.oci.config.OCIConfig",
309
+ provider_data_validator="llama_stack.providers.remote.inference.oci.config.OCIProviderDataValidator",
310
+ description="""
311
+ Oracle Cloud Infrastructure (OCI) Generative AI inference provider for accessing OCI's Generative AI Platform-as-a-Service models.
312
+ Provider documentation
313
+ https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm
295
314
  """,
296
315
  ),
297
316
  ]
@@ -7,7 +7,7 @@
7
7
 
8
8
  from typing import cast
9
9
 
10
- from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
10
+ from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
11
11
 
12
12
  # We provide two versions of these providers so that distributions can package the appropriate version of torch.
13
13
  # The CPU version is used for distributions that don't have GPU support -- they result in smaller container images.
@@ -5,7 +5,7 @@
5
5
  # the root directory of this source tree.
6
6
 
7
7
 
8
- from llama_stack.providers.datatypes import (
8
+ from llama_stack_api import (
9
9
  Api,
10
10
  InlineProviderSpec,
11
11
  ProviderSpec,
@@ -5,7 +5,7 @@
5
5
  # the root directory of this source tree.
6
6
 
7
7
 
8
- from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
8
+ from llama_stack_api import Api, InlineProviderSpec, ProviderSpec
9
9
 
10
10
 
11
11
  def available_providers() -> list[ProviderSpec]:
@@ -5,13 +5,13 @@
5
5
  # the root directory of this source tree.
6
6
 
7
7
 
8
- from llama_stack.providers.datatypes import (
8
+ from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS
9
+ from llama_stack_api import (
9
10
  Api,
10
11
  InlineProviderSpec,
11
12
  ProviderSpec,
12
13
  RemoteProviderSpec,
13
14
  )
14
- from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS
15
15
 
16
16
 
17
17
  def available_providers() -> list[ProviderSpec]:
@@ -5,7 +5,7 @@
5
5
  # the root directory of this source tree.
6
6
 
7
7
 
8
- from llama_stack.providers.datatypes import (
8
+ from llama_stack_api import (
9
9
  Api,
10
10
  InlineProviderSpec,
11
11
  ProviderSpec,
@@ -163,14 +163,14 @@ The SQLite-vec provider supports three search modes:
163
163
  Example with hybrid search:
164
164
  ```python
165
165
  response = await vector_io.query_chunks(
166
- vector_db_id="my_db",
166
+ vector_store_id="my_db",
167
167
  query="your query here",
168
168
  params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
169
169
  )
170
170
 
171
171
  # Using RRF ranker
172
172
  response = await vector_io.query_chunks(
173
- vector_db_id="my_db",
173
+ vector_store_id="my_db",
174
174
  query="your query here",
175
175
  params={
176
176
  "mode": "hybrid",
@@ -182,7 +182,7 @@ response = await vector_io.query_chunks(
182
182
 
183
183
  # Using weighted ranker
184
184
  response = await vector_io.query_chunks(
185
- vector_db_id="my_db",
185
+ vector_store_id="my_db",
186
186
  query="your query here",
187
187
  params={
188
188
  "mode": "hybrid",
@@ -196,7 +196,7 @@ response = await vector_io.query_chunks(
196
196
  Example with explicit vector search:
197
197
  ```python
198
198
  response = await vector_io.query_chunks(
199
- vector_db_id="my_db",
199
+ vector_store_id="my_db",
200
200
  query="your query here",
201
201
  params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
202
202
  )
@@ -205,7 +205,7 @@ response = await vector_io.query_chunks(
205
205
  Example with keyword search:
206
206
  ```python
207
207
  response = await vector_io.query_chunks(
208
- vector_db_id="my_db",
208
+ vector_store_id="my_db",
209
209
  query="your query here",
210
210
  params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
211
211
  )
@@ -244,7 +244,7 @@ Two ranker types are supported:
244
244
  Example using RAGQueryConfig with different search modes:
245
245
 
246
246
  ```python
247
- from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
247
+ from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker
248
248
 
249
249
  # Vector search
250
250
  config = RAGQueryConfig(mode="vector", max_chunks=5)
@@ -6,12 +6,9 @@
6
6
  from typing import Any
7
7
  from urllib.parse import parse_qs, urlparse
8
8
 
9
- from llama_stack.apis.common.responses import PaginatedResponse
10
- from llama_stack.apis.datasetio import DatasetIO
11
- from llama_stack.apis.datasets import Dataset
12
- from llama_stack.providers.datatypes import DatasetsProtocolPrivate
13
- from llama_stack.providers.utils.kvstore import kvstore_impl
9
+ from llama_stack.core.storage.kvstore import kvstore_impl
14
10
  from llama_stack.providers.utils.pagination import paginate_records
11
+ from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
15
12
 
16
13
  from .config import HuggingfaceDatasetIOConfig
17
14
 
@@ -8,10 +8,7 @@ from typing import Any
8
8
 
9
9
  import aiohttp
10
10
 
11
- from llama_stack.apis.common.content_types import URL
12
- from llama_stack.apis.common.responses import PaginatedResponse
13
- from llama_stack.apis.common.type_system import ParamType
14
- from llama_stack.apis.datasets import Dataset
11
+ from llama_stack_api import URL, Dataset, PaginatedResponse, ParamType
15
12
 
16
13
  from .config import NvidiaDatasetIOConfig
17
14
 
@@ -7,17 +7,23 @@ from typing import Any
7
7
 
8
8
  import requests
9
9
 
10
- from llama_stack.apis.agents import Agents
11
- from llama_stack.apis.benchmarks import Benchmark
12
- from llama_stack.apis.datasetio import DatasetIO
13
- from llama_stack.apis.datasets import Datasets
14
- from llama_stack.apis.inference import Inference
15
- from llama_stack.apis.scoring import Scoring, ScoringResult
16
- from llama_stack.providers.datatypes import BenchmarksProtocolPrivate
17
10
  from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
11
+ from llama_stack_api import (
12
+ Agents,
13
+ Benchmark,
14
+ BenchmarkConfig,
15
+ BenchmarksProtocolPrivate,
16
+ DatasetIO,
17
+ Datasets,
18
+ Eval,
19
+ EvaluateResponse,
20
+ Inference,
21
+ Job,
22
+ JobStatus,
23
+ Scoring,
24
+ ScoringResult,
25
+ )
18
26
 
19
- from .....apis.common.job_types import Job, JobStatus
20
- from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse
21
27
  from .config import NVIDIAEvalConfig
22
28
 
23
29
  DEFAULT_NAMESPACE = "nvidia"
@@ -0,0 +1,19 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ from typing import Any
8
+
9
+ from llama_stack.core.datatypes import AccessRule, Api
10
+
11
+ from .config import OpenAIFilesImplConfig
12
+
13
+
14
+ async def get_adapter_impl(config: OpenAIFilesImplConfig, deps: dict[Api, Any], policy: list[AccessRule] | None = None):
15
+ from .files import OpenAIFilesImpl
16
+
17
+ impl = OpenAIFilesImpl(config, policy or [])
18
+ await impl.initialize()
19
+ return impl
@@ -0,0 +1,28 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ from typing import Any
8
+
9
+ from pydantic import BaseModel, Field
10
+
11
+ from llama_stack.core.storage.datatypes import SqlStoreReference
12
+
13
+
14
+ class OpenAIFilesImplConfig(BaseModel):
15
+ """Configuration for OpenAI Files API provider."""
16
+
17
+ api_key: str = Field(description="OpenAI API key for authentication")
18
+ metadata_store: SqlStoreReference = Field(description="SQL store configuration for file metadata")
19
+
20
+ @classmethod
21
+ def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
22
+ return {
23
+ "api_key": "${env.OPENAI_API_KEY}",
24
+ "metadata_store": SqlStoreReference(
25
+ backend="sql_default",
26
+ table_name="openai_files_metadata",
27
+ ).model_dump(exclude_none=True),
28
+ }
@@ -0,0 +1,253 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ from datetime import UTC, datetime
8
+ from typing import Any
9
+
10
+ from fastapi import Response, UploadFile
11
+
12
+ from llama_stack.core.access_control.datatypes import Action
13
+ from llama_stack.core.datatypes import AccessRule
14
+ from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
15
+ from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl
16
+ from llama_stack_api import (
17
+ DeleteFileRequest,
18
+ ExpiresAfter,
19
+ Files,
20
+ ListFilesRequest,
21
+ ListOpenAIFileResponse,
22
+ OpenAIFileDeleteResponse,
23
+ OpenAIFileObject,
24
+ OpenAIFilePurpose,
25
+ Order,
26
+ ResourceNotFoundError,
27
+ RetrieveFileContentRequest,
28
+ RetrieveFileRequest,
29
+ UploadFileRequest,
30
+ )
31
+ from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType
32
+ from openai import OpenAI
33
+
34
+ from .config import OpenAIFilesImplConfig
35
+
36
+
37
+ def _make_file_object(
38
+ *,
39
+ id: str,
40
+ filename: str,
41
+ purpose: str,
42
+ bytes: int,
43
+ created_at: int,
44
+ expires_at: int,
45
+ **kwargs: Any,
46
+ ) -> OpenAIFileObject:
47
+ """
48
+ Construct an OpenAIFileObject and normalize expires_at.
49
+
50
+ If expires_at is greater than the max we treat it as no-expiration and
51
+ return None for expires_at.
52
+ """
53
+ obj = OpenAIFileObject(
54
+ id=id,
55
+ filename=filename,
56
+ purpose=OpenAIFilePurpose(purpose),
57
+ bytes=bytes,
58
+ created_at=created_at,
59
+ expires_at=expires_at,
60
+ )
61
+
62
+ if obj.expires_at is not None and obj.expires_at > (obj.created_at + ExpiresAfter.MAX):
63
+ obj.expires_at = None # type: ignore
64
+
65
+ return obj
66
+
67
+
68
+ class OpenAIFilesImpl(Files):
69
+ """OpenAI Files API implementation."""
70
+
71
+ def __init__(self, config: OpenAIFilesImplConfig, policy: list[AccessRule]) -> None:
72
+ self._config = config
73
+ self.policy = policy
74
+ self._client: OpenAI | None = None
75
+ self._sql_store: AuthorizedSqlStore | None = None
76
+
77
+ def _now(self) -> int:
78
+ """Return current UTC timestamp as int seconds."""
79
+ return int(datetime.now(UTC).timestamp())
80
+
81
+ async def _get_file(
82
+ self, file_id: str, return_expired: bool = False, action: Action = Action.READ
83
+ ) -> dict[str, Any]:
84
+ where: dict[str, str | dict] = {"id": file_id}
85
+ if not return_expired:
86
+ where["expires_at"] = {">": self._now()}
87
+ if not (row := await self.sql_store.fetch_one("openai_files", where=where, action=action)):
88
+ raise ResourceNotFoundError(file_id, "File", "files.list()")
89
+ return row
90
+
91
+ async def _delete_file(self, file_id: str) -> None:
92
+ """Delete a file from OpenAI and the database."""
93
+ try:
94
+ self.client.files.delete(file_id)
95
+ except Exception as e:
96
+ # If file doesn't exist on OpenAI side, just remove from metadata store
97
+ if "not found" not in str(e).lower():
98
+ raise RuntimeError(f"Failed to delete file from OpenAI: {e}") from e
99
+
100
+ await self.sql_store.delete("openai_files", where={"id": file_id})
101
+
102
+ async def _delete_if_expired(self, file_id: str) -> None:
103
+ """If the file exists and is expired, delete it."""
104
+ if row := await self._get_file(file_id, return_expired=True):
105
+ if (expires_at := row.get("expires_at")) and expires_at <= self._now():
106
+ await self._delete_file(file_id)
107
+
108
+ async def initialize(self) -> None:
109
+ self._client = OpenAI(api_key=self._config.api_key)
110
+
111
+ self._sql_store = AuthorizedSqlStore(sqlstore_impl(self._config.metadata_store), self.policy)
112
+ await self._sql_store.create_table(
113
+ "openai_files",
114
+ {
115
+ "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
116
+ "filename": ColumnType.STRING,
117
+ "purpose": ColumnType.STRING,
118
+ "bytes": ColumnType.INTEGER,
119
+ "created_at": ColumnType.INTEGER,
120
+ "expires_at": ColumnType.INTEGER,
121
+ },
122
+ )
123
+
124
+ async def shutdown(self) -> None:
125
+ pass
126
+
127
+ @property
128
+ def client(self) -> OpenAI:
129
+ assert self._client is not None, "Provider not initialized"
130
+ return self._client
131
+
132
+ @property
133
+ def sql_store(self) -> AuthorizedSqlStore:
134
+ assert self._sql_store is not None, "Provider not initialized"
135
+ return self._sql_store
136
+
137
+ async def openai_upload_file(
138
+ self,
139
+ request: UploadFileRequest,
140
+ file: UploadFile,
141
+ ) -> OpenAIFileObject:
142
+ purpose = request.purpose
143
+ expires_after = request.expires_after
144
+
145
+ filename = getattr(file, "filename", None) or "uploaded_file"
146
+ content = await file.read()
147
+ file_size = len(content)
148
+
149
+ created_at = self._now()
150
+
151
+ expires_at = created_at + ExpiresAfter.MAX * 42
152
+ if purpose == OpenAIFilePurpose.BATCH:
153
+ expires_at = created_at + ExpiresAfter.MAX
154
+
155
+ if expires_after is not None:
156
+ expires_at = created_at + expires_after.seconds
157
+
158
+ try:
159
+ from io import BytesIO
160
+
161
+ file_obj = BytesIO(content)
162
+ file_obj.name = filename
163
+
164
+ response = self.client.files.create(
165
+ file=file_obj,
166
+ purpose=purpose.value,
167
+ )
168
+
169
+ file_id = response.id
170
+
171
+ entry: dict[str, Any] = {
172
+ "id": file_id,
173
+ "filename": filename,
174
+ "purpose": purpose.value,
175
+ "bytes": file_size,
176
+ "created_at": created_at,
177
+ "expires_at": expires_at,
178
+ }
179
+
180
+ await self.sql_store.insert("openai_files", entry)
181
+
182
+ return _make_file_object(**entry)
183
+
184
+ except Exception as e:
185
+ raise RuntimeError(f"Failed to upload file to OpenAI: {e}") from e
186
+
187
+ async def openai_list_files(
188
+ self,
189
+ request: ListFilesRequest,
190
+ ) -> ListOpenAIFileResponse:
191
+ after = request.after
192
+ limit = request.limit
193
+ order = request.order
194
+ purpose = request.purpose
195
+
196
+ if not order:
197
+ order = Order.desc
198
+
199
+ where_conditions: dict[str, Any] = {"expires_at": {">": self._now()}}
200
+ if purpose:
201
+ where_conditions["purpose"] = purpose.value
202
+
203
+ paginated_result = await self.sql_store.fetch_all(
204
+ table="openai_files",
205
+ where=where_conditions,
206
+ order_by=[("created_at", order.value)],
207
+ cursor=("id", after) if after else None,
208
+ limit=limit,
209
+ )
210
+
211
+ files = [_make_file_object(**row) for row in paginated_result.data]
212
+
213
+ return ListOpenAIFileResponse(
214
+ data=files,
215
+ has_more=paginated_result.has_more,
216
+ first_id=files[0].id if files else "",
217
+ last_id=files[-1].id if files else "",
218
+ )
219
+
220
+ async def openai_retrieve_file(self, request: RetrieveFileRequest) -> OpenAIFileObject:
221
+ file_id = request.file_id
222
+ await self._delete_if_expired(file_id)
223
+ row = await self._get_file(file_id)
224
+ return _make_file_object(**row)
225
+
226
+ async def openai_delete_file(self, request: DeleteFileRequest) -> OpenAIFileDeleteResponse:
227
+ file_id = request.file_id
228
+ await self._delete_if_expired(file_id)
229
+ _ = await self._get_file(file_id, action=Action.DELETE)
230
+ await self._delete_file(file_id)
231
+ return OpenAIFileDeleteResponse(id=file_id, deleted=True)
232
+
233
+ async def openai_retrieve_file_content(self, request: RetrieveFileContentRequest) -> Response:
234
+ file_id = request.file_id
235
+ await self._delete_if_expired(file_id)
236
+
237
+ row = await self._get_file(file_id)
238
+
239
+ try:
240
+ response = self.client.files.content(file_id)
241
+ file_content = response.content
242
+
243
+ except Exception as e:
244
+ if "not found" in str(e).lower():
245
+ await self._delete_file(file_id)
246
+ raise ResourceNotFoundError(file_id, "File", "files.list()") from e
247
+ raise RuntimeError(f"Failed to download file from OpenAI: {e}") from e
248
+
249
+ return Response(
250
+ content=file_content,
251
+ media_type="application/octet-stream",
252
+ headers={"Content-Disposition": f'attachment; filename="{row["filename"]}"'},
253
+ )