llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (460)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +235 -62
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
  169. llama_stack/providers/registry/agents.py +8 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/providers/utils/vector_io/__init__.py +16 -0
  284. llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
  285. llama_stack/telemetry/constants.py +27 -0
  286. llama_stack/telemetry/helpers.py +43 -0
  287. llama_stack/testing/api_recorder.py +25 -16
  288. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
  289. llama_stack-0.4.1.dist-info/RECORD +588 -0
  290. llama_stack-0.4.1.dist-info/top_level.txt +2 -0
  291. llama_stack_api/__init__.py +945 -0
  292. llama_stack_api/admin/__init__.py +45 -0
  293. llama_stack_api/admin/api.py +72 -0
  294. llama_stack_api/admin/fastapi_routes.py +117 -0
  295. llama_stack_api/admin/models.py +113 -0
  296. llama_stack_api/agents.py +173 -0
  297. llama_stack_api/batches/__init__.py +40 -0
  298. llama_stack_api/batches/api.py +53 -0
  299. llama_stack_api/batches/fastapi_routes.py +113 -0
  300. llama_stack_api/batches/models.py +78 -0
  301. llama_stack_api/benchmarks/__init__.py +43 -0
  302. llama_stack_api/benchmarks/api.py +39 -0
  303. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  304. llama_stack_api/benchmarks/models.py +109 -0
  305. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  306. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  307. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  308. llama_stack_api/common/responses.py +77 -0
  309. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  310. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  311. llama_stack_api/connectors.py +146 -0
  312. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  313. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  314. llama_stack_api/datasets/__init__.py +61 -0
  315. llama_stack_api/datasets/api.py +35 -0
  316. llama_stack_api/datasets/fastapi_routes.py +104 -0
  317. llama_stack_api/datasets/models.py +152 -0
  318. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  319. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  320. llama_stack_api/file_processors/__init__.py +27 -0
  321. llama_stack_api/file_processors/api.py +64 -0
  322. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  323. llama_stack_api/file_processors/models.py +42 -0
  324. llama_stack_api/files/__init__.py +35 -0
  325. llama_stack_api/files/api.py +51 -0
  326. llama_stack_api/files/fastapi_routes.py +124 -0
  327. llama_stack_api/files/models.py +107 -0
  328. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  329. llama_stack_api/inspect_api/__init__.py +37 -0
  330. llama_stack_api/inspect_api/api.py +25 -0
  331. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  332. llama_stack_api/inspect_api/models.py +28 -0
  333. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  334. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  335. llama_stack_api/internal/sqlstore.py +79 -0
  336. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  337. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  338. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  339. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  340. llama_stack_api/providers/__init__.py +33 -0
  341. llama_stack_api/providers/api.py +16 -0
  342. llama_stack_api/providers/fastapi_routes.py +57 -0
  343. llama_stack_api/providers/models.py +24 -0
  344. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  345. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  346. llama_stack_api/router_utils.py +160 -0
  347. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  348. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  349. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  350. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  351. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  352. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  353. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  354. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  355. llama_stack/apis/agents/agents.py +0 -894
  356. llama_stack/apis/batches/__init__.py +0 -9
  357. llama_stack/apis/batches/batches.py +0 -100
  358. llama_stack/apis/benchmarks/__init__.py +0 -7
  359. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  360. llama_stack/apis/common/responses.py +0 -36
  361. llama_stack/apis/conversations/__init__.py +0 -31
  362. llama_stack/apis/datasets/datasets.py +0 -251
  363. llama_stack/apis/datatypes.py +0 -160
  364. llama_stack/apis/eval/__init__.py +0 -7
  365. llama_stack/apis/files/__init__.py +0 -7
  366. llama_stack/apis/files/files.py +0 -199
  367. llama_stack/apis/inference/__init__.py +0 -7
  368. llama_stack/apis/inference/event_logger.py +0 -43
  369. llama_stack/apis/inspect/__init__.py +0 -7
  370. llama_stack/apis/inspect/inspect.py +0 -94
  371. llama_stack/apis/models/__init__.py +0 -7
  372. llama_stack/apis/post_training/__init__.py +0 -7
  373. llama_stack/apis/prompts/__init__.py +0 -9
  374. llama_stack/apis/providers/__init__.py +0 -7
  375. llama_stack/apis/providers/providers.py +0 -69
  376. llama_stack/apis/safety/__init__.py +0 -7
  377. llama_stack/apis/scoring/__init__.py +0 -7
  378. llama_stack/apis/scoring_functions/__init__.py +0 -7
  379. llama_stack/apis/shields/__init__.py +0 -7
  380. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  381. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  382. llama_stack/apis/telemetry/__init__.py +0 -7
  383. llama_stack/apis/telemetry/telemetry.py +0 -423
  384. llama_stack/apis/tools/__init__.py +0 -8
  385. llama_stack/apis/vector_io/__init__.py +0 -7
  386. llama_stack/apis/vector_stores/__init__.py +0 -7
  387. llama_stack/core/server/tracing.py +0 -80
  388. llama_stack/core/ui/app.py +0 -55
  389. llama_stack/core/ui/modules/__init__.py +0 -5
  390. llama_stack/core/ui/modules/api.py +0 -32
  391. llama_stack/core/ui/modules/utils.py +0 -42
  392. llama_stack/core/ui/page/__init__.py +0 -5
  393. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  394. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  395. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  396. llama_stack/core/ui/page/distribution/models.py +0 -18
  397. llama_stack/core/ui/page/distribution/providers.py +0 -27
  398. llama_stack/core/ui/page/distribution/resources.py +0 -48
  399. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  400. llama_stack/core/ui/page/distribution/shields.py +0 -19
  401. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  402. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  403. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  404. llama_stack/core/ui/page/playground/__init__.py +0 -5
  405. llama_stack/core/ui/page/playground/chat.py +0 -130
  406. llama_stack/core/ui/page/playground/tools.py +0 -352
  407. llama_stack/distributions/dell/build.yaml +0 -33
  408. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  409. llama_stack/distributions/nvidia/build.yaml +0 -29
  410. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  411. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  412. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  413. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  414. llama_stack/distributions/starter/build.yaml +0 -61
  415. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  416. llama_stack/distributions/watsonx/build.yaml +0 -33
  417. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  418. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  419. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  420. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  421. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  422. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  423. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  424. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  425. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  426. llama_stack/providers/utils/sqlstore/api.py +0 -128
  427. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  428. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  429. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  430. llama_stack/strong_typing/__init__.py +0 -19
  431. llama_stack/strong_typing/auxiliary.py +0 -228
  432. llama_stack/strong_typing/classdef.py +0 -440
  433. llama_stack/strong_typing/core.py +0 -46
  434. llama_stack/strong_typing/deserializer.py +0 -877
  435. llama_stack/strong_typing/docstring.py +0 -409
  436. llama_stack/strong_typing/exception.py +0 -23
  437. llama_stack/strong_typing/inspection.py +0 -1085
  438. llama_stack/strong_typing/mapping.py +0 -40
  439. llama_stack/strong_typing/name.py +0 -182
  440. llama_stack/strong_typing/schema.py +0 -792
  441. llama_stack/strong_typing/serialization.py +0 -97
  442. llama_stack/strong_typing/serializer.py +0 -500
  443. llama_stack/strong_typing/slots.py +0 -27
  444. llama_stack/strong_typing/topological.py +0 -89
  445. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  446. llama_stack-0.3.5.dist-info/RECORD +0 -625
  447. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  448. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  451. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  452. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  453. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  454. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
  456. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
  457. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
  458. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  459. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  460. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -4,21 +4,177 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from enum import StrEnum
7
+ from enum import Enum, EnumMeta, StrEnum
8
8
  from typing import Any, Protocol
9
9
  from urllib.parse import urlparse
10
10
 
11
11
  from pydantic import BaseModel, Field
12
12
 
13
- from llama_stack.apis.benchmarks import Benchmark
14
- from llama_stack.apis.datasets import Dataset
15
- from llama_stack.apis.datatypes import Api
16
- from llama_stack.apis.models import Model
17
- from llama_stack.apis.scoring_functions import ScoringFn
18
- from llama_stack.apis.shields import Shield
19
- from llama_stack.apis.tools import ToolGroup
20
- from llama_stack.apis.vector_stores import VectorStore
21
- from llama_stack.schema_utils import json_schema_type
13
+ from llama_stack_api.benchmarks import Benchmark
14
+ from llama_stack_api.datasets import Dataset
15
+ from llama_stack_api.models import Model
16
+ from llama_stack_api.schema_utils import json_schema_type
17
+ from llama_stack_api.scoring_functions import ScoringFn
18
+ from llama_stack_api.shields import Shield
19
+ from llama_stack_api.tools import ToolGroup
20
+ from llama_stack_api.vector_stores import VectorStore
21
+
22
+
23
+ class DynamicApiMeta(EnumMeta):
24
+ def __new__(cls, name, bases, namespace):
25
+ # Store the original enum values
26
+ original_values = {k: v for k, v in namespace.items() if not k.startswith("_")}
27
+
28
+ # Create the enum class
29
+ cls = super().__new__(cls, name, bases, namespace)
30
+
31
+ # Store the original values for reference
32
+ cls._original_values = original_values
33
+ # Initialize _dynamic_values
34
+ cls._dynamic_values = {}
35
+
36
+ return cls
37
+
38
+ def __call__(cls, value):
39
+ try:
40
+ return super().__call__(value)
41
+ except ValueError as e:
42
+ # If this value was already dynamically added, return it
43
+ if value in cls._dynamic_values:
44
+ return cls._dynamic_values[value]
45
+
46
+ # If the value doesn't exist, create a new enum member
47
+ # Create a new member name from the value
48
+ member_name = value.lower().replace("-", "_")
49
+
50
+ # If this member name already exists in the enum, return the existing member
51
+ if member_name in cls._member_map_:
52
+ return cls._member_map_[member_name]
53
+
54
+ # Instead of creating a new member, raise ValueError to force users to use Api.add() to
55
+ # register new APIs explicitly
56
+ raise ValueError(f"API '{value}' does not exist. Use Api.add() to register new APIs.") from e
57
+
58
+ def __iter__(cls):
59
+ # Allow iteration over both static and dynamic members
60
+ yield from super().__iter__()
61
+ if hasattr(cls, "_dynamic_values"):
62
+ yield from cls._dynamic_values.values()
63
+
64
+ def add(cls, value):
65
+ """
66
+ Add a new API to the enum.
67
+ Used to register external APIs.
68
+ """
69
+ member_name = value.lower().replace("-", "_")
70
+
71
+ # If this member name already exists in the enum, return it
72
+ if member_name in cls._member_map_:
73
+ return cls._member_map_[member_name]
74
+
75
+ # Create a new enum member
76
+ member = object.__new__(cls)
77
+ member._name_ = member_name
78
+ member._value_ = value
79
+
80
+ # Add it to the enum class
81
+ cls._member_map_[member_name] = member
82
+ cls._member_names_.append(member_name)
83
+ cls._member_type_ = str
84
+
85
+ # Store it in our dynamic values
86
+ cls._dynamic_values[value] = member
87
+
88
+ return member
89
+
90
+
91
+ @json_schema_type
92
+ class Api(Enum, metaclass=DynamicApiMeta):
93
+ """Enumeration of all available APIs in the Llama Stack system.
94
+ :cvar providers: Provider management and configuration
95
+ :cvar inference: Text generation, chat completions, and embeddings
96
+ :cvar safety: Content moderation and safety shields
97
+ :cvar agents: Agent orchestration and execution
98
+ :cvar batches: Batch processing for asynchronous API requests
99
+ :cvar vector_io: Vector database operations and queries
100
+ :cvar datasetio: Dataset input/output operations
101
+ :cvar scoring: Model output evaluation and scoring
102
+ :cvar eval: Model evaluation and benchmarking framework
103
+ :cvar post_training: Fine-tuning and model training
104
+ :cvar tool_runtime: Tool execution and management
105
+ :cvar telemetry: Observability and system monitoring
106
+ :cvar models: Model metadata and management
107
+ :cvar shields: Safety shield implementations
108
+ :cvar datasets: Dataset creation and management
109
+ :cvar scoring_functions: Scoring function definitions
110
+ :cvar benchmarks: Benchmark suite management
111
+ :cvar tool_groups: Tool group organization
112
+ :cvar files: File storage and management
113
+ :cvar file_processors: File parsing and processing operations
114
+ :cvar prompts: Prompt versions and management
115
+ :cvar connectors: External connector management (e.g., MCP servers)
116
+ :cvar inspect: Built-in system inspection and introspection
117
+ """
118
+
119
+ providers = "providers"
120
+ inference = "inference"
121
+ safety = "safety"
122
+ agents = "agents"
123
+ batches = "batches"
124
+ vector_io = "vector_io"
125
+ datasetio = "datasetio"
126
+ scoring = "scoring"
127
+ eval = "eval"
128
+ post_training = "post_training"
129
+ tool_runtime = "tool_runtime"
130
+
131
+ models = "models"
132
+ shields = "shields"
133
+ vector_stores = "vector_stores" # only used for routing table
134
+ datasets = "datasets"
135
+ scoring_functions = "scoring_functions"
136
+ benchmarks = "benchmarks"
137
+ tool_groups = "tool_groups"
138
+ files = "files"
139
+ file_processors = "file_processors"
140
+ prompts = "prompts"
141
+ conversations = "conversations"
142
+ connectors = "connectors"
143
+
144
+ # built-in API
145
+ inspect = "inspect"
146
+ admin = "admin"
147
+
148
+
149
+ @json_schema_type
150
+ class Error(BaseModel):
151
+ """
152
+ Error response from the API. Roughly follows RFC 7807.
153
+
154
+ :param status: HTTP status code
155
+ :param title: Error title, a short summary of the error which is invariant for an error type
156
+ :param detail: Error detail, a longer human-readable description of the error
157
+ :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error
158
+ """
159
+
160
+ status: int
161
+ title: str
162
+ detail: str
163
+ instance: str | None = None
164
+
165
+
166
+ class ExternalApiSpec(BaseModel):
167
+ """Specification for an external API implementation."""
168
+
169
+ module: str = Field(..., description="Python module containing the API implementation")
170
+ name: str = Field(..., description="Name of the API")
171
+ pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API")
172
+ protocol: str = Field(..., description="Name of the protocol class for the API")
173
+
174
+
175
+ # Provider-related types (merged from providers/datatypes.py)
176
+ # NOTE: These imports are forward references to avoid circular dependencies
177
+ # They will be resolved at runtime when the classes are used
22
178
 
23
179
 
24
180
  class ModelsProtocolPrivate(Protocol):
@@ -4,17 +4,16 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from typing import Annotated, Any, Literal, Protocol
7
+ from typing import Any, Literal, Protocol
8
8
 
9
9
  from pydantic import BaseModel, Field
10
10
 
11
- from llama_stack.apis.agents import AgentConfig
12
- from llama_stack.apis.common.job_types import Job
13
- from llama_stack.apis.inference import SamplingParams, SystemMessage
14
- from llama_stack.apis.scoring import ScoringResult
15
- from llama_stack.apis.scoring_functions import ScoringFnParams
16
- from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
17
- from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
11
+ from llama_stack_api.common.job_types import Job
12
+ from llama_stack_api.inference import SamplingParams, SystemMessage
13
+ from llama_stack_api.schema_utils import json_schema_type, webmethod
14
+ from llama_stack_api.scoring import ScoringResult
15
+ from llama_stack_api.scoring_functions import ScoringFnParams
16
+ from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
18
17
 
19
18
 
20
19
  @json_schema_type
@@ -32,19 +31,7 @@ class ModelCandidate(BaseModel):
32
31
  system_message: SystemMessage | None = None
33
32
 
34
33
 
35
- @json_schema_type
36
- class AgentCandidate(BaseModel):
37
- """An agent candidate for evaluation.
38
-
39
- :param config: The configuration for the agent candidate.
40
- """
41
-
42
- type: Literal["agent"] = "agent"
43
- config: AgentConfig
44
-
45
-
46
- EvalCandidate = Annotated[ModelCandidate | AgentCandidate, Field(discriminator="type")]
47
- register_schema(EvalCandidate, name="EvalCandidate")
34
+ EvalCandidate = ModelCandidate
48
35
 
49
36
 
50
37
  @json_schema_type
@@ -86,7 +73,6 @@ class Eval(Protocol):
86
73
 
87
74
  Llama Stack Evaluation API for running evaluations on model and agent candidates."""
88
75
 
89
- @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
90
76
  @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
91
77
  async def run_eval(
92
78
  self,
@@ -101,9 +87,6 @@ class Eval(Protocol):
101
87
  """
102
88
  ...
103
89
 
104
- @webmethod(
105
- route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
106
- )
107
90
  @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1ALPHA)
108
91
  async def evaluate_rows(
109
92
  self,
@@ -122,9 +105,6 @@ class Eval(Protocol):
122
105
  """
123
106
  ...
124
107
 
125
- @webmethod(
126
- route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
127
- )
128
108
  @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
129
109
  async def job_status(self, benchmark_id: str, job_id: str) -> Job:
130
110
  """Get the status of a job.
@@ -135,12 +115,6 @@ class Eval(Protocol):
135
115
  """
136
116
  ...
137
117
 
138
- @webmethod(
139
- route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
140
- method="DELETE",
141
- level=LLAMA_STACK_API_V1,
142
- deprecated=True,
143
- )
144
118
  @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
145
119
  async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
146
120
  """Cancel a job.
@@ -150,12 +124,6 @@ class Eval(Protocol):
150
124
  """
151
125
  ...
152
126
 
153
- @webmethod(
154
- route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
155
- method="GET",
156
- level=LLAMA_STACK_API_V1,
157
- deprecated=True,
158
- )
159
127
  @webmethod(
160
128
  route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET", level=LLAMA_STACK_API_V1ALPHA
161
129
  )
@@ -0,0 +1,27 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ """File Processors API protocol and models.
8
+
9
+ This module contains the File Processors protocol definition.
10
+ Pydantic models are defined in llama_stack_api.file_processors.models.
11
+ The FastAPI router is defined in llama_stack_api.file_processors.fastapi_routes.
12
+ """
13
+
14
+ # Import fastapi_routes for router factory access
15
+ from . import fastapi_routes
16
+
17
+ # Import protocol for re-export
18
+ from .api import FileProcessors
19
+
20
+ # Import models for re-export
21
+ from .models import ProcessFileResponse
22
+
23
+ __all__ = [
24
+ "FileProcessors",
25
+ "ProcessFileResponse",
26
+ "fastapi_routes",
27
+ ]
@@ -0,0 +1,64 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ from typing import Any, Protocol, runtime_checkable
8
+
9
+ from fastapi import UploadFile
10
+
11
+ from llama_stack_api.vector_io import VectorStoreChunkingStrategy
12
+
13
+ from .models import ProcessFileResponse
14
+
15
+
16
+ @runtime_checkable
17
+ class FileProcessors(Protocol):
18
+ """
19
+ File Processor API for converting files into structured, processable content.
20
+
21
+ This API provides a flexible interface for processing various file formats
22
+ (PDFs, documents, images, etc.) into normalized text content that can be used for
23
+ vector store ingestion, RAG applications, or standalone content extraction.
24
+
25
+ The API focuses on parsing and normalization:
26
+ - Multiple file formats through extensible provider architecture
27
+ - Multipart form uploads or file ID references
28
+ - Configurable processing options per provider
29
+ - Optional chunking using provider's native capabilities
30
+ - Rich metadata about processing results
31
+
32
+ For embedding generation, use the chunks from this API with the separate
33
+ embedding API to maintain clean separation of concerns.
34
+
35
+ Future providers can extend this interface to support additional formats,
36
+ processing capabilities, and optimization strategies.
37
+ """
38
+
39
+ async def process_file(
40
+ self,
41
+ file: UploadFile | None = None,
42
+ file_id: str | None = None,
43
+ options: dict[str, Any] | None = None,
44
+ chunking_strategy: VectorStoreChunkingStrategy | None = None,
45
+ ) -> ProcessFileResponse:
46
+ """
47
+ Process a file into chunks ready for vector database storage.
48
+
49
+ This method supports two modes of operation via multipart form request:
50
+ 1. Direct upload: Upload and process a file directly (file parameter)
51
+ 2. File storage: Process files already uploaded to file storage (file_id parameter)
52
+
53
+ Exactly one of file or file_id must be provided.
54
+
55
+ If no chunking_strategy is provided, the entire file content is returned as a single chunk.
56
+ If chunking_strategy is provided, the file is split according to the strategy.
57
+
58
+ :param file: The uploaded file object containing content and metadata (filename, content_type, etc.). Mutually exclusive with file_id.
59
+ :param file_id: ID of file already uploaded to file storage. Mutually exclusive with file.
60
+ :param options: Provider-specific processing options (e.g., OCR settings, output format).
61
+ :param chunking_strategy: Optional strategy for splitting content into chunks.
62
+ :returns: ProcessFileResponse with chunks ready for vector database storage.
63
+ """
64
+ ...
@@ -0,0 +1,78 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ """FastAPI router for the File Processors API.
8
+
9
+ This module defines the FastAPI router for the File Processors API using standard
10
+ FastAPI route decorators. The router is defined in the API package to keep
11
+ all API-related code together.
12
+ """
13
+
14
+ from typing import Annotated, Any
15
+
16
+ from fastapi import APIRouter, File, Form, UploadFile
17
+
18
+ from llama_stack_api.router_utils import standard_responses
19
+ from llama_stack_api.vector_io import VectorStoreChunkingStrategy
20
+ from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
21
+
22
+ from .api import FileProcessors
23
+ from .models import ProcessFileResponse
24
+
25
+
26
+ def create_router(impl: FileProcessors) -> APIRouter:
27
+ """Create a FastAPI router for the File Processors API.
28
+
29
+ Args:
30
+ impl: The FileProcessors implementation instance
31
+
32
+ Returns:
33
+ APIRouter configured for the File Processors API
34
+ """
35
+ router = APIRouter(
36
+ prefix=f"/{LLAMA_STACK_API_V1ALPHA}",
37
+ tags=["File Processors"],
38
+ responses=standard_responses,
39
+ )
40
+
41
+ @router.post(
42
+ "/file-processors/process",
43
+ response_model=ProcessFileResponse,
44
+ summary="Process a file into chunks ready for vector database storage.",
45
+ description="Process a file into chunks ready for vector database storage. Supports direct upload via multipart form or processing files already uploaded to file storage via file_id. Exactly one of file or file_id must be provided.",
46
+ responses={
47
+ 200: {"description": "The processed file chunks."},
48
+ },
49
+ )
50
+ async def process_file(
51
+ file: Annotated[
52
+ UploadFile | None,
53
+ File(description="The File object to be uploaded and processed. Mutually exclusive with file_id."),
54
+ ] = None,
55
+ file_id: Annotated[
56
+ str | None, Form(description="ID of file already uploaded to file storage. Mutually exclusive with file.")
57
+ ] = None,
58
+ options: Annotated[
59
+ dict[str, Any] | None,
60
+ Form(
61
+ description="Optional processing options. Provider-specific parameters (e.g., OCR settings, output format)."
62
+ ),
63
+ ] = None,
64
+ chunking_strategy: Annotated[
65
+ VectorStoreChunkingStrategy | None,
66
+ Form(description="Optional chunking strategy for splitting content into chunks."),
67
+ ] = None,
68
+ ) -> ProcessFileResponse:
69
+ # Pass the parameters directly to the implementation
70
+ # The protocol method signature expects individual parameters for multipart handling
71
+ return await impl.process_file(
72
+ file=file,
73
+ file_id=file_id,
74
+ options=options,
75
+ chunking_strategy=chunking_strategy,
76
+ )
77
+
78
+ return router
@@ -0,0 +1,42 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ """Pydantic models for File Processors API responses.
8
+
9
+ This module defines the response models for the File Processors API
10
+ using Pydantic with Field descriptions for OpenAPI schema generation.
11
+
12
+ Request models are not needed for this API since it uses multipart form data
13
+ with individual parameters rather than a JSON request body.
14
+ """
15
+
16
+ from typing import Any
17
+
18
+ from pydantic import BaseModel, Field
19
+
20
+ from llama_stack_api.schema_utils import json_schema_type
21
+ from llama_stack_api.vector_io import Chunk
22
+
23
+
24
+ @json_schema_type
25
+ class ProcessFileResponse(BaseModel):
26
+ """Response model for file processing operation.
27
+
28
+ Returns a list of chunks ready for storage in vector databases.
29
+ Each chunk contains the content and metadata.
30
+ """
31
+
32
+ chunks: list[Chunk] = Field(..., description="Processed chunks from the file. Always returns at least one chunk.")
33
+
34
+ metadata: dict[str, Any] = Field(
35
+ ...,
36
+ description="Processing-run metadata such as processor name/version, processing_time_ms, page_count, extraction_method (e.g. docling/pypdf/ocr), confidence scores, plus provider-specific fields.",
37
+ )
38
+
39
+
40
+ __all__ = [
41
+ "ProcessFileResponse",
42
+ ]
@@ -0,0 +1,35 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ from . import fastapi_routes
8
+ from .api import Files
9
+ from .models import (
10
+ DeleteFileRequest,
11
+ ExpiresAfter,
12
+ ListFilesRequest,
13
+ ListOpenAIFileResponse,
14
+ OpenAIFileDeleteResponse,
15
+ OpenAIFileObject,
16
+ OpenAIFilePurpose,
17
+ RetrieveFileContentRequest,
18
+ RetrieveFileRequest,
19
+ UploadFileRequest,
20
+ )
21
+
22
+ __all__ = [
23
+ "DeleteFileRequest",
24
+ "ExpiresAfter",
25
+ "fastapi_routes",
26
+ "Files",
27
+ "ListFilesRequest",
28
+ "ListOpenAIFileResponse",
29
+ "OpenAIFileDeleteResponse",
30
+ "OpenAIFileObject",
31
+ "OpenAIFilePurpose",
32
+ "RetrieveFileContentRequest",
33
+ "RetrieveFileRequest",
34
+ "UploadFileRequest",
35
+ ]
@@ -0,0 +1,51 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ from typing import Protocol, runtime_checkable
8
+
9
+ from fastapi import Response, UploadFile
10
+
11
+ from .models import (
12
+ DeleteFileRequest,
13
+ ListFilesRequest,
14
+ ListOpenAIFileResponse,
15
+ OpenAIFileDeleteResponse,
16
+ OpenAIFileObject,
17
+ RetrieveFileContentRequest,
18
+ RetrieveFileRequest,
19
+ UploadFileRequest,
20
+ )
21
+
22
+
23
+ @runtime_checkable
24
+ class Files(Protocol):
25
+ """Files API for managing file uploads and retrieval."""
26
+
27
+ async def openai_upload_file(
28
+ self,
29
+ request: UploadFileRequest,
30
+ file: UploadFile,
31
+ ) -> OpenAIFileObject: ...
32
+
33
+ async def openai_list_files(
34
+ self,
35
+ request: ListFilesRequest,
36
+ ) -> ListOpenAIFileResponse: ...
37
+
38
+ async def openai_retrieve_file(
39
+ self,
40
+ request: RetrieveFileRequest,
41
+ ) -> OpenAIFileObject: ...
42
+
43
+ async def openai_delete_file(
44
+ self,
45
+ request: DeleteFileRequest,
46
+ ) -> OpenAIFileDeleteResponse: ...
47
+
48
+ async def openai_retrieve_file_content(
49
+ self,
50
+ request: RetrieveFileContentRequest,
51
+ ) -> Response: ...