llama-stack-api 0.5.2__tar.gz → 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/PKG-INFO +1 -1
  2. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/__init__.py +61 -3
  3. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/agents/__init__.py +2 -0
  4. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/agents/fastapi_routes.py +14 -46
  5. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/agents/models.py +53 -1
  6. llama_stack_api-0.6.1/common/errors.py +350 -0
  7. llama_stack_api-0.6.1/common/upload_safety.py +96 -0
  8. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/conversations/fastapi_routes.py +7 -1
  9. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/conversations/models.py +29 -2
  10. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/file_processors/__init__.py +2 -1
  11. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/file_processors/api.py +7 -13
  12. llama_stack_api-0.6.1/file_processors/fastapi_routes.py +122 -0
  13. llama_stack_api-0.6.1/file_processors/models.py +69 -0
  14. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/files/fastapi_routes.py +9 -2
  15. llama_stack_api-0.6.1/filters.py +67 -0
  16. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/inference/__init__.py +4 -0
  17. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/inference/fastapi_routes.py +2 -1
  18. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/inference/models.py +72 -27
  19. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/llama_stack_api.egg-info/PKG-INFO +1 -1
  20. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/llama_stack_api.egg-info/SOURCES.txt +4 -0
  21. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/models/models.py +8 -0
  22. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/openai_responses.py +30 -3
  23. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/post_training/fastapi_routes.py +3 -3
  24. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/pyproject.toml +3 -1
  25. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/router_utils.py +55 -1
  26. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/vector_io/__init__.py +27 -0
  27. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/vector_io/api.py +13 -26
  28. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/vector_io/fastapi_routes.py +18 -83
  29. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/vector_io/models.py +183 -5
  30. llama_stack_api-0.5.2/common/errors.py +0 -110
  31. llama_stack_api-0.5.2/file_processors/fastapi_routes.py +0 -78
  32. llama_stack_api-0.5.2/file_processors/models.py +0 -42
  33. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/README.md +0 -0
  34. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/admin/__init__.py +0 -0
  35. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/admin/api.py +0 -0
  36. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/admin/fastapi_routes.py +0 -0
  37. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/admin/models.py +0 -0
  38. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/agents/api.py +0 -0
  39. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/batches/__init__.py +0 -0
  40. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/batches/api.py +0 -0
  41. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/batches/fastapi_routes.py +0 -0
  42. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/batches/models.py +0 -0
  43. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/benchmarks/__init__.py +0 -0
  44. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/benchmarks/api.py +0 -0
  45. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/benchmarks/fastapi_routes.py +0 -0
  46. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/benchmarks/models.py +0 -0
  47. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/common/__init__.py +0 -0
  48. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/common/content_types.py +0 -0
  49. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/common/job_types.py +0 -0
  50. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/common/responses.py +0 -0
  51. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/common/training_types.py +0 -0
  52. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/common/type_system.py +0 -0
  53. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/connectors/__init__.py +0 -0
  54. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/connectors/api.py +0 -0
  55. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/connectors/fastapi_routes.py +0 -0
  56. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/connectors/models.py +0 -0
  57. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/conversations/__init__.py +0 -0
  58. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/conversations/api.py +0 -0
  59. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/datasetio/__init__.py +0 -0
  60. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/datasetio/api.py +0 -0
  61. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/datasetio/fastapi_routes.py +0 -0
  62. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/datasetio/models.py +0 -0
  63. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/datasets/__init__.py +0 -0
  64. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/datasets/api.py +0 -0
  65. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/datasets/fastapi_routes.py +0 -0
  66. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/datasets/models.py +0 -0
  67. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/datatypes.py +0 -0
  68. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/eval/__init__.py +0 -0
  69. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/eval/api.py +0 -0
  70. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/eval/compat.py +0 -0
  71. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/eval/fastapi_routes.py +0 -0
  72. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/eval/models.py +0 -0
  73. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/files/__init__.py +0 -0
  74. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/files/api.py +0 -0
  75. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/files/models.py +0 -0
  76. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/inference/api.py +0 -0
  77. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/inspect_api/__init__.py +0 -0
  78. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/inspect_api/api.py +0 -0
  79. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/inspect_api/fastapi_routes.py +0 -0
  80. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/inspect_api/models.py +0 -0
  81. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/internal/__init__.py +0 -0
  82. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/internal/kvstore.py +0 -0
  83. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/internal/sqlstore.py +0 -0
  84. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/llama_stack_api.egg-info/dependency_links.txt +0 -0
  85. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/llama_stack_api.egg-info/requires.txt +0 -0
  86. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/llama_stack_api.egg-info/top_level.txt +0 -0
  87. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/models/__init__.py +0 -0
  88. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/models/api.py +0 -0
  89. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/models/fastapi_routes.py +0 -0
  90. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/post_training/__init__.py +0 -0
  91. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/post_training/api.py +0 -0
  92. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/post_training/models.py +0 -0
  93. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/prompts/__init__.py +0 -0
  94. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/prompts/api.py +0 -0
  95. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/prompts/fastapi_routes.py +0 -0
  96. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/prompts/models.py +0 -0
  97. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/providers/__init__.py +0 -0
  98. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/providers/api.py +0 -0
  99. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/providers/fastapi_routes.py +0 -0
  100. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/providers/models.py +0 -0
  101. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/py.typed +0 -0
  102. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/rag_tool.py +0 -0
  103. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/resource.py +0 -0
  104. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/safety/__init__.py +0 -0
  105. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/safety/api.py +0 -0
  106. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/safety/datatypes.py +0 -0
  107. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/safety/fastapi_routes.py +0 -0
  108. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/safety/models.py +0 -0
  109. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/schema_utils.py +0 -0
  110. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/scoring/__init__.py +0 -0
  111. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/scoring/api.py +0 -0
  112. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/scoring/fastapi_routes.py +0 -0
  113. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/scoring/models.py +0 -0
  114. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/scoring_functions/__init__.py +0 -0
  115. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/scoring_functions/api.py +0 -0
  116. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/scoring_functions/fastapi_routes.py +0 -0
  117. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/scoring_functions/models.py +0 -0
  118. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/setup.cfg +0 -0
  119. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/shields/__init__.py +0 -0
  120. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/shields/api.py +0 -0
  121. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/shields/fastapi_routes.py +0 -0
  122. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/shields/models.py +0 -0
  123. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/tools.py +0 -0
  124. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/uv.lock +0 -0
  125. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/validators.py +0 -0
  126. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/vector_stores.py +0 -0
  127. {llama_stack_api-0.5.2 → llama_stack_api-0.6.1}/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llama-stack-api
3
- Version: 0.5.2
3
+ Version: 0.6.1
4
4
  Summary: API and Provider specifications for Llama Stack - lightweight package with protocol definitions and provider specs
5
5
  Author-email: Meta Llama <llama-oss@meta.com>
6
6
  License: MIT
@@ -60,6 +60,7 @@ from .agents import (
60
60
  ResponseGuardrail,
61
61
  ResponseGuardrailSpec,
62
62
  ResponseItemInclude,
63
+ ResponseTruncation,
63
64
  RetrieveResponseRequest,
64
65
  )
65
66
  from .batches import (
@@ -93,14 +94,22 @@ from .common.content_types import (
93
94
  _URLOrData,
94
95
  )
95
96
  from .common.errors import (
97
+ BatchNotFoundError,
96
98
  ConflictError,
97
99
  ConnectorNotFoundError,
98
100
  ConnectorToolNotFoundError,
101
+ ConversationItemNotFoundError,
102
+ ConversationNotFoundError,
99
103
  DatasetNotFoundError,
100
- InvalidConversationIdError,
104
+ InternalServerError,
105
+ InvalidParameterError,
101
106
  ModelNotFoundError,
102
107
  ModelTypeError,
108
+ OpenAIFileObjectNotFoundError,
103
109
  ResourceNotFoundError,
110
+ ResponseInputItemNotFoundError,
111
+ ResponseNotFoundError,
112
+ ServiceNotEnabledError,
104
113
  TokenValidationError,
105
114
  ToolGroupNotFoundError,
106
115
  UnsupportedModelError,
@@ -206,7 +215,8 @@ from .eval import (
206
215
  resolve_job_status_request,
207
216
  resolve_run_eval_request,
208
217
  )
209
- from .file_processors import FileProcessors, ProcessFileResponse
218
+ from .file_processors import FileProcessors, ProcessFileRequest, ProcessFileResponse
219
+ from .filters import COMPARISON_FILTER_TYPES, COMPOUND_FILTER_TYPES, ComparisonFilter, CompoundFilter, Filter
210
220
  from .files import (
211
221
  DeleteFileRequest,
212
222
  ExpiresAfter,
@@ -256,6 +266,7 @@ from .inference import (
256
266
  OpenAIChatCompletionUsage,
257
267
  OpenAIChatCompletionUsageCompletionTokensDetails,
258
268
  OpenAIChatCompletionUsagePromptTokensDetails,
269
+ OpenAIChatCompletionResponseMessage,
259
270
  OpenAIChoice,
260
271
  OpenAIChoiceDelta,
261
272
  OpenAIChoiceLogprobs,
@@ -335,6 +346,7 @@ from .openai_responses import (
335
346
  OpenAIResponseContentPartReasoningText,
336
347
  OpenAIResponseContentPartRefusal,
337
348
  OpenAIResponseError,
349
+ OpenAIResponseIncompleteDetails,
338
350
  OpenAIResponseInput,
339
351
  OpenAIResponseInputFunctionToolCallOutput,
340
352
  OpenAIResponseInputMessageContent,
@@ -535,15 +547,28 @@ from .tools import (
535
547
  from .validators import validate_embeddings_input_is_text
536
548
  from .vector_io import (
537
549
  Chunk,
550
+ ChunkForDeletion,
538
551
  ChunkMetadata,
552
+ DEFAULT_CHUNK_OVERLAP_TOKENS,
553
+ DEFAULT_CHUNK_SIZE_TOKENS,
554
+ DeleteChunksRequest,
539
555
  EmbeddedChunk,
556
+ InsertChunksRequest,
557
+ MAX_PAGINATION_LIMIT,
558
+ OpenAIAttachFileRequest,
540
559
  OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
541
560
  OpenAICreateVectorStoreRequestWithExtraBody,
561
+ OpenAISearchVectorStoreRequest,
562
+ OpenAIUpdateVectorStoreFileRequest,
563
+ OpenAIUpdateVectorStoreRequest,
564
+ QueryChunksRequest,
542
565
  QueryChunksResponse,
543
566
  SearchRankingOptions,
544
567
  VectorIO,
545
568
  VectorStoreChunkingStrategy,
546
569
  VectorStoreChunkingStrategyAuto,
570
+ VectorStoreChunkingStrategyContextual,
571
+ VectorStoreChunkingStrategyContextualConfig,
547
572
  VectorStoreChunkingStrategyStatic,
548
573
  VectorStoreChunkingStrategyStaticConfig,
549
574
  VectorStoreContent,
@@ -599,6 +624,7 @@ __all__ = [
599
624
  "ApprovalFilter",
600
625
  "BasicScoringFnParams",
601
626
  "Batches",
627
+ "BatchNotFoundError",
602
628
  "BatchObject",
603
629
  "CancelBatchRequest",
604
630
  "CancelTrainingJobRequest",
@@ -615,7 +641,11 @@ __all__ = [
615
641
  "ChatCompletionResponseEventType",
616
642
  "Checkpoint",
617
643
  "Chunk",
644
+ "ChunkForDeletion",
618
645
  "ChunkMetadata",
646
+ "DEFAULT_CHUNK_OVERLAP_TOKENS",
647
+ "DEFAULT_CHUNK_SIZE_TOKENS",
648
+ "DeleteChunksRequest",
619
649
  "EmbeddedChunk",
620
650
  "CommonBenchmarkFields",
621
651
  "ConflictError",
@@ -628,6 +658,8 @@ __all__ = [
628
658
  "Connector",
629
659
  "ConnectorNotFoundError",
630
660
  "ConnectorToolNotFoundError",
661
+ "ConversationItemNotFoundError",
662
+ "ConversationNotFoundError",
631
663
  "ConnectorInput",
632
664
  "Connectors",
633
665
  "ConnectorType",
@@ -694,6 +726,11 @@ __all__ = [
694
726
  "ExtraBodyField",
695
727
  "FileProcessors",
696
728
  "Files",
729
+ "Filter",
730
+ "ComparisonFilter",
731
+ "CompoundFilter",
732
+ "COMPARISON_FILTER_TYPES",
733
+ "COMPOUND_FILTER_TYPES",
697
734
  "Fp8QuantizationConfig",
698
735
  "clear_dynamic_schema_types",
699
736
  "get_schema_identifier",
@@ -707,13 +744,15 @@ __all__ = [
707
744
  "Inference",
708
745
  "InferenceProvider",
709
746
  "InlineProviderSpec",
747
+ "InsertChunksRequest",
710
748
  "Inspect",
711
749
  "InspectProviderRequest",
750
+ "InternalServerError",
712
751
  "Admin",
713
752
  "Int4QuantizationConfig",
714
753
  "InterleavedContent",
715
754
  "InterleavedContentItem",
716
- "InvalidConversationIdError",
755
+ "InvalidParameterError",
717
756
  "is_generic_list",
718
757
  "is_type_optional",
719
758
  "is_type_union",
@@ -763,6 +802,7 @@ __all__ = [
763
802
  "ListToolsResponse",
764
803
  "LogProbConfig",
765
804
  "LoraFinetuningConfig",
805
+ "MAX_PAGINATION_LIMIT",
766
806
  "MCPListToolsTool",
767
807
  "Metadata",
768
808
  "Model",
@@ -801,6 +841,7 @@ __all__ = [
801
841
  "OpenAIChatCompletionToolChoiceFunctionTool",
802
842
  "OpenAIChatCompletionToolChoiceCustomTool",
803
843
  "OpenAIChatCompletionToolChoice",
844
+ "OpenAIChatCompletionResponseMessage",
804
845
  "OpenAIChoice",
805
846
  "OpenAIChoiceDelta",
806
847
  "OpenAIChoiceLogprobs",
@@ -822,6 +863,7 @@ __all__ = [
822
863
  "OpenAIFileDeleteResponse",
823
864
  "OpenAIFileFile",
824
865
  "OpenAIFileObject",
866
+ "OpenAIFileObjectNotFoundError",
825
867
  "OpenAIFilePurpose",
826
868
  "OpenAIFinishReason",
827
869
  "OpenAIImageURL",
@@ -830,6 +872,10 @@ __all__ = [
830
872
  "OpenAIMessageParam",
831
873
  "OpenAIModel",
832
874
  "Order",
875
+ "OpenAIAttachFileRequest",
876
+ "OpenAISearchVectorStoreRequest",
877
+ "OpenAIUpdateVectorStoreFileRequest",
878
+ "OpenAIUpdateVectorStoreRequest",
833
879
  "OpenAIResponseAnnotationCitation",
834
880
  "OpenAIResponseAnnotationContainerFileCitation",
835
881
  "OpenAIResponseAnnotationFileCitation",
@@ -841,6 +887,7 @@ __all__ = [
841
887
  "OpenAIResponseContentPartReasoningText",
842
888
  "OpenAIResponseContentPartRefusal",
843
889
  "OpenAIResponseError",
890
+ "OpenAIResponseIncompleteDetails",
844
891
  "OpenAIResponseFormatJSONObject",
845
892
  "OpenAIResponseFormatJSONSchema",
846
893
  "OpenAIResponseFormatParam",
@@ -936,6 +983,7 @@ __all__ = [
936
983
  "ParamType",
937
984
  "parse_type",
938
985
  "PostTraining",
986
+ "ProcessFileRequest",
939
987
  "ProcessFileResponse",
940
988
  "PostTrainingMetric",
941
989
  "PostTrainingJob",
@@ -961,6 +1009,7 @@ __all__ = [
961
1009
  "QATFinetuningConfig",
962
1010
  "QuantizationConfig",
963
1011
  "QuantizationType",
1012
+ "QueryChunksRequest",
964
1013
  "QueryChunksResponse",
965
1014
  "RAGDocument",
966
1015
  "RAGQueryConfig",
@@ -980,12 +1029,16 @@ __all__ = [
980
1029
  "RerankResponse",
981
1030
  "Resource",
982
1031
  "ResourceNotFoundError",
1032
+ "ResponseInputItemNotFoundError",
1033
+ "ResponseNotFoundError",
983
1034
  "ResourceType",
984
1035
  "ResponseFormat",
985
1036
  "ResponseFormatType",
986
1037
  "ResponseGuardrail",
987
1038
  "ResponseGuardrailSpec",
988
1039
  "ResponseItemInclude",
1040
+ "ResponseTruncation",
1041
+ "ResponseNotFoundError",
989
1042
  "RetrieveFileContentRequest",
990
1043
  "RetrieveFileRequest",
991
1044
  "RouteInfo",
@@ -1020,6 +1073,7 @@ __all__ = [
1020
1073
  "SchemaInfo",
1021
1074
  "SchemaOptions",
1022
1075
  "SearchRankingOptions",
1076
+ "ServiceNotEnabledError",
1023
1077
  "Shield",
1024
1078
  "ShieldInput",
1025
1079
  "ShieldStore",
@@ -1065,6 +1119,8 @@ __all__ = [
1065
1119
  "VectorStore",
1066
1120
  "VectorStoreChunkingStrategy",
1067
1121
  "VectorStoreChunkingStrategyAuto",
1122
+ "VectorStoreChunkingStrategyContextual",
1123
+ "VectorStoreChunkingStrategyContextualConfig",
1068
1124
  "VectorStoreChunkingStrategyStatic",
1069
1125
  "VectorStoreChunkingStrategyStaticConfig",
1070
1126
  "VectorStoreContent",
@@ -1097,4 +1153,6 @@ __all__ = [
1097
1153
  "WeightedRanker",
1098
1154
  # Validators
1099
1155
  "validate_embeddings_input_is_text",
1156
+ # helpers
1157
+ "remove_null_from_anyof",
1100
1158
  ]
@@ -21,6 +21,7 @@ from .models import (
21
21
  ResponseGuardrail,
22
22
  ResponseGuardrailSpec,
23
23
  ResponseItemInclude,
24
+ ResponseTruncation,
24
25
  RetrieveResponseRequest,
25
26
  )
26
27
 
@@ -33,6 +34,7 @@ __all__ = [
33
34
  "ResponseGuardrail",
34
35
  "ResponseGuardrailSpec",
35
36
  "ResponseItemInclude",
37
+ "ResponseTruncation",
36
38
  "RetrieveResponseRequest",
37
39
  "fastapi_routes",
38
40
  ]
@@ -17,10 +17,11 @@ import logging # allow-direct-logging
17
17
  from collections.abc import AsyncIterator
18
18
  from typing import Annotated, Any
19
19
 
20
- from fastapi import APIRouter, Body, Depends, HTTPException, Path, Query
20
+ from fastapi import APIRouter, Body, Depends, Path, Query
21
21
  from fastapi.responses import StreamingResponse
22
22
  from pydantic import BaseModel
23
23
 
24
+ from llama_stack_api.common.errors import OpenAIErrorResponse
24
25
  from llama_stack_api.common.responses import Order
25
26
  from llama_stack_api.openai_responses import (
26
27
  ListOpenAIResponseInputItem,
@@ -29,9 +30,11 @@ from llama_stack_api.openai_responses import (
29
30
  OpenAIResponseObject,
30
31
  )
31
32
  from llama_stack_api.router_utils import (
33
+ ExceptionTranslatingRoute,
32
34
  create_path_dependency,
33
35
  create_query_dependency,
34
36
  standard_responses,
37
+ try_translate_to_http_exception,
35
38
  )
36
39
  from llama_stack_api.version import LLAMA_STACK_API_V1
37
40
 
@@ -72,8 +75,10 @@ async def sse_generator(event_gen):
72
75
  raise # Re-raise to maintain proper cancellation semantics
73
76
  except Exception as e:
74
77
  logger.exception("Error in SSE generator")
75
- exc = _http_exception_from_sse_error(e)
76
- yield create_sse_event({"error": {"status_code": exc.status_code, "message": exc.detail}})
78
+ http_exc = try_translate_to_http_exception(e)
79
+ status_code = http_exc.status_code if http_exc else 500
80
+ detail = http_exc.detail if http_exc else "Internal server error: An unexpected error occurred."
81
+ yield create_sse_event(OpenAIErrorResponse.from_message(detail, code=str(status_code)).to_dict())
77
82
 
78
83
 
79
84
  # Automatically generate dependency functions from Pydantic models
@@ -115,29 +120,6 @@ async def get_list_response_input_items_request(
115
120
  )
116
121
 
117
122
 
118
- def _http_exception_from_value_error(exc: ValueError) -> HTTPException:
119
- """Convert implementation `ValueError` into an OpenAI-compatible HTTP error.
120
-
121
- The compatibility OpenAI client maps HTTP 400 -> `BadRequestError`.
122
- The existing API surface (and integration tests) expect "not found" cases
123
- to be represented as a 400, not a 404.
124
- """
125
-
126
- detail = str(exc) or "Invalid value"
127
- return HTTPException(status_code=400, detail=detail)
128
-
129
-
130
- def _http_exception_from_sse_error(exc: Exception) -> HTTPException:
131
- if isinstance(exc, HTTPException):
132
- return exc
133
- if isinstance(exc, ValueError):
134
- return _http_exception_from_value_error(exc)
135
- status_code = getattr(exc, "status_code", None)
136
- if isinstance(status_code, int):
137
- return HTTPException(status_code=status_code, detail=str(exc))
138
- return HTTPException(status_code=500, detail="Internal server error: An unexpected error occurred.")
139
-
140
-
141
123
  def _preserve_context_for_sse(event_gen):
142
124
  # StreamingResponse runs in a different task, losing request contextvars.
143
125
  # create_task inside context.run captures the context at task creation.
@@ -173,6 +155,7 @@ def create_router(impl: Agents) -> APIRouter:
173
155
  prefix=f"/{LLAMA_STACK_API_V1}",
174
156
  tags=["Agents"],
175
157
  responses=standard_responses,
158
+ route_class=ExceptionTranslatingRoute,
176
159
  )
177
160
 
178
161
  @router.get(
@@ -184,10 +167,7 @@ def create_router(impl: Agents) -> APIRouter:
184
167
  async def get_openai_response(
185
168
  request: Annotated[RetrieveResponseRequest, Depends(get_retrieve_response_request)],
186
169
  ) -> OpenAIResponseObject:
187
- try:
188
- return await impl.get_openai_response(request)
189
- except ValueError as exc:
190
- raise _http_exception_from_value_error(exc) from exc
170
+ return await impl.get_openai_response(request)
191
171
 
192
172
  @router.post(
193
173
  "/responses",
@@ -208,10 +188,7 @@ def create_router(impl: Agents) -> APIRouter:
208
188
  async def create_openai_response(
209
189
  request: Annotated[CreateResponseRequest, Body(...)],
210
190
  ) -> OpenAIResponseObject | StreamingResponse:
211
- try:
212
- result = await impl.create_openai_response(request)
213
- except ValueError as exc:
214
- raise _http_exception_from_value_error(exc) from exc
191
+ result = await impl.create_openai_response(request)
215
192
 
216
193
  # For streaming responses, wrap in StreamingResponse for HTTP requests.
217
194
  # The implementation is typed to return an `AsyncIterator` for streaming.
@@ -232,10 +209,7 @@ def create_router(impl: Agents) -> APIRouter:
232
209
  async def list_openai_responses(
233
210
  request: Annotated[ListResponsesRequest, Depends(get_list_responses_request)],
234
211
  ) -> ListOpenAIResponseObject:
235
- try:
236
- return await impl.list_openai_responses(request)
237
- except ValueError as exc:
238
- raise _http_exception_from_value_error(exc) from exc
212
+ return await impl.list_openai_responses(request)
239
213
 
240
214
  @router.get(
241
215
  "/responses/{response_id}/input_items",
@@ -246,10 +220,7 @@ def create_router(impl: Agents) -> APIRouter:
246
220
  async def list_openai_response_input_items(
247
221
  request: Annotated[ListResponseInputItemsRequest, Depends(get_list_response_input_items_request)],
248
222
  ) -> ListOpenAIResponseInputItem:
249
- try:
250
- return await impl.list_openai_response_input_items(request)
251
- except ValueError as exc:
252
- raise _http_exception_from_value_error(exc) from exc
223
+ return await impl.list_openai_response_input_items(request)
253
224
 
254
225
  @router.delete(
255
226
  "/responses/{response_id}",
@@ -260,9 +231,6 @@ def create_router(impl: Agents) -> APIRouter:
260
231
  async def delete_openai_response(
261
232
  request: Annotated[DeleteResponseRequest, Depends(get_delete_response_request)],
262
233
  ) -> OpenAIDeleteResponseObject:
263
- try:
264
- return await impl.delete_openai_response(request)
265
- except ValueError as exc:
266
- raise _http_exception_from_value_error(exc) from exc
234
+ return await impl.delete_openai_response(request)
267
235
 
268
236
  return router
@@ -15,6 +15,7 @@ from enum import StrEnum
15
15
  from pydantic import BaseModel, ConfigDict, Field
16
16
 
17
17
  from llama_stack_api.common.responses import Order
18
+ from llama_stack_api.inference import ServiceTier
18
19
  from llama_stack_api.openai_responses import (
19
20
  OpenAIResponseInput,
20
21
  OpenAIResponseInputTool,
@@ -23,6 +24,7 @@ from llama_stack_api.openai_responses import (
23
24
  OpenAIResponseReasoning,
24
25
  OpenAIResponseText,
25
26
  )
27
+ from llama_stack_api.schema_utils import remove_null_from_anyof
26
28
 
27
29
 
28
30
  class ResponseItemInclude(StrEnum):
@@ -37,6 +39,13 @@ class ResponseItemInclude(StrEnum):
37
39
  reasoning_encrypted_content = "reasoning.encrypted_content"
38
40
 
39
41
 
42
+ class ResponseTruncation(StrEnum):
43
+ """Controls how the service truncates input when it exceeds the model context window."""
44
+
45
+ auto = "auto" # Let the service decide how to truncate
46
+ disabled = "disabled" # Disable truncation; context over limit results in 400 error
47
+
48
+
40
49
  class ResponseGuardrailSpec(BaseModel):
41
50
  """Specification for a guardrail to apply during response generation."""
42
51
 
@@ -49,13 +58,19 @@ class ResponseGuardrailSpec(BaseModel):
49
58
  ResponseGuardrail = str | ResponseGuardrailSpec
50
59
 
51
60
 
61
+ # extra_body can be accessed via .model_extra
52
62
  class CreateResponseRequest(BaseModel):
53
63
  """Request model for creating a response."""
54
64
 
55
- model_config = ConfigDict(extra="forbid")
65
+ model_config = ConfigDict(extra="allow")
56
66
 
57
67
  input: str | list[OpenAIResponseInput] = Field(..., description="Input message(s) to create the response.")
58
68
  model: str = Field(..., description="The underlying LLM used for completions.")
69
+ background: bool | None = Field(
70
+ default=None,
71
+ description="Whether to run the model response in the background. When true, returns immediately with status 'queued'.",
72
+ json_schema_extra=remove_null_from_anyof,
73
+ )
59
74
  prompt: OpenAIResponsePrompt | None = Field(
60
75
  default=None, description="Prompt object with ID, version, and variables."
61
76
  )
@@ -68,6 +83,11 @@ class CreateResponseRequest(BaseModel):
68
83
  default=None,
69
84
  description="Optional ID of a previous response to continue from.",
70
85
  )
86
+ prompt_cache_key: str | None = Field(
87
+ default=None,
88
+ max_length=64,
89
+ description="A key to use when reading from or writing to the prompt cache.",
90
+ )
71
91
  conversation: str | None = Field(
72
92
  default=None,
73
93
  description="Optional ID of a conversation to add the response to.",
@@ -86,6 +106,18 @@ class CreateResponseRequest(BaseModel):
86
106
  le=2.0,
87
107
  description="Sampling temperature.",
88
108
  )
109
+ top_p: float | None = Field(
110
+ default=None,
111
+ ge=0.0,
112
+ le=1.0,
113
+ description="Nucleus sampling parameter that controls response diversity (lower values increase focus).",
114
+ )
115
+ frequency_penalty: float | None = Field(
116
+ default=None,
117
+ ge=-2.0,
118
+ le=2.0,
119
+ description="Penalizes new tokens based on their frequency in the text so far.",
120
+ )
89
121
  text: OpenAIResponseText | None = Field(
90
122
  default=None,
91
123
  description="Configuration for text response generation.",
@@ -130,10 +162,30 @@ class CreateResponseRequest(BaseModel):
130
162
  max_length=64,
131
163
  description="A stable identifier used for safety monitoring and abuse detection.",
132
164
  )
165
+ service_tier: ServiceTier | None = Field(
166
+ default=None,
167
+ description="The service tier to use for this request.",
168
+ )
133
169
  metadata: dict[str, str] | None = Field(
134
170
  default=None,
135
171
  description="Dictionary of metadata key-value pairs to attach to the response.",
136
172
  )
173
+ truncation: ResponseTruncation | None = Field(
174
+ default=None,
175
+ description="Controls how the service truncates input when it exceeds the model context window.",
176
+ )
177
+ top_logprobs: int | None = Field(
178
+ default=None,
179
+ ge=0,
180
+ le=20,
181
+ description="The number of most likely tokens to return at each position, along with their log probabilities.",
182
+ )
183
+ presence_penalty: float | None = Field(
184
+ default=None,
185
+ ge=-2.0,
186
+ le=2.0,
187
+ description="Penalizes new tokens based on whether they appear in the text so far.",
188
+ )
137
189
 
138
190
 
139
191
  class RetrieveResponseRequest(BaseModel):