llama-stack-api 0.5.2__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/PKG-INFO +1 -1
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/__init__.py +61 -3
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/agents/__init__.py +2 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/agents/fastapi_routes.py +14 -46
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/agents/models.py +53 -1
- llama_stack_api-0.6.0/common/errors.py +350 -0
- llama_stack_api-0.6.0/common/upload_safety.py +96 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/conversations/fastapi_routes.py +7 -1
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/conversations/models.py +29 -2
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/file_processors/__init__.py +2 -1
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/file_processors/api.py +7 -13
- llama_stack_api-0.6.0/file_processors/fastapi_routes.py +122 -0
- llama_stack_api-0.6.0/file_processors/models.py +69 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/files/fastapi_routes.py +9 -2
- llama_stack_api-0.6.0/filters.py +67 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/inference/__init__.py +4 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/inference/fastapi_routes.py +2 -1
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/inference/models.py +72 -27
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/llama_stack_api.egg-info/PKG-INFO +1 -1
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/llama_stack_api.egg-info/SOURCES.txt +4 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/models/models.py +8 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/openai_responses.py +30 -3
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/post_training/fastapi_routes.py +3 -3
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/pyproject.toml +3 -1
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/router_utils.py +55 -1
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/vector_io/__init__.py +27 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/vector_io/api.py +13 -26
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/vector_io/fastapi_routes.py +18 -83
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/vector_io/models.py +183 -5
- llama_stack_api-0.5.2/common/errors.py +0 -110
- llama_stack_api-0.5.2/file_processors/fastapi_routes.py +0 -78
- llama_stack_api-0.5.2/file_processors/models.py +0 -42
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/README.md +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/admin/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/admin/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/admin/fastapi_routes.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/admin/models.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/agents/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/batches/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/batches/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/batches/fastapi_routes.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/batches/models.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/benchmarks/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/benchmarks/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/benchmarks/fastapi_routes.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/benchmarks/models.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/common/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/common/content_types.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/common/job_types.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/common/responses.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/common/training_types.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/common/type_system.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/connectors/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/connectors/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/connectors/fastapi_routes.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/connectors/models.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/conversations/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/conversations/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/datasetio/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/datasetio/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/datasetio/fastapi_routes.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/datasetio/models.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/datasets/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/datasets/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/datasets/fastapi_routes.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/datasets/models.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/datatypes.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/eval/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/eval/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/eval/compat.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/eval/fastapi_routes.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/eval/models.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/files/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/files/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/files/models.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/inference/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/inspect_api/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/inspect_api/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/inspect_api/fastapi_routes.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/inspect_api/models.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/internal/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/internal/kvstore.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/internal/sqlstore.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/llama_stack_api.egg-info/dependency_links.txt +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/llama_stack_api.egg-info/requires.txt +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/llama_stack_api.egg-info/top_level.txt +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/models/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/models/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/models/fastapi_routes.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/post_training/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/post_training/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/post_training/models.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/prompts/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/prompts/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/prompts/fastapi_routes.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/prompts/models.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/providers/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/providers/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/providers/fastapi_routes.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/providers/models.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/py.typed +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/rag_tool.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/resource.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/safety/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/safety/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/safety/datatypes.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/safety/fastapi_routes.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/safety/models.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/schema_utils.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/scoring/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/scoring/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/scoring/fastapi_routes.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/scoring/models.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/scoring_functions/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/scoring_functions/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/scoring_functions/fastapi_routes.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/scoring_functions/models.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/setup.cfg +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/shields/__init__.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/shields/api.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/shields/fastapi_routes.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/shields/models.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/tools.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/uv.lock +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/validators.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/vector_stores.py +0 -0
- {llama_stack_api-0.5.2 → llama_stack_api-0.6.0}/version.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llama-stack-api
|
|
3
|
-
Version: 0.5.2
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: API and Provider specifications for Llama Stack - lightweight package with protocol definitions and provider specs
|
|
5
5
|
Author-email: Meta Llama <llama-oss@meta.com>
|
|
6
6
|
License: MIT
|
|
@@ -60,6 +60,7 @@ from .agents import (
|
|
|
60
60
|
ResponseGuardrail,
|
|
61
61
|
ResponseGuardrailSpec,
|
|
62
62
|
ResponseItemInclude,
|
|
63
|
+
ResponseTruncation,
|
|
63
64
|
RetrieveResponseRequest,
|
|
64
65
|
)
|
|
65
66
|
from .batches import (
|
|
@@ -93,14 +94,22 @@ from .common.content_types import (
|
|
|
93
94
|
_URLOrData,
|
|
94
95
|
)
|
|
95
96
|
from .common.errors import (
|
|
97
|
+
BatchNotFoundError,
|
|
96
98
|
ConflictError,
|
|
97
99
|
ConnectorNotFoundError,
|
|
98
100
|
ConnectorToolNotFoundError,
|
|
101
|
+
ConversationItemNotFoundError,
|
|
102
|
+
ConversationNotFoundError,
|
|
99
103
|
DatasetNotFoundError,
|
|
100
|
-
|
|
104
|
+
InternalServerError,
|
|
105
|
+
InvalidParameterError,
|
|
101
106
|
ModelNotFoundError,
|
|
102
107
|
ModelTypeError,
|
|
108
|
+
OpenAIFileObjectNotFoundError,
|
|
103
109
|
ResourceNotFoundError,
|
|
110
|
+
ResponseInputItemNotFoundError,
|
|
111
|
+
ResponseNotFoundError,
|
|
112
|
+
ServiceNotEnabledError,
|
|
104
113
|
TokenValidationError,
|
|
105
114
|
ToolGroupNotFoundError,
|
|
106
115
|
UnsupportedModelError,
|
|
@@ -206,7 +215,8 @@ from .eval import (
|
|
|
206
215
|
resolve_job_status_request,
|
|
207
216
|
resolve_run_eval_request,
|
|
208
217
|
)
|
|
209
|
-
from .file_processors import FileProcessors, ProcessFileResponse
|
|
218
|
+
from .file_processors import FileProcessors, ProcessFileRequest, ProcessFileResponse
|
|
219
|
+
from .filters import COMPARISON_FILTER_TYPES, COMPOUND_FILTER_TYPES, ComparisonFilter, CompoundFilter, Filter
|
|
210
220
|
from .files import (
|
|
211
221
|
DeleteFileRequest,
|
|
212
222
|
ExpiresAfter,
|
|
@@ -256,6 +266,7 @@ from .inference import (
|
|
|
256
266
|
OpenAIChatCompletionUsage,
|
|
257
267
|
OpenAIChatCompletionUsageCompletionTokensDetails,
|
|
258
268
|
OpenAIChatCompletionUsagePromptTokensDetails,
|
|
269
|
+
OpenAIChatCompletionResponseMessage,
|
|
259
270
|
OpenAIChoice,
|
|
260
271
|
OpenAIChoiceDelta,
|
|
261
272
|
OpenAIChoiceLogprobs,
|
|
@@ -335,6 +346,7 @@ from .openai_responses import (
|
|
|
335
346
|
OpenAIResponseContentPartReasoningText,
|
|
336
347
|
OpenAIResponseContentPartRefusal,
|
|
337
348
|
OpenAIResponseError,
|
|
349
|
+
OpenAIResponseIncompleteDetails,
|
|
338
350
|
OpenAIResponseInput,
|
|
339
351
|
OpenAIResponseInputFunctionToolCallOutput,
|
|
340
352
|
OpenAIResponseInputMessageContent,
|
|
@@ -535,15 +547,28 @@ from .tools import (
|
|
|
535
547
|
from .validators import validate_embeddings_input_is_text
|
|
536
548
|
from .vector_io import (
|
|
537
549
|
Chunk,
|
|
550
|
+
ChunkForDeletion,
|
|
538
551
|
ChunkMetadata,
|
|
552
|
+
DEFAULT_CHUNK_OVERLAP_TOKENS,
|
|
553
|
+
DEFAULT_CHUNK_SIZE_TOKENS,
|
|
554
|
+
DeleteChunksRequest,
|
|
539
555
|
EmbeddedChunk,
|
|
556
|
+
InsertChunksRequest,
|
|
557
|
+
MAX_PAGINATION_LIMIT,
|
|
558
|
+
OpenAIAttachFileRequest,
|
|
540
559
|
OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
|
|
541
560
|
OpenAICreateVectorStoreRequestWithExtraBody,
|
|
561
|
+
OpenAISearchVectorStoreRequest,
|
|
562
|
+
OpenAIUpdateVectorStoreFileRequest,
|
|
563
|
+
OpenAIUpdateVectorStoreRequest,
|
|
564
|
+
QueryChunksRequest,
|
|
542
565
|
QueryChunksResponse,
|
|
543
566
|
SearchRankingOptions,
|
|
544
567
|
VectorIO,
|
|
545
568
|
VectorStoreChunkingStrategy,
|
|
546
569
|
VectorStoreChunkingStrategyAuto,
|
|
570
|
+
VectorStoreChunkingStrategyContextual,
|
|
571
|
+
VectorStoreChunkingStrategyContextualConfig,
|
|
547
572
|
VectorStoreChunkingStrategyStatic,
|
|
548
573
|
VectorStoreChunkingStrategyStaticConfig,
|
|
549
574
|
VectorStoreContent,
|
|
@@ -599,6 +624,7 @@ __all__ = [
|
|
|
599
624
|
"ApprovalFilter",
|
|
600
625
|
"BasicScoringFnParams",
|
|
601
626
|
"Batches",
|
|
627
|
+
"BatchNotFoundError",
|
|
602
628
|
"BatchObject",
|
|
603
629
|
"CancelBatchRequest",
|
|
604
630
|
"CancelTrainingJobRequest",
|
|
@@ -615,7 +641,11 @@ __all__ = [
|
|
|
615
641
|
"ChatCompletionResponseEventType",
|
|
616
642
|
"Checkpoint",
|
|
617
643
|
"Chunk",
|
|
644
|
+
"ChunkForDeletion",
|
|
618
645
|
"ChunkMetadata",
|
|
646
|
+
"DEFAULT_CHUNK_OVERLAP_TOKENS",
|
|
647
|
+
"DEFAULT_CHUNK_SIZE_TOKENS",
|
|
648
|
+
"DeleteChunksRequest",
|
|
619
649
|
"EmbeddedChunk",
|
|
620
650
|
"CommonBenchmarkFields",
|
|
621
651
|
"ConflictError",
|
|
@@ -628,6 +658,8 @@ __all__ = [
|
|
|
628
658
|
"Connector",
|
|
629
659
|
"ConnectorNotFoundError",
|
|
630
660
|
"ConnectorToolNotFoundError",
|
|
661
|
+
"ConversationItemNotFoundError",
|
|
662
|
+
"ConversationNotFoundError",
|
|
631
663
|
"ConnectorInput",
|
|
632
664
|
"Connectors",
|
|
633
665
|
"ConnectorType",
|
|
@@ -694,6 +726,11 @@ __all__ = [
|
|
|
694
726
|
"ExtraBodyField",
|
|
695
727
|
"FileProcessors",
|
|
696
728
|
"Files",
|
|
729
|
+
"Filter",
|
|
730
|
+
"ComparisonFilter",
|
|
731
|
+
"CompoundFilter",
|
|
732
|
+
"COMPARISON_FILTER_TYPES",
|
|
733
|
+
"COMPOUND_FILTER_TYPES",
|
|
697
734
|
"Fp8QuantizationConfig",
|
|
698
735
|
"clear_dynamic_schema_types",
|
|
699
736
|
"get_schema_identifier",
|
|
@@ -707,13 +744,15 @@ __all__ = [
|
|
|
707
744
|
"Inference",
|
|
708
745
|
"InferenceProvider",
|
|
709
746
|
"InlineProviderSpec",
|
|
747
|
+
"InsertChunksRequest",
|
|
710
748
|
"Inspect",
|
|
711
749
|
"InspectProviderRequest",
|
|
750
|
+
"InternalServerError",
|
|
712
751
|
"Admin",
|
|
713
752
|
"Int4QuantizationConfig",
|
|
714
753
|
"InterleavedContent",
|
|
715
754
|
"InterleavedContentItem",
|
|
716
|
-
"
|
|
755
|
+
"InvalidParameterError",
|
|
717
756
|
"is_generic_list",
|
|
718
757
|
"is_type_optional",
|
|
719
758
|
"is_type_union",
|
|
@@ -763,6 +802,7 @@ __all__ = [
|
|
|
763
802
|
"ListToolsResponse",
|
|
764
803
|
"LogProbConfig",
|
|
765
804
|
"LoraFinetuningConfig",
|
|
805
|
+
"MAX_PAGINATION_LIMIT",
|
|
766
806
|
"MCPListToolsTool",
|
|
767
807
|
"Metadata",
|
|
768
808
|
"Model",
|
|
@@ -801,6 +841,7 @@ __all__ = [
|
|
|
801
841
|
"OpenAIChatCompletionToolChoiceFunctionTool",
|
|
802
842
|
"OpenAIChatCompletionToolChoiceCustomTool",
|
|
803
843
|
"OpenAIChatCompletionToolChoice",
|
|
844
|
+
"OpenAIChatCompletionResponseMessage",
|
|
804
845
|
"OpenAIChoice",
|
|
805
846
|
"OpenAIChoiceDelta",
|
|
806
847
|
"OpenAIChoiceLogprobs",
|
|
@@ -822,6 +863,7 @@ __all__ = [
|
|
|
822
863
|
"OpenAIFileDeleteResponse",
|
|
823
864
|
"OpenAIFileFile",
|
|
824
865
|
"OpenAIFileObject",
|
|
866
|
+
"OpenAIFileObjectNotFoundError",
|
|
825
867
|
"OpenAIFilePurpose",
|
|
826
868
|
"OpenAIFinishReason",
|
|
827
869
|
"OpenAIImageURL",
|
|
@@ -830,6 +872,10 @@ __all__ = [
|
|
|
830
872
|
"OpenAIMessageParam",
|
|
831
873
|
"OpenAIModel",
|
|
832
874
|
"Order",
|
|
875
|
+
"OpenAIAttachFileRequest",
|
|
876
|
+
"OpenAISearchVectorStoreRequest",
|
|
877
|
+
"OpenAIUpdateVectorStoreFileRequest",
|
|
878
|
+
"OpenAIUpdateVectorStoreRequest",
|
|
833
879
|
"OpenAIResponseAnnotationCitation",
|
|
834
880
|
"OpenAIResponseAnnotationContainerFileCitation",
|
|
835
881
|
"OpenAIResponseAnnotationFileCitation",
|
|
@@ -841,6 +887,7 @@ __all__ = [
|
|
|
841
887
|
"OpenAIResponseContentPartReasoningText",
|
|
842
888
|
"OpenAIResponseContentPartRefusal",
|
|
843
889
|
"OpenAIResponseError",
|
|
890
|
+
"OpenAIResponseIncompleteDetails",
|
|
844
891
|
"OpenAIResponseFormatJSONObject",
|
|
845
892
|
"OpenAIResponseFormatJSONSchema",
|
|
846
893
|
"OpenAIResponseFormatParam",
|
|
@@ -936,6 +983,7 @@ __all__ = [
|
|
|
936
983
|
"ParamType",
|
|
937
984
|
"parse_type",
|
|
938
985
|
"PostTraining",
|
|
986
|
+
"ProcessFileRequest",
|
|
939
987
|
"ProcessFileResponse",
|
|
940
988
|
"PostTrainingMetric",
|
|
941
989
|
"PostTrainingJob",
|
|
@@ -961,6 +1009,7 @@ __all__ = [
|
|
|
961
1009
|
"QATFinetuningConfig",
|
|
962
1010
|
"QuantizationConfig",
|
|
963
1011
|
"QuantizationType",
|
|
1012
|
+
"QueryChunksRequest",
|
|
964
1013
|
"QueryChunksResponse",
|
|
965
1014
|
"RAGDocument",
|
|
966
1015
|
"RAGQueryConfig",
|
|
@@ -980,12 +1029,16 @@ __all__ = [
|
|
|
980
1029
|
"RerankResponse",
|
|
981
1030
|
"Resource",
|
|
982
1031
|
"ResourceNotFoundError",
|
|
1032
|
+
"ResponseInputItemNotFoundError",
|
|
1033
|
+
"ResponseNotFoundError",
|
|
983
1034
|
"ResourceType",
|
|
984
1035
|
"ResponseFormat",
|
|
985
1036
|
"ResponseFormatType",
|
|
986
1037
|
"ResponseGuardrail",
|
|
987
1038
|
"ResponseGuardrailSpec",
|
|
988
1039
|
"ResponseItemInclude",
|
|
1040
|
+
"ResponseTruncation",
|
|
1041
|
+
"ResponseNotFoundError",
|
|
989
1042
|
"RetrieveFileContentRequest",
|
|
990
1043
|
"RetrieveFileRequest",
|
|
991
1044
|
"RouteInfo",
|
|
@@ -1020,6 +1073,7 @@ __all__ = [
|
|
|
1020
1073
|
"SchemaInfo",
|
|
1021
1074
|
"SchemaOptions",
|
|
1022
1075
|
"SearchRankingOptions",
|
|
1076
|
+
"ServiceNotEnabledError",
|
|
1023
1077
|
"Shield",
|
|
1024
1078
|
"ShieldInput",
|
|
1025
1079
|
"ShieldStore",
|
|
@@ -1065,6 +1119,8 @@ __all__ = [
|
|
|
1065
1119
|
"VectorStore",
|
|
1066
1120
|
"VectorStoreChunkingStrategy",
|
|
1067
1121
|
"VectorStoreChunkingStrategyAuto",
|
|
1122
|
+
"VectorStoreChunkingStrategyContextual",
|
|
1123
|
+
"VectorStoreChunkingStrategyContextualConfig",
|
|
1068
1124
|
"VectorStoreChunkingStrategyStatic",
|
|
1069
1125
|
"VectorStoreChunkingStrategyStaticConfig",
|
|
1070
1126
|
"VectorStoreContent",
|
|
@@ -1097,4 +1153,6 @@ __all__ = [
|
|
|
1097
1153
|
"WeightedRanker",
|
|
1098
1154
|
# Validators
|
|
1099
1155
|
"validate_embeddings_input_is_text",
|
|
1156
|
+
# helpers
|
|
1157
|
+
"remove_null_from_anyof",
|
|
1100
1158
|
]
|
|
@@ -21,6 +21,7 @@ from .models import (
|
|
|
21
21
|
ResponseGuardrail,
|
|
22
22
|
ResponseGuardrailSpec,
|
|
23
23
|
ResponseItemInclude,
|
|
24
|
+
ResponseTruncation,
|
|
24
25
|
RetrieveResponseRequest,
|
|
25
26
|
)
|
|
26
27
|
|
|
@@ -33,6 +34,7 @@ __all__ = [
|
|
|
33
34
|
"ResponseGuardrail",
|
|
34
35
|
"ResponseGuardrailSpec",
|
|
35
36
|
"ResponseItemInclude",
|
|
37
|
+
"ResponseTruncation",
|
|
36
38
|
"RetrieveResponseRequest",
|
|
37
39
|
"fastapi_routes",
|
|
38
40
|
]
|
|
@@ -17,10 +17,11 @@ import logging # allow-direct-logging
|
|
|
17
17
|
from collections.abc import AsyncIterator
|
|
18
18
|
from typing import Annotated, Any
|
|
19
19
|
|
|
20
|
-
from fastapi import APIRouter, Body, Depends, HTTPException, Path, Query
|
|
20
|
+
from fastapi import APIRouter, Body, Depends, Path, Query
|
|
21
21
|
from fastapi.responses import StreamingResponse
|
|
22
22
|
from pydantic import BaseModel
|
|
23
23
|
|
|
24
|
+
from llama_stack_api.common.errors import OpenAIErrorResponse
|
|
24
25
|
from llama_stack_api.common.responses import Order
|
|
25
26
|
from llama_stack_api.openai_responses import (
|
|
26
27
|
ListOpenAIResponseInputItem,
|
|
@@ -29,9 +30,11 @@ from llama_stack_api.openai_responses import (
|
|
|
29
30
|
OpenAIResponseObject,
|
|
30
31
|
)
|
|
31
32
|
from llama_stack_api.router_utils import (
|
|
33
|
+
ExceptionTranslatingRoute,
|
|
32
34
|
create_path_dependency,
|
|
33
35
|
create_query_dependency,
|
|
34
36
|
standard_responses,
|
|
37
|
+
try_translate_to_http_exception,
|
|
35
38
|
)
|
|
36
39
|
from llama_stack_api.version import LLAMA_STACK_API_V1
|
|
37
40
|
|
|
@@ -72,8 +75,10 @@ async def sse_generator(event_gen):
|
|
|
72
75
|
raise # Re-raise to maintain proper cancellation semantics
|
|
73
76
|
except Exception as e:
|
|
74
77
|
logger.exception("Error in SSE generator")
|
|
75
|
-
|
|
76
|
-
|
|
78
|
+
http_exc = try_translate_to_http_exception(e)
|
|
79
|
+
status_code = http_exc.status_code if http_exc else 500
|
|
80
|
+
detail = http_exc.detail if http_exc else "Internal server error: An unexpected error occurred."
|
|
81
|
+
yield create_sse_event(OpenAIErrorResponse.from_message(detail, code=str(status_code)).to_dict())
|
|
77
82
|
|
|
78
83
|
|
|
79
84
|
# Automatically generate dependency functions from Pydantic models
|
|
@@ -115,29 +120,6 @@ async def get_list_response_input_items_request(
|
|
|
115
120
|
)
|
|
116
121
|
|
|
117
122
|
|
|
118
|
-
def _http_exception_from_value_error(exc: ValueError) -> HTTPException:
|
|
119
|
-
"""Convert implementation `ValueError` into an OpenAI-compatible HTTP error.
|
|
120
|
-
|
|
121
|
-
The compatibility OpenAI client maps HTTP 400 -> `BadRequestError`.
|
|
122
|
-
The existing API surface (and integration tests) expect "not found" cases
|
|
123
|
-
to be represented as a 400, not a 404.
|
|
124
|
-
"""
|
|
125
|
-
|
|
126
|
-
detail = str(exc) or "Invalid value"
|
|
127
|
-
return HTTPException(status_code=400, detail=detail)
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
def _http_exception_from_sse_error(exc: Exception) -> HTTPException:
|
|
131
|
-
if isinstance(exc, HTTPException):
|
|
132
|
-
return exc
|
|
133
|
-
if isinstance(exc, ValueError):
|
|
134
|
-
return _http_exception_from_value_error(exc)
|
|
135
|
-
status_code = getattr(exc, "status_code", None)
|
|
136
|
-
if isinstance(status_code, int):
|
|
137
|
-
return HTTPException(status_code=status_code, detail=str(exc))
|
|
138
|
-
return HTTPException(status_code=500, detail="Internal server error: An unexpected error occurred.")
|
|
139
|
-
|
|
140
|
-
|
|
141
123
|
def _preserve_context_for_sse(event_gen):
|
|
142
124
|
# StreamingResponse runs in a different task, losing request contextvars.
|
|
143
125
|
# create_task inside context.run captures the context at task creation.
|
|
@@ -173,6 +155,7 @@ def create_router(impl: Agents) -> APIRouter:
|
|
|
173
155
|
prefix=f"/{LLAMA_STACK_API_V1}",
|
|
174
156
|
tags=["Agents"],
|
|
175
157
|
responses=standard_responses,
|
|
158
|
+
route_class=ExceptionTranslatingRoute,
|
|
176
159
|
)
|
|
177
160
|
|
|
178
161
|
@router.get(
|
|
@@ -184,10 +167,7 @@ def create_router(impl: Agents) -> APIRouter:
|
|
|
184
167
|
async def get_openai_response(
|
|
185
168
|
request: Annotated[RetrieveResponseRequest, Depends(get_retrieve_response_request)],
|
|
186
169
|
) -> OpenAIResponseObject:
|
|
187
|
-
|
|
188
|
-
return await impl.get_openai_response(request)
|
|
189
|
-
except ValueError as exc:
|
|
190
|
-
raise _http_exception_from_value_error(exc) from exc
|
|
170
|
+
return await impl.get_openai_response(request)
|
|
191
171
|
|
|
192
172
|
@router.post(
|
|
193
173
|
"/responses",
|
|
@@ -208,10 +188,7 @@ def create_router(impl: Agents) -> APIRouter:
|
|
|
208
188
|
async def create_openai_response(
|
|
209
189
|
request: Annotated[CreateResponseRequest, Body(...)],
|
|
210
190
|
) -> OpenAIResponseObject | StreamingResponse:
|
|
211
|
-
|
|
212
|
-
result = await impl.create_openai_response(request)
|
|
213
|
-
except ValueError as exc:
|
|
214
|
-
raise _http_exception_from_value_error(exc) from exc
|
|
191
|
+
result = await impl.create_openai_response(request)
|
|
215
192
|
|
|
216
193
|
# For streaming responses, wrap in StreamingResponse for HTTP requests.
|
|
217
194
|
# The implementation is typed to return an `AsyncIterator` for streaming.
|
|
@@ -232,10 +209,7 @@ def create_router(impl: Agents) -> APIRouter:
|
|
|
232
209
|
async def list_openai_responses(
|
|
233
210
|
request: Annotated[ListResponsesRequest, Depends(get_list_responses_request)],
|
|
234
211
|
) -> ListOpenAIResponseObject:
|
|
235
|
-
|
|
236
|
-
return await impl.list_openai_responses(request)
|
|
237
|
-
except ValueError as exc:
|
|
238
|
-
raise _http_exception_from_value_error(exc) from exc
|
|
212
|
+
return await impl.list_openai_responses(request)
|
|
239
213
|
|
|
240
214
|
@router.get(
|
|
241
215
|
"/responses/{response_id}/input_items",
|
|
@@ -246,10 +220,7 @@ def create_router(impl: Agents) -> APIRouter:
|
|
|
246
220
|
async def list_openai_response_input_items(
|
|
247
221
|
request: Annotated[ListResponseInputItemsRequest, Depends(get_list_response_input_items_request)],
|
|
248
222
|
) -> ListOpenAIResponseInputItem:
|
|
249
|
-
|
|
250
|
-
return await impl.list_openai_response_input_items(request)
|
|
251
|
-
except ValueError as exc:
|
|
252
|
-
raise _http_exception_from_value_error(exc) from exc
|
|
223
|
+
return await impl.list_openai_response_input_items(request)
|
|
253
224
|
|
|
254
225
|
@router.delete(
|
|
255
226
|
"/responses/{response_id}",
|
|
@@ -260,9 +231,6 @@ def create_router(impl: Agents) -> APIRouter:
|
|
|
260
231
|
async def delete_openai_response(
|
|
261
232
|
request: Annotated[DeleteResponseRequest, Depends(get_delete_response_request)],
|
|
262
233
|
) -> OpenAIDeleteResponseObject:
|
|
263
|
-
|
|
264
|
-
return await impl.delete_openai_response(request)
|
|
265
|
-
except ValueError as exc:
|
|
266
|
-
raise _http_exception_from_value_error(exc) from exc
|
|
234
|
+
return await impl.delete_openai_response(request)
|
|
267
235
|
|
|
268
236
|
return router
|
|
@@ -15,6 +15,7 @@ from enum import StrEnum
|
|
|
15
15
|
from pydantic import BaseModel, ConfigDict, Field
|
|
16
16
|
|
|
17
17
|
from llama_stack_api.common.responses import Order
|
|
18
|
+
from llama_stack_api.inference import ServiceTier
|
|
18
19
|
from llama_stack_api.openai_responses import (
|
|
19
20
|
OpenAIResponseInput,
|
|
20
21
|
OpenAIResponseInputTool,
|
|
@@ -23,6 +24,7 @@ from llama_stack_api.openai_responses import (
|
|
|
23
24
|
OpenAIResponseReasoning,
|
|
24
25
|
OpenAIResponseText,
|
|
25
26
|
)
|
|
27
|
+
from llama_stack_api.schema_utils import remove_null_from_anyof
|
|
26
28
|
|
|
27
29
|
|
|
28
30
|
class ResponseItemInclude(StrEnum):
|
|
@@ -37,6 +39,13 @@ class ResponseItemInclude(StrEnum):
|
|
|
37
39
|
reasoning_encrypted_content = "reasoning.encrypted_content"
|
|
38
40
|
|
|
39
41
|
|
|
42
|
+
class ResponseTruncation(StrEnum):
|
|
43
|
+
"""Controls how the service truncates input when it exceeds the model context window."""
|
|
44
|
+
|
|
45
|
+
auto = "auto" # Let the service decide how to truncate
|
|
46
|
+
disabled = "disabled" # Disable truncation; context over limit results in 400 error
|
|
47
|
+
|
|
48
|
+
|
|
40
49
|
class ResponseGuardrailSpec(BaseModel):
|
|
41
50
|
"""Specification for a guardrail to apply during response generation."""
|
|
42
51
|
|
|
@@ -49,13 +58,19 @@ class ResponseGuardrailSpec(BaseModel):
|
|
|
49
58
|
ResponseGuardrail = str | ResponseGuardrailSpec
|
|
50
59
|
|
|
51
60
|
|
|
61
|
+
# extra_body can be accessed via .model_extra
|
|
52
62
|
class CreateResponseRequest(BaseModel):
|
|
53
63
|
"""Request model for creating a response."""
|
|
54
64
|
|
|
55
|
-
model_config = ConfigDict(extra="
|
|
65
|
+
model_config = ConfigDict(extra="allow")
|
|
56
66
|
|
|
57
67
|
input: str | list[OpenAIResponseInput] = Field(..., description="Input message(s) to create the response.")
|
|
58
68
|
model: str = Field(..., description="The underlying LLM used for completions.")
|
|
69
|
+
background: bool | None = Field(
|
|
70
|
+
default=None,
|
|
71
|
+
description="Whether to run the model response in the background. When true, returns immediately with status 'queued'.",
|
|
72
|
+
json_schema_extra=remove_null_from_anyof,
|
|
73
|
+
)
|
|
59
74
|
prompt: OpenAIResponsePrompt | None = Field(
|
|
60
75
|
default=None, description="Prompt object with ID, version, and variables."
|
|
61
76
|
)
|
|
@@ -68,6 +83,11 @@ class CreateResponseRequest(BaseModel):
|
|
|
68
83
|
default=None,
|
|
69
84
|
description="Optional ID of a previous response to continue from.",
|
|
70
85
|
)
|
|
86
|
+
prompt_cache_key: str | None = Field(
|
|
87
|
+
default=None,
|
|
88
|
+
max_length=64,
|
|
89
|
+
description="A key to use when reading from or writing to the prompt cache.",
|
|
90
|
+
)
|
|
71
91
|
conversation: str | None = Field(
|
|
72
92
|
default=None,
|
|
73
93
|
description="Optional ID of a conversation to add the response to.",
|
|
@@ -86,6 +106,18 @@ class CreateResponseRequest(BaseModel):
|
|
|
86
106
|
le=2.0,
|
|
87
107
|
description="Sampling temperature.",
|
|
88
108
|
)
|
|
109
|
+
top_p: float | None = Field(
|
|
110
|
+
default=None,
|
|
111
|
+
ge=0.0,
|
|
112
|
+
le=1.0,
|
|
113
|
+
description="Nucleus sampling parameter that controls response diversity (lower values increase focus).",
|
|
114
|
+
)
|
|
115
|
+
frequency_penalty: float | None = Field(
|
|
116
|
+
default=None,
|
|
117
|
+
ge=-2.0,
|
|
118
|
+
le=2.0,
|
|
119
|
+
description="Penalizes new tokens based on their frequency in the text so far.",
|
|
120
|
+
)
|
|
89
121
|
text: OpenAIResponseText | None = Field(
|
|
90
122
|
default=None,
|
|
91
123
|
description="Configuration for text response generation.",
|
|
@@ -130,10 +162,30 @@ class CreateResponseRequest(BaseModel):
|
|
|
130
162
|
max_length=64,
|
|
131
163
|
description="A stable identifier used for safety monitoring and abuse detection.",
|
|
132
164
|
)
|
|
165
|
+
service_tier: ServiceTier | None = Field(
|
|
166
|
+
default=None,
|
|
167
|
+
description="The service tier to use for this request.",
|
|
168
|
+
)
|
|
133
169
|
metadata: dict[str, str] | None = Field(
|
|
134
170
|
default=None,
|
|
135
171
|
description="Dictionary of metadata key-value pairs to attach to the response.",
|
|
136
172
|
)
|
|
173
|
+
truncation: ResponseTruncation | None = Field(
|
|
174
|
+
default=None,
|
|
175
|
+
description="Controls how the service truncates input when it exceeds the model context window.",
|
|
176
|
+
)
|
|
177
|
+
top_logprobs: int | None = Field(
|
|
178
|
+
default=None,
|
|
179
|
+
ge=0,
|
|
180
|
+
le=20,
|
|
181
|
+
description="The number of most likely tokens to return at each position, along with their log probabilities.",
|
|
182
|
+
)
|
|
183
|
+
presence_penalty: float | None = Field(
|
|
184
|
+
default=None,
|
|
185
|
+
ge=-2.0,
|
|
186
|
+
le=2.0,
|
|
187
|
+
description="Penalizes new tokens based on whether they appear in the text so far.",
|
|
188
|
+
)
|
|
137
189
|
|
|
138
190
|
|
|
139
191
|
class RetrieveResponseRequest(BaseModel):
|