llama-stack-api 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- llama_stack_api/common/__init__.py +5 -0
- llama_stack_api/common/content_types.py +101 -0
- llama_stack_api/common/errors.py +95 -0
- llama_stack_api/common/job_types.py +38 -0
- llama_stack_api/common/responses.py +77 -0
- llama_stack_api/common/training_types.py +47 -0
- llama_stack_api/common/type_system.py +146 -0
- llama_stack_api/connectors.py +146 -0
- llama_stack_api/conversations.py +270 -0
- llama_stack_api/datasetio.py +55 -0
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- llama_stack_api/datatypes.py +373 -0
- llama_stack_api/eval.py +137 -0
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- llama_stack_api/inference.py +1169 -0
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- llama_stack_api/internal/__init__.py +9 -0
- llama_stack_api/internal/kvstore.py +28 -0
- llama_stack_api/internal/sqlstore.py +81 -0
- llama_stack_api/models.py +171 -0
- llama_stack_api/openai_responses.py +1468 -0
- llama_stack_api/post_training.py +370 -0
- llama_stack_api/prompts.py +203 -0
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- llama_stack_api/rag_tool.py +168 -0
- llama_stack_api/resource.py +37 -0
- llama_stack_api/router_utils.py +160 -0
- llama_stack_api/safety.py +132 -0
- llama_stack_api/schema_utils.py +208 -0
- llama_stack_api/scoring.py +93 -0
- llama_stack_api/scoring_functions.py +211 -0
- llama_stack_api/shields.py +93 -0
- llama_stack_api/tools.py +226 -0
- llama_stack_api/vector_io.py +941 -0
- llama_stack_api/vector_stores.py +53 -0
- llama_stack_api/version.py +9 -0
- {llama_stack_api-0.4.2.dist-info → llama_stack_api-0.4.4.dist-info}/METADATA +1 -1
- llama_stack_api-0.4.4.dist-info/RECORD +70 -0
- {llama_stack_api-0.4.2.dist-info → llama_stack_api-0.4.4.dist-info}/WHEEL +1 -1
- llama_stack_api-0.4.4.dist-info/top_level.txt +1 -0
- llama_stack_api-0.4.2.dist-info/RECORD +0 -4
- llama_stack_api-0.4.2.dist-info/top_level.txt +0 -1

llama_stack_api/admin/__init__.py
@@ -0,0 +1,45 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Admin API protocol and models.
+
+This module contains the Admin protocol definition.
+Pydantic models are defined in llama_stack_api.admin.models.
+The FastAPI router is defined in llama_stack_api.admin.fastapi_routes.
+"""
+
+# Import fastapi_routes for router factory access
+from . import fastapi_routes
+
+# Import protocol for re-export
+from .api import Admin
+
+# Import models for re-export
+from .models import (
+    ApiFilter,
+    HealthInfo,
+    InspectProviderRequest,
+    ListProvidersResponse,
+    ListRoutesRequest,
+    ListRoutesResponse,
+    ProviderInfo,
+    RouteInfo,
+    VersionInfo,
+)
+
+__all__ = [
+    "Admin",
+    "ApiFilter",
+    "HealthInfo",
+    "InspectProviderRequest",
+    "ListProvidersResponse",
+    "ListRoutesRequest",
+    "ListRoutesResponse",
+    "ProviderInfo",
+    "RouteInfo",
+    "VersionInfo",
+    "fastapi_routes",
+]

llama_stack_api/admin/api.py
@@ -0,0 +1,72 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Protocol, runtime_checkable
+
+from .models import (
+    HealthInfo,
+    InspectProviderRequest,
+    ListProvidersResponse,
+    ListRoutesRequest,
+    ListRoutesResponse,
+    ProviderInfo,
+    VersionInfo,
+)
+
+
+@runtime_checkable
+class Admin(Protocol):
+    """Admin
+
+    Admin API for stack operations only available to administrative users.
+    """
+
+    async def list_providers(self) -> ListProvidersResponse:
+        """List providers.
+
+        List all available providers.
+
+        :returns: A ListProvidersResponse containing information about all providers.
+        """
+        ...
+
+    async def inspect_provider(self, request: InspectProviderRequest) -> ProviderInfo:
+        """Get provider.
+
+        Get detailed information about a specific provider.
+
+        :param request: Request containing the provider ID to inspect
+        :returns: A ProviderInfo object containing the provider's details.
+        """
+        ...
+
+    async def list_routes(self, request: ListRoutesRequest) -> ListRoutesResponse:
+        """List routes.
+
+        List all available API routes with their methods and implementing providers.
+
+        :param request: Request containing optional filter parameters
+        :returns: Response containing information about all available routes.
+        """
+        ...
+
+    async def health(self) -> HealthInfo:
+        """Get health status.
+
+        Get the current health status of the service.
+
+        :returns: Health information indicating if the service is operational.
+        """
+        ...
+
+    async def version(self) -> VersionInfo:
+        """Get version.
+
+        Get the version of the service.
+
+        :returns: Version information containing the service version number.
+        """
+        ...
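
Because Admin is declared @runtime_checkable, any object exposing matching async methods satisfies the protocol. Below is a minimal sketch (not part of the diff) of a stub implementation; the HealthStatus.OK member and all returned values are assumptions for illustration.

from llama_stack_api.admin import (
    Admin,
    HealthInfo,
    InspectProviderRequest,
    ListProvidersResponse,
    ListRoutesRequest,
    ListRoutesResponse,
    ProviderInfo,
    VersionInfo,
)
from llama_stack_api.datatypes import HealthStatus  # assumed to expose an OK member


class StubAdmin:
    """Hypothetical Admin implementation returning empty/static data."""

    async def list_providers(self) -> ListProvidersResponse:
        return ListProvidersResponse(data=[])

    async def inspect_provider(self, request: InspectProviderRequest) -> ProviderInfo:
        raise KeyError(request.provider_id)  # stub: nothing registered

    async def list_routes(self, request: ListRoutesRequest) -> ListRoutesResponse:
        return ListRoutesResponse(data=[])

    async def health(self) -> HealthInfo:
        return HealthInfo(status=HealthStatus.OK)  # assumed enum member

    async def version(self) -> VersionInfo:
        return VersionInfo(version="0.4.4")


assert isinstance(StubAdmin(), Admin)  # structural check via @runtime_checkable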

llama_stack_api/admin/fastapi_routes.py
@@ -0,0 +1,117 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""FastAPI router for the Admin API.
+
+This module defines the FastAPI router for the Admin API using standard
+FastAPI route decorators. The router is defined in the API package to keep
+all API-related code together.
+"""
+
+from typing import Annotated
+
+from fastapi import APIRouter, Depends
+
+from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses
+from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
+
+from .api import Admin
+from .models import (
+    HealthInfo,
+    InspectProviderRequest,
+    ListProvidersResponse,
+    ListRoutesRequest,
+    ListRoutesResponse,
+    ProviderInfo,
+    VersionInfo,
+)
+
+# Automatically generate dependency functions from Pydantic models
+get_inspect_provider_request = create_path_dependency(InspectProviderRequest)
+get_list_routes_request = create_query_dependency(ListRoutesRequest)
+
+
+def create_router(impl: Admin) -> APIRouter:
+    """Create a FastAPI router for the Admin API.
+
+    Args:
+        impl: The Admin implementation instance
+
+    Returns:
+        APIRouter configured for the Admin API
+    """
+    router = APIRouter(
+        prefix=f"/{LLAMA_STACK_API_V1ALPHA}",
+        tags=["Admin"],
+        responses=standard_responses,
+    )
+
+    @router.get(
+        "/admin/providers",
+        response_model=ListProvidersResponse,
+        summary="List all available providers",
+        description="List all available providers with their configuration and health status.",
+        responses={
+            200: {"description": "A list of provider information objects."},
+        },
+    )
+    async def list_providers() -> ListProvidersResponse:
+        return await impl.list_providers()
+
+    @router.get(
+        "/admin/providers/{provider_id}",
+        response_model=ProviderInfo,
+        summary="Get provider details",
+        description="Get detailed information about a specific provider.",
+        responses={
+            200: {"description": "The provider information object."},
+            404: {"description": "Provider not found."},
+        },
+    )
+    async def inspect_provider(
+        request: Annotated[InspectProviderRequest, Depends(get_inspect_provider_request)],
+    ) -> ProviderInfo:
+        return await impl.inspect_provider(request)
+
+    @router.get(
+        "/admin/inspect/routes",
+        response_model=ListRoutesResponse,
+        summary="List all available API routes",
+        description="List all available API routes with their methods and implementing providers.",
+        responses={
+            200: {"description": "A list of route information objects."},
+        },
+    )
+    async def list_routes(
+        request: Annotated[ListRoutesRequest, Depends(get_list_routes_request)],
+    ) -> ListRoutesResponse:
+        return await impl.list_routes(request)
+
+    @router.get(
+        "/admin/health",
+        response_model=HealthInfo,
+        summary="Get service health status",
+        description="Get the current health status of the service.",
+        responses={
+            200: {"description": "Health information object."},
+        },
+    )
+    async def health() -> HealthInfo:
+        return await impl.health()
+
+    @router.get(
+        "/admin/version",
+        response_model=VersionInfo,
+        summary="Get service version",
+        description="Get the version of the service.",
+        responses={
+            200: {"description": "Version information object."},
+        },
+    )
+    async def version() -> VersionInfo:
+        return await impl.version()
+
+    return router
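
The factory above returns a plain APIRouter, so mounting it follows standard FastAPI conventions. A minimal sketch, reusing the hypothetical StubAdmin from the previous note:

from fastapi import FastAPI

from llama_stack_api.admin import fastapi_routes

app = FastAPI()
# StubAdmin is the hypothetical implementation sketched after admin/api.py above.
app.include_router(fastapi_routes.create_router(StubAdmin()))
# Endpoints are served under the v1alpha prefix, e.g. GET /v1alpha/admin/health
# (assuming LLAMA_STACK_API_V1ALPHA == "v1alpha").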

llama_stack_api/admin/models.py
@@ -0,0 +1,113 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any, Literal
+
+from pydantic import BaseModel, Field
+
+from llama_stack_api.datatypes import HealthResponse, HealthStatus
+from llama_stack_api.schema_utils import json_schema_type
+
+# Valid values for the route filter parameter.
+# Actual API levels: v1, v1alpha, v1beta (filters by level, excludes deprecated)
+# Special filter value: "deprecated" (shows deprecated routes regardless of level)
+ApiFilter = Literal["v1", "v1alpha", "v1beta", "deprecated"]
+
+
+@json_schema_type
+class RouteInfo(BaseModel):
+    """Information about an API route including its path, method, and implementing providers.
+
+    :param route: The API endpoint path
+    :param method: HTTP method for the route
+    :param provider_types: List of provider types that implement this route
+    """
+
+    route: str = Field(description="The API route path")
+    method: str = Field(description="The HTTP method for the route")
+    provider_types: list[str] = Field(description="List of provider types implementing this route")
+
+
+@json_schema_type
+class HealthInfo(BaseModel):
+    """Health status information for the service.
+
+    :param status: Current health status of the service
+    """
+
+    status: HealthStatus = Field(description="The health status of the service")
+
+
+@json_schema_type
+class VersionInfo(BaseModel):
+    """Version information for the service.
+
+    :param version: Version number of the service
+    """
+
+    version: str = Field(description="The version string of the service")
+
+
+@json_schema_type
+class ListRoutesResponse(BaseModel):
+    """Response containing a list of all available API routes.
+
+    :param data: List of available route information objects
+    """
+
+    data: list[RouteInfo] = Field(description="List of available API routes")
+
+
+@json_schema_type
+class ProviderInfo(BaseModel):
+    """Information about a registered provider including its configuration and health status.
+
+    :param api: The API name this provider implements
+    :param provider_id: Unique identifier for the provider
+    :param provider_type: The type of provider implementation
+    :param config: Configuration parameters for the provider
+    :param health: Current health status of the provider
+    """
+
+    api: str = Field(..., description="The API name this provider implements")
+    provider_id: str = Field(..., description="Unique identifier for the provider")
+    provider_type: str = Field(..., description="The type of provider implementation")
+    config: dict[str, Any] = Field(..., description="Configuration parameters for the provider")
+    health: HealthResponse = Field(..., description="Current health status of the provider")
+
+
+@json_schema_type
+class ListProvidersResponse(BaseModel):
+    """Response containing a list of all available providers.
+
+    :param data: List of provider information objects
+    """
+
+    data: list[ProviderInfo] = Field(..., description="List of provider information objects")
+
+
+# Request models for FastAPI
+@json_schema_type
+class ListRoutesRequest(BaseModel):
+    """Request to list API routes.
+
+    :param api_filter: Optional filter to control which routes are returned
+    """
+
+    api_filter: ApiFilter | None = Field(
+        default=None,
+        description="Filter to control which routes are returned. Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or 'deprecated' to show deprecated routes across all levels. If not specified, returns all non-deprecated routes.",
+    )
+
+
+@json_schema_type
+class InspectProviderRequest(BaseModel):
+    """Request to inspect a specific provider.
+
+    :param provider_id: The ID of the provider to inspect
+    """
+
+    provider_id: str = Field(..., description="The ID of the provider to inspect.")
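
A short sketch of the request models in use; because ApiFilter is a Literal, Pydantic rejects any value outside the four allowed strings. The provider ID below is made up.

from llama_stack_api.admin.models import InspectProviderRequest, ListRoutesRequest

ListRoutesRequest()                           # api_filter=None: all non-deprecated routes
ListRoutesRequest(api_filter="deprecated")    # deprecated routes across all levels
InspectProviderRequest(provider_id="vllm-0")  # provider_id is required; the ID is hypothetical
# ListRoutesRequest(api_filter="v2") raises pydantic.ValidationError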

llama_stack_api/agents.py
@@ -0,0 +1,173 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from collections.abc import AsyncIterator
+from enum import StrEnum
+from typing import Annotated, Protocol, runtime_checkable
+
+from pydantic import BaseModel
+
+from llama_stack_api.common.responses import Order
+from llama_stack_api.schema_utils import ExtraBodyField, json_schema_type, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1
+
+from .openai_responses import (
+    ListOpenAIResponseInputItem,
+    ListOpenAIResponseObject,
+    OpenAIDeleteResponseObject,
+    OpenAIResponseInput,
+    OpenAIResponseInputTool,
+    OpenAIResponseInputToolChoice,
+    OpenAIResponseObject,
+    OpenAIResponseObjectStream,
+    OpenAIResponsePrompt,
+    OpenAIResponseText,
+)
+
+
+@json_schema_type
+class ResponseGuardrailSpec(BaseModel):
+    """Specification for a guardrail to apply during response generation.
+
+    :param type: The type/identifier of the guardrail.
+    """
+
+    type: str
+    # TODO: more fields to be added for guardrail configuration
+
+
+ResponseGuardrail = str | ResponseGuardrailSpec
+
+
+class ResponseItemInclude(StrEnum):
+    """
+    Specify additional output data to include in the model response.
+    """
+
+    web_search_call_action_sources = "web_search_call.action.sources"
+    code_interpreter_call_outputs = "code_interpreter_call.outputs"
+    computer_call_output_output_image_url = "computer_call_output.output.image_url"
+    file_search_call_results = "file_search_call.results"
+    message_input_image_image_url = "message.input_image.image_url"
+    message_output_text_logprobs = "message.output_text.logprobs"
+    reasoning_encrypted_content = "reasoning.encrypted_content"
+
+
+@runtime_checkable
+class Agents(Protocol):
+    """Agents
+
+    APIs for creating and interacting with agentic systems."""
+
+    # We situate the OpenAI Responses API in the Agents API just like we did things
+    # for Inference. The Responses API, in its intent, serves the same purpose as
+    # the Agents API above -- it is essentially a lightweight "agentic loop" with
+    # integrated tool calling.
+    #
+    # Both of these APIs are inherently stateful.
+
+    @webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
+    async def get_openai_response(
+        self,
+        response_id: str,
+    ) -> OpenAIResponseObject:
+        """Get a model response.
+
+        :param response_id: The ID of the OpenAI response to retrieve.
+        :returns: An OpenAIResponseObject.
+        """
+        ...
+
+    @webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
+    async def create_openai_response(
+        self,
+        input: str | list[OpenAIResponseInput],
+        model: str,
+        prompt: OpenAIResponsePrompt | None = None,
+        instructions: str | None = None,
+        parallel_tool_calls: bool | None = True,
+        previous_response_id: str | None = None,
+        conversation: str | None = None,
+        store: bool | None = True,
+        stream: bool | None = False,
+        temperature: float | None = None,
+        text: OpenAIResponseText | None = None,
+        tool_choice: OpenAIResponseInputToolChoice | None = None,
+        tools: list[OpenAIResponseInputTool] | None = None,
+        include: list[ResponseItemInclude] | None = None,
+        max_infer_iters: int | None = 10,  # this is an extension to the OpenAI API
+        guardrails: Annotated[
+            list[ResponseGuardrail] | None,
+            ExtraBodyField(
+                "List of guardrails to apply during response generation. Guardrails provide safety and content moderation."
+            ),
+        ] = None,
+        max_tool_calls: int | None = None,
+        metadata: dict[str, str] | None = None,
+    ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
+        """Create a model response.
+
+        :param input: Input message(s) to create the response.
+        :param model: The underlying LLM used for completions.
+        :param prompt: (Optional) Prompt object with ID, version, and variables.
+        :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork off new responses from existing responses.
+        :param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation.
+        :param include: (Optional) Additional fields to include in the response.
+        :param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications.
+        :param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response.
+        :param metadata: (Optional) Dictionary of metadata key-value pairs to attach to the response.
+        :returns: An OpenAIResponseObject.
+        """
+        ...
+
+    @webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
+    async def list_openai_responses(
+        self,
+        after: str | None = None,
+        limit: int | None = 50,
+        model: str | None = None,
+        order: Order | None = Order.desc,
+    ) -> ListOpenAIResponseObject:
+        """List all responses.
+
+        :param after: The ID of the last response to return.
+        :param limit: The number of responses to return.
+        :param model: The model to filter responses by.
+        :param order: The order to sort responses by when sorted by created_at ('asc' or 'desc').
+        :returns: A ListOpenAIResponseObject.
+        """
+        ...
+
+    @webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
+    async def list_openai_response_input_items(
+        self,
+        response_id: str,
+        after: str | None = None,
+        before: str | None = None,
+        include: list[str] | None = None,
+        limit: int | None = 20,
+        order: Order | None = Order.desc,
+    ) -> ListOpenAIResponseInputItem:
+        """List input items.
+
+        :param response_id: The ID of the response to retrieve input items for.
+        :param after: An item ID to list items after, used for pagination.
+        :param before: An item ID to list items before, used for pagination.
+        :param include: Additional fields to include in the response.
+        :param limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
+        :param order: The order to return the input items in. Default is desc.
+        :returns: A ListOpenAIResponseInputItem.
+        """
+        ...
+
+    @webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
+    async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
+        """Delete a response.
+
+        :param response_id: The ID of the OpenAI response to delete.
+        :returns: An OpenAIDeleteResponseObject
+        """
+        ...
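
A hedged sketch of driving the Responses API through any object satisfying the Agents protocol; the implementation handle, model ID, and guardrail ID below are all hypothetical.

async def demo(agents) -> None:  # agents: any Agents implementation
    response = await agents.create_openai_response(
        input="Summarize the latest release notes.",
        model="meta-llama/Llama-3.2-3B-Instruct",  # hypothetical model ID
        guardrails=["llama-guard"],                # string form of ResponseGuardrail
        stream=False,                              # single OpenAIResponseObject, not a stream
    )
    # OpenAIResponseObject is OpenAI-compatible, so an `id` field is assumed here.
    await agents.list_openai_response_input_items(response_id=response.id)
    await agents.delete_openai_response(response_id=response.id)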

llama_stack_api/batches/__init__.py
@@ -0,0 +1,40 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Batches API protocol and models.
+
+This module contains the Batches protocol definition.
+Pydantic models are defined in llama_stack_api.batches.models.
+The FastAPI router is defined in llama_stack_api.batches.fastapi_routes.
+"""
+
+from openai.types import Batch as BatchObject
+
+# Import fastapi_routes for router factory access
+from . import fastapi_routes
+
+# Import protocol for re-export
+from .api import Batches
+
+# Import models for re-export
+from .models import (
+    CancelBatchRequest,
+    CreateBatchRequest,
+    ListBatchesRequest,
+    ListBatchesResponse,
+    RetrieveBatchRequest,
+)
+
+__all__ = [
+    "Batches",
+    "BatchObject",
+    "CancelBatchRequest",
+    "CreateBatchRequest",
+    "ListBatchesRequest",
+    "ListBatchesResponse",
+    "RetrieveBatchRequest",
+    "fastapi_routes",
+]

llama_stack_api/batches/api.py
@@ -0,0 +1,53 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Protocol, runtime_checkable
+
+from openai.types import Batch as BatchObject
+
+from .models import (
+    CancelBatchRequest,
+    CreateBatchRequest,
+    ListBatchesRequest,
+    ListBatchesResponse,
+    RetrieveBatchRequest,
+)
+
+
+@runtime_checkable
+class Batches(Protocol):
+    """
+    The Batches API enables efficient processing of multiple requests in a single operation,
+    particularly useful for processing large datasets, batch evaluation workflows, and
+    cost-effective inference at scale.
+
+    The API is designed to allow use of OpenAI client libraries for seamless integration.
+
+    This API provides the following extensions:
+     - idempotent batch creation
+
+    Note: This API is currently under active development and may undergo changes.
+    """
+
+    async def create_batch(
+        self,
+        request: CreateBatchRequest,
+    ) -> BatchObject: ...
+
+    async def retrieve_batch(
+        self,
+        request: RetrieveBatchRequest,
+    ) -> BatchObject: ...
+
+    async def cancel_batch(
+        self,
+        request: CancelBatchRequest,
+    ) -> BatchObject: ...
+
+    async def list_batches(
+        self,
+        request: ListBatchesRequest,
+    ) -> ListBatchesResponse: ...
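
A sketch of a server-side call through this protocol. CreateBatchRequest is defined in llama_stack_api/batches/models.py, which this hunk does not show, so the field names below simply mirror OpenAI's batch-create parameters and are assumptions.

from llama_stack_api.batches import CreateBatchRequest


async def submit(batches) -> None:  # batches: any Batches implementation
    batch = await batches.create_batch(
        CreateBatchRequest(
            input_file_id="file-abc123",      # assumed field (OpenAI-style)
            endpoint="/v1/chat/completions",  # assumed field
            completion_window="24h",          # assumed field
        )
    )
    print(batch.status)  # BatchObject is openai.types.Batch, which has a status field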