llama-stack-api 0.4.3__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl
This diff shows the content changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
- llama_stack_api/__init__.py +1100 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents/__init__.py +38 -0
- llama_stack_api/agents/api.py +52 -0
- llama_stack_api/agents/fastapi_routes.py +268 -0
- llama_stack_api/agents/models.py +181 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- llama_stack_api/common/__init__.py +5 -0
- llama_stack_api/common/content_types.py +101 -0
- llama_stack_api/common/errors.py +110 -0
- llama_stack_api/common/job_types.py +38 -0
- llama_stack_api/common/responses.py +77 -0
- llama_stack_api/common/training_types.py +47 -0
- llama_stack_api/common/type_system.py +146 -0
- llama_stack_api/connectors/__init__.py +38 -0
- llama_stack_api/connectors/api.py +50 -0
- llama_stack_api/connectors/fastapi_routes.py +103 -0
- llama_stack_api/connectors/models.py +103 -0
- llama_stack_api/conversations/__init__.py +61 -0
- llama_stack_api/conversations/api.py +44 -0
- llama_stack_api/conversations/fastapi_routes.py +177 -0
- llama_stack_api/conversations/models.py +245 -0
- llama_stack_api/datasetio/__init__.py +34 -0
- llama_stack_api/datasetio/api.py +42 -0
- llama_stack_api/datasetio/fastapi_routes.py +94 -0
- llama_stack_api/datasetio/models.py +48 -0
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- llama_stack_api/datatypes.py +373 -0
- llama_stack_api/eval/__init__.py +55 -0
- llama_stack_api/eval/api.py +51 -0
- llama_stack_api/eval/compat.py +300 -0
- llama_stack_api/eval/fastapi_routes.py +126 -0
- llama_stack_api/eval/models.py +141 -0
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- llama_stack_api/inference/__init__.py +207 -0
- llama_stack_api/inference/api.py +93 -0
- llama_stack_api/inference/fastapi_routes.py +243 -0
- llama_stack_api/inference/models.py +1035 -0
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- llama_stack_api/internal/__init__.py +9 -0
- llama_stack_api/internal/kvstore.py +28 -0
- llama_stack_api/internal/sqlstore.py +81 -0
- llama_stack_api/models/__init__.py +47 -0
- llama_stack_api/models/api.py +38 -0
- llama_stack_api/models/fastapi_routes.py +104 -0
- llama_stack_api/models/models.py +157 -0
- llama_stack_api/openai_responses.py +1494 -0
- llama_stack_api/post_training/__init__.py +73 -0
- llama_stack_api/post_training/api.py +36 -0
- llama_stack_api/post_training/fastapi_routes.py +116 -0
- llama_stack_api/post_training/models.py +339 -0
- llama_stack_api/prompts/__init__.py +47 -0
- llama_stack_api/prompts/api.py +44 -0
- llama_stack_api/prompts/fastapi_routes.py +163 -0
- llama_stack_api/prompts/models.py +177 -0
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- llama_stack_api/rag_tool.py +168 -0
- llama_stack_api/resource.py +36 -0
- llama_stack_api/router_utils.py +160 -0
- llama_stack_api/safety/__init__.py +37 -0
- llama_stack_api/safety/api.py +29 -0
- llama_stack_api/safety/datatypes.py +83 -0
- llama_stack_api/safety/fastapi_routes.py +55 -0
- llama_stack_api/safety/models.py +38 -0
- llama_stack_api/schema_utils.py +251 -0
- llama_stack_api/scoring/__init__.py +66 -0
- llama_stack_api/scoring/api.py +35 -0
- llama_stack_api/scoring/fastapi_routes.py +67 -0
- llama_stack_api/scoring/models.py +81 -0
- llama_stack_api/scoring_functions/__init__.py +50 -0
- llama_stack_api/scoring_functions/api.py +39 -0
- llama_stack_api/scoring_functions/fastapi_routes.py +108 -0
- llama_stack_api/scoring_functions/models.py +214 -0
- llama_stack_api/shields/__init__.py +41 -0
- llama_stack_api/shields/api.py +39 -0
- llama_stack_api/shields/fastapi_routes.py +104 -0
- llama_stack_api/shields/models.py +74 -0
- llama_stack_api/tools.py +226 -0
- llama_stack_api/validators.py +46 -0
- llama_stack_api/vector_io/__init__.py +88 -0
- llama_stack_api/vector_io/api.py +234 -0
- llama_stack_api/vector_io/fastapi_routes.py +447 -0
- llama_stack_api/vector_io/models.py +663 -0
- llama_stack_api/vector_stores.py +53 -0
- llama_stack_api/version.py +9 -0
- {llama_stack_api-0.4.3.dist-info → llama_stack_api-0.5.0rc1.dist-info}/METADATA +1 -1
- llama_stack_api-0.5.0rc1.dist-info/RECORD +115 -0
- llama_stack_api-0.5.0rc1.dist-info/top_level.txt +1 -0
- llama_stack_api-0.4.3.dist-info/RECORD +0 -4
- llama_stack_api-0.4.3.dist-info/top_level.txt +0 -1
- {llama_stack_api-0.4.3.dist-info → llama_stack_api-0.5.0rc1.dist-info}/WHEEL +0 -0

llama_stack_api/prompts/fastapi_routes.py (added, +163 lines)
@@ -0,0 +1,163 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""FastAPI router for the Prompts API.

This module defines the FastAPI router for the Prompts API using standard
FastAPI route decorators.
"""

from typing import Annotated

from fastapi import APIRouter, Body, Depends, Path, Query

from llama_stack_api.router_utils import create_path_dependency, standard_responses
from llama_stack_api.version import LLAMA_STACK_API_V1

from .api import Prompts
from .models import (
    CreatePromptRequest,
    DeletePromptRequest,
    GetPromptRequest,
    ListPromptsResponse,
    ListPromptVersionsRequest,
    Prompt,
    SetDefaultVersionBodyRequest,
    SetDefaultVersionRequest,
    UpdatePromptBodyRequest,
    UpdatePromptRequest,
)

# Path parameter dependencies for single-field models
list_prompt_versions_request = create_path_dependency(ListPromptVersionsRequest)
delete_prompt_request = create_path_dependency(DeletePromptRequest)


def create_router(impl: Prompts) -> APIRouter:
    """Create a FastAPI router for the Prompts API.

    Args:
        impl: The Prompts implementation instance

    Returns:
        APIRouter configured for the Prompts API
    """
    router = APIRouter(
        prefix=f"/{LLAMA_STACK_API_V1}",
        tags=["Prompts"],
        responses=standard_responses,
    )

    @router.get(
        "/prompts",
        response_model=ListPromptsResponse,
        summary="List all prompts.",
        description="List all prompts.",
        responses={
            200: {"description": "A ListPromptsResponse containing all prompts."},
        },
    )
    async def list_prompts() -> ListPromptsResponse:
        return await impl.list_prompts()

    @router.get(
        "/prompts/{prompt_id}/versions",
        response_model=ListPromptsResponse,
        summary="List prompt versions.",
        description="List all versions of a specific prompt.",
        responses={
            200: {"description": "A ListPromptsResponse containing all versions of the prompt."},
        },
    )
    async def list_prompt_versions(
        request: Annotated[ListPromptVersionsRequest, Depends(list_prompt_versions_request)],
    ) -> ListPromptsResponse:
        return await impl.list_prompt_versions(request)

    @router.get(
        "/prompts/{prompt_id}",
        response_model=Prompt,
        summary="Get a prompt.",
        description="Get a prompt by its identifier and optional version.",
        responses={
            200: {"description": "A Prompt resource."},
        },
    )
    async def get_prompt(
        prompt_id: Annotated[str, Path(description="The identifier of the prompt to get.")],
        version: Annotated[
            int | None, Query(description="The version of the prompt to get (defaults to latest).")
        ] = None,
    ) -> Prompt:
        request = GetPromptRequest(prompt_id=prompt_id, version=version)
        return await impl.get_prompt(request)

    @router.post(
        "/prompts",
        response_model=Prompt,
        summary="Create a prompt.",
        description="Create a new prompt.",
        responses={
            200: {"description": "The created Prompt resource."},
        },
    )
    async def create_prompt(
        request: Annotated[CreatePromptRequest, Body(...)],
    ) -> Prompt:
        return await impl.create_prompt(request)

    @router.put(
        "/prompts/{prompt_id}",
        response_model=Prompt,
        summary="Update a prompt.",
        description="Update an existing prompt (increments version).",
        responses={
            200: {"description": "The updated Prompt resource with incremented version."},
        },
    )
    async def update_prompt(
        prompt_id: Annotated[str, Path(description="The identifier of the prompt to update.")],
        body: Annotated[UpdatePromptBodyRequest, Body(...)],
    ) -> Prompt:
        request = UpdatePromptRequest(
            prompt_id=prompt_id,
            prompt=body.prompt,
            version=body.version,
            variables=body.variables,
            set_as_default=body.set_as_default,
        )
        return await impl.update_prompt(request)

    @router.delete(
        "/prompts/{prompt_id}",
        summary="Delete a prompt.",
        description="Delete a prompt.",
        responses={
            200: {"description": "The prompt was successfully deleted."},
        },
    )
    async def delete_prompt(
        request: Annotated[DeletePromptRequest, Depends(delete_prompt_request)],
    ) -> None:
        return await impl.delete_prompt(request)

    @router.put(
        "/prompts/{prompt_id}/set-default-version",
        response_model=Prompt,
        summary="Set prompt version.",
        description="Set which version of a prompt should be the default in get_prompt (latest).",
        responses={
            200: {"description": "The prompt with the specified version now set as default."},
        },
    )
    async def set_default_version(
        prompt_id: Annotated[str, Path(description="The identifier of the prompt.")],
        body: Annotated[SetDefaultVersionBodyRequest, Body(...)],
    ) -> Prompt:
        request = SetDefaultVersionRequest(prompt_id=prompt_id, version=body.version)
        return await impl.set_default_version(request)

    return router
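
The create_router factory above takes any object that satisfies the Prompts protocol, so the router can be mounted on a plain FastAPI app. A minimal sketch, not part of the diff; InMemoryPrompts is a hypothetical stub that implements only the list and create methods:

# Sketch only: InMemoryPrompts is a hypothetical partial implementation.
from fastapi import FastAPI

from llama_stack_api.prompts.fastapi_routes import create_router
from llama_stack_api.prompts.models import CreatePromptRequest, ListPromptsResponse, Prompt


class InMemoryPrompts:
    def __init__(self) -> None:
        self._prompts: dict[str, Prompt] = {}

    async def list_prompts(self) -> ListPromptsResponse:
        return ListPromptsResponse(data=list(self._prompts.values()))

    async def create_prompt(self, request: CreatePromptRequest) -> Prompt:
        # Prompt's own validators run here (id format, declared variables).
        prompt = Prompt(
            prompt_id=Prompt.generate_prompt_id(),
            version=1,
            prompt=request.prompt,
            variables=request.variables or [],
            is_default=True,
        )
        self._prompts[prompt.prompt_id] = prompt
        return prompt


app = FastAPI()
app.include_router(create_router(InMemoryPrompts()))  # mounted under the LLAMA_STACK_API_V1 prefix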

llama_stack_api/prompts/models.py (added, +177 lines)
@@ -0,0 +1,177 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Pydantic models for Prompts API requests and responses.

This module defines the request and response models for the Prompts API
using Pydantic with Field descriptions for OpenAPI schema generation.
"""

import re
import secrets

from pydantic import BaseModel, Field, field_validator, model_validator

from llama_stack_api.schema_utils import json_schema_type


@json_schema_type
class Prompt(BaseModel):
    """A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack."""

    prompt: str | None = Field(default=None, description="The system prompt with variable placeholders")
    version: int = Field(description="Version (integer starting at 1, incremented on save)", ge=1)
    prompt_id: str = Field(description="Unique identifier in format 'pmpt_<48-digit-hash>'")
    variables: list[str] = Field(
        default_factory=list, description="List of variable names that can be used in the prompt template"
    )
    is_default: bool = Field(
        default=False, description="Boolean indicating whether this version is the default version"
    )

    @field_validator("prompt_id")
    @classmethod
    def validate_prompt_id(cls, prompt_id: str) -> str:
        if not isinstance(prompt_id, str):
            raise TypeError("prompt_id must be a string in format 'pmpt_<48-digit-hash>'")

        if not prompt_id.startswith("pmpt_"):
            raise ValueError("prompt_id must start with 'pmpt_' prefix")

        hex_part = prompt_id[5:]
        if len(hex_part) != 48:
            raise ValueError("prompt_id must be in format 'pmpt_<48-digit-hash>' (48 lowercase hex chars)")

        for char in hex_part:
            if char not in "0123456789abcdef":
                raise ValueError("prompt_id hex part must contain only lowercase hex characters [0-9a-f]")

        return prompt_id

    @field_validator("version")
    @classmethod
    def validate_version(cls, prompt_version: int) -> int:
        if prompt_version < 1:
            raise ValueError("version must be >= 1")
        return prompt_version

    @model_validator(mode="after")
    def validate_prompt_variables(self):
        """Validate that all variables used in the prompt are declared in the variables list."""
        if not self.prompt:
            return self

        prompt_variables = set(re.findall(r"{{\s*(\w+)\s*}}", self.prompt))
        declared_variables = set(self.variables)

        undeclared = prompt_variables - declared_variables
        if undeclared:
            raise ValueError(f"Prompt contains undeclared variables: {sorted(undeclared)}")

        return self

    @classmethod
    def generate_prompt_id(cls) -> str:
        # Generate 48 hex characters (24 bytes)
        random_bytes = secrets.token_bytes(24)
        hex_string = random_bytes.hex()
        return f"pmpt_{hex_string}"


@json_schema_type
class ListPromptsResponse(BaseModel):
    """Response model to list prompts."""

    data: list[Prompt]


# Request models for each endpoint


@json_schema_type
class ListPromptVersionsRequest(BaseModel):
    """Request model for listing all versions of a prompt."""

    prompt_id: str = Field(..., description="The identifier of the prompt to list versions for.")


@json_schema_type
class GetPromptRequest(BaseModel):
    """Request model for getting a prompt by ID and optional version."""

    prompt_id: str = Field(..., description="The identifier of the prompt to get.")
    version: int | None = Field(default=None, description="The version of the prompt to get (defaults to latest).")


@json_schema_type
class CreatePromptRequest(BaseModel):
    """Request model for creating a new prompt."""

    prompt: str = Field(..., description="The prompt text content with variable placeholders.")
    variables: list[str] | None = Field(
        default=None, description="List of variable names that can be used in the prompt template."
    )


@json_schema_type
class UpdatePromptBodyRequest(BaseModel):
    """Request body model for updating a prompt."""

    prompt: str = Field(..., description="The updated prompt text content.")
    version: int = Field(..., description="The current version of the prompt being updated.")
    variables: list[str] | None = Field(
        default=None, description="Updated list of variable names that can be used in the prompt template."
    )
    set_as_default: bool = Field(default=True, description="Set the new version as the default (default=True).")


@json_schema_type
class UpdatePromptRequest(BaseModel):
    """Request model for updating a prompt (combines path and body parameters)."""

    prompt_id: str = Field(..., description="The identifier of the prompt to update.")
    prompt: str = Field(..., description="The updated prompt text content.")
    version: int = Field(..., description="The current version of the prompt being updated.")
    variables: list[str] | None = Field(
        default=None, description="Updated list of variable names that can be used in the prompt template."
    )
    set_as_default: bool = Field(default=True, description="Set the new version as the default (default=True).")


@json_schema_type
class DeletePromptRequest(BaseModel):
    """Request model for deleting a prompt."""

    prompt_id: str = Field(..., description="The identifier of the prompt to delete.")


@json_schema_type
class SetDefaultVersionBodyRequest(BaseModel):
    """Request body model for setting the default version of a prompt."""

    version: int = Field(..., description="The version to set as default.")


@json_schema_type
class SetDefaultVersionRequest(BaseModel):
    """Request model for setting the default version of a prompt (combines path and body parameters)."""

    prompt_id: str = Field(..., description="The identifier of the prompt.")
    version: int = Field(..., description="The version to set as default.")


__all__ = [
    "CreatePromptRequest",
    "DeletePromptRequest",
    "GetPromptRequest",
    "ListPromptVersionsRequest",
    "ListPromptsResponse",
    "Prompt",
    "SetDefaultVersionBodyRequest",
    "SetDefaultVersionRequest",
    "UpdatePromptBodyRequest",
    "UpdatePromptRequest",
]
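
The Prompt model enforces its invariants at validation time: prompt_id must be "pmpt_" plus 48 lowercase hex characters, version must be >= 1, and every {{ variable }} placeholder used in the template must be declared in the variables list. A short sketch of that behavior, not part of the diff:

from llama_stack_api.prompts.models import Prompt

pid = Prompt.generate_prompt_id()  # "pmpt_" + 48 lowercase hex chars

# Passes: the one placeholder used is declared.
ok = Prompt(prompt_id=pid, version=1, prompt="Hello {{ name }}!", variables=["name"])

# Fails: "{{ name }}" is used but not declared.
try:
    Prompt(prompt_id=pid, version=1, prompt="Hello {{ name }}!", variables=[])
except ValueError as e:  # pydantic's ValidationError subclasses ValueError
    print(e)  # message includes: Prompt contains undeclared variables: ['name']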

llama_stack_api/providers/__init__.py (added, +33 lines)
@@ -0,0 +1,33 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Providers API protocol and models.

This module contains the Providers protocol definition.
Pydantic models are defined in llama_stack_api.providers.models.
The FastAPI router is defined in llama_stack_api.providers.fastapi_routes.
"""

# Import fastapi_routes for router factory access
from . import fastapi_routes

# Import protocol for re-export
from .api import Providers

# Import models for re-export
from .models import (
    InspectProviderRequest,
    ListProvidersResponse,
    ProviderInfo,
)

__all__ = [
    "Providers",
    "ProviderInfo",
    "ListProvidersResponse",
    "InspectProviderRequest",
    "fastapi_routes",
]

llama_stack_api/providers/api.py (added, +16 lines)
@@ -0,0 +1,16 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Protocol, runtime_checkable

from .models import InspectProviderRequest, ListProvidersResponse, ProviderInfo


@runtime_checkable
class Providers(Protocol):
    async def list_providers(self) -> ListProvidersResponse: ...

    async def inspect_provider(self, request: InspectProviderRequest) -> ProviderInfo: ...
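
Because Providers is a @runtime_checkable Protocol, conformance is structural: isinstance() only checks that methods with the right names exist, not their signatures or return types. A minimal sketch, not part of the diff; NullProviders is hypothetical:

from llama_stack_api.providers.api import Providers
from llama_stack_api.providers.models import InspectProviderRequest, ListProvidersResponse, ProviderInfo


class NullProviders:
    """Structurally satisfies Providers without inheriting from it."""

    async def list_providers(self) -> ListProvidersResponse:
        raise NotImplementedError

    async def inspect_provider(self, request: InspectProviderRequest) -> ProviderInfo:
        raise NotImplementedError


assert isinstance(NullProviders(), Providers)  # True: method names match
assert not isinstance(object(), Providers)     # False: no matching methods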

llama_stack_api/providers/fastapi_routes.py (added, +57 lines)
@@ -0,0 +1,57 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""FastAPI router for the Providers API.

This module defines the FastAPI router for the Providers API using standard
FastAPI route decorators.
"""

from typing import Annotated

from fastapi import APIRouter, Depends

from llama_stack_api.router_utils import create_path_dependency, standard_responses
from llama_stack_api.version import LLAMA_STACK_API_V1

from .api import Providers
from .models import InspectProviderRequest, ListProvidersResponse, ProviderInfo

# Path parameter dependencies for single-field models
get_inspect_provider_request = create_path_dependency(InspectProviderRequest)


def create_router(impl: Providers) -> APIRouter:
    """Create a FastAPI router for the Providers API."""
    router = APIRouter(
        prefix=f"/{LLAMA_STACK_API_V1}",
        tags=["Providers"],
        responses=standard_responses,
    )

    @router.get(
        "/providers",
        response_model=ListProvidersResponse,
        summary="List providers.",
        description="List all available providers.",
        responses={200: {"description": "A ListProvidersResponse containing information about all providers."}},
    )
    async def list_providers() -> ListProvidersResponse:
        return await impl.list_providers()

    @router.get(
        "/providers/{provider_id}",
        response_model=ProviderInfo,
        summary="Get provider.",
        description="Get detailed information about a specific provider.",
        responses={200: {"description": "A ProviderInfo object containing the provider's details."}},
    )
    async def inspect_provider(
        request: Annotated[InspectProviderRequest, Depends(get_inspect_provider_request)],
    ) -> ProviderInfo:
        return await impl.inspect_provider(request)

    return router

llama_stack_api/providers/models.py (added, +24 lines)
@@ -0,0 +1,24 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Pydantic models for Providers API requests and responses.

This module re-exports models from llama_stack_api.admin.models to ensure
a single source of truth and avoid type conflicts.
"""

# Import and re-export shared models from admin
from llama_stack_api.admin.models import (
    InspectProviderRequest,
    ListProvidersResponse,
    ProviderInfo,
)

__all__ = [
    "ProviderInfo",
    "ListProvidersResponse",
    "InspectProviderRequest",
]

llama_stack_api/rag_tool.py (added, +168 lines)
@@ -0,0 +1,168 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from enum import Enum, StrEnum
from typing import Annotated, Any, Literal

from pydantic import BaseModel, Field, field_validator

from llama_stack_api.common.content_types import URL, InterleavedContent


class RRFRanker(BaseModel):
    """
    Reciprocal Rank Fusion (RRF) ranker configuration.

    :param type: The type of ranker, always "rrf"
    :param impact_factor: The impact factor for RRF scoring. Higher values give more weight to higher-ranked results.
        Must be greater than 0
    """

    type: Literal["rrf"] = "rrf"
    impact_factor: float = Field(default=60.0, gt=0.0)  # default of 60 for optimal performance


class WeightedRanker(BaseModel):
    """
    Weighted ranker configuration that combines vector and keyword scores.

    :param type: The type of ranker, always "weighted"
    :param alpha: Weight factor between 0 and 1.
        0 means only use keyword scores,
        1 means only use vector scores,
        values in between blend both scores.
    """

    type: Literal["weighted"] = "weighted"
    alpha: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Weight factor between 0 and 1. 0 means only keyword scores, 1 means only vector scores.",
    )


Ranker = Annotated[
    RRFRanker | WeightedRanker,
    Field(discriminator="type"),
]


class RAGDocument(BaseModel):
    """
    A document to be used for document ingestion in the RAG Tool.

    :param document_id: The unique identifier for the document.
    :param content: The content of the document.
    :param mime_type: The MIME type of the document.
    :param metadata: Additional metadata for the document.
    """

    document_id: str
    content: InterleavedContent | URL
    mime_type: str | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)


class RAGQueryResult(BaseModel):
    """Result of a RAG query containing retrieved content and metadata.

    :param content: (Optional) The retrieved content from the query
    :param metadata: Additional metadata about the query result
    """

    content: InterleavedContent | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)


class RAGQueryGenerator(Enum):
    """Types of query generators for RAG systems.

    :cvar default: Default query generator using simple text processing
    :cvar llm: LLM-based query generator for enhanced query understanding
    :cvar custom: Custom query generator implementation
    """

    default = "default"
    llm = "llm"
    custom = "custom"


class RAGSearchMode(StrEnum):
    """
    Search modes for RAG query retrieval:
    - VECTOR: Uses vector similarity search for semantic matching
    - KEYWORD: Uses keyword-based search for exact matching
    - HYBRID: Combines both vector and keyword search for better results
    """

    VECTOR = "vector"
    KEYWORD = "keyword"
    HYBRID = "hybrid"


class DefaultRAGQueryGeneratorConfig(BaseModel):
    """Configuration for the default RAG query generator.

    :param type: Type of query generator, always 'default'
    :param separator: String separator used to join query terms
    """

    type: Literal["default"] = "default"
    separator: str = " "


class LLMRAGQueryGeneratorConfig(BaseModel):
    """Configuration for the LLM-based RAG query generator.

    :param type: Type of query generator, always 'llm'
    :param model: Name of the language model to use for query generation
    :param template: Template string for formatting the query generation prompt
    """

    type: Literal["llm"] = "llm"
    model: str
    template: str


RAGQueryGeneratorConfig = Annotated[
    DefaultRAGQueryGeneratorConfig | LLMRAGQueryGeneratorConfig,
    Field(discriminator="type"),
]


class RAGQueryConfig(BaseModel):
    """
    Configuration for the RAG query generation.

    :param query_generator_config: Configuration for the query generator.
    :param max_tokens_in_context: Maximum number of tokens in the context.
    :param max_chunks: Maximum number of chunks to retrieve.
    :param chunk_template: Template for formatting each retrieved chunk in the context.
        Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict).
        Default: "Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n"
    :param mode: Search mode for retrieval, either "vector", "keyword", or "hybrid". Default "vector".
    :param ranker: Configuration for the ranker to use in hybrid search. Defaults to RRF ranker.
    """

    # This config defines how a query is generated using the messages
    # for memory bank retrieval.
    query_generator_config: RAGQueryGeneratorConfig = Field(default=DefaultRAGQueryGeneratorConfig())
    max_tokens_in_context: int = 4096
    max_chunks: int = 5
    chunk_template: str = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"
    mode: RAGSearchMode | None = RAGSearchMode.VECTOR
    ranker: Ranker | None = Field(default=None)  # Only used for hybrid mode

    @field_validator("chunk_template")
    def validate_chunk_template(cls, v: str) -> str:
        if "{chunk.content}" not in v:
            raise ValueError("chunk_template must contain {chunk.content}")
        if "{index}" not in v:
            raise ValueError("chunk_template must contain {index}")
        if len(v) == 0:
            raise ValueError("chunk_template must not be empty")
        return v
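
Ranker and RAGQueryGeneratorConfig are discriminated unions keyed on their "type" field, so plain dicts are parsed into the matching concrete model during validation, and the chunk_template validator rejects templates that drop a required placeholder. A short sketch, not part of the diff; the model name and template string are placeholder values:

from llama_stack_api.rag_tool import RAGQueryConfig

config = RAGQueryConfig.model_validate(
    {
        "mode": "hybrid",
        "max_chunks": 10,
        "ranker": {"type": "weighted", "alpha": 0.7},  # parsed as WeightedRanker
        "query_generator_config": {"type": "llm", "model": "example-model", "template": "<prompt template>"},
    }
)
print(type(config.ranker).__name__)  # WeightedRanker

try:
    RAGQueryConfig(chunk_template="Result {index}\n")  # missing {chunk.content}
except ValueError as e:  # pydantic's ValidationError subclasses ValueError
    print(e)  # message includes: chunk_template must contain {chunk.content}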

llama_stack_api/resource.py (added, +36 lines)
@@ -0,0 +1,36 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from enum import StrEnum

from pydantic import BaseModel, Field


class ResourceType(StrEnum):
    model = "model"
    shield = "shield"
    vector_store = "vector_store"
    dataset = "dataset"
    scoring_function = "scoring_function"
    benchmark = "benchmark"
    tool = "tool"
    tool_group = "tool_group"
    prompt = "prompt"


class Resource(BaseModel):
    """Base class for all Llama Stack resources"""

    identifier: str = Field(description="Unique identifier for this resource in llama stack")

    provider_resource_id: str | None = Field(
        default=None,
        description="Unique identifier for this resource in the provider",
    )

    provider_id: str = Field(description="ID of the provider that owns this resource")

    type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_store', etc.)")