lightspeed-stack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app/__init__.py +1 -0
- app/endpoints/.ruff_cache/.gitignore +2 -0
- app/endpoints/.ruff_cache/0.9.1/5703048272820174433 +0 -0
- app/endpoints/.ruff_cache/0.9.1/9961612457335986079 +0 -0
- app/endpoints/.ruff_cache/CACHEDIR.TAG +1 -0
- app/endpoints/__init__.py +1 -0
- app/endpoints/config.py +64 -0
- app/endpoints/feedback.py +129 -0
- app/endpoints/health.py +111 -0
- app/endpoints/info.py +26 -0
- app/endpoints/models.py +79 -0
- app/endpoints/query.py +360 -0
- app/endpoints/root.py +777 -0
- app/endpoints/streaming_query.py +321 -0
- app/main.py +38 -0
- app/routers.py +30 -0
- auth/__init__.py +38 -0
- auth/interface.py +13 -0
- auth/k8s.py +270 -0
- auth/noop.py +42 -0
- auth/noop_with_token.py +46 -0
- auth/utils.py +26 -0
- lightspeed_stack-0.1.0.dist-info/METADATA +443 -0
- lightspeed_stack-0.1.0.dist-info/RECORD +43 -0
- lightspeed_stack-0.1.0.dist-info/WHEEL +4 -0
- lightspeed_stack-0.1.0.dist-info/entry_points.txt +4 -0
- lightspeed_stack-0.1.0.dist-info/licenses/LICENSE +201 -0
- models/__init__.py +1 -0
- models/config.py +161 -0
- models/requests.py +208 -0
- models/responses.py +244 -0
- runners/__init__.py +1 -0
- runners/uvicorn.py +31 -0
- utils/.ruff_cache/.gitignore +2 -0
- utils/.ruff_cache/0.9.1/18446581155718949728 +0 -0
- utils/.ruff_cache/0.9.1/4991844299736624256 +0 -0
- utils/.ruff_cache/CACHEDIR.TAG +1 -0
- utils/__init__.py +1 -0
- utils/checks.py +27 -0
- utils/common.py +111 -0
- utils/endpoints.py +34 -0
- utils/mcp_headers.py +48 -0
- utils/suid.py +28 -0
app/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""REST API service based on FastAPI."""
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Signature: 8a477f597d28d172789f06886806bc55
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Implementation of all endpoints."""
|
app/endpoints/config.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Handler for REST API call to retrieve service configuration."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from fastapi import APIRouter, Request
|
|
7
|
+
|
|
8
|
+
from models.config import Configuration
|
|
9
|
+
from configuration import configuration
|
|
10
|
+
from utils.endpoints import check_configuration_loaded
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
router = APIRouter(tags=["config"])
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
get_config_responses: dict[int | str, dict[str, Any]] = {
|
|
17
|
+
200: {
|
|
18
|
+
"name": "foo bar baz",
|
|
19
|
+
"service": {
|
|
20
|
+
"host": "localhost",
|
|
21
|
+
"port": 8080,
|
|
22
|
+
"auth_enabled": False,
|
|
23
|
+
"workers": 1,
|
|
24
|
+
"color_log": True,
|
|
25
|
+
"access_log": True,
|
|
26
|
+
"tls_config": {
|
|
27
|
+
"tls_certificate_path": "config/certificate.crt",
|
|
28
|
+
"tls_key_path": "config/private.key",
|
|
29
|
+
"tls_key_password": None,
|
|
30
|
+
},
|
|
31
|
+
},
|
|
32
|
+
"llama_stack": {
|
|
33
|
+
"url": "http://localhost:8321",
|
|
34
|
+
"api_key": "xyzzy",
|
|
35
|
+
"use_as_library_client": False,
|
|
36
|
+
"library_client_config_path": None,
|
|
37
|
+
},
|
|
38
|
+
"user_data_collection": {
|
|
39
|
+
"feedback_disabled": False,
|
|
40
|
+
"feedback_storage": "/tmp/data/feedback",
|
|
41
|
+
"transcripts_disabled": True,
|
|
42
|
+
"transcripts_storage": None,
|
|
43
|
+
},
|
|
44
|
+
"mcp_servers": [
|
|
45
|
+
{"name": "server1", "provider_id": "provider1", "url": "http://url.com:1"},
|
|
46
|
+
{"name": "server2", "provider_id": "provider2", "url": "http://url.com:2"},
|
|
47
|
+
{"name": "server3", "provider_id": "provider3", "url": "http://url.com:3"},
|
|
48
|
+
],
|
|
49
|
+
},
|
|
50
|
+
503: {
|
|
51
|
+
"detail": {
|
|
52
|
+
"response": "Configuration is no loaded",
|
|
53
|
+
}
|
|
54
|
+
},
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@router.get("/config", responses=get_config_responses)
def config_endpoint_handler(_request: Request) -> Configuration:
    """Return the service configuration held by the global configuration object.

    The loaded-configuration check is invoked before the configuration
    object itself is read.
    """
    check_configuration_loaded(configuration)

    # NOTE(review): the sample payload documented for this route contains an
    # `api_key` entry - confirm that returning it to clients is intended.
    loaded_configuration = configuration.configuration
    return loaded_configuration
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Handlers for REST API calls to submit feedback and query its status."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Any
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import json
|
|
7
|
+
from datetime import datetime, UTC
|
|
8
|
+
|
|
9
|
+
from fastapi import APIRouter, Request, HTTPException, Depends, status
|
|
10
|
+
|
|
11
|
+
from auth import get_auth_dependency
|
|
12
|
+
from configuration import configuration
|
|
13
|
+
from models.responses import FeedbackResponse, StatusResponse
|
|
14
|
+
from models.requests import FeedbackRequest
|
|
15
|
+
from utils.suid import get_suid
|
|
16
|
+
from utils.common import retrieve_user_id
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
router = APIRouter(prefix="/feedback", tags=["feedback"])
|
|
20
|
+
auth_dependency = get_auth_dependency()
|
|
21
|
+
|
|
22
|
+
# Response for the feedback endpoint
|
|
23
|
+
feedback_response: dict[int | str, dict[str, Any]] = {
|
|
24
|
+
200: {"response": "Feedback received and stored"},
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def is_feedback_enabled() -> bool:
    """Tell whether feedback collection is switched on.

    Returns:
        bool: False when the `feedback_disabled` flag of the user data
        collection configuration is truthy, True otherwise.
    """
    collection_settings = configuration.user_data_collection_configuration
    return not collection_settings.feedback_disabled
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
async def assert_feedback_enabled(_request: Request) -> None:
    """Reject the call when feedback is disabled.

    Args:
        _request (Request): The FastAPI request object (not used here).

    Raises:
        HTTPException: With status 403 if feedback is disabled.
    """
    # nothing to do when feedback is enabled
    if is_feedback_enabled():
        return

    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Forbidden: Feedback is disabled",
    )
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@router.post("", responses=feedback_response)
def feedback_endpoint_handler(
    _request: Request,
    feedback_request: FeedbackRequest,
    _ensure_feedback_enabled: Any = Depends(assert_feedback_enabled),
    auth: Any = Depends(auth_dependency),
) -> FeedbackResponse:
    """Store the feedback submitted by a user.

    Args:
        _request: The FastAPI request object (not used by the handler).
        feedback_request: The request containing feedback information.
        _ensure_feedback_enabled: Result of the `assert_feedback_enabled`
            dependency, declared so that the feedback status check runs.
        auth: Value produced by the authentication dependency; the user ID
            is retrieved from it.

    Returns:
        Response confirming that the feedback was received.

    Raises:
        HTTPException: With status 500 when storing the feedback fails.
    """
    feedback_repr = str(feedback_request)
    logger.debug("Feedback received %s", feedback_repr)

    submitter_id = retrieve_user_id(auth)
    try:
        # dumping the model stays inside the `try` so that a failure here is
        # reported the same way as a storage failure
        feedback_fields = feedback_request.model_dump(exclude={"model_config"})
        store_feedback(submitter_id, feedback_fields)
    except Exception as storage_error:
        logger.error("Error storing user feedback: %s", storage_error)
        failure_detail = {
            "response": "Error storing user feedback",
            "cause": str(storage_error),
        }
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=failure_detail,
        ) from storage_error

    return FeedbackResponse(response="feedback received")
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def store_feedback(user_id: str, feedback: dict) -> None:
    """Store feedback in the local filesystem.

    The feedback is written as a JSON document into the configured feedback
    storage directory, in a file named after a newly generated ID.

    Args:
        user_id: The user ID (UUID).
        feedback: The feedback to store.
    """
    logger.debug("Storing feedback for user %s", user_id)
    # Creates storage path only if it doesn't exist. The `exist_ok=True` prevents
    # race conditions in case of multiple server instances trying to set up storage
    # at the same location. An unset storage setting becomes `Path("")`, i.e. the
    # current working directory.
    storage_path = Path(
        configuration.user_data_collection_configuration.feedback_storage or ""
    )
    storage_path.mkdir(parents=True, exist_ok=True)

    current_time = str(datetime.now(UTC))
    # Server-side metadata goes in first; same-named keys in the submitted
    # feedback must not be able to overwrite it.
    data_to_store = {"user_id": user_id, "timestamp": current_time}
    for key, value in feedback.items():
        data_to_store.setdefault(key, value)

    # stores feedback in a file under unique uuid
    feedback_file_path = storage_path / f"{get_suid()}.json"
    with open(feedback_file_path, "w", encoding="utf-8") as feedback_file:
        json.dump(data_to_store, feedback_file)

    logger.info("Feedback stored successfully at %s", feedback_file_path)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@router.get("/status")
def feedback_status() -> StatusResponse:
    """Report whether the feedback functionality is enabled.

    Returns:
        Status response for the "feedback" functionality carrying an
        `enabled` flag.
    """
    logger.debug("Feedback status requested")
    status_payload = {"enabled": is_feedback_enabled()}
    return StatusResponse(functionality="feedback", status=status_payload)
|
app/endpoints/health.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Handlers for health REST API endpoints.
|
|
2
|
+
|
|
3
|
+
These endpoints are used to check if service is live and prepared to accept
|
|
4
|
+
requests. Note that these endpoints can be accessed using GET or HEAD HTTP
|
|
5
|
+
methods. For HEAD HTTP method, just the HTTP response code is used.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from llama_stack.providers.datatypes import HealthStatus
|
|
12
|
+
|
|
13
|
+
from fastapi import APIRouter, status, Response
|
|
14
|
+
from client import get_llama_stack_client
|
|
15
|
+
from configuration import configuration
|
|
16
|
+
from models.responses import (
|
|
17
|
+
LivenessResponse,
|
|
18
|
+
ReadinessResponse,
|
|
19
|
+
ProviderHealthStatus,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
router = APIRouter(tags=["health"])
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_providers_health_statuses() -> list[ProviderHealthStatus]:
    """Collect the health status reported by every provider.

    Returns:
        One entry per provider listed by the Llama Stack client. When
        anything fails while the statuses are gathered, a single entry with
        provider ID "unknown" and the error status is returned instead.
    """
    try:
        client = get_llama_stack_client(configuration.llama_stack_configuration)

        providers = client.providers.list()
        logger.debug("Found %d providers", len(providers))

        statuses: list[ProviderHealthStatus] = []
        for provider in providers:
            health = provider.health
            statuses.append(
                ProviderHealthStatus(
                    provider_id=provider.provider_id,
                    # missing entries fall back to "unknown" / empty message
                    status=str(health.get("status", "unknown")),
                    message=str(health.get("message", "")),
                )
            )
        return statuses

    except Exception as e:  # pylint: disable=broad-exception-caught
        # eg. no providers defined
        logger.error("Failed to check providers health: %s", e)
        fallback_status = ProviderHealthStatus(
            provider_id="unknown",
            status=HealthStatus.ERROR.value,
            message=f"Failed to initialize health check: {str(e)}",
        )
        return [fallback_status]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# Responses documented for the /readiness route, keyed by HTTP status code;
# both outcomes use the same response model.
get_readiness_responses: dict[int | str, dict[str, Any]] = {
    200: {"description": "Service is ready", "model": ReadinessResponse},
    503: {"description": "Service is not ready", "model": ReadinessResponse},
}
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@router.get("/readiness", responses=get_readiness_responses)
def readiness_probe_get_method(response: Response) -> ReadinessResponse:
    """Report readiness of the service based on provider health.

    The HTTP status of `response` is set to 503 when at least one provider
    reports the error status; the returned body lists those providers.
    """
    # Only providers whose status equals the ERROR value count as unhealthy
    # (any other status, e.g. not_implemented, does not).
    failing_providers = [
        entry
        for entry in get_providers_health_statuses()
        if entry.status == HealthStatus.ERROR.value
    ]

    if not failing_providers:
        return ReadinessResponse(
            ready=True,
            reason="All providers are healthy",
            providers=failing_providers,
        )

    response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
    failing_names = ", ".join(entry.provider_id for entry in failing_providers)
    return ReadinessResponse(
        ready=False,
        reason=f"Providers not healthy: {failing_names}",
        providers=failing_providers,
    )
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# Responses documented for the /liveness route, keyed by HTTP status code;
# both outcomes use the same response model.
get_liveness_responses: dict[int | str, dict[str, Any]] = {
    200: {"description": "Service is alive", "model": LivenessResponse},
    503: {"description": "Service is not alive", "model": LivenessResponse},
}
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@router.get("/liveness", responses=get_liveness_responses)
def liveness_probe_get_method() -> LivenessResponse:
    """Report that the service is alive; the answer is always `alive=True`."""
    liveness = LivenessResponse(alive=True)
    return liveness
|
app/endpoints/info.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Handler for REST API call to provide info."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from fastapi import APIRouter, Request
|
|
7
|
+
|
|
8
|
+
from version import __version__
|
|
9
|
+
from models.responses import InfoResponse
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
router = APIRouter(tags=["info"])
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
get_into_responses: dict[int | str, dict[str, Any]] = {
|
|
16
|
+
200: {
|
|
17
|
+
"name": "Service name",
|
|
18
|
+
"version": "Service version",
|
|
19
|
+
},
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@router.get("/info", responses=get_into_responses)
def info_endpoint_handler(_request: Request) -> InfoResponse:
    """Return the service name and version for the /info endpoint."""
    # NOTE(review): the service name is the fixed literal "foo", which looks
    # like a placeholder - confirm whether it should come from configuration.
    service_info = InfoResponse(name="foo", version=__version__)
    return service_info
|
app/endpoints/models.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Handler for REST API call to list available models."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from llama_stack_client import APIConnectionError
|
|
7
|
+
from fastapi import APIRouter, HTTPException, Request, status
|
|
8
|
+
|
|
9
|
+
from client import get_llama_stack_client
|
|
10
|
+
from configuration import configuration
|
|
11
|
+
from models.responses import ModelsResponse
|
|
12
|
+
from utils.endpoints import check_configuration_loaded
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
router = APIRouter(tags=["models"])
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
models_responses: dict[int | str, dict[str, Any]] = {
|
|
19
|
+
200: {
|
|
20
|
+
"models": [
|
|
21
|
+
{
|
|
22
|
+
"identifier": "all-MiniLM-L6-v2",
|
|
23
|
+
"metadata": {"embedding_dimension": 384},
|
|
24
|
+
"api_model_type": "embedding",
|
|
25
|
+
"provider_id": "ollama",
|
|
26
|
+
"provider_resource_id": "all-minilm:latest",
|
|
27
|
+
"type": "model",
|
|
28
|
+
"model_type": "embedding",
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"identifier": "llama3.2:3b-instruct-fp16",
|
|
32
|
+
"metadata": {},
|
|
33
|
+
"api_model_type": "llm",
|
|
34
|
+
"provider_id": "ollama",
|
|
35
|
+
"provider_resource_id": "llama3.2:3b-instruct-fp16",
|
|
36
|
+
"type": "model",
|
|
37
|
+
"model_type": "llm",
|
|
38
|
+
},
|
|
39
|
+
]
|
|
40
|
+
},
|
|
41
|
+
503: {"description": "Connection to Llama Stack is broken"},
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@router.get("/models", responses=models_responses)
def models_endpoint_handler(_request: Request) -> ModelsResponse:
    """List the models reported by the configured Llama Stack.

    Returns:
        Response wrapping every listed model converted to a plain dict.

    Raises:
        HTTPException: With status 500 when the connection to Llama Stack
            fails or when any other error occurs while listing models.
    """
    check_configuration_loaded(configuration)

    llama_stack_configuration = configuration.llama_stack_configuration
    # NOTE(review): this logs the whole Llama Stack configuration at INFO
    # level; the sample configuration contains an `api_key` entry - confirm
    # that no secret ends up in the logs.
    logger.info("LLama stack config: %s", llama_stack_configuration)

    try:
        # obtain the Llama Stack client and ask it for the models
        client = get_llama_stack_client(llama_stack_configuration)
        model_dicts = [dict(model) for model in client.models.list()]
        return ModelsResponse(models=model_dicts)
    except APIConnectionError as e:
        # connection to Llama Stack server failed
        # NOTE(review): the documented responses for this route list 503 for
        # a broken connection, while 500 is raised here - confirm which one
        # is intended.
        logger.error("Unable to connect to Llama Stack: %s", e)
        connection_failure = {
            "response": "Unable to connect to Llama Stack",
            "cause": str(e),
        }
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=connection_failure,
        ) from e
    except Exception as e:
        # any other exception that can occur during model listing
        logger.error("Unable to retrieve list of models: %s", e)
        listing_failure = {
            "response": "Unable to retrieve list of models",
            "cause": str(e),
        }
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=listing_failure,
        ) from e
|