llama-stack 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registries.
- llama_stack/apis/agents/agents.py +26 -14
- llama_stack/apis/batch_inference/batch_inference.py +3 -2
- llama_stack/apis/batches/batches.py +5 -4
- llama_stack/apis/benchmarks/benchmarks.py +16 -3
- llama_stack/apis/datasetio/datasetio.py +3 -2
- llama_stack/apis/datasets/datasets.py +5 -4
- llama_stack/apis/eval/eval.py +27 -5
- llama_stack/apis/files/files.py +7 -6
- llama_stack/apis/inference/inference.py +13 -11
- llama_stack/apis/inspect/inspect.py +4 -3
- llama_stack/apis/models/models.py +6 -5
- llama_stack/apis/post_training/post_training.py +13 -6
- llama_stack/apis/prompts/prompts.py +8 -7
- llama_stack/apis/providers/providers.py +3 -2
- llama_stack/apis/safety/safety.py +3 -2
- llama_stack/apis/scoring/scoring.py +3 -2
- llama_stack/apis/scoring_functions/scoring_functions.py +12 -3
- llama_stack/apis/shields/shields.py +5 -4
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +2 -1
- llama_stack/apis/telemetry/telemetry.py +21 -8
- llama_stack/apis/tools/rag_tool.py +3 -2
- llama_stack/apis/tools/tools.py +9 -8
- llama_stack/apis/vector_dbs/vector_dbs.py +5 -4
- llama_stack/apis/vector_io/vector_io.py +25 -14
- llama_stack/apis/version.py +3 -1
- llama_stack/cli/stack/_build.py +7 -0
- llama_stack/cli/verify_download.py +7 -10
- llama_stack/core/build_container.sh +2 -2
- llama_stack/core/client.py +18 -2
- llama_stack/core/datatypes.py +10 -7
- llama_stack/core/distribution.py +7 -20
- llama_stack/core/library_client.py +6 -4
- llama_stack/core/routers/__init__.py +4 -1
- llama_stack/core/routers/inference.py +12 -7
- llama_stack/core/routing_tables/benchmarks.py +4 -0
- llama_stack/core/routing_tables/common.py +4 -0
- llama_stack/core/routing_tables/models.py +1 -1
- llama_stack/core/routing_tables/scoring_functions.py +4 -0
- llama_stack/core/routing_tables/toolgroups.py +13 -2
- llama_stack/core/server/routes.py +15 -15
- llama_stack/core/server/server.py +99 -124
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +66 -60
- llama_stack/core/start_stack.sh +1 -1
- llama_stack/distributions/ci-tests/build.yaml +1 -0
- llama_stack/distributions/ci-tests/run.yaml +7 -0
- llama_stack/distributions/nvidia/build.yaml +2 -0
- llama_stack/distributions/nvidia/nvidia.py +12 -10
- llama_stack/distributions/nvidia/run-with-safety.yaml +9 -0
- llama_stack/distributions/nvidia/run.yaml +10 -84
- llama_stack/distributions/starter/build.yaml +1 -0
- llama_stack/distributions/starter/run.yaml +7 -0
- llama_stack/distributions/starter/starter.py +20 -2
- llama_stack/distributions/starter-gpu/build.yaml +1 -0
- llama_stack/distributions/starter-gpu/run.yaml +7 -0
- llama_stack/distributions/watsonx/run.yaml +9 -0
- llama_stack/distributions/watsonx/watsonx.py +10 -2
- llama_stack/providers/datatypes.py +17 -71
- llama_stack/providers/inline/eval/meta_reference/eval.py +7 -0
- llama_stack/providers/inline/files/localfs/files.py +2 -3
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +3 -0
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +6 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +101 -46
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +19 -22
- llama_stack/providers/registry/eval.py +10 -11
- llama_stack/providers/registry/files.py +8 -15
- llama_stack/providers/registry/inference.py +189 -191
- llama_stack/providers/registry/post_training.py +8 -9
- llama_stack/providers/registry/safety.py +23 -27
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +41 -47
- llama_stack/providers/registry/vector_io.py +59 -59
- llama_stack/providers/remote/eval/nvidia/eval.py +12 -4
- llama_stack/providers/remote/files/s3/files.py +2 -3
- llama_stack/providers/remote/inference/anthropic/__init__.py +0 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +12 -2
- llama_stack/providers/remote/inference/azure/__init__.py +15 -0
- llama_stack/providers/remote/inference/azure/azure.py +62 -0
- llama_stack/providers/remote/inference/azure/config.py +63 -0
- llama_stack/providers/remote/inference/bedrock/bedrock.py +50 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +14 -14
- llama_stack/providers/remote/inference/cerebras/config.py +2 -2
- llama_stack/providers/remote/inference/databricks/__init__.py +2 -1
- llama_stack/providers/remote/inference/databricks/config.py +5 -5
- llama_stack/providers/remote/inference/databricks/databricks.py +84 -94
- llama_stack/providers/remote/inference/fireworks/fireworks.py +17 -169
- llama_stack/providers/remote/inference/gemini/__init__.py +0 -6
- llama_stack/providers/remote/inference/gemini/gemini.py +4 -2
- llama_stack/providers/remote/inference/groq/__init__.py +1 -3
- llama_stack/providers/remote/inference/groq/groq.py +0 -3
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +0 -3
- llama_stack/providers/remote/inference/nvidia/nvidia.py +9 -8
- llama_stack/providers/remote/inference/ollama/ollama.py +70 -217
- llama_stack/providers/remote/inference/openai/__init__.py +0 -6
- llama_stack/providers/remote/inference/openai/openai.py +5 -2
- llama_stack/providers/remote/inference/passthrough/passthrough.py +1 -1
- llama_stack/providers/remote/inference/sambanova/__init__.py +1 -3
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -3
- llama_stack/providers/remote/inference/tgi/tgi.py +43 -15
- llama_stack/providers/remote/inference/together/together.py +85 -130
- llama_stack/providers/remote/inference/vertexai/vertexai.py +29 -6
- llama_stack/providers/remote/inference/vllm/__init__.py +6 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +56 -193
- llama_stack/providers/remote/inference/watsonx/config.py +2 -2
- llama_stack/providers/remote/inference/watsonx/watsonx.py +19 -3
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +6 -2
- llama_stack/providers/utils/inference/inference_store.py +130 -22
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +3 -3
- llama_stack/providers/utils/inference/model_registry.py +9 -22
- llama_stack/providers/utils/inference/openai_mixin.py +109 -24
- llama_stack/providers/utils/kvstore/config.py +5 -5
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +8 -3
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +7 -0
- llama_stack/providers/utils/responses/responses_store.py +2 -5
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +19 -6
- llama_stack/providers/utils/telemetry/tracing.py +29 -15
- llama_stack/providers/utils/vector_io/vector_utils.py +2 -4
- llama_stack/schema_utils.py +15 -1
- llama_stack/testing/inference_recorder.py +51 -31
- {llama_stack-0.2.21.dist-info → llama_stack-0.2.23.dist-info}/METADATA +15 -15
- {llama_stack-0.2.21.dist-info → llama_stack-0.2.23.dist-info}/RECORD +126 -134
- llama_stack/providers/remote/inference/anthropic/models.py +0 -40
- llama_stack/providers/remote/inference/cerebras/models.py +0 -28
- llama_stack/providers/remote/inference/fireworks/models.py +0 -70
- llama_stack/providers/remote/inference/gemini/models.py +0 -34
- llama_stack/providers/remote/inference/groq/models.py +0 -48
- llama_stack/providers/remote/inference/llama_openai_compat/models.py +0 -25
- llama_stack/providers/remote/inference/nvidia/models.py +0 -109
- llama_stack/providers/remote/inference/ollama/models.py +0 -106
- llama_stack/providers/remote/inference/openai/models.py +0 -60
- llama_stack/providers/remote/inference/sambanova/models.py +0 -28
- llama_stack/providers/remote/inference/together/models.py +0 -77
- llama_stack/providers/remote/inference/vertexai/models.py +0 -20
- {llama_stack-0.2.21.dist-info → llama_stack-0.2.23.dist-info}/WHEEL +0 -0
- {llama_stack-0.2.21.dist-info → llama_stack-0.2.23.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.2.21.dist-info → llama_stack-0.2.23.dist-info}/licenses/LICENSE +0 -0
- {llama_stack-0.2.21.dist-info → llama_stack-0.2.23.dist-info}/top_level.txt +0 -0
llama_stack/apis/agents/agents.py
CHANGED
@@ -27,6 +27,7 @@ from llama_stack.apis.inference import (
 )
 from llama_stack.apis.safety import SafetyViolation
 from llama_stack.apis.tools import ToolDef
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod

 from .openai_responses import (
@@ -481,7 +482,7 @@ class Agents(Protocol):
     - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.
     """

-    @webmethod(route="/agents", method="POST", descriptive_name="create_agent")
+    @webmethod(route="/agents", method="POST", descriptive_name="create_agent", level=LLAMA_STACK_API_V1)
     async def create_agent(
         self,
         agent_config: AgentConfig,
@@ -494,7 +495,10 @@ class Agents(Protocol):
         ...

     @webmethod(
-        route="/agents/{agent_id}/session/{session_id}/turn",
+        route="/agents/{agent_id}/session/{session_id}/turn",
+        method="POST",
+        descriptive_name="create_agent_turn",
+        level=LLAMA_STACK_API_V1,
     )
     async def create_agent_turn(
         self,
@@ -524,6 +528,7 @@ class Agents(Protocol):
         route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
         method="POST",
         descriptive_name="resume_agent_turn",
+        level=LLAMA_STACK_API_V1,
     )
     async def resume_agent_turn(
         self,
@@ -549,6 +554,7 @@ class Agents(Protocol):
     @webmethod(
         route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
         method="GET",
+        level=LLAMA_STACK_API_V1,
     )
     async def get_agents_turn(
         self,
@@ -568,6 +574,7 @@ class Agents(Protocol):
     @webmethod(
         route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
         method="GET",
+        level=LLAMA_STACK_API_V1,
     )
     async def get_agents_step(
         self,
@@ -586,7 +593,12 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(
+    @webmethod(
+        route="/agents/{agent_id}/session",
+        method="POST",
+        descriptive_name="create_agent_session",
+        level=LLAMA_STACK_API_V1,
+    )
     async def create_agent_session(
         self,
         agent_id: str,
@@ -600,7 +612,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET")
+    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_agents_session(
         self,
         session_id: str,
@@ -616,7 +628,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE")
+    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_agents_session(
         self,
         session_id: str,
@@ -629,7 +641,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}", method="DELETE")
+    @webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_agent(
         self,
         agent_id: str,
@@ -640,7 +652,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents", method="GET")
+    @webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1)
     async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse:
         """List all agents.

@@ -650,7 +662,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}", method="GET")
+    @webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_agent(self, agent_id: str) -> Agent:
         """Describe an agent by its ID.

@@ -659,7 +671,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}/sessions", method="GET")
+    @webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1)
     async def list_agent_sessions(
         self,
         agent_id: str,
@@ -682,7 +694,7 @@ class Agents(Protocol):
     #
     # Both of these APIs are inherently stateful.

-    @webmethod(route="/openai/v1/responses/{response_id}", method="GET")
+    @webmethod(route="/openai/v1/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_openai_response(
         self,
         response_id: str,
@@ -694,7 +706,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/responses", method="POST")
+    @webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1)
     async def create_openai_response(
         self,
         input: str | list[OpenAIResponseInput],
@@ -719,7 +731,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/responses", method="GET")
+    @webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1)
     async def list_openai_responses(
         self,
         after: str | None = None,
@@ -737,7 +749,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET")
+    @webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
     async def list_openai_response_input_items(
         self,
         response_id: str,
@@ -759,7 +771,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE")
+    @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
         """Delete an OpenAI response by its ID.

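The change repeated across the API modules in this release is visible above: every @webmethod route gains an explicit level= argument that pins it to a named API version imported from llama_stack.apis.version. A minimal sketch of the pattern, using only decorator arguments that appear in this diff (the ExampleAPI protocol and get_item method below are hypothetical, invented for illustration; they are not llama_stack source):

# Hypothetical illustration of the versioned-route pattern from this release.
# webmethod, level=, and LLAMA_STACK_API_V1 are real names taken from the diff;
# ExampleAPI and get_item are invented for this sketch.
from typing import Protocol

from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import webmethod


class ExampleAPI(Protocol):
    # Before 0.2.23: @webmethod(route="/example/{item_id}", method="GET")
    # After 0.2.23: the serving API version is declared per route.
    @webmethod(route="/example/{item_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_item(self, item_id: str) -> dict: ...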
llama_stack/apis/batch_inference/batch_inference.py
CHANGED
@@ -17,6 +17,7 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import webmethod


@@ -30,7 +31,7 @@ class BatchInference(Protocol):
     including (post-training, evals, etc).
     """

-    @webmethod(route="/batch-inference/completion", method="POST")
+    @webmethod(route="/batch-inference/completion", method="POST", level=LLAMA_STACK_API_V1)
     async def completion(
         self,
         model: str,
@@ -50,7 +51,7 @@ class BatchInference(Protocol):
         """
         ...

-    @webmethod(route="/batch-inference/chat-completion", method="POST")
+    @webmethod(route="/batch-inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
     async def chat_completion(
         self,
         model: str,
llama_stack/apis/batches/batches.py
CHANGED
@@ -8,6 +8,7 @@ from typing import Literal, Protocol, runtime_checkable

 from pydantic import BaseModel, Field

+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, webmethod

 try:
@@ -42,7 +43,7 @@ class Batches(Protocol):
     Note: This API is currently under active development and may undergo changes.
     """

-    @webmethod(route="/openai/v1/batches", method="POST")
+    @webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1)
     async def create_batch(
         self,
         input_file_id: str,
@@ -62,7 +63,7 @@ class Batches(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/batches/{batch_id}", method="GET")
+    @webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def retrieve_batch(self, batch_id: str) -> BatchObject:
         """Retrieve information about a specific batch.

@@ -71,7 +72,7 @@ class Batches(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST")
+    @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
     async def cancel_batch(self, batch_id: str) -> BatchObject:
         """Cancel a batch that is in progress.

@@ -80,7 +81,7 @@ class Batches(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/batches", method="GET")
+    @webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1)
     async def list_batches(
         self,
         after: str | None = None,
llama_stack/apis/benchmarks/benchmarks.py
CHANGED
@@ -8,6 +8,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, Field

 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
 from llama_stack.schema_utils import json_schema_type, webmethod


@@ -53,7 +54,8 @@ class ListBenchmarksResponse(BaseModel):

 @runtime_checkable
 class Benchmarks(Protocol):
-    @webmethod(route="/eval/benchmarks", method="GET")
+    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def list_benchmarks(self) -> ListBenchmarksResponse:
         """List all benchmarks.

@@ -61,7 +63,8 @@ class Benchmarks(Protocol):
         """
         ...

-    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET")
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def get_benchmark(
         self,
         benchmark_id: str,
@@ -73,7 +76,8 @@ class Benchmarks(Protocol):
         """
         ...

-    @webmethod(route="/eval/benchmarks", method="POST")
+    @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def register_benchmark(
         self,
         benchmark_id: str,
@@ -93,3 +97,12 @@ class Benchmarks(Protocol):
         :param metadata: The metadata to use for the benchmark.
         """
         ...
+
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
+    async def unregister_benchmark(self, benchmark_id: str) -> None:
+        """Unregister a benchmark.
+
+        :param benchmark_id: The ID of the benchmark to unregister.
+        """
+        ...
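Note the deprecation pattern in the benchmarks hunks above: rather than deleting the old routes, each method is stacked with two @webmethod decorators, one keeping the route at the v1 level with deprecated=True and one registering it at the new v1alpha level. A hedged sketch of that dual registration (DualExample is hypothetical; the decorator arguments are copied from the hunks above, and how webmethod merges stacked decorators is internal to llama_stack.schema_utils and not shown in this diff):

# Hypothetical illustration of the dual-registration pattern used for the
# deprecated benchmarks/eval routes in this release.
from typing import Protocol

from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import webmethod


class DualExample(Protocol):
    # Same route and method, served at both API levels during the migration.
    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
    async def list_benchmarks(self): ...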
llama_stack/apis/datasetio/datasetio.py
CHANGED
@@ -8,6 +8,7 @@ from typing import Any, Protocol, runtime_checkable

 from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.apis.datasets import Dataset
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import webmethod


@@ -20,7 +21,7 @@ class DatasetIO(Protocol):
     # keeping for aligning with inference/safety, but this is not used
     dataset_store: DatasetStore

-    @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET")
+    @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def iterrows(
         self,
         dataset_id: str,
@@ -44,7 +45,7 @@ class DatasetIO(Protocol):
         """
         ...

-    @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST")
+    @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1)
     async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
         """Append rows to a dataset.

llama_stack/apis/datasets/datasets.py
CHANGED
@@ -10,6 +10,7 @@ from typing import Annotated, Any, Literal, Protocol
 from pydantic import BaseModel, Field

 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod


@@ -145,7 +146,7 @@ class ListDatasetsResponse(BaseModel):


 class Datasets(Protocol):
-    @webmethod(route="/datasets", method="POST")
+    @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1)
     async def register_dataset(
         self,
         purpose: DatasetPurpose,
@@ -214,7 +215,7 @@ class Datasets(Protocol):
         """
         ...

-    @webmethod(route="/datasets/{dataset_id:path}", method="GET")
+    @webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_dataset(
         self,
         dataset_id: str,
@@ -226,7 +227,7 @@ class Datasets(Protocol):
         """
         ...

-    @webmethod(route="/datasets", method="GET")
+    @webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1)
     async def list_datasets(self) -> ListDatasetsResponse:
         """List all datasets.

@@ -234,7 +235,7 @@ class Datasets(Protocol):
         """
         ...

-    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE")
+    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def unregister_dataset(
         self,
         dataset_id: str,
llama_stack/apis/eval/eval.py
CHANGED
@@ -13,6 +13,7 @@ from llama_stack.apis.common.job_types import Job
 from llama_stack.apis.inference import SamplingParams, SystemMessage
 from llama_stack.apis.scoring import ScoringResult
 from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod


@@ -83,7 +84,8 @@ class EvaluateResponse(BaseModel):
 class Eval(Protocol):
     """Llama Stack Evaluation API for running evaluations on model and agent candidates."""

-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def run_eval(
         self,
         benchmark_id: str,
@@ -97,7 +99,10 @@ class Eval(Protocol):
         """
         ...

-    @webmethod(
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
+    )
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def evaluate_rows(
         self,
         benchmark_id: str,
@@ -115,7 +120,10 @@ class Eval(Protocol):
         """
         ...

-    @webmethod(
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
+    )
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def job_status(self, benchmark_id: str, job_id: str) -> Job:
         """Get the status of a job.

@@ -125,7 +133,13 @@ class Eval(Protocol):
         """
         ...

-    @webmethod(
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
+        method="DELETE",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
     async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
         """Cancel a job.

@@ -134,7 +148,15 @@ class Eval(Protocol):
         """
         ...

-    @webmethod(
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET", level=LLAMA_STACK_API_V1ALPHA
+    )
     async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
         """Get the result of a job.

llama_stack/apis/files/files.py
CHANGED
@@ -11,6 +11,7 @@ from fastapi import File, Form, Response, UploadFile
 from pydantic import BaseModel, Field

 from llama_stack.apis.common.responses import Order
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod

@@ -104,7 +105,7 @@ class OpenAIFileDeleteResponse(BaseModel):
 @trace_protocol
 class Files(Protocol):
     # OpenAI Files API Endpoints
-    @webmethod(route="/openai/v1/files", method="POST")
+    @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_upload_file(
         self,
         file: Annotated[UploadFile, File()],
@@ -119,7 +120,7 @@ class Files(Protocol):
         The file upload should be a multipart form request with:
         - file: The File object (not file name) to be uploaded.
         - purpose: The intended purpose of the uploaded file.
-        - expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = "created_at", expires_after[seconds] =
+        - expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = "created_at", expires_after[seconds] = {integer}. Seconds must be between 3600 and 2592000 (1 hour to 30 days).

         :param file: The uploaded file object containing content and metadata (filename, content_type, etc.).
         :param purpose: The intended purpose of the uploaded file (e.g., "assistants", "fine-tune").
@@ -127,7 +128,7 @@ class Files(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/files", method="GET")
+    @webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_files(
         self,
         after: str | None = None,
@@ -146,7 +147,7 @@ class Files(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/files/{file_id}", method="GET")
+    @webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_file(
         self,
         file_id: str,
@@ -159,7 +160,7 @@ class Files(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/files/{file_id}", method="DELETE")
+    @webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def openai_delete_file(
         self,
         file_id: str,
@@ -172,7 +173,7 @@ class Files(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/files/{file_id}/content", method="GET")
+    @webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_file_content(
         self,
         file_id: str,
llama_stack/apis/inference/inference.py
CHANGED
@@ -21,6 +21,7 @@ from llama_stack.apis.common.content_types import ContentDelta, InterleavedConte
 from llama_stack.apis.common.responses import Order
 from llama_stack.apis.models import Model
 from llama_stack.apis.telemetry import MetricResponseMixin
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.models.llama.datatypes import (
     BuiltinTool,
     StopReason,
@@ -913,6 +914,7 @@ class OpenAIEmbeddingData(BaseModel):
     """

     object: Literal["embedding"] = "embedding"
+    # TODO: consider dropping str and using openai.types.embeddings.Embedding instead of OpenAIEmbeddingData
     embedding: list[float] | str
     index: int

@@ -1026,7 +1028,7 @@ class InferenceProvider(Protocol):

     model_store: ModelStore | None = None

-    @webmethod(route="/inference/completion", method="POST")
+    @webmethod(route="/inference/completion", method="POST", level=LLAMA_STACK_API_V1)
     async def completion(
         self,
         model_id: str,
@@ -1049,7 +1051,7 @@ class InferenceProvider(Protocol):
         """
         ...

-    @webmethod(route="/inference/batch-completion", method="POST", experimental=True)
+    @webmethod(route="/inference/batch-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
     async def batch_completion(
         self,
         model_id: str,
@@ -1070,7 +1072,7 @@ class InferenceProvider(Protocol):
         raise NotImplementedError("Batch completion is not implemented")
         return  # this is so mypy's safe-super rule will consider the method concrete

-    @webmethod(route="/inference/chat-completion", method="POST")
+    @webmethod(route="/inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
     async def chat_completion(
         self,
         model_id: str,
@@ -1110,7 +1112,7 @@ class InferenceProvider(Protocol):
         """
         ...

-    @webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True)
+    @webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
     async def batch_chat_completion(
         self,
         model_id: str,
@@ -1135,7 +1137,7 @@ class InferenceProvider(Protocol):
         raise NotImplementedError("Batch chat completion is not implemented")
         return  # this is so mypy's safe-super rule will consider the method concrete

-    @webmethod(route="/inference/embeddings", method="POST")
+    @webmethod(route="/inference/embeddings", method="POST", level=LLAMA_STACK_API_V1)
     async def embeddings(
         self,
         model_id: str,
@@ -1155,7 +1157,7 @@ class InferenceProvider(Protocol):
         """
         ...

-    @webmethod(route="/inference/rerank", method="POST", experimental=True)
+    @webmethod(route="/inference/rerank", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
     async def rerank(
         self,
         model: str,
@@ -1174,7 +1176,7 @@ class InferenceProvider(Protocol):
         raise NotImplementedError("Reranking is not implemented")
         return  # this is so mypy's safe-super rule will consider the method concrete

-    @webmethod(route="/openai/v1/completions", method="POST")
+    @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_completion(
         self,
         # Standard OpenAI completion parameters
@@ -1225,7 +1227,7 @@ class InferenceProvider(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/chat/completions", method="POST")
+    @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_chat_completion(
         self,
         model: str,
@@ -1281,7 +1283,7 @@ class InferenceProvider(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/embeddings", method="POST")
+    @webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_embeddings(
         self,
         model: str,
@@ -1310,7 +1312,7 @@ class Inference(InferenceProvider):
     - Embedding models: these models generate embeddings to be used for semantic search.
     """

-    @webmethod(route="/openai/v1/chat/completions", method="GET")
+    @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
     async def list_chat_completions(
         self,
         after: str | None = None,
@@ -1328,7 +1330,7 @@ class Inference(InferenceProvider):
         """
         raise NotImplementedError("List chat completions is not implemented")

-    @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET")
+    @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
         """Describe a chat completion by its ID.

llama_stack/apis/inspect/inspect.py
CHANGED
@@ -8,6 +8,7 @@ from typing import Protocol, runtime_checkable

 from pydantic import BaseModel

+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.datatypes import HealthStatus
 from llama_stack.schema_utils import json_schema_type, webmethod

@@ -57,7 +58,7 @@ class ListRoutesResponse(BaseModel):

 @runtime_checkable
 class Inspect(Protocol):
-    @webmethod(route="/inspect/routes", method="GET")
+    @webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
     async def list_routes(self) -> ListRoutesResponse:
         """List all available API routes with their methods and implementing providers.

@@ -65,7 +66,7 @@ class Inspect(Protocol):
         """
         ...

-    @webmethod(route="/health", method="GET")
+    @webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1)
     async def health(self) -> HealthInfo:
         """Get the current health status of the service.

@@ -73,7 +74,7 @@ class Inspect(Protocol):
         """
         ...

-    @webmethod(route="/version", method="GET")
+    @webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1)
     async def version(self) -> VersionInfo:
         """Get the version of the service.
