llama-stack 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in a supported public registry. It is provided for informational purposes only.
Files changed (138)
  1. llama_stack/apis/agents/agents.py +26 -14
  2. llama_stack/apis/batch_inference/batch_inference.py +3 -2
  3. llama_stack/apis/batches/batches.py +5 -4
  4. llama_stack/apis/benchmarks/benchmarks.py +16 -3
  5. llama_stack/apis/datasetio/datasetio.py +3 -2
  6. llama_stack/apis/datasets/datasets.py +5 -4
  7. llama_stack/apis/eval/eval.py +27 -5
  8. llama_stack/apis/files/files.py +7 -6
  9. llama_stack/apis/inference/inference.py +13 -11
  10. llama_stack/apis/inspect/inspect.py +4 -3
  11. llama_stack/apis/models/models.py +6 -5
  12. llama_stack/apis/post_training/post_training.py +13 -6
  13. llama_stack/apis/prompts/prompts.py +8 -7
  14. llama_stack/apis/providers/providers.py +3 -2
  15. llama_stack/apis/safety/safety.py +3 -2
  16. llama_stack/apis/scoring/scoring.py +3 -2
  17. llama_stack/apis/scoring_functions/scoring_functions.py +12 -3
  18. llama_stack/apis/shields/shields.py +5 -4
  19. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +2 -1
  20. llama_stack/apis/telemetry/telemetry.py +21 -8
  21. llama_stack/apis/tools/rag_tool.py +3 -2
  22. llama_stack/apis/tools/tools.py +9 -8
  23. llama_stack/apis/vector_dbs/vector_dbs.py +5 -4
  24. llama_stack/apis/vector_io/vector_io.py +25 -14
  25. llama_stack/apis/version.py +3 -1
  26. llama_stack/cli/stack/_build.py +7 -0
  27. llama_stack/cli/verify_download.py +7 -10
  28. llama_stack/core/build_container.sh +2 -2
  29. llama_stack/core/client.py +18 -2
  30. llama_stack/core/datatypes.py +10 -7
  31. llama_stack/core/distribution.py +7 -20
  32. llama_stack/core/library_client.py +6 -4
  33. llama_stack/core/routers/__init__.py +4 -1
  34. llama_stack/core/routers/inference.py +12 -7
  35. llama_stack/core/routing_tables/benchmarks.py +4 -0
  36. llama_stack/core/routing_tables/common.py +4 -0
  37. llama_stack/core/routing_tables/models.py +1 -1
  38. llama_stack/core/routing_tables/scoring_functions.py +4 -0
  39. llama_stack/core/routing_tables/toolgroups.py +13 -2
  40. llama_stack/core/server/routes.py +15 -15
  41. llama_stack/core/server/server.py +99 -124
  42. llama_stack/core/server/tracing.py +80 -0
  43. llama_stack/core/stack.py +66 -60
  44. llama_stack/core/start_stack.sh +1 -1
  45. llama_stack/distributions/ci-tests/build.yaml +1 -0
  46. llama_stack/distributions/ci-tests/run.yaml +7 -0
  47. llama_stack/distributions/nvidia/build.yaml +2 -0
  48. llama_stack/distributions/nvidia/nvidia.py +12 -10
  49. llama_stack/distributions/nvidia/run-with-safety.yaml +9 -0
  50. llama_stack/distributions/nvidia/run.yaml +10 -84
  51. llama_stack/distributions/starter/build.yaml +1 -0
  52. llama_stack/distributions/starter/run.yaml +7 -0
  53. llama_stack/distributions/starter/starter.py +20 -2
  54. llama_stack/distributions/starter-gpu/build.yaml +1 -0
  55. llama_stack/distributions/starter-gpu/run.yaml +7 -0
  56. llama_stack/distributions/watsonx/run.yaml +9 -0
  57. llama_stack/distributions/watsonx/watsonx.py +10 -2
  58. llama_stack/providers/datatypes.py +17 -71
  59. llama_stack/providers/inline/eval/meta_reference/eval.py +7 -0
  60. llama_stack/providers/inline/files/localfs/files.py +2 -3
  61. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +3 -0
  62. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +6 -6
  63. llama_stack/providers/inline/tool_runtime/rag/memory.py +101 -46
  64. llama_stack/providers/registry/batches.py +1 -1
  65. llama_stack/providers/registry/datasetio.py +19 -22
  66. llama_stack/providers/registry/eval.py +10 -11
  67. llama_stack/providers/registry/files.py +8 -15
  68. llama_stack/providers/registry/inference.py +189 -191
  69. llama_stack/providers/registry/post_training.py +8 -9
  70. llama_stack/providers/registry/safety.py +23 -27
  71. llama_stack/providers/registry/scoring.py +1 -1
  72. llama_stack/providers/registry/tool_runtime.py +41 -47
  73. llama_stack/providers/registry/vector_io.py +59 -59
  74. llama_stack/providers/remote/eval/nvidia/eval.py +12 -4
  75. llama_stack/providers/remote/files/s3/files.py +2 -3
  76. llama_stack/providers/remote/inference/anthropic/__init__.py +0 -6
  77. llama_stack/providers/remote/inference/anthropic/anthropic.py +12 -2
  78. llama_stack/providers/remote/inference/azure/__init__.py +15 -0
  79. llama_stack/providers/remote/inference/azure/azure.py +62 -0
  80. llama_stack/providers/remote/inference/azure/config.py +63 -0
  81. llama_stack/providers/remote/inference/bedrock/bedrock.py +50 -3
  82. llama_stack/providers/remote/inference/cerebras/cerebras.py +14 -14
  83. llama_stack/providers/remote/inference/cerebras/config.py +2 -2
  84. llama_stack/providers/remote/inference/databricks/__init__.py +2 -1
  85. llama_stack/providers/remote/inference/databricks/config.py +5 -5
  86. llama_stack/providers/remote/inference/databricks/databricks.py +84 -94
  87. llama_stack/providers/remote/inference/fireworks/fireworks.py +17 -169
  88. llama_stack/providers/remote/inference/gemini/__init__.py +0 -6
  89. llama_stack/providers/remote/inference/gemini/gemini.py +4 -2
  90. llama_stack/providers/remote/inference/groq/__init__.py +1 -3
  91. llama_stack/providers/remote/inference/groq/groq.py +0 -3
  92. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +0 -3
  93. llama_stack/providers/remote/inference/nvidia/nvidia.py +9 -8
  94. llama_stack/providers/remote/inference/ollama/ollama.py +70 -217
  95. llama_stack/providers/remote/inference/openai/__init__.py +0 -6
  96. llama_stack/providers/remote/inference/openai/openai.py +5 -2
  97. llama_stack/providers/remote/inference/passthrough/passthrough.py +1 -1
  98. llama_stack/providers/remote/inference/sambanova/__init__.py +1 -3
  99. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -3
  100. llama_stack/providers/remote/inference/tgi/tgi.py +43 -15
  101. llama_stack/providers/remote/inference/together/together.py +85 -130
  102. llama_stack/providers/remote/inference/vertexai/vertexai.py +29 -6
  103. llama_stack/providers/remote/inference/vllm/__init__.py +6 -0
  104. llama_stack/providers/remote/inference/vllm/vllm.py +56 -193
  105. llama_stack/providers/remote/inference/watsonx/config.py +2 -2
  106. llama_stack/providers/remote/inference/watsonx/watsonx.py +19 -3
  107. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +6 -2
  108. llama_stack/providers/utils/inference/inference_store.py +130 -22
  109. llama_stack/providers/utils/inference/litellm_openai_mixin.py +3 -3
  110. llama_stack/providers/utils/inference/model_registry.py +9 -22
  111. llama_stack/providers/utils/inference/openai_mixin.py +109 -24
  112. llama_stack/providers/utils/kvstore/config.py +5 -5
  113. llama_stack/providers/utils/kvstore/mongodb/mongodb.py +8 -3
  114. llama_stack/providers/utils/kvstore/sqlite/sqlite.py +7 -0
  115. llama_stack/providers/utils/responses/responses_store.py +2 -5
  116. llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +19 -6
  117. llama_stack/providers/utils/telemetry/tracing.py +29 -15
  118. llama_stack/providers/utils/vector_io/vector_utils.py +2 -4
  119. llama_stack/schema_utils.py +15 -1
  120. llama_stack/testing/inference_recorder.py +51 -31
  121. {llama_stack-0.2.21.dist-info → llama_stack-0.2.23.dist-info}/METADATA +15 -15
  122. {llama_stack-0.2.21.dist-info → llama_stack-0.2.23.dist-info}/RECORD +126 -134
  123. llama_stack/providers/remote/inference/anthropic/models.py +0 -40
  124. llama_stack/providers/remote/inference/cerebras/models.py +0 -28
  125. llama_stack/providers/remote/inference/fireworks/models.py +0 -70
  126. llama_stack/providers/remote/inference/gemini/models.py +0 -34
  127. llama_stack/providers/remote/inference/groq/models.py +0 -48
  128. llama_stack/providers/remote/inference/llama_openai_compat/models.py +0 -25
  129. llama_stack/providers/remote/inference/nvidia/models.py +0 -109
  130. llama_stack/providers/remote/inference/ollama/models.py +0 -106
  131. llama_stack/providers/remote/inference/openai/models.py +0 -60
  132. llama_stack/providers/remote/inference/sambanova/models.py +0 -28
  133. llama_stack/providers/remote/inference/together/models.py +0 -77
  134. llama_stack/providers/remote/inference/vertexai/models.py +0 -20
  135. {llama_stack-0.2.21.dist-info → llama_stack-0.2.23.dist-info}/WHEEL +0 -0
  136. {llama_stack-0.2.21.dist-info → llama_stack-0.2.23.dist-info}/entry_points.txt +0 -0
  137. {llama_stack-0.2.21.dist-info → llama_stack-0.2.23.dist-info}/licenses/LICENSE +0 -0
  138. {llama_stack-0.2.21.dist-info → llama_stack-0.2.23.dist-info}/top_level.txt +0 -0
llama_stack/apis/agents/agents.py
@@ -27,6 +27,7 @@ from llama_stack.apis.inference import (
 )
 from llama_stack.apis.safety import SafetyViolation
 from llama_stack.apis.tools import ToolDef
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod

 from .openai_responses import (
@@ -481,7 +482,7 @@ class Agents(Protocol):
     - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.
     """

-    @webmethod(route="/agents", method="POST", descriptive_name="create_agent")
+    @webmethod(route="/agents", method="POST", descriptive_name="create_agent", level=LLAMA_STACK_API_V1)
     async def create_agent(
         self,
         agent_config: AgentConfig,
@@ -494,7 +495,10 @@ class Agents(Protocol):
         ...

     @webmethod(
-        route="/agents/{agent_id}/session/{session_id}/turn", method="POST", descriptive_name="create_agent_turn"
+        route="/agents/{agent_id}/session/{session_id}/turn",
+        method="POST",
+        descriptive_name="create_agent_turn",
+        level=LLAMA_STACK_API_V1,
     )
     async def create_agent_turn(
         self,
@@ -524,6 +528,7 @@ class Agents(Protocol):
         route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
         method="POST",
         descriptive_name="resume_agent_turn",
+        level=LLAMA_STACK_API_V1,
     )
     async def resume_agent_turn(
         self,
@@ -549,6 +554,7 @@ class Agents(Protocol):
     @webmethod(
         route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
         method="GET",
+        level=LLAMA_STACK_API_V1,
     )
     async def get_agents_turn(
         self,
@@ -568,6 +574,7 @@ class Agents(Protocol):
     @webmethod(
         route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
         method="GET",
+        level=LLAMA_STACK_API_V1,
     )
     async def get_agents_step(
         self,
@@ -586,7 +593,12 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}/session", method="POST", descriptive_name="create_agent_session")
+    @webmethod(
+        route="/agents/{agent_id}/session",
+        method="POST",
+        descriptive_name="create_agent_session",
+        level=LLAMA_STACK_API_V1,
+    )
     async def create_agent_session(
         self,
         agent_id: str,
@@ -600,7 +612,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET")
+    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_agents_session(
         self,
         session_id: str,
@@ -616,7 +628,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE")
+    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_agents_session(
         self,
         session_id: str,
@@ -629,7 +641,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}", method="DELETE")
+    @webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_agent(
         self,
         agent_id: str,
@@ -640,7 +652,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents", method="GET")
+    @webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1)
     async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse:
         """List all agents.

@@ -650,7 +662,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}", method="GET")
+    @webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_agent(self, agent_id: str) -> Agent:
         """Describe an agent by its ID.

@@ -659,7 +671,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/agents/{agent_id}/sessions", method="GET")
+    @webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1)
     async def list_agent_sessions(
         self,
         agent_id: str,
@@ -682,7 +694,7 @@ class Agents(Protocol):
     #
     # Both of these APIs are inherently stateful.

-    @webmethod(route="/openai/v1/responses/{response_id}", method="GET")
+    @webmethod(route="/openai/v1/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_openai_response(
         self,
         response_id: str,
@@ -694,7 +706,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/responses", method="POST")
+    @webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1)
     async def create_openai_response(
         self,
         input: str | list[OpenAIResponseInput],
@@ -719,7 +731,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/responses", method="GET")
+    @webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1)
     async def list_openai_responses(
         self,
         after: str | None = None,
@@ -737,7 +749,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET")
+    @webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
     async def list_openai_response_input_items(
         self,
         response_id: str,
@@ -759,7 +771,7 @@ class Agents(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE")
+    @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
         """Delete an OpenAI response by its ID.

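Every agents.py hunk above makes the same change: each @webmethod route now declares the API version it belongs to via a level argument, with the LLAMA_STACK_API_V1 constant imported from llama_stack.apis.version. A minimal sketch of the pattern as it reads after this diff (the decorator and constant are real; the Example protocol is illustrative only):

from typing import Protocol

from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import webmethod


class Example(Protocol):
    # The route now carries an explicit API level alongside its path and HTTP method.
    @webmethod(route="/example/{example_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_example(self, example_id: str) -> dict: ...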

llama_stack/apis/batch_inference/batch_inference.py
@@ -17,6 +17,7 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import webmethod


@@ -30,7 +31,7 @@ class BatchInference(Protocol):
     including (post-training, evals, etc).
     """

-    @webmethod(route="/batch-inference/completion", method="POST")
+    @webmethod(route="/batch-inference/completion", method="POST", level=LLAMA_STACK_API_V1)
     async def completion(
         self,
         model: str,
@@ -50,7 +51,7 @@ class BatchInference(Protocol):
         """
         ...

-    @webmethod(route="/batch-inference/chat-completion", method="POST")
+    @webmethod(route="/batch-inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
     async def chat_completion(
         self,
         model: str,

llama_stack/apis/batches/batches.py
@@ -8,6 +8,7 @@ from typing import Literal, Protocol, runtime_checkable

 from pydantic import BaseModel, Field

+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, webmethod

 try:
@@ -42,7 +43,7 @@ class Batches(Protocol):
     Note: This API is currently under active development and may undergo changes.
     """

-    @webmethod(route="/openai/v1/batches", method="POST")
+    @webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1)
     async def create_batch(
         self,
         input_file_id: str,
@@ -62,7 +63,7 @@ class Batches(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/batches/{batch_id}", method="GET")
+    @webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def retrieve_batch(self, batch_id: str) -> BatchObject:
         """Retrieve information about a specific batch.

@@ -71,7 +72,7 @@ class Batches(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST")
+    @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
     async def cancel_batch(self, batch_id: str) -> BatchObject:
         """Cancel a batch that is in progress.

@@ -80,7 +81,7 @@ class Batches(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/batches", method="GET")
+    @webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1)
     async def list_batches(
         self,
         after: str | None = None,

llama_stack/apis/benchmarks/benchmarks.py
@@ -8,6 +8,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, Field

 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
 from llama_stack.schema_utils import json_schema_type, webmethod


@@ -53,7 +54,8 @@ class ListBenchmarksResponse(BaseModel):

 @runtime_checkable
 class Benchmarks(Protocol):
-    @webmethod(route="/eval/benchmarks", method="GET")
+    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def list_benchmarks(self) -> ListBenchmarksResponse:
         """List all benchmarks.

@@ -61,7 +63,8 @@ class Benchmarks(Protocol):
         """
         ...

-    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET")
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def get_benchmark(
         self,
         benchmark_id: str,
@@ -73,7 +76,8 @@ class Benchmarks(Protocol):
         """
         ...

-    @webmethod(route="/eval/benchmarks", method="POST")
+    @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def register_benchmark(
         self,
         benchmark_id: str,
@@ -93,3 +97,12 @@ class Benchmarks(Protocol):
         :param metadata: The metadata to use for the benchmark.
         """
         ...
+
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
+    async def unregister_benchmark(self, benchmark_id: str) -> None:
+        """Unregister a benchmark.
+
+        :param benchmark_id: The ID of the benchmark to unregister.
+        """
+        ...
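benchmarks.py introduces the deprecation pattern that eval.py repeats below: each route is registered twice, once at the stable v1 level with deprecated=True and once at the new v1alpha level, so existing clients keep working while the endpoint migrates. Distilled from the hunks above (the stacked decorators are straight from the diff; the comments are interpretive):

class Benchmarks(Protocol):
    # The v1 registration keeps the old path alive but flags it as deprecated;
    # the v1alpha registration is where the endpoint now lives.
    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
    async def list_benchmarks(self) -> ListBenchmarksResponse: ...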

llama_stack/apis/datasetio/datasetio.py
@@ -8,6 +8,7 @@ from typing import Any, Protocol, runtime_checkable

 from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.apis.datasets import Dataset
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import webmethod


@@ -20,7 +21,7 @@ class DatasetIO(Protocol):
     # keeping for aligning with inference/safety, but this is not used
     dataset_store: DatasetStore

-    @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET")
+    @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def iterrows(
         self,
         dataset_id: str,
@@ -44,7 +45,7 @@ class DatasetIO(Protocol):
         """
         ...

-    @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST")
+    @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1)
     async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
         """Append rows to a dataset.


llama_stack/apis/datasets/datasets.py
@@ -10,6 +10,7 @@ from typing import Annotated, Any, Literal, Protocol
 from pydantic import BaseModel, Field

 from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod


@@ -145,7 +146,7 @@ class ListDatasetsResponse(BaseModel):


 class Datasets(Protocol):
-    @webmethod(route="/datasets", method="POST")
+    @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1)
     async def register_dataset(
         self,
         purpose: DatasetPurpose,
@@ -214,7 +215,7 @@ class Datasets(Protocol):
         """
         ...

-    @webmethod(route="/datasets/{dataset_id:path}", method="GET")
+    @webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_dataset(
         self,
         dataset_id: str,
@@ -226,7 +227,7 @@ class Datasets(Protocol):
         """
         ...

-    @webmethod(route="/datasets", method="GET")
+    @webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1)
     async def list_datasets(self) -> ListDatasetsResponse:
         """List all datasets.

@@ -234,7 +235,7 @@ class Datasets(Protocol):
         """
         ...

-    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE")
+    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def unregister_dataset(
         self,
         dataset_id: str,

llama_stack/apis/eval/eval.py
@@ -13,6 +13,7 @@ from llama_stack.apis.common.job_types import Job
 from llama_stack.apis.inference import SamplingParams, SystemMessage
 from llama_stack.apis.scoring import ScoringResult
 from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod


@@ -83,7 +84,8 @@ class EvaluateResponse(BaseModel):
 class Eval(Protocol):
     """Llama Stack Evaluation API for running evaluations on model and agent candidates."""

-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def run_eval(
         self,
         benchmark_id: str,
@@ -97,7 +99,10 @@ class Eval(Protocol):
         """
         ...

-    @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST")
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
+    )
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def evaluate_rows(
         self,
         benchmark_id: str,
@@ -115,7 +120,10 @@ class Eval(Protocol):
         """
         ...

-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
+    )
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def job_status(self, benchmark_id: str, job_id: str) -> Job:
         """Get the status of a job.

@@ -125,7 +133,13 @@ class Eval(Protocol):
         """
         ...

-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE")
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
+        method="DELETE",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
+    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
     async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
         """Cancel a job.

@@ -134,7 +148,15 @@ class Eval(Protocol):
         """
         ...

-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+        deprecated=True,
+    )
+    @webmethod(
+        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET", level=LLAMA_STACK_API_V1ALPHA
+    )
     async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
         """Get the result of a job.


llama_stack/apis/files/files.py
@@ -11,6 +11,7 @@ from fastapi import File, Form, Response, UploadFile
 from pydantic import BaseModel, Field

 from llama_stack.apis.common.responses import Order
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod

@@ -104,7 +105,7 @@ class OpenAIFileDeleteResponse(BaseModel):
 @trace_protocol
 class Files(Protocol):
     # OpenAI Files API Endpoints
-    @webmethod(route="/openai/v1/files", method="POST")
+    @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_upload_file(
         self,
         file: Annotated[UploadFile, File()],
@@ -119,7 +120,7 @@ class Files(Protocol):
         The file upload should be a multipart form request with:
         - file: The File object (not file name) to be uploaded.
         - purpose: The intended purpose of the uploaded file.
-        - expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = "created_at", expires_after[seconds] = <int>. Seconds must be between 3600 and 2592000 (1 hour to 30 days).
+        - expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = "created_at", expires_after[seconds] = {integer}. Seconds must be between 3600 and 2592000 (1 hour to 30 days).

         :param file: The uploaded file object containing content and metadata (filename, content_type, etc.).
         :param purpose: The intended purpose of the uploaded file (e.g., "assistants", "fine-tune").
@@ -127,7 +128,7 @@ class Files(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/files", method="GET")
+    @webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_files(
         self,
         after: str | None = None,
@@ -146,7 +147,7 @@ class Files(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/files/{file_id}", method="GET")
+    @webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_file(
         self,
         file_id: str,
@@ -159,7 +160,7 @@ class Files(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/files/{file_id}", method="DELETE")
+    @webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def openai_delete_file(
         self,
         file_id: str,
@@ -172,7 +173,7 @@ class Files(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/files/{file_id}/content", method="GET")
+    @webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_file_content(
         self,
         file_id: str,

llama_stack/apis/inference/inference.py
@@ -21,6 +21,7 @@ from llama_stack.apis.common.content_types import ContentDelta, InterleavedConte
 from llama_stack.apis.common.responses import Order
 from llama_stack.apis.models import Model
 from llama_stack.apis.telemetry import MetricResponseMixin
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.models.llama.datatypes import (
     BuiltinTool,
     StopReason,
@@ -913,6 +914,7 @@ class OpenAIEmbeddingData(BaseModel):
     """

     object: Literal["embedding"] = "embedding"
+    # TODO: consider dropping str and using openai.types.embeddings.Embedding instead of OpenAIEmbeddingData
     embedding: list[float] | str
     index: int

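The embedding: list[float] | str union that the new TODO refers to mirrors the OpenAI embeddings API, where requesting encoding_format="base64" returns the vector as a base64 string of packed little-endian float32 values instead of a JSON array. A client-side sketch for decoding the string form (assumes the OpenAI packing convention; not code from this package):

import base64
import struct


def decode_embedding(data: str) -> list[float]:
    # Base64-encoded embeddings pack little-endian float32 values,
    # so every 4-byte chunk decodes to one vector component.
    raw = base64.b64decode(data)
    return list(struct.unpack(f"<{len(raw) // 4}f", raw))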
@@ -1026,7 +1028,7 @@ class InferenceProvider(Protocol):

     model_store: ModelStore | None = None

-    @webmethod(route="/inference/completion", method="POST")
+    @webmethod(route="/inference/completion", method="POST", level=LLAMA_STACK_API_V1)
     async def completion(
         self,
         model_id: str,
@@ -1049,7 +1051,7 @@ class InferenceProvider(Protocol):
         """
         ...

-    @webmethod(route="/inference/batch-completion", method="POST", experimental=True)
+    @webmethod(route="/inference/batch-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
     async def batch_completion(
         self,
         model_id: str,
@@ -1070,7 +1072,7 @@ class InferenceProvider(Protocol):
         raise NotImplementedError("Batch completion is not implemented")
         return  # this is so mypy's safe-super rule will consider the method concrete

-    @webmethod(route="/inference/chat-completion", method="POST")
+    @webmethod(route="/inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
     async def chat_completion(
         self,
         model_id: str,
@@ -1110,7 +1112,7 @@ class InferenceProvider(Protocol):
         """
         ...

-    @webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True)
+    @webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
     async def batch_chat_completion(
         self,
         model_id: str,
@@ -1135,7 +1137,7 @@ class InferenceProvider(Protocol):
         raise NotImplementedError("Batch chat completion is not implemented")
         return  # this is so mypy's safe-super rule will consider the method concrete

-    @webmethod(route="/inference/embeddings", method="POST")
+    @webmethod(route="/inference/embeddings", method="POST", level=LLAMA_STACK_API_V1)
     async def embeddings(
         self,
         model_id: str,
@@ -1155,7 +1157,7 @@ class InferenceProvider(Protocol):
         """
         ...

-    @webmethod(route="/inference/rerank", method="POST", experimental=True)
+    @webmethod(route="/inference/rerank", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
     async def rerank(
         self,
         model: str,
@@ -1174,7 +1176,7 @@ class InferenceProvider(Protocol):
         raise NotImplementedError("Reranking is not implemented")
         return  # this is so mypy's safe-super rule will consider the method concrete

-    @webmethod(route="/openai/v1/completions", method="POST")
+    @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_completion(
         self,
         # Standard OpenAI completion parameters
@@ -1225,7 +1227,7 @@ class InferenceProvider(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/chat/completions", method="POST")
+    @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_chat_completion(
         self,
         model: str,
@@ -1281,7 +1283,7 @@ class InferenceProvider(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/embeddings", method="POST")
+    @webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_embeddings(
         self,
         model: str,
@@ -1310,7 +1312,7 @@ class Inference(InferenceProvider):
     - Embedding models: these models generate embeddings to be used for semantic search.
     """

-    @webmethod(route="/openai/v1/chat/completions", method="GET")
+    @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
     async def list_chat_completions(
         self,
         after: str | None = None,
@@ -1328,7 +1330,7 @@ class Inference(InferenceProvider):
         """
         raise NotImplementedError("List chat completions is not implemented")

-    @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET")
+    @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
         """Describe a chat completion by its ID.


llama_stack/apis/inspect/inspect.py
@@ -8,6 +8,7 @@ from typing import Protocol, runtime_checkable

 from pydantic import BaseModel

+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.datatypes import HealthStatus
 from llama_stack.schema_utils import json_schema_type, webmethod

@@ -57,7 +58,7 @@ class ListRoutesResponse(BaseModel):

 @runtime_checkable
 class Inspect(Protocol):
-    @webmethod(route="/inspect/routes", method="GET")
+    @webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
     async def list_routes(self) -> ListRoutesResponse:
         """List all available API routes with their methods and implementing providers.

@@ -65,7 +66,7 @@ class Inspect(Protocol):
         """
         ...

-    @webmethod(route="/health", method="GET")
+    @webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1)
     async def health(self) -> HealthInfo:
         """Get the current health status of the service.

@@ -73,7 +74,7 @@ class Inspect(Protocol):
         """
         ...

-    @webmethod(route="/version", method="GET")
+    @webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1)
     async def version(self) -> VersionInfo:
         """Get the version of the service.

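Taken together with the llama_stack/core/server/routes.py changes in the file list, the level argument presumably controls which version prefix a route is mounted under, e.g. /health at LLAMA_STACK_API_V1 would be served as /v1/health. An illustrative sketch of that mapping (the constant values and the helper are assumptions, not code from the package):

# Assumed values, mirroring what llama_stack/apis/version.py likely defines.
LLAMA_STACK_API_V1 = "v1"
LLAMA_STACK_API_V1ALPHA = "v1alpha"


def versioned_route(level: str, route: str) -> str:
    # versioned_route(LLAMA_STACK_API_V1, "/health") -> "/v1/health"
    return f"/{level}{route}"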