PyPI - llama-stack-client - Versions diffs - 0.0.1a0__py3-none-any.whl - Mend

llama-stack-client 0.0.1a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (168)

llama_stack/__init__.py +95 -0
llama_stack/_base_client.py +2019 -0
llama_stack/_client.py +518 -0
llama_stack/_compat.py +217 -0
llama_stack/_constants.py +14 -0
llama_stack/_exceptions.py +108 -0
llama_stack/_files.py +123 -0
llama_stack/_models.py +785 -0
llama_stack/_qs.py +150 -0
llama_stack/_resource.py +43 -0
llama_stack/_response.py +823 -0
llama_stack/_streaming.py +333 -0
llama_stack/_types.py +217 -0
llama_stack/_utils/__init__.py +55 -0
llama_stack/_utils/_logs.py +25 -0
llama_stack/_utils/_proxy.py +62 -0
llama_stack/_utils/_reflection.py +42 -0
llama_stack/_utils/_streams.py +12 -0
llama_stack/_utils/_sync.py +81 -0
llama_stack/_utils/_transform.py +382 -0
llama_stack/_utils/_typing.py +120 -0
llama_stack/_utils/_utils.py +397 -0
llama_stack/_version.py +4 -0
llama_stack/lib/.keep +4 -0
llama_stack/py.typed +0 -0
llama_stack/resources/__init__.py +173 -0
llama_stack/resources/agentic_system/__init__.py +61 -0
llama_stack/resources/agentic_system/agentic_system.py +338 -0
llama_stack/resources/agentic_system/sessions.py +363 -0
llama_stack/resources/agentic_system/steps.py +186 -0
llama_stack/resources/agentic_system/turns.py +443 -0
llama_stack/resources/batch_inference.py +315 -0
llama_stack/resources/datasets.py +331 -0
llama_stack/resources/evaluate/__init__.py +47 -0
llama_stack/resources/evaluate/evaluate.py +135 -0
llama_stack/resources/evaluate/jobs/__init__.py +61 -0
llama_stack/resources/evaluate/jobs/artifacts.py +168 -0
llama_stack/resources/evaluate/jobs/jobs.py +310 -0
llama_stack/resources/evaluate/jobs/logs.py +168 -0
llama_stack/resources/evaluate/jobs/status.py +168 -0
llama_stack/resources/evaluate/question_answering.py +167 -0
llama_stack/resources/evaluations.py +243 -0
llama_stack/resources/inference/__init__.py +33 -0
llama_stack/resources/inference/embeddings.py +178 -0
llama_stack/resources/inference/inference.py +591 -0
llama_stack/resources/memory_banks/__init__.py +33 -0
llama_stack/resources/memory_banks/documents.py +268 -0
llama_stack/resources/memory_banks/memory_banks.py +675 -0
llama_stack/resources/post_training/__init__.py +33 -0
llama_stack/resources/post_training/jobs.py +451 -0
llama_stack/resources/post_training/post_training.py +365 -0
llama_stack/resources/reward_scoring.py +178 -0
llama_stack/resources/safety.py +179 -0
llama_stack/resources/synthetic_data_generation.py +183 -0
llama_stack/resources/telemetry.py +244 -0
llama_stack/types/__init__.py +75 -0
llama_stack/types/agentic_system/__init__.py +16 -0
llama_stack/types/agentic_system/agentic_system_step.py +18 -0
llama_stack/types/agentic_system/agentic_system_turn_stream_chunk.py +12 -0
llama_stack/types/agentic_system/session.py +21 -0
llama_stack/types/agentic_system/session_create_params.py +13 -0
llama_stack/types/agentic_system/session_create_response.py +11 -0
llama_stack/types/agentic_system/session_delete_params.py +13 -0
llama_stack/types/agentic_system/session_retrieve_params.py +16 -0
llama_stack/types/agentic_system/step_retrieve_params.py +15 -0
llama_stack/types/agentic_system/turn.py +39 -0
llama_stack/types/agentic_system/turn_create_params.py +36 -0
llama_stack/types/agentic_system/turn_retrieve_params.py +13 -0
llama_stack/types/agentic_system/turn_stream_event.py +98 -0
llama_stack/types/agentic_system_create_params.py +191 -0
llama_stack/types/agentic_system_create_response.py +11 -0
llama_stack/types/agentic_system_delete_params.py +11 -0
llama_stack/types/batch_chat_completion.py +12 -0
llama_stack/types/batch_inference_chat_completion_params.py +57 -0
llama_stack/types/batch_inference_completion_params.py +24 -0
llama_stack/types/chat_completion_stream_chunk.py +41 -0
llama_stack/types/completion_stream_chunk.py +17 -0
llama_stack/types/custom_query_generator_config_param.py +11 -0
llama_stack/types/dataset_create_params.py +15 -0
llama_stack/types/dataset_delete_params.py +11 -0
llama_stack/types/dataset_get_params.py +11 -0
llama_stack/types/default_query_generator_config_param.py +13 -0
llama_stack/types/evaluate/__init__.py +9 -0
llama_stack/types/evaluate/evaluation_job_artifacts.py +11 -0
llama_stack/types/evaluate/evaluation_job_log_stream.py +11 -0
llama_stack/types/evaluate/evaluation_job_status.py +11 -0
llama_stack/types/evaluate/job_cancel_params.py +11 -0
llama_stack/types/evaluate/jobs/__init__.py +7 -0
llama_stack/types/evaluate/jobs/artifact_list_params.py +11 -0
llama_stack/types/evaluate/jobs/log_list_params.py +11 -0
llama_stack/types/evaluate/jobs/status_list_params.py +11 -0
llama_stack/types/evaluate/question_answering_create_params.py +12 -0
llama_stack/types/evaluation_job.py +11 -0
llama_stack/types/evaluation_summarization_params.py +12 -0
llama_stack/types/evaluation_text_generation_params.py +12 -0
llama_stack/types/inference/__init__.py +6 -0
llama_stack/types/inference/embedding_create_params.py +14 -0
llama_stack/types/inference/embeddings.py +11 -0
llama_stack/types/inference_chat_completion_params.py +75 -0
llama_stack/types/inference_chat_completion_response.py +20 -0
llama_stack/types/inference_completion_params.py +26 -0
llama_stack/types/inference_completion_response.py +20 -0
llama_stack/types/inference_step.py +26 -0
llama_stack/types/llm_query_generator_config_param.py +15 -0
llama_stack/types/memory_bank_create_params.py +11 -0
llama_stack/types/memory_bank_drop_params.py +11 -0
llama_stack/types/memory_bank_drop_response.py +7 -0
llama_stack/types/memory_bank_insert_params.py +26 -0
llama_stack/types/memory_bank_query_params.py +16 -0
llama_stack/types/memory_bank_retrieve_params.py +11 -0
llama_stack/types/memory_bank_update_params.py +24 -0
llama_stack/types/memory_banks/__init__.py +7 -0
llama_stack/types/memory_banks/document_delete_params.py +14 -0
llama_stack/types/memory_banks/document_retrieve_params.py +14 -0
llama_stack/types/memory_banks/document_retrieve_response.py +17 -0
llama_stack/types/memory_retrieval_step.py +25 -0
llama_stack/types/post_training/__init__.py +11 -0
llama_stack/types/post_training/job_artifacts_params.py +11 -0
llama_stack/types/post_training/job_cancel_params.py +11 -0
llama_stack/types/post_training/job_logs_params.py +11 -0
llama_stack/types/post_training/job_status_params.py +11 -0
llama_stack/types/post_training/post_training_job_artifacts.py +13 -0
llama_stack/types/post_training/post_training_job_log_stream.py +13 -0
llama_stack/types/post_training/post_training_job_status.py +25 -0
llama_stack/types/post_training_job.py +11 -0
llama_stack/types/post_training_preference_optimize_params.py +68 -0
llama_stack/types/post_training_supervised_fine_tune_params.py +107 -0
llama_stack/types/query_documents.py +21 -0
llama_stack/types/rest_api_execution_config_param.py +20 -0
llama_stack/types/reward_scoring.py +12 -0
llama_stack/types/reward_scoring_score_params.py +35 -0
llama_stack/types/safety_run_shields_params.py +23 -0
llama_stack/types/safety_run_shields_response.py +12 -0
llama_stack/types/scored_dialog_generations.py +28 -0
llama_stack/types/shared/__init__.py +10 -0
llama_stack/types/shared/attachment.py +13 -0
llama_stack/types/shared/batch_completion.py +12 -0
llama_stack/types/shared/completion_message.py +19 -0
llama_stack/types/shared/sampling_params.py +22 -0
llama_stack/types/shared/system_message.py +14 -0
llama_stack/types/shared/tool_call.py +19 -0
llama_stack/types/shared/tool_response_message.py +18 -0
llama_stack/types/shared/user_message.py +16 -0
llama_stack/types/shared_params/__init__.py +9 -0
llama_stack/types/shared_params/attachment.py +14 -0
llama_stack/types/shared_params/completion_message.py +20 -0
llama_stack/types/shared_params/sampling_params.py +21 -0
llama_stack/types/shared_params/system_message.py +14 -0
llama_stack/types/shared_params/tool_call.py +23 -0
llama_stack/types/shared_params/tool_response_message.py +18 -0
llama_stack/types/shared_params/user_message.py +16 -0
llama_stack/types/sheid_response.py +20 -0
llama_stack/types/shield_call_step.py +24 -0
llama_stack/types/shield_definition_param.py +28 -0
llama_stack/types/synthetic_data_generation.py +14 -0
llama_stack/types/synthetic_data_generation_generate_params.py +24 -0
llama_stack/types/telemetry_get_trace_params.py +11 -0
llama_stack/types/telemetry_get_trace_response.py +18 -0
llama_stack/types/telemetry_log_params.py +94 -0
llama_stack/types/token_log_probs.py +11 -0
llama_stack/types/tool_execution_step.py +34 -0
llama_stack/types/tool_param_definition_param.py +15 -0
llama_stack/types/train_eval_dataset.py +16 -0
llama_stack/types/train_eval_dataset_param.py +16 -0
llama_stack_client-0.0.1a0.dist-info/METADATA +365 -0
llama_stack_client-0.0.1a0.dist-info/RECORD +168 -0
llama_stack_client-0.0.1a0.dist-info/WHEEL +4 -0
llama_stack_client-0.0.1a0.dist-info/licenses/LICENSE +201 -0

llama_stack/_client.py ADDED Viewed

@@ -0,0 +1,518 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from __future__ import annotations
+import os
+from typing import Any, Dict, Union, Mapping, cast
+from typing_extensions import Self, Literal, override
+import httpx
+from . import resources, _exceptions
+from ._qs import Querystring
+from ._types import (
+    NOT_GIVEN,
+    Omit,
+    Timeout,
+    NotGiven,
+    Transport,
+    ProxiesTypes,
+    RequestOptions,
+)
+from ._utils import (
+    is_given,
+    get_async_library,
+)
+from ._version import __version__
+from ._streaming import Stream as Stream, AsyncStream as AsyncStream
+from ._exceptions import APIStatusError
+from ._base_client import (
+    DEFAULT_MAX_RETRIES,
+    SyncAPIClient,
+    AsyncAPIClient,
+)
+# Names exported by this module when it is star-imported.
+__all__ = [
+    "ENVIRONMENTS",
+    "Timeout",
+    "Transport",
+    "ProxiesTypes",
+    "RequestOptions",
+    "resources",
+    "LlamaStack",
+    "AsyncLlamaStack",
+    "Client",
+    "AsyncClient",
+]
+# Maps each supported `environment` name to the base URL the clients use for it.
+ENVIRONMENTS: Dict[str, str] = {
+    "production": "http://any-hosted-llama-stack.com",
+    "sandbox": "https://example.com",
+}
+class LlamaStack(SyncAPIClient):
+    telemetry: resources.TelemetryResource
+    agentic_system: resources.AgenticSystemResource
+    datasets: resources.DatasetsResource
+    evaluate: resources.EvaluateResource
+    evaluations: resources.EvaluationsResource
+    inference: resources.InferenceResource
+    safety: resources.SafetyResource
+    memory_banks: resources.MemoryBanksResource
+    post_training: resources.PostTrainingResource
+    reward_scoring: resources.RewardScoringResource
+    synthetic_data_generation: resources.SyntheticDataGenerationResource
+    batch_inference: resources.BatchInferenceResource
+    with_raw_response: LlamaStackWithRawResponse
+    with_streaming_response: LlamaStackWithStreamedResponse
+    # client options
+    _environment: Literal["production", "sandbox"] | NotGiven
+    def __init__(
+        self,
+        *,
+        environment: Literal["production", "sandbox"] | NotGiven = NOT_GIVEN,
+        base_url: str | httpx.URL | None | NotGiven = NOT_GIVEN,
+        timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+        default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        # Configure a custom httpx client.
+        # We provide a `DefaultHttpxClient` class that you can pass to retain the default values we use for `limits`, `timeout` & `follow_redirects`.
+        # See the [httpx documentation](https://www.python-httpx.org/api/#client) for more details.
+        http_client: httpx.Client | None = None,
+        # Enable or disable schema validation for data returned by the API.
+        # When enabled an error APIResponseValidationError is raised
+        # if the API responds with invalid data for the expected schema.
+        #
+        # This parameter may be removed or changed in the future.
+        # If you rely on this feature, please open a GitHub issue
+        # outlining your use-case to help us decide if it should be
+        # part of our public interface in the future.
+        _strict_response_validation: bool = False,
+    ) -> None:
+        """Construct a new synchronous llama-stack client instance."""
+        self._environment = environment
+        base_url_env = os.environ.get("LLAMA_STACK_BASE_URL")
+        if is_given(base_url) and base_url is not None:
+            # cast required because mypy doesn't understand the type narrowing
+            base_url = cast("str | httpx.URL", base_url)  # pyright: ignore[reportUnnecessaryCast]
+        elif is_given(environment):
+            if base_url_env and base_url is not None:
+                raise ValueError(
+                    "Ambiguous URL; The `LLAMA_STACK_BASE_URL` env var and the `environment` argument are given. If you want to use the environment, you must pass base_url=None",
+                )
+            try:
+                base_url = ENVIRONMENTS[environment]
+            except KeyError as exc:
+                raise ValueError(f"Unknown environment: {environment}") from exc
+        elif base_url_env is not None:
+            base_url = base_url_env
+        else:
+            self._environment = environment = "production"
+            try:
+                base_url = ENVIRONMENTS[environment]
+            except KeyError as exc:
+                raise ValueError(f"Unknown environment: {environment}") from exc
+        super().__init__(
+            version=__version__,
+            base_url=base_url,
+            max_retries=max_retries,
+            timeout=timeout,
+            http_client=http_client,
+            custom_headers=default_headers,
+            custom_query=default_query,
+            _strict_response_validation=_strict_response_validation,
+        )
+        self.telemetry = resources.TelemetryResource(self)
+        self.agentic_system = resources.AgenticSystemResource(self)
+        self.datasets = resources.DatasetsResource(self)
+        self.evaluate = resources.EvaluateResource(self)
+        self.evaluations = resources.EvaluationsResource(self)
+        self.inference = resources.InferenceResource(self)
+        self.safety = resources.SafetyResource(self)
+        self.memory_banks = resources.MemoryBanksResource(self)
+        self.post_training = resources.PostTrainingResource(self)
+        self.reward_scoring = resources.RewardScoringResource(self)
+        self.synthetic_data_generation = resources.SyntheticDataGenerationResource(self)
+        self.batch_inference = resources.BatchInferenceResource(self)
+        self.with_raw_response = LlamaStackWithRawResponse(self)
+        self.with_streaming_response = LlamaStackWithStreamedResponse(self)
+    @property
+    @override
+    def qs(self) -> Querystring:
+        return Querystring(array_format="comma")
+    @property
+    @override
+    def default_headers(self) -> dict[str, str | Omit]:
+        return {
+            **super().default_headers,
+            "X-Stainless-Async": "false",
+            **self._custom_headers,
+        }
+    def copy(
+        self,
+        *,
+        environment: Literal["production", "sandbox"] | None = None,
+        base_url: str | httpx.URL | None = None,
+        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
+        http_client: httpx.Client | None = None,
+        max_retries: int | NotGiven = NOT_GIVEN,
+        default_headers: Mapping[str, str] | None = None,
+        set_default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        set_default_query: Mapping[str, object] | None = None,
+        _extra_kwargs: Mapping[str, Any] = {},
+    ) -> Self:
+        """
+        Create a new client instance re-using the same options given to the current client with optional overriding.
+        """
+        if default_headers is not None and set_default_headers is not None:
+            raise ValueError("The `default_headers` and `set_default_headers` arguments are mutually exclusive")
+        if default_query is not None and set_default_query is not None:
+            raise ValueError("The `default_query` and `set_default_query` arguments are mutually exclusive")
+        headers = self._custom_headers
+        if default_headers is not None:
+            headers = {**headers, **default_headers}
+        elif set_default_headers is not None:
+            headers = set_default_headers
+        params = self._custom_query
+        if default_query is not None:
+            params = {**params, **default_query}
+        elif set_default_query is not None:
+            params = set_default_query
+        http_client = http_client or self._client
+        return self.__class__(
+            base_url=base_url or self.base_url,
+            environment=environment or self._environment,
+            timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
+            http_client=http_client,
+            max_retries=max_retries if is_given(max_retries) else self.max_retries,
+            default_headers=headers,
+            default_query=params,
+            **_extra_kwargs,
+        )
+    # Alias for `copy` for nicer inline usage, e.g.
+    # client.with_options(timeout=10).foo.create(...)
+    with_options = copy
+    @override
+    def _make_status_error(
+        self,
+        err_msg: str,
+        *,
+        body: object,
+        response: httpx.Response,
+    ) -> APIStatusError:
+        if response.status_code == 400:
+            return _exceptions.BadRequestError(err_msg, response=response, body=body)
+        if response.status_code == 401:
+            return _exceptions.AuthenticationError(err_msg, response=response, body=body)
+        if response.status_code == 403:
+            return _exceptions.PermissionDeniedError(err_msg, response=response, body=body)
+        if response.status_code == 404:
+            return _exceptions.NotFoundError(err_msg, response=response, body=body)
+        if response.status_code == 409:
+            return _exceptions.ConflictError(err_msg, response=response, body=body)
+        if response.status_code == 422:
+            return _exceptions.UnprocessableEntityError(err_msg, response=response, body=body)
+        if response.status_code == 429:
+            return _exceptions.RateLimitError(err_msg, response=response, body=body)
+        if response.status_code >= 500:
+            return _exceptions.InternalServerError(err_msg, response=response, body=body)
+        return APIStatusError(err_msg, response=response, body=body)
+class AsyncLlamaStack(AsyncAPIClient):
+    telemetry: resources.AsyncTelemetryResource
+    agentic_system: resources.AsyncAgenticSystemResource
+    datasets: resources.AsyncDatasetsResource
+    evaluate: resources.AsyncEvaluateResource
+    evaluations: resources.AsyncEvaluationsResource
+    inference: resources.AsyncInferenceResource
+    safety: resources.AsyncSafetyResource
+    memory_banks: resources.AsyncMemoryBanksResource
+    post_training: resources.AsyncPostTrainingResource
+    reward_scoring: resources.AsyncRewardScoringResource
+    synthetic_data_generation: resources.AsyncSyntheticDataGenerationResource
+    batch_inference: resources.AsyncBatchInferenceResource
+    with_raw_response: AsyncLlamaStackWithRawResponse
+    with_streaming_response: AsyncLlamaStackWithStreamedResponse
+    # client options
+    _environment: Literal["production", "sandbox"] | NotGiven
+    def __init__(
+        self,
+        *,
+        environment: Literal["production", "sandbox"] | NotGiven = NOT_GIVEN,
+        base_url: str | httpx.URL | None | NotGiven = NOT_GIVEN,
+        timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
+        max_retries: int = DEFAULT_MAX_RETRIES,
+        default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        # Configure a custom httpx client.
+        # We provide a `DefaultAsyncHttpxClient` class that you can pass to retain the default values we use for `limits`, `timeout` & `follow_redirects`.
+        # See the [httpx documentation](https://www.python-httpx.org/api/#asyncclient) for more details.
+        http_client: httpx.AsyncClient | None = None,
+        # Enable or disable schema validation for data returned by the API.
+        # When enabled an error APIResponseValidationError is raised
+        # if the API responds with invalid data for the expected schema.
+        #
+        # This parameter may be removed or changed in the future.
+        # If you rely on this feature, please open a GitHub issue
+        # outlining your use-case to help us decide if it should be
+        # part of our public interface in the future.
+        _strict_response_validation: bool = False,
+    ) -> None:
+        """Construct a new async llama-stack client instance."""
+        self._environment = environment
+        base_url_env = os.environ.get("LLAMA_STACK_BASE_URL")
+        if is_given(base_url) and base_url is not None:
+            # cast required because mypy doesn't understand the type narrowing
+            base_url = cast("str | httpx.URL", base_url)  # pyright: ignore[reportUnnecessaryCast]
+        elif is_given(environment):
+            if base_url_env and base_url is not None:
+                raise ValueError(
+                    "Ambiguous URL; The `LLAMA_STACK_BASE_URL` env var and the `environment` argument are given. If you want to use the environment, you must pass base_url=None",
+                )
+            try:
+                base_url = ENVIRONMENTS[environment]
+            except KeyError as exc:
+                raise ValueError(f"Unknown environment: {environment}") from exc
+        elif base_url_env is not None:
+            base_url = base_url_env
+        else:
+            self._environment = environment = "production"
+            try:
+                base_url = ENVIRONMENTS[environment]
+            except KeyError as exc:
+                raise ValueError(f"Unknown environment: {environment}") from exc
+        super().__init__(
+            version=__version__,
+            base_url=base_url,
+            max_retries=max_retries,
+            timeout=timeout,
+            http_client=http_client,
+            custom_headers=default_headers,
+            custom_query=default_query,
+            _strict_response_validation=_strict_response_validation,
+        )
+        self.telemetry = resources.AsyncTelemetryResource(self)
+        self.agentic_system = resources.AsyncAgenticSystemResource(self)
+        self.datasets = resources.AsyncDatasetsResource(self)
+        self.evaluate = resources.AsyncEvaluateResource(self)
+        self.evaluations = resources.AsyncEvaluationsResource(self)
+        self.inference = resources.AsyncInferenceResource(self)
+        self.safety = resources.AsyncSafetyResource(self)
+        self.memory_banks = resources.AsyncMemoryBanksResource(self)
+        self.post_training = resources.AsyncPostTrainingResource(self)
+        self.reward_scoring = resources.AsyncRewardScoringResource(self)
+        self.synthetic_data_generation = resources.AsyncSyntheticDataGenerationResource(self)
+        self.batch_inference = resources.AsyncBatchInferenceResource(self)
+        self.with_raw_response = AsyncLlamaStackWithRawResponse(self)
+        self.with_streaming_response = AsyncLlamaStackWithStreamedResponse(self)
+    @property
+    @override
+    def qs(self) -> Querystring:
+        return Querystring(array_format="comma")
+    @property
+    @override
+    def default_headers(self) -> dict[str, str | Omit]:
+        return {
+            **super().default_headers,
+            "X-Stainless-Async": f"async:{get_async_library()}",
+            **self._custom_headers,
+        }
+    def copy(
+        self,
+        *,
+        environment: Literal["production", "sandbox"] | None = None,
+        base_url: str | httpx.URL | None = None,
+        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
+        http_client: httpx.AsyncClient | None = None,
+        max_retries: int | NotGiven = NOT_GIVEN,
+        default_headers: Mapping[str, str] | None = None,
+        set_default_headers: Mapping[str, str] | None = None,
+        default_query: Mapping[str, object] | None = None,
+        set_default_query: Mapping[str, object] | None = None,
+        _extra_kwargs: Mapping[str, Any] = {},
+    ) -> Self:
+        """
+        Create a new client instance re-using the same options given to the current client with optional overriding.
+        """
+        if default_headers is not None and set_default_headers is not None:
+            raise ValueError("The `default_headers` and `set_default_headers` arguments are mutually exclusive")
+        if default_query is not None and set_default_query is not None:
+            raise ValueError("The `default_query` and `set_default_query` arguments are mutually exclusive")
+        headers = self._custom_headers
+        if default_headers is not None:
+            headers = {**headers, **default_headers}
+        elif set_default_headers is not None:
+            headers = set_default_headers
+        params = self._custom_query
+        if default_query is not None:
+            params = {**params, **default_query}
+        elif set_default_query is not None:
+            params = set_default_query
+        http_client = http_client or self._client
+        return self.__class__(
+            base_url=base_url or self.base_url,
+            environment=environment or self._environment,
+            timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
+            http_client=http_client,
+            max_retries=max_retries if is_given(max_retries) else self.max_retries,
+            default_headers=headers,
+            default_query=params,
+            **_extra_kwargs,
+        )
+    # Alias for `copy` for nicer inline usage, e.g.
+    # client.with_options(timeout=10).foo.create(...)
+    with_options = copy
+    @override
+    def _make_status_error(
+        self,
+        err_msg: str,
+        *,
+        body: object,
+        response: httpx.Response,
+    ) -> APIStatusError:
+        if response.status_code == 400:
+            return _exceptions.BadRequestError(err_msg, response=response, body=body)
+        if response.status_code == 401:
+            return _exceptions.AuthenticationError(err_msg, response=response, body=body)
+        if response.status_code == 403:
+            return _exceptions.PermissionDeniedError(err_msg, response=response, body=body)
+        if response.status_code == 404:
+            return _exceptions.NotFoundError(err_msg, response=response, body=body)
+        if response.status_code == 409:
+            return _exceptions.ConflictError(err_msg, response=response, body=body)
+        if response.status_code == 422:
+            return _exceptions.UnprocessableEntityError(err_msg, response=response, body=body)
+        if response.status_code == 429:
+            return _exceptions.RateLimitError(err_msg, response=response, body=body)
+        if response.status_code >= 500:
+            return _exceptions.InternalServerError(err_msg, response=response, body=body)
+        return APIStatusError(err_msg, response=response, body=body)
+class LlamaStackWithRawResponse:
+    def __init__(self, client: LlamaStack) -> None:
+        self.telemetry = resources.TelemetryResourceWithRawResponse(client.telemetry)
+        self.agentic_system = resources.AgenticSystemResourceWithRawResponse(client.agentic_system)
+        self.datasets = resources.DatasetsResourceWithRawResponse(client.datasets)
+        self.evaluate = resources.EvaluateResourceWithRawResponse(client.evaluate)
+        self.evaluations = resources.EvaluationsResourceWithRawResponse(client.evaluations)
+        self.inference = resources.InferenceResourceWithRawResponse(client.inference)
+        self.safety = resources.SafetyResourceWithRawResponse(client.safety)
+        self.memory_banks = resources.MemoryBanksResourceWithRawResponse(client.memory_banks)
+        self.post_training = resources.PostTrainingResourceWithRawResponse(client.post_training)
+        self.reward_scoring = resources.RewardScoringResourceWithRawResponse(client.reward_scoring)
+        self.synthetic_data_generation = resources.SyntheticDataGenerationResourceWithRawResponse(
+            client.synthetic_data_generation
+        )
+        self.batch_inference = resources.BatchInferenceResourceWithRawResponse(client.batch_inference)
+class AsyncLlamaStackWithRawResponse:
+    def __init__(self, client: AsyncLlamaStack) -> None:
+        self.telemetry = resources.AsyncTelemetryResourceWithRawResponse(client.telemetry)
+        self.agentic_system = resources.AsyncAgenticSystemResourceWithRawResponse(client.agentic_system)
+        self.datasets = resources.AsyncDatasetsResourceWithRawResponse(client.datasets)
+        self.evaluate = resources.AsyncEvaluateResourceWithRawResponse(client.evaluate)
+        self.evaluations = resources.AsyncEvaluationsResourceWithRawResponse(client.evaluations)
+        self.inference = resources.AsyncInferenceResourceWithRawResponse(client.inference)
+        self.safety = resources.AsyncSafetyResourceWithRawResponse(client.safety)
+        self.memory_banks = resources.AsyncMemoryBanksResourceWithRawResponse(client.memory_banks)
+        self.post_training = resources.AsyncPostTrainingResourceWithRawResponse(client.post_training)
+        self.reward_scoring = resources.AsyncRewardScoringResourceWithRawResponse(client.reward_scoring)
+        self.synthetic_data_generation = resources.AsyncSyntheticDataGenerationResourceWithRawResponse(
+            client.synthetic_data_generation
+        )
+        self.batch_inference = resources.AsyncBatchInferenceResourceWithRawResponse(client.batch_inference)
+class LlamaStackWithStreamedResponse:
+    def __init__(self, client: LlamaStack) -> None:
+        self.telemetry = resources.TelemetryResourceWithStreamingResponse(client.telemetry)
+        self.agentic_system = resources.AgenticSystemResourceWithStreamingResponse(client.agentic_system)
+        self.datasets = resources.DatasetsResourceWithStreamingResponse(client.datasets)
+        self.evaluate = resources.EvaluateResourceWithStreamingResponse(client.evaluate)
+        self.evaluations = resources.EvaluationsResourceWithStreamingResponse(client.evaluations)
+        self.inference = resources.InferenceResourceWithStreamingResponse(client.inference)
+        self.safety = resources.SafetyResourceWithStreamingResponse(client.safety)
+        self.memory_banks = resources.MemoryBanksResourceWithStreamingResponse(client.memory_banks)
+        self.post_training = resources.PostTrainingResourceWithStreamingResponse(client.post_training)
+        self.reward_scoring = resources.RewardScoringResourceWithStreamingResponse(client.reward_scoring)
+        self.synthetic_data_generation = resources.SyntheticDataGenerationResourceWithStreamingResponse(
+            client.synthetic_data_generation
+        )
+        self.batch_inference = resources.BatchInferenceResourceWithStreamingResponse(client.batch_inference)
+class AsyncLlamaStackWithStreamedResponse:
+    def __init__(self, client: AsyncLlamaStack) -> None:
+        self.telemetry = resources.AsyncTelemetryResourceWithStreamingResponse(client.telemetry)
+        self.agentic_system = resources.AsyncAgenticSystemResourceWithStreamingResponse(client.agentic_system)
+        self.datasets = resources.AsyncDatasetsResourceWithStreamingResponse(client.datasets)
+        self.evaluate = resources.AsyncEvaluateResourceWithStreamingResponse(client.evaluate)
+        self.evaluations = resources.AsyncEvaluationsResourceWithStreamingResponse(client.evaluations)
+        self.inference = resources.AsyncInferenceResourceWithStreamingResponse(client.inference)
+        self.safety = resources.AsyncSafetyResourceWithStreamingResponse(client.safety)
+        self.memory_banks = resources.AsyncMemoryBanksResourceWithStreamingResponse(client.memory_banks)
+        self.post_training = resources.AsyncPostTrainingResourceWithStreamingResponse(client.post_training)
+        self.reward_scoring = resources.AsyncRewardScoringResourceWithStreamingResponse(client.reward_scoring)
+        self.synthetic_data_generation = resources.AsyncSyntheticDataGenerationResourceWithStreamingResponse(
+            client.synthetic_data_generation
+        )
+        self.batch_inference = resources.AsyncBatchInferenceResourceWithStreamingResponse(client.batch_inference)
+# Shorter aliases for the synchronous and asynchronous client classes.
+Client = LlamaStack
+AsyncClient = AsyncLlamaStack