RubyGems - venice_client - Versions diffs - 1.0.0 → 1.0.2 - Mend

venice_client 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (586) hide show

data/swagger.yaml ADDED Viewed

@@ -0,0 +1,4791 @@
+openapi: 3.0.0  # OpenAPI Specification version this document is written against
+info:  # API metadata
+  title: Venice.ai API
+  version: "20250709.013306"  # quoted so it stays a string instead of a float
+  description: The Venice.ai API.
+  termsOfService: https://venice.ai/legal/tos
+externalDocs:
+  url: https://docs.venice.ai
+  description: Venice.ai API documentation
+servers:
+  - url: https://api.venice.ai/api/v1
+security:  # document-wide requirement: BearerAuth with an empty scope list
+  - BearerAuth: []
+tags:  # tag definitions (name + description)
+  - name: Chat
+    description: >-
+      Given a list of messages comprising a conversation, the model will
+      return a response.
+  - name: Models
+    description: List and describe the various models available in the API.
+  - name: Image
+    description: Generate and manipulate images using AI models.
+components:
+  securitySchemes:  # authentication schemes referenced by name from `security`
+    BearerAuth:  # HTTP bearer scheme; bearerFormat is declared as JWT
+      type: http
+      scheme: bearer
+      bearerFormat: JWT
+  schemas:
+    ChatCompletionRequest:  # object schema; `messages` and `model` are required, unknown keys rejected
+      type: object
+      properties:
+        frequency_penalty:  # number in [-2, 2], default 0
+          type: number
+          maximum: 2
+          minimum: -2
+          default: 0
+          description: Number between -2.0 and 2.0. Positive values penalize new tokens
+            based on their existing frequency in the text so far, decreasing the
+            model's likelihood to repeat the same line verbatim.
+        logprobs:  # boolean toggle; no default declared
+          type: boolean
+          description: Whether to include log probabilities in the response. This is not
+            supported by all models.
+          example: true
+        top_logprobs:  # non-negative integer; no upper bound declared
+          type: integer
+          minimum: 0
+          description: The number of highest probability tokens to return for each token
+            position.
+          example: 1
+        max_completion_tokens:  # integer; no bounds declared in the schema
+          type: integer
+          description: An upper bound for the number of tokens that can be generated for a
+            completion, including visible output tokens and reasoning tokens.
+        max_temp:  # number in [0, 2]
+          type: number
+          minimum: 0
+          maximum: 2
+          description: Maximum temperature value for dynamic temperature scaling.
+          example: 1.5
+        max_tokens:  # NOTE(review): prose says deprecated, but no `deprecated: true` flag is set — confirm
+          type: integer
+          description: The maximum number of tokens that can be generated in the chat
+            completion. This value can be used to control costs for text
+            generated via API. This value is now deprecated in favor of
+            max_completion_tokens.
+        messages:  # non-empty array (minItems: 1); each item matches one of four role-specific shapes
+          type: array
+          items:
+            anyOf:
+              - type: object  # shape 1: user message (role enum: user)
+                properties:
+                  content:  # either a plain string or an array of typed parts
+                    anyOf:
+                      - type: string
+                        title: String
+                      - type: array
+                        items:
+                          oneOf:  # each part is exactly one of: text part, image_url part
+                            - type: object  # text part
+                              properties:
+                                text:
+                                  type: string
+                                  minLength: 1
+                                  description: The prompt text of the message. Must be at-least one character in
+                                    length
+                                  example: Why is the sky blue?
+                                  title: Text Content Object
+                                type:  # discriminating literal: text
+                                  type: string
+                                  enum:
+                                    - text
+                                  title: Text Content String
+                              required:
+                                - text
+                                - type
+                              additionalProperties: false
+                              description: Text message type.
+                              example:
+                                text: Why is the sky blue?
+                                type: text
+                              title: text
+                            - type: object  # image_url part
+                              properties:
+                                image_url:  # wrapper object; only `url` is required
+                                  type: object
+                                  properties:
+                                    url:
+                                      type: string
+                                      description: The URL of the image. Can be a data URL with a base64 encoded image
+                                        or a public URL. URL must be publicly
+                                        accessible. Image must pass validation
+                                        checks and be >= 64 pixels square.
+                                      format: uri
+                                  required:
+                                    - url
+                                  description: Object containing the image URL information
+                                  title: Image URL Object
+                                type:  # discriminating literal: image_url
+                                  type: string
+                                  enum:
+                                    - image_url
+                              required:
+                                - image_url
+                                - type
+                              additionalProperties: false
+                              description: image_url message type.
+                              title: image_url
+                        title: Objects
+                  role:
+                    type: string
+                    enum:
+                      - user
+                required:
+                  - content
+                  - role
+                description: The user message is the input from the user. It is part of the
+                  conversation and is visible to the assistant.
+                title: User Message
+              - type: object  # shape 2: assistant message (role enum: assistant)
+                properties:
+                  content:  # string, array of text parts, or null
+                    anyOf:
+                      - type: string
+                        title: String
+                      - type: array
+                        items:  # text parts only (no image_url alternative here)
+                          type: object
+                          properties:
+                            text:
+                              type: string
+                              minLength: 1
+                              description: The prompt text of the message. Must be at-least one character in
+                                length
+                              example: Why is the sky blue?
+                              title: Text Content Object
+                            type:
+                              type: string
+                              enum:
+                                - text
+                              title: Text Content String
+                          required:
+                            - text
+                            - type
+                          additionalProperties: false
+                          description: Text message type.
+                          example:
+                            text: Why is the sky blue?
+                            type: text
+                          title: text
+                        title: Objects
+                      - nullable: true  # third alternative, titled "null"; declares no `type`
+                        title: "null"
+                  name:
+                    type: string
+                  reasoning_content:  # nullable string
+                    type: string
+                    nullable: true
+                  role:
+                    type: string
+                    enum:
+                      - assistant
+                  tool_calls:  # array whose items declare no type, only nullable
+                    type: array
+                    items:
+                      nullable: true
+                required:
+                  - content
+                  - role
+                description: The assistant message contains the response from the LLM.
+                title: Assistant Message
+              - type: object  # shape 3: tool message (role enum: tool); tool_call_id is also required
+                properties:
+                  content:  # plain string only
+                    type: string
+                  name:
+                    type: string
+                  reasoning_content:
+                    type: string
+                    nullable: true
+                  role:
+                    type: string
+                    enum:
+                      - tool
+                  tool_call_id:
+                    type: string
+                  tool_calls:
+                    type: array
+                    items:
+                      nullable: true
+                required:
+                  - content
+                  - role
+                  - tool_call_id
+                description: The tool message is a special message that is used to call a tool.
+                  It is not part of the conversation and is not visible to the
+                  user.
+                title: Tool Message
+              - type: object  # shape 4: system message (role enum: system)
+                properties:
+                  content:  # string or array of text parts
+                    anyOf:
+                      - type: string
+                        title: String
+                      - type: array
+                        items:
+                          type: object
+                          properties:
+                            text:
+                              type: string
+                              minLength: 1
+                              description: The prompt text of the message. Must be at-least one character in
+                                length
+                              example: Why is the sky blue?
+                              title: Text Content Object
+                            type:
+                              type: string
+                              enum:
+                                - text
+                              title: Text Content String
+                          required:
+                            - text
+                            - type
+                          additionalProperties: false
+                          description: Text message type.
+                          example:
+                            text: Why is the sky blue?
+                            type: text
+                          title: text
+                        title: Objects
+                  name:
+                    type: string
+                  role:
+                    type: string
+                    enum:
+                      - system
+                required:
+                  - content
+                  - role
+                description: The system message is a special message that provides context to
+                  the model. It is not part of the conversation and is not
+                  visible to the user.
+                title: System Message
+          minItems: 1  # an empty messages array is invalid
+          description: A list of messages comprising the conversation so far. Depending on
+            the model you use, different message types (modalities) are
+            supported, like text and images. For compatibility purposes, the
+            schema supports submitting multiple image_url messages, however,
+            only the last image_url message will be passed to and processed by
+            the model.
+        min_p:  # number in [0, 1]
+          type: number
+          minimum: 0
+          maximum: 1
+          description: Sets a minimum probability threshold for token selection. Tokens
+            with probabilities below this value are filtered out.
+          example: 0.05
+        min_temp:  # number in [0, 2]
+          type: number
+          minimum: 0
+          maximum: 2
+          description: Minimum temperature value for dynamic temperature scaling.
+          example: 0.1
+        model:  # required string (listed under the schema-level `required`)
+          type: string
+          description: The ID of the model you wish to prompt. May also be a model trait,
+            or a model compatibility mapping. See the models endpoint for a list
+            of models available to you. You can use feature suffixes to enable
+            features from the venice_parameters object. Please see "Model
+            Feature Suffix" documentation for more details.
+          example: venice-uncensored
+        n:  # integer, default 1; no bounds declared
+          type: integer
+          default: 1
+          description: How many chat completion choices to generate for each input
+            message. Note that you will be charged based on the number of
+            generated tokens across all of the choices. Keep n as 1 to minimize
+            costs.
+        presence_penalty:  # number in [-2, 2], default 0
+          type: number
+          maximum: 2
+          minimum: -2
+          default: 0
+          description: Number between -2.0 and 2.0. Positive values penalize new tokens
+            based on whether they appear in the text so far, increasing the
+            model's likelihood to talk about new topics.
+        repetition_penalty:  # non-negative number; no upper bound declared
+          type: number
+          minimum: 0
+          description: The parameter for repetition penalty. 1.0 means no penalty. Values
+            > 1.0 discourage repetition.
+          example: 1.2
+        seed:  # integer strictly greater than 0
+          type: integer
+          minimum: 0
+          exclusiveMinimum: true  # OpenAPI 3.0 boolean form: the minimum itself is excluded
+          description: The random seed used to generate the response. This is useful for
+            reproducibility.
+          example: 42
+        stop:  # a single string, an array of 1-4 strings, or null
+          anyOf:
+            - type: string
+              title: String
+            - type: array
+              items:
+                type: string
+              minItems: 1
+              maxItems: 4
+              title: Array of Strings
+            - nullable: true
+              title: "null"
+          description: Up to 4 sequences where the API will stop generating further
+            tokens. Defaults to null.
+        stop_token_ids:  # NOTE(review): items typed `number` though examples are whole numbers — confirm `integer` was not intended
+          type: array
+          items:
+            type: number
+          description: Array of token IDs where the API will stop generating further tokens.
+          example:
+            - 151643
+            - 151645
+        stream:  # boolean; no schema-level default is declared (prose says false)
+          type: boolean
+          description: Whether to stream back partial progress. Defaults to false.
+          example: true
+        stream_options:  # object with a single optional flag
+          type: object
+          properties:
+            include_usage:
+              type: boolean
+              description: Whether to include usage information in the stream.
+        temperature:  # number in [0, 2], default 0.7
+          type: number
+          minimum: 0
+          maximum: 2
+          default: 0.7
+          description: What sampling temperature to use, between 0 and 2. Higher values
+            like 0.8 will make the output more random, while lower values like
+            0.2 will make it more focused and deterministic. We generally
+            recommend altering this or top_p but not both.
+          example: 0.7
+        top_k:  # non-negative integer
+          type: integer
+          minimum: 0
+          description: The number of highest probability vocabulary tokens to keep for
+            top-k-filtering.
+          example: 40
+        top_p:  # number in [0, 1], default 0.9
+          type: number
+          minimum: 0
+          maximum: 1
+          default: 0.9
+          description: An alternative to sampling with temperature, called nucleus
+            sampling, where the model considers the results of the tokens with
+            top_p probability mass. So 0.1 means only the tokens comprising the
+            top 10% probability mass are considered.
+          example: 0.9
+        user:  # free-form string
+          type: string
+          description: This field is discarded on the request but is supported in the
+            Venice API for compatibility with OpenAI clients.
+        venice_parameters:  # Venice-specific options; no `required` list, so every field is optional
+          type: object
+          properties:
+            character_slug:  # free-form string
+              type: string
+              description: The character slug of a public Venice character.
+              example: venice
+            strip_thinking_response:  # boolean, default false
+              type: boolean
+              default: false
+              description: Strip <think></think> blocks from the response. Applicable only to
+                reasoning / thinking models. Also available to use as a model
+                feature suffix. Defaults to false.
+              example: false
+            disable_thinking:  # boolean, default false
+              type: boolean
+              default: false
+              description: On supported reasoning models, will disable thinking and strip the
+                <think></think> blocks from the response. Defaults to false.
+              example: false
+            enable_web_search:  # tri-state STRING (auto / off / on), not a boolean
+              type: string
+              enum:  # "off"/"on" must stay quoted: YAML 1.1 loaders read bare off/on as booleans
+                - auto
+                - "off"
+                - "on"
+              default: "off"  # quoted for the same reason; a bare off would load as false
+              description: Enable web search for this request. Defaults to off. On will force
+                web search on the request. Auto will enable it based on the
+                model's discretion. Citations will be returned either in the
+                first chunk of a streaming result, or in the non streaming
+                response.
+              example: auto
+            enable_web_citations:  # boolean, default false
+              type: boolean
+              default: false
+              description: When web search is enabled, this will request that the LLM cite its
+                sources using a [REF]0[/REF] format. Defaults to false.
+              example: true
+            include_search_results_in_stream:  # boolean, default false; described as experimental
+              type: boolean
+              default: false
+              description: Experimental feature - When set to true, the LLM will include
+                search results in the stream as the first emitted chunk.
+                Defaults to false.
+              example: true
+            include_venice_system_prompt:  # boolean, default true
+              type: boolean
+              default: true
+              description: Whether to include the Venice supplied system prompts along side
+                specified system prompts. Defaults to true.
+          description: Unique parameters to Venice's API implementation.
+        parallel_tool_calls:  # boolean, default true
+          type: boolean
+          default: true
+          description: Whether to enable parallel function calling during tool use.
+          example: false
+        response_format:  # exactly one of two object shapes, told apart by the `type` literal
+          oneOf:
+            - type: object  # shape: json_schema (requires both `json_schema` and `type`)
+              properties:
+                json_schema:  # open-ended object; values are unconstrained beyond nullable
+                  type: object
+                  additionalProperties:
+                    nullable: true
+                type:
+                  type: string
+                  enum:
+                    - json_schema
+              required:
+                - json_schema
+                - type
+              additionalProperties: false
+              description: The JSON Schema that should be used to validate and format the
+                response.
+              example:
+                json_schema:
+                  properties:
+                    age:
+                      type: number
+                    name:
+                      type: string
+                  required:
+                    - name
+                    - age
+                  type: object
+                type: json_schema
+              title: json_schema
+            - type: object  # shape: json_object (only `type` is allowed and required)
+              properties:
+                type:
+                  type: string
+                  enum:
+                    - json_object
+              required:
+                - type
+              additionalProperties: false
+              description: The response should be formatted as a JSON object. This is a
+                deprecated implementation and the preferred use is json_schema.
+              title: json_object
+          description: Format in which the response should be returned.
+        tool_choice:  # either a {type, function: {name}} object or a bare string
+          anyOf:
+            - type: object
+              properties:
+                function:
+                  type: object
+                  properties:
+                    name:
+                      type: string
+                  required:
+                    - name
+                  additionalProperties: false
+                type:  # unconstrained string (no enum declared)
+                  type: string
+              required:
+                - function
+                - type
+              additionalProperties: false
+            - type: string
+        tools:  # nullable array; each item must carry `function`
+          type: array
+          nullable: true
+          items:
+            type: object
+            properties:
+              function:  # only `name` is required; no other keys allowed
+                type: object
+                properties:
+                  description:
+                    type: string
+                  name:
+                    type: string
+                  parameters:  # open-ended object
+                    type: object
+                    additionalProperties:
+                      nullable: true
+                required:
+                  - name
+                additionalProperties: false
+              id:
+                type: string
+              type:
+                type: string
+            required:
+              - function
+            description: A tool that can be called by the model. Currently, only functions
+              are supported as tools.
+            title: Tool Call
+          description: A list of tools the model may call. Currently, only functions are
+            supported as a tool. Use this to provide a list of functions the
+            model may generate JSON inputs for.
+      required:  # schema-level: properties every ChatCompletionRequest must carry
+        - messages
+        - model
+      additionalProperties: false  # unknown top-level keys are rejected
+    GenerateImageRequest:  # object schema; `model` and `prompt` are required, unknown keys rejected
+      type: object
+      properties:
+        cfg_scale:  # number in (0, 20]
+          type: number
+          minimum: 0
+          exclusiveMinimum: true  # OpenAPI 3.0 boolean form: 0 itself is excluded
+          maximum: 20
+          description: CFG scale parameter. Higher values lead to more adherence to the
+            prompt.
+          example: 7.5
+        embed_exif_metadata:  # boolean, default false
+          type: boolean
+          default: false
+          description: Embed prompt generation information into the image's EXIF metadata.
+          example: false
+        format:  # one of jpeg / png / webp, default webp
+          type: string
+          enum:
+            - jpeg
+            - png
+            - webp
+          default: webp
+          description: "The image format to return. WebP are smaller and optimized for web
+            use. PNG are higher quality but larger in file size. "
+          example: webp
+        height:  # integer in (0, 1280], default 1024
+          type: integer
+          minimum: 0
+          exclusiveMinimum: true
+          maximum: 1280
+          default: 1024
+          description: Height of the generated image. Each model has a specific height and
+            width divisor listed in the widthHeightDivisor constraint in the
+            model list endpoint.
+          example: 1024
+        hide_watermark:  # boolean, default false
+          type: boolean
+          default: false
+          description: Whether to hide the Venice watermark. Venice may ignore this
+            parameter for certain generated content.
+          example: false
+        inpaint:  # flagged deprecated; declares no type, only nullable
+          nullable: true
+          description: This feature is deprecated and was disabled on May 19th, 2025. A
+            revised in-painting API will be launched in the near future.
+          deprecated: true
+        lora_strength:  # integer in [0, 100]
+          type: integer
+          minimum: 0
+          maximum: 100
+          description: Lora strength for the model. Only applies if the model uses
+            additional Loras.
+          example: 50
+        model:  # required string
+          type: string
+          description: The model to use for image generation.
+          example: hidream
+        negative_prompt:  # string, at most 1500 characters
+          type: string
+          maxLength: 1500
+          description: A description of what should not be in the image. Character limit
+            is model specific and is listed in the promptCharacterLimit
+            constraint in the model list endpoint.
+          example: Clouds, Rain, Snow
+        prompt:  # required string, 1-1500 characters
+          type: string
+          minLength: 1
+          maxLength: 1500
+          description: The description for the image. Character limit is model specific
+            and is listed in the promptCharacterLimit setting in the model list
+            endpoint.
+          example: A beautiful sunset over a mountain range
+        return_binary:  # boolean, default false
+          type: boolean
+          default: false
+          description: Whether to return binary image data instead of base64.
+          example: false
+        safe_mode:  # boolean, default true
+          type: boolean
+          default: true
+          description: Whether to use safe mode. If enabled, this will blur images that
+            are classified as having adult content.
+          example: false
+        seed:  # integer in [-999999999, 999999999]
+          type: integer
+          minimum: -999999999
+          maximum: 999999999
+          default: 0  # NOTE(review): a fixed default sits oddly with "random seed if not provided" — confirm
+          description: Random seed for generation. If not provided, a random seed will be
+            used.
+          example: 123456789
+        steps:  # integer in (0, 50], default 20
+          type: integer
+          minimum: 0
+          exclusiveMinimum: true
+          maximum: 50
+          default: 20
+          description: "Number of inference steps. The following models have reduced max
+            steps from the global max: venice-sd35: 30 max steps, hidream: 50
+            max steps, fluently-xl: 50 max steps, flux-dev: 30 max steps,
+            flux-dev-uncensored-11: 30 max steps, flux-dev-uncensored: 30 max
+            steps, lustify-sdxl: 50 max steps, pony-realism: 50 max steps,
+            stable-diffusion-3.5: 30 max steps. These constraints are exposed in
+            the model list endpoint for each model."
+          example: 20
+        style_preset:  # free-form string (no enum declared)
+          type: string
+          description: An image style to apply to the image. Visit
+            https://docs.venice.ai/api-reference/endpoint/image/styles for more
+            details.
+          example: 3D Model
+        width:  # integer in (0, 1280], default 1024
+          type: integer
+          minimum: 0
+          exclusiveMinimum: true
+          maximum: 1280
+          default: 1024
+          description: Width of the generated image. Each model has a specific height and
+            width divisor listed in the widthHeightDivisor constraint in the
+            model list endpoint.
+          example: 1024
+      required:
+        - model
+        - prompt
+      additionalProperties: false  # unknown keys are rejected
+    SimpleGenerateImageRequest:  # object schema; only `prompt` is required, unknown keys rejected
+      type: object
+      properties:
+        background:  # nullable enum, default auto; description says it is not used
+          type: string
+          nullable: true
+          enum:
+            - transparent
+            - opaque
+            - auto
+          default: auto
+          description: This parameter is not used in Venice image generation but is
+            supported for compatibility with OpenAI API
+          example: auto
+        model:  # string whose default is the literal "default"
+          type: string
+          default: default
+          description: The model to use for image generation. Defaults to Venice's default
+            image model. If a non-existent model is specified (ie an OpenAI
+            model name), it will default to Venice's default image model.
+          example: hidream
+        moderation:  # nullable enum (low / auto), default auto
+          type: string
+          nullable: true
+          enum:
+            - low
+            - auto
+          default: auto
+          description: auto enables safe venice mode which will blur out adult content.
+            low disables safe venice mode.
+          example: auto
+        n:  # minimum == maximum == 1, so 1 is the only accepted integer (null also allowed)
+          type: integer
+          nullable: true
+          minimum: 1
+          maximum: 1
+          default: 1
+          description: Number of images to generate. Venice presently only supports 1
+            image per request.
+          example: 1
+        output_compression:  # nullable integer in [0, 100], default 100
+          type: integer
+          nullable: true
+          minimum: 0
+          maximum: 100
+          default: 100
+          description: This parameter is not used in Venice image generation but is
+            supported for compatibility with OpenAI API
+        output_format:  # one of jpeg / png / webp, default png
+          type: string
+          enum:
+            - jpeg
+            - png
+            - webp
+          default: png
+          description: Output format for generated images
+          example: png
+        prompt:  # required string, 1-1500 characters
+          type: string
+          minLength: 1
+          maxLength: 1500
+          description: A text description of the desired image.
+          example: A beautiful sunset over mountain ranges
+        quality:  # nullable enum, default auto; description says it is not used
+          type: string
+          nullable: true
+          enum:
+            - auto
+            - high
+            - medium
+            - low
+            - hd
+            - standard
+          default: auto
+          description: This parameter is not used in Venice image generation but is
+            supported for compatibility with OpenAI API
+          example: auto
+        response_format:  # nullable enum (b64_json / url), default b64_json
+          type: string
+          nullable: true
+          enum:
+            - b64_json
+            - url
+          default: b64_json
+          description: Response format. URL will be a data URL.
+          example: b64_json
+        size:  # nullable enum of fixed WxH strings plus auto
+          type: string
+          nullable: true
+          enum:
+            - auto
+            - 256x256
+            - 512x512
+            - 1024x1024
+            - 1536x1024
+            - 1024x1536
+            - 1792x1024
+            - 1024x1792
+          default: auto  # NOTE(review): description below names 1024x1024 as the default — confirm which is intended
+          description: Size of generated images. Default is 1024x1024
+          example: 1024x1024
+        style:  # nullable enum (vivid / natural), default natural; description says it is not used
+          type: string
+          nullable: true
+          enum:
+            - vivid
+            - natural
+          default: natural
+          description: This parameter is not used in Venice image generation but is
+            supported for compatibility with OpenAI API
+          example: natural
+        user:  # free-form string; description says it is not used
+          type: string
+          description: This parameter is not used in Venice image generation but is
+            supported for compatibility with OpenAI API
+          example: user123
+      required:
+        - prompt
+      additionalProperties: false  # unknown keys are rejected
+    UpscaleImageRequest:  # object schema; only `image` is required, unknown keys rejected
+      type: object
+      properties:
+        enhance:  # a real boolean, or the string "true" / "false"
+          anyOf:
+            - type: boolean
+            - type: string
+              enum:
+                - "true"
+                - "false"
+          default: "false"  # the default is the string form, not the boolean
+          description: Whether to enhance the image using Venice's image engine during
+            upscaling. Must be true if scale is 1.
+          example: true
+        enhanceCreativity:  # nullable number in [0, 1], default 0.5
+          type: number
+          nullable: true
+          minimum: 0
+          maximum: 1
+          default: 0.5
+          description: Higher values let the enhancement AI change the image more. Setting
+            this to 1 effectively creates an entirely new image.
+          example: 0.5
+        enhancePrompt:  # string, at most 1500 characters
+          type: string
+          maxLength: 1500
+          description: The text to image style to apply during prompt enhancement. Does
+            best with short descriptive prompts, like gold, marble or angry,
+            menacing.
+          example: gold
+        image:  # required; schema accepts any value or a string
+          anyOf:
+            - {}  # empty schema: places no constraint on the value
+            - type: string
+          description: The image to upscale. Can be either a file upload or a
+            base64-encoded string. Image dimensions must be at least 65536
+            pixels and final dimensions after scaling must not exceed 16777216
+            pixels.
+        replication:  # nullable number in [0, 1], default 0.35
+          type: number
+          nullable: true
+          minimum: 0
+          maximum: 1
+          default: 0.35
+          description: How strongly lines and noise in the base image are preserved.
+            Higher values are noisier but less plastic/AI
+            "generated"/hallucinated. Must be between 0 and 1.
+          example: 0.35
+        scale:  # number in [1, 4], default 2; the enhance/scale coupling is stated only in prose
+          type: number
+          minimum: 1
+          maximum: 4
+          default: 2
+          description: The scale factor for upscaling the image. Must be a number between
+            1 and 4. Scale of 1 requires enhance to be set true and will only
+            run the enhancer. Scale must be > 1 if enhance is false. A scale of
+            4 with large images will result in the scale being dynamically set
+            to ensure the final image stays within the maximum size limits.
+          example: 2
+      required:
+        - image
+      additionalProperties: false  # unknown keys are rejected
+      description: Upscale or enhance an image based on the supplied parameters. Using
+        a scale of 1 with enhance enabled will only run the enhancer.
+      example:  # sample payload; the image value is an elided base64 string
+        enhance: true
+        enhanceCreativity: 0.5
+        enhancePrompt: gold
+        image: iVBORw0KGgoAAAANSUhEUgAAAgAAAAIACAIAAAB7GkOtAAAAIGNIUk0A...
+        scale: 2
+    EditImageRequest:  # Request body schema for prompt-driven image editing.
+      type: object
+      properties:
+        prompt:  # Edit instruction, capped at 1500 characters.
+          type: string
+          maxLength: 1500
+          description: 'The text directions to edit or modify the image. Does best with
+            short but descriptive prompts. IE: "Change the color of", "remove
+            the object", "change the sky to a sunrise", etc.'
+          example: Change the color of the sky to a sunrise
+        image:
+          anyOf:
+            - {}  # Empty schema: no constraint; presumably the file-upload branch.
+            - type: string
+          description: The image to edit. Can be either a file upload or a base64-encoded
+            string. Image dimensions must be at least 65536 pixels and must not
+            exceed 33177600 pixels.
+      required:  # Both declared properties are mandatory.
+        - prompt
+        - image
+      additionalProperties: false  # Properties not declared above are not allowed.
+      description: Edit an image based on the supplied prompt.
+      example:  # Sample request; the image value is a truncated base64 string.
+        prompt: Colorize
+        image: iVBORw0KGgoAAAANSUhEUgAAAgAAAAIACAIAAAB7GkOtAAAAIGNIUk0A...
+    CreateEmbeddingRequestSchema:  # Request body schema for creating embeddings.
+      type: object
+      properties:
+        dimensions:
+          type: integer
+          minimum: 1
+          description: The number of dimensions the resulting output embeddings should have.
+        encoding_format:
+          type: string
+          enum:
+            - float
+            - base64
+          default: float
+          description: The format to return the embeddings in. Can be either `float` or
+            `base64`.
+          example: float
+        input:  # Four accepted shapes: string, string[], integer[], integer[][].
+          anyOf:
+            - type: string  # Variant 1: a single non-empty string.
+              minLength: 1
+              description: The string that will be turned into an embedding. Cannot be an
+                empty string.
+              example: This is a test.
+              title: string
+            - type: array  # Variant 2: 1 to 2048 strings.
+              items:
+                type: string
+              minItems: 1
+              maxItems: 2048  # Item-count cap; the description calls this "dimensions".
+              description: The array of strings that will be turned into an embedding. Array
+                must be 2048 dimensions or less.
+              example:
+                - This is a test.
+              title: array
+            - type: array  # Variant 3: 1 to 2048 integers, each >= 1.
+              items:
+                type: integer
+                minimum: 1  # NOTE(review): variant 4's inner integers have no minimum.
+              minItems: 1
+              maxItems: 2048
+              description: The array of integers that will be turned into an embedding. Array
+                must be 2048 dimensions or less.
+              example:
+                - 1212
+                - 318
+                - 257
+                - 1332
+                - 13
+              title: array
+            - type: array  # Variant 4: 1 to 2048 non-empty integer arrays.
+              items:
+                type: array
+                items:
+                  type: integer
+                minItems: 1
+              minItems: 1
+              maxItems: 2048
+              description: The array of arrays containing integers that will be turned into an
+                embedding. Array must be 2048 dimensions or less.
+              example:
+                - - 1212
+                  - 318
+                  - 257
+                  - 1332
+                  - 13
+              title: array
+          description: Input text to embed, encoded as a string or array of tokens. To
+            embed multiple inputs in a single request, pass an array of strings
+            or array of token arrays. The input must not exceed the max input
+            tokens for the model (8192 tokens), cannot be an empty string, and
+            any array must be 2048 dimensions or less.
+          example: The quick brown fox jumped over the lazy dog
+        model:  # Any string validates; the enum branch only names a known ID.
+          anyOf:
+            - type: string
+            - type: string
+              enum:
+                - text-embedding-bge-m3
+          description: ID of the model to use. You can use the List models API to see all
+            of your available models, or see our Model overview for descriptions
+            of them.
+          example: text-embedding-bge-m3
+        user:
+          type: string
+          description: This is an unused parameter and is discarded by Venice. It is
+            supported solely for API compatibility with OpenAI.
+      required:
+        - input
+        - model
+      additionalProperties: false  # Properties not declared above are not allowed.
+      description: Create embeddings for the supplied input.
+      example:  # Sample request using the single-string input variant.
+        encoding_format: float
+        input: The quick brown fox jumped over the lazy dog
+        model: text-embedding-bge-m3
+    CreateSpeechRequestSchema:  # Request body schema for text-to-speech generation.
+      type: object
+      properties:
+        input:  # The only required property; 1 to 4096 characters.
+          type: string
+          minLength: 1
+          maxLength: 4096
+          description: The text to generate audio for. The maximum length is 4096
+            characters.
+          example: Hello, this is a test of the text to speech system.
+        model:
+          type: string
+          enum:
+            - tts-kokoro
+          default: tts-kokoro
+          description: The model ID of a Venice TTS model.
+          example: tts-kokoro
+        response_format:
+          type: string
+          enum:
+            - mp3
+            - opus
+            - aac
+            - flac
+            - wav
+            - pcm
+          default: mp3
+          description: The format to return the audio in.
+          example: mp3
+        speed:
+          type: number
+          minimum: 0.25
+          maximum: 4
+          default: 1
+          description: The speed of the generated audio. Select a value from 0.25 to 4.0.
+            1.0 is the default.
+          example: 1
+        streaming:
+          type: boolean
+          default: false
+          description: Should the content stream back sentence by sentence or be processed
+            and returned as a complete audio file.
+          example: true
+        voice:  # Must be one of the voice IDs enumerated below.
+          type: string
+          enum:
+            - af_alloy
+            - af_aoede
+            - af_bella
+            - af_heart
+            - af_jadzia
+            - af_jessica
+            - af_kore
+            - af_nicole
+            - af_nova
+            - af_river
+            - af_sarah
+            - af_sky
+            - am_adam
+            - am_echo
+            - am_eric
+            - am_fenrir
+            - am_liam
+            - am_michael
+            - am_onyx
+            - am_puck
+            - am_santa
+            - bf_alice
+            - bf_emma
+            - bf_lily
+            - bm_daniel
+            - bm_fable
+            - bm_george
+            - bm_lewis
+            - zf_xiaobei
+            - zf_xiaoni
+            - zf_xiaoxiao
+            - zf_xiaoyi
+            - zm_yunjian
+            - zm_yunxi
+            - zm_yunxia
+            - zm_yunyang
+            - ff_siwis
+            - hf_alpha
+            - hf_beta
+            - hm_omega
+            - hm_psi
+            - if_sara
+            - im_nicola
+            - jf_alpha
+            - jf_gongitsune
+            - jf_nezumi
+            - jf_tebukuro
+            - jm_kumo
+            - pf_dora
+            - pm_alex
+            - pm_santa
+            - ef_dora
+            - em_alex
+            - em_santa
+          default: af_sky
+          description: The voice to use when generating the audio.
+          example: af_sky
+      required:
+        - input
+      additionalProperties: false  # Properties not declared above are not allowed.
+      description: Request to generate audio from text.
+      example:  # Sample request that spells out every property's default value.
+        input: Hello, welcome to Venice Voice.
+        model: tts-kokoro
+        response_format: mp3
+        speed: 1
+        streaming: false
+        voice: af_sky
+    BillingUsageRequest:  # Filter, sort and pagination options; no property is required.
+      type: object
+      properties:
+        currency:
+          type: string
+          description: Filter by currency
+          enum:
+            - USD
+            - VCU
+            - DIEM
+          example: USD
+        endDate:
+          type: string
+          format: date-time
+          description: End date for filtering records (ISO 8601)
+          example: "2024-12-31T23:59:59Z"  # Quoted so YAML 1.1 loaders keep it a string.
+        limit:
+          type: integer
+          minimum: 0
+          exclusiveMinimum: true  # OpenAPI 3.0 boolean form: value must be > 0.
+          maximum: 500
+          default: 200
+          description: Number of items per page
+          example: 200
+        page:
+          type: integer
+          minimum: 0
+          exclusiveMinimum: true  # OpenAPI 3.0 boolean form: value must be > 0.
+          default: 1
+          description: Page number for pagination
+          example: 1
+        sortOrder:
+          type: string
+          enum:
+            - asc
+            - desc
+          default: desc
+          description: Sort order for createdAt field
+          example: desc
+        startDate:
+          type: string
+          format: date-time
+          description: Start date for filtering records (ISO 8601)
+          example: "2024-01-01T00:00:00Z"  # Quoted so YAML 1.1 loaders keep it a string.
+    BillingUsageResponse:  # Paged list of billing usage entries plus pagination totals.
+      type: object
+      properties:
+        data:  # One object per billing usage entry.
+          type: array
+          items:
+            type: object
+            properties:
+              amount:
+                type: number
+                description: The total amount charged for the billing usage entry
+              currency:
+                type: string
+                enum:
+                  - USD
+                  - VCU
+                  - DIEM
+                description: The currency charged for the billing usage entry
+                example: USD
+              inferenceDetails:
+                type: object
+                nullable: true  # The key is required, but its value may be null.
+                properties:
+                  completionTokens:
+                    type: number
+                    nullable: true
+                    description: Number of tokens used in the completion. Only present for LLM
+                      usage.
+                  inferenceExecutionTime:
+                    type: number
+                    nullable: true
+                    description: Time taken for inference execution in milliseconds
+                  promptTokens:
+                    type: number
+                    nullable: true
+                    description: Number of tokens requested in the prompt. Only present for LLM
+                      usage.
+                  requestId:
+                    type: string
+                    nullable: true
+                    description: Unique identifier for the inference request
+                required:  # All four keys must be present; each value may be null.
+                  - completionTokens
+                  - inferenceExecutionTime
+                  - promptTokens
+                  - requestId
+                description: Details about the related inference request, if applicable
+              notes:
+                type: string
+                description: Notes about the billing usage entry
+              pricePerUnitUsd:
+                type: number
+                description: The price per unit in USD
+              sku:
+                type: string
+                description: The product associated with the billing usage entry
+              timestamp:
+                type: string
+                description: The timestamp the billing usage entry was created
+                example: "2025-01-01T00:00:00Z"  # Quoted so YAML 1.1 loaders keep it a string.
+              units:
+                type: number
+                description: The number of units consumed
+            required:  # Every declared entry property is mandatory.
+              - amount
+              - currency
+              - inferenceDetails
+              - notes
+              - pricePerUnitUsd
+              - sku
+              - timestamp
+              - units
+        pagination:
+          type: object
+          properties:
+            limit:
+              type: number
+            page:
+              type: number
+            total:
+              type: number
+            totalPages:
+              type: number
+          required:
+            - limit
+            - page
+            - total
+            - totalPages
+      required:
+        - data
+        - pagination
+      additionalProperties: false  # Applies to the top level only, not to nested objects.
+      description: The response schema for the billing usage endpoint
+      example:
+        data:
+          - amount: -0.1
+            currency: DIEM
+            inferenceDetails: null
+            notes: API Inference
+            pricePerUnitUsd: 0.1
+            sku: venice-sd35-image-unit
+            timestamp: "2025-01-01T00:00:00Z"  # Illustrative string; was an empty mapping.
+            units: 1
+          - amount: -0.06356
+            currency: DIEM
+            inferenceDetails:
+              completionTokens: 227
+              inferenceExecutionTime: 2964
+              promptTokens: 339
+              requestId: chatcmpl-4007fd29f42b7d3c4107f4345e8d174a
+            notes: API Inference
+            pricePerUnitUsd: 2.8
+            sku: llama-3.3-70b-llm-output-mtoken
+            timestamp: "2025-01-01T00:00:00Z"  # Illustrative string; was an empty mapping.
+            units: 0.000227
+        pagination:
+          limit: 1
+          page: 200
+          total: 56090
+          totalPages: 56090
+    ModelResponse:  # One model entry: identity fields, a type, and a nested model_spec.
+      type: object
+      properties:
+        created:
+          type: number  # Presumably a Unix timestamp in seconds (see example); confirm.
+          description: Release date on Venice API
+          example: 1699000000
+        id:
+          type: string
+          description: Model ID
+          example: venice-uncensored
+        model_spec:  # Declares no required list: every direct key here is optional.
+          type: object
+          properties:
+            availableContextTokens:
+              type: number
+              description: The context length supported by the model. Only applicable for text
+                models.
+              example: 32768
+            beta:
+              type: boolean
+              description: Is this model in beta?
+              example: false
+            capabilities:  # Text-model flags; when present, all eight keys are required.
+              type: object
+              properties:
+                optimizedForCode:
+                  type: boolean
+                  description: Is the LLM optimized for coding?
+                  example: true
+                quantization:
+                  type: string
+                  enum:
+                    - fp8
+                    - fp16
+                    - not-available
+                  description: The quantization type of the running model.
+                  example: fp8
+                supportsFunctionCalling:
+                  type: boolean
+                  description: Does the LLM model support function calling?
+                  example: true
+                supportsReasoning:
+                  type: boolean
+                  description: Does the model support reasoning with <thinking> blocks of output.
+                  example: true
+                supportsResponseSchema:
+                  type: boolean
+                  description: Does the LLM model support response schema? Only models that
+                    support function calling can support response_schema.
+                  example: true
+                supportsVision:
+                  type: boolean
+                  description: Does the LLM support vision?
+                  example: true
+                supportsWebSearch:
+                  type: boolean
+                  description: Does the LLM model support web search?
+                  example: true
+                supportsLogProbs:
+                  type: boolean
+                  description: Does the LLM model support logprobs parameter?
+                  example: true
+              required:
+                - optimizedForCode
+                - quantization
+                - supportsFunctionCalling
+                - supportsReasoning
+                - supportsResponseSchema
+                - supportsVision
+                - supportsWebSearch
+                - supportsLogProbs
+              additionalProperties: false
+              description: Text model specific capabilities.
+            constraints:
+              anyOf:  # Either the image-model shape or the text-model shape.
+                - type: object
+                  properties:
+                    promptCharacterLimit:
+                      type: number
+                      description: The maximum supported prompt length.
+                      example: 2048
+                    steps:
+                      type: object
+                      properties:
+                        default:
+                          type: number
+                          description: The default steps value for the model
+                          example: 25
+                        max:
+                          type: number
+                          description: The maximum supported steps value for the model
+                          example: 50
+                      required:
+                        - default
+                        - max
+                    widthHeightDivisor:
+                      type: number
+                      description: The requested width and height of the image generation must be
+                        divisible by this value.
+                      example: 8
+                  required:
+                    - promptCharacterLimit
+                    - steps
+                    - widthHeightDivisor
+                  description: Constraints that apply to image models.
+                  title: Image Model Constraints
+                - type: object
+                  properties:
+                    temperature:
+                      type: object
+                      properties:
+                        default:
+                          type: number
+                          description: The default temperature value for the model
+                          example: 0.7
+                      required:
+                        - default
+                    top_p:
+                      type: object
+                      properties:
+                        default:
+                          type: number
+                          description: The default top_p value for the model
+                          example: 0.9
+                      required:
+                        - default
+                  required:
+                    - temperature
+                    - top_p
+                  description: Constraints that apply to text models.
+                  title: Text Model Constraints
+              description: Constraints that apply to this model.
+            name:
+              type: string
+              description: The name of the model.
+              example: Venice Uncensored 1.1
+            modelSource:
+              type: string
+              description: The source of the model, such as a URL to the model repository.
+              example: https://huggingface.co/cognitivecomputations/Dolphin-Mistral-24B-Venice-Edition
+            offline:
+              type: boolean
+              default: false
+              description: Is this model presently offline?
+              example: false
+            pricing:
+              anyOf:  # Three shapes: LLM (input/output), image (generation/upscale), audio (input).
+                - type: object
+                  properties:
+                    input:
+                      type: object
+                      properties:
+                        usd:
+                          type: number
+                          description: USD cost per million input tokens
+                          example: 0.7
+                        vcu:
+                          type: number
+                          description: VCU cost per million input tokens (deprecated - use Diem instead)
+                          deprecated: true
+                          example: 7
+                        diem:
+                          type: number
+                          description: Diem cost per million input tokens
+                          example: 7
+                      required:  # The deprecated vcu key is still required.
+                        - usd
+                        - vcu
+                        - diem
+                    output:
+                      type: object
+                      properties:
+                        usd:
+                          type: number
+                          description: USD cost per million output tokens
+                          example: 2.8
+                        vcu:
+                          type: number
+                          description: VCU cost per million output tokens (deprecated - use Diem instead)
+                          deprecated: true
+                          example: 28
+                        diem:
+                          type: number
+                          description: Diem cost per million output tokens
+                          example: 28
+                      required:
+                        - usd
+                        - vcu
+                        - diem
+                  required:
+                    - input
+                    - output
+                  description: Token-based pricing for chat models
+                  title: LLM Model Pricing
+                - type: object
+                  properties:
+                    generation:
+                      type: object
+                      properties:
+                        usd:
+                          type: number
+                          description: USD cost per image generation
+                          example: 0.01
+                        vcu:
+                          type: number
+                          description: VCU cost per image generation (deprecated - use Diem instead)
+                          deprecated: true
+                          example: 0.1
+                        diem:
+                          type: number
+                          description: Diem cost per image generation
+                          example: 0.1
+                      required:
+                        - usd
+                        - vcu
+                        - diem
+                    upscale:  # Keyed by upscale factor; both 2x and 4x are required.
+                      type: object
+                      properties:
+                        2x:
+                          type: object
+                          properties:
+                            usd:
+                              type: number
+                              description: USD cost for 2x upscale
+                              example: 0.02
+                            vcu:
+                              type: number
+                              description: VCU cost for 2x upscale (deprecated - use Diem instead)
+                              deprecated: true
+                              example: 0.2
+                            diem:
+                              type: number
+                              description: Diem cost for 2x upscale
+                              example: 0.2
+                          required:
+                            - usd
+                            - vcu
+                            - diem
+                        4x:
+                          type: object
+                          properties:
+                            usd:
+                              type: number
+                              description: USD cost for 4x upscale
+                              example: 0.08
+                            vcu:
+                              type: number
+                              description: VCU cost for 4x upscale (deprecated - use Diem instead)
+                              deprecated: true
+                              example: 0.8
+                            diem:
+                              type: number
+                              description: Diem cost for 4x upscale
+                              example: 0.8
+                          required:
+                            - usd
+                            - vcu
+                            - diem
+                      required:
+                        - 2x
+                        - 4x
+                  required:
+                    - generation
+                    - upscale
+                  description: Pricing for image generation and upscaling
+                  title: Image Model Pricing
+                - type: object
+                  properties:
+                    input:
+                      type: object
+                      properties:
+                        usd:
+                          type: number
+                          description: USD cost per million input characters
+                          example: 3.5
+                        vcu:
+                          type: number
+                          description: VCU cost per million input characters (deprecated - use Diem
+                            instead)
+                          deprecated: true
+                          example: 35
+                        diem:
+                          type: number
+                          description: Diem cost per million input characters
+                          example: 35
+                      required:
+                        - usd
+                        - vcu
+                        - diem
+                  required:
+                    - input
+                  description: Pricing for audio models (TTS)
+                  title: Audio Model Pricing
+              description: Pricing details for the model
+            traits:
+              type: array
+              items:
+                type: string
+              description: Traits that apply to this model. You can specify a trait to
+                auto-select a model vs. specifying the model ID in your request
+                to avoid breakage as Venice updates and iterates on its models.
+              example:
+                - default_code
+            voices:
+              type: array
+              items:
+                type: string
+              description: The voices available for this TTS model. Only applicable for TTS
+                models.
+              example:
+                - af_alloy
+                - af_aoede
+                - af_bella
+                - af_heart
+                - af_jadzia
+        object:
+          type: string
+          enum:  # Single-value enum: always the literal "model".
+            - model
+          description: Object type
+          example: model
+        owned_by:
+          type: string
+          enum:  # Single-value enum: always "venice.ai".
+            - venice.ai
+          description: Who runs the model
+          example: venice.ai
+        type:
+          type: string
+          enum:
+            - embedding
+            - image
+            - text
+            - tts
+            - upscale
+            - inpaint
+          description: Model type
+          example: text
+      required:  # created is the only top-level property that is not required.
+        - id
+        - model_spec
+        - object
+        - owned_by
+        - type
+      description: Response schema for model information
+      example:  # Sample text model entry.
+        created: 1727966436
+        id: llama-3.2-3b
+        model_spec:
+          availableContextTokens: 131072
+          capabilities:
+            optimizedForCode: false
+            quantization: fp16
+            supportsFunctionCalling: true
+            supportsReasoning: false
+            supportsResponseSchema: true
+            supportsVision: false
+            supportsWebSearch: true
+            supportsLogProbs: true
+          constraints:  # Uses the text-model constraints shape.
+            temperature:
+              default: 0.8
+            top_p:
+              default: 0.9
+          name: Llama 3.2 3B
+          modelSource: https://huggingface.co/meta-llama/Llama-3.2-3B
+          offline: false
+          pricing:  # Uses the LLM pricing shape (input and output).
+            input:
+              usd: 0.15
+              vcu: 1.5
+              diem: 0.15
+            output:
+              usd: 0.6
+              vcu: 6
+              diem: 0.6
+          traits:
+            - fastest
+        object: model
+        owned_by: venice.ai
+        type: text
+    ModelTraitSchema:  # Free-form map with string values; no fixed property names.
+      type: object
+      additionalProperties:  # Every value in the map must be a string.
+        type: string
+      description: List of available models
+      example:  # Keys look like trait names and values like model IDs.
+        default: llama-3.3-70b
+        fastest: llama-3.2-3b-akash
+    ModelCompatibilitySchema:  # Free-form map with string values; no fixed property names.
+      type: object
+      additionalProperties:  # Every value in the map must be a string.
+        type: string
+      description: List of available models
+      example:  # Presumably maps an external model name to a Venice model ID; confirm.
+        gpt-4o: llama-3.3-70b
+  parameters: {}  # Explicit empty map: no reusable parameters are defined here.
+paths:
+  /chat/completions:
+    post:
+      description: Run text inference based on the supplied parameters. Long running
+        requests should use the streaming API by setting stream=true in your
+        request.
+      operationId: createChatCompletion
+      parameters:
+        - description: Supported compression encodings (gzip, br). Only applied when
+            stream is false.
+          in: header
+          name: Accept-Encoding
+          required: false
+          schema:
+            example: gzip, br
+            type: string
+      summary: /api/v1/chat/completions
+      tags:
+        - Chat
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/ChatCompletionRequest"
+      responses:
+        "200":
+          description: OK
+          headers:
+            Content-Encoding:
+              description: The encoding used to compress the response
+              schema:
+                enum:
+                  - gzip
+                  - br
+                type: string
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  choices:
+                    type: array
+                    items:
+                      type: object
+                      properties:
+                        finish_reason:
+                          type: string
+                          enum:
+                            - stop
+                            - length
+                          description: The reason the completion finished.
+                          example: stop
+                        index:
+                          type: integer
+                          description: The index of the choice in the list.
+                          example: 0
+                        logprobs:
+                          type: object
+                          nullable: true
+                          properties:
+                            bytes:
+                              type: array
+                              items:
+                                type: number
+                              description: Raw bytes of the token
+                              example:
+                                - 104
+                                - 101
+                                - 108
+                                - 108
+                                - 111
+                            logprob:
+                              type: number
+                              description: The log probability of this token
+                              example: -0.34
+                            token:
+                              type: string
+                              description: The token string
+                              example: hello
+                            top_logprobs:
+                              type: array
+                              items:
+                                type: object
+                                properties:
+                                  bytes:
+                                    type: array
+                                    items:
+                                      type: number
+                                  logprob:
+                                    type: number
+                                  token:
+                                    type: string
+                                required:
+                                  - logprob
+                                  - token
+                              description: Top tokens considered with their log probabilities
+                          required:
+                            - logprob
+                            - token
+                        message:
+                          anyOf:
+                            - type: object
+                              properties:
+                                content:
+                                  anyOf:
+                                    - type: string
+                                      title: String
+                                    - type: array
+                                      items:
+                                        type: object
+                                        properties:
+                                          text:
+                                            type: string
+                                            minLength: 1
+                                            description: The prompt text of the message. Must be at-least one character in
+                                              length
+                                            example: Why is the sky blue?
+                                            title: Text Content Object
+                                          type:
+                                            type: string
+                                            enum:
+                                              - text
+                                            title: Text Content String
+                                        required:
+                                          - text
+                                          - type
+                                        additionalProperties: false
+                                        description: Text message type.
+                                        example:
+                                          text: Why is the sky blue?
+                                          type: text
+                                        title: text
+                                      title: Objects
+                                    - nullable: true
+                                      title: "null"
+                                name:
+                                  type: string
+                                reasoning_content:
+                                  type: string
+                                  nullable: true
+                                role:
+                                  type: string
+                                  enum:
+                                    - assistant
+                                tool_calls:
+                                  type: array
+                                  items:
+                                    nullable: true
+                              required:
+                                - content
+                                - role
+                              description: The assistant message contains the response from the LLM.
+                              title: Assistant Message
+                            - type: object
+                              properties:
+                                content:
+                                  type: string
+                                name:
+                                  type: string
+                                reasoning_content:
+                                  type: string
+                                  nullable: true
+                                role:
+                                  type: string
+                                  enum:
+                                    - tool
+                                tool_call_id:
+                                  type: string
+                                tool_calls:
+                                  type: array
+                                  items:
+                                    nullable: true
+                              required:
+                                - content
+                                - role
+                                - tool_call_id
+                              description: The tool message is a special message that is used to call a tool.
+                                It is not part of the conversation and is not
+                                visible to the user.
+                              title: Tool Message
+                        # Why generation stopped for this choice. The field is
+                        # nullable, and under OAS 3.0.x an enum still constrains
+                        # a nullable schema, so null is listed explicitly; without
+                        # it the `stop_reason: null` used in this response's
+                        # examples would not validate.
+                        stop_reason:
+                          type: string
+                          nullable: true
+                          enum:
+                            - stop
+                            - length
+                            - null
+                          description: The reason the completion stopped.
+                          example: stop
+                      required:
+                        - finish_reason
+                        - index
+                        - logprobs
+                        - message
+                    description: A list of chat completion choices. Can be more than one if n is
+                      greater than 1.
+                    example:
+                      - finish_reason: stop
+                        index: 0
+                        logprobs: null
+                        message:
+                          content: The sky appears blue because of the way Earth's atmosphere scatters
+                            sunlight. When sunlight reaches Earth's atmosphere,
+                            it is made up of various colors of the spectrum, but
+                            blue light waves are shorter and scatter more easily
+                            when they hit the gases and particles in the
+                            atmosphere. This scattering occurs in all
+                            directions, but from our perspective on the ground,
+                            it appears as a blue hue that dominates the sky's
+                            color. This phenomenon is known as Rayleigh
+                            scattering. During sunrise and sunset, the sunlight
+                            has to travel further through the atmosphere, which
+                            allows more time for the blue light to scatter away
+                            from our direct line of sight, leaving the longer
+                            wavelengths, such as red, yellow, and orange, to
+                            dominate the sky's color.
+                          reasoning_content: null
+                          role: assistant
+                          tool_calls: []
+                        stop_reason: null
+                  created:
+                    type: integer
+                    description: The time at which the request was created.
+                    example: 1677858240
+                  id:
+                    type: string
+                    description: The ID of the request.
+                    example: chatcmpl-abc123
+                  model:
+                    type: string
+                    description: The model id used for the request.
+                    example: venice-uncensored
+                  object:
+                    type: string
+                    enum:
+                      - chat.completion
+                    description: The type of the object returned.
+                    example: chat.completion
+                  # Two alternatives: the one titled "null" and a free-form
+                  # object whose values are unconstrained. Listing the "null"
+                  # alternative a second time is redundant under anyOf, so it
+                  # appears only once.
+                  prompt_logprobs:
+                    anyOf:
+                      - nullable: true
+                        title: "null"
+                      - type: object
+                        additionalProperties:
+                          nullable: true
+                    description: Log probability information for the prompt.
+                  usage:
+                    type: object
+                    properties:
+                      completion_tokens:
+                        type: integer
+                        description: The number of tokens in the completion.
+                        example: 20
+                      prompt_tokens:
+                        type: integer
+                        description: The number of tokens in the prompt.
+                        example: 10
+                      prompt_tokens_details:
+                        type: object
+                        nullable: true
+                        properties: {}
+                        description: Breakdown of tokens used in the prompt. Not presently used by
+                          Venice.
+                      total_tokens:
+                        type: integer
+                        description: The total number of tokens used in the request.
+                        example: 30
+                    required:
+                      - completion_tokens
+                      - prompt_tokens
+                      - total_tokens
+                  venice_parameters:
+                    type: object
+                    properties:
+                      # String enum echoing the web-search mode of the request.
+                      # "off" and "on" are quoted because YAML 1.1 loaders
+                      # resolve the bare words to booleans, which would turn
+                      # this enum into [auto, false, true].
+                      enable_web_search:
+                        type: string
+                        enum:
+                          - auto
+                          - "off"
+                          - "on"
+                        description: Did the request enable web search?
+                        example: auto
+                      enable_web_citations:
+                        type: boolean
+                        description: Did the request enable web citations?
+                        example: true
+                      include_venice_system_prompt:
+                        type: boolean
+                        description: Did the request include the Venice system prompt?
+                        example: true
+                      include_search_results_in_stream:
+                        type: boolean
+                        description: Did the request include search results in the stream?
+                        example: false
+                      character_slug:
+                        type: string
+                        description: The character slug of a public Venice character.
+                        example: venice
+                      strip_thinking_response:
+                        type: boolean
+                        description: Did the request strip thinking response?
+                        example: true
+                      disable_thinking:
+                        type: boolean
+                        description: Did the request disable thinking?
+                        example: true
+                      # List of citation objects; each item must carry title and
+                      # url, while content and date are optional strings.
+                      # The example `date` values are quoted so that loaders
+                      # with YAML 1.1 timestamp resolution keep them as strings,
+                      # matching the declared `type: string`.
+                      web_search_citations:
+                        type: array
+                        items:
+                          type: object
+                          properties:
+                            content:
+                              type: string
+                            date:
+                              type: string
+                            title:
+                              type: string
+                            url:
+                              type: string
+                          required:
+                            - title
+                            - url
+                        description: Citations from web search results.
+                        example:
+                          - content: >-
+                              What&#x27;s the scientific reason behind
+                              Earth&#x27;s sky appearing blue to the human eye?
+                              And what&#x27;s the real colour of the sky?
+                              Save 30% on the shop price when you subscribe to
+                              BBC Sky at Night Magazine today!
+                              In this article we'll look at the science behind
+                              why the sky is blue, or at least why it appears
+                              blue to our eyes.
+                              A beautiful blue sky is the sign of a pleasant day
+                              ahead. But what makes the sky appear blue?
+                              So, the sky appears blue because the molecules of
+                              nitrogen and oxygen in the atmosphere scatter
+                              light in short wavelengths towards the blue end of
+                              the visible spectrum.
+                            date: "2024-08-13T13:45:16"
+                            title: Why is the sky blue? | BBC Sky at Night Magazine
+                            url: https://www.skyatnightmagazine.com/space-science/why-is-the-sky-blue
+                          - content: >-
+                              It was around 1870 when the British physicist John
+                              William Strutt, better known as Lord Rayleigh,
+                              first found an explanation for why the sky is
+                              blue: Blue light from the Sun is scattered the
+                              most when it passes through the atmosphere.
+                              Published: January 20, 2025 8:34am EST · Daniel
+                              Freedman, University of Wisconsin-Stout · Daniel
+                              Freedman · Dean of the College of Science,
+                              Technology, Engineering, Mathematics & Management,
+                              University of Wisconsin-Stout ·
+                              The answer has to do with molecules.
+                              It was around 1870 when the British physicist John
+                              William Strutt, better known as Lord Rayleigh,
+                              first found an explanation for why the sky is
+                              blue: Blue light from the Sun is scattered the
+                              most when it passes through the atmosphere.
+                              When the Sun is near the horizon, its light passes
+                              through a lot more of the atmosphere to reach the
+                              Earth’s surface than when it is directly overhead.
+                              The blue and green light is scattered so well that
+                              you can hardly see it. The sky is colored,
+                              instead, with red and orange light.
+                            date: "2025-04-16T16:55:11"
+                            title: Why is the sky blue?
+                            url: https://theconversation.com/why-is-the-sky-blue-246393
+                    required:
+                      - enable_web_search
+                      - enable_web_citations
+                      - include_venice_system_prompt
+                      - include_search_results_in_stream
+                      - strip_thinking_response
+                      - disable_thinking
+                    description: Unique parameters to Venice's API implementation.
+                required:
+                  - choices
+                  - created
+                  - id
+                  - model
+                  - object
+                  - usage
+                example:
+                  choices:
+                    - finish_reason: stop
+                      index: 0
+                      logprobs: null
+                      message:
+                        content: The sky appears blue because of the way Earth's atmosphere scatters
+                          sunlight. When sunlight reaches Earth's atmosphere, it
+                          is made up of various colors of the spectrum, but blue
+                          light waves are shorter and scatter more easily when
+                          they hit the gases and particles in the atmosphere.
+                          This scattering occurs in all directions, but from our
+                          perspective on the ground, it appears as a blue hue
+                          that dominates the sky's color. This phenomenon is
+                          known as Rayleigh scattering. During sunrise and
+                          sunset, the sunlight has to travel further through the
+                          atmosphere, which allows more time for the blue light
+                          to scatter away from our direct line of sight, leaving
+                          the longer wavelengths, such as red, yellow, and
+                          orange, to dominate the sky's color.
+                        reasoning_content: null
+                        role: assistant
+                        tool_calls: []
+                      stop_reason: null
+                  created: 1739928524
+                  id: chatcmpl-a81fbc2d81a7a083bb83ccf9f44c6e5e
+                  model: qwen-2.5-vl
+                  object: chat.completion
+                  prompt_logprobs: null
+                  usage:
+                    completion_tokens: 146
+                    prompt_tokens: 612
+                    prompt_tokens_details: null
+                    total_tokens: 758
+                  venice_parameters:
+                    include_venice_system_prompt: true
+                    include_search_results_in_stream: false
+                    web_search_citations: []
+                    enable_web_search: auto
+                    enable_web_citations: true
+                    strip_thinking_response: true
+                    disable_thinking: true
+                    character_slug: venice
+        "400":
+          description: Invalid request parameters
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  details:
+                    type: object
+                    properties: {}
+                    description: Details about the incorrect input
+                    example:
+                      _errors: []
+                      field:
+                        _errors:
+                          - Field is required
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Invalid request parameters
+                required:
+                  - error
+        "401":
+          description: Authentication failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Authentication failed
+                required:
+                  - error
+        "402":
+          description: Insufficient USD or Diem balance to complete request
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Insufficient USD or Diem balance to complete request
+                required:
+                  - error
+        "415":
+          description: Invalid request content-type
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Invalid request content-type
+                required:
+                  - error
+        "429":
+          description: Rate limit exceeded
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Rate limit exceeded
+                required:
+                  - error
+        "500":
+          description: Inference processing failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Inference processing failed
+                required:
+                  - error
+        "503":
+          description: The model is at capacity. Please try again later.
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: The model is at capacity. Please try again later.
+                required:
+                  - error
+        "504":
+          description: The request took too long to complete and was timed-out. For
+            long-running inference requests, use the streaming API by setting
+            stream=true in your request.
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: The request took too long to complete and was timed-out. For
+                      long-running inference requests, use the streaming API by
+                      setting stream=true in your request.
+                required:
+                  - error
+  # POST /image/generate: image generation operation, tagged "Image".
+  # The request body is described by the GenerateImageRequest schema. A 200
+  # response is declared both as a JSON envelope and as raw image bytes
+  # (jpeg/png/webp); the schema descriptions tie the binary forms to
+  # return_binary being true.
+  /image/generate:
+    post:
+      description: Generate an image based on input parameters
+      operationId: generateImage
+      parameters:
+        # Optional request header listing the compressed encodings the client
+        # accepts.
+        - description: Supported compression encodings (gzip, br). Only applied when
+            return_binary is false.
+          in: header
+          name: Accept-Encoding
+          required: false
+          schema:
+            example: gzip, br
+            type: string
+      summary: /api/v1/image/generate
+      tags:
+        - Image
+      # NOTE(review): no `required` flag is set on the request body, so it is
+      # optional by the OpenAPI default; confirm whether that is intended.
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/GenerateImageRequest"
+      responses:
+        # Success response. Status codes are quoted so the keys stay strings.
+        "200":
+          description: Successfully generated image
+          headers:
+            # Compression applied to the response body (gzip or br).
+            Content-Encoding:
+              description: The encoding used to compress the response
+              schema:
+                enum:
+                  - gzip
+                  - br
+                type: string
+            # Optional boolean flag header: the image was returned blurred.
+            x-venice-is-blurred:
+              description: Indicates if the generated image is blurred. When Safe Venice is
+                enabled, adult material will be returned blurred.
+              required: false
+              schema:
+                type: boolean
+            # Optional boolean flag header: the image violates the terms of
+            # service.
+            x-venice-is-content-violation:
+              description: Indicates if the generated image does not meet Venice terms of
+                service.
+              required: false
+              schema:
+                type: boolean
+          content:
+            # JSON envelope: request id, base64 images, the echoed request and
+            # timing figures. id, images and timing are required.
+            application/json:
+              schema:
+                type: object
+                properties:
+                  id:
+                    type: string
+                    description: The ID of the request.
+                    example: generate-image-1234567890
+                  images:
+                    type: array
+                    items:
+                      type: string
+                    description: Base64 encoded image data.
+                  # No type is declared here, so the echoed request is
+                  # left unconstrained.
+                  request:
+                    nullable: true
+                    description: The original request data sent to the API.
+                  # All four timing fields are required numbers, documented as
+                  # milliseconds.
+                  timing:
+                    type: object
+                    properties:
+                      inferenceDuration:
+                        type: number
+                        description: Duration of inference in milliseconds
+                      inferencePreprocessingTime:
+                        type: number
+                        description: Duration of preprocessing in milliseconds
+                      inferenceQueueTime:
+                        type: number
+                        description: Duration of queueing in milliseconds
+                      total:
+                        type: number
+                        description: Total duration of the request in milliseconds
+                    required:
+                      - inferenceDuration
+                      - inferencePreprocessingTime
+                      - inferenceQueueTime
+                      - total
+                required:
+                  - id
+                  - images
+                  - timing
+            # Binary alternatives, one media type per image format; each is a
+            # string schema with `format: binary`.
+            image/jpeg:
+              schema:
+                description: Raw image data when return_binary is true and format is jpeg
+                format: binary
+                type: string
+            image/png:
+              schema:
+                description: Raw image data when return_binary is true and format is png
+                format: binary
+                type: string
+            image/webp:
+              schema:
+                description: Raw image data when return_binary is true and format is webp
+                format: binary
+                type: string
+        # Validation failure. This is the only error response of this operation
+        # that carries a `details` object next to the required `error` string.
+        "400":
+          description: Invalid request parameters
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  # No fixed properties are declared; the example shows a
+                  # nested `_errors` layout keyed by field name.
+                  details:
+                    type: object
+                    properties: {}
+                    description: Details about the incorrect input
+                    example:
+                      _errors: []
+                      field:
+                        _errors:
+                          - Field is required
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Invalid request parameters
+                required:
+                  - error
+        # The remaining error responses (401 through 503) all share one shape:
+        # a JSON object with a single required `error` string whose example
+        # repeats the response description.
+        "401":
+          description: Authentication failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Authentication failed
+                required:
+                  - error
+        "402":
+          description: Insufficient USD or Diem balance to complete request
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Insufficient USD or Diem balance to complete request
+                required:
+                  - error
+        "415":
+          description: Invalid request content-type
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Invalid request content-type
+                required:
+                  - error
+        "429":
+          description: Rate limit exceeded
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Rate limit exceeded
+                required:
+                  - error
+        "500":
+          description: Inference processing failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Inference processing failed
+                required:
+                  - error
+        "503":
+          description: The model is at capacity. Please try again later.
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: The model is at capacity. Please try again later.
+                required:
+                  - error
+  /images/generations:
+    post:
+      description: Generate an image based on input parameters using an OpenAI
+        compatible endpoint. This endpoint does not support the full feature set
+        of the Venice Image Generation endpoint, but is compatible with the
+        existing OpenAI endpoint.
+      operationId: simpleGenerateImage
+      parameters:
+        - description: Supported compression encodings (gzip, br).
+          in: header
+          name: Accept-Encoding
+          required: false
+          schema:
+            example: gzip, br
+            type: string
+      # The summary spells out the full route: the /api/v1 server base plus
+      # this path item's key, /images/generations (plural "images").
+      summary: /api/v1/images/generations
+      tags:
+        - Image
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/SimpleGenerateImageRequest"
+      responses:
+        "200":
+          description: Successfully generated image
+          headers:
+            Content-Encoding:
+              description: The encoding used to compress the response
+              schema:
+                enum:
+                  - gzip
+                  - br
+                type: string
+            x-venice-is-blurred:
+              description: Indicates if the generated image is blurred. When Safe Venice is
+                enabled, adult material will be returned blurred.
+              required: false
+              schema:
+                type: boolean
+            x-venice-is-content-violation:
+              description: Indicates if the generated image does not meet Venice terms of
+                service.
+              required: false
+              schema:
+                type: boolean
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  created:
+                    type: integer
+                    description: Unix timestamp for when the request was created
+                    example: 1713833628
+                  data:
+                    type: array
+                    items:
+                      anyOf:
+                        - type: object
+                          properties:
+                            b64_json:
+                              type: string
+                              description: Base64-encoded JSON string of the generated image
+                              example: iVBORw0KGgoAAAANSUhEUgAA...
+                          required:
+                            - b64_json
+                        - type: object
+                          properties:
+                            url:
+                              type: string
+                              description: Data URL of the generated image
+                              example: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...
+                          required:
+                            - url
+                required:
+                  - created
+                  - data
+                additionalProperties: false
+        "400":
+          description: Invalid request parameters
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  details:
+                    type: object
+                    properties: {}
+                    description: Details about the incorrect input
+                    example:
+                      _errors: []
+                      field:
+                        _errors:
+                          - Field is required
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Invalid request parameters
+                required:
+                  - error
+        "401":
+          description: Authentication failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Authentication failed
+                required:
+                  - error
+        "402":
+          description: Insufficient USD or Diem balance to complete request
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Insufficient USD or Diem balance to complete request
+                required:
+                  - error
+        "415":
+          description: Invalid request content-type
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Invalid request content-type
+                required:
+                  - error
+        "429":
+          description: Rate limit exceeded
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Rate limit exceeded
+                required:
+                  - error
+        "500":
+          description: Inference processing failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Inference processing failed
+                required:
+                  - error
+        "503":
+          description: The model is at capacity. Please try again later.
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: The model is at capacity. Please try again later.
+                required:
+                  - error
+  # Read-only listing of the image style names usable with the generate API.
+  /image/styles:
+    # NOTE(review): no operationId is declared for this operation, unlike the
+    # neighbouring operations - confirm whether that is intentional.
+    get:
+      description: List available image styles that can be used with the generate API.
+      # The empty requirement object ({}) makes authentication optional: the
+      # operation may be called anonymously or with a bearer token.
+      security:
+        - {}
+        - BearerAuth: []
+      summary: /api/v1/image/styles
+      tags:
+        - Image
+      responses:
+        # Success envelope: `data` is an array of style names and `object` is
+        # restricted to the single literal `list`; both are required.
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  data:
+                    type: array
+                    items:
+                      type: string
+                    description: List of available image styles
+                    example:
+                      - 3D Model
+                      - Analog Film
+                      - Anime
+                      - Cinematic
+                      - Comic Book
+                  object:
+                    type: string
+                    enum:
+                      - list
+                required:
+                  - data
+                  - object
+        # Documented although anonymous access is allowed by `security` above;
+        # presumably returned when a supplied token is rejected - confirm.
+        "401":
+          description: Authentication failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Authentication failed
+                required:
+                  - error
+        # Error body: an object with a required `error` string.
+        "500":
+          description: An unknown error occurred
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: An unknown error occurred
+                required:
+                  - error
+  # Upscale and/or enhance an image; the result is returned as PNG bytes.
+  /image/upscale:
+    post:
+      description: Upscale or enhance an image based on the supplied parameters. Using
+        a scale of 1 with enhance enabled will only run the enhancer. The image
+        can be provided either as a multipart form-data file upload or as a
+        base64-encoded string in a JSON request.
+      operationId: upscaleImage
+      # Both media types reference the same UpscaleImageRequest schema.
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/UpscaleImageRequest"
+          multipart/form-data:
+            schema:
+              $ref: "#/components/schemas/UpscaleImageRequest"
+      summary: /api/v1/image/upscale
+      tags:
+        - Image
+      responses:
+        # Success is a binary image/png payload, not a JSON envelope.
+        "200":
+          description: OK
+          content:
+            image/png:
+              schema:
+                format: binary
+                type: string
+        # The only error response here that also defines a `details` object
+        # next to the required `error` string; `details` itself is optional.
+        "400":
+          description: Invalid request parameters
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  details:
+                    type: object
+                    properties: {}
+                    description: Details about the incorrect input
+                    example:
+                      _errors: []
+                      field:
+                        _errors:
+                          - Field is required
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Invalid request parameters
+                required:
+                  - error
+        # The remaining error responses share one shape: a JSON object with a
+        # required `error` string.
+        "401":
+          description: Authentication failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Authentication failed
+                required:
+                  - error
+        "402":
+          description: Insufficient USD or Diem balance to complete request
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Insufficient USD or Diem balance to complete request
+                required:
+                  - error
+        "415":
+          description: Invalid request content-type
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Invalid request content-type
+                required:
+                  - error
+        "429":
+          description: Rate limit exceeded
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Rate limit exceeded
+                required:
+                  - error
+        "500":
+          description: Inference processing failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Inference processing failed
+                required:
+                  - error
+        "503":
+          description: The model is at capacity. Please try again later.
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: The model is at capacity. Please try again later.
+                required:
+                  - error
+  # Prompt-driven image editing; the result is returned as PNG bytes.
+  /image/edit:
+    post:
+      description: Edit or modify an image based on the supplied prompt. The image can
+        be provided either as a multipart form-data file upload or as a
+        base64-encoded string in a JSON request.
+      operationId: editImage
+      # Both media types reference the same EditImageRequest schema.
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/EditImageRequest"
+          multipart/form-data:
+            schema:
+              $ref: "#/components/schemas/EditImageRequest"
+      summary: /api/v1/image/edit
+      tags:
+        - Image
+      responses:
+        # Success is a binary image/png payload, not a JSON envelope.
+        "200":
+          description: OK
+          content:
+            image/png:
+              schema:
+                format: binary
+                type: string
+        # The only error response here that also defines a `details` object
+        # next to the required `error` string; `details` itself is optional.
+        "400":
+          description: Invalid request parameters
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  details:
+                    type: object
+                    properties: {}
+                    description: Details about the incorrect input
+                    example:
+                      _errors: []
+                      field:
+                        _errors:
+                          - Field is required
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Invalid request parameters
+                required:
+                  - error
+        # The remaining error responses share one shape: a JSON object with a
+        # required `error` string.
+        "401":
+          description: Authentication failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Authentication failed
+                required:
+                  - error
+        "402":
+          description: Insufficient USD or Diem balance to complete request
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Insufficient USD or Diem balance to complete request
+                required:
+                  - error
+        "415":
+          description: Invalid request content-type
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Invalid request content-type
+                required:
+                  - error
+        "429":
+          description: Rate limit exceeded
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Rate limit exceeded
+                required:
+                  - error
+        "500":
+          description: Inference processing failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Inference processing failed
+                required:
+                  - error
+        "503":
+          description: The model is at capacity. Please try again later.
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: The model is at capacity. Please try again later.
+                required:
+                  - error
+  # Model catalogue, optionally filtered by model type.
+  /models:
+    get:
+      description: Returns a list of available models supported by the Venice.ai API
+        for both text and image inference.
+      operationId: listModels
+      # The empty requirement object ({}) makes authentication optional: the
+      # operation may be called anonymously or with a bearer token.
+      security:
+        - {}
+        - BearerAuth: []
+      summary: /api/v1/models
+      tags:
+        - Models
+      parameters:
+        # Optional `type` query filter. The schema is an anyOf of two string
+        # enums: the concrete model types plus the extra values `all` and
+        # `code`. No default value is declared.
+        - schema:
+            anyOf:
+              - type: string
+                enum:
+                  - embedding
+                  - image
+                  - text
+                  - tts
+                  - upscale
+                  - inpaint
+              - type: string
+                enum:
+                  - all
+                  - code
+            description: Filter models by type. Use "all" to get all model types.
+            example: text
+          required: false
+          name: type
+          in: query
+      responses:
+        # Success envelope: `data` (array of ModelResponse), `object` (always
+        # `list`) and `type` are all required.
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  data:
+                    type: array
+                    items:
+                      $ref: "#/components/schemas/ModelResponse"
+                    description: List of available models
+                  object:
+                    type: string
+                    enum:
+                      - list
+                  # Same two enum groups as the `type` query parameter.
+                  type:
+                    anyOf:
+                      - type: string
+                        enum:
+                          - embedding
+                          - image
+                          - text
+                          - tts
+                          - upscale
+                          - inpaint
+                      - type: string
+                        enum:
+                          - all
+                          - code
+                    description: Type of models returned.
+                    example: text
+                required:
+                  - data
+                  - object
+                  - type
+        # Error body: an object with a required `error` string.
+        "500":
+          description: An unknown error occurred
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: An unknown error occurred
+                required:
+                  - error
+  # Model traits and the model associated with each, filtered by model type.
+  /models/traits:
+    get:
+      description: Returns a list of model traits and the associated model.
+      operationId: listModelTraits
+      # The empty requirement object ({}) makes authentication optional: the
+      # operation may be called anonymously or with a bearer token.
+      security:
+        - {}
+        - BearerAuth: []
+      summary: /api/v1/models/traits
+      tags:
+        - Models
+      parameters:
+        # Optional `type` query filter: a single string enum of concrete model
+        # types, defaulting to `text`.
+        - schema:
+            type: string
+            enum:
+              - embedding
+              - image
+              - text
+              - tts
+              - upscale
+              - inpaint
+            default: text
+            description: Filter models by type.
+            example: text
+          required: false
+          name: type
+          in: query
+      responses:
+        # Success envelope: `data`, `object` (always `list`) and `type` are all
+        # required. `data` is a referenced schema, not an inline array.
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  data:
+                    $ref: "#/components/schemas/ModelTraitSchema"
+                  object:
+                    type: string
+                    enum:
+                      - list
+                  # NOTE(review): this response field also admits `all` and
+                  # `code`, which the query parameter above does not accept -
+                  # confirm whether those values can actually be returned.
+                  type:
+                    anyOf:
+                      - type: string
+                        enum:
+                          - embedding
+                          - image
+                          - text
+                          - tts
+                          - upscale
+                          - inpaint
+                      - type: string
+                        enum:
+                          - all
+                          - code
+                    description: Type of models returned.
+                    example: text
+                required:
+                  - data
+                  - object
+                  - type
+        # Documented although anonymous access is allowed by `security` above;
+        # presumably returned when a supplied token is rejected - confirm.
+        "401":
+          description: Authentication failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Authentication failed
+                required:
+                  - error
+        # Error body: an object with a required `error` string.
+        "500":
+          description: An unknown error occurred
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: An unknown error occurred
+                required:
+                  - error
+  # Model compatibility mappings and the model associated with each,
+  # filtered by model type.
+  /models/compatibility_mapping:
+    get:
+      description: Returns a list of model compatibility mappings and the associated model.
+      operationId: listModelCompatibilityMapping
+      # The empty requirement object ({}) makes authentication optional: the
+      # operation may be called anonymously or with a bearer token.
+      security:
+        - {}
+        - BearerAuth: []
+      summary: /api/v1/models/compatibility_mapping
+      tags:
+        - Models
+      parameters:
+        # Optional `type` query filter: a single string enum of concrete model
+        # types, defaulting to `text`.
+        - schema:
+            type: string
+            enum:
+              - embedding
+              - image
+              - text
+              - tts
+              - upscale
+              - inpaint
+            default: text
+            description: Filter models by type.
+            example: text
+          required: false
+          name: type
+          in: query
+      responses:
+        # Success envelope: `data`, `object` (always `list`) and `type` are all
+        # required. `data` is a referenced schema, not an inline array.
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  data:
+                    $ref: "#/components/schemas/ModelCompatibilitySchema"
+                  object:
+                    type: string
+                    enum:
+                      - list
+                  # NOTE(review): this response field also admits `all` and
+                  # `code`, which the query parameter above does not accept -
+                  # confirm whether those values can actually be returned.
+                  type:
+                    anyOf:
+                      - type: string
+                        enum:
+                          - embedding
+                          - image
+                          - text
+                          - tts
+                          - upscale
+                          - inpaint
+                      - type: string
+                        enum:
+                          - all
+                          - code
+                    description: Type of models returned.
+                    example: text
+                required:
+                  - data
+                  - object
+                  - type
+        # Documented although anonymous access is allowed by `security` above;
+        # presumably returned when a supplied token is rejected - confirm.
+        "401":
+          description: Authentication failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Authentication failed
+                required:
+                  - error
+        # Error body: an object with a required `error` string.
+        "500":
+          description: An unknown error occurred
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: An unknown error occurred
+                required:
+                  - error
+  /api_keys:
+    # Lists API keys together with their consumption limits and trailing
+    # seven-day usage. No operation-level `security` is declared, so the
+    # document-level BearerAuth requirement applies.
+    get:
+      description: Return a list of API keys.
+      operationId: getApiKeys
+      summary: /api/v1/api_keys
+      tags:
+        - API Keys
+      responses:
+        # Success envelope: `data` (array of key records) and `object`
+        # (always `list`); no other top-level properties are allowed.
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  data:
+                    type: array
+                    items:
+                      type: object
+                      properties:
+                        apiKeyType:
+                          type: string
+                          enum:
+                            - INFERENCE
+                            - ADMIN
+                          description: API Key type
+                          example: ADMIN
+                        consumptionLimits:
+                          type: object
+                          properties:
+                            # Each limit is a non-negative number or null.
+                            # OpenAPI 3.0 expresses "or null" with `nullable`
+                            # beside `type`; a type-less `nullable` branch in
+                            # an anyOf would match any value at all.
+                            usd:
+                              type: number
+                              minimum: 0
+                              nullable: true
+                              description: USD limit
+                              example: 50
+                            diem:
+                              type: number
+                              minimum: 0
+                              nullable: true
+                              description: Diem limit
+                              example: 10
+                            vcu:
+                              type: number
+                              minimum: 0
+                              nullable: true
+                              description: VCU limit (deprecated - use Diem instead)
+                              deprecated: true
+                              example: 100
+                          description: The API Key consumption limits for each epoch.
+                          example:
+                            usd: 50
+                            diem: 10
+                        # Timestamp examples are quoted so that YAML 1.1
+                        # loaders keep them as strings (matching
+                        # `type: string`) instead of resolving them to
+                        # timestamp objects.
+                        createdAt:
+                          type: string
+                          nullable: true
+                          description: API Key creation date
+                          example: "2023-10-01T12:00:00Z"
+                        description:
+                          type: string
+                          description: API Key description
+                          example: Example API Key
+                        expiresAt:
+                          type: string
+                          nullable: true
+                          description: API Key expiration date
+                          example: "2023-10-01T12:00:00Z"
+                        id:
+                          type: string
+                          description: API Key ID
+                          example: e28e82dc-9df2-4b47-b726-d0a222ef2ab5
+                        last6Chars:
+                          type: string
+                          description: Last 6 characters of the API Key
+                          example: 2V2jNW
+                        lastUsedAt:
+                          type: string
+                          nullable: true
+                          description: API Key last used date
+                          example: "2023-10-01T12:00:00Z"
+                        # Usage amounts are typed as strings (quoted decimal
+                        # examples), unlike the numeric limits above.
+                        usage:
+                          type: object
+                          properties:
+                            trailingSevenDays:
+                              type: object
+                              properties:
+                                usd:
+                                  type: string
+                                  description: USD usage in the trailing 7 days
+                                  example: "10.2424"
+                                vcu:
+                                  type: string
+                                  description: VCU usage in the trailing 7 days (deprecated - use Diem instead)
+                                  deprecated: true
+                                  example: "42.2315"
+                                diem:
+                                  type: string
+                                  description: Diem usage in the trailing 7 days
+                                  example: "4.2231"
+                              required:
+                                - usd
+                                - vcu
+                                - diem
+                          required:
+                            - trailingSevenDays
+                      # `description` and `usage` are the only optional
+                      # properties of a key record.
+                      required:
+                        - apiKeyType
+                        - consumptionLimits
+                        - createdAt
+                        - expiresAt
+                        - id
+                        - last6Chars
+                        - lastUsedAt
+                      additionalProperties: false
+                    description: List of active API keys
+                  object:
+                    type: string
+                    enum:
+                      - list
+                required:
+                  - data
+                  - object
+                additionalProperties: false
+        # Error bodies: an object with a required `error` string.
+        "401":
+          description: Authentication failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Authentication failed
+                required:
+                  - error
+        "500":
+          description: An unknown error occurred
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: An unknown error occurred
+                required:
+                  - error
+    # Deletes one API key, identified by the `id` query parameter.
+    # No operation-level `security` is declared, so the document-level
+    # BearerAuth requirement applies.
+    delete:
+      description: Delete an API key.
+      operationId: deleteApiKey
+      parameters:
+        # NOTE(review): `id` is marked optional although it names the key to
+        # delete - confirm whether a missing id yields the 400 below.
+        - description: The ID of the API key to delete
+          in: query
+          name: id
+          required: false
+          schema:
+            type: string
+      summary: /api/v1/api_keys
+      tags:
+        - API Keys
+      responses:
+        # Success body: exactly one required boolean, `success`; no other
+        # properties are allowed.
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  success:
+                    type: boolean
+                required:
+                  - success
+                additionalProperties: false
+        # The only error response here that also defines a `details` object
+        # next to the required `error` string; `details` itself is optional.
+        "400":
+          description: Invalid request parameters
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  details:
+                    type: object
+                    properties: {}
+                    description: Details about the incorrect input
+                    example:
+                      _errors: []
+                      field:
+                        _errors:
+                          - Field is required
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Invalid request parameters
+                required:
+                  - error
+        # Remaining error bodies: an object with a required `error` string.
+        "401":
+          description: Authentication failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Authentication failed
+                required:
+                  - error
+        "500":
+          description: An unknown error occurred
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: An unknown error occurred
+                required:
+                  - error
+    post:
+      description: Create a new API key.
+      operationId: createApiKey
+      summary: /api/v1/api_keys
+      tags:
+        - API Keys
+      requestBody:
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                apiKeyType:
+                  type: string
+                  enum:
+                    - INFERENCE
+                    - ADMIN
+                  description: The API Key type. Admin keys have full access to the API while
+                    inference keys are only able to call inference endpoints.
+                  example: ADMIN
+                # Per-epoch spending caps for the new key. Each cap is a
+                # non-negative number or null. OpenAPI 3.0 expresses "or null"
+                # with `nullable` beside `type`; a type-less `nullable` branch
+                # in an anyOf would match any value at all.
+                consumptionLimit:
+                  type: object
+                  properties:
+                    usd:
+                      type: number
+                      minimum: 0
+                      nullable: true
+                      description: USD limit
+                      example: 50
+                    diem:
+                      type: number
+                      minimum: 0
+                      nullable: true
+                      description: Diem limit
+                      example: 10
+                    vcu:
+                      type: number
+                      minimum: 0
+                      nullable: true
+                      description: VCU limit (deprecated - use Diem instead)
+                      deprecated: true
+                      example: 100
+                  description: The API Key consumption limits for each epoch.
+                  example:
+                    usd: 50
+                    diem: 10
+                description:
+                  type: string
+                  description: The API Key description
+                  example: Example API Key
+                expiresAt:
+                  anyOf:
+                    - type: string
+                      enum:
+                        - ""
+                    - type: string
+                      pattern: ^\d{4}-\d{2}-\d{2}$
+                    - type: string
+                      pattern: ^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d{3})?Z$
+                  description: The API Key expiration date. If not provided, the key will not
+                    expire.
+                  example: 2023-10-01T12:00:00Z
+              required:
+                - apiKeyType
+                - description
+              additionalProperties: false
+              description: The request body for creating a new API key. API key creation is
+                rate limited to 20 requests per minute and a maximum of 500
+                active API keys per user. VCU is being deprecated in favor of
+                Diem. Please update your API calls to use Diem instead.
+      responses:
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  data:
+                    type: object
+                    properties:
+                      apiKey:
+                        type: string
+                        description: The API Key. This is only shown once, so make sure to save it
+                          somewhere safe.
+                      apiKeyType:
+                        type: string
+                        enum:
+                          - INFERENCE
+                          - ADMIN
+                        description: The API Key type
+                        example: ADMIN
+                      consumptionLimit:
+                        type: object
+                        properties:
+                          usd:
+                            anyOf:
+                              - type: number
+                                minimum: 0
+                              - nullable: true
+                                title: "null"
+                              - nullable: true
+                                title: "null"
+                            description: USD limit
+                            example: 50
+                          diem:
+                            anyOf:
+                              - type: number
+                                minimum: 0
+                              - nullable: true
+                                title: "null"
+                              - nullable: true
+                                title: "null"
+                            description: Diem limit
+                            example: 10
+                          vcu:
+                            anyOf:
+                              - type: number
+                                minimum: 0
+                              - nullable: true
+                                title: "null"
+                              - nullable: true
+                                title: "null"
+                            description: VCU limit (deprecated - use Diem instead)
+                            deprecated: true
+                            example: 100
+                        description: The API Key consumption limits for each epoch.
+                        example:
+                          usd: 50
+                          diem: 10
+                      description:
+                        type: string
+                        description: The API Key description
+                        example: Example API Key
+                      expiresAt:
+                        type: string
+                        nullable: true
+                        description: The API Key expiration date
+                        example: 2023-10-01T12:00:00Z
+                      id:
+                        type: string
+                        description: The API Key ID
+                        example: e28e82dc-9df2-4b47-b726-d0a222ef2ab5
+                    required:
+                      - apiKey
+                      - apiKeyType
+                      - consumptionLimit
+                      - expiresAt
+                      - id
+                    additionalProperties: false
+                  success:
+                    type: boolean
+                required:
+                  - data
+                  - success
+                additionalProperties: false
+        "400":
+          description: Invalid request parameters
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  details:
+                    type: object
+                    properties: {}
+                    description: Details about the incorrect input
+                    example:
+                      _errors: []
+                      field:
+                        _errors:
+                          - Field is required
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Invalid request parameters
+                required:
+                  - error
+        "401":
+          description: Authentication failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Authentication failed
+                required:
+                  - error
+        "429":
+          description: Rate limit exceeded
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Rate limit exceeded
+                required:
+                  - error
+        "500":
+          description: An unknown error occurred
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: An unknown error occurred
+                required:
+                  - error
+  # GET /api/v1/api_keys/rate_limits (getApiKeyRateLimits): reports access
+  # status, API tier, balances, key expiration, next epoch start and the
+  # per-model rate limits.
+  /api_keys/rate_limits:
+    get:
+      description: Return details about user balances and rate limits.
+      operationId: getApiKeyRateLimits
+      summary: /api/v1/api_keys/rate_limits
+      tags:
+        - API Keys
+      responses:
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  data:
+                    type: object
+                    properties:
+                      accessPermitted:
+                        type: boolean
+                        description: Does the API key have access to consume the inference APIs?
+                        example: true
+                      apiTier:
+                        type: object
+                        properties:
+                          id:
+                            type: string
+                            description: The ID of the API tier.
+                            example: paid
+                          isCharged:
+                            type: boolean
+                            description: Is the API key pay per use (in Diem or USD).
+                            example: true
+                        required:
+                          - id
+                          - isCharged
+                      balances:
+                        type: object
+                        properties:
+                          USD:
+                            type: number
+                            description: The USD balance of the key.
+                            example: 50.23
+                          VCU:
+                            type: number
+                            description: The VCU balance of the key. VCU is being deprecated in favor of
+                              Diem.
+                            example: 100.023
+                          DIEM:
+                            type: number
+                            description: The Diem balance of the key.
+                            example: 100.023
+                      keyExpiration:
+                        type: string
+                        nullable: true
+                        description: The timestamp the API key expires. If null, the key never expires.
+                        # Quoted so YAML loaders keep the example a string
+                        # instead of resolving it to a timestamp object.
+                        example: "2025-06-01T00:00:00Z"
+                      nextEpochBegins:
+                        type: string
+                        description: The timestamp when the next epoch begins. This is relevant for rate
+                          limits that reset at the start of each epoch.
+                        # Quoted for the same reason as keyExpiration.
+                        example: "2025-05-07T00:00:00.000Z"
+                      rateLimits:
+                        # One entry per model; each entry nests its own list of
+                        # limits under a second `rateLimits` key.
+                        type: array
+                        items:
+                          type: object
+                          properties:
+                            apiModelId:
+                              type: string
+                              description: The ID of the API model.
+                              example: venice-uncensored
+                            rateLimits:
+                              type: array
+                              items:
+                                type: object
+                                properties:
+                                  amount:
+                                    type: number
+                                    description: The rate limit for the API model.
+                                    example: 100
+                                  type:
+                                    type: string
+                                    description: The time period for the rate limit. Can be Requests Per Minute
+                                      (RPM), Requests Per Day (RPD), or Tokens
+                                      Per Minute (TPM).
+                                    example: RPM
+                                required:
+                                  - amount
+                                  - type
+                          required:
+                            - rateLimits
+                    required:
+                      - accessPermitted
+                      - apiTier
+                      - balances
+                      - keyExpiration
+                      - nextEpochBegins
+                      - rateLimits
+                required:
+                  - data
+        "401":
+          description: Authentication failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Authentication failed
+                required:
+                  - error
+        "500":
+          description: An unknown error occurred
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: An unknown error occurred
+                required:
+                  - error
+  # GET /api/v1/api_keys/rate_limits/log (getApiKeyRateLimitLogs): lists the
+  # most recent rate limits the account exceeded, wrapped in a `list` object.
+  /api_keys/rate_limits/log:
+    get:
+      description: Returns the last 50 rate limits that the account exceeded.
+      operationId: getApiKeyRateLimitLogs
+      summary: /api/v1/api_keys/rate_limits/log
+      tags:
+        - API Keys
+      responses:
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  data:
+                    type: array
+                    items:
+                      type: object
+                      properties:
+                        apiKeyId:
+                          type: string
+                          description: The ID of the API key that exceeded the limit.
+                        modelId:
+                          type: string
+                          default: venice-uncensored
+                          description: The ID of the model that was used when the rate limit was exceeded.
+                        rateLimitTier:
+                          type: string
+                          description: The API tier of the rate limit.
+                          example: paid
+                        rateLimitType:
+                          type: string
+                          description: The type of rate limit that was exceeded.
+                          example: RPM
+                        timestamp:
+                          type: string
+                          description: The timestamp when the rate limit was exceeded.
+                          # Quoted so YAML loaders keep the example a string
+                          # instead of resolving it to a timestamp object.
+                          example: "2023-10-01T12:00:00Z"
+                      required:
+                        - apiKeyId
+                        - modelId
+                        - rateLimitTier
+                        - rateLimitType
+                        - timestamp
+                      additionalProperties: false
+                    description: The last 50 rate limit logs for the account.
+                  object:
+                    type: string
+                    enum:
+                      - list
+                required:
+                  - data
+                  - object
+                additionalProperties: false
+        "401":
+          description: Authentication failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Authentication failed
+                required:
+                  - error
+        "500":
+          description: An unknown error occurred
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: An unknown error occurred
+                required:
+                  - error
+  # Wallet-based API key creation at /api/v1/api_keys/generate_web3_key.
+  # GET returns a token to sign; POST submits the wallet address, the signed
+  # token and the original token to create a key. Both operations set
+  # `security: []`, overriding the document-level BearerAuth requirement.
+  /api_keys/generate_web3_key:
+    get:
+      description: Returns the token required to generate an API key via a wallet.
+      operationId: getApiKeyGenerateWeb3Key
+      security: []
+      summary: /api/v1/api_keys/generate_web3_key
+      tags:
+        - API Keys
+      responses:
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  data:
+                    type: object
+                    properties:
+                      token:
+                        type: string
+                        description: The token to sign with the wallet
+                        example: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c
+                    required:
+                      - token
+                  success:
+                    type: boolean
+                required:
+                  - data
+                  - success
+    post:
+      description: Authenticates a wallet holding sVVV and creates an API key.
+      operationId: postApiKeyGenerateWeb3Key
+      security: []
+      summary: /api/v1/api_keys/generate_web3_key
+      tags:
+        - API Keys
+      requestBody:
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                apiKeyType:
+                  type: string
+                  enum:
+                    - INFERENCE
+                    - ADMIN
+                  description: The API Key type. Admin keys have full access to the API while
+                    inference keys are only able to call inference endpoints.
+                  example: ADMIN
+                consumptionLimit:
+                  type: object
+                  properties:
+                    # Each limit is a non-negative number or null. OpenAPI 3.0
+                    # only honours `nullable` next to an explicit `type`, so it
+                    # is declared inline instead of as untyped anyOf branches
+                    # (which would match any value).
+                    usd:
+                      type: number
+                      minimum: 0
+                      nullable: true
+                      description: USD limit
+                      example: 50
+                    diem:
+                      type: number
+                      minimum: 0
+                      nullable: true
+                      description: Diem limit
+                      example: 10
+                    vcu:
+                      type: number
+                      minimum: 0
+                      nullable: true
+                      description: VCU limit (deprecated - use Diem instead)
+                      deprecated: true
+                      example: 100
+                  description: The API Key consumption limits for each epoch.
+                  example:
+                    usd: 50
+                    diem: 10
+                description:
+                  type: string
+                  default: Web3 API Key
+                  description: The API Key description
+                  example: Web3 API Key
+                expiresAt:
+                  # Accepts an empty string, a YYYY-MM-DD date, or a UTC
+                  # timestamp with optional milliseconds.
+                  anyOf:
+                    - type: string
+                      enum:
+                        - ""
+                    - type: string
+                      pattern: ^\d{4}-\d{2}-\d{2}$
+                    - type: string
+                      pattern: ^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d{3})?Z$
+                  description: The API Key expiration date. If not provided, the key will not
+                    expire.
+                  # Quoted so YAML loaders keep the example a string instead of
+                  # resolving it to a timestamp object.
+                  example: "2023-10-01T12:00:00Z"
+                address:
+                  type: string
+                  description: The wallet's address
+                  example: "0x45B73055F3aDcC4577Bb709db10B19d11b5c94eE"
+                signature:
+                  type: string
+                  description: The token, signed with the wallet's private key
+                  example: "0xbb5ff2e177f3a97fa553057864ad892eb64120f3eaf9356b4742a10f9a068d42725\
+                    de895b5e45160b679cbe6961dc4cb552ba10dc97bdd8258d9154810785c\
+                    451c"
+                token:
+                  type: string
+                  description: The token obtained from
+                    https://api.venice.ai/api/v1/api_keys/generate_web3_key
+                  example: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c
+              required:
+                - apiKeyType
+                - address
+                - signature
+                - token
+              additionalProperties: false
+      responses:
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  data:
+                    type: object
+                    properties:
+                      apiKey:
+                        type: string
+                        description: The API Key. This is only shown once, so make sure to save it
+                          somewhere safe.
+                      apiKeyType:
+                        type: string
+                        enum:
+                          - INFERENCE
+                          - ADMIN
+                        description: The API Key type
+                        example: ADMIN
+                      consumptionLimit:
+                        type: object
+                        properties:
+                          # Non-negative number or null; `nullable` sits next
+                          # to `type` so OpenAPI 3.0 tooling applies it.
+                          usd:
+                            type: number
+                            minimum: 0
+                            nullable: true
+                            description: USD limit
+                            example: 50
+                          diem:
+                            type: number
+                            minimum: 0
+                            nullable: true
+                            description: Diem limit
+                            example: 10
+                          vcu:
+                            type: number
+                            minimum: 0
+                            nullable: true
+                            description: VCU limit (deprecated - use Diem instead)
+                            deprecated: true
+                            example: 100
+                        description: The API Key consumption limits for each epoch.
+                        example:
+                          usd: 50
+                          diem: 10
+                      description:
+                        type: string
+                        description: The API Key description
+                        example: Example API Key
+                      expiresAt:
+                        type: string
+                        nullable: true
+                        description: The API Key expiration date
+                        # Quoted to stay a string under timestamp-resolving loaders.
+                        example: "2023-10-01T12:00:00Z"
+                      id:
+                        type: string
+                        description: The API Key ID
+                        example: e28e82dc-9df2-4b47-b726-d0a222ef2ab5
+                    required:
+                      - apiKey
+                      - apiKeyType
+                      - consumptionLimit
+                      - expiresAt
+                      - id
+                    additionalProperties: false
+                  success:
+                    type: boolean
+                required:
+                  - data
+                  - success
+                additionalProperties: false
+  # GET /api/v1/characters (listCharacters): preview endpoint returning the
+  # characters supported in the API, wrapped in a `list` object.
+  /characters:
+    get:
+      description: This is a preview API and may change. Returns a list of characters
+        supported in the API.
+      operationId: listCharacters
+      summary: /api/v1/characters
+      tags:
+        - Characters
+        - Preview
+      responses:
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  data:
+                    type: array
+                    items:
+                      type: object
+                      properties:
+                        adult:
+                          type: boolean
+                          description: Whether the character is considered adult content
+                          example: false
+                        createdAt:
+                          type: string
+                          description: Date when the character was created
+                          # Quoted so YAML loaders keep the example a string
+                          # instead of resolving it to a timestamp object.
+                          example: "2024-12-20T21:28:08.934Z"
+                        description:
+                          type: string
+                          nullable: true
+                          description: Description of the character
+                          example: Alan Watts (6 January 1915 – 16 November 1973) was a British and
+                            American writer, speaker, and self-styled
+                            "philosophical entertainer", known for interpreting
+                            and popularizing Buddhist, Taoist, and Hindu
+                            philosophy for a Western audience.
+                        name:
+                          type: string
+                          description: Name of the character
+                          example: Alan Watts
+                        shareUrl:
+                          type: string
+                          nullable: true
+                          description: Share URL of the character
+                          example: https://venice.ai/c/alan-watts
+                        slug:
+                          type: string
+                          description: Slug of the character to be used in the completions API
+                          example: alan-watts
+                        stats:
+                          type: object
+                          properties:
+                            imports:
+                              type: number
+                              description: Number of imports for the character
+                              example: 112
+                          required:
+                            - imports
+                        tags:
+                          type: array
+                          items:
+                            type: string
+                          description: Tags associated with the character
+                          example:
+                            - AlanWatts
+                            - Philosophy
+                            - Buddhism
+                            - Taoist
+                            - Hindu
+                        updatedAt:
+                          type: string
+                          description: Date when the character was last updated
+                          # Quoted for the same reason as createdAt.
+                          example: "2025-02-09T03:23:53.708Z"
+                        webEnabled:
+                          type: boolean
+                          description: Whether the character is enabled for web use
+                          example: true
+                      required:
+                        - adult
+                        - createdAt
+                        - description
+                        - name
+                        - shareUrl
+                        - slug
+                        - stats
+                        - tags
+                        - updatedAt
+                        - webEnabled
+                  object:
+                    type: string
+                    enum:
+                      - list
+                required:
+                  - data
+                  - object
+        "401":
+          description: Authentication failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Authentication failed
+                required:
+                  - error
+        "500":
+          description: An unknown error occurred
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: An unknown error occurred
+                required:
+                  - error
+  # POST /embeddings: creates embeddings for the supplied input.
+  # The request body follows CreateEmbeddingRequestSchema; every error response
+  # below carries a required `error` string.
+  /embeddings:
+    post:
+      tags:
+        - Embeddings
+      summary: /api/v1/embeddings
+      description: Create embeddings for the supplied input.
+      operationId: createEmbedding
+      parameters:
+        # Optional request header naming the compression encodings accepted.
+        - name: Accept-Encoding
+          in: header
+          required: false
+          description: Supported compression encodings (gzip, br)
+          schema:
+            type: string
+            example: gzip, br
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/CreateEmbeddingRequestSchema"
+      responses:
+        # Success: a `list` object wrapping the embeddings plus token usage.
+        "200":
+          description: OK
+          headers:
+            Content-Encoding:
+              description: The encoding used to compress the response
+              schema:
+                type: string
+                enum:
+                  - gzip
+                  - br
+          content:
+            application/json:
+              schema:
+                type: object
+                required:
+                  - data
+                  - model
+                  - object
+                  - usage
+                properties:
+                  data:
+                    type: array
+                    description: The list of embeddings generated by the model.
+                    items:
+                      type: object
+                      required:
+                        - embedding
+                        - index
+                        - object
+                      properties:
+                        embedding:
+                          type: array
+                          description: The embedding vector
+                          items:
+                            type: number
+                        index:
+                          type: integer
+                          description: The index of this embedding in the list
+                        object:
+                          type: string
+                          description: The object type, which is always "embedding"
+                          enum:
+                            - embedding
+                  model:
+                    type: string
+                    description: The name of the model used to generate the embedding.
+                  object:
+                    type: string
+                    description: The object type, which is always "list"
+                    enum:
+                      - list
+                  usage:
+                    type: object
+                    description: The usage information for the request.
+                    required:
+                      - prompt_tokens
+                      - total_tokens
+                    properties:
+                      prompt_tokens:
+                        type: integer
+                        description: The number of tokens used by the prompt.
+                      total_tokens:
+                        type: integer
+                        description: The total number of tokens used by the request.
+                example:
+                  data:
+                    - embedding:
+                        - 0.0023064255
+                        - -0.009327292
+                        - 0.015797377
+                      index: 0
+                      object: embedding
+                  model: text-embedding-bge-m3
+                  object: list
+                  usage:
+                    prompt_tokens: 8
+                    total_tokens: 8
+        # Validation failure; the optional `details` object describes the
+        # offending input.
+        "400":
+          description: Invalid request parameters
+          content:
+            application/json:
+              schema:
+                type: object
+                required:
+                  - error
+                properties:
+                  details:
+                    type: object
+                    description: Details about the incorrect input
+                    properties: {}
+                    example:
+                      _errors: []
+                      field:
+                        _errors:
+                          - Field is required
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Invalid request parameters
+        # The remaining error responses all share the `{ error }` shape and
+        # differ only in status code and message.
+        "401":
+          description: Authentication failed
+          content:
+            application/json:
+              schema:
+                type: object
+                required:
+                  - error
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Authentication failed
+        "402":
+          description: Insufficient USD or Diem balance to complete request
+          content:
+            application/json:
+              schema:
+                type: object
+                required:
+                  - error
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Insufficient USD or Diem balance to complete request
+        "415":
+          description: Invalid request content-type
+          content:
+            application/json:
+              schema:
+                type: object
+                required:
+                  - error
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Invalid request content-type
+        "429":
+          description: Rate limit exceeded
+          content:
+            application/json:
+              schema:
+                type: object
+                required:
+                  - error
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Rate limit exceeded
+        "500":
+          description: Inference processing failed
+          content:
+            application/json:
+              schema:
+                type: object
+                required:
+                  - error
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Inference processing failed
+        "503":
+          description: The model is at capacity. Please try again later.
+          content:
+            application/json:
+              schema:
+                type: object
+                required:
+                  - error
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: The model is at capacity. Please try again later.
+  # POST /audio/speech: converts text to speech.
+  # The request body follows CreateSpeechRequestSchema; a successful response
+  # is a binary audio payload in one of the media types listed under "200".
+  /audio/speech:
+    post:
+      tags:
+        - Audio
+        - Speech
+      summary: /api/v1/audio/speech
+      description: Converts text to speech using various voice models and formats.
+      operationId: createSpeech
+      # NOTE(review): unlike /embeddings, this requestBody does not set
+      # `required: true`, so OpenAPI 3.0 treats the body as optional -
+      # confirm that is intended.
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/CreateSpeechRequestSchema"
+      responses:
+        # Success: raw audio bytes; one entry per supported media type.
+        "200":
+          description: Audio content generated successfully
+          content:
+            audio/aac:
+              schema:
+                type: string
+                format: binary
+            audio/flac:
+              schema:
+                type: string
+                format: binary
+            audio/mpeg:
+              schema:
+                type: string
+                format: binary
+            audio/opus:
+              schema:
+                type: string
+                format: binary
+            audio/pcm:
+              schema:
+                type: string
+                format: binary
+            audio/wav:
+              schema:
+                type: string
+                format: binary
+        # Validation failure; the optional `details` object describes the
+        # offending input.
+        "400":
+          description: Invalid request parameters
+          content:
+            application/json:
+              schema:
+                type: object
+                required:
+                  - error
+                properties:
+                  details:
+                    type: object
+                    description: Details about the incorrect input
+                    properties: {}
+                    example:
+                      _errors: []
+                      field:
+                        _errors:
+                          - Field is required
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Invalid request parameters
+        # The remaining error responses all share the `{ error }` shape and
+        # differ only in status code and message.
+        "401":
+          description: Authentication failed
+          content:
+            application/json:
+              schema:
+                type: object
+                required:
+                  - error
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Authentication failed
+        "402":
+          description: Insufficient USD or Diem balance to complete request
+          content:
+            application/json:
+              schema:
+                type: object
+                required:
+                  - error
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Insufficient USD or Diem balance to complete request
+        "403":
+          description: Unauthorized access
+          content:
+            application/json:
+              schema:
+                type: object
+                required:
+                  - error
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Unauthorized access
+        "415":
+          description: Invalid request content-type
+          content:
+            application/json:
+              schema:
+                type: object
+                required:
+                  - error
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Invalid request content-type
+        "429":
+          description: Rate limit exceeded
+          content:
+            application/json:
+              schema:
+                type: object
+                required:
+                  - error
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Rate limit exceeded
+        "500":
+          description: Inference processing failed
+          content:
+            application/json:
+              schema:
+                type: object
+                required:
+                  - error
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Inference processing failed
+        "503":
+          description: The model is at capacity. Please try again later.
+          content:
+            application/json:
+              schema:
+                type: object
+                required:
+                  - error
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: The model is at capacity. Please try again later.
+  # GET /billing/usage (beta): paginated billing usage for the authenticated
+  # user. The 200 response is offered as application/json or text/csv, and
+  # pagination metadata is also exposed through x-pagination-* headers.
+  /billing/usage:
+    get:
+      description: "Get paginated billing usage data for the authenticated user. NOTE:
+        This is a beta endpoint and may be subject to change."
+      operationId: getBillingUsage
+      parameters:
+        # NOTE(review): OpenAPI 3.0 instructs tooling to ignore header
+        # parameters named Accept; the media types under the 200 response are
+        # what formally document the JSON/CSV negotiation.
+        - description: Accept header to specify the response format
+          in: header
+          name: Accept
+          schema:
+            example: application/json, text/csv
+            type: string
+        - schema:
+            type: string
+            description: Filter by currency
+            enum:
+              - USD
+              - VCU
+              - DIEM
+            example: USD
+          required: false
+          name: currency
+          in: query
+        # Date-time examples are quoted so YAML 1.1 loaders keep them as
+        # strings instead of resolving them to native timestamp objects.
+        - schema:
+            type: string
+            format: date-time
+            description: End date for filtering records (ISO 8601)
+            example: "2024-12-31T23:59:59Z"
+          required: false
+          name: endDate
+          in: query
+        # minimum 0 combined with exclusiveMinimum means limit must be > 0.
+        - schema:
+            type: integer
+            minimum: 0
+            exclusiveMinimum: true
+            maximum: 500
+            default: 200
+            description: Number of items per page
+            example: 200
+          required: false
+          name: limit
+          in: query
+        # Same exclusive lower bound as limit: page must be > 0.
+        - schema:
+            type: integer
+            minimum: 0
+            exclusiveMinimum: true
+            default: 1
+            description: Page number for pagination
+            example: 1
+          required: false
+          name: page
+          in: query
+        - schema:
+            type: string
+            enum:
+              - asc
+              - desc
+            default: desc
+            description: Sort order for createdAt field
+            example: desc
+          required: false
+          name: sortOrder
+          in: query
+        - schema:
+            type: string
+            format: date-time
+            description: Start date for filtering records (ISO 8601)
+            example: "2024-01-01T00:00:00Z"
+          required: false
+          name: startDate
+          in: query
+      summary: /api/v1/billing/usage
+      tags:
+        - Billing
+      responses:
+        "200":
+          description: Successful response
+          headers:
+            # NOTE(review): presumably only sent with the text/csv
+            # representation - confirm.
+            Content-Disposition:
+              schema:
+                description: attachment; filename=billing_usage.csv
+                example: attachment; filename=billing_usage.csv
+                type: string
+            x-pagination-limit:
+              schema:
+                description: Number of items per page
+                example: 200
+                type: number
+            x-pagination-page:
+              schema:
+                description: Current page number
+                example: 1
+                type: number
+            x-pagination-total:
+              schema:
+                description: Total number of items
+                example: 1000
+                type: number
+            x-pagination-total-pages:
+              schema:
+                description: Total number of pages
+                example: 5
+                type: number
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  data:
+                    type: array
+                    items:
+                      type: object
+                      properties:
+                        amount:
+                          type: number
+                          description: The total amount charged for the billing usage entry
+                        currency:
+                          type: string
+                          enum:
+                            - USD
+                            - VCU
+                            - DIEM
+                          description: The currency charged for the billing usage entry
+                          example: USD
+                        # Required but nullable: the key is always present and
+                        # is null when no inference request is attached.
+                        inferenceDetails:
+                          type: object
+                          nullable: true
+                          properties:
+                            completionTokens:
+                              type: number
+                              nullable: true
+                              description: Number of tokens used in the completion. Only present for LLM
+                                usage.
+                            inferenceExecutionTime:
+                              type: number
+                              nullable: true
+                              description: Time taken for inference execution in milliseconds
+                            promptTokens:
+                              type: number
+                              nullable: true
+                              description: Number of tokens requested in the prompt. Only present for LLM
+                                usage.
+                            requestId:
+                              type: string
+                              nullable: true
+                              description: Unique identifier for the inference request
+                          required:
+                            - completionTokens
+                            - inferenceExecutionTime
+                            - promptTokens
+                            - requestId
+                          description: Details about the related inference request, if applicable
+                        notes:
+                          type: string
+                          description: Notes about the billing usage entry
+                        pricePerUnitUsd:
+                          type: number
+                          description: The price per unit in USD
+                        sku:
+                          type: string
+                          description: The product associated with the billing usage entry
+                        timestamp:
+                          type: string
+                          description: The timestamp the billing usage entry was created
+                          example: "2025-01-01T00:00:00Z"
+                        units:
+                          type: number
+                          description: The number of units consumed
+                      required:
+                        - amount
+                        - currency
+                        - inferenceDetails
+                        - notes
+                        - pricePerUnitUsd
+                        - sku
+                        - timestamp
+                        - units
+                  pagination:
+                    type: object
+                    properties:
+                      limit:
+                        type: number
+                      page:
+                        type: number
+                      total:
+                        type: number
+                      totalPages:
+                        type: number
+                    required:
+                      - limit
+                      - page
+                      - total
+                      - totalPages
+                required:
+                  - data
+                  - pagination
+                additionalProperties: false
+                description: The response schema for the billing usage endpoint
+                example:
+                  data:
+                    - amount: -0.1
+                      currency: DIEM
+                      inferenceDetails: null
+                      notes: API Inference
+                      pricePerUnitUsd: 0.1
+                      sku: venice-sd35-image-unit
+                      # `timestamp` is declared as a string, so the example
+                      # carries an ISO 8601 string rather than an empty object.
+                      timestamp: "2025-01-01T00:00:00Z"
+                      units: 1
+                    - amount: -0.06356
+                      currency: DIEM
+                      inferenceDetails:
+                        completionTokens: 227
+                        inferenceExecutionTime: 2964
+                        promptTokens: 339
+                        requestId: chatcmpl-4007fd29f42b7d3c4107f4345e8d174a
+                      notes: API Inference
+                      pricePerUnitUsd: 2.8
+                      sku: llama-3.3-70b-llm-output-mtoken
+                      timestamp: "2025-01-01T00:00:00Z"
+                      units: 0.000227
+                  # NOTE(review): limit 1 alongside two data rows, and page
+                  # 200, suggests limit/page may be swapped (the parameter
+                  # defaults are limit 200, page 1) - confirm against a real
+                  # response before changing.
+                  pagination:
+                    limit: 1
+                    page: 200
+                    total: 56090
+                    totalPages: 56090
+            text/csv:
+              schema:
+                description: CSV formatted billing usage data
+                type: string
+        # Validation failure; the optional `details` object describes the
+        # offending input.
+        "400":
+          description: Invalid request parameters
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  details:
+                    type: object
+                    properties: {}
+                    description: Details about the incorrect input
+                    example:
+                      _errors: []
+                      field:
+                        _errors:
+                          - Field is required
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Invalid request parameters
+                required:
+                  - error
+        "401":
+          description: Authentication failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Authentication failed
+                required:
+                  - error
+        "500":
+          description: Inference processing failed
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: A description of the error
+                    example: Inference processing failed
+                required:
+                  - error