npm - @j-o-r/hello-dave - Versions diffs - 0.1.1 → 0.1.5 - Mend

@j-o-r/hello-dave 0.1.1 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (173) hide show

package/CHANGELOG.md +42 -25
package/README.md +81 -221
package/TODO.md +173 -35
package/agents/agent_creator.js +105 -0
package/agents/agent_creator.prompt.md +371 -0
package/agents/ask_agent.js +64 -127
package/agents/claude_agent.js +68 -0
package/agents/code_agent.js +55 -135
package/agents/code_agent.prompt.md +50 -0
package/agents/echo_agent.js +76 -0
package/agents/financial_expert.js +75 -0
package/agents/gpt_agent.js +52 -103
package/agents/gpt_code.js +81 -0
package/agents/grok_agent.js +58 -114
package/agents/minimax_agent.js +92 -0
package/agents/mureka_agent.js +77 -0
package/agents/planner_agent.js +172 -0
package/agents/stability_agent.js +87 -0
package/agents/test_agent.js +75 -157
package/agents/weather_agent.js +73 -0
package/agents/workflow_agent.js +189 -0
package/bin/dave.js +436 -184
package/docs/bin-dave.md +85 -35
package/docs/cdn-ssh.md +100 -0
package/docs/creating-agents.md +301 -0
package/docs/creating-toolsets.md +336 -0
package/docs/docs-organization.md +48 -0
package/docs/project-overview.md +86 -51
package/lib/API/elevenlabs.io/music.compose.md +441 -0
package/lib/API/elevenlabs.io/music.create-composition-plan.md +370 -0
package/lib/API/elevenlabs.io/music.stream.md +425 -0
package/lib/API/lalal.ai/lalal.js +445 -0
package/lib/API/lalal.ai/openapi.json +2614 -0
package/lib/API/minimax/ImageToolset.js +82 -37
package/lib/API/minimax/MusicToolset.js +125 -79
package/lib/API/minimax/VideoToolset.js +170 -167
package/lib/API/minimax/image.js +5 -1
package/lib/API/minimax/music.js +210 -23
package/lib/API/minimax/video.js +242 -53
package/lib/API/mureka/MusicToolset.js +646 -0
package/lib/API/mureka/README.md +41 -0
package/lib/API/mureka/index.js +7 -0
package/lib/API/mureka/music.js +658 -0
package/lib/API/openai.com/index.js +7 -0
package/lib/API/openai.com/{reponses/text.js → responses.js} +64 -18
package/lib/API/openai.com/video.create.character.md +40 -0
package/lib/API/openai.com/video.create.md +219 -0
package/lib/API/openai.com/video.delete.md +44 -0
package/lib/API/openai.com/video.download.md +31 -0
package/lib/API/openai.com/video.edit.md +155 -0
package/lib/API/openai.com/video.extend.md +166 -0
package/lib/API/openai.com/video.fetch.character.md +43 -0
package/lib/API/openai.com/video.js +784 -0
package/lib/API/openai.com/video.list.md +201 -0
package/lib/API/openai.com/video.remix.md +175 -0
package/lib/API/openai.com/video.retrieve.md +139 -0
package/lib/API/openai.com/videoToolset.js +616 -0
package/lib/API/stability.ai/ImageToolset.js +131 -40
package/lib/API/stability.ai/MusicToolset.js +79 -47
package/lib/API/stability.ai/audio.js +63 -131
package/lib/API/x.ai/chat.responses.md +1040 -0
package/lib/API/x.ai/image.js +229 -59
package/lib/API/x.ai/imageToolset.js +376 -0
package/lib/API/x.ai/index.js +1 -1
package/lib/API/x.ai/responses.js +9 -18
package/lib/Agent.js +271 -0
package/lib/Agent.js.old +284 -0
package/lib/AgentLauncher.js +593 -0
package/lib/Cli.js +87 -13
package/lib/Prompt.js +23 -1
package/lib/Session.js +5 -4
package/lib/ToolSet.js +102 -6
package/lib/agentLoader.js +369 -0
package/lib/cdn.js +67 -231
package/lib/{CdnToolset.js → cdnToolset.js} +47 -64
package/lib/defaultToolsets.js +43 -0
package/lib/fafs.js +1 -1
package/lib/genericToolset.js +442 -119
package/lib/handOffToolset.js +179 -0
package/lib/index.js +34 -27
package/lib/toolsetLoader.js +248 -0
package/package.json +10 -4
package/types/API/lalal.ai/lalal.d.ts +116 -0
package/types/API/minimax/image.d.ts +2 -1
package/types/API/minimax/music.d.ts +189 -26
package/types/API/minimax/video.d.ts +100 -31
package/types/API/mureka/index.d.ts +7 -0
package/types/API/mureka/music.d.ts +472 -0
package/types/API/openai.com/index.d.ts +7 -0
package/types/API/openai.com/{reponses/text.d.ts → responses.d.ts} +11 -11
package/types/API/openai.com/video.d.ts +409 -0
package/types/API/openai.com/videoToolset.d.ts +24 -0
package/types/API/stability.ai/audio.d.ts +14 -103
package/types/API/stability.ai/image.d.ts +2 -2
package/types/API/x.ai/image.d.ts +138 -26
package/types/API/x.ai/imageToolset.d.ts +3 -0
package/types/API/x.ai/index.d.ts +1 -1
package/types/API/x.ai/responses.d.ts +4 -4
package/types/Agent.d.ts +123 -0
package/types/AgentLauncher.d.ts +250 -0
package/types/Cli.d.ts +28 -8
package/types/Prompt.d.ts +23 -5
package/types/Session.d.ts +1 -1
package/types/ToolSet.d.ts +10 -0
package/types/agentLoader.d.ts +78 -0
package/types/cdn.d.ts +15 -90
package/types/defaultToolsets.d.ts +9 -0
package/types/fafs.d.ts +1 -1
package/types/genericToolset.d.ts +1 -1
package/types/handOffToolset.d.ts +28 -0
package/types/index.d.ts +19 -17
package/types/toolsetLoader.d.ts +114 -0
package/utils/format_log.js +101 -23
package/utils/launch_agent.js +18 -0
package/utils/list_sessions.sh +13 -5
package/utils/search_sessions.sh +65 -29
package/utils/toolsets.js +33 -0
package/README.md.bak.1779452127 +0 -240
package/agents/codeserver.sh +0 -47
package/agents/daisy_agent.js +0 -173
package/agents/docs_agent.js +0 -148
package/agents/memory_agent.js +0 -263
package/agents/minimax.js +0 -173
package/agents/npm_agent.js +0 -202
package/agents/prompt_agent.js +0 -133
package/agents/readme_agent.js +0 -148
package/agents/spawn_agent.js +0 -160
package/agents/stability.js +0 -173
package/agents/todo_agent.js +0 -175
package/bin/codeDave +0 -58
package/docs/agent-dave-websocket-protocol.md +0 -180
package/docs/agent-manager.md +0 -244
package/docs/codeserver-pattern.md +0 -191
package/docs/generic-toolset.md +0 -326
package/docs/howtos/agent-networking.md +0 -253
package/docs/howtos/spawn-agents.md.bak +0 -200
package/docs/howtos/spawn-agents.md.bak_new +0 -200
package/docs/multi-agent-clusters.md +0 -265
package/docs/music-toolsets.md +0 -137
package/docs/path-resolution-best-practices.md +0 -104
package/docs/plans/minimax-music-generation.md +0 -80
package/docs/plans/unified-agent-architecture.md +0 -146
package/docs/plans/websocket-streaming-plan.md.bak +0 -317
package/docs/prompt/spawn_agent.md +0 -175
package/docs/prompt/spawn_agent.md.bak +0 -201
package/docs/prompt/task_clarification_and_documentation.md +0 -35
package/docs/prompt-class.md +0 -141
package/docs/todo-archive-infra-2026-04-21.md +0 -15
package/docs/todo-archive-v0.0.8.md +0 -1
package/docs/todo-archive-v0.1.0.md +0 -32
package/docs/todo-archive.md +0 -44
package/docs/tools-syntax-validation.md +0 -121
package/docs/toolset.md +0 -164
package/docs/xai-responses.md +0 -111
package/docs/xai_collections.md +0 -106
package/lib/API/x.ai/ImageToolset.js +0 -165
package/lib/API/x.ai/text.js +0 -415
package/lib/AgentClient.js +0 -248
package/lib/AgentManager.js +0 -245
package/lib/AgentServer.js +0 -404
package/lib/wsCli.js +0 -287
package/lib/wsIO.js +0 -90
package/types/API/x.ai/text.d.ts +0 -286
package/types/AgentClient.d.ts +0 -109
package/types/AgentManager.d.ts +0 -100
package/types/AgentServer.d.ts +0 -89
package/types/wsCli.d.ts +0 -17
package/types/wsIO.d.ts +0 -30
package/utils/test.sh +0 -46
/package/docs/{suggestions.md → _notes/token-counts.md} +0 -0
/package/lib/API/openai.com/{reponses/MESSAGES.md → MESSAGES.md} +0 -0
/package/types/API/{x.ai/ImageToolset.d.ts → mureka/MusicToolset.d.ts} +0 -0
/package/types/{CdnToolset.d.ts → cdnToolset.d.ts} +0 -0

package/lib/API/x.ai/chat.responses.md ADDED Viewed

@@ -0,0 +1,1040 @@
+#### Inference API
+# Chat
+## POST /v1/chat/completions
+Create a chat response from text/image chat prompts. This is the endpoint for making requests to chat and image understanding models.
+### Request Body
+* `deferred` (boolean | null) — If set to \`true\`, the request returns a \`request\_id\`. You can then get the deferred response by GET \`/v1/chat/deferred-completion/\{request\_id}\`.
+* `frequency_penalty` (number | null) — (Not supported by reasoning models) Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
+* `logit_bias` (object | null) — (Unsupported) A JSON object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.
+* `logprobs` (boolean | null) — Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message. Not supported by models \`grok-4.20\` and newer; the field will be silently ignored if set.
+* `max_completion_tokens` (integer | null) — An upper bound for the number of tokens that can be generated for a completion, only applies to visible output tokens (i.e. does not apply to tokens used for reasoning or function calls). Defaults to None, meaning the model will generate as many tokens as needed up until the model's maximum context length.
+* `max_tokens` (integer | null) — \[DEPRECATED] The maximum number of tokens that can be generated in the chat completion. Deprecated in favor of \`max\_completion\_tokens\`.
+* `messages` (array\<object | object | object | object | object>) — A list of messages that make up the chat conversation. Different models support different message types, such as image and text.
+* `model` (string) — Model name for the model to use. Obtainable from \<https://console.x.ai/team/default/models> or \<https://docs.x.ai/docs/models>.
+* `n` (integer | null) — How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
+* `parallel_tool_calls` (boolean | null) — If set to false, the model can perform at most one tool call.
+* `presence_penalty` (number | null) — (Not supported by \`grok-3\` and reasoning models) Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
+* `reasoning_effort` (string | null) — Constrains how hard a reasoning model thinks before responding. Only supported by \`grok-4.3\`. Possible values are \`none\` (disables reasoning completely), \`low\` (this is the default if not specified), \`medium\` and \`high\` (uses the most reasoning tokens).
+* `response_format` (object | object | object)
+* `search_parameters` (object)
+  * `from_date` (string | null) — Date from which to consider the results in ISO-8601 YYYY-MM-DD. See
+    \<https://en.wikipedia.org/wiki/ISO\_8601>.
+  * `max_search_results` (integer | null) — Maximum number of search results to use.
+  * `mode` (string | null) — Choose the mode to query realtime data:
+    \* \`off\`: no search is performed and no external data will be considered.
+    \* \`on\` (default): the model will search in every source for relevant data.
+    \* \`auto\`: the model chooses whether to search for data or not and where to search for it.
+  * `return_citations` (boolean | null) — Whether to return citations in the response or not.
+  * `sources` (array | null) — List of sources to search in. If no sources specified, the model will look over the web and X by default.
+  * `to_date` (string | null) — Date up to which to consider the results in ISO-8601 YYYY-MM-DD. See
+    \<https://en.wikipedia.org/wiki/ISO\_8601>.
+* `seed` (integer | null) — If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same \`seed\` and parameters should return the same result. Determinism is not guaranteed, and you should refer to the \`system\_fingerprint\` response parameter to monitor changes in the backend.
+* `stop` (array | null) — (Not supported by reasoning models) Up to 4 sequences where the API will stop generating further tokens.
+* `stream` (boolean | null) — If set, partial message deltas will be sent. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a \`data: \[DONE]\` message.
+* `stream_options` (object)
+  * `include_usage` (boolean, required) — Set an additional chunk to be streamed before the \`data: \[DONE]\` message. The other chunks will return \`null\` in \`usage\` field.
+* `temperature` (number | null) — What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+* `tool_choice` (string | object)
+* `tools` (array | null) — A list of tools the model may call in JSON-schema. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported.
+* `top_logprobs` (integer | null) — An integer between 0 and 8 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used. Not supported by models \`grok-4.20\` and newer; the field will be silently ignored if set.
+* `top_p` (number | null) — An alternative to sampling with \`temperature\`, called nucleus sampling, where the model considers the results of the tokens with \`top\_p\` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. It is generally recommended to alter this or \`temperature\` but not both.
+* `user` (string | null) — A unique identifier representing your end-user, which can help xAI to monitor and detect abuse.
+* `web_search_options` (object)
+  * `filters` (object) — Only included for compatibility.
+  * `search_context_size` (string | null) — This field is included for compatibility with OpenAI's API. It is mapped to \`max\_search\`.
+  * `user_location` (object) — Only included for compatibility.
+### Response Body
+* `choices` (array\<object>, required) — A list of response choices from the model. The length corresponds to \`n\` in the request body (defaults to 1).
+  * `finish_reason` (string | null) — Finish reason. \`"stop"\` means the inference has reached a model-defined or user-supplied stop sequence in \`stop\`. \`"length"\` means the inference result has reached models' maximum allowed token length or user defined value in \`max\_tokens\`. \`"end\_turn"\` or \`null\` in streaming mode when the chunk is not the last.
+  * `index` (integer, required) — Index of the choice within the response choices, starting from 0.
+  * `logprobs` (object)
+    * `content` (array | null) — An array of the log probabilities of each output token returned.
+  * `message` (object, required)
+    * `content` (string | null) — The content of the message.
+    * `reasoning_content` (string | null) — The reasoning trace generated by the model.
+    * `refusal` (string | null) — The reason given by model if the model is unable to generate a response. null if model is able to generate.
+    * `role` (string, required) — The role that the message belongs to, the response from model is always \`"assistant"\`.
+    * `tool_calls` (array | null) — A list of tool calls asked by model for user to perform.
+* `citations` (array | null) — List of all the external pages used by the model to answer.
+* `created` (integer, required) — The chat completion creation time in Unix timestamp.
+* `id` (string, required) — A unique ID for the chat response.
+* `model` (string, required) — Model ID used to create chat completion.
+* `object` (string, required) — The object type, which is always \`"chat.completion"\`.
+* `output_files` (array | null) — Files generated during the response (e.g., by the code execution tool).
+  Only populated when \`code\_execution\_files\_output\` is included.
+* `system_fingerprint` (string | null) — System fingerprint, used to indicate xAI system configuration changes.
+* `usage` (object)
+  * `completion_tokens` (integer, required) — Total completion token used.
+  * `completion_tokens_details` (object, required) — Details of completion usage.
+    * `accepted_prediction_tokens` (integer, required) — The number of tokens in the prediction that appeared in the completion.
+    * `audio_tokens` (integer, required) — Audio input tokens generated by the model.
+    * `reasoning_tokens` (integer, required) — Tokens generated by the model for reasoning.
+    * `rejected_prediction_tokens` (integer, required) — The number of tokens in the prediction that did not appear in the completion.
+  * `cost_in_usd_ticks` (integer, required) — Accurate cost of this request in USD ticks, where "tick" is defined as follows:
+    TICKS\_IN\_USD\_CENT: i64 = 100\_000\_000
+    which means there are 10'000'000'000 ticks in one \*dollar\*.
+  * `num_sources_used` (integer, required) — Number of individual live search sources used.
+  * `prompt_tokens` (integer, required) — Total prompt token used.
+  * `prompt_tokens_details` (object, required) — Details of prompt usage.
+    * `audio_tokens` (integer, required) — Audio prompt token used.
+    * `cached_tokens` (integer, required) — Token cached by xAI from previous requests and reused for this request.
+    * `image_tokens` (integer, required) — Image prompt token used.
+    * `text_tokens` (integer, required) — Total text prompt token used (cached + non-cached text tokens).
+  * `total_tokens` (integer, required) — Total token used, the sum of prompt token and completion token amount.
+\*\*Request example:\*\*
+```json
+{
+  "messages": [
+    {
+      "role": "system",
+      "content": "You are a helpful assistant that can answer questions and help with tasks."
+    },
+    {
+      "role": "user",
+      "content": "What is 101*3?"
+    }
+  ],
+  "model": "latest"
+}
+```
+\*\*Response example:\*\*
+```json
+{
+  "id": "a3d1008e-4544-40d4-d075-11527e794e4a",
+  "object": "chat.completion",
+  "created": 1752854522,
+  "model": "latest",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "101 multiplied by 3 is 303.",
+        "refusal": null
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 32,
+    "completion_tokens": 9,
+    "total_tokens": 135,
+    "prompt_tokens_details": {
+      "text_tokens": 32,
+      "audio_tokens": 0,
+      "image_tokens": 0,
+      "cached_tokens": 6
+    },
+    "completion_tokens_details": {
+      "reasoning_tokens": 94,
+      "audio_tokens": 0,
+      "accepted_prediction_tokens": 0,
+      "rejected_prediction_tokens": 0
+    },
+    "num_sources_used": 0
+  },
+  "system_fingerprint": "fp_3a7881249c"
+}
+```
+***
+## POST /v1/responses
+Generates a response based on text or image prompts. The response ID can be used to retrieve the response later or to continue the conversation without repeating prior context. New responses will be stored for 30 days and then permanently deleted.
+### Request Body
+* `background` (boolean | null) — (Unsupported) Whether to process the response asynchronously in the background.
+* `context_management` (array | null) — Optional context-management directives (e.g. compaction). Parsed but not yet executed.
+* `include` (array | null) — What additional output data to include in the response. Currently the only supported value is \`reasoning.encrypted\_content\` which returns an encrypted version of the reasoning tokens.
+* `input` (string | array\<object | object | object | object | object>, required) — Content of the input passed to a \`/v1/responses\` request.
+* `instructions` (string | null) — An alternate way to specify the system prompt. Note that this cannot be used alongside \`previous\_response\_id\`, where the system prompt of the previous message will be used.
+* `logprobs` (boolean | null) — Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message. Not supported by models \`grok-4.20\` and newer; the field will be silently ignored if set.
+* `max_output_tokens` (integer | null) — Max number of tokens that can be generated in a response. This includes both output and reasoning tokens.
+* `max_turns` (integer | null) — Maximum number of agentic tool calling turns allowed for this request.
+  If not set, defaults to the server's global cap.
+  This parameter will be ignored for any non-agentic requests.
+* `metadata` (object) — Not supported. Only maintained for compatibility reasons.
+* `model` (string) — Model name for the model to use. Obtainable from \<https://console.x.ai/team/default/models> or \<https://docs.x.ai/docs/models>.
+* `parallel_tool_calls` (boolean | null) — Whether to allow the model to run parallel tool calls.
+* `previous_response_id` (string | null) — The ID of the previous response from the model.
+* `prompt_cache_key` (string | null) — Plumbed to x-grok-conv-id for Open Responses compatibility, used for routing.
+* `reasoning` (object)
+  * `effort` (string | null) — Constrains how hard a reasoning model thinks before responding. Only supported by \`grok-4.3\`. Possible values are \`none\` (disables reasoning completely), \`low\` (this is the default if not specified), \`medium\` and \`high\` (uses the most reasoning tokens).
+  * `generate_summary` (string | null) — Only included for compatibility.
+  * `summary` (string | null) — A summary of the model's reasoning process. Possible values are \`auto\`, \`concise\` and \`detailed\`. Only included for compatibility. The model shall always return \`detailed\`.
+* `search_parameters` (object)
+  * `from_date` (string | null) — Date from which to consider the results in ISO-8601 YYYY-MM-DD. See
+    \<https://en.wikipedia.org/wiki/ISO\_8601>.
+  * `max_search_results` (integer | null) — Maximum number of search results to use.
+  * `mode` (string | null) — Choose the mode to query realtime data:
+    \* \`off\`: no search is performed and no external data will be considered.
+    \* \`on\` (default): the model will search in every source for relevant data.
+    \* \`auto\`: the model chooses whether to search for data or not and where to search for it.
+  * `return_citations` (boolean | null) — Whether to return citations in the response or not.
+  * `sources` (array | null) — List of sources to search in. If no sources specified, the model will look over the web and X by default.
+  * `to_date` (string | null) — Date up to which to consider the results in ISO-8601 YYYY-MM-DD. See
+    \<https://en.wikipedia.org/wiki/ISO\_8601>.
+* `service_tier` (string | null) — Not supported. Only maintained for compatibility reasons.
+* `store` (boolean | null) — Whether to store the input message(s) and model response for later retrieval.
+* `stream` (boolean | null) — If set, partial message deltas will be sent. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a \`data: \[DONE]\` message.
+* `temperature` (number | null) — What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+* `text` (object)
+  * `format` (object | object | object)
+* `tool_choice` (string | object)
+* `tools` (array | null) — A list of tools the model may call in JSON-schema. Currently, only functions and web search are supported as tools. A max of 128 tools are supported. \`web\_search\_preview\` tool, if specified, will be overridden by \`search\_parameters\`.
+* `top_logprobs` (integer | null) — An integer between 0 and 8 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used. Not supported by models \`grok-4.20\` and newer; the field will be silently ignored if set.
+* `top_p` (number | null) — An alternative to sampling with \`temperature\`, called nucleus sampling, where the model considers the results of the tokens with \`top\_p\` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. It is generally recommended to alter this or \`temperature\` but not both.
+* `truncation` (string | null) — Not supported. Only maintained for compatibility reasons.
+* `user` (string | null) — A unique identifier representing your end-user, which can help xAI to monitor and detect abuse.
+### Response Body
+* `background` (boolean, required) — OpenResponses compatibility fields.
+  Not used at the moment. Just for OpenResponses compatibility.
+  Whether to process the response asynchronously in the background.
+* `completed_at` (integer | null) — The Unix timestamp (in seconds) for the response completion time. Only set when the response is completed.
+* `created_at` (integer, required) — The Unix timestamp (in seconds) for the response creation time.
+* `error` (object) — An error object returned when the model fails to generate a response.
+* `frequency_penalty` (number, required) — (NOT SUPPORTED in Responses API) Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
+* `id` (string, required) — Unique ID of the response.
+* `incomplete_details` (object | object | object)
+* `instructions` (string | null) — A system (or developer) message inserted into the model's context.
+* `max_output_tokens` (integer | null) — Max number of tokens that can be generated in a response. This includes both output and reasoning tokens.
+* `max_tool_calls` (integer | null) — The maximum number of tool calls allowed for this response.
+* `metadata` (object, required) — Only included for compatibility.
+* `model` (string, required) — Model name used to generate the response.
+* `object` (string, required) — The object type of this resource. Always set to \`response\`.
+* `output` (array\<object | object | object | object | object | object | object | object | object>, required) — The response generated by the model.
+* `parallel_tool_calls` (boolean, required) — Whether to allow the model to run parallel tool calls.
+* `presence_penalty` (number, required) — (NOT SUPPORTED in Responses API) Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
+* `previous_response_id` (string | null) — The ID of the previous response from the model.
+* `prompt_cache_key` (string | null) — The cache key used for the prompt for routing to the correct engine.
+* `reasoning` (object)
+  * `effort` (string | null) — Constrains how hard a reasoning model thinks before responding. Only supported by \`grok-4.3\`. Possible values are \`none\` (disables reasoning completely), \`low\` (this is the default if not specified), \`medium\` and \`high\` (uses the most reasoning tokens).
+  * `generate_summary` (string | null) — Only included for compatibility.
+  * `summary` (string | null) — A summary of the model's reasoning process. Possible values are \`auto\`, \`concise\` and \`detailed\`. Only included for compatibility. The model shall always return \`detailed\`.
+* `safety_identifier` (string | null) — A stable identifier used to help detect users of your application that may be violating xAI's usage policies.
+* `service_tier` (string, required) — Specifies the processing tier used for serving the request.
+* `status` (string, required) — Status of the response. One of \`completed\`, \`in\_progress\` or \`incomplete\`.
+* `store` (boolean, required) — Whether to store the input message(s) and model response for later retrieval.
+* `temperature` (number | null) — What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+* `text` (object, required)
+  * `format` (object | object | object)
+* `tool_choice` (string | object, required) — Parameter to control how model chooses the tools.
+  * `name` (string, required) — Name of the function to use.
+  * `type` (string, required) — Type is always \`"function"\`.
+* `tools` (array\<object | object | object | object | object | object | object>, required) — A list of tools the model may call in JSON-schema. Currently, only functions and web search are supported as tools. A max of 128 tools are supported.
+* `top_logprobs` (integer, required) — An integer between 0 and 8 specifying the number of most likely tokens to return at each token position.
+* `top_p` (number | null) — An alternative to sampling with \`temperature\`, called nucleus sampling, where the model considers the results of the tokens with \`top\_p\` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. It is generally recommended to alter this or \`temperature\` but not both.
+* `truncation` (string, required) — The truncation strategy to use for the model response.
+* `usage` (object)
+  * `context_details` (object)
+    * `input_tokens` (integer, required) — Prompt tokens in the latest context (sourced from
+      \`SamplingUsage.context\_prompt\_tokens\`).
+    * `output_tokens` (integer, required) — Completion + reasoning tokens in the latest context (sourced from
+      \`SamplingUsage.context\_output\_tokens\`).
+  * `cost_in_nano_usd` (integer | null) — Cost in nano US dollars for this request.
+  * `cost_in_usd_ticks` (integer | null) — Accurate cost of this request in USD ticks, where "tick" is defined as follows:
+    TICKS\_IN\_USD\_CENT: i64 = 100\_000\_000
+    which means there are 10'000'000'000 ticks in one \*dollar\*.
+  * `input_tokens` (integer, required) — Number of input tokens used.
+  * `input_tokens_details` (object, required)
+    * `cached_tokens` (integer, required) — Token cached by xAI from previous requests and reused for this request.
+  * `num_server_side_tools_used` (integer, required) — Number of server side tools used.
+  * `num_sources_used` (integer, required) — Number of sources used (for live search).
+  * `output_tokens` (integer, required) — Number of output tokens used.
+  * `output_tokens_details` (object, required)
+    * `reasoning_tokens` (integer, required) — Tokens generated by the model for reasoning.
+  * `server_side_tool_usage_details` (object)
+    * `code_interpreter_calls` (integer, required) — Number of code interpreter calls.
+    * `document_search_calls` (integer, required) — Number of document search calls.
+    * `file_search_calls` (integer, required) — Number of file search calls.
+    * `mcp_calls` (integer, required) — Number of MCP calls.
+    * `web_search_calls` (integer, required) — Number of web search calls.
+    * `x_search_calls` (integer, required) — Number of X search calls.
+  * `total_tokens` (integer, required) — Total tokens used.
+* `user` (string | null) — A unique identifier representing your end-user, which can help xAI to monitor and detect abuse.
+### Code Examples
+```bash
+curl -s https://api.x.ai/v1/responses \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $XAI_API_KEY" \
+  -d '{
+    "model": "grok-4.3",
+    "input": "What is the meaning of life?"
+  }'
+```
+```javascriptAISDK
+import { xai } from "@ai-sdk/xai";
+import { generateText } from "ai";
+const result = await generateText({
+  model: xai.responses("grok-4.3"),
+  prompt: "What is the meaning of life?",
+});
+console.log(JSON.stringify(result, null, 2));
+```
+```pythonOpenAISDK
+import os
+from openai import OpenAI
+client = OpenAI(
+    api_key=os.environ["XAI_API_KEY"],
+    base_url="https://api.x.ai/v1",
+)
+response = client.responses.create(
+    model="grok-4.3",
+    input="What is the meaning of life?",
+)
+print(response.model_dump_json(indent=2))
+```
+```javascriptOpenAISDK
+import OpenAI from "openai";
+const client = new OpenAI({
+  apiKey: process.env.XAI_API_KEY,
+  baseURL: "https://api.x.ai/v1",
+});
+const response = await client.responses.create({
+  model: "grok-4.3",
+  input: "What is the meaning of life?",
+});
+console.log(JSON.stringify(response, null, 2));
+```
+\*\*Response example:\*\*
+```json
+{
+  "created_at": 1754475266,
+  "id": "ad5663da-63e6-86c6-e0be-ff15effa8357",
+  "max_output_tokens": null,
+  "model": "latest",
+  "object": "response",
+  "output": [
+    {
+      "content": [
+        {
+          "type": "output_text",
+          "text": "101 multiplied by 3 is 303.",
+          "logprobs": null,
+          "annotations": []
+        }
+      ],
+      "id": "msg_ad5663da-63e6-86c6-e0be-ff15effa8357",
+      "role": "assistant",
+      "type": "message",
+      "status": "completed"
+    }
+  ],
+  "parallel_tool_calls": true,
+  "previous_response_id": null,
+  "reasoning": null,
+  "temperature": null,
+  "text": {
+    "format": {
+      "type": "text"
+    }
+  },
+  "tool_choice": "auto",
+  "tools": [],
+  "top_p": null,
+  "usage": {
+    "input_tokens": 32,
+    "input_tokens_details": {
+      "cached_tokens": 8
+    },
+    "output_tokens": 9,
+    "output_tokens_details": {
+      "reasoning_tokens": 110
+    },
+    "total_tokens": 151,
+    "num_sources_used": 0,
+    "num_server_side_tools_used": 0
+  },
+  "user": null,
+  "incomplete_details": null,
+  "status": "completed",
+  "store": true
+}
+```
+***
+## POST /v1/responses/compact
+Compacts a full Responses API input window into a shorter canonical window.
+### Request Body
+* `input` (string | array\<object | object | object | object | object>, required) — Content of the input passed to a \`/v1/responses\` request.
+* `model` (string, required) — Model to use for compaction summarization (required).
+### Response Body
+* `created_at` (integer, required) — Unix timestamp (in seconds) when the compacted conversation was created.
+* `id` (string, required) — Unique ID for this compaction (e.g. \`cmp\_\<uuid>\`).
+* `model` (string, required) — Model used for the compaction summary.
+* `object` (string, required) — Always \`"response.compaction"\`.
+* `output` (array\<object | object | object | object | object>, required) — Compacted output containing a single compaction item.
+  Pass this verbatim as input to the next \`/v1/responses\` call.
+* `usage` (object)
+  * `dropped_message_count` (integer, required) — Number of messages dropped/summarized during compaction.
+  * `input_tokens` (integer, required) — Number of tokens in the original (pre-compaction) input.
+  * `input_tokens_details` (object, required) — Breakdown of input tokens for a compaction call.
+    * `cached_tokens` (integer, required) — Number of input tokens that were served from the prompt cache.
+  * `output_tokens` (integer, required) — Number of tokens in the compacted output.
+  * `output_tokens_details` (object, required) — Breakdown of output tokens for a compaction call.
+    * `reasoning_tokens` (integer, required) — Number of reasoning tokens generated during compaction.
+  * `total_tokens` (integer, required) — Total number of tokens used (input + output, including reasoning).
+### Code Examples
+```bash
+curl -s https://api.x.ai/v1/responses/compact \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $XAI_API_KEY" \
+  -d '{
+    "model": "grok-4.3",
+    "input": [
+      {"role": "system", "content": "You are a concise and knowledgeable science tutor."},
+      {"role": "user", "content": "What is the Higgs boson and why is it important?"},
+      {"role": "assistant", "content": "The Higgs boson is an elementary particle in the Standard Model, predicted by Peter Higgs in 1964 and confirmed at CERN in 2012. It is the quantum excitation of the Higgs field, which gives mass to fundamental particles via the Higgs mechanism."},
+      {"role": "user", "content": "How does the Higgs mechanism actually work?"},
+      {"role": "assistant", "content": "Through spontaneous symmetry breaking. The Higgs field has a nonzero vacuum value, and particles acquire mass in proportion to how strongly they couple to it. Photons do not couple, which is why they remain massless."}
+    ]
+  }'
+```
+```pythonOpenAISDK
+import os
+from openai import OpenAI
+client = OpenAI(
+    api_key=os.environ["XAI_API_KEY"],
+    base_url="https://api.x.ai/v1",
+)
+compacted = client.responses.compact(
+    model="grok-4.3",
+    input=[
+        {"role": "system", "content": "You are a concise and knowledgeable science tutor."},
+        {"role": "user", "content": "What is the Higgs boson and why is it important?"},
+        {
+            "role": "assistant",
+            "content": (
+                "The Higgs boson is an elementary particle in the Standard Model, predicted by "
+                "Peter Higgs in 1964 and confirmed at CERN in 2012. It is the quantum excitation "
+                "of the Higgs field, which gives mass to fundamental particles via the Higgs mechanism."
+            ),
+        },
+        {"role": "user", "content": "How does the Higgs mechanism actually work?"},
+        {
+            "role": "assistant",
+            "content": (
+                "Through spontaneous symmetry breaking. The Higgs field has a nonzero vacuum value, "
+                "and particles acquire mass in proportion to how strongly they couple to it. Photons "
+                "do not couple, which is why they remain massless."
+            ),
+        },
+    ],
+)
+print(compacted.model_dump_json(indent=2))
+```
+```javascriptOpenAISDK
+import OpenAI from "openai";
+const client = new OpenAI({
+  apiKey: process.env.XAI_API_KEY,
+  baseURL: "https://api.x.ai/v1",
+});
+const compacted = await client.responses.compact({
+  model: "grok-4.3",
+  input: [
+    { role: "system", content: "You are a concise and knowledgeable science tutor." },
+    { role: "user", content: "What is the Higgs boson and why is it important?" },
+    {
+      role: "assistant",
+      content:
+        "The Higgs boson is an elementary particle in the Standard Model, predicted by Peter Higgs in 1964 and confirmed at CERN in 2012. It is the quantum excitation of the Higgs field, which gives mass to fundamental particles via the Higgs mechanism.",
+    },
+    { role: "user", content: "How does the Higgs mechanism actually work?" },
+    {
+      role: "assistant",
+      content:
+        "Through spontaneous symmetry breaking. The Higgs field has a nonzero vacuum value, and particles acquire mass in proportion to how strongly they couple to it. Photons do not couple, which is why they remain massless.",
+    },
+  ],
+});
+console.log(JSON.stringify(compacted, null, 2));
+```
+\*\*Response example:\*\*
+```json
+{}
+```
+***
+## GET /v1/responses/\{response\_id}
+Retrieve a previously generated response.
+### Path Parameters
+* `response_id` (string, required) — The response id returned by a previous create response request.
+### Response Body
+* `background` (boolean, required) — OpenResponses compatibility fields.
+  Not used at the moment. Just for OpenResponses compatibility.
+  Whether to process the response asynchronously in the background.
+* `completed_at` (integer | null) — The Unix timestamp (in seconds) for the response completion time. Only set when the response is completed.
+* `created_at` (integer, required) — The Unix timestamp (in seconds) for the response creation time.
+* `error` (object) — An error object returned when the model fails to generate a response.
+* `frequency_penalty` (number, required) — (NOT SUPPORTED in Responses API) Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
+* `id` (string, required) — Unique ID of the response.
+* `incomplete_details` (object | object | object)
+* `instructions` (string | null) — A system (or developer) message inserted into the model's context.
+* `max_output_tokens` (integer | null) — Max number of tokens that can be generated in a response. This includes both output and reasoning tokens.
+* `max_tool_calls` (integer | null) — The maximum number of tool calls allowed for this response.
+* `metadata` (object, required) — Only included for compatibility.
+* `model` (string, required) — Model name used to generate the response.
+* `object` (string, required) — The object type of this resource. Always set to \`response\`.
+* `output` (array\<object | object | object | object | object | object | object | object | object>, required) — The response generated by the model.
+* `parallel_tool_calls` (boolean, required) — Whether to allow the model to run parallel tool calls.
+* `presence_penalty` (number, required) — (NOT SUPPORTED in Responses API) Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
+* `previous_response_id` (string | null) — The ID of the previous response from the model.
+* `prompt_cache_key` (string | null) — The cache key used for the prompt for routing to the correct engine.
+* `reasoning` (object)
+  * `effort` (string | null) — Constrains how hard a reasoning model thinks before responding. Only supported by \`grok-4.3\`. Possible values are \`none\` (disables reasoning completely), \`low\` (this is the default if not specified), \`medium\` and \`high\` (uses the most reasoning tokens).
+  * `generate_summary` (string | null) — Only included for compatibility.
+  * `summary` (string | null) — A summary of the model's reasoning process. Possible values are \`auto\`, \`concise\` and \`detailed\`. Only included for compatibility. The model shall always return \`detailed\`.
+* `safety_identifier` (string | null) — A stable identifier used to help detect users of your application that may be violating xAI's usage policies.
+* `service_tier` (string, required) — Specifies the processing tier used for serving the request.
+* `status` (string, required) — Status of the response. One of \`completed\`, \`in\_progress\` or \`incomplete\`.
+* `store` (boolean, required) — Whether to store the input message(s) and model response for later retrieval.
+* `temperature` (number | null) — What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+* `text` (object, required)
+  * `format` (object | object | object)
+* `tool_choice` (string | object, required) — Parameter to control how model chooses the tools.
+  * `name` (string, required) — Name of the function to use.
+  * `type` (string, required) — Type is always \`"function"\`.
+* `tools` (array\<object | object | object | object | object | object | object>, required) — A list of tools the model may call in JSON-schema. Currently, only functions and web search are supported as tools. A max of 128 tools are supported.
+* `top_logprobs` (integer, required) — An integer between 0 and 8 specifying the number of most likely tokens to return at each token position.
+* `top_p` (number | null) — An alternative to sampling with \`temperature\`, called nucleus sampling, where the model considers the results of the tokens with \`top\_p\` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. It is generally recommended to alter this or \`temperature\` but not both.
+* `truncation` (string, required) — The truncation strategy to use for the model response.
+* `usage` (object)
+  * `context_details` (object)
+    * `input_tokens` (integer, required) — Prompt tokens in the latest context (sourced from
+      \`SamplingUsage.context\_prompt\_tokens\`).
+    * `output_tokens` (integer, required) — Completion + reasoning tokens in the latest context (sourced from
+      \`SamplingUsage.context\_output\_tokens\`).
+  * `cost_in_nano_usd` (integer | null) — Cost in nano US dollars for this request.
+  * `cost_in_usd_ticks` (integer | null) — Accurate cost of this request in USD ticks, where "tick" is defined as follows:
+    TICKS\_IN\_USD\_CENT: i64 = 100\_000\_000
+    which means there are 10'000'000'000 ticks in one \*dollar\*.
+  * `input_tokens` (integer, required) — Number of input tokens used.
+  * `input_tokens_details` (object, required)
+    * `cached_tokens` (integer, required) — Tokens cached by xAI from previous requests and reused for this request.
+  * `num_server_side_tools_used` (integer, required) — Number of server side tools used.
+  * `num_sources_used` (integer, required) — Number of sources used (for live search).
+  * `output_tokens` (integer, required) — Number of output tokens used.
+  * `output_tokens_details` (object, required)
+    * `reasoning_tokens` (integer, required) — Tokens generated by the model for reasoning.
+  * `server_side_tool_usage_details` (object)
+    * `code_interpreter_calls` (integer, required) — Number of code interpreter calls.
+    * `document_search_calls` (integer, required) — Number of document search calls.
+    * `file_search_calls` (integer, required) — Number of file search calls.
+    * `mcp_calls` (integer, required) — Number of MCP calls.
+    * `web_search_calls` (integer, required) — Number of web search calls.
+    * `x_search_calls` (integer, required) — Number of X search calls.
+  * `total_tokens` (integer, required) — Total tokens used.
+* `user` (string | null) — A unique identifier representing your end-user, which can help xAI to monitor and detect abuse.
+\*\*Response example:\*\*
+```json
+{
+  "created_at": 1754475266,
+  "id": "ad5663da-63e6-86c6-e0be-ff15effa8357",
+  "max_output_tokens": null,
+  "model": "latest",
+  "object": "response",
+  "output": [
+    {
+      "content": [
+        {
+          "type": "output_text",
+          "text": "101 multiplied by 3 is 303.",
+          "logprobs": null,
+          "annotations": []
+        }
+      ],
+      "id": "msg_ad5663da-63e6-86c6-e0be-ff15effa8357",
+      "role": "assistant",
+      "type": "message",
+      "status": "completed"
+    },
+    {
+      "id": "",
+      "summary": [
+        {
+          "text": "First, the user asked: \"What is 101*3?\"\n\nThis is a simple multiplication: 101 multiplied by 3.\n\nCalculating: 100 * 3 = 300, and 1 * 3 = 3, so 300 + 3 = 303.\n\nI should respond helpfully and directly, as per my system prompt: \"You are a helpful assistant that can answer questions and help with tasks.\"\n\nKeep the response concise and accurate. No need for extra fluff unless it adds value.\n\nFinal answer: 303.",
+          "type": "summary_text"
+        }
+      ],
+      "type": "reasoning",
+      "status": "completed"
+    }
+  ],
+  "parallel_tool_calls": true,
+  "previous_response_id": null,
+  "reasoning": null,
+  "temperature": null,
+  "text": {
+    "format": {
+      "type": "text"
+    }
+  },
+  "tool_choice": "auto",
+  "tools": [],
+  "top_p": null,
+  "usage": {
+    "prompt_tokens": 32,
+    "completion_tokens": 9,
+    "total_tokens": 151,
+    "prompt_tokens_details": {
+      "text_tokens": 32,
+      "audio_tokens": 0,
+      "image_tokens": 0,
+      "cached_tokens": 8
+    },
+    "completion_tokens_details": {
+      "reasoning_tokens": 110,
+      "audio_tokens": 0,
+      "accepted_prediction_tokens": 0,
+      "rejected_prediction_tokens": 0
+    },
+    "num_sources_used": 0
+  },
+  "user": null,
+  "incomplete_details": null,
+  "status": "completed",
+  "store": true
+}
+```
+***
+## DELETE /v1/responses/\{response\_id}
+Delete a previously generated response.
+### Path Parameters
+* `response_id` (string, required) — The response id returned by a previous create response request.
+### Response Body
+* `deleted` (boolean, required) — Whether the response was successfully deleted.
+* `id` (string, required) — The response\_id to be deleted.
+* `object` (string, required) — The deleted object type, which is always \`response\`.
+\*\*Response example:\*\*
+```json
+{
+  "id": "ad5663da-63e6-86c6-e0be-ff15effa8357",
+  "object": "response",
+  "deleted": true
+}
+```
+***
+## GET /v1/chat/deferred-completion/\{request\_id}
+Tries to fetch a result for a previously-started deferred completion. Returns \`200 Success\` with the response body, if the request has been completed. Returns \`202 Accepted\` when the request is pending processing.
+### Path Parameters
+* `request_id` (string, required) — The deferred request id returned by a previous deferred chat request.
+### Response Body
+* `choices` (array\<object>, required) — A list of response choices from the model. The length corresponds to the \`n\` in the request body (defaults to 1).
+  * `finish_reason` (string | null) — Finish reason. \`"stop"\` means the inference has reached a model-defined or user-supplied stop sequence in \`stop\`. \`"length"\` means the inference result has reached the model's maximum allowed token length or the user-defined value in \`max\_tokens\`. \`"end\_turn"\` or \`null\` is returned in streaming mode when the chunk is not the last.
+  * `index` (integer, required) — Index of the choice within the response choices, starting from 0.
+  * `logprobs` (object)
+    * `content` (array | null) — An array of the log probabilities of each output token returned.
+  * `message` (object, required)
+    * `content` (string | null) — The content of the message.
+    * `reasoning_content` (string | null) — The reasoning trace generated by the model.
+    * `refusal` (string | null) — The reason given by the model if it is unable to generate a response. null if the model is able to generate one.
+    * `role` (string, required) — The role that the message belongs to; the response from the model is always \`"assistant"\`.
+    * `tool_calls` (array | null) — A list of tool calls requested by the model for the user to perform.
+* `citations` (array | null) — List of all the external pages used by the model to answer.
+* `created` (integer, required) — The chat completion creation time in Unix timestamp.
+* `id` (string, required) — A unique ID for the chat response.
+* `model` (string, required) — Model ID used to create chat completion.
+* `object` (string, required) — The object type, which is always \`"chat.completion"\`.
+* `output_files` (array | null) — Files generated during the response (e.g., by the code execution tool).
+  Only populated when \`code\_execution\_files\_output\` is included.
+* `system_fingerprint` (string | null) — System fingerprint, used to indicate xAI system configuration changes.
+* `usage` (object)
+  * `completion_tokens` (integer, required) — Total completion tokens used.
+  * `completion_tokens_details` (object, required) — Details of completion usage.
+    * `accepted_prediction_tokens` (integer, required) — The number of tokens in the prediction that appeared in the completion.
+    * `audio_tokens` (integer, required) — Audio input tokens generated by the model.
+    * `reasoning_tokens` (integer, required) — Tokens generated by the model for reasoning.
+    * `rejected_prediction_tokens` (integer, required) — The number of tokens in the prediction that did not appear in the completion.
+  * `cost_in_usd_ticks` (integer, required) — Accurate cost of this request in USD ticks, where "tick" is defined as follows:
+    TICKS\_IN\_USD\_CENT: i64 = 100\_000\_000
+    which means there are 10'000'000'000 ticks in one \*dollar\*.
+  * `num_sources_used` (integer, required) — Number of individual live search sources used.
+  * `prompt_tokens` (integer, required) — Total prompt tokens used.
+  * `prompt_tokens_details` (object, required) — Details of prompt usage.
+    * `audio_tokens` (integer, required) — Audio prompt tokens used.
+    * `cached_tokens` (integer, required) — Tokens cached by xAI from previous requests and reused for this request.
+    * `image_tokens` (integer, required) — Image prompt tokens used.
+    * `text_tokens` (integer, required) — Total text prompt tokens used (cached + non-cached text tokens).
+  * `total_tokens` (integer, required) — Total tokens used: the sum of prompt tokens and completion tokens.
+\*\*Response example:\*\*
+```json
+{
+  "id": "335b92e4-afa5-48e7-b99c-b9a4eabc1c8e",
+  "object": "chat.completion",
+  "created": 1743770624,
+  "model": "latest",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "101 multiplied by 3 is 303.",
+        "refusal": null
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 31,
+    "completion_tokens": 11,
+    "total_tokens": 42,
+    "prompt_tokens_details": {
+      "text_tokens": 31,
+      "audio_tokens": 0,
+      "image_tokens": 0,
+      "cached_tokens": 0
+    },
+    "completion_tokens_details": {
+      "reasoning_tokens": 0,
+      "audio_tokens": 0,
+      "accepted_prediction_tokens": 0,
+      "rejected_prediction_tokens": 0
+    }
+  },
+  "system_fingerprint": "fp_156d35dcaa"
+}
+```