@j-o-r/hello-dave 0.1.1 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. package/CHANGELOG.md +42 -25
  2. package/README.md +81 -221
  3. package/TODO.md +173 -35
  4. package/agents/agent_creator.js +105 -0
  5. package/agents/agent_creator.prompt.md +371 -0
  6. package/agents/ask_agent.js +64 -127
  7. package/agents/claude_agent.js +68 -0
  8. package/agents/code_agent.js +55 -135
  9. package/agents/code_agent.prompt.md +50 -0
  10. package/agents/echo_agent.js +76 -0
  11. package/agents/financial_expert.js +75 -0
  12. package/agents/gpt_agent.js +52 -103
  13. package/agents/gpt_code.js +81 -0
  14. package/agents/grok_agent.js +58 -114
  15. package/agents/minimax_agent.js +92 -0
  16. package/agents/mureka_agent.js +77 -0
  17. package/agents/planner_agent.js +172 -0
  18. package/agents/stability_agent.js +87 -0
  19. package/agents/test_agent.js +75 -157
  20. package/agents/weather_agent.js +73 -0
  21. package/agents/workflow_agent.js +189 -0
  22. package/bin/dave.js +436 -184
  23. package/docs/bin-dave.md +85 -35
  24. package/docs/cdn-ssh.md +100 -0
  25. package/docs/creating-agents.md +301 -0
  26. package/docs/creating-toolsets.md +336 -0
  27. package/docs/docs-organization.md +48 -0
  28. package/docs/project-overview.md +86 -51
  29. package/lib/API/elevenlabs.io/music.compose.md +441 -0
  30. package/lib/API/elevenlabs.io/music.create-composition-plan.md +370 -0
  31. package/lib/API/elevenlabs.io/music.stream.md +425 -0
  32. package/lib/API/lalal.ai/lalal.js +445 -0
  33. package/lib/API/lalal.ai/openapi.json +2614 -0
  34. package/lib/API/minimax/ImageToolset.js +82 -37
  35. package/lib/API/minimax/MusicToolset.js +125 -79
  36. package/lib/API/minimax/VideoToolset.js +170 -167
  37. package/lib/API/minimax/image.js +5 -1
  38. package/lib/API/minimax/music.js +210 -23
  39. package/lib/API/minimax/video.js +242 -53
  40. package/lib/API/mureka/MusicToolset.js +646 -0
  41. package/lib/API/mureka/README.md +41 -0
  42. package/lib/API/mureka/index.js +7 -0
  43. package/lib/API/mureka/music.js +658 -0
  44. package/lib/API/openai.com/index.js +7 -0
  45. package/lib/API/openai.com/{reponses/text.js → responses.js} +64 -18
  46. package/lib/API/openai.com/video.create.character.md +40 -0
  47. package/lib/API/openai.com/video.create.md +219 -0
  48. package/lib/API/openai.com/video.delete.md +44 -0
  49. package/lib/API/openai.com/video.download.md +31 -0
  50. package/lib/API/openai.com/video.edit.md +155 -0
  51. package/lib/API/openai.com/video.extend.md +166 -0
  52. package/lib/API/openai.com/video.fetch.character.md +43 -0
  53. package/lib/API/openai.com/video.js +784 -0
  54. package/lib/API/openai.com/video.list.md +201 -0
  55. package/lib/API/openai.com/video.remix.md +175 -0
  56. package/lib/API/openai.com/video.retrieve.md +139 -0
  57. package/lib/API/openai.com/videoToolset.js +616 -0
  58. package/lib/API/stability.ai/ImageToolset.js +131 -40
  59. package/lib/API/stability.ai/MusicToolset.js +79 -47
  60. package/lib/API/stability.ai/audio.js +63 -131
  61. package/lib/API/x.ai/chat.responses.md +1040 -0
  62. package/lib/API/x.ai/image.js +229 -59
  63. package/lib/API/x.ai/imageToolset.js +376 -0
  64. package/lib/API/x.ai/index.js +1 -1
  65. package/lib/API/x.ai/responses.js +9 -18
  66. package/lib/Agent.js +271 -0
  67. package/lib/Agent.js.old +284 -0
  68. package/lib/AgentLauncher.js +593 -0
  69. package/lib/Cli.js +87 -13
  70. package/lib/Prompt.js +23 -1
  71. package/lib/Session.js +5 -4
  72. package/lib/ToolSet.js +102 -6
  73. package/lib/agentLoader.js +369 -0
  74. package/lib/cdn.js +67 -231
  75. package/lib/{CdnToolset.js → cdnToolset.js} +47 -64
  76. package/lib/defaultToolsets.js +43 -0
  77. package/lib/fafs.js +1 -1
  78. package/lib/genericToolset.js +442 -119
  79. package/lib/handOffToolset.js +179 -0
  80. package/lib/index.js +34 -27
  81. package/lib/toolsetLoader.js +248 -0
  82. package/package.json +10 -4
  83. package/types/API/lalal.ai/lalal.d.ts +116 -0
  84. package/types/API/minimax/image.d.ts +2 -1
  85. package/types/API/minimax/music.d.ts +189 -26
  86. package/types/API/minimax/video.d.ts +100 -31
  87. package/types/API/mureka/index.d.ts +7 -0
  88. package/types/API/mureka/music.d.ts +472 -0
  89. package/types/API/openai.com/index.d.ts +7 -0
  90. package/types/API/openai.com/{reponses/text.d.ts → responses.d.ts} +11 -11
  91. package/types/API/openai.com/video.d.ts +409 -0
  92. package/types/API/openai.com/videoToolset.d.ts +24 -0
  93. package/types/API/stability.ai/audio.d.ts +14 -103
  94. package/types/API/stability.ai/image.d.ts +2 -2
  95. package/types/API/x.ai/image.d.ts +138 -26
  96. package/types/API/x.ai/imageToolset.d.ts +3 -0
  97. package/types/API/x.ai/index.d.ts +1 -1
  98. package/types/API/x.ai/responses.d.ts +4 -4
  99. package/types/Agent.d.ts +123 -0
  100. package/types/AgentLauncher.d.ts +250 -0
  101. package/types/Cli.d.ts +28 -8
  102. package/types/Prompt.d.ts +23 -5
  103. package/types/Session.d.ts +1 -1
  104. package/types/ToolSet.d.ts +10 -0
  105. package/types/agentLoader.d.ts +78 -0
  106. package/types/cdn.d.ts +15 -90
  107. package/types/defaultToolsets.d.ts +9 -0
  108. package/types/fafs.d.ts +1 -1
  109. package/types/genericToolset.d.ts +1 -1
  110. package/types/handOffToolset.d.ts +28 -0
  111. package/types/index.d.ts +19 -17
  112. package/types/toolsetLoader.d.ts +114 -0
  113. package/utils/format_log.js +101 -23
  114. package/utils/launch_agent.js +18 -0
  115. package/utils/list_sessions.sh +13 -5
  116. package/utils/search_sessions.sh +65 -29
  117. package/utils/toolsets.js +33 -0
  118. package/README.md.bak.1779452127 +0 -240
  119. package/agents/codeserver.sh +0 -47
  120. package/agents/daisy_agent.js +0 -173
  121. package/agents/docs_agent.js +0 -148
  122. package/agents/memory_agent.js +0 -263
  123. package/agents/minimax.js +0 -173
  124. package/agents/npm_agent.js +0 -202
  125. package/agents/prompt_agent.js +0 -133
  126. package/agents/readme_agent.js +0 -148
  127. package/agents/spawn_agent.js +0 -160
  128. package/agents/stability.js +0 -173
  129. package/agents/todo_agent.js +0 -175
  130. package/bin/codeDave +0 -58
  131. package/docs/agent-dave-websocket-protocol.md +0 -180
  132. package/docs/agent-manager.md +0 -244
  133. package/docs/codeserver-pattern.md +0 -191
  134. package/docs/generic-toolset.md +0 -326
  135. package/docs/howtos/agent-networking.md +0 -253
  136. package/docs/howtos/spawn-agents.md.bak +0 -200
  137. package/docs/howtos/spawn-agents.md.bak_new +0 -200
  138. package/docs/multi-agent-clusters.md +0 -265
  139. package/docs/music-toolsets.md +0 -137
  140. package/docs/path-resolution-best-practices.md +0 -104
  141. package/docs/plans/minimax-music-generation.md +0 -80
  142. package/docs/plans/unified-agent-architecture.md +0 -146
  143. package/docs/plans/websocket-streaming-plan.md.bak +0 -317
  144. package/docs/prompt/spawn_agent.md +0 -175
  145. package/docs/prompt/spawn_agent.md.bak +0 -201
  146. package/docs/prompt/task_clarification_and_documentation.md +0 -35
  147. package/docs/prompt-class.md +0 -141
  148. package/docs/todo-archive-infra-2026-04-21.md +0 -15
  149. package/docs/todo-archive-v0.0.8.md +0 -1
  150. package/docs/todo-archive-v0.1.0.md +0 -32
  151. package/docs/todo-archive.md +0 -44
  152. package/docs/tools-syntax-validation.md +0 -121
  153. package/docs/toolset.md +0 -164
  154. package/docs/xai-responses.md +0 -111
  155. package/docs/xai_collections.md +0 -106
  156. package/lib/API/x.ai/ImageToolset.js +0 -165
  157. package/lib/API/x.ai/text.js +0 -415
  158. package/lib/AgentClient.js +0 -248
  159. package/lib/AgentManager.js +0 -245
  160. package/lib/AgentServer.js +0 -404
  161. package/lib/wsCli.js +0 -287
  162. package/lib/wsIO.js +0 -90
  163. package/types/API/x.ai/text.d.ts +0 -286
  164. package/types/AgentClient.d.ts +0 -109
  165. package/types/AgentManager.d.ts +0 -100
  166. package/types/AgentServer.d.ts +0 -89
  167. package/types/wsCli.d.ts +0 -17
  168. package/types/wsIO.d.ts +0 -30
  169. package/utils/test.sh +0 -46
  170. /package/docs/{suggestions.md → _notes/token-counts.md} +0 -0
  171. /package/lib/API/openai.com/{reponses/MESSAGES.md → MESSAGES.md} +0 -0
  172. /package/types/API/{x.ai/ImageToolset.d.ts → mureka/MusicToolset.d.ts} +0 -0
  173. /package/types/{CdnToolset.d.ts → cdnToolset.d.ts} +0 -0
@@ -0,0 +1,1040 @@
1
+ #### Inference API
2
+
3
+ # Chat
4
+
5
+ ## POST /v1/chat/completions
6
+
7
+ Create a chat response from text/image chat prompts. This is the endpoint for making requests to chat and image understanding models.
8
+
9
+ ### Request Body
10
+
11
+ * `deferred` (boolean | null) — If set to \`true\`, the request returns a \`request\_id\`. You can then get the deferred response by GET \`/v1/chat/deferred-completion/\{request\_id}\`.
12
+
13
+ * `frequency_penalty` (number | null) — (Not supported by reasoning models) Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
14
+
15
+ * `logit_bias` (object | null) — (Unsupported) A JSON object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.
16
+
17
+ * `logprobs` (boolean | null) — Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message. Not supported by models \`grok-4.20\` and newer; the field will be silently ignored if set.
18
+
19
+ * `max_completion_tokens` (integer | null) — An upper bound for the number of tokens that can be generated for a completion, only applies to visible output tokens (i.e. does not apply to tokens used for reasoning or function calls). Defaults to None, meaning the model will generate as many tokens as needed up until the model's maximum context length.
20
+
21
+ * `max_tokens` (integer | null) — \[DEPRECATED] The maximum number of tokens that can be generated in the chat completion. Deprecated in favor of \`max\_completion\_tokens\`.
22
+
23
+ * `messages` (array\<object | object | object | object | object>) — A list of messages that make up the chat conversation. Different models support different message types, such as image and text.
24
+
25
+ * `model` (string) — Model name for the model to use. Obtainable from \<https://console.x.ai/team/default/models> or \<https://docs.x.ai/docs/models>.
26
+
27
+ * `n` (integer | null) — How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
28
+
29
+ * `parallel_tool_calls` (boolean | null) — If set to false, the model can perform at most one tool call.
30
+
31
+ * `presence_penalty` (number | null) — (Not supported by \`grok-3\` and reasoning models) Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
32
+
33
+ * `reasoning_effort` (string | null) — Constrains how hard a reasoning model thinks before responding. Only supported by \`grok-4.3\`. Possible values are \`none\` (disables reasoning completely), \`low\` (this is the default if not specified), \`medium\` and \`high\` (uses the most reasoning tokens).
34
+
35
+ * `response_format` (object | object | object)
36
+
37
+ * `search_parameters` (object)
38
+
39
+ * `from_date` (string | null) — Date from which to consider the results in ISO-8601 YYYY-MM-DD. See
40
+ \<https://en.wikipedia.org/wiki/ISO\_8601>.
41
+
42
+ * `max_search_results` (integer | null) — Maximum number of search results to use.
43
+
44
+ * `mode` (string | null) — Choose the mode to query realtime data:
45
+ \* \`off\`: no search is performed and no external data will be considered.
46
+ \* \`on\` (default): the model will search every source for relevant data.
47
+ \* \`auto\`: the model chooses whether to search for data and where to search.
48
+
49
+ * `return_citations` (boolean | null) — Whether to return citations in the response or not.
50
+
51
+ * `sources` (array | null) — List of sources to search in. If no sources specified, the model will look over the web and X by default.
52
+
53
+ * `to_date` (string | null) — Date up to which to consider the results in ISO-8601 YYYY-MM-DD. See
54
+ \<https://en.wikipedia.org/wiki/ISO\_8601>.
55
+
56
+ * `seed` (integer | null) — If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same \`seed\` and parameters should return the same result. Determinism is not guaranteed, and you should refer to the \`system\_fingerprint\` response parameter to monitor changes in the backend.
57
+
58
+ * `stop` (array | null) — (Not supported by reasoning models) Up to 4 sequences where the API will stop generating further tokens.
59
+
60
+ * `stream` (boolean | null) — If set, partial message deltas will be sent. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a \`data: \[DONE]\` message.
61
+
62
+ * `stream_options` (object)
63
+
64
+ * `include_usage` (boolean, required) — Set an additional chunk to be streamed before the \`data: \[DONE]\` message. The other chunks will return \`null\` in \`usage\` field.
65
+
66
+ * `temperature` (number | null) — What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
67
+
68
+ * `tool_choice` (string | object)
69
+
70
+ * `tools` (array | null) — A list of tools the model may call in JSON-schema. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported.
71
+
72
+ * `top_logprobs` (integer | null) — An integer between 0 and 8 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used. Not supported by models \`grok-4.20\` and newer; the field will be silently ignored if set.
73
+
74
+ * `top_p` (number | null) — An alternative to sampling with \`temperature\`, called nucleus sampling, where the model considers the results of the tokens with \`top\_p\` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. It is generally recommended to alter this or \`temperature\` but not both.
75
+
76
+ * `user` (string | null) — A unique identifier representing your end-user, which can help xAI to monitor and detect abuse.
77
+
78
+ * `web_search_options` (object)
79
+
80
+ * `filters` (object) — Only included for compatibility.
81
+
82
+ * `search_context_size` (string | null) — This field is included for compatibility with OpenAI's API. It is mapped to \`max\_search\`.
83
+
84
+ * `user_location` (object) — Only included for compatibility.
85
+
86
+ ### Response Body
87
+
88
+ * `choices` (array\<object>, required) — A list of response choices from the model. The length corresponds to the \`n\` in request body (default to 1).
89
+
90
+ * `finish_reason` (string | null) — Finish reason. \`"stop"\` means the inference has reached a model-defined or user-supplied stop sequence in \`stop\`. \`"length"\` means the inference result has reached models' maximum allowed token length or user defined value in \`max\_tokens\`. \`"end\_turn"\` or \`null\` in streaming mode when the chunk is not the last.
91
+
92
+ * `index` (integer, required) — Index of the choice within the response choices, starting from 0.
93
+
94
+ * `logprobs` (object)
95
+
96
+ * `content` (array | null) — An array of the log probabilities of each output token returned.
97
+
98
+ * `message` (object, required)
99
+
100
+ * `content` (string | null) — The content of the message.
101
+
102
+ * `reasoning_content` (string | null) — The reasoning trace generated by the model.
103
+
104
+ * `refusal` (string | null) — The reason given by model if the model is unable to generate a response. null if model is able to generate.
105
+
106
+ * `role` (string, required) — The role that the message belongs to, the response from model is always \`"assistant"\`.
107
+
108
+ * `tool_calls` (array | null) — A list of tool calls asked by model for user to perform.
109
+
110
+ * `citations` (array | null) — List of all the external pages used by the model to answer.
111
+
112
+ * `created` (integer, required) — The chat completion creation time in Unix timestamp.
113
+
114
+ * `id` (string, required) — A unique ID for the chat response.
115
+
116
+ * `model` (string, required) — Model ID used to create chat completion.
117
+
118
+ * `object` (string, required) — The object type, which is always \`"chat.completion"\`.
119
+
120
+ * `output_files` (array | null) — Files generated during the response (e.g., by the code execution tool).
121
+ Only populated when \`code\_execution\_files\_output\` is included.
122
+
123
+ * `system_fingerprint` (string | null) — System fingerprint, used to indicate xAI system configuration changes.
124
+
125
+ * `usage` (object)
126
+
127
+ * `completion_tokens` (integer, required) — Total completion token used.
128
+
129
+ * `completion_tokens_details` (object, required) — Details of completion usage.
130
+
131
+ * `accepted_prediction_tokens` (integer, required) — The number of tokens in the prediction that appeared in the completion.
132
+
133
+ * `audio_tokens` (integer, required) — Audio input tokens generated by the model.
134
+
135
+ * `reasoning_tokens` (integer, required) — Tokens generated by the model for reasoning.
136
+
137
+ * `rejected_prediction_tokens` (integer, required) — The number of tokens in the prediction that did not appear in the completion.
138
+
139
+ * `cost_in_usd_ticks` (integer, required) — Accurate cost of this request in USD ticks, where "tick" is defined as follows:
140
+ TICKS\_IN\_USD\_CENT: i64 = 100\_000\_000
141
+ which means there are 10'000'000'000 ticks in one \*dollar\*.
142
+
143
+ * `num_sources_used` (integer, required) — Number of individual live search sources used.
144
+
145
+ * `prompt_tokens` (integer, required) — Total prompt token used.
146
+
147
+ * `prompt_tokens_details` (object, required) — Details of prompt usage.
148
+
149
+ * `audio_tokens` (integer, required) — Audio prompt token used.
150
+
151
+ * `cached_tokens` (integer, required) — Token cached by xAI from previous requests and reused for this request.
152
+
153
+ * `image_tokens` (integer, required) — Image prompt token used.
154
+
155
+ * `text_tokens` (integer, required) — Total text prompt token used (cached + non-cached text tokens).
156
+
157
+ * `total_tokens` (integer, required) — Total token used, the sum of prompt token and completion token amount.
158
+
159
+ \*\*Request example:\*\*
160
+
161
+ ```json
162
+ {
163
+ "messages": [
164
+ {
165
+ "role": "system",
166
+ "content": "You are a helpful assistant that can answer questions and help with tasks."
167
+ },
168
+ {
169
+ "role": "user",
170
+ "content": "What is 101*3?"
171
+ }
172
+ ],
173
+ "model": "latest"
174
+ }
175
+ ```
176
+
177
+ \*\*Response example:\*\*
178
+
179
+ ```json
180
+ {
181
+ "id": "a3d1008e-4544-40d4-d075-11527e794e4a",
182
+ "object": "chat.completion",
183
+ "created": 1752854522,
184
+ "model": "latest",
185
+ "choices": [
186
+ {
187
+ "index": 0,
188
+ "message": {
189
+ "role": "assistant",
190
+ "content": "101 multiplied by 3 is 303.",
191
+ "refusal": null
192
+ },
193
+ "finish_reason": "stop"
194
+ }
195
+ ],
196
+ "usage": {
197
+ "prompt_tokens": 32,
198
+ "completion_tokens": 9,
199
+ "total_tokens": 135,
200
+ "prompt_tokens_details": {
201
+ "text_tokens": 32,
202
+ "audio_tokens": 0,
203
+ "image_tokens": 0,
204
+ "cached_tokens": 6
205
+ },
206
+ "completion_tokens_details": {
207
+ "reasoning_tokens": 94,
208
+ "audio_tokens": 0,
209
+ "accepted_prediction_tokens": 0,
210
+ "rejected_prediction_tokens": 0
211
+ },
212
+ "num_sources_used": 0
213
+ },
214
+ "system_fingerprint": "fp_3a7881249c"
215
+ }
216
+ ```
217
+
218
+ ***
219
+
220
+ ## POST /v1/responses
221
+
222
+ Generates a response based on text or image prompts. The response ID can be used to retrieve the response later or to continue the conversation without repeating prior context. New responses will be stored for 30 days and then permanently deleted.
223
+
224
+ ### Request Body
225
+
226
+ * `background` (boolean | null) — (Unsupported) Whether to process the response asynchronously in the background.
227
+
228
+ * `context_management` (array | null) — Optional context-management directives (e.g. compaction). Parsed but not yet executed.
229
+
230
+ * `include` (array | null) — What additional output data to include in the response. Currently the only supported value is \`reasoning.encrypted\_content\` which returns an encrypted version of the reasoning tokens.
231
+
232
+ * `input` (string | array\<object | object | object | object | object>, required) — Content of the input passed to a \`/v1/responses\` request.
233
+
234
+ * `instructions` (string | null) — An alternate way to specify the system prompt. Note that this cannot be used alongside \`previous\_response\_id\`, where the system prompt of the previous message will be used.
235
+
236
+ * `logprobs` (boolean | null) — Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message. Not supported by models \`grok-4.20\` and newer; the field will be silently ignored if set.
237
+
238
+ * `max_output_tokens` (integer | null) — Max number of tokens that can be generated in a response. This includes both output and reasoning tokens.
239
+
240
+ * `max_turns` (integer | null) — Maximum number of agentic tool calling turns allowed for this request.
241
+ If not set, defaults to the server's global cap.
242
+ This parameter will be ignored for any non-agentic requests.
243
+
244
+ * `metadata` (object) — Not supported. Only maintained for compatibility reasons.
245
+
246
+ * `model` (string) — Model name for the model to use. Obtainable from \<https://console.x.ai/team/default/models> or \<https://docs.x.ai/docs/models>.
247
+
248
+ * `parallel_tool_calls` (boolean | null) — Whether to allow the model to run parallel tool calls.
249
+
250
+ * `previous_response_id` (string | null) — The ID of the previous response from the model.
251
+
252
+ * `prompt_cache_key` (string | null) — Plumbed to x-grok-conv-id for Open Responses compatibility, used for routing.
253
+
254
+ * `reasoning` (object)
255
+
256
+ * `effort` (string | null) — Constrains how hard a reasoning model thinks before responding. Only supported by \`grok-4.3\`. Possible values are \`none\` (disables reasoning completely), \`low\` (this is the default if not specified), \`medium\` and \`high\` (uses the most reasoning tokens).
257
+
258
+ * `generate_summary` (string | null) — Only included for compatibility.
259
+
260
+ * `summary` (string | null) — A summary of the model's reasoning process. Possible values are \`auto\`, \`concise\` and \`detailed\`. Only included for compatibility. The model shall always return \`detailed\`.
261
+
262
+ * `search_parameters` (object)
263
+
264
+ * `from_date` (string | null) — Date from which to consider the results in ISO-8601 YYYY-MM-DD. See
265
+ \<https://en.wikipedia.org/wiki/ISO\_8601>.
266
+
267
+ * `max_search_results` (integer | null) — Maximum number of search results to use.
268
+
269
+ * `mode` (string | null) — Choose the mode to query realtime data:
270
+ \* \`off\`: no search is performed and no external data will be considered.
270
+ \* \`on\` (default): the model will search every source for relevant data.
271
+ \* \`auto\`: the model chooses whether to search for data and where to search.
273
+
274
+ * `return_citations` (boolean | null) — Whether to return citations in the response or not.
275
+
276
+ * `sources` (array | null) — List of sources to search in. If no sources specified, the model will look over the web and X by default.
277
+
278
+ * `to_date` (string | null) — Date up to which to consider the results in ISO-8601 YYYY-MM-DD. See
279
+ \<https://en.wikipedia.org/wiki/ISO\_8601>.
280
+
281
+ * `service_tier` (string | null) — Not supported. Only maintained for compatibility reasons.
282
+
283
+ * `store` (boolean | null) — Whether to store the input message(s) and model response for later retrieval.
284
+
285
+ * `stream` (boolean | null) — If set, partial message deltas will be sent. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a \`data: \[DONE]\` message.
286
+
287
+ * `temperature` (number | null) — What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
288
+
289
+ * `text` (object)
290
+
291
+ * `format` (object | object | object)
292
+
293
+ * `tool_choice` (string | object)
294
+
295
+ * `tools` (array | null) — A list of tools the model may call in JSON-schema. Currently, only functions and web search are supported as tools. A max of 128 tools are supported. The \`web\_search\_preview\` tool, if specified, will be overridden by \`search\_parameters\`.
296
+
297
+ * `top_logprobs` (integer | null) — An integer between 0 and 8 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used. Not supported by models \`grok-4.20\` and newer; the field will be silently ignored if set.
298
+
299
+ * `top_p` (number | null) — An alternative to sampling with \`temperature\`, called nucleus sampling, where the model considers the results of the tokens with \`top\_p\` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. It is generally recommended to alter this or \`temperature\` but not both.
300
+
301
+ * `truncation` (string | null) — Not supported. Only maintained for compatibility reasons.
302
+
303
+ * `user` (string | null) — A unique identifier representing your end-user, which can help xAI to monitor and detect abuse.
304
+
305
+ ### Response Body
306
+
307
+ * `background` (boolean, required) — OpenResponses compatibility fields.
308
+ Not used at the moment. Just for OpenResponses compatibility.
309
+ Whether to process the response asynchronously in the background.
310
+
311
+ * `completed_at` (integer | null) — The Unix timestamp (in seconds) for the response completion time. Only set when the response is completed.
312
+
313
+ * `created_at` (integer, required) — The Unix timestamp (in seconds) for the response creation time.
314
+
315
+ * `error` (object) — An error object returned when the model fails to generate a response.
316
+
317
+ * `frequency_penalty` (number, required) — (NOT SUPPORTED in Responses API) Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
318
+
319
+ * `id` (string, required) — Unique ID of the response.
320
+
321
+ * `incomplete_details` (object | object | object)
322
+
323
+ * `instructions` (string | null) — A system (or developer) message inserted into the model's context.
324
+
325
+ * `max_output_tokens` (integer | null) — Max number of tokens that can be generated in a response. This includes both output and reasoning tokens.
326
+
327
+ * `max_tool_calls` (integer | null) — The maximum number of tool calls allowed for this response.
328
+
329
+ * `metadata` (object, required) — Only included for compatibility.
330
+
331
+ * `model` (string, required) — Model name used to generate the response.
332
+
333
+ * `object` (string, required) — The object type of this resource. Always set to \`response\`.
334
+
335
+ * `output` (array\<object | object | object | object | object | object | object | object | object>, required) — The response generated by the model.
336
+
337
+ * `parallel_tool_calls` (boolean, required) — Whether to allow the model to run parallel tool calls.
338
+
339
+ * `presence_penalty` (number, required) — (NOT SUPPORTED in Responses API) Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
340
+
341
+ * `previous_response_id` (string | null) — The ID of the previous response from the model.
342
+
343
+ * `prompt_cache_key` (string | null) — The cache key used for the prompt for routing to the correct engine.
344
+
345
+ * `reasoning` (object)
346
+
347
+ * `effort` (string | null) — Constrains how hard a reasoning model thinks before responding. Only supported by \`grok-4.3\`. Possible values are \`none\` (disables reasoning completely), \`low\` (this is the default if not specified), \`medium\` and \`high\` (uses the most reasoning tokens).
348
+
349
+ * `generate_summary` (string | null) — Only included for compatibility.
350
+
351
+ * `summary` (string | null) — A summary of the model's reasoning process. Possible values are \`auto\`, \`concise\` and \`detailed\`. Only included for compatibility. The model shall always return \`detailed\`.
352
+
353
+ * `safety_identifier` (string | null) — A stable identifier used to help detect users of your application that may be violating xAI's usage policies.
354
+
355
+ * `service_tier` (string, required) — Specifies the processing tier used for serving the request.
356
+
357
+ * `status` (string, required) — Status of the response. One of \`completed\`, \`in\_progress\` or \`incomplete\`.
358
+
359
+ * `store` (boolean, required) — Whether to store the input message(s) and model response for later retrieval.
360
+
361
+ * `temperature` (number | null) — What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
362
+
363
+ * `text` (object, required)
364
+
365
+ * `format` (object | object | object)
366
+
367
+ * `tool_choice` (string | object, required) — Parameter to control how model chooses the tools.
368
+
369
+ * `name` (string, required) — Name of the function to use.
370
+
371
+ * `type` (string, required) — Type is always \`"function"\`.
372
+
373
+ * `tools` (array\<object | object | object | object | object | object | object>, required) — A list of tools the model may call in JSON-schema. Currently, only functions and web search are supported as tools. A max of 128 tools are supported.
374
+
375
+ * `top_logprobs` (integer, required) — An integer between 0 and 8 specifying the number of most likely tokens to return at each token position.
376
+
377
+ * `top_p` (number | null) — An alternative to sampling with \`temperature\`, called nucleus sampling, where the model considers the results of the tokens with \`top\_p\` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. It is generally recommended to alter this or \`temperature\` but not both.
378
+
379
+ * `truncation` (string, required) — The truncation strategy to use for the model response.
380
+
381
+ * `usage` (object)
382
+
383
+ * `context_details` (object)
384
+
385
+ * `input_tokens` (integer, required) — Prompt tokens in the latest context (sourced from
386
+ \`SamplingUsage.context\_prompt\_tokens\`).
387
+
388
+ * `output_tokens` (integer, required) — Completion + reasoning tokens in the latest context (sourced from
389
+ \`SamplingUsage.context\_output\_tokens\`).
390
+
391
+ * `cost_in_nano_usd` (integer | null) — Cost in nano US dollars for this request.
392
+
393
+ * `cost_in_usd_ticks` (integer | null) — Accurate cost of this request in USD ticks, where "tick" is defined as follows:
394
+ TICKS\_IN\_USD\_CENT: i64 = 100\_000\_000
395
+ which means there are 10'000'000'000 ticks in one \*dollar\*.
396
+
397
+ * `input_tokens` (integer, required) — Number of input tokens used.
398
+
399
+ * `input_tokens_details` (object, required)
400
+
401
+ * `cached_tokens` (integer, required) — Tokens cached by xAI from previous requests and reused for this request.
402
+
403
+ * `num_server_side_tools_used` (integer, required) — Number of server side tools used.
404
+
405
+ * `num_sources_used` (integer, required) — Number of sources used (for live search).
406
+
407
+ * `output_tokens` (integer, required) — Number of output tokens used.
408
+
409
+ * `output_tokens_details` (object, required)
410
+
411
+ * `reasoning_tokens` (integer, required) — Tokens generated by the model for reasoning.
412
+
413
+ * `server_side_tool_usage_details` (object)
414
+
415
+ * `code_interpreter_calls` (integer, required) — Number of code interpreter calls.
416
+
417
+ * `document_search_calls` (integer, required) — Number of document search calls.
418
+
419
+ * `file_search_calls` (integer, required) — Number of file search calls.
420
+
421
+ * `mcp_calls` (integer, required) — Number of MCP calls.
422
+
423
+ * `web_search_calls` (integer, required) — Number of web search calls.
424
+
425
+ * `x_search_calls` (integer, required) — Number of X search calls.
426
+
427
+ * `total_tokens` (integer, required) — Total tokens used.
428
+
429
+ * `user` (string | null) — A unique identifier representing your end-user, which can help xAI to monitor and detect abuse.
430
+
431
+ ### Code Examples
432
+
433
+ ```bash
434
+ curl -s https://api.x.ai/v1/responses \
435
+ -H "Content-Type: application/json" \
436
+ -H "Authorization: Bearer $XAI_API_KEY" \
437
+ -d '{
438
+ "model": "grok-4.3",
439
+ "input": "What is the meaning of life?"
440
+ }'
441
+ ```
442
+
443
+ ```javascriptAISDK
444
+ import { xai } from "@ai-sdk/xai";
445
+ import { generateText } from "ai";
446
+
447
+ const result = await generateText({
448
+ model: xai.responses("grok-4.3"),
449
+ prompt: "What is the meaning of life?",
450
+ });
451
+
452
+ console.log(JSON.stringify(result, null, 2));
453
+ ```
454
+
455
+ ```pythonOpenAISDK
456
+ import os
457
+
458
+ from openai import OpenAI
459
+
460
+ client = OpenAI(
461
+ api_key=os.environ["XAI_API_KEY"],
462
+ base_url="https://api.x.ai/v1",
463
+ )
464
+
465
+ response = client.responses.create(
466
+ model="grok-4.3",
467
+ input="What is the meaning of life?",
468
+ )
469
+
470
+ print(response.model_dump_json(indent=2))
471
+ ```
472
+
473
+ ```javascriptOpenAISDK
474
+ import OpenAI from "openai";
475
+
476
+ const client = new OpenAI({
477
+ apiKey: process.env.XAI_API_KEY,
478
+ baseURL: "https://api.x.ai/v1",
479
+ });
480
+
481
+ const response = await client.responses.create({
482
+ model: "grok-4.3",
483
+ input: "What is the meaning of life?",
484
+ });
485
+
486
+ console.log(JSON.stringify(response, null, 2));
487
+ ```
488
+
489
+ \*\*Response example:\*\*
490
+
491
+ ```json
492
+ {
493
+ "created_at": 1754475266,
494
+ "id": "ad5663da-63e6-86c6-e0be-ff15effa8357",
495
+ "max_output_tokens": null,
496
+ "model": "latest",
497
+ "object": "response",
498
+ "output": [
499
+ {
500
+ "content": [
501
+ {
502
+ "type": "output_text",
503
+ "text": "101 multiplied by 3 is 303.",
504
+ "logprobs": null,
505
+ "annotations": []
506
+ }
507
+ ],
508
+ "id": "msg_ad5663da-63e6-86c6-e0be-ff15effa8357",
509
+ "role": "assistant",
510
+ "type": "message",
511
+ "status": "completed"
512
+ }
513
+ ],
514
+ "parallel_tool_calls": true,
515
+ "previous_response_id": null,
516
+ "reasoning": null,
517
+ "temperature": null,
518
+ "text": {
519
+ "format": {
520
+ "type": "text"
521
+ }
522
+ },
523
+ "tool_choice": "auto",
524
+ "tools": [],
525
+ "top_p": null,
526
+ "usage": {
527
+ "input_tokens": 32,
528
+ "input_tokens_details": {
529
+ "cached_tokens": 8
530
+ },
531
+ "output_tokens": 9,
532
+ "output_tokens_details": {
533
+ "reasoning_tokens": 110
534
+ },
535
+ "total_tokens": 151,
536
+ "num_sources_used": 0,
537
+ "num_server_side_tools_used": 0
538
+ },
539
+ "user": null,
540
+ "incomplete_details": null,
541
+ "status": "completed",
542
+ "store": true
543
+ }
544
+ ```
545
+
546
+ ***
547
+
548
+ ## POST /v1/responses/compact
549
+
550
+ Compacts a full Responses API input window into a shorter canonical window.
551
+
552
+ ### Request Body
553
+
554
+ * `input` (string | array\<object | object | object | object | object>, required) — Content of the input passed to a \`/v1/responses\` request.
555
+
556
+ * `model` (string, required) — Model to use for compaction summarization (required).
557
+
558
+ ### Response Body
559
+
560
+ * `created_at` (integer, required) — Unix timestamp (in seconds) when the compacted conversation was created.
561
+
562
+ * `id` (string, required) — Unique ID for this compaction (e.g. \`cmp\_\<uuid>\`).
563
+
564
+ * `model` (string, required) — Model used for the compaction summary.
565
+
566
+ * `object` (string, required) — Always \`"response.compaction"\`.
567
+
568
+ * `output` (array\<object | object | object | object | object>, required) — Compacted output containing a single compaction item.
569
+ Pass this verbatim as input to the next \`/v1/responses\` call.
570
+
571
+ * `usage` (object)
572
+
573
+ * `dropped_message_count` (integer, required) — Number of messages dropped/summarized during compaction.
574
+
575
+ * `input_tokens` (integer, required) — Number of tokens in the original (pre-compaction) input.
576
+
577
+ * `input_tokens_details` (object, required) — Breakdown of input tokens for a compaction call.
578
+
579
+ * `cached_tokens` (integer, required) — Number of input tokens that were served from the prompt cache.
580
+
581
+ * `output_tokens` (integer, required) — Number of tokens in the compacted output.
582
+
583
+ * `output_tokens_details` (object, required) — Breakdown of output tokens for a compaction call.
584
+
585
+ * `reasoning_tokens` (integer, required) — Number of reasoning tokens generated during compaction.
586
+
587
+ * `total_tokens` (integer, required) — Total number of tokens used (input + output, including reasoning).
588
+
589
+ ### Code Examples
590
+
591
+ ```bash
592
+ curl -s https://api.x.ai/v1/responses/compact \
593
+ -H "Content-Type: application/json" \
594
+ -H "Authorization: Bearer $XAI_API_KEY" \
595
+ -d '{
596
+ "model": "grok-4.3",
597
+ "input": [
598
+ {"role": "system", "content": "You are a concise and knowledgeable science tutor."},
599
+ {"role": "user", "content": "What is the Higgs boson and why is it important?"},
600
+ {"role": "assistant", "content": "The Higgs boson is an elementary particle in the Standard Model, predicted by Peter Higgs in 1964 and confirmed at CERN in 2012. It is the quantum excitation of the Higgs field, which gives mass to fundamental particles via the Higgs mechanism."},
601
+ {"role": "user", "content": "How does the Higgs mechanism actually work?"},
602
+ {"role": "assistant", "content": "Through spontaneous symmetry breaking. The Higgs field has a nonzero vacuum value, and particles acquire mass in proportion to how strongly they couple to it. Photons do not couple, which is why they remain massless."}
603
+ ]
604
+ }'
605
+ ```
606
+
607
+ ```pythonOpenAISDK
608
+ import os
609
+
610
+ from openai import OpenAI
611
+
612
+ client = OpenAI(
613
+ api_key=os.environ["XAI_API_KEY"],
614
+ base_url="https://api.x.ai/v1",
615
+ )
616
+
617
+ compacted = client.responses.compact(
618
+ model="grok-4.3",
619
+ input=[
620
+ {"role": "system", "content": "You are a concise and knowledgeable science tutor."},
621
+ {"role": "user", "content": "What is the Higgs boson and why is it important?"},
622
+ {
623
+ "role": "assistant",
624
+ "content": (
625
+ "The Higgs boson is an elementary particle in the Standard Model, predicted by "
626
+ "Peter Higgs in 1964 and confirmed at CERN in 2012. It is the quantum excitation "
627
+ "of the Higgs field, which gives mass to fundamental particles via the Higgs mechanism."
628
+ ),
629
+ },
630
+ {"role": "user", "content": "How does the Higgs mechanism actually work?"},
631
+ {
632
+ "role": "assistant",
633
+ "content": (
634
+ "Through spontaneous symmetry breaking. The Higgs field has a nonzero vacuum value, "
635
+ "and particles acquire mass in proportion to how strongly they couple to it. Photons "
636
+ "do not couple, which is why they remain massless."
637
+ ),
638
+ },
639
+ ],
640
+ )
641
+
642
+ print(compacted.model_dump_json(indent=2))
643
+ ```
644
+
645
+ ```javascriptOpenAISDK
646
+ import OpenAI from "openai";
647
+
648
+ const client = new OpenAI({
649
+ apiKey: process.env.XAI_API_KEY,
650
+ baseURL: "https://api.x.ai/v1",
651
+ });
652
+
653
+ const compacted = await client.responses.compact({
654
+ model: "grok-4.3",
655
+ input: [
656
+ { role: "system", content: "You are a concise and knowledgeable science tutor." },
657
+ { role: "user", content: "What is the Higgs boson and why is it important?" },
658
+ {
659
+ role: "assistant",
660
+ content:
661
+ "The Higgs boson is an elementary particle in the Standard Model, predicted by Peter Higgs in 1964 and confirmed at CERN in 2012. It is the quantum excitation of the Higgs field, which gives mass to fundamental particles via the Higgs mechanism.",
662
+ },
663
+ { role: "user", content: "How does the Higgs mechanism actually work?" },
664
+ {
665
+ role: "assistant",
666
+ content:
667
+ "Through spontaneous symmetry breaking. The Higgs field has a nonzero vacuum value, and particles acquire mass in proportion to how strongly they couple to it. Photons do not couple, which is why they remain massless.",
668
+ },
669
+ ],
670
+ });
671
+
672
+ console.log(JSON.stringify(compacted, null, 2));
673
+ ```
674
+
675
+ \*\*Response example:\*\*
676
+
677
+ ```json
678
+ {}
679
+ ```
680
+
681
+ ***
682
+
683
+ ## GET /v1/responses/\{response\_id}
684
+
685
+ Retrieve a previously generated response.
686
+
687
+ ### Path Parameters
688
+
689
+ * `response_id` (string, required) — The response id returned by a previous create response request.
690
+
691
+ ### Response Body
692
+
693
+ * `background` (boolean, required) — OpenResponses compatibility fields.
694
+ Not used at the moment. Just for OpenResponses compatibility.
695
+ Whether to process the response asynchronously in the background.
696
+
697
+ * `completed_at` (integer | null) — The Unix timestamp (in seconds) for the response completion time. Only set when the response is completed.
698
+
699
+ * `created_at` (integer, required) — The Unix timestamp (in seconds) for the response creation time.
700
+
701
+ * `error` (object) — An error object returned when the model fails to generate a response.
702
+
703
+ * `frequency_penalty` (number, required) — (NOT SUPPORTED in Responses API) Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
704
+
705
+ * `id` (string, required) — Unique ID of the response.
706
+
707
+ * `incomplete_details` (object | object | object)
708
+
709
+ * `instructions` (string | null) — A system (or developer) message inserted into the model's context.
710
+
711
+ * `max_output_tokens` (integer | null) — Max number of tokens that can be generated in a response. This includes both output and reasoning tokens.
712
+
713
+ * `max_tool_calls` (integer | null) — The maximum number of tool calls allowed for this response.
714
+
715
+ * `metadata` (object, required) — Only included for compatibility.
716
+
717
+ * `model` (string, required) — Model name used to generate the response.
718
+
719
+ * `object` (string, required) — The object type of this resource. Always set to \`response\`.
720
+
721
+ * `output` (array\<object | object | object | object | object | object | object | object | object>, required) — The response generated by the model.
722
+
723
+ * `parallel_tool_calls` (boolean, required) — Whether to allow the model to run parallel tool calls.
724
+
725
+ * `presence_penalty` (number, required) — (NOT SUPPORTED in Responses API) Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
726
+
727
+ * `previous_response_id` (string | null) — The ID of the previous response from the model.
728
+
729
+ * `prompt_cache_key` (string | null) — The cache key used for the prompt for routing to the correct engine.
730
+
731
+ * `reasoning` (object)
732
+
733
+ * `effort` (string | null) — Constrains how hard a reasoning model thinks before responding. Only supported by \`grok-4.3\`. Possible values are \`none\` (disables reasoning completely), \`low\` (this is the default if not specified), \`medium\` and \`high\` (uses the most reasoning tokens).
734
+
735
+ * `generate_summary` (string | null) — Only included for compatibility.
736
+
737
+ * `summary` (string | null) — A summary of the model's reasoning process. Possible values are \`auto\`, \`concise\` and \`detailed\`. Only included for compatibility. The model shall always return \`detailed\`.
738
+
739
+ * `safety_identifier` (string | null) — A stable identifier used to help detect users of your application that may be violating xAI's usage policies.
740
+
741
+ * `service_tier` (string, required) — Specifies the processing tier used for serving the request.
742
+
743
+ * `status` (string, required) — Status of the response. One of \`completed\`, \`in\_progress\` or \`incomplete\`.
744
+
745
+ * `store` (boolean, required) — Whether to store the input message(s) and model response for later retrieval.
746
+
747
+ * `temperature` (number | null) — What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
748
+
749
+ * `text` (object, required)
750
+
751
+ * `format` (object | object | object)
752
+
753
+ * `tool_choice` (string | object, required) — Parameter to control how model chooses the tools.
754
+
755
+ * `name` (string, required) — Name of the function to use.
756
+
757
+ * `type` (string, required) — Type is always \`"function"\`.
758
+
759
+ * `tools` (array\<object | object | object | object | object | object | object>, required) — A list of tools the model may call in JSON-schema. Currently, only functions and web search are supported as tools. A max of 128 tools are supported.
760
+
761
+ * `top_logprobs` (integer, required) — An integer between 0 and 8 specifying the number of most likely tokens to return at each token position.
762
+
763
+ * `top_p` (number | null) — An alternative to sampling with \`temperature\`, called nucleus sampling, where the model considers the results of the tokens with \`top\_p\` probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. It is generally recommended to alter this or \`temperature\` but not both.
764
+
765
+ * `truncation` (string, required) — The truncation strategy to use for the model response.
766
+
767
+ * `usage` (object)
768
+
769
+ * `context_details` (object)
770
+
771
+ * `input_tokens` (integer, required) — Prompt tokens in the latest context (sourced from
772
+ \`SamplingUsage.context\_prompt\_tokens\`).
773
+
774
+ * `output_tokens` (integer, required) — Completion + reasoning tokens in the latest context (sourced from
775
+ \`SamplingUsage.context\_output\_tokens\`).
776
+
777
+ * `cost_in_nano_usd` (integer | null) — Cost in nano US dollars for this request.
778
+
779
+ * `cost_in_usd_ticks` (integer | null) — Accurate cost of this request in USD ticks, where "tick" is defined as follows:
780
+ TICKS\_IN\_USD\_CENT: i64 = 100\_000\_000
781
+ which means there are 10'000'000'000 ticks in one \*dollar\*.
782
+
783
+ * `input_tokens` (integer, required) — Number of input tokens used.
784
+
785
+ * `input_tokens_details` (object, required)
786
+
787
+ * `cached_tokens` (integer, required) — Tokens cached by xAI from previous requests and reused for this request.
788
+
789
+ * `num_server_side_tools_used` (integer, required) — Number of server side tools used.
790
+
791
+ * `num_sources_used` (integer, required) — Number of sources used (for live search).
792
+
793
+ * `output_tokens` (integer, required) — Number of output tokens used.
794
+
795
+ * `output_tokens_details` (object, required)
796
+
797
+ * `reasoning_tokens` (integer, required) — Tokens generated by the model for reasoning.
798
+
799
+ * `server_side_tool_usage_details` (object)
800
+
801
+ * `code_interpreter_calls` (integer, required) — Number of code interpreter calls.
802
+
803
+ * `document_search_calls` (integer, required) — Number of document search calls.
804
+
805
+ * `file_search_calls` (integer, required) — Number of file search calls.
806
+
807
+ * `mcp_calls` (integer, required) — Number of MCP calls.
808
+
809
+ * `web_search_calls` (integer, required) — Number of web search calls.
810
+
811
+ * `x_search_calls` (integer, required) — Number of X search calls.
812
+
813
+ * `total_tokens` (integer, required) — Total tokens used.
814
+
815
+ * `user` (string | null) — A unique identifier representing your end-user, which can help xAI to monitor and detect abuse.
816
+
817
+ \*\*Response example:\*\*
818
+
819
+ ```json
820
+ {
821
+ "created_at": 1754475266,
822
+ "id": "ad5663da-63e6-86c6-e0be-ff15effa8357",
823
+ "max_output_tokens": null,
824
+ "model": "latest",
825
+ "object": "response",
826
+ "output": [
827
+ {
828
+ "content": [
829
+ {
830
+ "type": "output_text",
831
+ "text": "101 multiplied by 3 is 303.",
832
+ "logprobs": null,
833
+ "annotations": []
834
+ }
835
+ ],
836
+ "id": "msg_ad5663da-63e6-86c6-e0be-ff15effa8357",
837
+ "role": "assistant",
838
+ "type": "message",
839
+ "status": "completed"
840
+ },
841
+ {
842
+ "id": "",
843
+ "summary": [
844
+ {
845
+ "text": "First, the user asked: \"What is 101*3?\"\n\nThis is a simple multiplication: 101 multiplied by 3.\n\nCalculating: 100 * 3 = 300, and 1 * 3 = 3, so 300 + 3 = 303.\n\nI should respond helpfully and directly, as per my system prompt: \"You are a helpful assistant that can answer questions and help with tasks.\"\n\nKeep the response concise and accurate. No need for extra fluff unless it adds value.\n\nFinal answer: 303.",
846
+ "type": "summary_text"
847
+ }
848
+ ],
849
+ "type": "reasoning",
850
+ "status": "completed"
851
+ }
852
+ ],
853
+ "parallel_tool_calls": true,
854
+ "previous_response_id": null,
855
+ "reasoning": null,
856
+ "temperature": null,
857
+ "text": {
858
+ "format": {
859
+ "type": "text"
860
+ }
861
+ },
862
+ "tool_choice": "auto",
863
+ "tools": [],
864
+ "top_p": null,
865
+ "usage": {
866
+ "prompt_tokens": 32,
867
+ "completion_tokens": 9,
868
+ "total_tokens": 151,
869
+ "prompt_tokens_details": {
870
+ "text_tokens": 32,
871
+ "audio_tokens": 0,
872
+ "image_tokens": 0,
873
+ "cached_tokens": 8
874
+ },
875
+ "completion_tokens_details": {
876
+ "reasoning_tokens": 110,
877
+ "audio_tokens": 0,
878
+ "accepted_prediction_tokens": 0,
879
+ "rejected_prediction_tokens": 0
880
+ },
881
+ "num_sources_used": 0
882
+ },
883
+ "user": null,
884
+ "incomplete_details": null,
885
+ "status": "completed",
886
+ "store": true
887
+ }
888
+ ```
889
+
890
+ ***
891
+
892
+ ## DELETE /v1/responses/\{response\_id}
893
+
894
+ Delete a previously generated response.
895
+
896
+ ### Path Parameters
897
+
898
+ * `response_id` (string, required) — The response id returned by a previous create response request.
899
+
900
+ ### Response Body
901
+
902
+ * `deleted` (boolean, required) — Whether the response was successfully deleted.
903
+
904
+ * `id` (string, required) — The response\_id to be deleted.
905
+
906
+ * `object` (string, required) — The deleted object type, which is always \`response\`.
907
+
908
+ \*\*Response example:\*\*
909
+
910
+ ```json
911
+ {
912
+ "id": "ad5663da-63e6-86c6-e0be-ff15effa8357",
913
+ "object": "response",
914
+ "deleted": true
915
+ }
916
+ ```
917
+
918
+ ***
919
+
920
+ ## GET /v1/chat/deferred-completion/\{request\_id}
921
+
922
+ Tries to fetch a result for a previously-started deferred completion. Returns \`200 Success\` with the response body, if the request has been completed. Returns \`202 Accepted\` when the request is pending processing.
923
+
924
+ ### Path Parameters
925
+
926
+ * `request_id` (string, required) — The deferred request id returned by a previous deferred chat request.
927
+
928
+ ### Response Body
929
+
930
+ * `choices` (array\<object>, required) — A list of response choices from the model. The length corresponds to the \`n\` in request body (default to 1).
931
+
932
+ * `finish_reason` (string | null) — Finish reason. \`"stop"\` means the inference has reached a model-defined or user-supplied stop sequence in \`stop\`. \`"length"\` means the inference result has reached the model's maximum allowed token length or the user-defined value in \`max\_tokens\`. \`"end\_turn"\` or \`null\` in streaming mode when the chunk is not the last.
933
+
934
+ * `index` (integer, required) — Index of the choice within the response choices, starting from 0.
935
+
936
+ * `logprobs` (object)
937
+
938
+ * `content` (array | null) — An array of the log probabilities of each output token returned.
939
+
940
+ * `message` (object, required)
941
+
942
+ * `content` (string | null) — The content of the message.
943
+
944
+ * `reasoning_content` (string | null) — The reasoning trace generated by the model.
945
+
946
+ * `refusal` (string | null) — The reason given by model if the model is unable to generate a response. null if model is able to generate.
947
+
948
+ * `role` (string, required) — The role that the message belongs to, the response from model is always \`"assistant"\`.
949
+
950
+ * `tool_calls` (array | null) — A list of tool calls asked by model for user to perform.
951
+
952
+ * `citations` (array | null) — List of all the external pages used by the model to answer.
953
+
954
+ * `created` (integer, required) — The chat completion creation time in Unix timestamp.
955
+
956
+ * `id` (string, required) — A unique ID for the chat response.
957
+
958
+ * `model` (string, required) — Model ID used to create chat completion.
959
+
960
+ * `object` (string, required) — The object type, which is always \`"chat.completion"\`.
961
+
962
+ * `output_files` (array | null) — Files generated during the response (e.g., by the code execution tool).
963
+ Only populated when \`code\_execution\_files\_output\` is included.
964
+
965
+ * `system_fingerprint` (string | null) — System fingerprint, used to indicate xAI system configuration changes.
966
+
967
+ * `usage` (object)
968
+
969
+ * `completion_tokens` (integer, required) — Total completion tokens used.
970
+
971
+ * `completion_tokens_details` (object, required) — Details of completion usage.
972
+
973
+ * `accepted_prediction_tokens` (integer, required) — The number of tokens in the prediction that appeared in the completion.
974
+
975
+ * `audio_tokens` (integer, required) — Audio input tokens generated by the model.
976
+
977
+ * `reasoning_tokens` (integer, required) — Tokens generated by the model for reasoning.
978
+
979
+ * `rejected_prediction_tokens` (integer, required) — The number of tokens in the prediction that did not appear in the completion.
980
+
981
+ * `cost_in_usd_ticks` (integer, required) — Accurate cost of this request in USD ticks, where "tick" is defined as follows:
982
+ TICKS\_IN\_USD\_CENT: i64 = 100\_000\_000
983
+ which means there are 10'000'000'000 ticks in one \*dollar\*.
984
+
985
+ * `num_sources_used` (integer, required) — Number of individual live search sources used.
986
+
987
+ * `prompt_tokens` (integer, required) — Total prompt tokens used.
988
+
989
+ * `prompt_tokens_details` (object, required) — Details of prompt usage.
990
+
991
+ * `audio_tokens` (integer, required) — Audio prompt tokens used.
992
+
993
+ * `cached_tokens` (integer, required) — Tokens cached by xAI from previous requests and reused for this request.
994
+
995
+ * `image_tokens` (integer, required) — Image prompt tokens used.
996
+
997
+ * `text_tokens` (integer, required) — Total text prompt tokens used (cached + non-cached text tokens).
998
+
999
+ * `total_tokens` (integer, required) — Total tokens used: the sum of the prompt and completion token counts.
1000
+
1001
+ \*\*Response example:\*\*
1002
+
1003
+ ```json
1004
+ {
1005
+ "id": "335b92e4-afa5-48e7-b99c-b9a4eabc1c8e",
1006
+ "object": "chat.completion",
1007
+ "created": 1743770624,
1008
+ "model": "latest",
1009
+ "choices": [
1010
+ {
1011
+ "index": 0,
1012
+ "message": {
1013
+ "role": "assistant",
1014
+ "content": "101 multiplied by 3 is 303.",
1015
+ "refusal": null
1016
+ },
1017
+ "finish_reason": "stop"
1018
+ }
1019
+ ],
1020
+ "usage": {
1021
+ "prompt_tokens": 31,
1022
+ "completion_tokens": 11,
1023
+ "total_tokens": 42,
1024
+ "prompt_tokens_details": {
1025
+ "text_tokens": 31,
1026
+ "audio_tokens": 0,
1027
+ "image_tokens": 0,
1028
+ "cached_tokens": 0
1029
+ },
1030
+ "completion_tokens_details": {
1031
+ "reasoning_tokens": 0,
1032
+ "audio_tokens": 0,
1033
+ "accepted_prediction_tokens": 0,
1034
+ "rejected_prediction_tokens": 0
1035
+ }
1036
+ },
1037
+ "system_fingerprint": "fp_156d35dcaa"
1038
+ }
1039
+ ```
1040
+