azure_openai_client 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. checksums.yaml +4 -4
  2. data/Gemfile +10 -0
  3. data/Gemfile.lock +76 -0
  4. data/README.md +50 -87
  5. data/Rakefile +10 -0
  6. data/azure_openai_client.gemspec +38 -0
  7. data/docs/ChatCompletionsCreate200Response.md +28 -0
  8. data/docs/ChatCompletionsCreate200ResponseChoicesInner.md +22 -0
  9. data/docs/ChatCompletionsCreate200ResponseChoicesInnerMessage.md +20 -0
  10. data/docs/ChatCompletionsCreate200ResponseUsage.md +22 -0
  11. data/docs/ChatCompletionsCreateRequest.md +38 -0
  12. data/docs/ChatCompletionsCreateRequestMessagesInner.md +22 -0
  13. data/docs/ChatCompletionsCreateRequestStop.md +49 -0
  14. data/docs/CompletionsCreate200Response.md +28 -0
  15. data/docs/CompletionsCreate200ResponseChoicesInner.md +24 -0
  16. data/docs/CompletionsCreate200ResponseChoicesInnerLogprobs.md +24 -0
  17. data/docs/CompletionsCreate200ResponseUsage.md +22 -0
  18. data/docs/CompletionsCreateRequest.md +52 -0
  19. data/docs/CompletionsCreateRequestPrompt.md +49 -0
  20. data/docs/CompletionsCreateRequestStop.md +49 -0
  21. data/docs/DefaultApi.md +238 -0
  22. data/docs/EmbeddingsCreate200Response.md +24 -0
  23. data/docs/EmbeddingsCreate200ResponseDataInner.md +22 -0
  24. data/docs/EmbeddingsCreate200ResponseUsage.md +20 -0
  25. data/docs/EmbeddingsCreateRequest.md +24 -0
  26. data/docs/EmbeddingsCreateRequestInput.md +49 -0
  27. data/docs/ErrorResponse.md +18 -0
  28. data/docs/ErrorResponseError.md +24 -0
  29. data/git_push.sh +53 -0
  30. data/inference.json +816 -0
  31. data/lib/azure_openai_client/api/default_api.rb +120 -93
  32. data/lib/azure_openai_client/api_client.rb +77 -75
  33. data/lib/azure_openai_client/api_error.rb +5 -5
  34. data/lib/azure_openai_client/configuration.rb +114 -22
  35. data/lib/azure_openai_client/models/{inline_response_200_2.rb → chat_completions_create200_response.rb} +34 -26
  36. data/lib/azure_openai_client/models/{inline_response_200_2_choices.rb → chat_completions_create200_response_choices_inner.rb} +28 -20
  37. data/lib/azure_openai_client/models/{inline_response_200_2_message.rb → chat_completions_create200_response_choices_inner_message.rb} +28 -20
  38. data/lib/azure_openai_client/models/{inline_response_200_2_usage.rb → chat_completions_create200_response_usage.rb} +28 -20
  39. data/lib/azure_openai_client/models/{chat_completions_body.rb → chat_completions_create_request.rb} +183 -37
  40. data/lib/azure_openai_client/models/{deploymentsdeploymentidchatcompletions_messages.rb → chat_completions_create_request_messages_inner.rb} +30 -22
  41. data/lib/azure_openai_client/models/chat_completions_create_request_stop.rb +105 -0
  42. data/lib/azure_openai_client/models/{inline_response_200.rb → completions_create200_response.rb} +34 -26
  43. data/lib/azure_openai_client/models/{inline_response_200_choices.rb → completions_create200_response_choices_inner.rb} +30 -22
  44. data/lib/azure_openai_client/models/{inline_response_200_logprobs.rb → completions_create200_response_choices_inner_logprobs.rb} +30 -22
  45. data/lib/azure_openai_client/models/{inline_response_200_usage.rb → completions_create200_response_usage.rb} +28 -20
  46. data/lib/azure_openai_client/models/{deploymentid_completions_body.rb → completions_create_request.rb} +58 -52
  47. data/lib/azure_openai_client/models/completions_create_request_prompt.rb +105 -0
  48. data/lib/azure_openai_client/models/completions_create_request_stop.rb +105 -0
  49. data/lib/azure_openai_client/models/{inline_response_200_1.rb → embeddings_create200_response.rb} +30 -22
  50. data/lib/azure_openai_client/models/{inline_response_200_1_data.rb → embeddings_create200_response_data_inner.rb} +28 -20
  51. data/lib/azure_openai_client/models/{inline_response_200_1_usage.rb → embeddings_create200_response_usage.rb} +26 -18
  52. data/lib/azure_openai_client/models/embeddings_create_request.rb +252 -0
  53. data/lib/azure_openai_client/models/embeddings_create_request_input.rb +105 -0
  54. data/lib/azure_openai_client/models/error_response.rb +21 -13
  55. data/lib/azure_openai_client/models/error_response_error.rb +27 -19
  56. data/lib/azure_openai_client/version.rb +5 -6
  57. data/lib/azure_openai_client.rb +23 -23
  58. data/openapi-codegen.sh +12 -0
  59. data/openapi_config.yaml +15 -0
  60. data/spec/api/default_api_spec.rb +21 -22
  61. data/spec/api_client_spec.rb +15 -16
  62. data/spec/configuration_spec.rb +3 -3
  63. data/spec/models/chat_completions_create200_response_choices_inner_message_spec.rb +44 -0
  64. data/spec/models/chat_completions_create200_response_choices_inner_spec.rb +46 -0
  65. data/spec/models/chat_completions_create200_response_spec.rb +64 -0
  66. data/spec/models/chat_completions_create200_response_usage_spec.rb +46 -0
  67. data/spec/models/chat_completions_create_request_messages_inner_spec.rb +50 -0
  68. data/spec/models/chat_completions_create_request_spec.rb +94 -0
  69. data/spec/models/chat_completions_create_request_stop_spec.rb +31 -0
  70. data/spec/models/completions_create200_response_choices_inner_logprobs_spec.rb +52 -0
  71. data/spec/models/completions_create200_response_choices_inner_spec.rb +52 -0
  72. data/spec/models/completions_create200_response_spec.rb +64 -0
  73. data/spec/models/completions_create200_response_usage_spec.rb +46 -0
  74. data/spec/models/completions_create_request_prompt_spec.rb +31 -0
  75. data/spec/models/completions_create_request_spec.rb +136 -0
  76. data/spec/models/completions_create_request_stop_spec.rb +31 -0
  77. data/spec/models/embeddings_create200_response_data_inner_spec.rb +46 -0
  78. data/spec/models/embeddings_create200_response_spec.rb +52 -0
  79. data/spec/models/embeddings_create200_response_usage_spec.rb +40 -0
  80. data/spec/models/embeddings_create_request_input_spec.rb +31 -0
  81. data/spec/models/embeddings_create_request_spec.rb +52 -0
  82. data/spec/models/error_response_error_spec.rb +12 -20
  83. data/spec/models/error_response_spec.rb +9 -17
  84. data/spec/spec_helper.rb +4 -5
  85. metadata +91 -77
  86. data/lib/azure_openai_client/models/deploymentid_embeddings_body.rb +0 -202
  87. data/lib/azure_openai_client/models/one_ofchat_completions_body_stop.rb +0 -198
  88. data/lib/azure_openai_client/models/one_ofdeploymentid_completions_body_prompt.rb +0 -198
  89. data/lib/azure_openai_client/models/one_ofdeploymentid_completions_body_stop.rb +0 -198
  90. data/spec/models/chat_completions_body_spec.rb +0 -102
  91. data/spec/models/deploymentid_completions_body_spec.rb +0 -144
  92. data/spec/models/deploymentid_embeddings_body_spec.rb +0 -36
  93. data/spec/models/deploymentsdeploymentidchatcompletions_messages_spec.rb +0 -58
  94. data/spec/models/inline_response_200_1_data_spec.rb +0 -54
  95. data/spec/models/inline_response_200_1_spec.rb +0 -60
  96. data/spec/models/inline_response_200_1_usage_spec.rb +0 -48
  97. data/spec/models/inline_response_200_2_choices_spec.rb +0 -54
  98. data/spec/models/inline_response_200_2_message_spec.rb +0 -52
  99. data/spec/models/inline_response_200_2_spec.rb +0 -72
  100. data/spec/models/inline_response_200_2_usage_spec.rb +0 -54
  101. data/spec/models/inline_response_200_choices_spec.rb +0 -60
  102. data/spec/models/inline_response_200_logprobs_spec.rb +0 -60
  103. data/spec/models/inline_response_200_spec.rb +0 -72
  104. data/spec/models/inline_response_200_usage_spec.rb +0 -54
  105. data/spec/models/one_ofchat_completions_body_stop_spec.rb +0 -36
  106. data/spec/models/one_ofdeploymentid_completions_body_prompt_spec.rb +0 -36
  107. data/spec/models/one_ofdeploymentid_completions_body_stop_spec.rb +0 -36
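
The headline change in this release is a regeneration of the client: the old Swagger-style model names (inline_response_200_*, chat_completions_body, deploymentid_completions_body, ...) are replaced by operation-based names such as ChatCompletionsCreateRequest and ChatCompletionsCreate200Response, and the generator scaffolding (Gemfile, gemspec, openapi-codegen.sh, openapi_config.yaml) now ships with the gem. As a rough sketch of what the regenerated surface looks like in use, assuming the OpenAPI Generator Ruby conventions that the docs/ and model filenames above suggest (module AzureOpenaiClient, DefaultApi#chat_completions_create, an api-key configured per the spec's apiKey scheme) — exact names and signatures may differ:

    require 'azure_openai_client'

    AzureOpenaiClient.configure do |config|
      # Host taken from the spec's {endpoint} server variable; placeholder value.
      config.host = 'your-resource-name.openai.azure.com'
      # The spec's apiKey security scheme sends the key in an "api-key" header.
      config.api_key['api-key'] = ENV['AZURE_OPENAI_API_KEY']
    end

    api = AzureOpenaiClient::DefaultApi.new
    request = AzureOpenaiClient::ChatCompletionsCreateRequest.new(
      messages: [{ role: 'user', content: 'Hello!' }]
    )
    # deployment-id path parameter and api-version query parameter, per inference.json below.
    response = api.chat_completions_create('gpt-35-turbo', '2023-05-15', request)
    puts response.choices.first.message.content

The OpenAPI document the client is generated from is now bundled as data/inference.json, shown in full below.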
data/inference.json ADDED
@@ -0,0 +1,816 @@
+ {
+   "openapi": "3.0.0",
+   "info": {
+     "title": "Azure OpenAI Service API",
+     "description": "Azure OpenAI APIs for completions and search",
+     "version": "2023-05-15"
+   },
+   "servers": [
+     {
+       "url": "https://{endpoint}/openai",
+       "variables": {
+         "endpoint": {
+           "default": "your-resource-name.openai.azure.com"
+         }
+       }
+     }
+   ],
+   "security": [
+     {
+       "bearer": [
+         "api.read"
+       ]
+     },
+     {
+       "apiKey": []
+     }
+   ],
+   "paths": {
+     "/deployments/{deployment-id}/completions": {
+       "post": {
+         "summary": "Creates a completion for the provided prompt, parameters and chosen model.",
+         "operationId": "Completions_Create",
+         "parameters": [
+           {
+             "in": "path",
+             "name": "deployment-id",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "example": "davinci",
+               "description": "Deployment id of the model which was deployed."
+             }
+           },
+           {
+             "in": "query",
+             "name": "api-version",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "example": "2023-05-15",
+               "description": "api version"
+             }
+           }
+         ],
+         "requestBody": {
+           "required": true,
+           "content": {
+             "application/json": {
+               "schema": {
+                 "type": "object",
+                 "properties": {
+                   "prompt": {
+                     "description": "The prompt(s) to generate completions for, encoded as a string or array of strings.\nNote that <|endoftext|> is the document separator that the model sees during training, so if a prompt is not specified the model will generate as if from the beginning of a new document. Maximum allowed size of string list is 2048.",
+                     "oneOf": [
+                       {
+                         "type": "string",
+                         "default": "",
+                         "example": "This is a test.",
+                         "nullable": true
+                       },
+                       {
+                         "type": "array",
+                         "items": {
+                           "type": "string",
+                           "default": "",
+                           "example": "This is a test.",
+                           "nullable": false
+                         },
+                         "description": "Array size minimum of 1 and maximum of 2048"
+                       }
+                     ]
+                   },
+                   "max_tokens": {
+                     "description": "The token count of your prompt plus max_tokens cannot exceed the model's context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096). Has minimum of 0.",
+                     "type": "integer",
+                     "default": 16,
+                     "example": 16,
+                     "nullable": true
+                   },
+                   "temperature": {
+                     "description": "What sampling temperature to use. Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer.\nWe generally recommend altering this or top_p but not both.",
+                     "type": "number",
+                     "default": 1,
+                     "example": 1,
+                     "nullable": true
+                   },
+                   "top_p": {
+                     "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\nWe generally recommend altering this or temperature but not both.",
+                     "type": "number",
+                     "default": 1,
+                     "example": 1,
+                     "nullable": true
+                   },
+                   "logit_bias": {
+                     "description": "Defaults to null. Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool (which works for both GPT-2 and GPT-3) to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {\"50256\" &#58; -100} to prevent the <|endoftext|> token from being generated.",
+                     "type": "object",
+                     "nullable": false
+                   },
+                   "user": {
+                     "description": "A unique identifier representing your end-user, which can help monitoring and detecting abuse",
+                     "type": "string",
+                     "nullable": false
+                   },
+                   "n": {
+                     "description": "How many completions to generate for each prompt. Minimum of 1 and maximum of 128 allowed.\nNote: Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for max_tokens and stop.",
+                     "type": "integer",
+                     "default": 1,
+                     "example": 1,
+                     "nullable": true
+                   },
+                   "stream": {
+                     "description": "Whether to stream back partial progress. If set, tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a data: [DONE] message.",
+                     "type": "boolean",
+                     "nullable": true,
+                     "default": false
+                   },
+                   "logprobs": {
+                     "description": "Include the log probabilities on the logprobs most likely tokens, as well the chosen tokens. For example, if logprobs is 5, the API will return a list of the 5 most likely tokens. The API will always return the logprob of the sampled token, so there may be up to logprobs+1 elements in the response.\nMinimum of 0 and maximum of 5 allowed.",
+                     "type": "integer",
+                     "default": null,
+                     "nullable": true
+                   },
+                   "model": {
+                     "type": "string",
+                     "example": "davinci",
+                     "nullable": true,
+                     "description": "ID of the model to use. You can use the Models_List operation to see all of your available models, or see our Models_Get overview for descriptions of them."
+                   },
+                   "suffix": {
+                     "type": "string",
+                     "nullable": true,
+                     "description": "The suffix that comes after a completion of inserted text."
+                   },
+                   "echo": {
+                     "description": "Echo back the prompt in addition to the completion",
+                     "type": "boolean",
+                     "default": false,
+                     "nullable": true
+                   },
+                   "stop": {
+                     "description": "Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.",
+                     "oneOf": [
+                       {
+                         "type": "string",
+                         "default": "<|endoftext|>",
+                         "example": "\n",
+                         "nullable": true
+                       },
+                       {
+                         "type": "array",
+                         "items": {
+                           "type": "string",
+                           "example": [
+                             "\n"
+                           ],
+                           "nullable": false
+                         },
+                         "description": "Array minimum size of 1 and maximum of 4"
+                       }
+                     ]
+                   },
+                   "completion_config": {
+                     "type": "string",
+                     "nullable": true
+                   },
+                   "cache_level": {
+                     "description": "can be used to disable any server-side caching, 0=no cache, 1=prompt prefix enabled, 2=full cache",
+                     "type": "integer",
+                     "nullable": true
+                   },
+                   "presence_penalty": {
+                     "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.",
+                     "type": "number",
+                     "default": 0
+                   },
+                   "frequency_penalty": {
+                     "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.",
+                     "type": "number",
+                     "default": 0
+                   },
+                   "best_of": {
+                     "description": "Generates best_of completions server-side and returns the \"best\" (the one with the highest log probability per token). Results cannot be streamed.\nWhen used with n, best_of controls the number of candidate completions and n specifies how many to return – best_of must be greater than n.\nNote: Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for max_tokens and stop. Has maximum value of 128.",
+                     "type": "integer"
+                   }
+                 }
+               },
+               "example": {
+                 "prompt": "Negate the following sentence.The price for bubblegum increased on thursday.\n\n Negated Sentence:",
+                 "max_tokens": 50
+               }
+             }
+           }
+         },
+         "responses": {
+           "200": {
+             "description": "OK",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "type": "object",
+                   "properties": {
+                     "id": {
+                       "type": "string"
+                     },
+                     "object": {
+                       "type": "string"
+                     },
+                     "created": {
+                       "type": "integer"
+                     },
+                     "model": {
+                       "type": "string"
+                     },
+                     "choices": {
+                       "type": "array",
+                       "items": {
+                         "type": "object",
+                         "properties": {
+                           "text": {
+                             "type": "string"
+                           },
+                           "index": {
+                             "type": "integer"
+                           },
+                           "logprobs": {
+                             "type": "object",
+                             "properties": {
+                               "tokens": {
+                                 "type": "array",
+                                 "items": {
+                                   "type": "string"
+                                 }
+                               },
+                               "token_logprobs": {
+                                 "type": "array",
+                                 "items": {
+                                   "type": "number"
+                                 }
+                               },
+                               "top_logprobs": {
+                                 "type": "array",
+                                 "items": {
+                                   "type": "object",
+                                   "additionalProperties": {
+                                     "type": "number"
+                                   }
+                                 }
+                               },
+                               "text_offset": {
+                                 "type": "array",
+                                 "items": {
+                                   "type": "integer"
+                                 }
+                               }
+                             }
+                           },
+                           "finish_reason": {
+                             "type": "string"
+                           }
+                         }
+                       }
+                     },
+                     "usage": {
+                       "type": "object",
+                       "properties": {
+                         "completion_tokens": {
+                           "type": "number",
+                           "format": "int32"
+                         },
+                         "prompt_tokens": {
+                           "type": "number",
+                           "format": "int32"
+                         },
+                         "total_tokens": {
+                           "type": "number",
+                           "format": "int32"
+                         }
+                       },
+                       "required": [
+                         "prompt_tokens",
+                         "total_tokens",
+                         "completion_tokens"
+                       ]
+                     }
+                   },
+                   "required": [
+                     "id",
+                     "object",
+                     "created",
+                     "model",
+                     "choices"
+                   ]
+                 },
+                 "example": {
+                   "model": "davinci",
+                   "object": "text_completion",
+                   "id": "cmpl-4509KAos68kxOqpE2uYGw81j6m7uo",
+                   "created": 1637097562,
+                   "choices": [
+                     {
+                       "index": 0,
+                       "text": "The price for bubblegum decreased on thursday.",
+                       "logprobs": null,
+                       "finish_reason": "stop"
+                     }
+                   ]
+                 }
+               }
+             },
+             "headers": {
+               "apim-request-id": {
+                 "description": "Request ID for troubleshooting purposes",
+                 "schema": {
+                   "type": "string"
+                 }
+               }
+             }
+           },
+           "default": {
+             "description": "Service unavailable",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "$ref": "#/components/schemas/errorResponse"
+                 }
+               }
+             },
+             "headers": {
+               "apim-request-id": {
+                 "description": "Request ID for troubleshooting purposes",
+                 "schema": {
+                   "type": "string"
+                 }
+               }
+             }
+           }
+         }
+       }
+     },
+     "/deployments/{deployment-id}/embeddings": {
+       "post": {
+         "summary": "Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.",
+         "operationId": "embeddings_create",
+         "parameters": [
+           {
+             "in": "path",
+             "name": "deployment-id",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "example": "ada-search-index-v1"
+             },
+             "description": "The deployment id of the model which was deployed."
+           },
+           {
+             "in": "query",
+             "name": "api-version",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "example": "2023-05-15",
+               "description": "api version"
+             }
+           }
+         ],
+         "requestBody": {
+           "required": true,
+           "content": {
+             "application/json": {
+               "schema": {
+                 "type": "object",
+                 "additionalProperties": true,
+                 "properties": {
+                   "input": {
+                     "description": "Input text to get embeddings for, encoded as a string. To get embeddings for multiple inputs in a single request, pass an array of strings. Each input must not exceed 2048 tokens in length.\nUnless you are embedding code, we suggest replacing newlines (\\n) in your input with a single space, as we have observed inferior results when newlines are present.",
+                     "oneOf": [
+                       {
+                         "type": "string",
+                         "default": "",
+                         "example": "This is a test.",
+                         "nullable": true
+                       },
+                       {
+                         "type": "array",
+                         "minItems": 1,
+                         "maxItems": 2048,
+                         "items": {
+                           "type": "string",
+                           "minLength": 1,
+                           "example": "This is a test.",
+                           "nullable": false
+                         }
+                       }
+                     ]
+                   },
+                   "user": {
+                     "description": "A unique identifier representing your end-user, which can help monitoring and detecting abuse.",
+                     "type": "string",
+                     "nullable": false
+                   },
+                   "input_type": {
+                     "description": "input type of embedding search to use",
+                     "type": "string",
+                     "example": "query"
+                   },
+                   "model": {
+                     "type": "string",
+                     "description": "ID of the model to use. You can use the Models_List operation to see all of your available models, or see our Models_Get overview for descriptions of them.",
+                     "nullable": false
+                   }
+                 },
+                 "required": [
+                   "input"
+                 ]
+               }
+             }
+           }
+         },
+         "responses": {
+           "200": {
+             "description": "OK",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "type": "object",
+                   "properties": {
+                     "object": {
+                       "type": "string"
+                     },
+                     "model": {
+                       "type": "string"
+                     },
+                     "data": {
+                       "type": "array",
+                       "items": {
+                         "type": "object",
+                         "properties": {
+                           "index": {
+                             "type": "integer"
+                           },
+                           "object": {
+                             "type": "string"
+                           },
+                           "embedding": {
+                             "type": "array",
+                             "items": {
+                               "type": "number"
+                             }
+                           }
+                         },
+                         "required": [
+                           "index",
+                           "object",
+                           "embedding"
+                         ]
+                       }
+                     },
+                     "usage": {
+                       "type": "object",
+                       "properties": {
+                         "prompt_tokens": {
+                           "type": "integer"
+                         },
+                         "total_tokens": {
+                           "type": "integer"
+                         }
+                       },
+                       "required": [
+                         "prompt_tokens",
+                         "total_tokens"
+                       ]
+                     }
+                   },
+                   "required": [
+                     "object",
+                     "model",
+                     "data",
+                     "usage"
+                   ]
+                 }
+               }
+             }
+           }
+         }
+       }
+     },
+     "/deployments/{deployment-id}/chat/completions": {
+       "post": {
+         "summary": "Creates a completion for the chat message",
+         "operationId": "ChatCompletions_Create",
+         "parameters": [
+           {
+             "in": "path",
+             "name": "deployment-id",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "description": "Deployment id of the model which was deployed."
+             }
+           },
+           {
+             "in": "query",
+             "name": "api-version",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "example": "2023-05-15",
+               "description": "api version"
+             }
+           }
+         ],
+         "requestBody": {
+           "required": true,
+           "content": {
+             "application/json": {
+               "schema": {
+                 "type": "object",
+                 "properties": {
+                   "messages": {
+                     "description": "The messages to generate chat completions for, in the chat format.",
+                     "type": "array",
+                     "minItems": 1,
+                     "items": {
+                       "type": "object",
+                       "properties": {
+                         "role": {
+                           "type": "string",
+                           "enum": [
+                             "system",
+                             "user",
+                             "assistant"
+                           ],
+                           "description": "The role of the author of this message."
+                         },
+                         "content": {
+                           "type": "string",
+                           "description": "The contents of the message"
+                         },
+                         "name": {
+                           "type": "string",
+                           "description": "The name of the user in a multi-user chat"
+                         }
+                       },
+                       "required": [
+                         "role",
+                         "content"
+                       ]
+                     }
+                   },
+                   "temperature": {
+                     "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.\nWe generally recommend altering this or `top_p` but not both.",
+                     "type": "number",
+                     "minimum": 0,
+                     "maximum": 2,
+                     "default": 1,
+                     "example": 1,
+                     "nullable": true
+                   },
+                   "top_p": {
+                     "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\nWe generally recommend altering this or `temperature` but not both.",
+                     "type": "number",
+                     "minimum": 0,
+                     "maximum": 1,
+                     "default": 1,
+                     "example": 1,
+                     "nullable": true
+                   },
+                   "n": {
+                     "description": "How many chat completion choices to generate for each input message.",
+                     "type": "integer",
+                     "minimum": 1,
+                     "maximum": 128,
+                     "default": 1,
+                     "example": 1,
+                     "nullable": true
+                   },
+                   "stream": {
+                     "description": "If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a `data: [DONE]` message.",
+                     "type": "boolean",
+                     "nullable": true,
+                     "default": false
+                   },
+                   "stop": {
+                     "description": "Up to 4 sequences where the API will stop generating further tokens.",
+                     "oneOf": [
+                       {
+                         "type": "string",
+                         "nullable": true
+                       },
+                       {
+                         "type": "array",
+                         "items": {
+                           "type": "string",
+                           "nullable": false
+                         },
+                         "minItems": 1,
+                         "maxItems": 4,
+                         "description": "Array minimum size of 1 and maximum of 4"
+                       }
+                     ],
+                     "default": null
+                   },
+                   "max_tokens": {
+                     "description": "The maximum number of tokens allowed for the generated answer. By default, the number of tokens the model can return will be (4096 - prompt tokens).",
+                     "type": "integer",
+                     "default": "inf"
+                   },
+                   "presence_penalty": {
+                     "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.",
+                     "type": "number",
+                     "default": 0,
+                     "minimum": -2,
+                     "maximum": 2
+                   },
+                   "frequency_penalty": {
+                     "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.",
+                     "type": "number",
+                     "default": 0,
+                     "minimum": -2,
+                     "maximum": 2
+                   },
+                   "logit_bias": {
+                     "description": "Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.",
+                     "type": "object",
+                     "nullable": true
+                   },
+                   "user": {
+                     "description": "A unique identifier representing your end-user, which can help Azure OpenAI to monitor and detect abuse.",
+                     "type": "string",
+                     "example": "user-1234",
+                     "nullable": false
+                   }
+                 },
+                 "required": [
+                   "messages"
+                 ]
+               },
+               "example": {
+                 "model": "gpt-35-turbo",
+                 "messages": [
+                   {
+                     "role": "user",
+                     "content": "Hello!"
+                   }
+                 ]
+               }
+             }
+           }
+         },
+         "responses": {
+           "200": {
+             "description": "OK",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "type": "object",
+                   "properties": {
+                     "id": {
+                       "type": "string"
+                     },
+                     "object": {
+                       "type": "string"
+                     },
+                     "created": {
+                       "type": "integer",
+                       "format": "unixtime"
+                     },
+                     "model": {
+                       "type": "string"
+                     },
+                     "choices": {
+                       "type": "array",
+                       "items": {
+                         "type": "object",
+                         "properties": {
+                           "index": {
+                             "type": "integer"
+                           },
+                           "message": {
+                             "type": "object",
+                             "properties": {
+                               "role": {
+                                 "type": "string",
+                                 "enum": [
+                                   "system",
+                                   "user",
+                                   "assistant"
+                                 ],
+                                 "description": "The role of the author of this message."
+                               },
+                               "content": {
+                                 "type": "string",
+                                 "description": "The contents of the message"
+                               }
+                             },
+                             "required": [
+                               "role",
+                               "content"
+                             ]
+                           },
+                           "finish_reason": {
+                             "type": "string"
+                           }
+                         }
+                       }
+                     },
+                     "usage": {
+                       "type": "object",
+                       "properties": {
+                         "prompt_tokens": {
+                           "type": "integer"
+                         },
+                         "completion_tokens": {
+                           "type": "integer"
+                         },
+                         "total_tokens": {
+                           "type": "integer"
+                         }
+                       },
+                       "required": [
+                         "prompt_tokens",
+                         "completion_tokens",
+                         "total_tokens"
+                       ]
+                     }
+                   },
+                   "required": [
+                     "id",
+                     "object",
+                     "created",
+                     "model",
+                     "choices"
+                   ]
+                 },
+                 "example": {
+                   "id": "chatcmpl-123",
+                   "object": "chat.completion",
+                   "created": 1677652288,
+                   "choices": [
+                     {
+                       "index": 0,
+                       "message": {
+                         "role": "assistant",
+                         "content": "\n\nHello there, how may I assist you today?"
+                       },
+                       "finish_reason": "stop"
+                     }
+                   ],
+                   "usage": {
+                     "prompt_tokens": 9,
+                     "completion_tokens": 12,
+                     "total_tokens": 21
+                   }
+                 }
+               }
+             }
+           }
+         }
+       }
+     }
+   },
+   "components": {
+     "schemas": {
+       "errorResponse": {
+         "type": "object",
+         "properties": {
+           "error": {
+             "type": "object",
+             "properties": {
+               "code": {
+                 "type": "string"
+               },
+               "message": {
+                 "type": "string"
+               },
+               "param": {
+                 "type": "string"
+               },
+               "type": {
+                 "type": "string"
+               }
+             }
+           }
+         }
+       }
+     },
+     "securitySchemes": {
+       "bearer": {
+         "type": "oauth2",
+         "flows": {
+           "implicit": {
+             "authorizationUrl": "https://login.microsoftonline.com/common/oauth2/v2.0/authorize",
+             "scopes": {}
+           }
+         },
+         "x-tokenInfoFunc": "api.middleware.auth.bearer_auth",
+         "x-scopeValidateFunc": "api.middleware.auth.validate_scopes"
+       },
+       "apiKey": {
+         "type": "apiKey",
+         "name": "api-key",
+         "in": "header"
+       }
+     }
+   }
+ }
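
Since the whole file is additions, the wire protocol is easiest to read straight off the spec: requests go to https://{endpoint}/openai/deployments/{deployment-id}/... with a mandatory api-version query parameter, authenticated either via the bearer OAuth2 flow or an api-key header. A minimal stdlib-only sketch of the Completions_Create operation, with endpoint, deployment, and key as placeholders:

    require 'json'
    require 'net/http'
    require 'uri'

    endpoint   = 'your-resource-name.openai.azure.com' # the spec's {endpoint} server variable
    deployment = 'davinci'                             # example deployment-id from the spec
    uri = URI("https://#{endpoint}/openai/deployments/#{deployment}/completions?api-version=2023-05-15")

    request = Net::HTTP::Post.new(uri)
    request['Content-Type'] = 'application/json'
    request['api-key'] = ENV['AZURE_OPENAI_API_KEY']   # apiKey security scheme: header "api-key"
    request.body = JSON.generate(prompt: 'This is a test.', max_tokens: 16)

    response = Net::HTTP.start(uri.host, uri.port, use_ssl: true) { |http| http.request(request) }
    body = JSON.parse(response.body)
    puts body.dig('choices', 0, 'text')                # response schema: choices[].text

On errors the body follows #/components/schemas/errorResponse, and the apim-request-id response header is the value to quote when troubleshooting.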