azure_openai_client 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. checksums.yaml +4 -4
  2. data/Gemfile +10 -0
  3. data/Gemfile.lock +76 -0
  4. data/README.md +50 -87
  5. data/Rakefile +10 -0
  6. data/azure_openai_client.gemspec +38 -0
  7. data/docs/ChatCompletionsCreate200Response.md +28 -0
  8. data/docs/ChatCompletionsCreate200ResponseChoicesInner.md +22 -0
  9. data/docs/ChatCompletionsCreate200ResponseChoicesInnerMessage.md +20 -0
  10. data/docs/ChatCompletionsCreate200ResponseUsage.md +22 -0
  11. data/docs/ChatCompletionsCreateRequest.md +38 -0
  12. data/docs/ChatCompletionsCreateRequestMessagesInner.md +22 -0
  13. data/docs/ChatCompletionsCreateRequestStop.md +49 -0
  14. data/docs/CompletionsCreate200Response.md +28 -0
  15. data/docs/CompletionsCreate200ResponseChoicesInner.md +24 -0
  16. data/docs/CompletionsCreate200ResponseChoicesInnerLogprobs.md +24 -0
  17. data/docs/CompletionsCreate200ResponseUsage.md +22 -0
  18. data/docs/CompletionsCreateRequest.md +52 -0
  19. data/docs/CompletionsCreateRequestPrompt.md +49 -0
  20. data/docs/CompletionsCreateRequestStop.md +49 -0
  21. data/docs/DefaultApi.md +238 -0
  22. data/docs/EmbeddingsCreate200Response.md +24 -0
  23. data/docs/EmbeddingsCreate200ResponseDataInner.md +22 -0
  24. data/docs/EmbeddingsCreate200ResponseUsage.md +20 -0
  25. data/docs/EmbeddingsCreateRequest.md +24 -0
  26. data/docs/EmbeddingsCreateRequestInput.md +49 -0
  27. data/docs/ErrorResponse.md +18 -0
  28. data/docs/ErrorResponseError.md +24 -0
  29. data/git_push.sh +53 -0
  30. data/inference.json +816 -0
  31. data/lib/azure_openai_client/api/default_api.rb +120 -93
  32. data/lib/azure_openai_client/api_client.rb +77 -75
  33. data/lib/azure_openai_client/api_error.rb +5 -5
  34. data/lib/azure_openai_client/configuration.rb +114 -22
  35. data/lib/azure_openai_client/models/{inline_response_200_2.rb → chat_completions_create200_response.rb} +34 -26
  36. data/lib/azure_openai_client/models/{inline_response_200_2_choices.rb → chat_completions_create200_response_choices_inner.rb} +28 -20
  37. data/lib/azure_openai_client/models/{inline_response_200_2_message.rb → chat_completions_create200_response_choices_inner_message.rb} +28 -20
  38. data/lib/azure_openai_client/models/{inline_response_200_2_usage.rb → chat_completions_create200_response_usage.rb} +28 -20
  39. data/lib/azure_openai_client/models/{chat_completions_body.rb → chat_completions_create_request.rb} +183 -37
  40. data/lib/azure_openai_client/models/{deploymentsdeploymentidchatcompletions_messages.rb → chat_completions_create_request_messages_inner.rb} +30 -22
  41. data/lib/azure_openai_client/models/chat_completions_create_request_stop.rb +105 -0
  42. data/lib/azure_openai_client/models/{inline_response_200.rb → completions_create200_response.rb} +34 -26
  43. data/lib/azure_openai_client/models/{inline_response_200_choices.rb → completions_create200_response_choices_inner.rb} +30 -22
  44. data/lib/azure_openai_client/models/{inline_response_200_logprobs.rb → completions_create200_response_choices_inner_logprobs.rb} +30 -22
  45. data/lib/azure_openai_client/models/{inline_response_200_usage.rb → completions_create200_response_usage.rb} +28 -20
  46. data/lib/azure_openai_client/models/{deploymentid_completions_body.rb → completions_create_request.rb} +58 -52
  47. data/lib/azure_openai_client/models/completions_create_request_prompt.rb +105 -0
  48. data/lib/azure_openai_client/models/completions_create_request_stop.rb +105 -0
  49. data/lib/azure_openai_client/models/{inline_response_200_1.rb → embeddings_create200_response.rb} +30 -22
  50. data/lib/azure_openai_client/models/{inline_response_200_1_data.rb → embeddings_create200_response_data_inner.rb} +28 -20
  51. data/lib/azure_openai_client/models/{inline_response_200_1_usage.rb → embeddings_create200_response_usage.rb} +26 -18
  52. data/lib/azure_openai_client/models/embeddings_create_request.rb +252 -0
  53. data/lib/azure_openai_client/models/embeddings_create_request_input.rb +105 -0
  54. data/lib/azure_openai_client/models/error_response.rb +21 -13
  55. data/lib/azure_openai_client/models/error_response_error.rb +27 -19
  56. data/lib/azure_openai_client/version.rb +5 -6
  57. data/lib/azure_openai_client.rb +23 -23
  58. data/openapi-codegen.sh +12 -0
  59. data/openapi_config.yaml +15 -0
  60. data/spec/api/default_api_spec.rb +21 -22
  61. data/spec/api_client_spec.rb +15 -16
  62. data/spec/configuration_spec.rb +3 -3
  63. data/spec/models/chat_completions_create200_response_choices_inner_message_spec.rb +44 -0
  64. data/spec/models/chat_completions_create200_response_choices_inner_spec.rb +46 -0
  65. data/spec/models/chat_completions_create200_response_spec.rb +64 -0
  66. data/spec/models/chat_completions_create200_response_usage_spec.rb +46 -0
  67. data/spec/models/chat_completions_create_request_messages_inner_spec.rb +50 -0
  68. data/spec/models/chat_completions_create_request_spec.rb +94 -0
  69. data/spec/models/chat_completions_create_request_stop_spec.rb +31 -0
  70. data/spec/models/completions_create200_response_choices_inner_logprobs_spec.rb +52 -0
  71. data/spec/models/completions_create200_response_choices_inner_spec.rb +52 -0
  72. data/spec/models/completions_create200_response_spec.rb +64 -0
  73. data/spec/models/completions_create200_response_usage_spec.rb +46 -0
  74. data/spec/models/completions_create_request_prompt_spec.rb +31 -0
  75. data/spec/models/completions_create_request_spec.rb +136 -0
  76. data/spec/models/completions_create_request_stop_spec.rb +31 -0
  77. data/spec/models/embeddings_create200_response_data_inner_spec.rb +46 -0
  78. data/spec/models/embeddings_create200_response_spec.rb +52 -0
  79. data/spec/models/embeddings_create200_response_usage_spec.rb +40 -0
  80. data/spec/models/embeddings_create_request_input_spec.rb +31 -0
  81. data/spec/models/embeddings_create_request_spec.rb +52 -0
  82. data/spec/models/error_response_error_spec.rb +12 -20
  83. data/spec/models/error_response_spec.rb +9 -17
  84. data/spec/spec_helper.rb +4 -5
  85. metadata +91 -77
  86. data/lib/azure_openai_client/models/deploymentid_embeddings_body.rb +0 -202
  87. data/lib/azure_openai_client/models/one_ofchat_completions_body_stop.rb +0 -198
  88. data/lib/azure_openai_client/models/one_ofdeploymentid_completions_body_prompt.rb +0 -198
  89. data/lib/azure_openai_client/models/one_ofdeploymentid_completions_body_stop.rb +0 -198
  90. data/spec/models/chat_completions_body_spec.rb +0 -102
  91. data/spec/models/deploymentid_completions_body_spec.rb +0 -144
  92. data/spec/models/deploymentid_embeddings_body_spec.rb +0 -36
  93. data/spec/models/deploymentsdeploymentidchatcompletions_messages_spec.rb +0 -58
  94. data/spec/models/inline_response_200_1_data_spec.rb +0 -54
  95. data/spec/models/inline_response_200_1_spec.rb +0 -60
  96. data/spec/models/inline_response_200_1_usage_spec.rb +0 -48
  97. data/spec/models/inline_response_200_2_choices_spec.rb +0 -54
  98. data/spec/models/inline_response_200_2_message_spec.rb +0 -52
  99. data/spec/models/inline_response_200_2_spec.rb +0 -72
  100. data/spec/models/inline_response_200_2_usage_spec.rb +0 -54
  101. data/spec/models/inline_response_200_choices_spec.rb +0 -60
  102. data/spec/models/inline_response_200_logprobs_spec.rb +0 -60
  103. data/spec/models/inline_response_200_spec.rb +0 -72
  104. data/spec/models/inline_response_200_usage_spec.rb +0 -54
  105. data/spec/models/one_ofchat_completions_body_stop_spec.rb +0 -36
  106. data/spec/models/one_ofdeploymentid_completions_body_prompt_spec.rb +0 -36
  107. data/spec/models/one_ofdeploymentid_completions_body_stop_spec.rb +0 -36
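
The headline change in this release is a regeneration of the client: the old Swagger-style model names (inline_response_200_*, chat_completions_body, deploymentid_completions_body, ...) are replaced by operation-based names such as ChatCompletionsCreateRequest and ChatCompletionsCreate200Response, and the generator scaffolding (Gemfile, gemspec, openapi-codegen.sh, openapi_config.yaml) now ships with the gem. As a rough sketch of what the regenerated surface looks like in use, assuming the OpenAPI Generator Ruby conventions that the docs/ and model filenames above suggest (module AzureOpenaiClient, DefaultApi#chat_completions_create, an api-key configured per the spec's apiKey scheme) — exact names and signatures may differ:

    require 'azure_openai_client'

    AzureOpenaiClient.configure do |config|
      # Host taken from the spec's {endpoint} server variable; placeholder value.
      config.host = 'your-resource-name.openai.azure.com'
      # The spec's apiKey security scheme sends the key in an "api-key" header.
      config.api_key['api-key'] = ENV['AZURE_OPENAI_API_KEY']
    end

    api = AzureOpenaiClient::DefaultApi.new
    request = AzureOpenaiClient::ChatCompletionsCreateRequest.new(
      messages: [{ role: 'user', content: 'Hello!' }]
    )
    # deployment-id path parameter and api-version query parameter, per inference.json below.
    response = api.chat_completions_create('gpt-35-turbo', '2023-05-15', request)
    puts response.choices.first.message.content

The OpenAPI document the client is generated from is now bundled as data/inference.json, shown in full below.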
data/inference.json ADDED
@@ -0,0 +1,816 @@
+ {
+   "openapi": "3.0.0",
+   "info": {
+     "title": "Azure OpenAI Service API",
+     "description": "Azure OpenAI APIs for completions and search",
+     "version": "2023-05-15"
+   },
+   "servers": [
+     {
+       "url": "https://{endpoint}/openai",
+       "variables": {
+         "endpoint": {
+           "default": "your-resource-name.openai.azure.com"
+         }
+       }
+     }
+   ],
+   "security": [
+     {
+       "bearer": [
+         "api.read"
+       ]
+     },
+     {
+       "apiKey": []
+     }
+   ],
+   "paths": {
+     "/deployments/{deployment-id}/completions": {
+       "post": {
+         "summary": "Creates a completion for the provided prompt, parameters and chosen model.",
+         "operationId": "Completions_Create",
+         "parameters": [
+           {
+             "in": "path",
+             "name": "deployment-id",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "example": "davinci",
+               "description": "Deployment id of the model which was deployed."
+             }
+           },
+           {
+             "in": "query",
+             "name": "api-version",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "example": "2023-05-15",
+               "description": "api version"
+             }
+           }
+         ],
+         "requestBody": {
+           "required": true,
+           "content": {
+             "application/json": {
+               "schema": {
+                 "type": "object",
+                 "properties": {
+                   "prompt": {
+                     "description": "The prompt(s) to generate completions for, encoded as a string or array of strings.\nNote that <|endoftext|> is the document separator that the model sees during training, so if a prompt is not specified the model will generate as if from the beginning of a new document. Maximum allowed size of string list is 2048.",
+                     "oneOf": [
+                       {
+                         "type": "string",
+                         "default": "",
+                         "example": "This is a test.",
+                         "nullable": true
+                       },
+                       {
+                         "type": "array",
+                         "items": {
+                           "type": "string",
+                           "default": "",
+                           "example": "This is a test.",
+                           "nullable": false
+                         },
+                         "description": "Array size minimum of 1 and maximum of 2048"
+                       }
+                     ]
+                   },
+                   "max_tokens": {
+                     "description": "The token count of your prompt plus max_tokens cannot exceed the model's context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096). Has minimum of 0.",
+                     "type": "integer",
+                     "default": 16,
+                     "example": 16,
+                     "nullable": true
+                   },
+                   "temperature": {
+                     "description": "What sampling temperature to use. Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer.\nWe generally recommend altering this or top_p but not both.",
+                     "type": "number",
+                     "default": 1,
+                     "example": 1,
+                     "nullable": true
+                   },
+                   "top_p": {
+                     "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\nWe generally recommend altering this or temperature but not both.",
+                     "type": "number",
+                     "default": 1,
+                     "example": 1,
+                     "nullable": true
+                   },
+                   "logit_bias": {
+                     "description": "Defaults to null. Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool (which works for both GPT-2 and GPT-3) to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {\"50256\" &#58; -100} to prevent the <|endoftext|> token from being generated.",
+                     "type": "object",
+                     "nullable": false
+                   },
+                   "user": {
+                     "description": "A unique identifier representing your end-user, which can help monitoring and detecting abuse",
+                     "type": "string",
+                     "nullable": false
+                   },
+                   "n": {
+                     "description": "How many completions to generate for each prompt. Minimum of 1 and maximum of 128 allowed.\nNote: Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for max_tokens and stop.",
+                     "type": "integer",
+                     "default": 1,
+                     "example": 1,
+                     "nullable": true
+                   },
+                   "stream": {
+                     "description": "Whether to stream back partial progress. If set, tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a data: [DONE] message.",
+                     "type": "boolean",
+                     "nullable": true,
+                     "default": false
+                   },
+                   "logprobs": {
+                     "description": "Include the log probabilities on the logprobs most likely tokens, as well the chosen tokens. For example, if logprobs is 5, the API will return a list of the 5 most likely tokens. The API will always return the logprob of the sampled token, so there may be up to logprobs+1 elements in the response.\nMinimum of 0 and maximum of 5 allowed.",
+                     "type": "integer",
+                     "default": null,
+                     "nullable": true
+                   },
+                   "model": {
+                     "type": "string",
+                     "example": "davinci",
+                     "nullable": true,
+                     "description": "ID of the model to use. You can use the Models_List operation to see all of your available models, or see our Models_Get overview for descriptions of them."
+                   },
+                   "suffix": {
+                     "type": "string",
+                     "nullable": true,
+                     "description": "The suffix that comes after a completion of inserted text."
+                   },
+                   "echo": {
+                     "description": "Echo back the prompt in addition to the completion",
+                     "type": "boolean",
+                     "default": false,
+                     "nullable": true
+                   },
+                   "stop": {
+                     "description": "Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.",
+                     "oneOf": [
+                       {
+                         "type": "string",
+                         "default": "<|endoftext|>",
+                         "example": "\n",
+                         "nullable": true
+                       },
+                       {
+                         "type": "array",
+                         "items": {
+                           "type": "string",
+                           "example": [
+                             "\n"
+                           ],
+                           "nullable": false
+                         },
+                         "description": "Array minimum size of 1 and maximum of 4"
+                       }
+                     ]
+                   },
+                   "completion_config": {
+                     "type": "string",
+                     "nullable": true
+                   },
+                   "cache_level": {
+                     "description": "can be used to disable any server-side caching, 0=no cache, 1=prompt prefix enabled, 2=full cache",
+                     "type": "integer",
+                     "nullable": true
+                   },
+                   "presence_penalty": {
+                     "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.",
+                     "type": "number",
+                     "default": 0
+                   },
+                   "frequency_penalty": {
+                     "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.",
+                     "type": "number",
+                     "default": 0
+                   },
+                   "best_of": {
+                     "description": "Generates best_of completions server-side and returns the \"best\" (the one with the highest log probability per token). Results cannot be streamed.\nWhen used with n, best_of controls the number of candidate completions and n specifies how many to return – best_of must be greater than n.\nNote: Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for max_tokens and stop. Has maximum value of 128.",
+                     "type": "integer"
+                   }
+                 }
+               },
+               "example": {
+                 "prompt": "Negate the following sentence.The price for bubblegum increased on thursday.\n\n Negated Sentence:",
+                 "max_tokens": 50
+               }
+             }
+           }
+         },
+         "responses": {
+           "200": {
+             "description": "OK",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "type": "object",
+                   "properties": {
+                     "id": {
+                       "type": "string"
+                     },
+                     "object": {
+                       "type": "string"
+                     },
+                     "created": {
+                       "type": "integer"
+                     },
+                     "model": {
+                       "type": "string"
+                     },
+                     "choices": {
+                       "type": "array",
+                       "items": {
+                         "type": "object",
+                         "properties": {
+                           "text": {
+                             "type": "string"
+                           },
+                           "index": {
+                             "type": "integer"
+                           },
+                           "logprobs": {
+                             "type": "object",
+                             "properties": {
+                               "tokens": {
+                                 "type": "array",
+                                 "items": {
+                                   "type": "string"
+                                 }
+                               },
+                               "token_logprobs": {
+                                 "type": "array",
+                                 "items": {
+                                   "type": "number"
+                                 }
+                               },
+                               "top_logprobs": {
+                                 "type": "array",
+                                 "items": {
+                                   "type": "object",
+                                   "additionalProperties": {
+                                     "type": "number"
+                                   }
+                                 }
+                               },
+                               "text_offset": {
+                                 "type": "array",
+                                 "items": {
+                                   "type": "integer"
+                                 }
+                               }
+                             }
+                           },
+                           "finish_reason": {
+                             "type": "string"
+                           }
+                         }
+                       }
+                     },
+                     "usage": {
+                       "type": "object",
+                       "properties": {
+                         "completion_tokens": {
+                           "type": "number",
+                           "format": "int32"
+                         },
+                         "prompt_tokens": {
+                           "type": "number",
+                           "format": "int32"
+                         },
+                         "total_tokens": {
+                           "type": "number",
+                           "format": "int32"
+                         }
+                       },
+                       "required": [
+                         "prompt_tokens",
+                         "total_tokens",
+                         "completion_tokens"
+                       ]
+                     }
+                   },
+                   "required": [
+                     "id",
+                     "object",
+                     "created",
+                     "model",
+                     "choices"
+                   ]
+                 },
+                 "example": {
+                   "model": "davinci",
+                   "object": "text_completion",
+                   "id": "cmpl-4509KAos68kxOqpE2uYGw81j6m7uo",
+                   "created": 1637097562,
+                   "choices": [
+                     {
+                       "index": 0,
+                       "text": "The price for bubblegum decreased on thursday.",
+                       "logprobs": null,
+                       "finish_reason": "stop"
+                     }
+                   ]
+                 }
+               }
+             },
+             "headers": {
+               "apim-request-id": {
+                 "description": "Request ID for troubleshooting purposes",
+                 "schema": {
+                   "type": "string"
+                 }
+               }
+             }
+           },
+           "default": {
+             "description": "Service unavailable",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "$ref": "#/components/schemas/errorResponse"
+                 }
+               }
+             },
+             "headers": {
+               "apim-request-id": {
+                 "description": "Request ID for troubleshooting purposes",
+                 "schema": {
+                   "type": "string"
+                 }
+               }
+             }
+           }
+         }
+       }
+     },
+     "/deployments/{deployment-id}/embeddings": {
+       "post": {
+         "summary": "Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.",
+         "operationId": "embeddings_create",
+         "parameters": [
+           {
+             "in": "path",
+             "name": "deployment-id",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "example": "ada-search-index-v1"
+             },
+             "description": "The deployment id of the model which was deployed."
+           },
+           {
+             "in": "query",
+             "name": "api-version",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "example": "2023-05-15",
+               "description": "api version"
+             }
+           }
+         ],
+         "requestBody": {
+           "required": true,
+           "content": {
+             "application/json": {
+               "schema": {
+                 "type": "object",
+                 "additionalProperties": true,
+                 "properties": {
+                   "input": {
+                     "description": "Input text to get embeddings for, encoded as a string. To get embeddings for multiple inputs in a single request, pass an array of strings. Each input must not exceed 2048 tokens in length.\nUnless you are embedding code, we suggest replacing newlines (\\n) in your input with a single space, as we have observed inferior results when newlines are present.",
+                     "oneOf": [
+                       {
+                         "type": "string",
+                         "default": "",
+                         "example": "This is a test.",
+                         "nullable": true
+                       },
+                       {
+                         "type": "array",
+                         "minItems": 1,
+                         "maxItems": 2048,
+                         "items": {
+                           "type": "string",
+                           "minLength": 1,
+                           "example": "This is a test.",
+                           "nullable": false
+                         }
+                       }
+                     ]
+                   },
+                   "user": {
+                     "description": "A unique identifier representing your end-user, which can help monitoring and detecting abuse.",
+                     "type": "string",
+                     "nullable": false
+                   },
+                   "input_type": {
+                     "description": "input type of embedding search to use",
+                     "type": "string",
+                     "example": "query"
+                   },
+                   "model": {
+                     "type": "string",
+                     "description": "ID of the model to use. You can use the Models_List operation to see all of your available models, or see our Models_Get overview for descriptions of them.",
+                     "nullable": false
+                   }
+                 },
+                 "required": [
+                   "input"
+                 ]
+               }
+             }
+           }
+         },
+         "responses": {
+           "200": {
+             "description": "OK",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "type": "object",
+                   "properties": {
+                     "object": {
+                       "type": "string"
+                     },
+                     "model": {
+                       "type": "string"
+                     },
+                     "data": {
+                       "type": "array",
+                       "items": {
+                         "type": "object",
+                         "properties": {
+                           "index": {
+                             "type": "integer"
+                           },
+                           "object": {
+                             "type": "string"
+                           },
+                           "embedding": {
+                             "type": "array",
+                             "items": {
+                               "type": "number"
+                             }
+                           }
+                         },
+                         "required": [
+                           "index",
+                           "object",
+                           "embedding"
+                         ]
+                       }
+                     },
+                     "usage": {
+                       "type": "object",
+                       "properties": {
+                         "prompt_tokens": {
+                           "type": "integer"
+                         },
+                         "total_tokens": {
+                           "type": "integer"
+                         }
+                       },
+                       "required": [
+                         "prompt_tokens",
+                         "total_tokens"
+                       ]
+                     }
+                   },
+                   "required": [
+                     "object",
+                     "model",
+                     "data",
+                     "usage"
+                   ]
+                 }
+               }
+             }
+           }
+         }
+       }
+     },
+     "/deployments/{deployment-id}/chat/completions": {
+       "post": {
+         "summary": "Creates a completion for the chat message",
+         "operationId": "ChatCompletions_Create",
+         "parameters": [
+           {
+             "in": "path",
+             "name": "deployment-id",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "description": "Deployment id of the model which was deployed."
+             }
+           },
+           {
+             "in": "query",
+             "name": "api-version",
+             "required": true,
+             "schema": {
+               "type": "string",
+               "example": "2023-05-15",
+               "description": "api version"
+             }
+           }
+         ],
+         "requestBody": {
+           "required": true,
+           "content": {
+             "application/json": {
+               "schema": {
+                 "type": "object",
+                 "properties": {
+                   "messages": {
+                     "description": "The messages to generate chat completions for, in the chat format.",
+                     "type": "array",
+                     "minItems": 1,
+                     "items": {
+                       "type": "object",
+                       "properties": {
+                         "role": {
+                           "type": "string",
+                           "enum": [
+                             "system",
+                             "user",
+                             "assistant"
+                           ],
+                           "description": "The role of the author of this message."
+                         },
+                         "content": {
+                           "type": "string",
+                           "description": "The contents of the message"
+                         },
+                         "name": {
+                           "type": "string",
+                           "description": "The name of the user in a multi-user chat"
+                         }
+                       },
+                       "required": [
+                         "role",
+                         "content"
+                       ]
+                     }
+                   },
+                   "temperature": {
+                     "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.\nWe generally recommend altering this or `top_p` but not both.",
+                     "type": "number",
+                     "minimum": 0,
+                     "maximum": 2,
+                     "default": 1,
+                     "example": 1,
+                     "nullable": true
+                   },
+                   "top_p": {
+                     "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\nWe generally recommend altering this or `temperature` but not both.",
+                     "type": "number",
+                     "minimum": 0,
+                     "maximum": 1,
+                     "default": 1,
+                     "example": 1,
+                     "nullable": true
+                   },
+                   "n": {
+                     "description": "How many chat completion choices to generate for each input message.",
+                     "type": "integer",
+                     "minimum": 1,
+                     "maximum": 128,
+                     "default": 1,
+                     "example": 1,
+                     "nullable": true
+                   },
+                   "stream": {
+                     "description": "If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a `data: [DONE]` message.",
+                     "type": "boolean",
+                     "nullable": true,
+                     "default": false
+                   },
+                   "stop": {
+                     "description": "Up to 4 sequences where the API will stop generating further tokens.",
+                     "oneOf": [
+                       {
+                         "type": "string",
+                         "nullable": true
+                       },
+                       {
+                         "type": "array",
+                         "items": {
+                           "type": "string",
+                           "nullable": false
+                         },
+                         "minItems": 1,
+                         "maxItems": 4,
+                         "description": "Array minimum size of 1 and maximum of 4"
+                       }
+                     ],
+                     "default": null
+                   },
+                   "max_tokens": {
+                     "description": "The maximum number of tokens allowed for the generated answer. By default, the number of tokens the model can return will be (4096 - prompt tokens).",
+                     "type": "integer",
+                     "default": "inf"
+                   },
+                   "presence_penalty": {
+                     "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.",
+                     "type": "number",
+                     "default": 0,
+                     "minimum": -2,
+                     "maximum": 2
+                   },
+                   "frequency_penalty": {
+                     "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.",
+                     "type": "number",
+                     "default": 0,
+                     "minimum": -2,
+                     "maximum": 2
+                   },
+                   "logit_bias": {
+                     "description": "Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.",
+                     "type": "object",
+                     "nullable": true
+                   },
+                   "user": {
+                     "description": "A unique identifier representing your end-user, which can help Azure OpenAI to monitor and detect abuse.",
+                     "type": "string",
+                     "example": "user-1234",
+                     "nullable": false
+                   }
+                 },
+                 "required": [
+                   "messages"
+                 ]
+               },
+               "example": {
+                 "model": "gpt-35-turbo",
+                 "messages": [
+                   {
+                     "role": "user",
+                     "content": "Hello!"
+                   }
+                 ]
+               }
+             }
+           }
+         },
+         "responses": {
+           "200": {
+             "description": "OK",
+             "content": {
+               "application/json": {
+                 "schema": {
+                   "type": "object",
+                   "properties": {
+                     "id": {
+                       "type": "string"
+                     },
+                     "object": {
+                       "type": "string"
+                     },
+                     "created": {
+                       "type": "integer",
+                       "format": "unixtime"
+                     },
+                     "model": {
+                       "type": "string"
+                     },
+                     "choices": {
+                       "type": "array",
+                       "items": {
+                         "type": "object",
+                         "properties": {
+                           "index": {
+                             "type": "integer"
+                           },
+                           "message": {
+                             "type": "object",
+                             "properties": {
+                               "role": {
+                                 "type": "string",
+                                 "enum": [
+                                   "system",
+                                   "user",
+                                   "assistant"
+                                 ],
+                                 "description": "The role of the author of this message."
+                               },
+                               "content": {
+                                 "type": "string",
+                                 "description": "The contents of the message"
+                               }
+                             },
+                             "required": [
+                               "role",
+                               "content"
+                             ]
+                           },
+                           "finish_reason": {
+                             "type": "string"
+                           }
+                         }
+                       }
+                     },
+                     "usage": {
+                       "type": "object",
+                       "properties": {
+                         "prompt_tokens": {
+                           "type": "integer"
+                         },
+                         "completion_tokens": {
+                           "type": "integer"
+                         },
+                         "total_tokens": {
+                           "type": "integer"
+                         }
+                       },
+                       "required": [
+                         "prompt_tokens",
+                         "completion_tokens",
+                         "total_tokens"
+                       ]
+                     }
+                   },
+                   "required": [
+                     "id",
+                     "object",
+                     "created",
+                     "model",
+                     "choices"
+                   ]
+                 },
+                 "example": {
+                   "id": "chatcmpl-123",
+                   "object": "chat.completion",
+                   "created": 1677652288,
+                   "choices": [
+                     {
+                       "index": 0,
+                       "message": {
+                         "role": "assistant",
+                         "content": "\n\nHello there, how may I assist you today?"
+                       },
+                       "finish_reason": "stop"
+                     }
+                   ],
+                   "usage": {
+                     "prompt_tokens": 9,
+                     "completion_tokens": 12,
+                     "total_tokens": 21
+                   }
+                 }
+               }
+             }
+           }
+         }
+       }
+     }
+   },
+   "components": {
+     "schemas": {
+       "errorResponse": {
+         "type": "object",
+         "properties": {
+           "error": {
+             "type": "object",
+             "properties": {
+               "code": {
+                 "type": "string"
+               },
+               "message": {
+                 "type": "string"
+               },
+               "param": {
+                 "type": "string"
+               },
+               "type": {
+                 "type": "string"
+               }
+             }
+           }
+         }
+       }
+     },
+     "securitySchemes": {
+       "bearer": {
+         "type": "oauth2",
+         "flows": {
+           "implicit": {
+             "authorizationUrl": "https://login.microsoftonline.com/common/oauth2/v2.0/authorize",
+             "scopes": {}
+           }
+         },
+         "x-tokenInfoFunc": "api.middleware.auth.bearer_auth",
+         "x-scopeValidateFunc": "api.middleware.auth.validate_scopes"
+       },
+       "apiKey": {
+         "type": "apiKey",
+         "name": "api-key",
+         "in": "header"
+       }
+     }
+   }
+ }
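
Since the whole file is additions, the wire protocol is easiest to read straight off the spec: requests go to https://{endpoint}/openai/deployments/{deployment-id}/... with a mandatory api-version query parameter, authenticated either via the bearer OAuth2 flow or an api-key header. A minimal stdlib-only sketch of the Completions_Create operation, with endpoint, deployment, and key as placeholders:

    require 'json'
    require 'net/http'
    require 'uri'

    endpoint   = 'your-resource-name.openai.azure.com' # the spec's {endpoint} server variable
    deployment = 'davinci'                             # example deployment-id from the spec
    uri = URI("https://#{endpoint}/openai/deployments/#{deployment}/completions?api-version=2023-05-15")

    request = Net::HTTP::Post.new(uri)
    request['Content-Type'] = 'application/json'
    request['api-key'] = ENV['AZURE_OPENAI_API_KEY']   # apiKey security scheme: header "api-key"
    request.body = JSON.generate(prompt: 'This is a test.', max_tokens: 16)

    response = Net::HTTP.start(uri.host, uri.port, use_ssl: true) { |http| http.request(request) }
    body = JSON.parse(response.body)
    puts body.dig('choices', 0, 'text')                # response schema: choices[].text

On errors the body follows #/components/schemas/errorResponse, and the apim-request-id response header is the value to quote when troubleshooting.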