@azure-rest/ai-inference 1.0.0-alpha.20241216.1 → 1.0.0-alpha.20241218.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -80
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -18,18 +18,16 @@ import ModelClient, { isUnexpected } from "@azure-rest/ai-inference";
|
|
|
18
18
|
import { AzureKeyCredential } from "@azure/core-auth";
|
|
19
19
|
const client = new ModelClient(
|
|
20
20
|
"https://<Azure Model endpoint>",
|
|
21
|
-
new AzureKeyCredential("<Azure API key>")
|
|
21
|
+
new AzureKeyCredential("<Azure API key>"),
|
|
22
22
|
);
|
|
23
23
|
|
|
24
24
|
const response = await client.path("/chat/completions").post({
|
|
25
25
|
body: {
|
|
26
|
-
messages: [
|
|
27
|
-
|
|
28
|
-
],
|
|
29
|
-
}
|
|
26
|
+
messages: [{ role: "user", content: "How many feet are in a mile?" }],
|
|
27
|
+
},
|
|
30
28
|
});
|
|
31
29
|
|
|
32
|
-
if(isUnexpected(response)) {
|
|
30
|
+
if (isUnexpected(response)) {
|
|
33
31
|
throw response.body.error;
|
|
34
32
|
}
|
|
35
33
|
console.log(response.body.choices[0].message.content);
|
|
@@ -52,6 +50,7 @@ npm install @azure-rest/ai-inference
|
|
|
52
50
|
```
|
|
53
51
|
|
|
54
52
|
### Create and authenticate a `ModelClient`
|
|
53
|
+
|
|
55
54
|
#### Using an API Key from Azure
|
|
56
55
|
|
|
57
56
|
You can authenticate with an Azure API key using the [Azure Core Auth library][azure_core_auth]. To use the AzureKeyCredential provider shown below, please install the `@azure/core-auth` package:
|
|
@@ -86,7 +85,7 @@ Set the values of the client ID, tenant ID, and client secret of the AAD applica
|
|
|
86
85
|
|
|
87
86
|
```javascript
|
|
88
87
|
import ModelClient from "@azure-rest/ai-inference";
|
|
89
|
-
import { DefaultAzureCredential }
|
|
88
|
+
import { DefaultAzureCredential } from "@azure/identity";
|
|
90
89
|
|
|
91
90
|
const client = new ModelClient("<endpoint>", new DefaultAzureCredential());
|
|
92
91
|
```
|
|
@@ -99,20 +98,19 @@ The main concept to understand is [Completions][azure_openai_completions_docs].
|
|
|
99
98
|
import ModelClient, { isUnexpected } from "@azure-rest/ai-inference";
|
|
100
99
|
import { AzureKeyCredential } from "@azure/core-auth";
|
|
101
100
|
|
|
102
|
-
async function main(){
|
|
101
|
+
async function main() {
|
|
103
102
|
const client = new ModelClient(
|
|
104
|
-
|
|
105
|
-
|
|
103
|
+
"https://your-model-endpoint/",
|
|
104
|
+
new AzureKeyCredential("your-model-api-key"),
|
|
105
|
+
);
|
|
106
106
|
|
|
107
107
|
const response = await client.path("/chat/completions").post({
|
|
108
108
|
body: {
|
|
109
|
-
messages: [
|
|
110
|
-
|
|
111
|
-
],
|
|
112
|
-
}
|
|
109
|
+
messages: [{ role: "user", content: "Hello, world!" }],
|
|
110
|
+
},
|
|
113
111
|
});
|
|
114
112
|
|
|
115
|
-
if(isUnexpected(response)) {
|
|
113
|
+
if (isUnexpected(response)) {
|
|
116
114
|
throw response.body.error;
|
|
117
115
|
}
|
|
118
116
|
|
|
@@ -141,7 +139,7 @@ import ModelClient from "@azure-rest/ai-inference";
|
|
|
141
139
|
import { DefaultAzureCredential } from "@azure/identity";
|
|
142
140
|
import { createSseStream } from "@azure/core-sse";
|
|
143
141
|
|
|
144
|
-
async function main(){
|
|
142
|
+
async function main() {
|
|
145
143
|
const endpoint = "https://myaccount.openai.azure.com/";
|
|
146
144
|
const client = new ModelClient(endpoint, new DefaultAzureCredential());
|
|
147
145
|
|
|
@@ -155,13 +153,16 @@ async function main(){
|
|
|
155
153
|
|
|
156
154
|
console.log(`Messages: ${messages.map((m) => m.content).join("\n")}`);
|
|
157
155
|
|
|
158
|
-
const response = await client
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
156
|
+
const response = await client
|
|
157
|
+
.path("/chat/completions")
|
|
158
|
+
.post({
|
|
159
|
+
body: {
|
|
160
|
+
messages,
|
|
161
|
+
stream: true,
|
|
162
|
+
max_tokens: 128,
|
|
163
|
+
},
|
|
164
|
+
})
|
|
165
|
+
.asNodeStream();
|
|
165
166
|
|
|
166
167
|
const stream = response.body;
|
|
167
168
|
if (!stream) {
|
|
@@ -178,7 +179,7 @@ async function main(){
|
|
|
178
179
|
if (event.data === "[DONE]") {
|
|
179
180
|
return;
|
|
180
181
|
}
|
|
181
|
-
for (const choice of
|
|
182
|
+
for (const choice of JSON.parse(event.data).choices) {
|
|
182
183
|
console.log(choice.delta?.content ?? "");
|
|
183
184
|
}
|
|
184
185
|
}
|
|
@@ -197,7 +198,7 @@ This example generates text responses to input prompts using an Azure subscripti
|
|
|
197
198
|
import ModelClient from "@azure-rest/ai-inference";
|
|
198
199
|
import { AzureKeyCredential } from "@azure/core-auth";
|
|
199
200
|
|
|
200
|
-
async function main(){
|
|
201
|
+
async function main() {
|
|
201
202
|
// Replace with your Model API key
|
|
202
203
|
const key = "YOUR_MODEL_API_KEY";
|
|
203
204
|
const endpoint = "https://your-model-endpoint/";
|
|
@@ -208,17 +209,21 @@ async function main(){
|
|
|
208
209
|
{ role: "user", content: "What is inference in the context of AI?" },
|
|
209
210
|
{ role: "user", content: "Why do children love dinosaurs?" },
|
|
210
211
|
{ role: "user", content: "Generate a proof of Euler's identity" },
|
|
211
|
-
{
|
|
212
|
+
{
|
|
213
|
+
role: "user",
|
|
214
|
+
content:
|
|
215
|
+
"Describe in single words only the good things that come into your mind about your mother.",
|
|
216
|
+
},
|
|
212
217
|
];
|
|
213
218
|
|
|
214
219
|
let promptIndex = 0;
|
|
215
220
|
const response = await client.path("/chat/completions").post({
|
|
216
221
|
body: {
|
|
217
|
-
messages
|
|
218
|
-
}
|
|
222
|
+
messages,
|
|
223
|
+
},
|
|
219
224
|
});
|
|
220
225
|
|
|
221
|
-
if(response.status !== "200") {
|
|
226
|
+
if (response.status !== "200") {
|
|
222
227
|
throw response.body.error;
|
|
223
228
|
}
|
|
224
229
|
for (const choice of response.body.choices) {
|
|
@@ -241,7 +246,7 @@ This example generates a summarization of the given input prompt.
|
|
|
241
246
|
import ModelClient from "@azure-rest/ai-inference";
|
|
242
247
|
import { DefaultAzureCredential } from "@azure/identity";
|
|
243
248
|
|
|
244
|
-
async function main(){
|
|
249
|
+
async function main() {
|
|
245
250
|
const endpoint = "https://your-model-endpoint/";
|
|
246
251
|
const client = new ModelClient(endpoint, new DefaultAzureCredential());
|
|
247
252
|
|
|
@@ -266,14 +271,12 @@ async function main(){
|
|
|
266
271
|
|
|
267
272
|
const response = await client.path("/chat/completions").post({
|
|
268
273
|
body: {
|
|
269
|
-
messages: [
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
max_tokens: 64
|
|
273
|
-
}
|
|
274
|
+
messages: [{ role: "user", content: summarizationPrompt }],
|
|
275
|
+
max_tokens: 64,
|
|
276
|
+
},
|
|
274
277
|
});
|
|
275
278
|
|
|
276
|
-
if(response.status !== "200") {
|
|
279
|
+
if (response.status !== "200") {
|
|
277
280
|
throw response.body.error;
|
|
278
281
|
}
|
|
279
282
|
const completion = response.body.choices[0].message.content;
|
|
@@ -292,23 +295,23 @@ process of fulfilling a chat completions request. To use chat tools, start by de
|
|
|
292
295
|
|
|
293
296
|
```js
|
|
294
297
|
const getCurrentWeather = {
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
},
|
|
298
|
+
name: "get_current_weather",
|
|
299
|
+
description: "Get the current weather in a given location",
|
|
300
|
+
parameters: {
|
|
301
|
+
type: "object",
|
|
302
|
+
properties: {
|
|
303
|
+
location: {
|
|
304
|
+
type: "string",
|
|
305
|
+
description: "The city and state, e.g. San Francisco, CA",
|
|
306
|
+
},
|
|
307
|
+
unit: {
|
|
308
|
+
type: "string",
|
|
309
|
+
enum: ["celsius", "fahrenheit"],
|
|
308
310
|
},
|
|
309
|
-
required: ["location"],
|
|
310
311
|
},
|
|
311
|
-
|
|
312
|
+
required: ["location"],
|
|
313
|
+
},
|
|
314
|
+
};
|
|
312
315
|
```
|
|
313
316
|
|
|
314
317
|
With the tool defined, include that new definition in the options for a chat completions request:
|
|
@@ -324,8 +327,8 @@ const tools = [
|
|
|
324
327
|
const result = await client.path("/chat/completions").post({
|
|
325
328
|
body: {
|
|
326
329
|
messages,
|
|
327
|
-
tools
|
|
328
|
-
}
|
|
330
|
+
tools,
|
|
331
|
+
},
|
|
329
332
|
});
|
|
330
333
|
```
|
|
331
334
|
|
|
@@ -336,16 +339,16 @@ new request messages can be thought of as a sort of "callback" for chat completi
|
|
|
336
339
|
```js
|
|
337
340
|
// Purely for convenience and clarity, this function handles tool call responses.
|
|
338
341
|
function applyToolCall({ function: call, id }) {
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
342
|
+
if (call.name === "get_current_weather") {
|
|
343
|
+
const { location, unit } = JSON.parse(call.arguments);
|
|
344
|
+
// In a real application, this would be a call to a weather API with location and unit parameters
|
|
345
|
+
return {
|
|
346
|
+
role: "tool",
|
|
347
|
+
content: `The weather in ${location} is 72 degrees ${unit} and sunny.`,
|
|
348
|
+
toolCallId: id,
|
|
349
|
+
};
|
|
350
|
+
}
|
|
351
|
+
throw new Error(`Unknown tool call: ${call.name}`);
|
|
349
352
|
}
|
|
350
353
|
```
|
|
351
354
|
|
|
@@ -366,15 +369,15 @@ if (responseMessage?.role === "assistant") {
|
|
|
366
369
|
];
|
|
367
370
|
const toolCallResolutionResult = await client.path("/chat/completions").post({
|
|
368
371
|
body: {
|
|
369
|
-
messages: toolCallResolutionMessages
|
|
370
|
-
}
|
|
372
|
+
messages: toolCallResolutionMessages,
|
|
373
|
+
},
|
|
371
374
|
});
|
|
372
375
|
// continue handling the response as normal
|
|
373
376
|
}
|
|
374
377
|
}
|
|
375
378
|
```
|
|
376
379
|
|
|
377
|
-
### Chat with images (using models supporting image chat, such as gpt-4o)
|
|
380
|
+
### Chat with images (using models supporting image chat, such as gpt-4o)
|
|
378
381
|
|
|
379
382
|
Some Azure models allow you to use images as input components into chat completions.
|
|
380
383
|
|
|
@@ -399,14 +402,14 @@ of `finish_reason`:
|
|
|
399
402
|
```js
|
|
400
403
|
const response = await client.path("/chat/completions").post({
|
|
401
404
|
body: {
|
|
402
|
-
messages
|
|
405
|
+
messages
|
|
403
406
|
});
|
|
404
407
|
console.log(`Chatbot: ${response.choices[0].message?.content}`);
|
|
405
408
|
```
|
|
406
409
|
|
|
407
410
|
### Text Embeddings example
|
|
408
411
|
|
|
409
|
-
This example demonstrates how to get text embeddings with Entra ID authentication.
|
|
412
|
+
This example demonstrates how to get text embeddings with Entra ID authentication.
|
|
410
413
|
|
|
411
414
|
```javascript
|
|
412
415
|
import ModelClient, { isUnexpected } from "@azure-rest/ai-inference";
|
|
@@ -415,19 +418,21 @@ import { DefaultAzureCredential } from "@azure/identity";
|
|
|
415
418
|
const endpoint = "<your_model_endpoint>";
|
|
416
419
|
const credential = new DefaultAzureCredential();
|
|
417
420
|
|
|
418
|
-
async function main(){
|
|
421
|
+
async function main() {
|
|
419
422
|
const client = ModelClient(endpoint, credential);
|
|
420
423
|
const response = await client.path("/embeddings").post({
|
|
421
424
|
body: {
|
|
422
|
-
input: ["first phrase", "second phrase", "third phrase"]
|
|
423
|
-
}
|
|
425
|
+
input: ["first phrase", "second phrase", "third phrase"],
|
|
426
|
+
},
|
|
424
427
|
});
|
|
425
428
|
|
|
426
429
|
if (isUnexpected(response)) {
|
|
427
430
|
throw response.body.error;
|
|
428
431
|
}
|
|
429
432
|
for (const data of response.body.data) {
|
|
430
|
-
console.log(
|
|
433
|
+
console.log(
|
|
434
|
+
`data length: ${data.length}, [${data[0]}, ${data[1]}, ..., ${data[data.length - 2]}, ${data[data.length - 1]}]`,
|
|
435
|
+
);
|
|
431
436
|
}
|
|
432
437
|
}
|
|
433
438
|
|
|
@@ -446,18 +451,23 @@ data: length=1024, [0.04196167, 0.029083252, ..., -0.0027484894, 0.0073127747]
|
|
|
446
451
|
|
|
447
452
|
To generate embeddings for additional phrases, simply call `client.path("/embeddings").post` multiple times using the same `client`.
|
|
448
453
|
|
|
449
|
-
### Instrumentation
|
|
454
|
+
### Instrumentation
|
|
455
|
+
|
|
450
456
|
Currently instrumentation is only supported for `Chat Completion without streaming`.
|
|
451
457
|
To enable instrumentation, you must register one or more exporters.
|
|
452
458
|
|
|
453
459
|
Here is an example of adding the console as an exporter:
|
|
460
|
+
|
|
454
461
|
```js
|
|
455
|
-
import {
|
|
462
|
+
import {
|
|
463
|
+
ConsoleSpanExporter,
|
|
464
|
+
NodeTracerProvider,
|
|
465
|
+
SimpleSpanProcessor,
|
|
466
|
+
} from "@opentelemetry/sdk-trace-node";
|
|
456
467
|
|
|
457
468
|
const provider = new NodeTracerProvider();
|
|
458
469
|
provider.addSpanProcessor(new SimpleSpanProcessor(new ConsoleSpanExporter()));
|
|
459
470
|
provider.register();
|
|
460
|
-
|
|
461
471
|
```
|
|
462
472
|
|
|
463
473
|
Here is an example of adding Application Insights as an exporter:
|
|
@@ -491,9 +501,13 @@ import ModelClient from "@azure-rest/ai-inference";
|
|
|
491
501
|
```
|
|
492
502
|
|
|
493
503
|
Finally, when you are making a call for chat completion, you need to include the following tracing options in the request:
|
|
504
|
+
|
|
494
505
|
```js
|
|
495
|
-
tracingOptions: {
|
|
506
|
+
tracingOptions: {
|
|
507
|
+
tracingContext: context.active();
|
|
508
|
+
}
|
|
496
509
|
```
|
|
510
|
+
|
|
497
511
|
Here is an example:
|
|
498
512
|
|
|
499
513
|
```js
|
|
@@ -505,7 +519,8 @@ client.path("/chat/completions").post({
|
|
|
505
519
|
```
|
|
506
520
|
|
|
507
521
|
### Tracing Your Own Functions
|
|
508
|
-
|
|
522
|
+
|
|
523
|
+
OpenTelemetry provides `startActiveSpan` to instrument your own code. Here is an example:
|
|
509
524
|
|
|
510
525
|
```js
|
|
511
526
|
import { trace } from "@opentelemetry/api";
|
|
@@ -524,7 +539,6 @@ const getWeatherFunc = (location: string, unit: string): string => {
|
|
|
524
539
|
}
|
|
525
540
|
```
|
|
526
541
|
|
|
527
|
-
|
|
528
542
|
## Troubleshooting
|
|
529
543
|
|
|
530
544
|
### Logging
|
|
@@ -540,11 +554,12 @@ setLogLevel("info");
|
|
|
540
554
|
For more detailed instructions on how to enable logs, you can look at the [@azure/logger package docs](https://github.com/Azure/azure-sdk-for-js/tree/main/sdk/core/logger).
|
|
541
555
|
|
|
542
556
|
<!-- LINKS -->
|
|
557
|
+
|
|
543
558
|
[stream_chat_completion_sample]: https://github.com/Azure/azure-sdk-for-js/blob/main/sdk/ai/ai-inference-rest/samples/v1-beta/typescript/streamChatCompletions.ts
|
|
544
559
|
[azure_openai_completions_docs]: https://learn.microsoft.com/azure/cognitive-services/openai/how-to/completions
|
|
545
560
|
[defaultazurecredential]: https://github.com/Azure/azure-sdk-for-js/tree/main/sdk/identity/identity#defaultazurecredential
|
|
546
561
|
[azure_identity]: https://github.com/Azure/azure-sdk-for-js/tree/main/sdk/identity/identity
|
|
547
562
|
[azure_core_auth]: https://github.com/Azure/azure-sdk-for-js/tree/main/sdk/core/core-auth
|
|
548
|
-
[register_aad_app]: https://
|
|
549
|
-
[azure_cli]: https://
|
|
563
|
+
[register_aad_app]: https://learn.microsoft.com/azure/cognitive-services/authentication#assign-a-role-to-a-service-principal
|
|
564
|
+
[azure_cli]: https://learn.microsoft.com/cli/azure
|
|
550
565
|
[azure_portal]: https://portal.azure.com
|