oracle-ads 2.12.8__py3-none-any.whl → 2.12.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. ads/aqua/__init__.py +4 -3
  2. ads/aqua/app.py +40 -18
  3. ads/aqua/client/__init__.py +3 -0
  4. ads/aqua/client/client.py +799 -0
  5. ads/aqua/common/enums.py +3 -0
  6. ads/aqua/common/utils.py +62 -2
  7. ads/aqua/data.py +2 -19
  8. ads/aqua/evaluation/entities.py +6 -0
  9. ads/aqua/evaluation/evaluation.py +45 -15
  10. ads/aqua/extension/aqua_ws_msg_handler.py +14 -7
  11. ads/aqua/extension/base_handler.py +12 -9
  12. ads/aqua/extension/deployment_handler.py +8 -4
  13. ads/aqua/extension/finetune_handler.py +8 -14
  14. ads/aqua/extension/model_handler.py +30 -6
  15. ads/aqua/extension/ui_handler.py +13 -1
  16. ads/aqua/finetuning/constants.py +5 -2
  17. ads/aqua/finetuning/entities.py +73 -17
  18. ads/aqua/finetuning/finetuning.py +110 -82
  19. ads/aqua/model/entities.py +5 -1
  20. ads/aqua/model/model.py +230 -104
  21. ads/aqua/modeldeployment/deployment.py +35 -11
  22. ads/aqua/modeldeployment/entities.py +7 -4
  23. ads/aqua/ui.py +24 -2
  24. ads/cli.py +16 -8
  25. ads/common/auth.py +9 -9
  26. ads/llm/autogen/__init__.py +2 -0
  27. ads/llm/autogen/constants.py +15 -0
  28. ads/llm/autogen/reports/__init__.py +2 -0
  29. ads/llm/autogen/reports/base.py +67 -0
  30. ads/llm/autogen/reports/data.py +103 -0
  31. ads/llm/autogen/reports/session.py +526 -0
  32. ads/llm/autogen/reports/templates/chat_box.html +13 -0
  33. ads/llm/autogen/reports/templates/chat_box_lt.html +5 -0
  34. ads/llm/autogen/reports/templates/chat_box_rt.html +6 -0
  35. ads/llm/autogen/reports/utils.py +56 -0
  36. ads/llm/autogen/v02/__init__.py +4 -0
  37. ads/llm/autogen/{client_v02.py → v02/client.py} +23 -10
  38. ads/llm/autogen/v02/log_handlers/__init__.py +2 -0
  39. ads/llm/autogen/v02/log_handlers/oci_file_handler.py +83 -0
  40. ads/llm/autogen/v02/loggers/__init__.py +6 -0
  41. ads/llm/autogen/v02/loggers/metric_logger.py +320 -0
  42. ads/llm/autogen/v02/loggers/session_logger.py +580 -0
  43. ads/llm/autogen/v02/loggers/utils.py +86 -0
  44. ads/llm/autogen/v02/runtime_logging.py +163 -0
  45. ads/llm/guardrails/base.py +6 -5
  46. ads/llm/langchain/plugins/chat_models/oci_data_science.py +46 -20
  47. ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +38 -11
  48. ads/model/__init__.py +11 -13
  49. ads/model/artifact.py +47 -8
  50. ads/model/extractor/embedding_onnx_extractor.py +80 -0
  51. ads/model/framework/embedding_onnx_model.py +438 -0
  52. ads/model/generic_model.py +26 -24
  53. ads/model/model_metadata.py +8 -7
  54. ads/opctl/config/merger.py +13 -14
  55. ads/opctl/operator/common/operator_config.py +4 -4
  56. ads/opctl/operator/lowcode/common/transformations.py +50 -8
  57. ads/opctl/operator/lowcode/common/utils.py +22 -6
  58. ads/opctl/operator/lowcode/forecast/__main__.py +10 -0
  59. ads/opctl/operator/lowcode/forecast/const.py +3 -0
  60. ads/opctl/operator/lowcode/forecast/model/arima.py +19 -13
  61. ads/opctl/operator/lowcode/forecast/model/automlx.py +129 -36
  62. ads/opctl/operator/lowcode/forecast/model/autots.py +1 -0
  63. ads/opctl/operator/lowcode/forecast/model/base_model.py +58 -17
  64. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +1 -1
  65. ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +10 -3
  66. ads/opctl/operator/lowcode/forecast/model/prophet.py +25 -18
  67. ads/opctl/operator/lowcode/forecast/model_evaluator.py +3 -2
  68. ads/opctl/operator/lowcode/forecast/operator_config.py +31 -0
  69. ads/opctl/operator/lowcode/forecast/schema.yaml +76 -0
  70. ads/opctl/operator/lowcode/forecast/utils.py +8 -6
  71. ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
  72. ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +233 -0
  73. ads/opctl/operator/lowcode/forecast/whatifserve/score.py +238 -0
  74. ads/telemetry/base.py +18 -11
  75. ads/telemetry/client.py +33 -13
  76. ads/templates/schemas/openapi.json +1740 -0
  77. ads/templates/score_embedding_onnx.jinja2 +202 -0
  78. {oracle_ads-2.12.8.dist-info → oracle_ads-2.12.10.dist-info}/METADATA +11 -10
  79. {oracle_ads-2.12.8.dist-info → oracle_ads-2.12.10.dist-info}/RECORD +82 -56
  80. {oracle_ads-2.12.8.dist-info → oracle_ads-2.12.10.dist-info}/LICENSE.txt +0 -0
  81. {oracle_ads-2.12.8.dist-info → oracle_ads-2.12.10.dist-info}/WHEEL +0 -0
  82. {oracle_ads-2.12.8.dist-info → oracle_ads-2.12.10.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,1740 @@
1
+ {
2
+ "components": {
3
+ "schemas": {
4
+ "ClassifierModel": {
5
+ "properties": {
6
+ "id2label": {
7
+ "additionalProperties": {
8
+ "type": "string"
9
+ },
10
+ "example": {
11
+ "0": "LABEL"
12
+ },
13
+ "type": "object"
14
+ },
15
+ "label2id": {
16
+ "additionalProperties": {
17
+ "minimum": 0,
18
+ "type": "integer"
19
+ },
20
+ "example": {
21
+ "LABEL": 0
22
+ },
23
+ "type": "object"
24
+ }
25
+ },
26
+ "required": [
27
+ "id2label",
28
+ "label2id"
29
+ ],
30
+ "type": "object"
31
+ },
32
+ "DecodeRequest": {
33
+ "properties": {
34
+ "ids": {
35
+ "$ref": "#/components/schemas/InputIds"
36
+ },
37
+ "skip_special_tokens": {
38
+ "default": "true",
39
+ "example": "true",
40
+ "type": "boolean"
41
+ }
42
+ },
43
+ "required": [
44
+ "ids"
45
+ ],
46
+ "type": "object"
47
+ },
48
+ "DecodeResponse": {
49
+ "example": [
50
+ "test"
51
+ ],
52
+ "items": {
53
+ "type": "string"
54
+ },
55
+ "type": "array"
56
+ },
57
+ "EmbedAllRequest": {
58
+ "properties": {
59
+ "inputs": {
60
+ "$ref": "#/components/schemas/Input"
61
+ },
62
+ "prompt_name": {
63
+ "default": "null",
64
+ "description": "The name of the prompt that should be used for encoding. If not set, no prompt\nwill be applied.\n\nMust be a key in the `sentence-transformers` configuration `prompts` dictionary.\n\nFor example, if ``prompt_name`` is \"query\" and the ``prompts`` is {\"query\": \"query: \", ...},\nthen the sentence \"What is the capital of France?\" will be encoded as\n\"query: What is the capital of France?\" because the prompt text will be prepended before\nany text to encode.",
65
+ "example": "null",
66
+ "nullable": true,
67
+ "type": "string"
68
+ },
69
+ "truncate": {
70
+ "default": "false",
71
+ "example": "false",
72
+ "nullable": true,
73
+ "type": "boolean"
74
+ },
75
+ "truncation_direction": {
76
+ "allOf": [
77
+ {
78
+ "$ref": "#/components/schemas/TruncationDirection"
79
+ }
80
+ ],
81
+ "default": "right"
82
+ }
83
+ },
84
+ "required": [
85
+ "inputs"
86
+ ],
87
+ "type": "object"
88
+ },
89
+ "EmbedAllResponse": {
90
+ "example": [
91
+ [
92
+ [
93
+ 0.0,
94
+ 1.0,
95
+ 2.0
96
+ ]
97
+ ]
98
+ ],
99
+ "items": {
100
+ "items": {
101
+ "items": {
102
+ "format": "float",
103
+ "type": "number"
104
+ },
105
+ "type": "array"
106
+ },
107
+ "type": "array"
108
+ },
109
+ "type": "array"
110
+ },
111
+ "EmbedRequest": {
112
+ "properties": {
113
+ "inputs": {
114
+ "$ref": "#/components/schemas/Input"
115
+ },
116
+ "normalize": {
117
+ "default": "true",
118
+ "example": "true",
119
+ "type": "boolean"
120
+ },
121
+ "prompt_name": {
122
+ "default": "null",
123
+ "description": "The name of the prompt that should be used for encoding. If not set, no prompt\nwill be applied.\n\nMust be a key in the `sentence-transformers` configuration `prompts` dictionary.\n\nFor example, if ``prompt_name`` is \"query\" and the ``prompts`` is {\"query\": \"query: \", ...},\nthen the sentence \"What is the capital of France?\" will be encoded as\n\"query: What is the capital of France?\" because the prompt text will be prepended before\nany text to encode.",
124
+ "example": "null",
125
+ "nullable": true,
126
+ "type": "string"
127
+ },
128
+ "truncate": {
129
+ "default": "false",
130
+ "example": "false",
131
+ "nullable": true,
132
+ "type": "boolean"
133
+ },
134
+ "truncation_direction": {
135
+ "allOf": [
136
+ {
137
+ "$ref": "#/components/schemas/TruncationDirection"
138
+ }
139
+ ],
140
+ "default": "right"
141
+ }
142
+ },
143
+ "required": [
144
+ "inputs"
145
+ ],
146
+ "type": "object"
147
+ },
148
+ "EmbedResponse": {
149
+ "example": [
150
+ [
151
+ 0.0,
152
+ 1.0,
153
+ 2.0
154
+ ]
155
+ ],
156
+ "items": {
157
+ "items": {
158
+ "format": "float",
159
+ "type": "number"
160
+ },
161
+ "type": "array"
162
+ },
163
+ "type": "array"
164
+ },
165
+ "EmbedSparseRequest": {
166
+ "properties": {
167
+ "inputs": {
168
+ "$ref": "#/components/schemas/Input"
169
+ },
170
+ "prompt_name": {
171
+ "default": "null",
172
+ "description": "The name of the prompt that should be used for encoding. If not set, no prompt\nwill be applied.\n\nMust be a key in the `sentence-transformers` configuration `prompts` dictionary.\n\nFor example, if ``prompt_name`` is \"query\" and the ``prompts`` is {\"query\": \"query: \", ...},\nthen the sentence \"What is the capital of France?\" will be encoded as\n\"query: What is the capital of France?\" because the prompt text will be prepended before\nany text to encode.",
173
+ "example": "null",
174
+ "nullable": true,
175
+ "type": "string"
176
+ },
177
+ "truncate": {
178
+ "default": "false",
179
+ "example": "false",
180
+ "nullable": true,
181
+ "type": "boolean"
182
+ },
183
+ "truncation_direction": {
184
+ "allOf": [
185
+ {
186
+ "$ref": "#/components/schemas/TruncationDirection"
187
+ }
188
+ ],
189
+ "default": "right"
190
+ }
191
+ },
192
+ "required": [
193
+ "inputs"
194
+ ],
195
+ "type": "object"
196
+ },
197
+ "EmbedSparseResponse": {
198
+ "items": {
199
+ "items": {
200
+ "$ref": "#/components/schemas/SparseValue"
201
+ },
202
+ "type": "array"
203
+ },
204
+ "type": "array"
205
+ },
206
+ "Embedding": {
207
+ "oneOf": [
208
+ {
209
+ "items": {
210
+ "format": "float",
211
+ "type": "number"
212
+ },
213
+ "type": "array"
214
+ },
215
+ {
216
+ "type": "string"
217
+ }
218
+ ]
219
+ },
220
+ "EmbeddingModel": {
221
+ "properties": {
222
+ "pooling": {
223
+ "example": "cls",
224
+ "type": "string"
225
+ }
226
+ },
227
+ "required": [
228
+ "pooling"
229
+ ],
230
+ "type": "object"
231
+ },
232
+ "EncodingFormat": {
233
+ "enum": [
234
+ "float",
235
+ "base64"
236
+ ],
237
+ "type": "string"
238
+ },
239
+ "ErrorResponse": {
240
+ "properties": {
241
+ "error": {
242
+ "type": "string"
243
+ },
244
+ "error_type": {
245
+ "$ref": "#/components/schemas/ErrorType"
246
+ }
247
+ },
248
+ "required": [
249
+ "error",
250
+ "error_type"
251
+ ],
252
+ "type": "object"
253
+ },
254
+ "ErrorType": {
255
+ "enum": [
256
+ "Unhealthy",
257
+ "Backend",
258
+ "Overloaded",
259
+ "Validation",
260
+ "Tokenizer"
261
+ ],
262
+ "type": "string"
263
+ },
264
+ "Info": {
265
+ "properties": {
266
+ "auto_truncate": {
267
+ "type": "boolean"
268
+ },
269
+ "docker_label": {
270
+ "example": "null",
271
+ "nullable": true,
272
+ "type": "string"
273
+ },
274
+ "max_batch_requests": {
275
+ "default": "null",
276
+ "example": "null",
277
+ "minimum": 0,
278
+ "nullable": true,
279
+ "type": "integer"
280
+ },
281
+ "max_batch_tokens": {
282
+ "example": "2048",
283
+ "minimum": 0,
284
+ "type": "integer"
285
+ },
286
+ "max_client_batch_size": {
287
+ "example": "32",
288
+ "minimum": 0,
289
+ "type": "integer"
290
+ },
291
+ "max_concurrent_requests": {
292
+ "description": "Router Parameters",
293
+ "example": "128",
294
+ "minimum": 0,
295
+ "type": "integer"
296
+ },
297
+ "max_input_length": {
298
+ "example": "512",
299
+ "minimum": 0,
300
+ "type": "integer"
301
+ },
302
+ "model_dtype": {
303
+ "example": "float16",
304
+ "type": "string"
305
+ },
306
+ "model_id": {
307
+ "description": "Model info",
308
+ "example": "thenlper/gte-base",
309
+ "type": "string"
310
+ },
311
+ "model_sha": {
312
+ "example": "fca14538aa9956a46526bd1d0d11d69e19b5a101",
313
+ "nullable": true,
314
+ "type": "string"
315
+ },
316
+ "model_type": {
317
+ "$ref": "#/components/schemas/ModelType"
318
+ },
319
+ "sha": {
320
+ "example": "null",
321
+ "nullable": true,
322
+ "type": "string"
323
+ },
324
+ "tokenization_workers": {
325
+ "example": "4",
326
+ "minimum": 0,
327
+ "type": "integer"
328
+ },
329
+ "version": {
330
+ "description": "Router Info",
331
+ "example": "0.5.0",
332
+ "type": "string"
333
+ }
334
+ },
335
+ "required": [
336
+ "model_id",
337
+ "model_dtype",
338
+ "model_type",
339
+ "max_concurrent_requests",
340
+ "max_input_length",
341
+ "max_batch_tokens",
342
+ "max_client_batch_size",
343
+ "auto_truncate",
344
+ "tokenization_workers",
345
+ "version"
346
+ ],
347
+ "type": "object"
348
+ },
349
+ "Input": {
350
+ "oneOf": [
351
+ {
352
+ "$ref": "#/components/schemas/InputType"
353
+ },
354
+ {
355
+ "items": {
356
+ "$ref": "#/components/schemas/InputType"
357
+ },
358
+ "type": "array"
359
+ }
360
+ ]
361
+ },
362
+ "InputIds": {
363
+ "oneOf": [
364
+ {
365
+ "items": {
366
+ "format": "int32",
367
+ "minimum": 0,
368
+ "type": "integer"
369
+ },
370
+ "type": "array"
371
+ },
372
+ {
373
+ "items": {
374
+ "items": {
375
+ "format": "int32",
376
+ "minimum": 0,
377
+ "type": "integer"
378
+ },
379
+ "type": "array"
380
+ },
381
+ "type": "array"
382
+ }
383
+ ]
384
+ },
385
+ "InputType": {
386
+ "oneOf": [
387
+ {
388
+ "type": "string"
389
+ },
390
+ {
391
+ "items": {
392
+ "format": "int32",
393
+ "minimum": 0,
394
+ "type": "integer"
395
+ },
396
+ "type": "array"
397
+ }
398
+ ]
399
+ },
400
+ "ModelType": {
401
+ "oneOf": [
402
+ {
403
+ "properties": {
404
+ "classifier": {
405
+ "$ref": "#/components/schemas/ClassifierModel"
406
+ }
407
+ },
408
+ "required": [
409
+ "classifier"
410
+ ],
411
+ "type": "object"
412
+ },
413
+ {
414
+ "properties": {
415
+ "embedding": {
416
+ "$ref": "#/components/schemas/EmbeddingModel"
417
+ }
418
+ },
419
+ "required": [
420
+ "embedding"
421
+ ],
422
+ "type": "object"
423
+ },
424
+ {
425
+ "properties": {
426
+ "reranker": {
427
+ "$ref": "#/components/schemas/ClassifierModel"
428
+ }
429
+ },
430
+ "required": [
431
+ "reranker"
432
+ ],
433
+ "type": "object"
434
+ }
435
+ ]
436
+ },
437
+ "OpenAICompatEmbedding": {
438
+ "properties": {
439
+ "embedding": {
440
+ "$ref": "#/components/schemas/Embedding"
441
+ },
442
+ "index": {
443
+ "example": "0",
444
+ "minimum": 0,
445
+ "type": "integer"
446
+ },
447
+ "object": {
448
+ "example": "embedding",
449
+ "type": "string"
450
+ }
451
+ },
452
+ "required": [
453
+ "object",
454
+ "embedding",
455
+ "index"
456
+ ],
457
+ "type": "object"
458
+ },
459
+ "OpenAICompatErrorResponse": {
460
+ "properties": {
461
+ "code": {
462
+ "format": "int32",
463
+ "minimum": 0,
464
+ "type": "integer"
465
+ },
466
+ "error_type": {
467
+ "$ref": "#/components/schemas/ErrorType"
468
+ },
469
+ "message": {
470
+ "type": "string"
471
+ }
472
+ },
473
+ "required": [
474
+ "message",
475
+ "code",
476
+ "error_type"
477
+ ],
478
+ "type": "object"
479
+ },
480
+ "OpenAICompatRequest": {
481
+ "properties": {
482
+ "encoding_format": {
483
+ "allOf": [
484
+ {
485
+ "$ref": "#/components/schemas/EncodingFormat"
486
+ }
487
+ ],
488
+ "default": "float"
489
+ },
490
+ "input": {
491
+ "$ref": "#/components/schemas/Input"
492
+ },
493
+ "model": {
494
+ "example": "null",
495
+ "nullable": true,
496
+ "type": "string"
497
+ },
498
+ "user": {
499
+ "example": "null",
500
+ "nullable": true,
501
+ "type": "string"
502
+ }
503
+ },
504
+ "required": [
505
+ "input"
506
+ ],
507
+ "type": "object"
508
+ },
509
+ "OpenAICompatResponse": {
510
+ "properties": {
511
+ "data": {
512
+ "items": {
513
+ "$ref": "#/components/schemas/OpenAICompatEmbedding"
514
+ },
515
+ "type": "array"
516
+ },
517
+ "model": {
518
+ "example": "thenlper/gte-base",
519
+ "type": "string"
520
+ },
521
+ "object": {
522
+ "example": "list",
523
+ "type": "string"
524
+ },
525
+ "usage": {
526
+ "$ref": "#/components/schemas/OpenAICompatUsage"
527
+ }
528
+ },
529
+ "required": [
530
+ "object",
531
+ "data",
532
+ "model",
533
+ "usage"
534
+ ],
535
+ "type": "object"
536
+ },
537
+ "OpenAICompatUsage": {
538
+ "properties": {
539
+ "prompt_tokens": {
540
+ "example": "512",
541
+ "minimum": 0,
542
+ "type": "integer"
543
+ },
544
+ "total_tokens": {
545
+ "example": "512",
546
+ "minimum": 0,
547
+ "type": "integer"
548
+ }
549
+ },
550
+ "required": [
551
+ "prompt_tokens",
552
+ "total_tokens"
553
+ ],
554
+ "type": "object"
555
+ },
556
+ "PredictInput": {
557
+ "description": "Model input. Can be either a single string, a pair of strings or a batch of mixed single and pairs of strings.",
558
+ "example": "What is Deep Learning?",
559
+ "oneOf": [
560
+ {
561
+ "description": "A single string",
562
+ "type": "string"
563
+ },
564
+ {
565
+ "description": "A pair of strings",
566
+ "items": {
567
+ "type": "string"
568
+ },
569
+ "maxItems": 2,
570
+ "minItems": 2,
571
+ "type": "array"
572
+ },
573
+ {
574
+ "description": "A batch",
575
+ "items": {
576
+ "oneOf": [
577
+ {
578
+ "description": "A single string",
579
+ "items": {
580
+ "type": "string"
581
+ },
582
+ "maxItems": 1,
583
+ "minItems": 1,
584
+ "type": "array"
585
+ },
586
+ {
587
+ "description": "A pair of strings",
588
+ "items": {
589
+ "type": "string"
590
+ },
591
+ "maxItems": 2,
592
+ "minItems": 2,
593
+ "type": "array"
594
+ }
595
+ ]
596
+ },
597
+ "type": "array"
598
+ }
599
+ ]
600
+ },
601
+ "PredictRequest": {
602
+ "properties": {
603
+ "inputs": {
604
+ "$ref": "#/components/schemas/PredictInput"
605
+ },
606
+ "raw_scores": {
607
+ "default": "false",
608
+ "example": "false",
609
+ "type": "boolean"
610
+ },
611
+ "truncate": {
612
+ "default": "false",
613
+ "example": "false",
614
+ "nullable": true,
615
+ "type": "boolean"
616
+ },
617
+ "truncation_direction": {
618
+ "allOf": [
619
+ {
620
+ "$ref": "#/components/schemas/TruncationDirection"
621
+ }
622
+ ],
623
+ "default": "right"
624
+ }
625
+ },
626
+ "required": [
627
+ "inputs"
628
+ ],
629
+ "type": "object"
630
+ },
631
+ "PredictResponse": {
632
+ "oneOf": [
633
+ {
634
+ "items": {
635
+ "$ref": "#/components/schemas/Prediction"
636
+ },
637
+ "type": "array"
638
+ },
639
+ {
640
+ "items": {
641
+ "items": {
642
+ "$ref": "#/components/schemas/Prediction"
643
+ },
644
+ "type": "array"
645
+ },
646
+ "type": "array"
647
+ }
648
+ ]
649
+ },
650
+ "Prediction": {
651
+ "properties": {
652
+ "label": {
653
+ "example": "admiration",
654
+ "type": "string"
655
+ },
656
+ "score": {
657
+ "example": "0.5",
658
+ "format": "float",
659
+ "type": "number"
660
+ }
661
+ },
662
+ "required": [
663
+ "score",
664
+ "label"
665
+ ],
666
+ "type": "object"
667
+ },
668
+ "Rank": {
669
+ "properties": {
670
+ "index": {
671
+ "example": "0",
672
+ "minimum": 0,
673
+ "type": "integer"
674
+ },
675
+ "score": {
676
+ "example": "1.0",
677
+ "format": "float",
678
+ "type": "number"
679
+ },
680
+ "text": {
681
+ "default": "null",
682
+ "example": "Deep Learning is ...",
683
+ "nullable": true,
684
+ "type": "string"
685
+ }
686
+ },
687
+ "required": [
688
+ "index",
689
+ "score"
690
+ ],
691
+ "type": "object"
692
+ },
693
+ "RerankRequest": {
694
+ "properties": {
695
+ "query": {
696
+ "example": "What is Deep Learning?",
697
+ "type": "string"
698
+ },
699
+ "raw_scores": {
700
+ "default": "false",
701
+ "example": "false",
702
+ "type": "boolean"
703
+ },
704
+ "return_text": {
705
+ "default": "false",
706
+ "example": "false",
707
+ "type": "boolean"
708
+ },
709
+ "texts": {
710
+ "example": [
711
+ "Deep Learning is ..."
712
+ ],
713
+ "items": {
714
+ "type": "string"
715
+ },
716
+ "type": "array"
717
+ },
718
+ "truncate": {
719
+ "default": "false",
720
+ "example": "false",
721
+ "nullable": true,
722
+ "type": "boolean"
723
+ },
724
+ "truncation_direction": {
725
+ "allOf": [
726
+ {
727
+ "$ref": "#/components/schemas/TruncationDirection"
728
+ }
729
+ ],
730
+ "default": "right"
731
+ }
732
+ },
733
+ "required": [
734
+ "query",
735
+ "texts"
736
+ ],
737
+ "type": "object"
738
+ },
739
+ "RerankResponse": {
740
+ "items": {
741
+ "$ref": "#/components/schemas/Rank"
742
+ },
743
+ "type": "array"
744
+ },
745
+ "SimilarityInput": {
746
+ "properties": {
747
+ "sentences": {
748
+ "description": "A list of strings which will be compared against the source_sentence.",
749
+ "example": [
750
+ "What is Machine Learning?"
751
+ ],
752
+ "items": {
753
+ "type": "string"
754
+ },
755
+ "type": "array"
756
+ },
757
+ "source_sentence": {
758
+ "description": "The string that you wish to compare the other strings with. This can be a phrase, sentence,\nor longer passage, depending on the model being used.",
759
+ "example": "What is Deep Learning?",
760
+ "type": "string"
761
+ }
762
+ },
763
+ "required": [
764
+ "source_sentence",
765
+ "sentences"
766
+ ],
767
+ "type": "object"
768
+ },
769
+ "SimilarityParameters": {
770
+ "properties": {
771
+ "prompt_name": {
772
+ "default": "null",
773
+ "description": "The name of the prompt that should be used for encoding. If not set, no prompt\nwill be applied.\n\nMust be a key in the `sentence-transformers` configuration `prompts` dictionary.\n\nFor example, if ``prompt_name`` is \"query\" and the ``prompts`` is {\"query\": \"query: \", ...},\nthen the sentence \"What is the capital of France?\" will be encoded as\n\"query: What is the capital of France?\" because the prompt text will be prepended before\nany text to encode.",
774
+ "example": "null",
775
+ "nullable": true,
776
+ "type": "string"
777
+ },
778
+ "truncate": {
779
+ "default": "false",
780
+ "example": "false",
781
+ "nullable": true,
782
+ "type": "boolean"
783
+ },
784
+ "truncation_direction": {
785
+ "allOf": [
786
+ {
787
+ "$ref": "#/components/schemas/TruncationDirection"
788
+ }
789
+ ],
790
+ "default": "right"
791
+ }
792
+ },
793
+ "required": [
794
+ "truncation_direction"
795
+ ],
796
+ "type": "object"
797
+ },
798
+ "SimilarityRequest": {
799
+ "properties": {
800
+ "inputs": {
801
+ "$ref": "#/components/schemas/SimilarityInput"
802
+ },
803
+ "parameters": {
804
+ "allOf": [
805
+ {
806
+ "$ref": "#/components/schemas/SimilarityParameters"
807
+ }
808
+ ],
809
+ "default": "null",
810
+ "nullable": true
811
+ }
812
+ },
813
+ "required": [
814
+ "inputs"
815
+ ],
816
+ "type": "object"
817
+ },
818
+ "SimilarityResponse": {
819
+ "example": [
820
+ 0.0,
821
+ 1.0,
822
+ 0.5
823
+ ],
824
+ "items": {
825
+ "format": "float",
826
+ "type": "number"
827
+ },
828
+ "type": "array"
829
+ },
830
+ "SimpleToken": {
831
+ "properties": {
832
+ "id": {
833
+ "example": 0,
834
+ "format": "int32",
835
+ "minimum": 0,
836
+ "type": "integer"
837
+ },
838
+ "special": {
839
+ "example": "false",
840
+ "type": "boolean"
841
+ },
842
+ "start": {
843
+ "example": 0,
844
+ "minimum": 0,
845
+ "nullable": true,
846
+ "type": "integer"
847
+ },
848
+ "stop": {
849
+ "example": 2,
850
+ "minimum": 0,
851
+ "nullable": true,
852
+ "type": "integer"
853
+ },
854
+ "text": {
855
+ "example": "test",
856
+ "type": "string"
857
+ }
858
+ },
859
+ "required": [
860
+ "id",
861
+ "text",
862
+ "special"
863
+ ],
864
+ "type": "object"
865
+ },
866
+ "SparseValue": {
867
+ "properties": {
868
+ "index": {
869
+ "minimum": 0,
870
+ "type": "integer"
871
+ },
872
+ "value": {
873
+ "format": "float",
874
+ "type": "number"
875
+ }
876
+ },
877
+ "required": [
878
+ "index",
879
+ "value"
880
+ ],
881
+ "type": "object"
882
+ },
883
+ "TokenizeInput": {
884
+ "oneOf": [
885
+ {
886
+ "type": "string"
887
+ },
888
+ {
889
+ "items": {
890
+ "type": "string"
891
+ },
892
+ "type": "array"
893
+ }
894
+ ]
895
+ },
896
+ "TokenizeRequest": {
897
+ "properties": {
898
+ "add_special_tokens": {
899
+ "default": "true",
900
+ "example": "true",
901
+ "type": "boolean"
902
+ },
903
+ "inputs": {
904
+ "$ref": "#/components/schemas/TokenizeInput"
905
+ },
906
+ "prompt_name": {
907
+ "default": "null",
908
+ "description": "The name of the prompt that should be used for encoding. If not set, no prompt\nwill be applied.\n\nMust be a key in the `sentence-transformers` configuration `prompts` dictionary.\n\nFor example, if ``prompt_name`` is \"query\" and the ``prompts`` is {\"query\": \"query: \", ...},\nthen the sentence \"What is the capital of France?\" will be encoded as\n\"query: What is the capital of France?\" because the prompt text will be prepended before\nany text to encode.",
909
+ "example": "null",
910
+ "nullable": true,
911
+ "type": "string"
912
+ }
913
+ },
914
+ "required": [
915
+ "inputs"
916
+ ],
917
+ "type": "object"
918
+ },
919
+ "TokenizeResponse": {
920
+ "example": [
921
+ [
922
+ {
923
+ "id": 0,
924
+ "special": false,
925
+ "start": 0,
926
+ "stop": 2,
927
+ "text": "test"
928
+ }
929
+ ]
930
+ ],
931
+ "items": {
932
+ "items": {
933
+ "$ref": "#/components/schemas/SimpleToken"
934
+ },
935
+ "type": "array"
936
+ },
937
+ "type": "array"
938
+ },
939
+ "TruncationDirection": {
940
+ "enum": [
941
+ "Left",
942
+ "Right"
943
+ ],
944
+ "type": "string"
945
+ }
946
+ }
947
+ },
948
+ "info": {
949
+ "contact": {
950
+ "name": "Olivier Dehaene"
951
+ },
952
+ "description": "Text Embedding Webserver",
953
+ "license": {
954
+ "name": "Apache 2.0",
955
+ "url": "https://www.apache.org/licenses/LICENSE-2.0"
956
+ },
957
+ "title": "Text Embeddings Inference",
958
+ "version": "1.6.0"
959
+ },
960
+ "openapi": "3.0.3",
961
+ "paths": {
962
+ "/decode": {
963
+ "post": {
964
+ "operationId": "decode",
965
+ "requestBody": {
966
+ "content": {
967
+ "application/json": {
968
+ "schema": {
969
+ "$ref": "#/components/schemas/DecodeRequest"
970
+ }
971
+ }
972
+ },
973
+ "required": true
974
+ },
975
+ "responses": {
976
+ "200": {
977
+ "content": {
978
+ "application/json": {
979
+ "schema": {
980
+ "$ref": "#/components/schemas/DecodeResponse"
981
+ }
982
+ }
983
+ },
984
+ "description": "Decoded ids"
985
+ },
986
+ "422": {
987
+ "content": {
988
+ "application/json": {
989
+ "example": {
990
+ "error": "Tokenization error",
991
+ "error_type": "tokenizer"
992
+ },
993
+ "schema": {
994
+ "$ref": "#/components/schemas/ErrorResponse"
995
+ }
996
+ }
997
+ },
998
+ "description": "Tokenization error"
999
+ }
1000
+ },
1001
+ "summary": "Decode input ids",
1002
+ "tags": [
1003
+ "Text Embeddings Inference"
1004
+ ]
1005
+ }
1006
+ },
1007
+ "/embed": {
1008
+ "post": {
1009
+ "operationId": "embed",
1010
+ "requestBody": {
1011
+ "content": {
1012
+ "application/json": {
1013
+ "schema": {
1014
+ "$ref": "#/components/schemas/EmbedRequest"
1015
+ }
1016
+ }
1017
+ },
1018
+ "required": true
1019
+ },
1020
+ "responses": {
1021
+ "200": {
1022
+ "content": {
1023
+ "application/json": {
1024
+ "schema": {
1025
+ "$ref": "#/components/schemas/EmbedResponse"
1026
+ }
1027
+ }
1028
+ },
1029
+ "description": "Embeddings"
1030
+ },
1031
+ "413": {
1032
+ "content": {
1033
+ "application/json": {
1034
+ "example": {
1035
+ "error": "Batch size error",
1036
+ "error_type": "validation"
1037
+ },
1038
+ "schema": {
1039
+ "$ref": "#/components/schemas/ErrorResponse"
1040
+ }
1041
+ }
1042
+ },
1043
+ "description": "Batch size error"
1044
+ },
1045
+ "422": {
1046
+ "content": {
1047
+ "application/json": {
1048
+ "example": {
1049
+ "error": "Tokenization error",
1050
+ "error_type": "tokenizer"
1051
+ },
1052
+ "schema": {
1053
+ "$ref": "#/components/schemas/ErrorResponse"
1054
+ }
1055
+ }
1056
+ },
1057
+ "description": "Tokenization error"
1058
+ },
1059
+ "424": {
1060
+ "content": {
1061
+ "application/json": {
1062
+ "example": {
1063
+ "error": "Inference failed",
1064
+ "error_type": "backend"
1065
+ },
1066
+ "schema": {
1067
+ "$ref": "#/components/schemas/ErrorResponse"
1068
+ }
1069
+ }
1070
+ },
1071
+ "description": "Embedding Error"
1072
+ },
1073
+ "429": {
1074
+ "content": {
1075
+ "application/json": {
1076
+ "example": {
1077
+ "error": "Model is overloaded",
1078
+ "error_type": "overloaded"
1079
+ },
1080
+ "schema": {
1081
+ "$ref": "#/components/schemas/ErrorResponse"
1082
+ }
1083
+ }
1084
+ },
1085
+ "description": "Model is overloaded"
1086
+ }
1087
+ },
1088
+ "summary": "Get Embeddings. Returns a 424 status code if the model is not an embedding model.",
1089
+ "tags": [
1090
+ "Text Embeddings Inference"
1091
+ ]
1092
+ }
1093
+ },
1094
+ "/embed_all": {
1095
+ "post": {
1096
+ "description": "Returns a 424 status code if the model is not an embedding model.",
1097
+ "operationId": "embed_all",
1098
+ "requestBody": {
1099
+ "content": {
1100
+ "application/json": {
1101
+ "schema": {
1102
+ "$ref": "#/components/schemas/EmbedAllRequest"
1103
+ }
1104
+ }
1105
+ },
1106
+ "required": true
1107
+ },
1108
+ "responses": {
1109
+ "200": {
1110
+ "content": {
1111
+ "application/json": {
1112
+ "schema": {
1113
+ "$ref": "#/components/schemas/EmbedAllResponse"
1114
+ }
1115
+ }
1116
+ },
1117
+ "description": "Embeddings"
1118
+ },
1119
+ "413": {
1120
+ "content": {
1121
+ "application/json": {
1122
+ "example": {
1123
+ "error": "Batch size error",
1124
+ "error_type": "validation"
1125
+ },
1126
+ "schema": {
1127
+ "$ref": "#/components/schemas/ErrorResponse"
1128
+ }
1129
+ }
1130
+ },
1131
+ "description": "Batch size error"
1132
+ },
1133
+ "422": {
1134
+ "content": {
1135
+ "application/json": {
1136
+ "example": {
1137
+ "error": "Tokenization error",
1138
+ "error_type": "tokenizer"
1139
+ },
1140
+ "schema": {
1141
+ "$ref": "#/components/schemas/ErrorResponse"
1142
+ }
1143
+ }
1144
+ },
1145
+ "description": "Tokenization error"
1146
+ },
1147
+ "424": {
1148
+ "content": {
1149
+ "application/json": {
1150
+ "example": {
1151
+ "error": "Inference failed",
1152
+ "error_type": "backend"
1153
+ },
1154
+ "schema": {
1155
+ "$ref": "#/components/schemas/ErrorResponse"
1156
+ }
1157
+ }
1158
+ },
1159
+ "description": "Embedding Error"
1160
+ },
1161
+ "429": {
1162
+ "content": {
1163
+ "application/json": {
1164
+ "example": {
1165
+ "error": "Model is overloaded",
1166
+ "error_type": "overloaded"
1167
+ },
1168
+ "schema": {
1169
+ "$ref": "#/components/schemas/ErrorResponse"
1170
+ }
1171
+ }
1172
+ },
1173
+ "description": "Model is overloaded"
1174
+ }
1175
+ },
1176
+ "summary": "Get all Embeddings without Pooling.",
1177
+ "tags": [
1178
+ "Text Embeddings Inference"
1179
+ ]
1180
+ }
1181
+ },
1182
+ "/embed_sparse": {
1183
+ "post": {
1184
+ "operationId": "embed_sparse",
1185
+ "requestBody": {
1186
+ "content": {
1187
+ "application/json": {
1188
+ "schema": {
1189
+ "$ref": "#/components/schemas/EmbedSparseRequest"
1190
+ }
1191
+ }
1192
+ },
1193
+ "required": true
1194
+ },
1195
+ "responses": {
1196
+ "200": {
1197
+ "content": {
1198
+ "application/json": {
1199
+ "schema": {
1200
+ "$ref": "#/components/schemas/EmbedSparseResponse"
1201
+ }
1202
+ }
1203
+ },
1204
+ "description": "Embeddings"
1205
+ },
1206
+ "413": {
1207
+ "content": {
1208
+ "application/json": {
1209
+ "example": {
1210
+ "error": "Batch size error",
1211
+ "error_type": "validation"
1212
+ },
1213
+ "schema": {
1214
+ "$ref": "#/components/schemas/ErrorResponse"
1215
+ }
1216
+ }
1217
+ },
1218
+ "description": "Batch size error"
1219
+ },
1220
+ "422": {
1221
+ "content": {
1222
+ "application/json": {
1223
+ "example": {
1224
+ "error": "Tokenization error",
1225
+ "error_type": "tokenizer"
1226
+ },
1227
+ "schema": {
1228
+ "$ref": "#/components/schemas/ErrorResponse"
1229
+ }
1230
+ }
1231
+ },
1232
+ "description": "Tokenization error"
1233
+ },
1234
+ "424": {
1235
+ "content": {
1236
+ "application/json": {
1237
+ "example": {
1238
+ "error": "Inference failed",
1239
+ "error_type": "backend"
1240
+ },
1241
+ "schema": {
1242
+ "$ref": "#/components/schemas/ErrorResponse"
1243
+ }
1244
+ }
1245
+ },
1246
+ "description": "Embedding Error"
1247
+ },
1248
+ "429": {
1249
+ "content": {
1250
+ "application/json": {
1251
+ "example": {
1252
+ "error": "Model is overloaded",
1253
+ "error_type": "overloaded"
1254
+ },
1255
+ "schema": {
1256
+ "$ref": "#/components/schemas/ErrorResponse"
1257
+ }
1258
+ }
1259
+ },
1260
+ "description": "Model is overloaded"
1261
+ }
1262
+ },
1263
+ "summary": "Get Sparse Embeddings. Returns a 424 status code if the model is not an embedding model with SPLADE pooling.",
1264
+ "tags": [
1265
+ "Text Embeddings Inference"
1266
+ ]
1267
+ }
1268
+ },
1269
+ "/health": {
1270
+ "get": {
1271
+ "operationId": "health",
1272
+ "responses": {
1273
+ "200": {
1274
+ "description": "Everything is working fine"
1275
+ },
1276
+ "503": {
1277
+ "content": {
1278
+ "application/json": {
1279
+ "example": {
1280
+ "error": "unhealthy",
1281
+ "error_type": "unhealthy"
1282
+ },
1283
+ "schema": {
1284
+ "$ref": "#/components/schemas/ErrorResponse"
1285
+ }
1286
+ }
1287
+ },
1288
+ "description": "Text embeddings Inference is down"
1289
+ }
1290
+ },
1291
+ "summary": "Health check method",
1292
+ "tags": [
1293
+ "Text Embeddings Inference"
1294
+ ]
1295
+ }
1296
+ },
1297
+ "/info": {
1298
+ "get": {
1299
+ "operationId": "get_model_info",
1300
+ "responses": {
1301
+ "200": {
1302
+ "content": {
1303
+ "application/json": {
1304
+ "schema": {
1305
+ "$ref": "#/components/schemas/Info"
1306
+ }
1307
+ }
1308
+ },
1309
+ "description": "Served model info"
1310
+ }
1311
+ },
1312
+ "summary": "Text Embeddings Inference endpoint info",
1313
+ "tags": [
1314
+ "Text Embeddings Inference"
1315
+ ]
1316
+ }
1317
+ },
1318
+ "/metrics": {
1319
+ "get": {
1320
+ "operationId": "metrics",
1321
+ "responses": {
1322
+ "200": {
1323
+ "content": {
1324
+ "text/plain": {
1325
+ "schema": {
1326
+ "type": "string"
1327
+ }
1328
+ }
1329
+ },
1330
+ "description": "Prometheus Metrics"
1331
+ }
1332
+ },
1333
+ "summary": "Prometheus metrics scrape endpoint",
1334
+ "tags": [
1335
+ "Text Embeddings Inference"
1336
+ ]
1337
+ }
1338
+ },
1339
+ "/predict": {
1340
+ "post": {
1341
+ "operationId": "predict",
1342
+ "requestBody": {
1343
+ "content": {
1344
+ "application/json": {
1345
+ "schema": {
1346
+ "$ref": "#/components/schemas/PredictRequest"
1347
+ }
1348
+ }
1349
+ },
1350
+ "required": true
1351
+ },
1352
+ "responses": {
1353
+ "200": {
1354
+ "content": {
1355
+ "application/json": {
1356
+ "schema": {
1357
+ "$ref": "#/components/schemas/PredictResponse"
1358
+ }
1359
+ }
1360
+ },
1361
+ "description": "Predictions"
1362
+ },
1363
+ "413": {
1364
+ "content": {
1365
+ "application/json": {
1366
+ "example": {
1367
+ "error": "Batch size error",
1368
+ "error_type": "validation"
1369
+ },
1370
+ "schema": {
1371
+ "$ref": "#/components/schemas/ErrorResponse"
1372
+ }
1373
+ }
1374
+ },
1375
+ "description": "Batch size error"
1376
+ },
1377
+ "422": {
1378
+ "content": {
1379
+ "application/json": {
1380
+ "example": {
1381
+ "error": "Tokenization error",
1382
+ "error_type": "tokenizer"
1383
+ },
1384
+ "schema": {
1385
+ "$ref": "#/components/schemas/ErrorResponse"
1386
+ }
1387
+ }
1388
+ },
1389
+ "description": "Tokenization error"
1390
+ },
1391
+ "424": {
1392
+ "content": {
1393
+ "application/json": {
1394
+ "example": {
1395
+ "error": "Inference failed",
1396
+ "error_type": "backend"
1397
+ },
1398
+ "schema": {
1399
+ "$ref": "#/components/schemas/ErrorResponse"
1400
+ }
1401
+ }
1402
+ },
1403
+ "description": "Prediction Error"
1404
+ },
1405
+ "429": {
1406
+ "content": {
1407
+ "application/json": {
1408
+ "example": {
1409
+ "error": "Model is overloaded",
1410
+ "error_type": "overloaded"
1411
+ },
1412
+ "schema": {
1413
+ "$ref": "#/components/schemas/ErrorResponse"
1414
+ }
1415
+ }
1416
+ },
1417
+ "description": "Model is overloaded"
1418
+ }
1419
+ },
1420
+ "summary": "Get Predictions. Returns a 424 status code if the model is not a Sequence Classification model",
1421
+ "tags": [
1422
+ "Text Embeddings Inference"
1423
+ ]
1424
+ }
1425
+ },
1426
+ "/rerank": {
1427
+ "post": {
1428
+ "description": "Returns a 424 status code if the model is not a Sequence Classification model with a single class.",
1429
+ "operationId": "rerank",
1430
+ "requestBody": {
1431
+ "content": {
1432
+ "application/json": {
1433
+ "schema": {
1434
+ "$ref": "#/components/schemas/RerankRequest"
1435
+ }
1436
+ }
1437
+ },
1438
+ "required": true
1439
+ },
1440
+ "responses": {
1441
+ "200": {
1442
+ "content": {
1443
+ "application/json": {
1444
+ "schema": {
1445
+ "$ref": "#/components/schemas/RerankResponse"
1446
+ }
1447
+ }
1448
+ },
1449
+ "description": "Ranks"
1450
+ },
1451
+ "413": {
1452
+ "content": {
1453
+ "application/json": {
1454
+ "example": {
1455
+ "error": "Batch size error",
1456
+ "error_type": "validation"
1457
+ },
1458
+ "schema": {
1459
+ "$ref": "#/components/schemas/ErrorResponse"
1460
+ }
1461
+ }
1462
+ },
1463
+ "description": "Batch size error"
1464
+ },
1465
+ "422": {
1466
+ "content": {
1467
+ "application/json": {
1468
+ "example": {
1469
+ "error": "Tokenization error",
1470
+ "error_type": "tokenizer"
1471
+ },
1472
+ "schema": {
1473
+ "$ref": "#/components/schemas/ErrorResponse"
1474
+ }
1475
+ }
1476
+ },
1477
+ "description": "Tokenization error"
1478
+ },
1479
+ "424": {
1480
+ "content": {
1481
+ "application/json": {
1482
+ "example": {
1483
+ "error": "Inference failed",
1484
+ "error_type": "backend"
1485
+ },
1486
+ "schema": {
1487
+ "$ref": "#/components/schemas/ErrorResponse"
1488
+ }
1489
+ }
1490
+ },
1491
+ "description": "Rerank Error"
1492
+ },
1493
+ "429": {
1494
+ "content": {
1495
+ "application/json": {
1496
+ "example": {
1497
+ "error": "Model is overloaded",
1498
+ "error_type": "overloaded"
1499
+ },
1500
+ "schema": {
1501
+ "$ref": "#/components/schemas/ErrorResponse"
1502
+ }
1503
+ }
1504
+ },
1505
+ "description": "Model is overloaded"
1506
+ }
1507
+ },
1508
+ "summary": "Get Ranks. Returns a 424 status code if the model is not a Sequence Classification model with a single class.",
1509
+ "tags": [
1510
+ "Text Embeddings Inference"
1511
+ ]
1512
+ }
1513
+ },
1514
+ "/similarity": {
1515
+ "post": {
1516
+ "operationId": "similarity",
1517
+ "requestBody": {
1518
+ "content": {
1519
+ "application/json": {
1520
+ "schema": {
1521
+ "$ref": "#/components/schemas/SimilarityRequest"
1522
+ }
1523
+ }
1524
+ },
1525
+ "required": true
1526
+ },
1527
+ "responses": {
1528
+ "200": {
1529
+ "content": {
1530
+ "application/json": {
1531
+ "schema": {
1532
+ "$ref": "#/components/schemas/SimilarityResponse"
1533
+ }
1534
+ }
1535
+ },
1536
+ "description": "Sentence Similarity"
1537
+ },
1538
+ "413": {
1539
+ "content": {
1540
+ "application/json": {
1541
+ "example": {
1542
+ "error": "Batch size error",
1543
+ "error_type": "validation"
1544
+ },
1545
+ "schema": {
1546
+ "$ref": "#/components/schemas/ErrorResponse"
1547
+ }
1548
+ }
1549
+ },
1550
+ "description": "Batch size error"
1551
+ },
1552
+ "422": {
1553
+ "content": {
1554
+ "application/json": {
1555
+ "example": {
1556
+ "error": "Tokenization error",
1557
+ "error_type": "tokenizer"
1558
+ },
1559
+ "schema": {
1560
+ "$ref": "#/components/schemas/ErrorResponse"
1561
+ }
1562
+ }
1563
+ },
1564
+ "description": "Tokenization error"
1565
+ },
1566
+ "424": {
1567
+ "content": {
1568
+ "application/json": {
1569
+ "example": {
1570
+ "error": "Inference failed",
1571
+ "error_type": "backend"
1572
+ },
1573
+ "schema": {
1574
+ "$ref": "#/components/schemas/ErrorResponse"
1575
+ }
1576
+ }
1577
+ },
1578
+ "description": "Embedding Error"
1579
+ },
1580
+ "429": {
1581
+ "content": {
1582
+ "application/json": {
1583
+ "example": {
1584
+ "error": "Model is overloaded",
1585
+ "error_type": "overloaded"
1586
+ },
1587
+ "schema": {
1588
+ "$ref": "#/components/schemas/ErrorResponse"
1589
+ }
1590
+ }
1591
+ },
1592
+ "description": "Model is overloaded"
1593
+ }
1594
+ },
1595
+ "summary": "Get Sentence Similarity. Returns a 424 status code if the model is not an embedding model.",
1596
+ "tags": [
1597
+ "Text Embeddings Inference"
1598
+ ]
1599
+ }
1600
+ },
1601
+ "/tokenize": {
1602
+ "post": {
1603
+ "operationId": "tokenize",
1604
+ "requestBody": {
1605
+ "content": {
1606
+ "application/json": {
1607
+ "schema": {
1608
+ "$ref": "#/components/schemas/TokenizeRequest"
1609
+ }
1610
+ }
1611
+ },
1612
+ "required": true
1613
+ },
1614
+ "responses": {
1615
+ "200": {
1616
+ "content": {
1617
+ "application/json": {
1618
+ "schema": {
1619
+ "$ref": "#/components/schemas/TokenizeResponse"
1620
+ }
1621
+ }
1622
+ },
1623
+ "description": "Tokenized ids"
1624
+ },
1625
+ "422": {
1626
+ "content": {
1627
+ "application/json": {
1628
+ "example": {
1629
+ "error": "Tokenization error",
1629
+ "error_type": "tokenizer"
1631
+ },
1632
+ "schema": {
1633
+ "$ref": "#/components/schemas/ErrorResponse"
1634
+ }
1635
+ }
1636
+ },
1637
+ "description": "Tokenization error"
1638
+ }
1639
+ },
1640
+ "summary": "Tokenize inputs",
1641
+ "tags": [
1642
+ "Text Embeddings Inference"
1643
+ ]
1644
+ }
1645
+ },
1646
+ "/v1/embeddings": {
1647
+ "post": {
1648
+ "operationId": "openai_embed",
1649
+ "requestBody": {
1650
+ "content": {
1651
+ "application/json": {
1652
+ "schema": {
1653
+ "$ref": "#/components/schemas/OpenAICompatRequest"
1654
+ }
1655
+ }
1656
+ },
1657
+ "required": true
1658
+ },
1659
+ "responses": {
1660
+ "200": {
1661
+ "content": {
1662
+ "application/json": {
1663
+ "schema": {
1664
+ "$ref": "#/components/schemas/OpenAICompatResponse"
1665
+ }
1666
+ }
1667
+ },
1668
+ "description": "Embeddings"
1669
+ },
1670
+ "413": {
1671
+ "content": {
1672
+ "application/json": {
1673
+ "example": {
1674
+ "message": "Batch size error",
1675
+ "type": "validation"
1676
+ },
1677
+ "schema": {
1678
+ "$ref": "#/components/schemas/OpenAICompatErrorResponse"
1679
+ }
1680
+ }
1681
+ },
1682
+ "description": "Batch size error"
1683
+ },
1684
+ "422": {
1685
+ "content": {
1686
+ "application/json": {
1687
+ "example": {
1688
+ "message": "Tokenization error",
1689
+ "type": "tokenizer"
1690
+ },
1691
+ "schema": {
1692
+ "$ref": "#/components/schemas/OpenAICompatErrorResponse"
1693
+ }
1694
+ }
1695
+ },
1696
+ "description": "Tokenization error"
1697
+ },
1698
+ "424": {
1699
+ "content": {
1700
+ "application/json": {
1701
+ "example": {
1702
+ "message": "Inference failed",
1703
+ "type": "backend"
1704
+ },
1705
+ "schema": {
1706
+ "$ref": "#/components/schemas/OpenAICompatErrorResponse"
1707
+ }
1708
+ }
1709
+ },
1710
+ "description": "Embedding Error"
1711
+ },
1712
+ "429": {
1713
+ "content": {
1714
+ "application/json": {
1715
+ "example": {
1716
+ "message": "Model is overloaded",
1717
+ "type": "overloaded"
1718
+ },
1719
+ "schema": {
1720
+ "$ref": "#/components/schemas/OpenAICompatErrorResponse"
1721
+ }
1722
+ }
1723
+ },
1724
+ "description": "Model is overloaded"
1725
+ }
1726
+ },
1727
+ "summary": "OpenAI compatible route. Returns a 424 status code if the model is not an embedding model.",
1728
+ "tags": [
1729
+ "Text Embeddings Inference"
1730
+ ]
1731
+ }
1732
+ }
1733
+ },
1734
+ "tags": [
1735
+ {
1736
+ "description": "Hugging Face Text Embeddings Inference API",
1737
+ "name": "Text Embeddings Inference"
1738
+ }
1739
+ ]
1740
+ }