elasticsearch 9.0.1__py3-none-any.whl → 9.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- elasticsearch/__init__.py +3 -3
- elasticsearch/_async/client/__init__.py +93 -249
- elasticsearch/_async/client/async_search.py +4 -4
- elasticsearch/_async/client/autoscaling.py +4 -4
- elasticsearch/_async/client/cat.py +620 -58
- elasticsearch/_async/client/ccr.py +13 -13
- elasticsearch/_async/client/cluster.py +32 -22
- elasticsearch/_async/client/connector.py +30 -30
- elasticsearch/_async/client/dangling_indices.py +3 -3
- elasticsearch/_async/client/enrich.py +5 -5
- elasticsearch/_async/client/eql.py +14 -6
- elasticsearch/_async/client/esql.py +125 -9
- elasticsearch/_async/client/features.py +2 -2
- elasticsearch/_async/client/fleet.py +3 -3
- elasticsearch/_async/client/graph.py +1 -1
- elasticsearch/_async/client/ilm.py +11 -11
- elasticsearch/_async/client/indices.py +605 -117
- elasticsearch/_async/client/inference.py +523 -116
- elasticsearch/_async/client/ingest.py +9 -16
- elasticsearch/_async/client/license.py +11 -11
- elasticsearch/_async/client/logstash.py +3 -3
- elasticsearch/_async/client/migration.py +3 -3
- elasticsearch/_async/client/ml.py +75 -87
- elasticsearch/_async/client/monitoring.py +1 -1
- elasticsearch/_async/client/nodes.py +7 -7
- elasticsearch/_async/client/query_rules.py +8 -8
- elasticsearch/_async/client/rollup.py +9 -30
- elasticsearch/_async/client/search_application.py +10 -10
- elasticsearch/_async/client/searchable_snapshots.py +4 -4
- elasticsearch/_async/client/security.py +79 -81
- elasticsearch/_async/client/shutdown.py +3 -3
- elasticsearch/_async/client/simulate.py +1 -1
- elasticsearch/_async/client/slm.py +9 -9
- elasticsearch/_async/client/snapshot.py +64 -21
- elasticsearch/_async/client/sql.py +6 -6
- elasticsearch/_async/client/ssl.py +1 -1
- elasticsearch/_async/client/synonyms.py +26 -7
- elasticsearch/_async/client/tasks.py +4 -4
- elasticsearch/_async/client/text_structure.py +4 -4
- elasticsearch/_async/client/transform.py +11 -11
- elasticsearch/_async/client/watcher.py +17 -15
- elasticsearch/_async/client/xpack.py +2 -2
- elasticsearch/_otel.py +8 -8
- elasticsearch/_sync/client/__init__.py +93 -249
- elasticsearch/_sync/client/async_search.py +4 -4
- elasticsearch/_sync/client/autoscaling.py +4 -4
- elasticsearch/_sync/client/cat.py +620 -58
- elasticsearch/_sync/client/ccr.py +13 -13
- elasticsearch/_sync/client/cluster.py +32 -22
- elasticsearch/_sync/client/connector.py +30 -30
- elasticsearch/_sync/client/dangling_indices.py +3 -3
- elasticsearch/_sync/client/enrich.py +5 -5
- elasticsearch/_sync/client/eql.py +14 -6
- elasticsearch/_sync/client/esql.py +125 -9
- elasticsearch/_sync/client/features.py +2 -2
- elasticsearch/_sync/client/fleet.py +3 -3
- elasticsearch/_sync/client/graph.py +1 -1
- elasticsearch/_sync/client/ilm.py +11 -11
- elasticsearch/_sync/client/indices.py +605 -117
- elasticsearch/_sync/client/inference.py +523 -116
- elasticsearch/_sync/client/ingest.py +9 -16
- elasticsearch/_sync/client/license.py +11 -11
- elasticsearch/_sync/client/logstash.py +3 -3
- elasticsearch/_sync/client/migration.py +3 -3
- elasticsearch/_sync/client/ml.py +75 -87
- elasticsearch/_sync/client/monitoring.py +1 -1
- elasticsearch/_sync/client/nodes.py +7 -7
- elasticsearch/_sync/client/query_rules.py +8 -8
- elasticsearch/_sync/client/rollup.py +9 -30
- elasticsearch/_sync/client/search_application.py +10 -10
- elasticsearch/_sync/client/searchable_snapshots.py +4 -4
- elasticsearch/_sync/client/security.py +79 -81
- elasticsearch/_sync/client/shutdown.py +3 -3
- elasticsearch/_sync/client/simulate.py +1 -1
- elasticsearch/_sync/client/slm.py +9 -9
- elasticsearch/_sync/client/snapshot.py +64 -21
- elasticsearch/_sync/client/sql.py +6 -6
- elasticsearch/_sync/client/ssl.py +1 -1
- elasticsearch/_sync/client/synonyms.py +26 -7
- elasticsearch/_sync/client/tasks.py +4 -4
- elasticsearch/_sync/client/text_structure.py +4 -4
- elasticsearch/_sync/client/transform.py +11 -11
- elasticsearch/_sync/client/watcher.py +17 -15
- elasticsearch/_sync/client/xpack.py +2 -2
- elasticsearch/_version.py +1 -1
- elasticsearch/compat.py +5 -0
- elasticsearch/dsl/__init__.py +2 -1
- elasticsearch/dsl/_async/document.py +1 -1
- elasticsearch/dsl/_sync/document.py +1 -1
- elasticsearch/dsl/aggs.py +20 -0
- elasticsearch/dsl/document_base.py +177 -17
- elasticsearch/dsl/field.py +241 -38
- elasticsearch/dsl/query.py +50 -5
- elasticsearch/dsl/response/__init__.py +1 -1
- elasticsearch/dsl/types.py +245 -21
- elasticsearch/dsl/utils.py +1 -1
- elasticsearch/esql/__init__.py +18 -0
- elasticsearch/esql/esql.py +1105 -0
- elasticsearch/esql/esql1.py +307 -0
- elasticsearch/esql/functions.py +1738 -0
- {elasticsearch-9.0.1.dist-info → elasticsearch-9.1.0.dist-info}/METADATA +3 -6
- elasticsearch-9.1.0.dist-info/RECORD +164 -0
- elasticsearch-9.0.1.dist-info/RECORD +0 -162
- elasticsearch-9.0.1.dist-info/licenses/LICENSE.txt +0 -175
- elasticsearch-9.0.1.dist-info/licenses/NOTICE.txt +0 -559
- {elasticsearch-9.0.1.dist-info → elasticsearch-9.1.0.dist-info}/WHEEL +0 -0
- {elasticsearch-9.0.1.dist-info → elasticsearch-9.1.0.dist-info}/licenses/LICENSE +0 -0
- {elasticsearch-9.0.1.dist-info → elasticsearch-9.1.0.dist-info}/licenses/NOTICE +0 -0
elasticsearch/_async/client/inference.py

@@ -47,7 +47,7 @@ class InferenceClient(NamespacedClient):
             <p>Perform completion inference on the service</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_
 
         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
@@ -123,7 +123,7 @@ class InferenceClient(NamespacedClient):
             <p>Delete an inference endpoint</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-delete>`_
 
         :param inference_id: The inference identifier.
         :param task_type: The task type
@@ -197,7 +197,7 @@ class InferenceClient(NamespacedClient):
             <p>Get an inference endpoint</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-get>`_
 
         :param task_type: The task type
         :param inference_id: The inference Id
@@ -235,7 +235,7 @@ class InferenceClient(NamespacedClient):
         )
 
     @_rewrite_parameters(
-        body_fields=("input", "query", "task_settings"),
+        body_fields=("input", "input_type", "query", "task_settings"),
     )
     async def inference(
         self,
@@ -257,6 +257,7 @@ class InferenceClient(NamespacedClient):
         error_trace: t.Optional[bool] = None,
        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
+        input_type: t.Optional[str] = None,
         pretty: t.Optional[bool] = None,
         query: t.Optional[str] = None,
         task_settings: t.Optional[t.Any] = None,
@@ -277,13 +278,22 @@ class InferenceClient(NamespacedClient):
             </blockquote>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_
 
         :param inference_id: The unique identifier for the inference endpoint.
         :param input: The text on which you want to perform the inference task. It can
             be a single string or an array. > info > Inference endpoints for the `completion`
             task type currently only support a single string as input.
         :param task_type: The type of inference task that the model performs.
+        :param input_type: Specifies the input data type for the text embedding model.
+            The `input_type` parameter only applies to Inference Endpoints with the `text_embedding`
+            task type. Possible values include: * `SEARCH` * `INGEST` * `CLASSIFICATION`
+            * `CLUSTERING` Not all services support all values. Unsupported values will
+            trigger a validation exception. Accepted values depend on the configured
+            inference service, refer to the relevant service-specific documentation for
+            more info. > info > The `input_type` parameter specified on the root level
+            of the request body will take precedence over the `input_type` parameter
+            specified in `task_settings`.
         :param query: The query input, which is required only for the `rerank` task.
             It is not required for other tasks.
         :param task_settings: Task settings for the individual inference request. These
@@ -322,6 +332,8 @@ class InferenceClient(NamespacedClient):
         if not __body:
             if input is not None:
                 __body["input"] = input
+            if input_type is not None:
+                __body["input_type"] = input_type
             if query is not None:
                 __body["query"] = query
             if task_settings is not None:
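The `input_type` plumbing above surfaces as a new keyword argument on `inference()`. A minimal usage sketch, assuming an async client; the endpoint name is a placeholder and the response shape depends on the configured task type:

```python
from elasticsearch import AsyncElasticsearch

client = AsyncElasticsearch("http://localhost:9200")

async def embed_for_search() -> None:
    # input_type only applies to text_embedding endpoints; a root-level
    # value takes precedence over task_settings["input_type"].
    resp = await client.inference.inference(
        inference_id="my-text-embedding-endpoint",  # placeholder endpoint name
        input=["first passage", "second passage"],
        input_type="SEARCH",
    )
    print(resp)
```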
@@ -366,26 +378,46 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html
 
-            <p>Create an inference endpoint
-            When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+            <p>Create an inference endpoint.</p>
             <p>IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.
             For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.
             However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>
+            <p>The following integrations are available through the inference API. You can find the available task types next to the integration name:</p>
+            <ul>
+            <li>AlibabaCloud AI Search (<code>completion</code>, <code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code>)</li>
+            <li>Amazon Bedrock (<code>completion</code>, <code>text_embedding</code>)</li>
+            <li>Amazon SageMaker (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code>)</li>
+            <li>Anthropic (<code>completion</code>)</li>
+            <li>Azure AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+            <li>Azure OpenAI (<code>completion</code>, <code>text_embedding</code>)</li>
+            <li>Cohere (<code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+            <li>DeepSeek (<code>completion</code>, <code>chat_completion</code>)</li>
+            <li>Elasticsearch (<code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code> - this service is for built-in models and models uploaded through Eland)</li>
+            <li>ELSER (<code>sparse_embedding</code>)</li>
+            <li>Google AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+            <li>Google Vertex AI (<code>rerank</code>, <code>text_embedding</code>)</li>
+            <li>Hugging Face (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+            <li>Mistral (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+            <li>OpenAI (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+            <li>VoyageAI (<code>text_embedding</code>, <code>rerank</code>)</li>
+            <li>Watsonx inference integration (<code>text_embedding</code>)</li>
+            <li>JinaAI (<code>text_embedding</code>, <code>rerank</code>)</li>
+            </ul>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put>`_
 
         :param inference_id: The inference Id
         :param inference_config:
-        :param task_type: The task type
+        :param task_type: The task type. Refer to the integration list in the API description
+            for the available task types.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if inference_id in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'inference_id'")
@@ -416,6 +448,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         __body = inference_config if inference_config is not None else body
         __headers = {"accept": "application/json", "content-type": "application/json"}
         return await self.perform_request(  # type: ignore[return-value]
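Since `put()` now forwards `timeout` as a query parameter, endpoint creation can be bounded explicitly. A hedged sketch, assuming a `sparse_embedding` endpoint backed by the built-in `elasticsearch` service; the endpoint name and model settings are illustrative:

```python
from elasticsearch import AsyncElasticsearch

client = AsyncElasticsearch("http://localhost:9200")

async def create_endpoint() -> None:
    await client.inference.put(
        task_type="sparse_embedding",
        inference_id="my-elser-endpoint",  # illustrative endpoint name
        inference_config={
            "service": "elasticsearch",
            "service_settings": {
                "model_id": ".elser_model_2",  # built-in ELSER model
                "num_allocations": 1,
                "num_threads": 1,
            },
        },
        timeout="30s",  # new in 9.1.0: how long to wait for endpoint creation
    )
```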
@@ -451,6 +485,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -458,14 +493,9 @@ class InferenceClient(NamespacedClient):
 
             <p>Create an AlibabaCloud AI Search inference endpoint.</p>
             <p>Create an inference endpoint to perform an inference task with the <code>alibabacloud-ai-search</code> service.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-alibabacloud>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param alibabacloud_inference_id: The unique identifier of the inference endpoint.
@@ -476,6 +506,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -502,6 +534,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -547,25 +581,21 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html
 
             <p>Create an Amazon Bedrock inference endpoint.</p>
-            <p>
+            <p>Create an inference endpoint to perform an inference task with the <code>amazonbedrock</code> service.</p>
             <blockquote>
             <p>info
             You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.</p>
             </blockquote>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonbedrock>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param amazonbedrock_inference_id: The unique identifier of the inference endpoint.
@@ -576,6 +606,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -602,6 +634,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -626,6 +660,112 @@ class InferenceClient(NamespacedClient):
             path_parts=__path_parts,
         )
 
+    @_rewrite_parameters(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
+    )
+    async def put_amazonsagemaker(
+        self,
+        *,
+        task_type: t.Union[
+            str,
+            t.Literal[
+                "chat_completion",
+                "completion",
+                "rerank",
+                "sparse_embedding",
+                "text_embedding",
+            ],
+        ],
+        amazonsagemaker_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["amazon_sagemaker"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+            <p>Create an Amazon SageMaker inference endpoint.</p>
+            <p>Create an inference endpoint to perform an inference task with the <code>amazon_sagemaker</code> service.</p>
+
+
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonsagemaker>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param amazonsagemaker_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `amazon_sagemaker`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `amazon_sagemaker` service and `service_settings.api`
+            you specified.
+        :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type and `service_settings.api` you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if amazonsagemaker_inference_id in SKIP_IN_PATH:
+            raise ValueError(
+                "Empty value passed for parameter 'amazonsagemaker_inference_id'"
+            )
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "amazonsagemaker_inference_id": _quote(amazonsagemaker_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["amazonsagemaker_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_amazonsagemaker",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_fields=(
             "service",
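A sketch of calling the new `put_amazonsagemaker()` method added above. The endpoint identifier and the `service_settings` keys are assumptions for illustration; beyond the docstring's reference to `service_settings.api`, this diff does not spell out the settings schema:

```python
from elasticsearch import AsyncElasticsearch

client = AsyncElasticsearch("http://localhost:9200")

async def create_sagemaker_endpoint() -> None:
    await client.inference.put_amazonsagemaker(
        task_type="text_embedding",
        amazonsagemaker_inference_id="my-sagemaker-endpoint",  # placeholder
        service="amazon_sagemaker",
        service_settings={
            # Keys below are illustrative assumptions; consult the linked
            # operation docs for the authoritative schema.
            "access_key": "<aws access key>",
            "secret_key": "<aws secret key>",
            "endpoint_name": "my-sm-endpoint",
            "api": "openai",
            "region": "us-east-1",
        },
        timeout="30s",
    )
```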
@@ -647,6 +787,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -654,14 +795,9 @@ class InferenceClient(NamespacedClient):
 
             <p>Create an Anthropic inference endpoint.</p>
             <p>Create an inference endpoint to perform an inference task with the <code>anthropic</code> service.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-anthropic>`_
 
         :param task_type: The task type. The only valid task type for the model to perform
             is `completion`.
@@ -673,6 +809,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -699,6 +837,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -744,6 +884,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -751,14 +892,9 @@ class InferenceClient(NamespacedClient):
 
             <p>Create an Azure AI studio inference endpoint.</p>
             <p>Create an inference endpoint to perform an inference task with the <code>azureaistudio</code> service.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureaistudio>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param azureaistudio_inference_id: The unique identifier of the inference endpoint.
@@ -769,6 +905,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -795,6 +933,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -840,6 +980,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -853,14 +994,9 @@ class InferenceClient(NamespacedClient):
             <li><a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35">GPT-3.5</a></li>
             </ul>
             <p>The list of embeddings models that you can choose from in your deployment can be found in the <a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings">Azure models documentation</a>.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureopenai>`_
 
         :param task_type: The type of the inference task that the model will perform.
             NOTE: The `chat_completion` task type only supports streaming and only through
@@ -873,6 +1009,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -899,6 +1037,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -944,6 +1084,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -951,14 +1092,9 @@ class InferenceClient(NamespacedClient):
 
             <p>Create a Cohere inference endpoint.</p>
             <p>Create an inference endpoint to perform an inference task with the <code>cohere</code> service.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-cohere>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param cohere_inference_id: The unique identifier of the inference endpoint.
@@ -969,6 +1105,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -993,6 +1131,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1017,6 +1157,221 @@ class InferenceClient(NamespacedClient):
             path_parts=__path_parts,
         )
 
+    @_rewrite_parameters(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
+    )
+    async def put_custom(
+        self,
+        *,
+        task_type: t.Union[
+            str, t.Literal["completion", "rerank", "sparse_embedding", "text_embedding"]
+        ],
+        custom_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["custom"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+            <p>Create a custom inference endpoint.</p>
+            <p>The custom service gives more control over how to interact with external inference services that aren't explicitly supported through dedicated integrations.
+            The custom service gives you the ability to define the headers, url, query parameters, request body, and secrets.
+            The custom service supports the template replacement functionality, which enables you to define a template that can be replaced with the value associated with that key.
+            Templates are portions of a string that start with <code>${</code> and end with <code>}</code>.
+            The parameters <code>secret_parameters</code> and <code>task_settings</code> are checked for keys for template replacement. Template replacement is supported in the <code>request</code>, <code>headers</code>, <code>url</code>, and <code>query_parameters</code>.
+            If the definition (key) is not found for a template, an error message is returned.
+            In case of an endpoint definition like the following:</p>
+            <pre><code>PUT _inference/text_embedding/test-text-embedding
+            {
+              "service": "custom",
+              "service_settings": {
+                "secret_parameters": {
+                  "api_key": "<some api key>"
+                },
+                "url": "...endpoints.huggingface.cloud/v1/embeddings",
+                "headers": {
+                  "Authorization": "Bearer ${api_key}",
+                  "Content-Type": "application/json"
+                },
+                "request": "{\\"input\\": ${input}}",
+                "response": {
+                  "json_parser": {
+                    "text_embeddings":"$.data[*].embedding[*]"
+                  }
+                }
+              }
+            }
+            </code></pre>
+            <p>To replace <code>${api_key}</code> the <code>secret_parameters</code> and <code>task_settings</code> are checked for a key named <code>api_key</code>.</p>
+            <blockquote>
+            <p>info
+            Templates should not be surrounded by quotes.</p>
+            </blockquote>
+            <p>Pre-defined templates:</p>
+            <ul>
+            <li><code>${input}</code> refers to the array of input strings that comes from the <code>input</code> field of the subsequent inference requests.</li>
+            <li><code>${input_type}</code> refers to the input type translation values.</li>
+            <li><code>${query}</code> refers to the query field used specifically for reranking tasks.</li>
+            <li><code>${top_n}</code> refers to the <code>top_n</code> field available when performing rerank requests.</li>
+            <li><code>${return_documents}</code> refers to the <code>return_documents</code> field available when performing rerank requests.</li>
+            </ul>
+
+
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param custom_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `custom`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `custom` service.
+        :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type you specified.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if custom_inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'custom_inference_id'")
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "custom_inference_id": _quote(custom_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["custom_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_custom",
+            path_parts=__path_parts,
+        )
+
+    @_rewrite_parameters(
+        body_fields=("service", "service_settings", "chunking_settings"),
+    )
+    async def put_deepseek(
+        self,
+        *,
+        task_type: t.Union[str, t.Literal["chat_completion", "completion"]],
+        deepseek_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["deepseek"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+            <p>Create a DeepSeek inference endpoint.</p>
+            <p>Create an inference endpoint to perform an inference task with the <code>deepseek</code> service.</p>
+
+
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-deepseek>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param deepseek_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `deepseek`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `deepseek` service.
+        :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if deepseek_inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'deepseek_inference_id'")
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "deepseek_inference_id": _quote(deepseek_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["deepseek_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_deepseek",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_fields=(
             "service",
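The template mechanism described in the `put_custom()` docstring translates directly into client arguments. This sketch mirrors the endpoint definition quoted in that docstring; the API key and the truncated URL are placeholders taken from the docstring itself:

```python
from elasticsearch import AsyncElasticsearch

client = AsyncElasticsearch("http://localhost:9200")

async def create_custom_endpoint() -> None:
    await client.inference.put_custom(
        task_type="text_embedding",
        custom_inference_id="test-text-embedding",
        service="custom",
        service_settings={
            "secret_parameters": {"api_key": "<some api key>"},
            "url": "...endpoints.huggingface.cloud/v1/embeddings",
            "headers": {
                # ${api_key} is resolved from secret_parameters at request time
                "Authorization": "Bearer ${api_key}",
                "Content-Type": "application/json",
            },
            # ${input} is a pre-defined template for the inference input array
            "request": '{"input": ${input}}',
            "response": {
                "json_parser": {"text_embeddings": "$.data[*].embedding[*]"}
            },
        },
    )
```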
@@ -1040,6 +1395,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1040
1395
|
human: t.Optional[bool] = None,
|
|
1041
1396
|
pretty: t.Optional[bool] = None,
|
|
1042
1397
|
task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
|
|
1398
|
+
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
1043
1399
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
1044
1400
|
) -> ObjectApiResponse[t.Any]:
|
|
1045
1401
|
"""
|
|
@@ -1062,7 +1418,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1062
1418
|
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
|
|
1063
1419
|
|
|
1064
1420
|
|
|
1065
|
-
`<https://www.elastic.co/docs/api/doc/elasticsearch/
|
|
1421
|
+
`<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-elasticsearch>`_
|
|
1066
1422
|
|
|
1067
1423
|
:param task_type: The type of the inference task that the model will perform.
|
|
1068
1424
|
:param elasticsearch_inference_id: The unique identifier of the inference endpoint.
|
|
@@ -1074,6 +1430,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1074
1430
|
:param chunking_settings: The chunking configuration object.
|
|
1075
1431
|
:param task_settings: Settings to configure the inference task. These settings
|
|
1076
1432
|
are specific to the task type you specified.
|
|
1433
|
+
:param timeout: Specifies the amount of time to wait for the inference endpoint
|
|
1434
|
+
to be created.
|
|
1077
1435
|
"""
|
|
1078
1436
|
if task_type in SKIP_IN_PATH:
|
|
1079
1437
|
raise ValueError("Empty value passed for parameter 'task_type'")
|
|
@@ -1100,6 +1458,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1100
1458
|
__query["human"] = human
|
|
1101
1459
|
if pretty is not None:
|
|
1102
1460
|
__query["pretty"] = pretty
|
|
1461
|
+
if timeout is not None:
|
|
1462
|
+
__query["timeout"] = timeout
|
|
1103
1463
|
if not __body:
|
|
1104
1464
|
if service is not None:
|
|
1105
1465
|
__body["service"] = service
|
|
@@ -1139,6 +1499,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1139
1499
|
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
|
|
1140
1500
|
human: t.Optional[bool] = None,
|
|
1141
1501
|
pretty: t.Optional[bool] = None,
|
|
1502
|
+
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
1142
1503
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
1143
1504
|
) -> ObjectApiResponse[t.Any]:
|
|
1144
1505
|
"""
|
|
@@ -1162,7 +1523,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1162
1523
|
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
|
|
1163
1524
|
|
|
1164
1525
|
|
|
1165
|
-
`<https://www.elastic.co/docs/api/doc/elasticsearch/
|
|
1526
|
+
`<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-elser>`_
|
|
1166
1527
|
|
|
1167
1528
|
:param task_type: The type of the inference task that the model will perform.
|
|
1168
1529
|
:param elser_inference_id: The unique identifier of the inference endpoint.
|
|
@@ -1171,6 +1532,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1171
1532
|
:param service_settings: Settings used to install the inference model. These
|
|
1172
1533
|
settings are specific to the `elser` service.
|
|
1173
1534
|
:param chunking_settings: The chunking configuration object.
|
|
1535
|
+
:param timeout: Specifies the amount of time to wait for the inference endpoint
|
|
1536
|
+
to be created.
|
|
1174
1537
|
"""
|
|
1175
1538
|
if task_type in SKIP_IN_PATH:
|
|
1176
1539
|
raise ValueError("Empty value passed for parameter 'task_type'")
|
|
@@ -1195,6 +1558,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1195
1558
|
__query["human"] = human
|
|
1196
1559
|
if pretty is not None:
|
|
1197
1560
|
__query["pretty"] = pretty
|
|
1561
|
+
if timeout is not None:
|
|
1562
|
+
__query["timeout"] = timeout
|
|
1198
1563
|
if not __body:
|
|
1199
1564
|
if service is not None:
|
|
1200
1565
|
__body["service"] = service
|
|
@@ -1232,6 +1597,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1232
1597
|
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
|
|
1233
1598
|
human: t.Optional[bool] = None,
|
|
1234
1599
|
pretty: t.Optional[bool] = None,
|
|
1600
|
+
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
1235
1601
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
1236
1602
|
) -> ObjectApiResponse[t.Any]:
|
|
1237
1603
|
"""
|
|
@@ -1239,14 +1605,9 @@ class InferenceClient(NamespacedClient):
|
|
|
1239
1605
|
|
|
1240
1606
|
<p>Create an Google AI Studio inference endpoint.</p>
|
|
1241
1607
|
<p>Create an inference endpoint to perform an inference task with the <code>googleaistudio</code> service.</p>
|
|
1242
|
-
<p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
|
|
1243
|
-
After creating the endpoint, wait for the model deployment to complete before using it.
|
|
1244
|
-
To verify the deployment status, use the get trained model statistics API.
|
|
1245
|
-
Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
|
|
1246
|
-
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
|
|
1247
1608
|
|
|
1248
1609
|
|
|
1249
|
-
`<https://www.elastic.co/docs/api/doc/elasticsearch/
|
|
1610
|
+
`<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-googleaistudio>`_
|
|
1250
1611
|
|
|
1251
1612
|
:param task_type: The type of the inference task that the model will perform.
|
|
1252
1613
|
:param googleaistudio_inference_id: The unique identifier of the inference endpoint.
|
|
@@ -1255,6 +1616,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1255
1616
|
:param service_settings: Settings used to install the inference model. These
|
|
1256
1617
|
settings are specific to the `googleaistudio` service.
|
|
1257
1618
|
:param chunking_settings: The chunking configuration object.
|
|
1619
|
+
:param timeout: Specifies the amount of time to wait for the inference endpoint
|
|
1620
|
+
to be created.
|
|
1258
1621
|
"""
|
|
1259
1622
|
if task_type in SKIP_IN_PATH:
|
|
1260
1623
|
raise ValueError("Empty value passed for parameter 'task_type'")
|
|
@@ -1281,6 +1644,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1281
1644
|
__query["human"] = human
|
|
1282
1645
|
if pretty is not None:
|
|
1283
1646
|
__query["pretty"] = pretty
|
|
1647
|
+
if timeout is not None:
|
|
1648
|
+
__query["timeout"] = timeout
|
|
1284
1649
|
if not __body:
|
|
1285
1650
|
if service is not None:
|
|
1286
1651
|
__body["service"] = service
|
|
@@ -1314,7 +1679,9 @@ class InferenceClient(NamespacedClient):
     async def put_googlevertexai(
         self,
         *,
-        task_type: t.Union[
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
+        ],
         googlevertexai_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["googlevertexai"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1324,6 +1691,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1331,14 +1699,9 @@ class InferenceClient(NamespacedClient):

           <p>Create a Google Vertex AI inference endpoint.</p>
           <p>Create an inference endpoint to perform an inference task with the <code>googlevertexai</code> service.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-googlevertexai>`_

         :param task_type: The type of the inference task that the model will perform.
         :param googlevertexai_inference_id: The unique identifier of the inference endpoint.
@@ -1349,6 +1712,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1375,6 +1740,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
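This change also widens the `task_type` annotation for `put_googlevertexai` to accept `chat_completion`, `completion`, `rerank`, and `text_embedding`. A sketch of a call that is only covered by the 9.1.0 type hints; the ID and `service_settings` keys are assumptions, not values from this diff (reusing `client` from the first sketch):

    async def create_vertex_chat_endpoint() -> None:
        await client.inference.put_googlevertexai(
            task_type="chat_completion",  # newly covered by the 9.1.0 type hints
            googlevertexai_inference_id="vertex-chat",  # hypothetical ID
            service="googlevertexai",
            service_settings={  # assumed keys
                "service_account_json": "<json>",
                "model_id": "gemini-1.5-pro",
                "location": "us-central1",
                "project_id": "my-gcp-project",
            },
            timeout="30s",
        )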
@@ -1400,12 +1767,19 @@ class InferenceClient(NamespacedClient):
     )

     @_rewrite_parameters(
-        body_fields=(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
     )
     async def put_hugging_face(
         self,
         *,
-        task_type: t.Union[
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
+        ],
         huggingface_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["hugging_face"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1414,17 +1788,22 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html

           <p>Create a Hugging Face inference endpoint.</p>
-          <p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service
-
-
-
-          <p>
+          <p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service.
+          Supported tasks include: <code>text_embedding</code>, <code>completion</code>, and <code>chat_completion</code>.</p>
+          <p>To configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.
+          Select a model that supports the task you intend to use.</p>
+          <p>For Elastic's <code>text_embedding</code> task:
+          The selected model must support the <code>Sentence Embeddings</code> task. On the new endpoint creation page, select the <code>Sentence Embeddings</code> task under the <code>Advanced Configuration</code> section.
+          After the endpoint has initialized, copy the generated endpoint URL.
+          Recommended models for <code>text_embedding</code> task:</p>
           <ul>
           <li><code>all-MiniLM-L6-v2</code></li>
           <li><code>all-MiniLM-L12-v2</code></li>
@@ -1434,14 +1813,27 @@ class InferenceClient(NamespacedClient):
           <li><code>multilingual-e5-base</code></li>
           <li><code>multilingual-e5-small</code></li>
           </ul>
-          <p>
-
-
-
-
+          <p>For Elastic's <code>chat_completion</code> and <code>completion</code> tasks:
+          The selected model must support the <code>Text Generation</code> task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for <code>Text Generation</code>. When creating dedicated endpoint select the <code>Text Generation</code> task.
+          After the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes <code>/v1/chat/completions</code> part in URL. Then, copy the full endpoint URL for use.
+          Recommended models for <code>chat_completion</code> and <code>completion</code> tasks:</p>
+          <ul>
+          <li><code>Mistral-7B-Instruct-v0.2</code></li>
+          <li><code>QwQ-32B</code></li>
+          <li><code>Phi-3-mini-128k-instruct</code></li>
+          </ul>
+          <p>For Elastic's <code>rerank</code> task:
+          The selected model must support the <code>sentence-ranking</code> task and expose OpenAI API.
+          HuggingFace supports only dedicated (not serverless) endpoints for <code>Rerank</code> so far.
+          After the endpoint is initialized, copy the full endpoint URL for use.
+          Tested models for <code>rerank</code> task:</p>
+          <ul>
+          <li><code>bge-reranker-base</code></li>
+          <li><code>jina-reranker-v1-turbo-en-GGUF</code></li>
+          </ul>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-hugging-face>`_

         :param task_type: The type of the inference task that the model will perform.
         :param huggingface_inference_id: The unique identifier of the inference endpoint.
@@ -1450,6 +1842,10 @@ class InferenceClient(NamespacedClient):
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `hugging_face` service.
         :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1476,6 +1872,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1483,6 +1881,8 @@ class InferenceClient(NamespacedClient):
             __body["service_settings"] = service_settings
         if chunking_settings is not None:
             __body["chunking_settings"] = chunking_settings
+        if task_settings is not None:
+            __body["task_settings"] = task_settings
         if not __body:
             __body = None  # type: ignore[assignment]
         __headers = {"accept": "application/json"}
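Beyond the `timeout` plumbing, `put_hugging_face` now accepts `task_settings` and the `chat_completion`, `completion`, and `rerank` task types. Following the new docstring, a `chat_completion` endpoint URL must include the `/v1/chat/completions` path. A sketch; the ID, key, and URL are placeholders (reusing `client` from the first sketch):

    async def create_hf_chat_endpoint() -> None:
        await client.inference.put_hugging_face(
            task_type="chat_completion",
            huggingface_inference_id="hf-chat",  # hypothetical ID
            service="hugging_face",
            service_settings={  # assumed keys
                "api_key": "<key>",
                # Per the docstring, the OpenAI-compatible route is part of the URL.
                "url": "https://<endpoint>.endpoints.huggingface.cloud/v1/chat/completions",
            },
            timeout="30s",
        )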
@@ -1519,6 +1919,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1528,14 +1929,9 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>jinaai</code> service.</p>
           <p>To review the available <code>rerank</code> models, refer to <a href="https://jina.ai/reranker">https://jina.ai/reranker</a>.
           To review the available <code>text_embedding</code> models, refer to the <a href="https://jina.ai/embeddings/">https://jina.ai/embeddings/</a>.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-jinaai>`_

         :param task_type: The type of the inference task that the model will perform.
         :param jinaai_inference_id: The unique identifier of the inference endpoint.
@@ -1546,6 +1942,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1570,6 +1968,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
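`put_jinaai` only gains the `timeout` parameter here; its body logic is otherwise unchanged. A sketch, with the model and key values as assumptions (reusing `client` from the first sketch):

    async def create_jina_rerank_endpoint() -> None:
        await client.inference.put_jinaai(
            task_type="rerank",
            jinaai_inference_id="jina-rerank",  # hypothetical ID
            service="jinaai",
            service_settings={"api_key": "<key>", "model_id": "jina-reranker-v2-base-multilingual"},  # assumed
            timeout="30s",
        )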
@@ -1600,7 +2000,9 @@ class InferenceClient(NamespacedClient):
     async def put_mistral(
         self,
         *,
-        task_type: t.Union[
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "text_embedding"]
+        ],
         mistral_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["mistral"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1609,30 +2011,27 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html

           <p>Create a Mistral inference endpoint.</p>
-          <p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+          <p>Create an inference endpoint to perform an inference task with the <code>mistral</code> service.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-mistral>`_

-        :param task_type: The
-            is `text_embedding`.
+        :param task_type: The type of the inference task that the model will perform.
         :param mistral_inference_id: The unique identifier of the inference endpoint.
         :param service: The type of service supported for the specified task type. In
             this case, `mistral`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `mistral` service.
         :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1657,6 +2056,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
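`put_mistral` picks up the widened `task_type` literal (`chat_completion`, `completion`, `text_embedding`) plus `timeout`, and its truncated docstring lines are repaired. A sketch, with key and model names as assumptions (reusing `client` from the first sketch):

    async def create_mistral_chat_endpoint() -> None:
        await client.inference.put_mistral(
            task_type="chat_completion",  # newly covered by the 9.1.0 type hints
            mistral_inference_id="mistral-chat",  # hypothetical ID
            service="mistral",
            service_settings={"api_key": "<key>", "model": "mistral-large-latest"},  # assumed keys
            timeout="30s",
        )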
@@ -1702,21 +2103,17 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html

           <p>Create an OpenAI inference endpoint.</p>
-          <p>Create an inference endpoint to perform an inference task with the <code>openai</code> service.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+          <p>Create an inference endpoint to perform an inference task with the <code>openai</code> service or <code>openai</code> compatible APIs.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openai>`_

         :param task_type: The type of the inference task that the model will perform.
             NOTE: The `chat_completion` task type only supports streaming and only through
@@ -1729,6 +2126,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1753,6 +2152,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
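The `put_openai` docstring now covers OpenAI-compatible APIs explicitly. A sketch that points the endpoint at a self-hosted compatible server; the `url` and `model_id` settings keys are assumptions, not values from this diff (reusing `client` from the first sketch):

    async def create_openai_compatible_endpoint() -> None:
        await client.inference.put_openai(
            task_type="completion",
            openai_inference_id="local-llm",  # hypothetical ID
            service="openai",
            service_settings={  # assumed keys
                "api_key": "unused-but-required",
                "model_id": "llama-3-8b-instruct",
                "url": "http://localhost:8000/v1/chat/completions",
            },
            timeout="30s",
        )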
@@ -1798,6 +2199,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1808,7 +2210,7 @@ class InferenceClient(NamespacedClient):
           <p>Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-voyageai>`_

         :param task_type: The type of the inference task that the model will perform.
         :param voyageai_inference_id: The unique identifier of the inference endpoint.
@@ -1819,6 +2221,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1843,6 +2247,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
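`put_voyageai` likewise only gains the `timeout` parameter. A short sketch, with the model name as an assumption (reusing `client` from the first sketch):

    async def create_voyage_embedding_endpoint() -> None:
        await client.inference.put_voyageai(
            task_type="text_embedding",
            voyageai_inference_id="voyage-embeddings",  # hypothetical ID
            service="voyageai",
            service_settings={"model_id": "voyage-3"},  # assumed key
            timeout="30s",
        )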
@@ -1873,7 +2279,9 @@ class InferenceClient(NamespacedClient):
     async def put_watsonx(
         self,
         *,
-        task_type: t.Union[
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "text_embedding"]
+        ],
         watsonx_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["watsonxai"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1881,6 +2289,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1890,22 +2299,18 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>watsonxai</code> service.
           You need an IBM Cloud Databases for Elasticsearch deployment to use the <code>watsonxai</code> inference service.
           You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-watsonx>`_

-        :param task_type: The
-            is `text_embedding`.
+        :param task_type: The type of the inference task that the model will perform.
         :param watsonx_inference_id: The unique identifier of the inference endpoint.
         :param service: The type of service supported for the specified task type. In
             this case, `watsonxai`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `watsonxai` service.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1930,6 +2335,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
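`put_watsonx` gets the widened `task_type` literal and `timeout`, and its truncated `:param task_type:` doc lines are repaired. A sketch; the Watsonx `service_settings` keys and values below are assumptions, not taken from this diff (reusing `client` from the first sketch):

    async def create_watsonx_chat_endpoint() -> None:
        await client.inference.put_watsonx(
            task_type="chat_completion",  # newly covered by the 9.1.0 type hints
            watsonx_inference_id="watsonx-chat",  # hypothetical ID
            service="watsonxai",
            service_settings={  # assumed keys
                "api_key": "<key>",
                "url": "https://us-south.ml.cloud.ibm.com",
                "model_id": "ibm/granite-13b-chat-v2",
                "project_id": "<project-id>",
                "api_version": "2024-05-31",
            },
            timeout="30s",
        )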
@@ -1970,10 +2377,10 @@ class InferenceClient(NamespacedClient):
         """
         .. raw:: html

-          <p>Perform
+          <p>Perform reranking inference on the service</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_

         :param inference_id: The unique identifier for the inference endpoint.
         :param input: The text on which you want to perform the inference task. It can
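The remaining hunks only touch docstrings: the rerank description is completed and each stub doc URL is replaced with its full operation link. For reference, a sketch of calling the rerank helper documented here; the `query` keyword is an assumption about the signature, since only `inference_id` and `input` are visible in this hunk (reusing `client` from the first sketch):

    async def rerank_passages() -> None:
        resp = await client.inference.rerank(
            inference_id="jina-rerank",  # hypothetical ID
            query="what is elasticsearch?",  # assumed keyword
            input=[
                "Elasticsearch is a distributed search and analytics engine.",
                "The sky is blue.",
            ],
        )
        print(resp)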
@@ -2049,7 +2456,7 @@ class InferenceClient(NamespacedClient):
           <p>Perform sparse embedding inference on the service</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_

         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
@@ -2117,7 +2524,7 @@ class InferenceClient(NamespacedClient):
           <p>Perform text embedding inference on the service</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_

         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
@@ -2199,7 +2606,7 @@ class InferenceClient(NamespacedClient):
             However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-update>`_

         :param inference_id: The unique identifier of the inference endpoint.
         :param inference_config: