elasticsearch 8.18.1__py3-none-any.whl → 8.19.1__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
- elasticsearch/_async/client/__init__.py +68 -82
- elasticsearch/_async/client/async_search.py +5 -9
- elasticsearch/_async/client/autoscaling.py +4 -4
- elasticsearch/_async/client/cat.py +744 -75
- elasticsearch/_async/client/ccr.py +13 -13
- elasticsearch/_async/client/cluster.py +38 -24
- elasticsearch/_async/client/connector.py +30 -30
- elasticsearch/_async/client/dangling_indices.py +3 -3
- elasticsearch/_async/client/enrich.py +5 -5
- elasticsearch/_async/client/eql.py +13 -5
- elasticsearch/_async/client/esql.py +54 -15
- elasticsearch/_async/client/features.py +2 -2
- elasticsearch/_async/client/fleet.py +13 -13
- elasticsearch/_async/client/graph.py +1 -1
- elasticsearch/_async/client/ilm.py +11 -11
- elasticsearch/_async/client/indices.py +132 -83
- elasticsearch/_async/client/inference.py +519 -112
- elasticsearch/_async/client/ingest.py +9 -16
- elasticsearch/_async/client/license.py +10 -10
- elasticsearch/_async/client/logstash.py +3 -3
- elasticsearch/_async/client/migration.py +3 -3
- elasticsearch/_async/client/ml.py +76 -88
- elasticsearch/_async/client/nodes.py +9 -8
- elasticsearch/_async/client/query_rules.py +8 -8
- elasticsearch/_async/client/rollup.py +8 -8
- elasticsearch/_async/client/search_application.py +10 -10
- elasticsearch/_async/client/searchable_snapshots.py +4 -4
- elasticsearch/_async/client/security.py +72 -80
- elasticsearch/_async/client/shutdown.py +3 -3
- elasticsearch/_async/client/simulate.py +1 -1
- elasticsearch/_async/client/slm.py +9 -9
- elasticsearch/_async/client/snapshot.py +286 -130
- elasticsearch/_async/client/sql.py +7 -7
- elasticsearch/_async/client/ssl.py +1 -1
- elasticsearch/_async/client/synonyms.py +7 -7
- elasticsearch/_async/client/tasks.py +3 -3
- elasticsearch/_async/client/text_structure.py +4 -4
- elasticsearch/_async/client/transform.py +69 -9
- elasticsearch/_async/client/xpack.py +1 -1
- elasticsearch/_sync/client/__init__.py +68 -82
- elasticsearch/_sync/client/async_search.py +5 -9
- elasticsearch/_sync/client/autoscaling.py +4 -4
- elasticsearch/_sync/client/cat.py +744 -75
- elasticsearch/_sync/client/ccr.py +13 -13
- elasticsearch/_sync/client/cluster.py +38 -24
- elasticsearch/_sync/client/connector.py +30 -30
- elasticsearch/_sync/client/dangling_indices.py +3 -3
- elasticsearch/_sync/client/enrich.py +5 -5
- elasticsearch/_sync/client/eql.py +13 -5
- elasticsearch/_sync/client/esql.py +54 -15
- elasticsearch/_sync/client/features.py +2 -2
- elasticsearch/_sync/client/fleet.py +13 -13
- elasticsearch/_sync/client/graph.py +1 -1
- elasticsearch/_sync/client/ilm.py +11 -11
- elasticsearch/_sync/client/indices.py +132 -83
- elasticsearch/_sync/client/inference.py +519 -112
- elasticsearch/_sync/client/ingest.py +9 -16
- elasticsearch/_sync/client/license.py +10 -10
- elasticsearch/_sync/client/logstash.py +3 -3
- elasticsearch/_sync/client/migration.py +3 -3
- elasticsearch/_sync/client/ml.py +76 -88
- elasticsearch/_sync/client/nodes.py +9 -8
- elasticsearch/_sync/client/query_rules.py +8 -8
- elasticsearch/_sync/client/rollup.py +8 -8
- elasticsearch/_sync/client/search_application.py +10 -10
- elasticsearch/_sync/client/searchable_snapshots.py +4 -4
- elasticsearch/_sync/client/security.py +72 -80
- elasticsearch/_sync/client/shutdown.py +3 -3
- elasticsearch/_sync/client/simulate.py +1 -1
- elasticsearch/_sync/client/slm.py +9 -9
- elasticsearch/_sync/client/snapshot.py +286 -130
- elasticsearch/_sync/client/sql.py +7 -7
- elasticsearch/_sync/client/ssl.py +1 -1
- elasticsearch/_sync/client/synonyms.py +7 -7
- elasticsearch/_sync/client/tasks.py +3 -3
- elasticsearch/_sync/client/text_structure.py +4 -4
- elasticsearch/_sync/client/transform.py +69 -9
- elasticsearch/_sync/client/xpack.py +1 -1
- elasticsearch/_version.py +1 -1
- elasticsearch/compat.py +5 -0
- elasticsearch/dsl/__init__.py +2 -1
- elasticsearch/dsl/_async/document.py +85 -1
- elasticsearch/dsl/_sync/document.py +85 -1
- elasticsearch/dsl/aggs.py +22 -3
- elasticsearch/dsl/document_base.py +219 -16
- elasticsearch/dsl/field.py +272 -48
- elasticsearch/dsl/query.py +49 -4
- elasticsearch/dsl/response/aggs.py +1 -1
- elasticsearch/dsl/types.py +247 -27
- elasticsearch/dsl/utils.py +2 -2
- elasticsearch/esql/__init__.py +19 -0
- elasticsearch/esql/esql.py +1156 -0
- elasticsearch/esql/functions.py +1750 -0
- elasticsearch/exceptions.py +2 -0
- {elasticsearch-8.18.1.dist-info → elasticsearch-8.19.1.dist-info}/METADATA +1 -5
- elasticsearch-8.19.1.dist-info/RECORD +164 -0
- elasticsearch-8.18.1.dist-info/RECORD +0 -163
- elasticsearch-8.18.1.dist-info/licenses/LICENSE.txt +0 -175
- elasticsearch-8.18.1.dist-info/licenses/NOTICE.txt +0 -559
- {elasticsearch-8.18.1.dist-info → elasticsearch-8.19.1.dist-info}/WHEEL +0 -0
- {elasticsearch-8.18.1.dist-info → elasticsearch-8.19.1.dist-info}/licenses/LICENSE +0 -0
- {elasticsearch-8.18.1.dist-info → elasticsearch-8.19.1.dist-info}/licenses/NOTICE +0 -0
--- a/elasticsearch/_async/client/inference.py
+++ b/elasticsearch/_async/client/inference.py
@@ -47,7 +47,7 @@ class InferenceClient(NamespacedClient):
             <p>Perform completion inference on the service</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_

         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
@@ -123,7 +123,7 @@ class InferenceClient(NamespacedClient):
             <p>Delete an inference endpoint</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/delete-inference-api.html>`_

         :param inference_id: The inference identifier.
         :param task_type: The task type
@@ -197,7 +197,7 @@ class InferenceClient(NamespacedClient):
             <p>Get an inference endpoint</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/get-inference-api.html>`_

         :param task_type: The task type
         :param inference_id: The inference Id
@@ -235,7 +235,7 @@ class InferenceClient(NamespacedClient):
         )

     @_rewrite_parameters(
-        body_fields=("input", "query", "task_settings"),
+        body_fields=("input", "input_type", "query", "task_settings"),
     )
     async def inference(
         self,
@@ -257,6 +257,7 @@ class InferenceClient(NamespacedClient):
         error_trace: t.Optional[bool] = None,
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
+        input_type: t.Optional[str] = None,
         pretty: t.Optional[bool] = None,
         query: t.Optional[str] = None,
         task_settings: t.Optional[t.Any] = None,
@@ -277,13 +278,22 @@ class InferenceClient(NamespacedClient):
             </blockquote>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_

         :param inference_id: The unique identifier for the inference endpoint.
         :param input: The text on which you want to perform the inference task. It can
             be a single string or an array. > info > Inference endpoints for the `completion`
             task type currently only support a single string as input.
         :param task_type: The type of inference task that the model performs.
+        :param input_type: Specifies the input data type for the text embedding model.
+            The `input_type` parameter only applies to Inference Endpoints with the `text_embedding`
+            task type. Possible values include: * `SEARCH` * `INGEST` * `CLASSIFICATION`
+            * `CLUSTERING` Not all services support all values. Unsupported values will
+            trigger a validation exception. Accepted values depend on the configured
+            inference service, refer to the relevant service-specific documentation for
+            more info. > info > The `input_type` parameter specified on the root level
+            of the request body will take precedence over the `input_type` parameter
+            specified in `task_settings`.
         :param query: The query input, which is required only for the `rerank` task.
             It is not required for other tasks.
         :param task_settings: Task settings for the individual inference request. These
@@ -322,6 +332,8 @@ class InferenceClient(NamespacedClient):
         if not __body:
             if input is not None:
                 __body["input"] = input
+            if input_type is not None:
+                __body["input_type"] = input_type
             if query is not None:
                 __body["query"] = query
             if task_settings is not None:
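Together, the hunks above add a top-level `input_type` option to the generic `inference()` helper and thread it into the request body. A minimal usage sketch (the endpoint name and connection details are hypothetical, and accepted `input_type` values vary by service, as the docstring warns):

```python
import asyncio

from elasticsearch import AsyncElasticsearch


async def main() -> None:
    # Hypothetical cluster address; substitute your own connection and auth.
    client = AsyncElasticsearch("http://localhost:9200")

    # `input_type` only applies to `text_embedding` endpoints; unsupported
    # values trigger a server-side validation exception.
    resp = await client.inference.inference(
        task_type="text_embedding",
        inference_id="my-embedding-endpoint",  # assumed to already exist
        input=["first passage", "second passage"],
        input_type="SEARCH",
    )
    print(resp)
    await client.close()


asyncio.run(main())
```

Per the docstring, the root-level value takes precedence over any `input_type` set in `task_settings`.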
@@ -366,26 +378,47 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html

-            <p>Create an inference endpoint
-            When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+            <p>Create an inference endpoint.</p>
             <p>IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.
             For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.
             However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>
+            <p>The following integrations are available through the inference API. You can find the available task types next to the integration name:</p>
+            <ul>
+            <li>AlibabaCloud AI Search (<code>completion</code>, <code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code>)</li>
+            <li>Amazon Bedrock (<code>completion</code>, <code>text_embedding</code>)</li>
+            <li>Amazon SageMaker (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code>)</li>
+            <li>Anthropic (<code>completion</code>)</li>
+            <li>Azure AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+            <li>Azure OpenAI (<code>completion</code>, <code>text_embedding</code>)</li>
+            <li>Cohere (<code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+            <li>DeepSeek (<code>chat_completion</code>, <code>completion</code>)</li>
+            <li>Elasticsearch (<code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code> - this service is for built-in models and models uploaded through Eland)</li>
+            <li>ELSER (<code>sparse_embedding</code>)</li>
+            <li>Google AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+            <li>Google Vertex AI (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+            <li>Hugging Face (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+            <li>JinaAI (<code>rerank</code>, <code>text_embedding</code>)</li>
+            <li>Llama (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+            <li>Mistral (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+            <li>OpenAI (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+            <li>VoyageAI (<code>rerank</code>, <code>text_embedding</code>)</li>
+            <li>Watsonx inference integration (<code>text_embedding</code>)</li>
+            </ul>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/put-inference-api.html>`_

         :param inference_id: The inference Id
         :param inference_config:
-        :param task_type: The task type
+        :param task_type: The task type. Refer to the integration list in the API description
+            for the available task types.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if inference_id in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'inference_id'")
@@ -416,6 +449,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         __body = inference_config if inference_config is not None else body
         __headers = {"accept": "application/json", "content-type": "application/json"}
         return await self.perform_request(  # type: ignore[return-value]
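The hunk above also introduces the new `timeout` query parameter on `put`, which most of the service-specific `put_*` helpers below gain as well. A hedged sketch, reusing the `client` from the earlier example (the endpoint id and service configuration are illustrative, not taken from this diff):

```python
# `timeout` is serialized into the query string; "60s" caps how long the
# call waits for the inference endpoint to be created.
resp = await client.inference.put(
    task_type="text_embedding",
    inference_id="my-e5-endpoint",  # hypothetical endpoint id
    inference_config={
        "service": "elasticsearch",
        "service_settings": {
            "model_id": ".multilingual-e5-small",
            "num_allocations": 1,
            "num_threads": 1,
        },
    },
    timeout="60s",
)
```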
@@ -451,6 +486,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -458,14 +494,9 @@ class InferenceClient(NamespacedClient):

             <p>Create an AlibabaCloud AI Search inference endpoint.</p>
             <p>Create an inference endpoint to perform an inference task with the <code>alibabacloud-ai-search</code> service.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-alibabacloud-ai-search.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param alibabacloud_inference_id: The unique identifier of the inference endpoint.
@@ -476,6 +507,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -502,6 +535,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -547,25 +582,21 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html

             <p>Create an Amazon Bedrock inference endpoint.</p>
-            <p>
+            <p>Create an inference endpoint to perform an inference task with the <code>amazonbedrock</code> service.</p>
             <blockquote>
             <p>info
             You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.</p>
             </blockquote>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-amazon-bedrock.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param amazonbedrock_inference_id: The unique identifier of the inference endpoint.
@@ -576,6 +607,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -602,6 +635,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -626,6 +661,112 @@ class InferenceClient(NamespacedClient):
             path_parts=__path_parts,
         )

+    @_rewrite_parameters(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
+    )
+    async def put_amazonsagemaker(
+        self,
+        *,
+        task_type: t.Union[
+            str,
+            t.Literal[
+                "chat_completion",
+                "completion",
+                "rerank",
+                "sparse_embedding",
+                "text_embedding",
+            ],
+        ],
+        amazonsagemaker_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["amazon_sagemaker"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+            <p>Create an Amazon SageMaker inference endpoint.</p>
+            <p>Create an inference endpoint to perform an inference task with the <code>amazon_sagemaker</code> service.</p>
+
+
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonsagemaker>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param amazonsagemaker_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `amazon_sagemaker`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `amazon_sagemaker` service and `service_settings.api`
+            you specified.
+        :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type and `service_settings.api` you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if amazonsagemaker_inference_id in SKIP_IN_PATH:
+            raise ValueError(
+                "Empty value passed for parameter 'amazonsagemaker_inference_id'"
+            )
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "amazonsagemaker_inference_id": _quote(amazonsagemaker_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["amazonsagemaker_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
+            if not __body:
+                __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_amazonsagemaker",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_fields=(
             "service",
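`put_amazonsagemaker()` is entirely new in 8.19. A minimal, hypothetical call; the endpoint name, region, keys, and the exact `service_settings` schema come from the `amazon_sagemaker` service documentation rather than this diff:

```python
# All values below are placeholders; see the amazon_sagemaker service docs
# for the authoritative service_settings keys.
resp = await client.inference.put_amazonsagemaker(
    task_type="text_embedding",
    amazonsagemaker_inference_id="my-sagemaker-endpoint",
    service="amazon_sagemaker",
    service_settings={
        "access_key": "<aws access key>",
        "secret_key": "<aws secret key>",
        "endpoint_name": "my-deployed-model",
        "api": "openai",
        "region": "us-east-1",
    },
    timeout="30s",
)
```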
@@ -647,6 +788,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -654,14 +796,9 @@ class InferenceClient(NamespacedClient):

             <p>Create an Anthropic inference endpoint.</p>
             <p>Create an inference endpoint to perform an inference task with the <code>anthropic</code> service.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-anthropic.html>`_

         :param task_type: The task type. The only valid task type for the model to perform
             is `completion`.
@@ -673,6 +810,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -699,6 +838,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -744,6 +885,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -751,14 +893,9 @@ class InferenceClient(NamespacedClient):

             <p>Create an Azure AI studio inference endpoint.</p>
             <p>Create an inference endpoint to perform an inference task with the <code>azureaistudio</code> service.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-azure-ai-studio.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param azureaistudio_inference_id: The unique identifier of the inference endpoint.
@@ -769,6 +906,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -795,6 +934,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -840,6 +981,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -853,14 +995,9 @@ class InferenceClient(NamespacedClient):
             <li><a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35">GPT-3.5</a></li>
             </ul>
             <p>The list of embeddings models that you can choose from in your deployment can be found in the <a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings">Azure models documentation</a>.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-azure-openai.html>`_

         :param task_type: The type of the inference task that the model will perform.
             NOTE: The `chat_completion` task type only supports streaming and only through
@@ -873,6 +1010,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -899,6 +1038,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -944,6 +1085,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -951,14 +1093,9 @@ class InferenceClient(NamespacedClient):

             <p>Create a Cohere inference endpoint.</p>
             <p>Create an inference endpoint to perform an inference task with the <code>cohere</code> service.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-cohere.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param cohere_inference_id: The unique identifier of the inference endpoint.
@@ -969,6 +1106,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -993,6 +1132,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1017,6 +1158,221 @@ class InferenceClient(NamespacedClient):
             path_parts=__path_parts,
         )

+    @_rewrite_parameters(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
+    )
+    async def put_custom(
+        self,
+        *,
+        task_type: t.Union[
+            str, t.Literal["completion", "rerank", "sparse_embedding", "text_embedding"]
+        ],
+        custom_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["custom"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+            <p>Create a custom inference endpoint.</p>
+            <p>The custom service gives more control over how to interact with external inference services that aren't explicitly supported through dedicated integrations.
+            The custom service gives you the ability to define the headers, url, query parameters, request body, and secrets.
+            The custom service supports the template replacement functionality, which enables you to define a template that can be replaced with the value associated with that key.
+            Templates are portions of a string that start with <code>${</code> and end with <code>}</code>.
+            The parameters <code>secret_parameters</code> and <code>task_settings</code> are checked for keys for template replacement. Template replacement is supported in the <code>request</code>, <code>headers</code>, <code>url</code>, and <code>query_parameters</code>.
+            If the definition (key) is not found for a template, an error message is returned.
+            In case of an endpoint definition like the following:</p>
+            <pre><code>PUT _inference/text_embedding/test-text-embedding
+            {
+              "service": "custom",
+              "service_settings": {
+                "secret_parameters": {
+                  "api_key": "<some api key>"
+                },
+                "url": "...endpoints.huggingface.cloud/v1/embeddings",
+                "headers": {
+                  "Authorization": "Bearer ${api_key}",
+                  "Content-Type": "application/json"
+                },
+                "request": "{\\"input\\": ${input}}",
+                "response": {
+                  "json_parser": {
+                    "text_embeddings":"$.data[*].embedding[*]"
+                  }
+                }
+              }
+            }
+            </code></pre>
+            <p>To replace <code>${api_key}</code> the <code>secret_parameters</code> and <code>task_settings</code> are checked for a key named <code>api_key</code>.</p>
+            <blockquote>
+            <p>info
+            Templates should not be surrounded by quotes.</p>
+            </blockquote>
+            <p>Pre-defined templates:</p>
+            <ul>
+            <li><code>${input}</code> refers to the array of input strings that comes from the <code>input</code> field of the subsequent inference requests.</li>
+            <li><code>${input_type}</code> refers to the input type translation values.</li>
+            <li><code>${query}</code> refers to the query field used specifically for reranking tasks.</li>
+            <li><code>${top_n}</code> refers to the <code>top_n</code> field available when performing rerank requests.</li>
+            <li><code>${return_documents}</code> refers to the <code>return_documents</code> field available when performing rerank requests.</li>
+            </ul>
+
+
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param custom_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `custom`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `custom` service.
+        :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type you specified.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if custom_inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'custom_inference_id'")
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "custom_inference_id": _quote(custom_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["custom_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
+            if not __body:
+                __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_custom",
+            path_parts=__path_parts,
+        )
+
+    @_rewrite_parameters(
+        body_fields=("service", "service_settings", "chunking_settings"),
+    )
+    async def put_deepseek(
+        self,
+        *,
+        task_type: t.Union[str, t.Literal["chat_completion", "completion"]],
+        deepseek_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["deepseek"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+            <p>Create a DeepSeek inference endpoint.</p>
+            <p>Create an inference endpoint to perform an inference task with the <code>deepseek</code> service.</p>
+
+
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-deepseek.html>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param deepseek_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `deepseek`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `deepseek` service.
+        :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if deepseek_inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'deepseek_inference_id'")
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "deepseek_inference_id": _quote(deepseek_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["deepseek_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+            if not __body:
+                __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_deepseek",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_fields=(
             "service",
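Both `put_custom()` and `put_deepseek()` are new in this release. A hedged sketch of each; the URLs, secrets, model ids, and JSONPath expressions are illustrative placeholders modeled on the docstring examples above:

```python
# Custom service: wire up an arbitrary HTTP embeddings API. The URL and
# JSONPath follow the pattern in the docstring example, not a real provider.
resp = await client.inference.put_custom(
    task_type="text_embedding",
    custom_inference_id="my-custom-embeddings",
    service="custom",
    service_settings={
        "secret_parameters": {"api_key": "<some api key>"},
        "url": "https://example.invalid/v1/embeddings",
        "headers": {
            "Authorization": "Bearer ${api_key}",
            "Content-Type": "application/json",
        },
        "request": '{"input": ${input}}',
        "response": {"json_parser": {"text_embeddings": "$.data[*].embedding[*]"}},
    },
)

# DeepSeek service: note it accepts `timeout`, while put_custom above does
# not. `model_id` here is a hypothetical service setting.
resp = await client.inference.put_deepseek(
    task_type="chat_completion",
    deepseek_inference_id="my-deepseek-chat",
    service="deepseek",
    service_settings={
        "api_key": "<deepseek api key>",
        "model_id": "deepseek-chat",
    },
    timeout="30s",
)
```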
@@ -1040,6 +1396,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1062,7 +1419,7 @@ class InferenceClient(NamespacedClient):
             Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-elasticsearch.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param elasticsearch_inference_id: The unique identifier of the inference endpoint.
@@ -1074,6 +1431,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1100,6 +1459,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1139,6 +1500,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1162,7 +1524,7 @@ class InferenceClient(NamespacedClient):
             Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-elser.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param elser_inference_id: The unique identifier of the inference endpoint.
@@ -1171,6 +1533,8 @@ class InferenceClient(NamespacedClient):
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `elser` service.
         :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1195,6 +1559,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1232,6 +1598,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1239,14 +1606,9 @@ class InferenceClient(NamespacedClient):

             <p>Create an Google AI Studio inference endpoint.</p>
             <p>Create an inference endpoint to perform an inference task with the <code>googleaistudio</code> service.</p>
-            <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-            After creating the endpoint, wait for the model deployment to complete before using it.
-            To verify the deployment status, use the get trained model statistics API.
-            Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-            Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-google-ai-studio.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param googleaistudio_inference_id: The unique identifier of the inference endpoint.
@@ -1255,6 +1617,8 @@ class InferenceClient(NamespacedClient):
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `googleaistudio` service.
         :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1281,6 +1645,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1314,7 +1680,9 @@ class InferenceClient(NamespacedClient):
     async def put_googlevertexai(
         self,
         *,
-        task_type: t.Union[
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
+        ],
         googlevertexai_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["googlevertexai"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1324,6 +1692,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1331,14 +1700,9 @@ class InferenceClient(NamespacedClient):
 
           <p>Create a Google Vertex AI inference endpoint.</p>
           <p>Create an inference endpoint to perform an inference task with the <code>googlevertexai</code> service.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-google-vertex-ai.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param googlevertexai_inference_id: The unique identifier of the inference endpoint.
@@ -1349,6 +1713,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1375,6 +1741,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
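Usage sketch (not part of the diff): the widened `task_type` above newly admits `rerank`, `completion`, and `chat_completion`. Field names in `service_settings` follow the Elastic reference for this service; every value is a placeholder.

from elasticsearch import Elasticsearch

client = Elasticsearch("https://localhost:9200", api_key="<es-api-key>")

# `rerank` is one of the task types accepted by the widened signature.
resp = client.inference.put_googlevertexai(
    task_type="rerank",
    googlevertexai_inference_id="my-vertexai-rerank",
    service="googlevertexai",
    service_settings={
        "service_account_json": "<service-account-json>",  # placeholder
        "model_id": "semantic-ranker-512",                 # assumed example model
        "project_id": "<gcp-project>",                     # placeholder
    },
    timeout="1m",  # new in 8.19: bound the wait for endpoint creation
)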
@@ -1400,12 +1768,19 @@ class InferenceClient(NamespacedClient):
         )
 
     @_rewrite_parameters(
-        body_fields=(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
     )
     async def put_hugging_face(
         self,
         *,
-        task_type: t.Union[
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
+        ],
         huggingface_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["hugging_face"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1414,17 +1789,22 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html
 
           <p>Create a Hugging Face inference endpoint.</p>
-          <p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service
-
-
-
-          <p>
+          <p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service.
+          Supported tasks include: <code>text_embedding</code>, <code>completion</code>, and <code>chat_completion</code>.</p>
+          <p>To configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.
+          Select a model that supports the task you intend to use.</p>
+          <p>For Elastic's <code>text_embedding</code> task:
+          The selected model must support the <code>Sentence Embeddings</code> task. On the new endpoint creation page, select the <code>Sentence Embeddings</code> task under the <code>Advanced Configuration</code> section.
+          After the endpoint has initialized, copy the generated endpoint URL.
+          Recommended models for <code>text_embedding</code> task:</p>
           <ul>
           <li><code>all-MiniLM-L6-v2</code></li>
           <li><code>all-MiniLM-L12-v2</code></li>
@@ -1434,14 +1814,27 @@ class InferenceClient(NamespacedClient):
          <li><code>multilingual-e5-base</code></li>
          <li><code>multilingual-e5-small</code></li>
          </ul>
-          <p>
-
-
-
-
+          <p>For Elastic's <code>chat_completion</code> and <code>completion</code> tasks:
+          The selected model must support the <code>Text Generation</code> task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for <code>Text Generation</code>. When creating dedicated endpoint select the <code>Text Generation</code> task.
+          After the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes <code>/v1/chat/completions</code> part in URL. Then, copy the full endpoint URL for use.
+          Recommended models for <code>chat_completion</code> and <code>completion</code> tasks:</p>
+          <ul>
+          <li><code>Mistral-7B-Instruct-v0.2</code></li>
+          <li><code>QwQ-32B</code></li>
+          <li><code>Phi-3-mini-128k-instruct</code></li>
+          </ul>
+          <p>For Elastic's <code>rerank</code> task:
+          The selected model must support the <code>sentence-ranking</code> task and expose OpenAI API.
+          HuggingFace supports only dedicated (not serverless) endpoints for <code>Rerank</code> so far.
+          After the endpoint is initialized, copy the full endpoint URL for use.
+          Tested models for <code>rerank</code> task:</p>
+          <ul>
+          <li><code>bge-reranker-base</code></li>
+          <li><code>jina-reranker-v1-turbo-en-GGUF</code></li>
+          </ul>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-hugging-face.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param huggingface_inference_id: The unique identifier of the inference endpoint.
@@ -1450,6 +1843,10 @@ class InferenceClient(NamespacedClient):
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `hugging_face` service.
         :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1476,6 +1873,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1483,6 +1882,8 @@ class InferenceClient(NamespacedClient):
                 __body["service_settings"] = service_settings
             if chunking_settings is not None:
                 __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
         if not __body:
             __body = None  # type: ignore[assignment]
         __headers = {"accept": "application/json"}
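Usage sketch (not part of the diff): per the docstring above, a `chat_completion` endpoint URL must include the `/v1/chat/completions` path. The URL and token below are placeholders.

from elasticsearch import Elasticsearch

client = Elasticsearch("https://localhost:9200", api_key="<es-api-key>")

# Create a Hugging Face chat_completion endpoint; both task_settings and
# timeout are newly accepted by this method in 8.19.
resp = client.inference.put_hugging_face(
    task_type="chat_completion",
    huggingface_inference_id="my-hf-chat",
    service="hugging_face",
    service_settings={
        "api_key": "<hf-token>",  # placeholder
        "url": "https://<your-endpoint>.endpoints.huggingface.cloud/v1/chat/completions",
    },
    timeout="30s",
)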
@@ -1519,6 +1920,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1528,14 +1930,9 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>jinaai</code> service.</p>
           <p>To review the available <code>rerank</code> models, refer to <a href="https://jina.ai/reranker">https://jina.ai/reranker</a>.
           To review the available <code>text_embedding</code> models, refer to the <a href="https://jina.ai/embeddings/">https://jina.ai/embeddings/</a>.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-jinaai.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param jinaai_inference_id: The unique identifier of the inference endpoint.
@@ -1546,6 +1943,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1570,6 +1969,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
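Usage sketch (not part of the diff): the hunks above show the `jinaai_inference_id` parameter; the method name `put_jinaai` is inferred from the client's naming pattern. Model name and keys are placeholders; `top_n` and `return_documents` are rerank task settings per the Elastic docs for this service.

from elasticsearch import Elasticsearch

client = Elasticsearch("https://localhost:9200", api_key="<es-api-key>")

resp = client.inference.put_jinaai(
    task_type="rerank",
    jinaai_inference_id="my-jinaai-rerank",
    service="jinaai",
    service_settings={
        "api_key": "<jina-api-key>",                       # placeholder
        "model_id": "jina-reranker-v2-base-multilingual",  # assumed example model
    },
    task_settings={"top_n": 5, "return_documents": True},
    timeout="30s",
)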
@@ -1600,7 +2001,9 @@ class InferenceClient(NamespacedClient):
     async def put_mistral(
         self,
         *,
-        task_type: t.Union[
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "text_embedding"]
+        ],
         mistral_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["mistral"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1609,30 +2012,27 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html
 
           <p>Create a Mistral inference endpoint.</p>
-          <p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+          <p>Create an inference endpoint to perform an inference task with the <code>mistral</code> service.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-mistral.html>`_
 
-        :param task_type: The
-            is `text_embedding`.
+        :param task_type: The type of the inference task that the model will perform.
         :param mistral_inference_id: The unique identifier of the inference endpoint.
         :param service: The type of service supported for the specified task type. In
             this case, `mistral`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `mistral` service.
         :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1657,6 +2057,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
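Usage sketch (not part of the diff): `chat_completion` and `completion` are newly accepted by the widened `task_type` above. The `model` field name in `service_settings` is taken from the Elastic docs for the mistral service and is an assumption here, as are all values.

from elasticsearch import Elasticsearch

client = Elasticsearch("https://localhost:9200", api_key="<es-api-key>")

resp = client.inference.put_mistral(
    task_type="chat_completion",
    mistral_inference_id="my-mistral-chat",
    service="mistral",
    service_settings={
        "api_key": "<mistral-api-key>",   # placeholder
        "model": "mistral-small-latest",  # assumed field name and model
    },
    timeout="30s",
)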
@@ -1702,6 +2104,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1709,14 +2112,9 @@ class InferenceClient(NamespacedClient):
 
           <p>Create an OpenAI inference endpoint.</p>
           <p>Create an inference endpoint to perform an inference task with the <code>openai</code> service or <code>openai</code> compatible APIs.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-openai.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
             NOTE: The `chat_completion` task type only supports streaming and only through
@@ -1729,6 +2127,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1753,6 +2153,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
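Usage sketch (not part of the diff): creating an OpenAI text-embedding endpoint with the new `timeout` parameter. Keys and IDs are placeholders.

from elasticsearch import Elasticsearch

client = Elasticsearch("https://localhost:9200", api_key="<es-api-key>")

resp = client.inference.put_openai(
    task_type="text_embedding",
    openai_inference_id="my-openai-embeddings",
    service="openai",
    service_settings={
        "api_key": "<openai-api-key>",       # placeholder
        "model_id": "text-embedding-3-small",
    },
    timeout="30s",  # new in 8.19: bound the wait for endpoint creation
)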
@@ -1798,6 +2200,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1808,7 +2211,7 @@ class InferenceClient(NamespacedClient):
           <p>Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-voyageai.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param voyageai_inference_id: The unique identifier of the inference endpoint.
@@ -1819,6 +2222,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1843,6 +2248,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
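Usage sketch (not part of the diff): a VoyageAI embedding endpoint with the new `timeout`. The `service_settings` field names follow the Elastic docs for this service; the model name and key are placeholders.

from elasticsearch import Elasticsearch

client = Elasticsearch("https://localhost:9200", api_key="<es-api-key>")

resp = client.inference.put_voyageai(
    task_type="text_embedding",
    voyageai_inference_id="my-voyageai-embeddings",
    service="voyageai",
    service_settings={
        "api_key": "<voyage-api-key>",  # placeholder
        "model_id": "voyage-3-large",   # assumed example model
    },
    timeout="30s",
)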
@@ -1881,6 +2288,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1890,14 +2298,9 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>watsonxai</code> service.
           You need an IBM Cloud Databases for Elasticsearch deployment to use the <code>watsonxai</code> inference service.
           You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-watsonx-ai.html>`_
 
         :param task_type: The task type. The only valid task type for the model to perform
             is `text_embedding`.
@@ -1906,6 +2309,8 @@ class InferenceClient(NamespacedClient):
             this case, `watsonxai`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `watsonxai` service.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1930,6 +2335,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
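Usage sketch (not part of the diff): only `text_embedding` is valid here, per the docstring above. The method and parameter names follow the client's pattern (`put_watsonx`, `watsonx_inference_id`); the `service_settings` fields follow the Elastic watsonxai docs, and all values are placeholders.

from elasticsearch import Elasticsearch

client = Elasticsearch("https://localhost:9200", api_key="<es-api-key>")

resp = client.inference.put_watsonx(
    task_type="text_embedding",
    watsonx_inference_id="my-watsonx-embeddings",
    service="watsonxai",
    service_settings={
        "api_key": "<ibm-cloud-api-key>",          # placeholder
        "url": "<watsonx-url>",                    # placeholder
        "model_id": "ibm/slate-30m-english-rtrvl", # assumed example model
        "project_id": "<watsonx-project-id>",      # placeholder
        "api_version": "<api-version-date>",       # placeholder
    },
    timeout="30s",
)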
@@ -1970,10 +2377,10 @@ class InferenceClient(NamespacedClient):
         """
         .. raw:: html
 
-          <p>Perform
+          <p>Perform reranking inference on the service</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_
 
         :param inference_id: The unique identifier for the inference endpoint.
         :param input: The text on which you want to perform the inference task. It can
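Usage sketch (not part of the diff): reranking a few documents against a query through any rerank-capable endpoint; the endpoint ID is a placeholder and the response shape is assumed from the inference API docs.

from elasticsearch import Elasticsearch

client = Elasticsearch("https://localhost:9200", api_key="<es-api-key>")

resp = client.inference.rerank(
    inference_id="my-jinaai-rerank",  # placeholder endpoint id
    query="What is Elasticsearch?",
    input=[
        "Elasticsearch is a distributed search and analytics engine.",
        "Kibana is a data visualization dashboard.",
        "Logstash is a server-side data processing pipeline.",
    ],
)
for doc in resp["rerank"]:
    print(doc["index"], doc["relevance_score"])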
@@ -2049,7 +2456,7 @@ class InferenceClient(NamespacedClient):
           <p>Perform sparse embedding inference on the service</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_
 
         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
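Usage sketch (not part of the diff): generating ELSER-style sparse vectors. The `.elser-2-elasticsearch` endpoint ID is the preconfigured default on recent clusters, but that is an assumption; substitute your own.

from elasticsearch import Elasticsearch

client = Elasticsearch("https://localhost:9200", api_key="<es-api-key>")

resp = client.inference.sparse_embedding(
    inference_id=".elser-2-elasticsearch",
    input="The quick brown fox jumps over the lazy dog",
)
# Each result is a token -> weight mapping.
print(resp["sparse_embedding"][0]["embedding"])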
@@ -2117,7 +2524,7 @@ class InferenceClient(NamespacedClient):
           <p>Perform text embedding inference on the service</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_
 
         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
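Usage sketch (not part of the diff): dense embeddings from an endpoint created earlier; the endpoint ID is a placeholder.

from elasticsearch import Elasticsearch

client = Elasticsearch("https://localhost:9200", api_key="<es-api-key>")

resp = client.inference.text_embedding(
    inference_id="my-openai-embeddings",  # placeholder endpoint id
    input=["first passage", "second passage"],
)
vectors = [row["embedding"] for row in resp["text_embedding"]]
print(len(vectors), len(vectors[0]))  # number of inputs, embedding dimension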
@@ -2199,7 +2606,7 @@ class InferenceClient(NamespacedClient):
           However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/update-inference-api.html>`_
 
         :param inference_id: The unique identifier of the inference endpoint.
         :param inference_config:
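Usage sketch (not part of the diff): updating an existing endpoint in place, for example to rotate a stored API key. The body is passed as `inference_config`; the nested field names are service-specific, and the values below are placeholders.

from elasticsearch import Elasticsearch

client = Elasticsearch("https://localhost:9200", api_key="<es-api-key>")

resp = client.inference.update(
    inference_id="my-openai-embeddings",  # placeholder endpoint id
    inference_config={
        "service_settings": {"api_key": "<new-openai-api-key>"},
    },
)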