elasticsearch 9.0.2__py3-none-any.whl → 9.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- elasticsearch/__init__.py +3 -3
- elasticsearch/_async/client/__init__.py +88 -244
- elasticsearch/_async/client/async_search.py +4 -4
- elasticsearch/_async/client/autoscaling.py +4 -4
- elasticsearch/_async/client/cat.py +419 -51
- elasticsearch/_async/client/ccr.py +13 -13
- elasticsearch/_async/client/cluster.py +32 -22
- elasticsearch/_async/client/connector.py +30 -30
- elasticsearch/_async/client/dangling_indices.py +3 -3
- elasticsearch/_async/client/enrich.py +5 -5
- elasticsearch/_async/client/eql.py +14 -6
- elasticsearch/_async/client/esql.py +125 -9
- elasticsearch/_async/client/features.py +2 -2
- elasticsearch/_async/client/fleet.py +3 -3
- elasticsearch/_async/client/graph.py +1 -1
- elasticsearch/_async/client/ilm.py +11 -11
- elasticsearch/_async/client/indices.py +592 -113
- elasticsearch/_async/client/inference.py +521 -44
- elasticsearch/_async/client/ingest.py +9 -16
- elasticsearch/_async/client/license.py +11 -11
- elasticsearch/_async/client/logstash.py +3 -3
- elasticsearch/_async/client/migration.py +3 -3
- elasticsearch/_async/client/ml.py +75 -87
- elasticsearch/_async/client/monitoring.py +1 -1
- elasticsearch/_async/client/nodes.py +7 -7
- elasticsearch/_async/client/query_rules.py +8 -8
- elasticsearch/_async/client/rollup.py +9 -30
- elasticsearch/_async/client/search_application.py +10 -10
- elasticsearch/_async/client/searchable_snapshots.py +4 -4
- elasticsearch/_async/client/security.py +79 -81
- elasticsearch/_async/client/shutdown.py +3 -3
- elasticsearch/_async/client/simulate.py +1 -1
- elasticsearch/_async/client/slm.py +9 -9
- elasticsearch/_async/client/snapshot.py +64 -21
- elasticsearch/_async/client/sql.py +6 -6
- elasticsearch/_async/client/ssl.py +1 -1
- elasticsearch/_async/client/synonyms.py +26 -7
- elasticsearch/_async/client/tasks.py +4 -4
- elasticsearch/_async/client/text_structure.py +4 -4
- elasticsearch/_async/client/transform.py +11 -11
- elasticsearch/_async/client/watcher.py +17 -15
- elasticsearch/_async/client/xpack.py +2 -2
- elasticsearch/_otel.py +8 -8
- elasticsearch/_sync/client/__init__.py +88 -244
- elasticsearch/_sync/client/async_search.py +4 -4
- elasticsearch/_sync/client/autoscaling.py +4 -4
- elasticsearch/_sync/client/cat.py +419 -51
- elasticsearch/_sync/client/ccr.py +13 -13
- elasticsearch/_sync/client/cluster.py +32 -22
- elasticsearch/_sync/client/connector.py +30 -30
- elasticsearch/_sync/client/dangling_indices.py +3 -3
- elasticsearch/_sync/client/enrich.py +5 -5
- elasticsearch/_sync/client/eql.py +14 -6
- elasticsearch/_sync/client/esql.py +125 -9
- elasticsearch/_sync/client/features.py +2 -2
- elasticsearch/_sync/client/fleet.py +3 -3
- elasticsearch/_sync/client/graph.py +1 -1
- elasticsearch/_sync/client/ilm.py +11 -11
- elasticsearch/_sync/client/indices.py +592 -113
- elasticsearch/_sync/client/inference.py +521 -44
- elasticsearch/_sync/client/ingest.py +9 -16
- elasticsearch/_sync/client/license.py +11 -11
- elasticsearch/_sync/client/logstash.py +3 -3
- elasticsearch/_sync/client/migration.py +3 -3
- elasticsearch/_sync/client/ml.py +75 -87
- elasticsearch/_sync/client/monitoring.py +1 -1
- elasticsearch/_sync/client/nodes.py +7 -7
- elasticsearch/_sync/client/query_rules.py +8 -8
- elasticsearch/_sync/client/rollup.py +9 -30
- elasticsearch/_sync/client/search_application.py +10 -10
- elasticsearch/_sync/client/searchable_snapshots.py +4 -4
- elasticsearch/_sync/client/security.py +79 -81
- elasticsearch/_sync/client/shutdown.py +3 -3
- elasticsearch/_sync/client/simulate.py +1 -1
- elasticsearch/_sync/client/slm.py +9 -9
- elasticsearch/_sync/client/snapshot.py +64 -21
- elasticsearch/_sync/client/sql.py +6 -6
- elasticsearch/_sync/client/ssl.py +1 -1
- elasticsearch/_sync/client/synonyms.py +26 -7
- elasticsearch/_sync/client/tasks.py +4 -4
- elasticsearch/_sync/client/text_structure.py +4 -4
- elasticsearch/_sync/client/transform.py +11 -11
- elasticsearch/_sync/client/watcher.py +17 -15
- elasticsearch/_sync/client/xpack.py +2 -2
- elasticsearch/_version.py +1 -1
- elasticsearch/compat.py +5 -0
- elasticsearch/dsl/__init__.py +2 -1
- elasticsearch/dsl/aggs.py +20 -0
- elasticsearch/dsl/document_base.py +177 -17
- elasticsearch/dsl/field.py +230 -37
- elasticsearch/dsl/query.py +6 -3
- elasticsearch/dsl/response/__init__.py +1 -1
- elasticsearch/dsl/types.py +169 -11
- elasticsearch/dsl/utils.py +1 -1
- elasticsearch/{dsl/_sync/_sync_check → esql}/__init__.py +2 -0
- elasticsearch/esql/esql.py +1105 -0
- elasticsearch/esql/functions.py +1738 -0
- {elasticsearch-9.0.2.dist-info → elasticsearch-9.1.0.dist-info}/METADATA +3 -4
- elasticsearch-9.1.0.dist-info/RECORD +164 -0
- elasticsearch/dsl/_sync/_sync_check/document.py +0 -514
- elasticsearch/dsl/_sync/_sync_check/faceted_search.py +0 -50
- elasticsearch/dsl/_sync/_sync_check/index.py +0 -597
- elasticsearch/dsl/_sync/_sync_check/mapping.py +0 -49
- elasticsearch/dsl/_sync/_sync_check/search.py +0 -230
- elasticsearch/dsl/_sync/_sync_check/update_by_query.py +0 -45
- elasticsearch-9.0.2.dist-info/RECORD +0 -167
- {elasticsearch-9.0.2.dist-info → elasticsearch-9.1.0.dist-info}/WHEEL +0 -0
- {elasticsearch-9.0.2.dist-info → elasticsearch-9.1.0.dist-info}/licenses/LICENSE +0 -0
- {elasticsearch-9.0.2.dist-info → elasticsearch-9.1.0.dist-info}/licenses/NOTICE +0 -0
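The headline addition in this release is the new `elasticsearch.esql` package (`esql.py` and `functions.py` above), a Python query builder for ES|QL. As a hedged sketch only — this diff lists the files but not their contents, so the method names below (`from_`, `where`, `sort`, `keep`, `limit`, `render`) are assumptions based on the 9.1 release notes, not verified against this diff:

```python
# Hypothetical usage of the new ES|QL query builder; all names here are
# assumptions, since the diff shows only the file list.
from elasticsearch.esql import ESQL

query = (
    ESQL.from_("employees")           # FROM employees
    .where("still_hired == true")     # | WHERE still_hired == true
    .sort("emp_no")                   # | SORT emp_no
    .keep("first_name", "last_name")  # | KEEP first_name, last_name
    .limit(3)                         # | LIMIT 3
)
print(query.render())  # renders the pipeline as an ES|QL string
```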
elasticsearch/_async/client/inference.py

@@ -47,7 +47,7 @@ class InferenceClient(NamespacedClient):
           <p>Perform completion inference on the service</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_

         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
@@ -123,7 +123,7 @@ class InferenceClient(NamespacedClient):
           <p>Delete an inference endpoint</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-delete>`_

         :param inference_id: The inference identifier.
         :param task_type: The task type
@@ -197,7 +197,7 @@ class InferenceClient(NamespacedClient):
           <p>Get an inference endpoint</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-get>`_

         :param task_type: The task type
         :param inference_id: The inference Id
@@ -235,7 +235,7 @@ class InferenceClient(NamespacedClient):
         )

     @_rewrite_parameters(
-        body_fields=("input", "query", "task_settings"),
+        body_fields=("input", "input_type", "query", "task_settings"),
     )
     async def inference(
         self,
@@ -257,6 +257,7 @@ class InferenceClient(NamespacedClient):
         error_trace: t.Optional[bool] = None,
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
+        input_type: t.Optional[str] = None,
         pretty: t.Optional[bool] = None,
         query: t.Optional[str] = None,
         task_settings: t.Optional[t.Any] = None,
@@ -277,13 +278,22 @@ class InferenceClient(NamespacedClient):
           </blockquote>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_

         :param inference_id: The unique identifier for the inference endpoint.
         :param input: The text on which you want to perform the inference task. It can
             be a single string or an array. > info > Inference endpoints for the `completion`
             task type currently only support a single string as input.
         :param task_type: The type of inference task that the model performs.
+        :param input_type: Specifies the input data type for the text embedding model.
+            The `input_type` parameter only applies to Inference Endpoints with the `text_embedding`
+            task type. Possible values include: * `SEARCH` * `INGEST` * `CLASSIFICATION`
+            * `CLUSTERING` Not all services support all values. Unsupported values will
+            trigger a validation exception. Accepted values depend on the configured
+            inference service, refer to the relevant service-specific documentation for
+            more info. > info > The `input_type` parameter specified on the root level
+            of the request body will take precedence over the `input_type` parameter
+            specified in `task_settings`.
         :param query: The query input, which is required only for the `rerank` task.
             It is not required for other tasks.
         :param task_settings: Task settings for the individual inference request. These
@@ -322,6 +332,8 @@ class InferenceClient(NamespacedClient):
         if not __body:
             if input is not None:
                 __body["input"] = input
+            if input_type is not None:
+                __body["input_type"] = input_type
             if query is not None:
                 __body["query"] = query
             if task_settings is not None:
@@ -366,6 +378,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html
@@ -374,13 +387,37 @@ class InferenceClient(NamespacedClient):
           <p>IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.
           For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.
           However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>
+          <p>The following integrations are available through the inference API. You can find the available task types next to the integration name:</p>
+          <ul>
+          <li>AlibabaCloud AI Search (<code>completion</code>, <code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code>)</li>
+          <li>Amazon Bedrock (<code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Amazon SageMaker (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code>)</li>
+          <li>Anthropic (<code>completion</code>)</li>
+          <li>Azure AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Azure OpenAI (<code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Cohere (<code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+          <li>DeepSeek (<code>completion</code>, <code>chat_completion</code>)</li>
+          <li>Elasticsearch (<code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code> - this service is for built-in models and models uploaded through Eland)</li>
+          <li>ELSER (<code>sparse_embedding</code>)</li>
+          <li>Google AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Google Vertex AI (<code>rerank</code>, <code>text_embedding</code>)</li>
+          <li>Hugging Face (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+          <li>Mistral (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+          <li>OpenAI (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+          <li>VoyageAI (<code>text_embedding</code>, <code>rerank</code>)</li>
+          <li>Watsonx inference integration (<code>text_embedding</code>)</li>
+          <li>JinaAI (<code>text_embedding</code>, <code>rerank</code>)</li>
+          </ul>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put>`_

         :param inference_id: The inference Id
         :param inference_config:
-        :param task_type: The task type
+        :param task_type: The task type. Refer to the integration list in the API description
+            for the available task types.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if inference_id in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'inference_id'")
@@ -411,6 +448,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         __body = inference_config if inference_config is not None else body
         __headers = {"accept": "application/json", "content-type": "application/json"}
         return await self.perform_request(  # type: ignore[return-value]
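The `put` method gains a `timeout` parameter that is sent on the query string rather than in the body. A sketch, assuming an `AsyncElasticsearch` client; the ELSER config shown is an illustrative body, not taken from this diff:

```python
# Wait up to 60 seconds for the endpoint to be created before timing out.
resp = await client.inference.put(
    inference_id="my-elser-endpoint",  # hypothetical endpoint id
    inference_config={                 # example body; fields depend on the service
        "service": "elser",
        "service_settings": {"num_allocations": 1, "num_threads": 1},
    },
    timeout="60s",
)
```

The same `timeout` query parameter is added to nearly every service-specific `put_*` method in the hunks that follow.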
@@ -446,6 +485,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -455,7 +495,7 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>alibabacloud-ai-search</code> service.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-alibabacloud>`_

         :param task_type: The type of the inference task that the model will perform.
         :param alibabacloud_inference_id: The unique identifier of the inference endpoint.
@@ -466,6 +506,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -492,6 +534,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -537,20 +581,21 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html

           <p>Create an Amazon Bedrock inference endpoint.</p>
-          <p>
+          <p>Create an inference endpoint to perform an inference task with the <code>amazonbedrock</code> service.</p>
           <blockquote>
           <p>info
           You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.</p>
           </blockquote>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonbedrock>`_

         :param task_type: The type of the inference task that the model will perform.
         :param amazonbedrock_inference_id: The unique identifier of the inference endpoint.
@@ -561,6 +606,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -587,6 +634,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -611,6 +660,112 @@ class InferenceClient(NamespacedClient):
             path_parts=__path_parts,
         )

+    @_rewrite_parameters(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
+    )
+    async def put_amazonsagemaker(
+        self,
+        *,
+        task_type: t.Union[
+            str,
+            t.Literal[
+                "chat_completion",
+                "completion",
+                "rerank",
+                "sparse_embedding",
+                "text_embedding",
+            ],
+        ],
+        amazonsagemaker_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["amazon_sagemaker"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+          <p>Create an Amazon SageMaker inference endpoint.</p>
+          <p>Create an inference endpoint to perform an inference task with the <code>amazon_sagemaker</code> service.</p>
+
+
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonsagemaker>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param amazonsagemaker_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `amazon_sagemaker`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `amazon_sagemaker` service and `service_settings.api`
+            you specified.
+        :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type and `service_settings.api` you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if amazonsagemaker_inference_id in SKIP_IN_PATH:
+            raise ValueError(
+                "Empty value passed for parameter 'amazonsagemaker_inference_id'"
+            )
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "amazonsagemaker_inference_id": _quote(amazonsagemaker_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["amazonsagemaker_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_amazonsagemaker",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_fields=(
             "service",
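A sketch of the new method, assuming an `AsyncElasticsearch` client. The diff fixes the signature but not the shape of `service_settings` beyond it being specific to the `amazon_sagemaker` service and its `service_settings.api`, so the keys below are placeholders:

```python
resp = await client.inference.put_amazonsagemaker(
    task_type="text_embedding",
    amazonsagemaker_inference_id="my-sagemaker-endpoint",  # hypothetical id
    service="amazon_sagemaker",
    service_settings={"api": "...", "endpoint_name": "..."},  # placeholder keys
    timeout="30s",  # query-string timeout, as in the other put_* methods
)
```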
@@ -632,6 +787,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -641,7 +797,7 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>anthropic</code> service.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-anthropic>`_

         :param task_type: The task type. The only valid task type for the model to perform
             is `completion`.
@@ -653,6 +809,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -679,6 +837,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -724,6 +884,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -733,7 +894,7 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>azureaistudio</code> service.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureaistudio>`_

         :param task_type: The type of the inference task that the model will perform.
         :param azureaistudio_inference_id: The unique identifier of the inference endpoint.
@@ -744,6 +905,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -770,6 +933,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -815,6 +980,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -830,7 +996,7 @@ class InferenceClient(NamespacedClient):
           <p>The list of embeddings models that you can choose from in your deployment can be found in the <a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings">Azure models documentation</a>.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureopenai>`_

         :param task_type: The type of the inference task that the model will perform.
             NOTE: The `chat_completion` task type only supports streaming and only through
@@ -843,6 +1009,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -869,6 +1037,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -914,6 +1084,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -923,7 +1094,7 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>cohere</code> service.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-cohere>`_

         :param task_type: The type of the inference task that the model will perform.
         :param cohere_inference_id: The unique identifier of the inference endpoint.
@@ -934,6 +1105,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -958,6 +1131,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -982,6 +1157,221 @@ class InferenceClient(NamespacedClient):
             path_parts=__path_parts,
         )

+    @_rewrite_parameters(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
+    )
+    async def put_custom(
+        self,
+        *,
+        task_type: t.Union[
+            str, t.Literal["completion", "rerank", "sparse_embedding", "text_embedding"]
+        ],
+        custom_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["custom"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+          <p>Create a custom inference endpoint.</p>
+          <p>The custom service gives more control over how to interact with external inference services that aren't explicitly supported through dedicated integrations.
+          The custom service gives you the ability to define the headers, url, query parameters, request body, and secrets.
+          The custom service supports the template replacement functionality, which enables you to define a template that can be replaced with the value associated with that key.
+          Templates are portions of a string that start with <code>${</code> and end with <code>}</code>.
+          The parameters <code>secret_parameters</code> and <code>task_settings</code> are checked for keys for template replacement. Template replacement is supported in the <code>request</code>, <code>headers</code>, <code>url</code>, and <code>query_parameters</code>.
+          If the definition (key) is not found for a template, an error message is returned.
+          In case of an endpoint definition like the following:</p>
+          <pre><code>PUT _inference/text_embedding/test-text-embedding
+          {
+              "service": "custom",
+              "service_settings": {
+                  "secret_parameters": {
+                      "api_key": "<some api key>"
+                  },
+                  "url": "...endpoints.huggingface.cloud/v1/embeddings",
+                  "headers": {
+                      "Authorization": "Bearer ${api_key}",
+                      "Content-Type": "application/json"
+                  },
+                  "request": "{\\"input\\": ${input}}",
+                  "response": {
+                      "json_parser": {
+                          "text_embeddings":"$.data[*].embedding[*]"
+                      }
+                  }
+              }
+          }
+          </code></pre>
+          <p>To replace <code>${api_key}</code> the <code>secret_parameters</code> and <code>task_settings</code> are checked for a key named <code>api_key</code>.</p>
+          <blockquote>
+          <p>info
+          Templates should not be surrounded by quotes.</p>
+          </blockquote>
+          <p>Pre-defined templates:</p>
+          <ul>
+          <li><code>${input}</code> refers to the array of input strings that comes from the <code>input</code> field of the subsequent inference requests.</li>
+          <li><code>${input_type}</code> refers to the input type translation values.</li>
+          <li><code>${query}</code> refers to the query field used specifically for reranking tasks.</li>
+          <li><code>${top_n}</code> refers to the <code>top_n</code> field available when performing rerank requests.</li>
+          <li><code>${return_documents}</code> refers to the <code>return_documents</code> field available when performing rerank requests.</li>
+          </ul>
+
+
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param custom_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `custom`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `custom` service.
+        :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type you specified.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if custom_inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'custom_inference_id'")
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "custom_inference_id": _quote(custom_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["custom_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_custom",
+            path_parts=__path_parts,
+        )
+
+    @_rewrite_parameters(
+        body_fields=("service", "service_settings", "chunking_settings"),
+    )
+    async def put_deepseek(
+        self,
+        *,
+        task_type: t.Union[str, t.Literal["chat_completion", "completion"]],
+        deepseek_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["deepseek"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+          <p>Create a DeepSeek inference endpoint.</p>
+          <p>Create an inference endpoint to perform an inference task with the <code>deepseek</code> service.</p>
+
+
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-deepseek>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param deepseek_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `deepseek`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `deepseek` service.
+        :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if deepseek_inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'deepseek_inference_id'")
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "deepseek_inference_id": _quote(deepseek_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["deepseek_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_deepseek",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_fields=(
             "service",
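The docstring's raw `PUT _inference/...` example translates directly to the new method. A sketch, assuming an `AsyncElasticsearch` client; the URL and API key are the docstring's own placeholders:

```python
# Template placeholders such as ${api_key} and ${input} are resolved
# server-side from secret_parameters/task_settings, as described above.
resp = await client.inference.put_custom(
    task_type="text_embedding",
    custom_inference_id="test-text-embedding",
    service="custom",
    service_settings={
        "secret_parameters": {"api_key": "<some api key>"},
        "url": "...endpoints.huggingface.cloud/v1/embeddings",
        "headers": {
            "Authorization": "Bearer ${api_key}",
            "Content-Type": "application/json",
        },
        "request": '{"input": ${input}}',
        "response": {"json_parser": {"text_embeddings": "$.data[*].embedding[*]"}},
    },
)
```

Note that `put_custom` is the one new `put_*` method in this file without a `timeout` parameter, while the neighboring `put_deepseek` follows the same pattern as the other services.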
@@ -1005,6 +1395,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1005
1395
|
human: t.Optional[bool] = None,
|
|
1006
1396
|
pretty: t.Optional[bool] = None,
|
|
1007
1397
|
task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
|
|
1398
|
+
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
1008
1399
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
1009
1400
|
) -> ObjectApiResponse[t.Any]:
|
|
1010
1401
|
"""
|
|
@@ -1027,7 +1418,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1027
1418
|
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
|
|
1028
1419
|
|
|
1029
1420
|
|
|
1030
|
-
`<https://www.elastic.co/docs/api/doc/elasticsearch/
|
|
1421
|
+
`<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-elasticsearch>`_
|
|
1031
1422
|
|
|
1032
1423
|
:param task_type: The type of the inference task that the model will perform.
|
|
1033
1424
|
:param elasticsearch_inference_id: The unique identifier of the inference endpoint.
|
|
@@ -1039,6 +1430,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1039
1430
|
:param chunking_settings: The chunking configuration object.
|
|
1040
1431
|
:param task_settings: Settings to configure the inference task. These settings
|
|
1041
1432
|
are specific to the task type you specified.
|
|
1433
|
+
:param timeout: Specifies the amount of time to wait for the inference endpoint
|
|
1434
|
+
to be created.
|
|
1042
1435
|
"""
|
|
1043
1436
|
if task_type in SKIP_IN_PATH:
|
|
1044
1437
|
raise ValueError("Empty value passed for parameter 'task_type'")
|
|
@@ -1065,6 +1458,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1065
1458
|
__query["human"] = human
|
|
1066
1459
|
if pretty is not None:
|
|
1067
1460
|
__query["pretty"] = pretty
|
|
1461
|
+
if timeout is not None:
|
|
1462
|
+
__query["timeout"] = timeout
|
|
1068
1463
|
if not __body:
|
|
1069
1464
|
if service is not None:
|
|
1070
1465
|
__body["service"] = service
|
|
@@ -1104,6 +1499,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1104
1499
|
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
|
|
1105
1500
|
human: t.Optional[bool] = None,
|
|
1106
1501
|
pretty: t.Optional[bool] = None,
|
|
1502
|
+
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
1107
1503
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
1108
1504
|
) -> ObjectApiResponse[t.Any]:
|
|
1109
1505
|
"""
|
|
@@ -1127,7 +1523,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1127
1523
|
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
|
|
1128
1524
|
|
|
1129
1525
|
|
|
1130
|
-
`<https://www.elastic.co/docs/api/doc/elasticsearch/
|
|
1526
|
+
`<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-elser>`_
|
|
1131
1527
|
|
|
1132
1528
|
:param task_type: The type of the inference task that the model will perform.
|
|
1133
1529
|
:param elser_inference_id: The unique identifier of the inference endpoint.
|
|
@@ -1136,6 +1532,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1136
1532
|
:param service_settings: Settings used to install the inference model. These
|
|
1137
1533
|
settings are specific to the `elser` service.
|
|
1138
1534
|
:param chunking_settings: The chunking configuration object.
|
|
1535
|
+
:param timeout: Specifies the amount of time to wait for the inference endpoint
|
|
1536
|
+
to be created.
|
|
1139
1537
|
"""
|
|
1140
1538
|
if task_type in SKIP_IN_PATH:
|
|
1141
1539
|
raise ValueError("Empty value passed for parameter 'task_type'")
|
|
@@ -1160,6 +1558,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1160
1558
|
__query["human"] = human
|
|
1161
1559
|
if pretty is not None:
|
|
1162
1560
|
__query["pretty"] = pretty
|
|
1561
|
+
if timeout is not None:
|
|
1562
|
+
__query["timeout"] = timeout
|
|
1163
1563
|
if not __body:
|
|
1164
1564
|
if service is not None:
|
|
1165
1565
|
__body["service"] = service
|
|
@@ -1197,6 +1597,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1197
1597
|
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
|
|
1198
1598
|
human: t.Optional[bool] = None,
|
|
1199
1599
|
pretty: t.Optional[bool] = None,
|
|
1600
|
+
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
1200
1601
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
1201
1602
|
) -> ObjectApiResponse[t.Any]:
|
|
1202
1603
|
"""
|
|
@@ -1206,7 +1607,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1206
1607
|
<p>Create an inference endpoint to perform an inference task with the <code>googleaistudio</code> service.</p>
|
|
1207
1608
|
|
|
1208
1609
|
|
|
1209
|
-
`<https://www.elastic.co/docs/api/doc/elasticsearch/
|
|
1610
|
+
`<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-googleaistudio>`_
|
|
1210
1611
|
|
|
1211
1612
|
:param task_type: The type of the inference task that the model will perform.
|
|
1212
1613
|
:param googleaistudio_inference_id: The unique identifier of the inference endpoint.
|
|
@@ -1215,6 +1616,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1215
1616
|
:param service_settings: Settings used to install the inference model. These
|
|
1216
1617
|
settings are specific to the `googleaistudio` service.
|
|
1217
1618
|
:param chunking_settings: The chunking configuration object.
|
|
1619
|
+
:param timeout: Specifies the amount of time to wait for the inference endpoint
|
|
1620
|
+
to be created.
|
|
1218
1621
|
"""
|
|
1219
1622
|
if task_type in SKIP_IN_PATH:
|
|
1220
1623
|
raise ValueError("Empty value passed for parameter 'task_type'")
|
|
@@ -1241,6 +1644,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1241
1644
|
__query["human"] = human
|
|
1242
1645
|
if pretty is not None:
|
|
1243
1646
|
__query["pretty"] = pretty
|
|
1647
|
+
if timeout is not None:
|
|
1648
|
+
__query["timeout"] = timeout
|
|
1244
1649
|
if not __body:
|
|
1245
1650
|
if service is not None:
|
|
1246
1651
|
__body["service"] = service
|
|
@@ -1274,7 +1679,9 @@ class InferenceClient(NamespacedClient):
|
|
|
1274
1679
|
async def put_googlevertexai(
|
|
1275
1680
|
self,
|
|
1276
1681
|
*,
|
|
1277
|
-
task_type: t.Union[
|
|
1682
|
+
task_type: t.Union[
|
|
1683
|
+
str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
|
|
1684
|
+
],
|
|
1278
1685
|
googlevertexai_inference_id: str,
|
|
1279
1686
|
service: t.Optional[t.Union[str, t.Literal["googlevertexai"]]] = None,
|
|
1280
1687
|
service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
|
|
@@ -1284,6 +1691,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1284
1691
|
human: t.Optional[bool] = None,
|
|
1285
1692
|
pretty: t.Optional[bool] = None,
|
|
1286
1693
|
task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
|
|
1694
|
+
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
1287
1695
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
1288
1696
|
) -> ObjectApiResponse[t.Any]:
|
|
1289
1697
|
"""
|
|
@@ -1293,7 +1701,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1293
1701
|
<p>Create an inference endpoint to perform an inference task with the <code>googlevertexai</code> service.</p>
|
|
1294
1702
|
|
|
1295
1703
|
|
|
1296
|
-
`<https://www.elastic.co/docs/api/doc/elasticsearch/
|
|
1704
|
+
`<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-googlevertexai>`_
|
|
1297
1705
|
|
|
1298
1706
|
:param task_type: The type of the inference task that the model will perform.
|
|
1299
1707
|
:param googlevertexai_inference_id: The unique identifier of the inference endpoint.
|
|
@@ -1304,6 +1712,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1304
1712
|
:param chunking_settings: The chunking configuration object.
|
|
1305
1713
|
:param task_settings: Settings to configure the inference task. These settings
|
|
1306
1714
|
are specific to the task type you specified.
|
|
1715
|
+
:param timeout: Specifies the amount of time to wait for the inference endpoint
|
|
1716
|
+
to be created.
|
|
1307
1717
|
"""
|
|
1308
1718
|
if task_type in SKIP_IN_PATH:
|
|
1309
1719
|
raise ValueError("Empty value passed for parameter 'task_type'")
|
|
@@ -1330,6 +1740,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1330
1740
|
__query["human"] = human
|
|
1331
1741
|
if pretty is not None:
|
|
1332
1742
|
__query["pretty"] = pretty
|
|
1743
|
+
if timeout is not None:
|
|
1744
|
+
__query["timeout"] = timeout
|
|
1333
1745
|
if not __body:
|
|
1334
1746
|
if service is not None:
|
|
1335
1747
|
__body["service"] = service
|
|
@@ -1355,12 +1767,19 @@ class InferenceClient(NamespacedClient):
|
|
|
1355
1767
|
)
|
|
1356
1768
|
|
|
1357
1769
|
@_rewrite_parameters(
|
|
1358
|
-
body_fields=(
|
|
1770
|
+
body_fields=(
|
|
1771
|
+
"service",
|
|
1772
|
+
"service_settings",
|
|
1773
|
+
"chunking_settings",
|
|
1774
|
+
"task_settings",
|
|
1775
|
+
),
|
|
1359
1776
|
)
|
|
1360
1777
|
async def put_hugging_face(
|
|
1361
1778
|
self,
|
|
1362
1779
|
*,
|
|
1363
|
-
task_type: t.Union[
|
|
1780
|
+
task_type: t.Union[
|
|
1781
|
+
str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
|
|
1782
|
+
],
|
|
1364
1783
|
huggingface_inference_id: str,
|
|
1365
1784
|
service: t.Optional[t.Union[str, t.Literal["hugging_face"]]] = None,
|
|
1366
1785
|
service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
|
|
@@ -1369,17 +1788,22 @@ class InferenceClient(NamespacedClient):
|
|
|
1369
1788
|
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
|
|
1370
1789
|
human: t.Optional[bool] = None,
|
|
1371
1790
|
pretty: t.Optional[bool] = None,
|
|
1791
|
+
task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
|
|
1792
|
+
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
1372
1793
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
1373
1794
|
) -> ObjectApiResponse[t.Any]:
|
|
1374
1795
|
"""
|
|
1375
1796
|
.. raw:: html
|
|
1376
1797
|
|
|
1377
1798
|
<p>Create a Hugging Face inference endpoint.</p>
|
|
1378
|
-
<p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
<p>
|
|
1799
|
+
<p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service.
|
|
1800
|
+
Supported tasks include: <code>text_embedding</code>, <code>completion</code>, and <code>chat_completion</code>.</p>
|
|
1801
|
+
<p>To configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.
|
|
1802
|
+
Select a model that supports the task you intend to use.</p>
|
|
1803
|
+
<p>For Elastic's <code>text_embedding</code> task:
|
|
1804
|
+
The selected model must support the <code>Sentence Embeddings</code> task. On the new endpoint creation page, select the <code>Sentence Embeddings</code> task under the <code>Advanced Configuration</code> section.
|
|
1805
|
+
After the endpoint has initialized, copy the generated endpoint URL.
|
|
1806
|
+
Recommended models for <code>text_embedding</code> task:</p>
|
|
1383
1807
|
<ul>
|
|
1384
1808
|
<li><code>all-MiniLM-L6-v2</code></li>
|
|
1385
1809
|
<li><code>all-MiniLM-L12-v2</code></li>
|
|
@@ -1389,9 +1813,27 @@ class InferenceClient(NamespacedClient):
           <li><code>multilingual-e5-base</code></li>
           <li><code>multilingual-e5-small</code></li>
           </ul>
+          <p>For Elastic's <code>chat_completion</code> and <code>completion</code> tasks:
+          The selected model must support the <code>Text Generation</code> task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for <code>Text Generation</code>. When creating dedicated endpoint select the <code>Text Generation</code> task.
+          After the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes <code>/v1/chat/completions</code> part in URL. Then, copy the full endpoint URL for use.
+          Recommended models for <code>chat_completion</code> and <code>completion</code> tasks:</p>
+          <ul>
+          <li><code>Mistral-7B-Instruct-v0.2</code></li>
+          <li><code>QwQ-32B</code></li>
+          <li><code>Phi-3-mini-128k-instruct</code></li>
+          </ul>
+          <p>For Elastic's <code>rerank</code> task:
+          The selected model must support the <code>sentence-ranking</code> task and expose OpenAI API.
+          HuggingFace supports only dedicated (not serverless) endpoints for <code>Rerank</code> so far.
+          After the endpoint is initialized, copy the full endpoint URL for use.
+          Tested models for <code>rerank</code> task:</p>
+          <ul>
+          <li><code>bge-reranker-base</code></li>
+          <li><code>jina-reranker-v1-turbo-en-GGUF</code></li>
+          </ul>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-hugging-face>`_

         :param task_type: The type of the inference task that the model will perform.
         :param huggingface_inference_id: The unique identifier of the inference endpoint.
@@ -1400,6 +1842,10 @@ class InferenceClient(NamespacedClient):
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `hugging_face` service.
         :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1426,6 +1872,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1433,6 +1881,8 @@ class InferenceClient(NamespacedClient):
                 __body["service_settings"] = service_settings
             if chunking_settings is not None:
                 __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
             if not __body:
                 __body = None  # type: ignore[assignment]
         __headers = {"accept": "application/json"}
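Since the docstring above now walks through Hugging Face endpoint setup in some detail, a usage sketch may help tie the new pieces together. The following is a minimal, hypothetical example of creating a `text_embedding` endpoint with the `timeout` and `task_settings` parameters added in this release; the endpoint URL, API key, and inference id are placeholders, and the `url`/`api_key` keys inside `service_settings` follow Elastic's inference documentation rather than anything shown in this diff.

```python
import asyncio

from elasticsearch import AsyncElasticsearch


async def main() -> None:
    client = AsyncElasticsearch("http://localhost:9200")

    # Create a Hugging Face `text_embedding` endpoint. The new `timeout`
    # parameter bounds how long to wait for the endpoint to be created and
    # is sent as the `timeout` query parameter, per the hunk above.
    resp = await client.inference.put_hugging_face(
        task_type="text_embedding",
        huggingface_inference_id="hf-minilm-embeddings",
        service="hugging_face",
        service_settings={
            "url": "https://<endpoint>.endpoints.huggingface.cloud",  # placeholder
            "api_key": "<hf-api-key>",  # placeholder
        },
        timeout="30s",
    )
    print(resp)
    await client.close()


asyncio.run(main())
```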
@@ -1469,6 +1919,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1480,7 +1931,7 @@ class InferenceClient(NamespacedClient):
           To review the available <code>text_embedding</code> models, refer to the <a href="https://jina.ai/embeddings/">https://jina.ai/embeddings/</a>.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-jinaai>`_

         :param task_type: The type of the inference task that the model will perform.
         :param jinaai_inference_id: The unique identifier of the inference endpoint.
@@ -1491,6 +1942,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1515,6 +1968,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
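The JinaAI endpoint gains the same `timeout` wiring. A comparable sketch, where the `api_key`/`model_id` service-setting keys are assumptions taken from Elastic's inference documentation (they do not appear in this diff):

```python
import asyncio

from elasticsearch import AsyncElasticsearch


async def main() -> None:
    client = AsyncElasticsearch("http://localhost:9200")
    # Same call shape as put_hugging_face; `timeout` is forwarded exactly as
    # in the `__query["timeout"] = timeout` hunk above.
    resp = await client.inference.put_jinaai(
        task_type="text_embedding",
        jinaai_inference_id="jinaai-embeddings",
        service="jinaai",
        service_settings={
            "api_key": "<jina-api-key>",  # placeholder
            "model_id": "jina-embeddings-v3",  # assumed setting key
        },
        timeout="30s",
    )
    print(resp)
    await client.close()


asyncio.run(main())
```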
@@ -1545,7 +2000,9 @@ class InferenceClient(NamespacedClient):
     async def put_mistral(
         self,
         *,
-        task_type: t.Union[
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "text_embedding"]
+        ],
         mistral_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["mistral"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1554,25 +2011,27 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html

           <p>Create a Mistral inference endpoint.</p>
-          <p>
+          <p>Create an inference endpoint to perform an inference task with the <code>mistral</code> service.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-mistral>`_

-        :param task_type: The
-            is `text_embedding`.
+        :param task_type: The type of the inference task that the model will perform.
         :param mistral_inference_id: The unique identifier of the inference endpoint.
         :param service: The type of service supported for the specified task type. In
             this case, `mistral`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `mistral` service.
         :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1597,6 +2056,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
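Besides the `timeout` plumbing, the signature change above widens the `task_type` annotation so that `chat_completion` and `completion` are accepted alongside `text_embedding`. A hypothetical sketch of a call that the 9.0.x `Literal` annotation did not cover; the ids are placeholders and the `api_key`/`model` setting keys are assumptions from Elastic's docs:

```python
import asyncio

from elasticsearch import AsyncElasticsearch


async def main() -> None:
    client = AsyncElasticsearch("http://localhost:9200")
    # In 9.0.x the type hints only allowed "text_embedding" for mistral;
    # 9.1.0 adds "chat_completion" and "completion".
    resp = await client.inference.put_mistral(
        task_type="chat_completion",
        mistral_inference_id="mistral-chat",
        service="mistral",
        service_settings={
            "api_key": "<mistral-api-key>",  # placeholder
            "model": "mistral-small-latest",  # assumed setting key
        },
        timeout="30s",
    )
    print(resp)
    await client.close()


asyncio.run(main())
```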
@@ -1642,6 +2103,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1651,7 +2113,7 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>openai</code> service or <code>openai</code> compatible APIs.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openai>`_

         :param task_type: The type of the inference task that the model will perform.
             NOTE: The `chat_completion` task type only supports streaming and only through
@@ -1664,6 +2126,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1688,6 +2152,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
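Given the docstring's NOTE that `chat_completion` only works over the streaming APIs, the simplest blocking endpoint to create against the `openai` service is a `completion` one. A sketch, assuming the id parameter is named `openai_inference_id` by analogy with the other services in this diff, and that `api_key`/`model_id` are the relevant service-setting keys per Elastic's docs:

```python
import asyncio

from elasticsearch import AsyncElasticsearch


async def main() -> None:
    client = AsyncElasticsearch("http://localhost:9200")
    resp = await client.inference.put_openai(
        task_type="completion",  # chat_completion is streaming-only per the NOTE
        openai_inference_id="openai-completion",  # assumed parameter name
        service="openai",
        service_settings={
            "api_key": "<openai-api-key>",  # placeholder
            "model_id": "gpt-4o-mini",  # assumed setting key
        },
        timeout="30s",  # new in 9.1.0
    )
    print(resp)
    await client.close()


asyncio.run(main())
```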
@@ -1733,6 +2199,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1743,7 +2210,7 @@ class InferenceClient(NamespacedClient):
           <p>Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-voyageai>`_

         :param task_type: The type of the inference task that the model will perform.
         :param voyageai_inference_id: The unique identifier of the inference endpoint.
@@ -1754,6 +2221,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1778,6 +2247,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1808,7 +2279,9 @@ class InferenceClient(NamespacedClient):
     async def put_watsonx(
         self,
         *,
-        task_type: t.Union[
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "text_embedding"]
+        ],
         watsonx_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["watsonxai"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1816,6 +2289,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1827,15 +2301,16 @@ class InferenceClient(NamespacedClient):
           You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-watsonx>`_

-        :param task_type: The
-            is `text_embedding`.
+        :param task_type: The type of the inference task that the model will perform.
         :param watsonx_inference_id: The unique identifier of the inference endpoint.
         :param service: The type of service supported for the specified task type. In
             this case, `watsonxai`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `watsonxai` service.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1860,6 +2335,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1900,10 +2377,10 @@ class InferenceClient(NamespacedClient):
         """
         .. raw:: html

-          <p>Perform
+          <p>Perform reranking inference on the service</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_

         :param inference_id: The unique identifier for the inference endpoint.
         :param input: The text on which you want to perform the inference task. It can
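With the docstring now reading "Perform reranking inference on the service", a short sketch of calling it against any rerank-capable endpoint may be useful. The endpoint id is a placeholder, and the `query` parameter is taken from the inference API documentation since this hunk is cut off before it:

```python
import asyncio

from elasticsearch import AsyncElasticsearch


async def main() -> None:
    client = AsyncElasticsearch("http://localhost:9200")
    # Rank three candidate passages against a query; the id is a placeholder
    # for any endpoint created with task_type="rerank".
    resp = await client.inference.rerank(
        inference_id="my-rerank-endpoint",
        query="What is the capital of Austria?",
        input=[
            "Vienna is the capital of Austria.",
            "Berlin is the capital of Germany.",
            "Salzburg is known for Mozart.",
        ],
    )
    print(resp)
    await client.close()


asyncio.run(main())
```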
@@ -1979,7 +2456,7 @@ class InferenceClient(NamespacedClient):
           <p>Perform sparse embedding inference on the service</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_

         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
@@ -2047,7 +2524,7 @@ class InferenceClient(NamespacedClient):
           <p>Perform text embedding inference on the service</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_

         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
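And the matching call for dense embeddings, where `input` may be a single string or a list of strings per the docstring above. A minimal sketch; the endpoint id is a placeholder, for example the Hugging Face endpoint sketched earlier:

```python
import asyncio

from elasticsearch import AsyncElasticsearch


async def main() -> None:
    client = AsyncElasticsearch("http://localhost:9200")
    resp = await client.inference.text_embedding(
        inference_id="hf-minilm-embeddings",  # placeholder endpoint id
        input=["first sentence to embed", "second sentence to embed"],
    )
    print(resp)
    await client.close()


asyncio.run(main())
```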
@@ -2129,7 +2606,7 @@ class InferenceClient(NamespacedClient):
           However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>


-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-update>`_

         :param inference_id: The unique identifier of the inference endpoint.
         :param inference_config: