elasticsearch9 9.0.4__py3-none-any.whl → 9.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- elasticsearch9/__init__.py +3 -3
- elasticsearch9/_async/client/__init__.py +42 -42
- elasticsearch9/_async/client/async_search.py +4 -4
- elasticsearch9/_async/client/autoscaling.py +4 -4
- elasticsearch9/_async/client/cat.py +26 -26
- elasticsearch9/_async/client/ccr.py +13 -13
- elasticsearch9/_async/client/cluster.py +25 -20
- elasticsearch9/_async/client/connector.py +30 -30
- elasticsearch9/_async/client/dangling_indices.py +3 -3
- elasticsearch9/_async/client/enrich.py +5 -5
- elasticsearch9/_async/client/eql.py +4 -4
- elasticsearch9/_async/client/esql.py +112 -5
- elasticsearch9/_async/client/features.py +2 -2
- elasticsearch9/_async/client/fleet.py +3 -3
- elasticsearch9/_async/client/graph.py +1 -1
- elasticsearch9/_async/client/ilm.py +11 -11
- elasticsearch9/_async/client/indices.py +504 -69
- elasticsearch9/_async/client/inference.py +419 -46
- elasticsearch9/_async/client/ingest.py +9 -9
- elasticsearch9/_async/client/license.py +7 -7
- elasticsearch9/_async/client/logstash.py +3 -3
- elasticsearch9/_async/client/migration.py +3 -3
- elasticsearch9/_async/client/ml.py +72 -73
- elasticsearch9/_async/client/nodes.py +7 -7
- elasticsearch9/_async/client/query_rules.py +8 -8
- elasticsearch9/_async/client/rollup.py +8 -8
- elasticsearch9/_async/client/search_application.py +10 -10
- elasticsearch9/_async/client/searchable_snapshots.py +4 -4
- elasticsearch9/_async/client/security.py +68 -64
- elasticsearch9/_async/client/shutdown.py +3 -3
- elasticsearch9/_async/client/simulate.py +1 -1
- elasticsearch9/_async/client/slm.py +9 -9
- elasticsearch9/_async/client/snapshot.py +58 -21
- elasticsearch9/_async/client/sql.py +6 -6
- elasticsearch9/_async/client/ssl.py +1 -1
- elasticsearch9/_async/client/synonyms.py +25 -7
- elasticsearch9/_async/client/tasks.py +4 -4
- elasticsearch9/_async/client/text_structure.py +4 -4
- elasticsearch9/_async/client/transform.py +11 -11
- elasticsearch9/_async/client/watcher.py +13 -13
- elasticsearch9/_async/client/xpack.py +2 -2
- elasticsearch9/_otel.py +8 -8
- elasticsearch9/_sync/client/__init__.py +42 -42
- elasticsearch9/_sync/client/async_search.py +4 -4
- elasticsearch9/_sync/client/autoscaling.py +4 -4
- elasticsearch9/_sync/client/cat.py +26 -26
- elasticsearch9/_sync/client/ccr.py +13 -13
- elasticsearch9/_sync/client/cluster.py +25 -20
- elasticsearch9/_sync/client/connector.py +30 -30
- elasticsearch9/_sync/client/dangling_indices.py +3 -3
- elasticsearch9/_sync/client/enrich.py +5 -5
- elasticsearch9/_sync/client/eql.py +4 -4
- elasticsearch9/_sync/client/esql.py +112 -5
- elasticsearch9/_sync/client/features.py +2 -2
- elasticsearch9/_sync/client/fleet.py +3 -3
- elasticsearch9/_sync/client/graph.py +1 -1
- elasticsearch9/_sync/client/ilm.py +11 -11
- elasticsearch9/_sync/client/indices.py +504 -69
- elasticsearch9/_sync/client/inference.py +419 -46
- elasticsearch9/_sync/client/ingest.py +9 -9
- elasticsearch9/_sync/client/license.py +7 -7
- elasticsearch9/_sync/client/logstash.py +3 -3
- elasticsearch9/_sync/client/migration.py +3 -3
- elasticsearch9/_sync/client/ml.py +72 -73
- elasticsearch9/_sync/client/nodes.py +7 -7
- elasticsearch9/_sync/client/query_rules.py +8 -8
- elasticsearch9/_sync/client/rollup.py +8 -8
- elasticsearch9/_sync/client/search_application.py +10 -10
- elasticsearch9/_sync/client/searchable_snapshots.py +4 -4
- elasticsearch9/_sync/client/security.py +68 -64
- elasticsearch9/_sync/client/shutdown.py +3 -3
- elasticsearch9/_sync/client/simulate.py +1 -1
- elasticsearch9/_sync/client/slm.py +9 -9
- elasticsearch9/_sync/client/snapshot.py +58 -21
- elasticsearch9/_sync/client/sql.py +6 -6
- elasticsearch9/_sync/client/ssl.py +1 -1
- elasticsearch9/_sync/client/synonyms.py +25 -7
- elasticsearch9/_sync/client/tasks.py +4 -4
- elasticsearch9/_sync/client/text_structure.py +4 -4
- elasticsearch9/_sync/client/transform.py +11 -11
- elasticsearch9/_sync/client/watcher.py +13 -13
- elasticsearch9/_sync/client/xpack.py +2 -2
- elasticsearch9/_version.py +1 -1
- elasticsearch9/dsl/aggs.py +20 -0
- elasticsearch9/dsl/document_base.py +2 -3
- elasticsearch9/dsl/field.py +18 -0
- elasticsearch9/dsl/query.py +1 -1
- elasticsearch9/dsl/response/__init__.py +1 -1
- elasticsearch9/dsl/types.py +163 -5
- {elasticsearch9-9.0.4.dist-info → elasticsearch9-9.1.1.dist-info}/METADATA +3 -4
- elasticsearch9-9.1.1.dist-info/RECORD +163 -0
- elasticsearch9-9.0.4.dist-info/RECORD +0 -163
- {elasticsearch9-9.0.4.dist-info → elasticsearch9-9.1.1.dist-info}/WHEEL +0 -0
- {elasticsearch9-9.0.4.dist-info → elasticsearch9-9.1.1.dist-info}/licenses/LICENSE +0 -0
- {elasticsearch9-9.0.4.dist-info → elasticsearch9-9.1.1.dist-info}/licenses/NOTICE +0 -0
elasticsearch9/_async/client/inference.py
@@ -47,7 +47,7 @@ class InferenceClient(NamespacedClient):
           <p>Perform completion inference on the service</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_
 
         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.

@@ -123,7 +123,7 @@ class InferenceClient(NamespacedClient):
           <p>Delete an inference endpoint</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-delete>`_
 
         :param inference_id: The inference identifier.
         :param task_type: The task type

@@ -197,7 +197,7 @@ class InferenceClient(NamespacedClient):
           <p>Get an inference endpoint</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-get>`_
 
         :param task_type: The task type
         :param inference_id: The inference Id

@@ -235,7 +235,7 @@ class InferenceClient(NamespacedClient):
         )
 
     @_rewrite_parameters(
-        body_fields=("input", "query", "task_settings"),
+        body_fields=("input", "input_type", "query", "task_settings"),
     )
     async def inference(
         self,

@@ -257,6 +257,7 @@ class InferenceClient(NamespacedClient):
         error_trace: t.Optional[bool] = None,
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
+        input_type: t.Optional[str] = None,
         pretty: t.Optional[bool] = None,
         query: t.Optional[str] = None,
         task_settings: t.Optional[t.Any] = None,

@@ -277,13 +278,22 @@ class InferenceClient(NamespacedClient):
           </blockquote>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_
 
         :param inference_id: The unique identifier for the inference endpoint.
         :param input: The text on which you want to perform the inference task. It can
             be a single string or an array. > info > Inference endpoints for the `completion`
             task type currently only support a single string as input.
         :param task_type: The type of inference task that the model performs.
+        :param input_type: Specifies the input data type for the text embedding model.
+            The `input_type` parameter only applies to Inference Endpoints with the `text_embedding`
+            task type. Possible values include: * `SEARCH` * `INGEST` * `CLASSIFICATION`
+            * `CLUSTERING` Not all services support all values. Unsupported values will
+            trigger a validation exception. Accepted values depend on the configured
+            inference service, refer to the relevant service-specific documentation for
+            more info. > info > The `input_type` parameter specified on the root level
+            of the request body will take precedence over the `input_type` parameter
+            specified in `task_settings`.
         :param query: The query input, which is required only for the `rerank` task.
             It is not required for other tasks.
         :param task_settings: Task settings for the individual inference request. These

@@ -322,6 +332,8 @@ class InferenceClient(NamespacedClient):
         if not __body:
             if input is not None:
                 __body["input"] = input
+            if input_type is not None:
+                __body["input_type"] = input_type
             if query is not None:
                 __body["query"] = query
             if task_settings is not None:
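Taken together, the hunks above add a new `input_type` keyword to the generic inference API: it is declared in `body_fields`, accepted by `inference()`, documented, and serialized into the request body. A minimal sketch of calling it against a `text_embedding` endpoint (the host and endpoint ID are hypothetical, and which values a given service accepts varies, per the docstring):

    import asyncio

    from elasticsearch9 import AsyncElasticsearch

    async def main() -> None:
        client = AsyncElasticsearch("http://localhost:9200")  # assumed local cluster
        # input_type is new in 9.1.x; documented values are SEARCH, INGEST,
        # CLASSIFICATION, CLUSTERING (support is service-dependent)
        resp = await client.inference.inference(
            task_type="text_embedding",
            inference_id="my-text-embedding-endpoint",  # hypothetical endpoint ID
            input=["first passage", "second passage"],
            input_type="SEARCH",
        )
        print(resp)
        await client.close()

    asyncio.run(main())

Per the new docstring text, a root-level `input_type` takes precedence over an `input_type` set inside `task_settings`, so callers can migrate that hint to the top level.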
@@ -379,24 +391,27 @@ class InferenceClient(NamespacedClient):
           <ul>
           <li>AlibabaCloud AI Search (<code>completion</code>, <code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code>)</li>
           <li>Amazon Bedrock (<code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Amazon SageMaker (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code>)</li>
           <li>Anthropic (<code>completion</code>)</li>
           <li>Azure AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
           <li>Azure OpenAI (<code>completion</code>, <code>text_embedding</code>)</li>
           <li>Cohere (<code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+          <li>DeepSeek (<code>chat_completion</code>, <code>completion</code>)</li>
           <li>Elasticsearch (<code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code> - this service is for built-in models and models uploaded through Eland)</li>
           <li>ELSER (<code>sparse_embedding</code>)</li>
           <li>Google AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
-          <li>Google Vertex AI (<code>rerank</code>, <code>text_embedding</code>)</li>
-          <li>Hugging Face (<code>text_embedding</code>)</li>
-          <li>
+          <li>Google Vertex AI (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+          <li>Hugging Face (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+          <li>JinaAI (<code>rerank</code>, <code>text_embedding</code>)</li>
+          <li>Llama (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Mistral (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
           <li>OpenAI (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
-          <li>VoyageAI (<code>
+          <li>VoyageAI (<code>rerank</code>, <code>text_embedding</code>)</li>
           <li>Watsonx inference integration (<code>text_embedding</code>)</li>
-          <li>JinaAI (<code>text_embedding</code>, <code>rerank</code>)</li>
           </ul>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put>`_
 
         :param inference_id: The inference Id
         :param inference_config:

@@ -481,7 +496,7 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>alibabacloud-ai-search</code> service.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-alibabacloud>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param alibabacloud_inference_id: The unique identifier of the inference endpoint.

@@ -581,7 +596,7 @@ class InferenceClient(NamespacedClient):
           </blockquote>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonbedrock>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param amazonbedrock_inference_id: The unique identifier of the inference endpoint.

@@ -646,6 +661,112 @@ class InferenceClient(NamespacedClient):
             path_parts=__path_parts,
         )
 
+    @_rewrite_parameters(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
+    )
+    async def put_amazonsagemaker(
+        self,
+        *,
+        task_type: t.Union[
+            str,
+            t.Literal[
+                "chat_completion",
+                "completion",
+                "rerank",
+                "sparse_embedding",
+                "text_embedding",
+            ],
+        ],
+        amazonsagemaker_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["amazon_sagemaker"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+          <p>Create an Amazon SageMaker inference endpoint.</p>
+          <p>Create an inference endpoint to perform an inference task with the <code>amazon_sagemaker</code> service.</p>
+
+
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonsagemaker>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param amazonsagemaker_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `amazon_sagemaker`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `amazon_sagemaker` service and `service_settings.api`
+            you specified.
+        :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type and `service_settings.api` you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if amazonsagemaker_inference_id in SKIP_IN_PATH:
+            raise ValueError(
+                "Empty value passed for parameter 'amazonsagemaker_inference_id'"
+            )
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "amazonsagemaker_inference_id": _quote(amazonsagemaker_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["amazonsagemaker_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_amazonsagemaker",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_fields=(
             "service",
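The new `put_amazonsagemaker()` method follows the same validate-then-PUT pattern as the other per-service methods. A hedged sketch of registering an endpoint with it (the service_settings keys shown here, access_key, secret_key, endpoint_name, region, and api, are illustrative rather than a verified schema; see the linked operation docs):

    from elasticsearch9 import AsyncElasticsearch

    async def create_sagemaker_endpoint(client: AsyncElasticsearch) -> None:
        # Registers /_inference/text_embedding/my-sagemaker-endpoint (hypothetical ID)
        await client.inference.put_amazonsagemaker(
            task_type="text_embedding",
            amazonsagemaker_inference_id="my-sagemaker-endpoint",
            service="amazon_sagemaker",
            service_settings={  # illustrative keys only
                "access_key": "<aws access key>",
                "secret_key": "<aws secret key>",
                "endpoint_name": "my-sagemaker-model",
                "region": "us-east-1",
                "api": "openai",
            },
            timeout="30s",
        )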
@@ -677,7 +798,7 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>anthropic</code> service.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-anthropic>`_
 
         :param task_type: The task type. The only valid task type for the model to perform
            is `completion`.

@@ -774,7 +895,7 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>azureaistudio</code> service.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureaistudio>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param azureaistudio_inference_id: The unique identifier of the inference endpoint.

@@ -876,7 +997,7 @@ class InferenceClient(NamespacedClient):
           <p>The list of embeddings models that you can choose from in your deployment can be found in the <a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings">Azure models documentation</a>.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureopenai>`_
 
         :param task_type: The type of the inference task that the model will perform.
             NOTE: The `chat_completion` task type only supports streaming and only through

@@ -974,7 +1095,7 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>cohere</code> service.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-cohere>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param cohere_inference_id: The unique identifier of the inference endpoint.

@@ -1037,6 +1158,221 @@ class InferenceClient(NamespacedClient):
             path_parts=__path_parts,
         )
 
+    @_rewrite_parameters(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
+    )
+    async def put_custom(
+        self,
+        *,
+        task_type: t.Union[
+            str, t.Literal["completion", "rerank", "sparse_embedding", "text_embedding"]
+        ],
+        custom_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["custom"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+          <p>Create a custom inference endpoint.</p>
+          <p>The custom service gives more control over how to interact with external inference services that aren't explicitly supported through dedicated integrations.
+          The custom service gives you the ability to define the headers, url, query parameters, request body, and secrets.
+          The custom service supports the template replacement functionality, which enables you to define a template that can be replaced with the value associated with that key.
+          Templates are portions of a string that start with <code>${</code> and end with <code>}</code>.
+          The parameters <code>secret_parameters</code> and <code>task_settings</code> are checked for keys for template replacement. Template replacement is supported in the <code>request</code>, <code>headers</code>, <code>url</code>, and <code>query_parameters</code>.
+          If the definition (key) is not found for a template, an error message is returned.
+          In case of an endpoint definition like the following:</p>
+          <pre><code>PUT _inference/text_embedding/test-text-embedding
+          {
+            "service": "custom",
+            "service_settings": {
+              "secret_parameters": {
+                "api_key": "<some api key>"
+              },
+              "url": "...endpoints.huggingface.cloud/v1/embeddings",
+              "headers": {
+                "Authorization": "Bearer ${api_key}",
+                "Content-Type": "application/json"
+              },
+              "request": "{\\"input\\": ${input}}",
+              "response": {
+                "json_parser": {
+                  "text_embeddings":"$.data[*].embedding[*]"
+                }
+              }
+            }
+          }
+          </code></pre>
+          <p>To replace <code>${api_key}</code> the <code>secret_parameters</code> and <code>task_settings</code> are checked for a key named <code>api_key</code>.</p>
+          <blockquote>
+          <p>info
+          Templates should not be surrounded by quotes.</p>
+          </blockquote>
+          <p>Pre-defined templates:</p>
+          <ul>
+          <li><code>${input}</code> refers to the array of input strings that comes from the <code>input</code> field of the subsequent inference requests.</li>
+          <li><code>${input_type}</code> refers to the input type translation values.</li>
+          <li><code>${query}</code> refers to the query field used specifically for reranking tasks.</li>
+          <li><code>${top_n}</code> refers to the <code>top_n</code> field available when performing rerank requests.</li>
+          <li><code>${return_documents}</code> refers to the <code>return_documents</code> field available when performing rerank requests.</li>
+          </ul>
+
+
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param custom_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `custom`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `custom` service.
+        :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type you specified.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if custom_inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'custom_inference_id'")
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "custom_inference_id": _quote(custom_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["custom_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_custom",
+            path_parts=__path_parts,
+        )
+
+    @_rewrite_parameters(
+        body_fields=("service", "service_settings", "chunking_settings"),
+    )
+    async def put_deepseek(
+        self,
+        *,
+        task_type: t.Union[str, t.Literal["chat_completion", "completion"]],
+        deepseek_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["deepseek"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+          <p>Create a DeepSeek inference endpoint.</p>
+          <p>Create an inference endpoint to perform an inference task with the <code>deepseek</code> service.</p>
+
+
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-deepseek>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param deepseek_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `deepseek`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `deepseek` service.
+        :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if deepseek_inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'deepseek_inference_id'")
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "deepseek_inference_id": _quote(deepseek_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["deepseek_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_deepseek",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_fields=(
             "service",
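The REST example embedded in the `put_custom` docstring maps one-to-one onto the new client method; `put_deepseek` follows the same shape minus `task_settings`. A sketch of both (the custom-endpoint body mirrors the docstring, including its elided `...endpoints.huggingface.cloud` host; the DeepSeek service_settings keys are illustrative, not a verified schema):

    from elasticsearch9 import AsyncElasticsearch

    async def create_custom_endpoint(client: AsyncElasticsearch) -> None:
        # Mirrors the PUT _inference/text_embedding/test-text-embedding docstring example
        await client.inference.put_custom(
            task_type="text_embedding",
            custom_inference_id="test-text-embedding",
            service="custom",
            service_settings={
                "secret_parameters": {"api_key": "<some api key>"},
                "url": "...endpoints.huggingface.cloud/v1/embeddings",
                "headers": {
                    # ${api_key} is template-replaced from secret_parameters/task_settings
                    "Authorization": "Bearer ${api_key}",
                    "Content-Type": "application/json",
                },
                # ${input} expands to the input array of each inference request
                "request": '{"input": ${input}}',
                "response": {
                    "json_parser": {"text_embeddings": "$.data[*].embedding[*]"}
                },
            },
        )

    async def create_deepseek_endpoint(client: AsyncElasticsearch) -> None:
        await client.inference.put_deepseek(
            task_type="chat_completion",
            deepseek_inference_id="my-deepseek-endpoint",  # hypothetical ID
            service="deepseek",
            # illustrative keys only
            service_settings={"api_key": "<deepseek api key>", "model_id": "deepseek-chat"},
        )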
@@ -1083,7 +1419,7 @@ class InferenceClient(NamespacedClient):
           Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-elasticsearch>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param elasticsearch_inference_id: The unique identifier of the inference endpoint.

@@ -1188,7 +1524,7 @@ class InferenceClient(NamespacedClient):
           Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-elser>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param elser_inference_id: The unique identifier of the inference endpoint.

@@ -1272,7 +1608,7 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>googleaistudio</code> service.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-googleaistudio>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param googleaistudio_inference_id: The unique identifier of the inference endpoint.

@@ -1344,7 +1680,9 @@ class InferenceClient(NamespacedClient):
     async def put_googlevertexai(
         self,
         *,
-        task_type: t.Union[
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
+        ],
         googlevertexai_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["googlevertexai"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,

@@ -1364,7 +1702,7 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>googlevertexai</code> service.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-googlevertexai>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param googlevertexai_inference_id: The unique identifier of the inference endpoint.

@@ -1430,12 +1768,19 @@ class InferenceClient(NamespacedClient):
         )
 
     @_rewrite_parameters(
-        body_fields=(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
     )
     async def put_hugging_face(
         self,
         *,
-        task_type: t.Union[
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
+        ],
         huggingface_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["hugging_face"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,

@@ -1444,6 +1789,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
         timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:

@@ -1451,11 +1797,14 @@ class InferenceClient(NamespacedClient):
         .. raw:: html
 
           <p>Create a Hugging Face inference endpoint.</p>
-          <p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service
-
-
-
-          <p>
+          <p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service.
+          Supported tasks include: <code>text_embedding</code>, <code>completion</code>, and <code>chat_completion</code>.</p>
+          <p>To configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.
+          Select a model that supports the task you intend to use.</p>
+          <p>For Elastic's <code>text_embedding</code> task:
+          The selected model must support the <code>Sentence Embeddings</code> task. On the new endpoint creation page, select the <code>Sentence Embeddings</code> task under the <code>Advanced Configuration</code> section.
+          After the endpoint has initialized, copy the generated endpoint URL.
+          Recommended models for <code>text_embedding</code> task:</p>
           <ul>
           <li><code>all-MiniLM-L6-v2</code></li>
           <li><code>all-MiniLM-L12-v2</code></li>

@@ -1465,9 +1814,27 @@ class InferenceClient(NamespacedClient):
           <li><code>multilingual-e5-base</code></li>
           <li><code>multilingual-e5-small</code></li>
           </ul>
+          <p>For Elastic's <code>chat_completion</code> and <code>completion</code> tasks:
+          The selected model must support the <code>Text Generation</code> task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for <code>Text Generation</code>. When creating dedicated endpoint select the <code>Text Generation</code> task.
+          After the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes <code>/v1/chat/completions</code> part in URL. Then, copy the full endpoint URL for use.
+          Recommended models for <code>chat_completion</code> and <code>completion</code> tasks:</p>
+          <ul>
+          <li><code>Mistral-7B-Instruct-v0.2</code></li>
+          <li><code>QwQ-32B</code></li>
+          <li><code>Phi-3-mini-128k-instruct</code></li>
+          </ul>
+          <p>For Elastic's <code>rerank</code> task:
+          The selected model must support the <code>sentence-ranking</code> task and expose OpenAI API.
+          HuggingFace supports only dedicated (not serverless) endpoints for <code>Rerank</code> so far.
+          After the endpoint is initialized, copy the full endpoint URL for use.
+          Tested models for <code>rerank</code> task:</p>
+          <ul>
+          <li><code>bge-reranker-base</code></li>
+          <li><code>jina-reranker-v1-turbo-en-GGUF</code></li>
+          </ul>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-hugging-face>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param huggingface_inference_id: The unique identifier of the inference endpoint.

@@ -1476,6 +1843,8 @@ class InferenceClient(NamespacedClient):
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `hugging_face` service.
         :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type you specified.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
             to be created.
         """
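With `rerank` added to the task-type Literal and `task_settings` now plumbed through, a Hugging Face reranker can be registered roughly like this (the endpoint ID, URL placeholder, and the service/task settings keys are illustrative; the docstring above explains where the real dedicated-endpoint URL comes from):

    from elasticsearch9 import AsyncElasticsearch

    async def create_hf_rerank_endpoint(client: AsyncElasticsearch) -> None:
        await client.inference.put_hugging_face(
            task_type="rerank",  # newly allowed alongside chat_completion and completion
            huggingface_inference_id="my-hf-rerank-endpoint",  # hypothetical ID
            service="hugging_face",
            service_settings={  # illustrative keys
                "api_key": "<hugging face access token>",
                "url": "<dedicated rerank endpoint URL copied from Hugging Face>",
            },
            task_settings={"return_documents": True, "top_n": 3},  # illustrative keys
        )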
@@ -1513,6 +1882,8 @@ class InferenceClient(NamespacedClient):
                 __body["service_settings"] = service_settings
             if chunking_settings is not None:
                 __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
         if not __body:
             __body = None  # type: ignore[assignment]
         __headers = {"accept": "application/json"}

@@ -1561,7 +1932,7 @@ class InferenceClient(NamespacedClient):
           To review the available <code>text_embedding</code> models, refer to the <a href="https://jina.ai/embeddings/">https://jina.ai/embeddings/</a>.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-jinaai>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param jinaai_inference_id: The unique identifier of the inference endpoint.

@@ -1630,7 +2001,9 @@ class InferenceClient(NamespacedClient):
     async def put_mistral(
         self,
         *,
-        task_type: t.Union[
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "text_embedding"]
+        ],
         mistral_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["mistral"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,

@@ -1646,13 +2019,12 @@ class InferenceClient(NamespacedClient):
         .. raw:: html
 
           <p>Create a Mistral inference endpoint.</p>
-          <p>
+          <p>Create an inference endpoint to perform an inference task with the <code>mistral</code> service.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-mistral>`_
 
-        :param task_type: The
-        is `text_embedding`.
+        :param task_type: The type of the inference task that the model will perform.
         :param mistral_inference_id: The unique identifier of the inference endpoint.
         :param service: The type of service supported for the specified task type. In
             this case, `mistral`.
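The widened Literal means Mistral endpoints are no longer limited to `text_embedding`. A hedged sketch of the new `chat_completion` path (the service_settings keys are illustrative, not a verified schema):

    from elasticsearch9 import AsyncElasticsearch

    async def create_mistral_chat_endpoint(client: AsyncElasticsearch) -> None:
        await client.inference.put_mistral(
            task_type="chat_completion",  # 9.1.x adds chat_completion and completion
            mistral_inference_id="my-mistral-chat-endpoint",  # hypothetical ID
            service="mistral",
            # illustrative keys only
            service_settings={"api_key": "<mistral api key>", "model": "mistral-small-latest"},
        )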
@@ -1742,7 +2114,7 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>openai</code> service or <code>openai</code> compatible APIs.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openai>`_
 
         :param task_type: The type of the inference task that the model will perform.
             NOTE: The `chat_completion` task type only supports streaming and only through

@@ -1839,7 +2211,7 @@ class InferenceClient(NamespacedClient):
           <p>Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-voyageai>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param voyageai_inference_id: The unique identifier of the inference endpoint.

@@ -1908,7 +2280,9 @@ class InferenceClient(NamespacedClient):
     async def put_watsonx(
         self,
         *,
-        task_type: t.Union[
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "text_embedding"]
+        ],
         watsonx_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["watsonxai"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,

@@ -1928,10 +2302,9 @@ class InferenceClient(NamespacedClient):
           You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-watsonx>`_
 
-        :param task_type: The
-        is `text_embedding`.
+        :param task_type: The type of the inference task that the model will perform.
         :param watsonx_inference_id: The unique identifier of the inference endpoint.
         :param service: The type of service supported for the specified task type. In
             this case, `watsonxai`.

@@ -2008,7 +2381,7 @@ class InferenceClient(NamespacedClient):
           <p>Perform reranking inference on the service</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_
 
         :param inference_id: The unique identifier for the inference endpoint.
         :param input: The text on which you want to perform the inference task. It can

@@ -2084,7 +2457,7 @@ class InferenceClient(NamespacedClient):
           <p>Perform sparse embedding inference on the service</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_
 
         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.

@@ -2152,7 +2525,7 @@ class InferenceClient(NamespacedClient):
           <p>Perform text embedding inference on the service</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_
 
         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.

@@ -2234,7 +2607,7 @@ class InferenceClient(NamespacedClient):
           However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>
 
 
-        `<https://www.elastic.co/docs/api/doc/elasticsearch/
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-update>`_
 
         :param inference_id: The unique identifier of the inference endpoint.
         :param inference_config: