elasticsearch 9.2.0__py3-none-any.whl → 9.3.0__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
- elasticsearch/_async/client/__init__.py +108 -85
- elasticsearch/_async/client/async_search.py +7 -6
- elasticsearch/_async/client/autoscaling.py +15 -4
- elasticsearch/_async/client/cat.py +203 -10
- elasticsearch/_async/client/ccr.py +10 -10
- elasticsearch/_async/client/cluster.py +98 -66
- elasticsearch/_async/client/connector.py +42 -41
- elasticsearch/_async/client/dangling_indices.py +8 -12
- elasticsearch/_async/client/enrich.py +10 -10
- elasticsearch/_async/client/eql.py +17 -16
- elasticsearch/_async/client/esql.py +173 -24
- elasticsearch/_async/client/features.py +6 -6
- elasticsearch/_async/client/fleet.py +8 -8
- elasticsearch/_async/client/graph.py +3 -3
- elasticsearch/_async/client/ilm.py +18 -18
- elasticsearch/_async/client/indices.py +564 -149
- elasticsearch/_async/client/inference.py +374 -64
- elasticsearch/_async/client/ingest.py +9 -9
- elasticsearch/_async/client/license.py +5 -7
- elasticsearch/_async/client/logstash.py +4 -4
- elasticsearch/_async/client/migration.py +6 -6
- elasticsearch/_async/client/ml.py +132 -88
- elasticsearch/_async/client/monitoring.py +4 -3
- elasticsearch/_async/client/nodes.py +182 -20
- elasticsearch/_async/client/project.py +13 -4
- elasticsearch/_async/client/query_rules.py +16 -16
- elasticsearch/_async/client/rollup.py +21 -21
- elasticsearch/_async/client/search_application.py +19 -19
- elasticsearch/_async/client/searchable_snapshots.py +10 -10
- elasticsearch/_async/client/security.py +34 -10
- elasticsearch/_async/client/shutdown.py +15 -4
- elasticsearch/_async/client/simulate.py +4 -4
- elasticsearch/_async/client/slm.py +17 -17
- elasticsearch/_async/client/snapshot.py +21 -21
- elasticsearch/_async/client/sql.py +17 -16
- elasticsearch/_async/client/streams.py +6 -7
- elasticsearch/_async/client/synonyms.py +10 -10
- elasticsearch/_async/client/tasks.py +8 -8
- elasticsearch/_async/client/text_structure.py +16 -12
- elasticsearch/_async/client/transform.py +51 -12
- elasticsearch/_async/client/utils.py +4 -2
- elasticsearch/_async/client/watcher.py +26 -26
- elasticsearch/_async/client/xpack.py +6 -5
- elasticsearch/_sync/client/__init__.py +110 -85
- elasticsearch/_sync/client/async_search.py +7 -6
- elasticsearch/_sync/client/autoscaling.py +15 -4
- elasticsearch/_sync/client/cat.py +203 -10
- elasticsearch/_sync/client/ccr.py +10 -10
- elasticsearch/_sync/client/cluster.py +98 -66
- elasticsearch/_sync/client/connector.py +42 -41
- elasticsearch/_sync/client/dangling_indices.py +8 -12
- elasticsearch/_sync/client/enrich.py +10 -10
- elasticsearch/_sync/client/eql.py +17 -16
- elasticsearch/_sync/client/esql.py +173 -24
- elasticsearch/_sync/client/features.py +6 -6
- elasticsearch/_sync/client/fleet.py +8 -8
- elasticsearch/_sync/client/graph.py +3 -3
- elasticsearch/_sync/client/ilm.py +18 -18
- elasticsearch/_sync/client/indices.py +564 -149
- elasticsearch/_sync/client/inference.py +374 -64
- elasticsearch/_sync/client/ingest.py +9 -9
- elasticsearch/_sync/client/license.py +5 -7
- elasticsearch/_sync/client/logstash.py +4 -4
- elasticsearch/_sync/client/migration.py +6 -6
- elasticsearch/_sync/client/ml.py +132 -88
- elasticsearch/_sync/client/monitoring.py +4 -3
- elasticsearch/_sync/client/nodes.py +182 -20
- elasticsearch/_sync/client/project.py +13 -4
- elasticsearch/_sync/client/project_routing.py +264 -0
- elasticsearch/_sync/client/query_rules.py +16 -16
- elasticsearch/_sync/client/rollup.py +21 -21
- elasticsearch/_sync/client/search_application.py +19 -19
- elasticsearch/_sync/client/searchable_snapshots.py +10 -10
- elasticsearch/_sync/client/security.py +34 -10
- elasticsearch/_sync/client/shutdown.py +15 -4
- elasticsearch/_sync/client/simulate.py +4 -4
- elasticsearch/_sync/client/slm.py +17 -17
- elasticsearch/_sync/client/snapshot.py +21 -21
- elasticsearch/_sync/client/sql.py +17 -16
- elasticsearch/_sync/client/streams.py +6 -7
- elasticsearch/_sync/client/synonyms.py +10 -10
- elasticsearch/_sync/client/tasks.py +8 -8
- elasticsearch/_sync/client/text_structure.py +16 -12
- elasticsearch/_sync/client/transform.py +51 -12
- elasticsearch/_sync/client/utils.py +16 -2
- elasticsearch/_sync/client/watcher.py +26 -26
- elasticsearch/_sync/client/xpack.py +6 -5
- elasticsearch/_version.py +2 -2
- elasticsearch/dsl/__init__.py +4 -0
- elasticsearch/dsl/_async/document.py +4 -5
- elasticsearch/dsl/_async/index.py +1 -1
- elasticsearch/dsl/_async/search.py +2 -3
- elasticsearch/dsl/_sync/document.py +4 -5
- elasticsearch/dsl/_sync/index.py +1 -1
- elasticsearch/dsl/_sync/search.py +2 -3
- elasticsearch/dsl/aggs.py +9 -9
- elasticsearch/dsl/async_connections.py +1 -2
- elasticsearch/dsl/connections.py +1 -2
- elasticsearch/dsl/document_base.py +1 -1
- elasticsearch/dsl/field.py +90 -6
- elasticsearch/dsl/pydantic.py +1 -1
- elasticsearch/dsl/query.py +25 -2
- elasticsearch/dsl/response/__init__.py +2 -0
- elasticsearch/dsl/serializer.py +1 -2
- elasticsearch/dsl/types.py +63 -8
- elasticsearch/dsl/utils.py +12 -4
- elasticsearch/esql/esql.py +1 -1
- elasticsearch/esql/functions.py +926 -252
- elasticsearch/helpers/__init__.py +2 -0
- elasticsearch/helpers/actions.py +21 -0
- elasticsearch/helpers/vectorstore/__init__.py +7 -7
- elasticsearch/helpers/vectorstore/_async/_utils.py +1 -1
- elasticsearch/helpers/vectorstore/_async/embedding_service.py +2 -2
- elasticsearch/helpers/vectorstore/_async/strategies.py +3 -3
- elasticsearch/helpers/vectorstore/_async/vectorstore.py +8 -5
- elasticsearch/helpers/vectorstore/_sync/_utils.py +1 -1
- elasticsearch/helpers/vectorstore/_sync/embedding_service.py +2 -2
- elasticsearch/helpers/vectorstore/_sync/strategies.py +3 -3
- elasticsearch/helpers/vectorstore/_sync/vectorstore.py +8 -5
- {elasticsearch-9.2.0.dist-info → elasticsearch-9.3.0.dist-info}/METADATA +2 -1
- elasticsearch-9.3.0.dist-info/RECORD +169 -0
- {elasticsearch-9.2.0.dist-info → elasticsearch-9.3.0.dist-info}/WHEEL +1 -1
- elasticsearch-9.2.0.dist-info/RECORD +0 -168
- {elasticsearch-9.2.0.dist-info → elasticsearch-9.3.0.dist-info}/licenses/LICENSE +0 -0
- {elasticsearch-9.2.0.dist-info → elasticsearch-9.3.0.dist-info}/licenses/NOTICE +0 -0
@@ -44,14 +44,20 @@ class InferenceClient(NamespacedClient):
         """
         .. raw:: html

-          <p>Perform completion inference on the service
+          <p>Perform completion inference on the service.</p>
+          <p>Get responses for completion tasks.
+          This API works only with the completion task type.</p>
+          <p>IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>
+          <p>This API requires the <code>monitor_inference</code> cluster privilege (the built-in <code>inference_admin</code> and <code>inference_user</code> roles grant this privilege).</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_

         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
-        :param task_settings:
+        :param task_settings: Task settings for the individual inference request. These
+            settings are specific to the <task_type> you specified and override the task
+            settings specified when initializing the service.
         :param timeout: Specifies the amount of time to wait for the inference request
             to complete.
         """
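For orientation, a minimal sketch of calling this completion API from the 9.3.0 client; the endpoint ID and prompt are hypothetical placeholders, and the endpoint must already exist with the completion task type:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")  # assumed local cluster

    # "my-completion-endpoint" is a hypothetical inference endpoint ID.
    resp = client.inference.completion(
        inference_id="my-completion-endpoint",
        input="Summarize the Elasticsearch inference APIs in one sentence.",
    )
    print(resp)

Per the updated docstring, any task_settings passed here override the task settings chosen when the endpoint was created.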
@@ -116,15 +122,17 @@ class InferenceClient(NamespacedClient):
         """
         .. raw:: html

-          <p>Delete an inference endpoint
+          <p>Delete an inference endpoint.</p>
+          <p>This API requires the manage_inference cluster privilege (the built-in <code>inference_admin</code> role grants this privilege).</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-delete>`_

         :param inference_id: The inference identifier.
         :param task_type: The task type
-        :param dry_run: When true, the endpoint is not deleted and a list of ingest
-            processors which reference this endpoint is returned.
+        :param dry_run: When true, checks the semantic_text fields and inference processors
+            that reference the endpoint and returns them in a list, but does not delete
+            the endpoint.
         :param force: When true, the inference endpoint is forcefully deleted even if
             it is still being used by ingest processors or semantic text fields.
         """
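A hedged sketch of the clarified dry_run flow; "my-endpoint" is a hypothetical endpoint ID:

    # Preview the semantic_text fields and inference processors that still
    # reference the endpoint, without deleting anything.
    refs = client.inference.delete(inference_id="my-endpoint", dry_run=True)
    print(refs)

    # Delete even if the endpoint is still referenced.
    client.inference.delete(inference_id="my-endpoint", force=True)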
@@ -190,7 +198,8 @@ class InferenceClient(NamespacedClient):
         """
         .. raw:: html

-          <p>Get an inference endpoint
+          <p>Get an inference endpoint.</p>
+          <p>This API requires the <code>monitor_inference</code> cluster privilege (the built-in <code>inference_admin</code> and <code>inference_user</code> roles grant this privilege).</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-get>`_
@@ -386,19 +395,22 @@ class InferenceClient(NamespacedClient):
           <li>Amazon Bedrock (<code>completion</code>, <code>text_embedding</code>)</li>
           <li>Amazon SageMaker (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code>)</li>
           <li>Anthropic (<code>completion</code>)</li>
-          <li>Azure AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
-          <li>Azure OpenAI (<code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Azure AI Studio (<code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+          <li>Azure OpenAI (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
           <li>Cohere (<code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
           <li>DeepSeek (<code>chat_completion</code>, <code>completion</code>)</li>
           <li>Elasticsearch (<code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code> - this service is for built-in models and models uploaded through Eland)</li>
           <li>ELSER (<code>sparse_embedding</code>)</li>
           <li>Google AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
           <li>Google Vertex AI (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+          <li>Groq (<code>chat_completion</code>)</li>
           <li>Hugging Face (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
           <li>JinaAI (<code>rerank</code>, <code>text_embedding</code>)</li>
           <li>Llama (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
           <li>Mistral (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Nvidia (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>, <code>rerank</code>)</li>
           <li>OpenAI (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+          <li>OpenShift AI (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
           <li>VoyageAI (<code>rerank</code>, <code>text_embedding</code>)</li>
           <li>Watsonx inference integration (<code>text_embedding</code>)</li>
         </ul>
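This service list lives in the docstring of the generic put API. As a sketch, registering an endpoint for any listed service goes through the same call shape; the endpoint ID, service, and settings below are hypothetical:

    # Hypothetical OpenAI embedding endpoint created via the generic put API.
    client.inference.put(
        task_type="text_embedding",
        inference_id="my-openai-embeddings",
        inference_config={
            "service": "openai",
            "service_settings": {
                "api_key": "<openai-api-key>",
                "model_id": "text-embedding-3-small",
            },
        },
    )

The service-specific put_* helpers below accept the same information as typed parameters.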
@@ -544,7 +556,7 @@ class InferenceClient(NamespacedClient):
         self,
         *,
         task_type: t.Union[
-            str, t.Literal["completion", "rerank", "space_embedding", "text_embedding"]
+            str, t.Literal["completion", "rerank", "sparse_embedding", "text_embedding"]
         ],
         alibabacloud_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["alibabacloud-ai-search"]]] = None,
@@ -573,7 +585,9 @@ class InferenceClient(NamespacedClient):
             this case, `alibabacloud-ai-search`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `alibabacloud-ai-search` service.
-        :param chunking_settings: The chunking configuration object.
+        :param chunking_settings: The chunking configuration object. Applies only to
+            the `sparse_embedding` or `text_embedding` task types. Not applicable to
+            the `rerank` or `completion` task types.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
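The same "applies only to embedding task types" note recurs for most services below. A hedged sketch of passing chunking settings when creating an embedding endpoint; all values are hypothetical placeholders:

    # chunking_settings is honored for sparse_embedding/text_embedding only.
    client.inference.put_alibabacloud(
        task_type="text_embedding",
        alibabacloud_inference_id="my-alibabacloud-embeddings",
        service="alibabacloud-ai-search",
        service_settings={
            "api_key": "<api-key>",
            "host": "<host>",
            "service_id": "<service-id>",
            "workspace": "<workspace>",
        },
        # Hypothetical chunking configuration; see the Elasticsearch chunking
        # settings docs for the supported strategies and limits.
        chunking_settings={"strategy": "sentence", "max_chunk_size": 250},
    )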
@@ -669,7 +683,8 @@ class InferenceClient(NamespacedClient):
             this case, `amazonbedrock`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `amazonbedrock` service.
-        :param chunking_settings: The chunking configuration object.
+        :param chunking_settings: The chunking configuration object. Applies only to
+            the `text_embedding` task type. Not applicable to the `completion` task type.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
@@ -771,7 +786,9 @@ class InferenceClient(NamespacedClient):
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `amazon_sagemaker` service and `service_settings.api`
             you specified.
-        :param chunking_settings: The chunking configuration object.
+        :param chunking_settings: The chunking configuration object. Applies only to
+            the `sparse_embedding` or `text_embedding` task types. Not applicable to
+            the `rerank`, `completion`, or `chat_completion` task types.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type and `service_settings.api` you specified.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
@@ -825,12 +842,7 @@ class InferenceClient(NamespacedClient):
     )

     @_rewrite_parameters(
-        body_fields=(
-            "service",
-            "service_settings",
-            "chunking_settings",
-            "task_settings",
-        ),
+        body_fields=("service", "service_settings", "task_settings"),
     )
     def put_anthropic(
         self,
@@ -839,7 +851,6 @@ class InferenceClient(NamespacedClient):
         anthropic_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["anthropic"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
-        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
         error_trace: t.Optional[bool] = None,
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
@@ -863,8 +874,7 @@ class InferenceClient(NamespacedClient):
         :param service: The type of service supported for the specified task type. In
             this case, `anthropic`.
         :param service_settings: Settings used to install the inference model. These
-            settings are specific to the `
-        :param chunking_settings: The chunking configuration object.
+            settings are specific to the `anthropic` service.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
@@ -902,8 +912,6 @@ class InferenceClient(NamespacedClient):
             __body["service"] = service
         if service_settings is not None:
             __body["service_settings"] = service_settings
-        if chunking_settings is not None:
-            __body["chunking_settings"] = chunking_settings
         if task_settings is not None:
             __body["task_settings"] = task_settings
         __headers = {"accept": "application/json", "content-type": "application/json"}
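Since 9.3.0 drops chunking_settings from put_anthropic entirely, a compatible call now passes only service and task configuration; the values below are hypothetical placeholders:

    # 9.3.0: put_anthropic no longer accepts a chunking_settings argument.
    client.inference.put_anthropic(
        task_type="completion",
        anthropic_inference_id="my-anthropic-completion",
        service="anthropic",
        service_settings={
            "api_key": "<anthropic-api-key>",
            "model_id": "claude-3-5-haiku-latest",  # hypothetical model choice
        },
        task_settings={"max_tokens": 1024},
    )

The same removal applies to put_contextualai and put_deepseek in the hunks below.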
@@ -955,8 +963,10 @@ class InferenceClient(NamespacedClient):
         :param service: The type of service supported for the specified task type. In
             this case, `azureaistudio`.
         :param service_settings: Settings used to install the inference model. These
-            settings are specific to the `
-        :param chunking_settings: The chunking configuration object.
+            settings are specific to the `azureaistudio` service.
+        :param chunking_settings: The chunking configuration object. Applies only to
+            the `text_embedding` task type. Not applicable to the `rerank` or `completion`
+            task types.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
@@ -1020,7 +1030,9 @@ class InferenceClient(NamespacedClient):
     def put_azureopenai(
         self,
         *,
-        task_type: t.Union[str, t.Literal["completion", "text_embedding"]],
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "text_embedding"]
+        ],
         azureopenai_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["azureopenai"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1056,7 +1068,9 @@ class InferenceClient(NamespacedClient):
             this case, `azureopenai`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `azureopenai` service.
-        :param chunking_settings: The chunking configuration object.
+        :param chunking_settings: The chunking configuration object. Applies only to
+            the `text_embedding` task type. Not applicable to the `completion` and `chat_completion`
+            task types.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
@@ -1148,7 +1162,9 @@ class InferenceClient(NamespacedClient):
             this case, `cohere`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `cohere` service.
-        :param chunking_settings: The chunking configuration object.
+        :param chunking_settings: The chunking configuration object. Applies only to
+            the `text_embedding` task type. Not applicable to the `rerank` or `completion`
+            task type.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
@@ -1200,12 +1216,7 @@ class InferenceClient(NamespacedClient):
     )

     @_rewrite_parameters(
-        body_fields=(
-            "service",
-            "service_settings",
-            "chunking_settings",
-            "task_settings",
-        ),
+        body_fields=("service", "service_settings", "task_settings"),
     )
     def put_contextualai(
         self,
@@ -1214,7 +1225,6 @@ class InferenceClient(NamespacedClient):
         contextualai_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["contextualai"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
-        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
         error_trace: t.Optional[bool] = None,
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
@@ -1239,7 +1249,6 @@ class InferenceClient(NamespacedClient):
             this case, `contextualai`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `contextualai` service.
-        :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
@@ -1277,8 +1286,6 @@ class InferenceClient(NamespacedClient):
             __body["service"] = service
         if service_settings is not None:
             __body["service_settings"] = service_settings
-        if chunking_settings is not None:
-            __body["chunking_settings"] = chunking_settings
         if task_settings is not None:
             __body["task_settings"] = task_settings
         __headers = {"accept": "application/json", "content-type": "application/json"}
@@ -1372,7 +1379,9 @@ class InferenceClient(NamespacedClient):
             this case, `custom`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `custom` service.
-        :param chunking_settings: The chunking configuration object.
+        :param chunking_settings: The chunking configuration object. Applies only to
+            the `sparse_embedding` or `text_embedding` task types. Not applicable to
+            the `rerank` or `completion` task types.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
         """
@@ -1420,7 +1429,7 @@ class InferenceClient(NamespacedClient):
     )

     @_rewrite_parameters(
-        body_fields=("service", "service_settings", "chunking_settings"),
+        body_fields=("service", "service_settings"),
     )
     def put_deepseek(
         self,
@@ -1429,7 +1438,6 @@ class InferenceClient(NamespacedClient):
         deepseek_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["deepseek"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
-        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
         error_trace: t.Optional[bool] = None,
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
@@ -1452,7 +1460,6 @@ class InferenceClient(NamespacedClient):
             this case, `deepseek`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `deepseek` service.
-        :param chunking_settings: The chunking configuration object.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
             to be created.
         """
@@ -1486,8 +1493,6 @@ class InferenceClient(NamespacedClient):
             __body["service"] = service
         if service_settings is not None:
             __body["service_settings"] = service_settings
-        if chunking_settings is not None:
-            __body["chunking_settings"] = chunking_settings
         __headers = {"accept": "application/json", "content-type": "application/json"}
         return self.perform_request(  # type: ignore[return-value]
             "PUT",
@@ -1554,7 +1559,9 @@ class InferenceClient(NamespacedClient):
             this case, `elasticsearch`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `elasticsearch` service.
-        :param chunking_settings: The chunking configuration object.
+        :param chunking_settings: The chunking configuration object. Applies only to
+            the `sparse_embedding` and `text_embedding` task types. Not applicable to
+            the `rerank` task type.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
@@ -1735,7 +1742,8 @@ class InferenceClient(NamespacedClient):
             this case, `googleaistudio`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `googleaistudio` service.
-        :param chunking_settings: The chunking configuration object.
+        :param chunking_settings: The chunking configuration object. Applies only to
+            the `text_embedding` task type. Not applicable to the `completion` task type.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
             to be created.
         """
@@ -1825,7 +1833,9 @@ class InferenceClient(NamespacedClient):
             this case, `googlevertexai`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `googlevertexai` service.
-        :param chunking_settings: The chunking configuration object.
+        :param chunking_settings: The chunking configuration object. Applies only to
+            the `text_embedding` task type. Not applicable to the `rerank`, `completion`,
+            or `chat_completion` task types.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
@@ -1878,6 +1888,82 @@ class InferenceClient(NamespacedClient):
             path_parts=__path_parts,
         )

+    @_rewrite_parameters(
+        body_fields=("service", "service_settings"),
+    )
+    def put_groq(
+        self,
+        *,
+        task_type: t.Union[str, t.Literal["chat_completion"]],
+        groq_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["groq"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+          <p>Create a Groq inference endpoint.</p>
+          <p>Create an inference endpoint to perform an inference task with the <code>groq</code> service.</p>
+
+
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-groq>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param groq_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `groq`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `groq` service.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if groq_inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'groq_inference_id'")
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "groq_inference_id": _quote(groq_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["groq_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+        __headers = {"accept": "application/json", "content-type": "application/json"}
+        return self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_groq",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_fields=(
             "service",
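A hedged usage sketch for the new method; the endpoint ID, model name, and key are hypothetical placeholders. The groq service supports only the chat_completion task type, which is consumed through the streaming chat completion APIs:

    client.inference.put_groq(
        task_type="chat_completion",
        groq_inference_id="my-groq-chat",
        service="groq",
        service_settings={
            "api_key": "<groq-api-key>",
            "model_id": "llama-3.3-70b-versatile",  # hypothetical model choice
        },
    )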
@@ -1953,7 +2039,9 @@ class InferenceClient(NamespacedClient):
             this case, `hugging_face`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `hugging_face` service.
-        :param chunking_settings: The chunking configuration object.
+        :param chunking_settings: The chunking configuration object. Applies only to
+            the `text_embedding` task type. Not applicable to the `rerank`, `completion`,
+            or `chat_completion` task types.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
@@ -2047,7 +2135,8 @@ class InferenceClient(NamespacedClient):
             this case, `jinaai`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `jinaai` service.
-        :param chunking_settings: The chunking configuration object.
+        :param chunking_settings: The chunking configuration object. Applies only to
+            the `text_embedding` task type. Not applicable to the `rerank` task type.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
@@ -2133,7 +2222,9 @@ class InferenceClient(NamespacedClient):
             this case, `llama`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `llama` service.
-        :param chunking_settings: The chunking configuration object.
+        :param chunking_settings: The chunking configuration object. Applies only to
+            the `text_embedding` task type. Not applicable to the `completion` or `chat_completion`
+            task types.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
             to be created.
         """
@@ -2215,7 +2306,9 @@ class InferenceClient(NamespacedClient):
             this case, `mistral`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `mistral` service.
-        :param chunking_settings: The chunking configuration object.
+        :param chunking_settings: The chunking configuration object. Applies only to
+            the `text_embedding` task type. Not applicable to the `completion` or `chat_completion`
+            task types.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
             to be created.
         """
@@ -2262,6 +2355,104 @@ class InferenceClient(NamespacedClient):
             path_parts=__path_parts,
         )

+    @_rewrite_parameters(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
+    )
+    def put_nvidia(
+        self,
+        *,
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
+        ],
+        nvidia_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["nvidia"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+          <p>Create an Nvidia inference endpoint.</p>
+          <p>Create an inference endpoint to perform an inference task with the <code>nvidia</code> service.</p>
+
+
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-nvidia>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+            NOTE: The `chat_completion` task type only supports streaming and only through
+            the _stream API.
+        :param nvidia_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `nvidia`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `nvidia` service.
+        :param chunking_settings: The chunking configuration object. Applies only to
+            the `text_embedding` task type. Not applicable to the `rerank`, `completion`,
+            or `chat_completion` task types.
+        :param task_settings: Settings to configure the inference task. Applies only
+            to the `text_embedding` task type. Not applicable to the `rerank`, `completion`,
+            or `chat_completion` task types. These settings are specific to the task
+            type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if nvidia_inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'nvidia_inference_id'")
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "nvidia_inference_id": _quote(nvidia_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["nvidia_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
+        __headers = {"accept": "application/json", "content-type": "application/json"}
+        return self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_nvidia",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_fields=(
             "service",
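A hedged sketch of the new put_nvidia call for an embedding endpoint; the endpoint ID, model, key, and chunking values are hypothetical placeholders:

    client.inference.put_nvidia(
        task_type="text_embedding",
        nvidia_inference_id="my-nvidia-embeddings",
        service="nvidia",
        service_settings={
            "api_key": "<nvidia-api-key>",
            "model_id": "nvidia/nv-embedqa-e5-v5",  # hypothetical model choice
        },
        # Per the docstring, chunking_settings and task_settings apply only
        # to the text_embedding task type for this service.
        chunking_settings={"strategy": "sentence", "max_chunk_size": 250},
    )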
@@ -2305,7 +2496,9 @@ class InferenceClient(NamespacedClient):
             this case, `openai`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `openai` service.
-        :param chunking_settings: The chunking configuration object.
+        :param chunking_settings: The chunking configuration object. Applies only to
+            the `text_embedding` task type. Not applicable to the `completion` or `chat_completion`
+            task types.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
@@ -2356,6 +2549,106 @@ class InferenceClient(NamespacedClient):
             path_parts=__path_parts,
         )

+    @_rewrite_parameters(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
+    )
+    def put_openshift_ai(
+        self,
+        *,
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
+        ],
+        openshiftai_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["openshift_ai"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+          <p>Create an OpenShift AI inference endpoint.</p>
+          <p>Create an inference endpoint to perform an inference task with the <code>openshift_ai</code> service.</p>
+
+
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openshift-ai>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+            NOTE: The `chat_completion` task type only supports streaming and only through
+            the _stream API.
+        :param openshiftai_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `openshift_ai`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `openshift_ai` service.
+        :param chunking_settings: The chunking configuration object. Applies only to
+            the `text_embedding` task type. Not applicable to the `rerank`, `completion`,
+            or `chat_completion` task types.
+        :param task_settings: Settings to configure the inference task. Applies only
+            to the `rerank` task type. Not applicable to the `text_embedding`, `completion`,
+            or `chat_completion` task types. These settings are specific to the task
+            type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if openshiftai_inference_id in SKIP_IN_PATH:
+            raise ValueError(
+                "Empty value passed for parameter 'openshiftai_inference_id'"
+            )
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "openshiftai_inference_id": _quote(openshiftai_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["openshiftai_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
+        __headers = {"accept": "application/json", "content-type": "application/json"}
+        return self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_openshift_ai",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_fields=(
             "service",
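A hedged sketch of the new put_openshift_ai call; the URL, token, model, and endpoint ID are hypothetical placeholders and the exact service_settings keys should be checked against the service docs:

    client.inference.put_openshift_ai(
        task_type="rerank",
        openshiftai_inference_id="my-openshift-reranker",
        service="openshift_ai",
        service_settings={
            "url": "https://my-model.apps.example.com",  # hypothetical serving URL
            "api_key": "<service-token>",
            "model_id": "my-reranker-model",
        },
    )

Per the docstring, task_settings applies only to rerank here, while chunking_settings applies only to text_embedding.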
@@ -2396,7 +2689,8 @@ class InferenceClient(NamespacedClient):
             this case, `voyageai`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `voyageai` service.
-        :param chunking_settings: The chunking configuration object.
+        :param chunking_settings: The chunking configuration object. Applies only to
+            the `text_embedding` task type. Not applicable to the `rerank` task type.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
@@ -2448,7 +2742,7 @@ class InferenceClient(NamespacedClient):
     )

     @_rewrite_parameters(
-        body_fields=("service", "service_settings"),
+        body_fields=("service", "service_settings", "chunking_settings"),
     )
     def put_watsonx(
         self,
@@ -2459,6 +2753,7 @@ class InferenceClient(NamespacedClient):
         watsonx_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["watsonxai"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
         error_trace: t.Optional[bool] = None,
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
@@ -2483,6 +2778,9 @@ class InferenceClient(NamespacedClient):
             this case, `watsonxai`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `watsonxai` service.
+        :param chunking_settings: The chunking configuration object. Applies only to
+            the `text_embedding` task type. Not applicable to the `completion` or `chat_completion`
+            task types.
         :param timeout: Specifies the amount of time to wait for the inference endpoint
             to be created.
         """
@@ -2516,6 +2814,8 @@ class InferenceClient(NamespacedClient):
             __body["service"] = service
         if service_settings is not None:
             __body["service_settings"] = service_settings
+        if chunking_settings is not None:
+            __body["chunking_settings"] = chunking_settings
         __headers = {"accept": "application/json", "content-type": "application/json"}
         return self.perform_request(  # type: ignore[return-value]
             "PUT",
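Unlike the removals above, put_watsonx gains chunking support in 9.3.0. A hedged sketch with hypothetical placeholder values:

    # 9.3.0 adds the chunking_settings argument to put_watsonx.
    client.inference.put_watsonx(
        task_type="text_embedding",
        watsonx_inference_id="my-watsonx-embeddings",
        service="watsonxai",
        service_settings={
            "api_key": "<watsonx-api-key>",
            "url": "<watsonx-url>",
            "model_id": "ibm/slate-30m-english-rtrvr",  # hypothetical model choice
            "project_id": "<project-id>",
            "api_version": "2024-05-02",
        },
        chunking_settings={"strategy": "word", "max_chunk_size": 120},
    )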
@@ -2528,39 +2828,41 @@ class InferenceClient(NamespacedClient):
     )

     @_rewrite_parameters(
-        body_fields=("input", "query", "task_settings"),
+        body_fields=("input", "query", "return_documents", "task_settings", "top_n"),
     )
     def rerank(
         self,
         *,
         inference_id: str,
-        input: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        input: t.Optional[t.Sequence[str]] = None,
         query: t.Optional[str] = None,
         error_trace: t.Optional[bool] = None,
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        return_documents: t.Optional[bool] = None,
         task_settings: t.Optional[t.Any] = None,
         timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        top_n: t.Optional[int] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html

-          <p>Perform reranking inference on the service
+          <p>Perform reranking inference on the service.</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_

         :param inference_id: The unique identifier for the inference endpoint.
-        :param input: The text on which you want to perform the inference task. It can
-            be a single string or an array. > info > Inference endpoints for the `completion`
-            task type currently only support a single string as input.
+        :param input: The documents to rank.
         :param query: Query input.
+        :param return_documents: Include the document text in the response.
         :param task_settings: Task settings for the individual inference request. These
             settings are specific to the task type you specified and override the task
             settings specified when initializing the service.
         :param timeout: The amount of time to wait for the inference request to complete.
+        :param top_n: Limit the response to the top N documents.
         """
         if inference_id in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'inference_id'")
@@ -2587,8 +2889,12 @@ class InferenceClient(NamespacedClient):
             __body["input"] = input
         if query is not None:
             __body["query"] = query
+        if return_documents is not None:
+            __body["return_documents"] = return_documents
         if task_settings is not None:
             __body["task_settings"] = task_settings
+        if top_n is not None:
+            __body["top_n"] = top_n
         __headers = {"accept": "application/json", "content-type": "application/json"}
         return self.perform_request(  # type: ignore[return-value]
             "POST",
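A hedged sketch of the extended rerank request; the endpoint ID is a hypothetical placeholder:

    resp = client.inference.rerank(
        inference_id="my-rerank-endpoint",
        query="What is the capital of France?",
        input=[
            "Berlin is the capital of Germany.",
            "Paris is the capital of France.",
            "Madrid is the capital of Spain.",
        ],
        return_documents=True,  # include the document text in each hit
        top_n=2,                # keep only the two highest-scoring documents
    )
    for hit in resp["rerank"]:
        print(hit["relevance_score"], hit.get("text"))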
@@ -2619,14 +2925,16 @@ class InferenceClient(NamespacedClient):
         """
         .. raw:: html

-          <p>Perform sparse embedding inference on the service
+          <p>Perform sparse embedding inference on the service.</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_

         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
-        :param task_settings:
+        :param task_settings: Task settings for the individual inference request. These
+            settings are specific to the <task_type> you specified and override the task
+            settings specified when initializing the service.
         :param timeout: Specifies the amount of time to wait for the inference request
             to complete.
         """
@@ -2684,7 +2992,7 @@ class InferenceClient(NamespacedClient):
         """
         .. raw:: html

-          <p>Perform text embedding inference on the service
+          <p>Perform text embedding inference on the service.</p>


         `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference>`_
@@ -2698,7 +3006,9 @@ class InferenceClient(NamespacedClient):
             to the relevant service-specific documentation for more info. > info > The
             `input_type` parameter specified on the root level of the request body will
             take precedence over the `input_type` parameter specified in `task_settings`.
-        :param task_settings:
+        :param task_settings: Task settings for the individual inference request. These
+            settings are specific to the <task_type> you specified and override the task
+            settings specified when initializing the service.
         :param timeout: Specifies the amount of time to wait for the inference request
             to complete.
         """
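To close, a hedged sketch of a text embedding request; the endpoint ID and task settings are hypothetical placeholders, and per the docstring the per-request task_settings override the endpoint defaults:

    resp = client.inference.text_embedding(
        inference_id="my-embedding-endpoint",
        input=["first passage", "second passage"],
        # input_type values are service-specific (for example query vs. document);
        # a root-level input_type, where supported, takes precedence over this.
        task_settings={"input_type": "query"},
    )
    print(resp["text_embedding"][0]["embedding"][:5])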