elasticsearch 8.18.0__py3-none-any.whl → 8.19.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
- elasticsearch/_async/client/__init__.py +56 -76
- elasticsearch/_async/client/async_search.py +5 -9
- elasticsearch/_async/client/autoscaling.py +4 -4
- elasticsearch/_async/client/cat.py +620 -65
- elasticsearch/_async/client/ccr.py +13 -13
- elasticsearch/_async/client/cluster.py +33 -24
- elasticsearch/_async/client/connector.py +30 -30
- elasticsearch/_async/client/dangling_indices.py +3 -3
- elasticsearch/_async/client/enrich.py +5 -5
- elasticsearch/_async/client/eql.py +13 -5
- elasticsearch/_async/client/esql.py +38 -9
- elasticsearch/_async/client/features.py +2 -2
- elasticsearch/_async/client/fleet.py +13 -13
- elasticsearch/_async/client/graph.py +1 -1
- elasticsearch/_async/client/ilm.py +11 -11
- elasticsearch/_async/client/indices.py +131 -82
- elasticsearch/_async/client/inference.py +516 -110
- elasticsearch/_async/client/ingest.py +9 -16
- elasticsearch/_async/client/license.py +11 -11
- elasticsearch/_async/client/logstash.py +3 -3
- elasticsearch/_async/client/migration.py +3 -3
- elasticsearch/_async/client/ml.py +81 -93
- elasticsearch/_async/client/nodes.py +9 -8
- elasticsearch/_async/client/query_rules.py +8 -8
- elasticsearch/_async/client/rollup.py +8 -8
- elasticsearch/_async/client/search_application.py +10 -10
- elasticsearch/_async/client/searchable_snapshots.py +4 -4
- elasticsearch/_async/client/security.py +72 -80
- elasticsearch/_async/client/shutdown.py +3 -3
- elasticsearch/_async/client/simulate.py +1 -1
- elasticsearch/_async/client/slm.py +9 -9
- elasticsearch/_async/client/snapshot.py +19 -13
- elasticsearch/_async/client/sql.py +6 -6
- elasticsearch/_async/client/ssl.py +1 -1
- elasticsearch/_async/client/synonyms.py +7 -7
- elasticsearch/_async/client/tasks.py +3 -3
- elasticsearch/_async/client/text_structure.py +4 -4
- elasticsearch/_async/client/transform.py +11 -11
- elasticsearch/_async/client/watcher.py +13 -13
- elasticsearch/_async/client/xpack.py +2 -2
- elasticsearch/_sync/client/__init__.py +56 -76
- elasticsearch/_sync/client/async_search.py +5 -9
- elasticsearch/_sync/client/autoscaling.py +4 -4
- elasticsearch/_sync/client/cat.py +620 -65
- elasticsearch/_sync/client/ccr.py +13 -13
- elasticsearch/_sync/client/cluster.py +33 -24
- elasticsearch/_sync/client/connector.py +30 -30
- elasticsearch/_sync/client/dangling_indices.py +3 -3
- elasticsearch/_sync/client/enrich.py +5 -5
- elasticsearch/_sync/client/eql.py +13 -5
- elasticsearch/_sync/client/esql.py +38 -9
- elasticsearch/_sync/client/features.py +2 -2
- elasticsearch/_sync/client/fleet.py +13 -13
- elasticsearch/_sync/client/graph.py +1 -1
- elasticsearch/_sync/client/ilm.py +11 -11
- elasticsearch/_sync/client/indices.py +131 -82
- elasticsearch/_sync/client/inference.py +516 -110
- elasticsearch/_sync/client/ingest.py +9 -16
- elasticsearch/_sync/client/license.py +11 -11
- elasticsearch/_sync/client/logstash.py +3 -3
- elasticsearch/_sync/client/migration.py +3 -3
- elasticsearch/_sync/client/ml.py +81 -93
- elasticsearch/_sync/client/nodes.py +9 -8
- elasticsearch/_sync/client/query_rules.py +8 -8
- elasticsearch/_sync/client/rollup.py +8 -8
- elasticsearch/_sync/client/search_application.py +10 -10
- elasticsearch/_sync/client/searchable_snapshots.py +4 -4
- elasticsearch/_sync/client/security.py +72 -80
- elasticsearch/_sync/client/shutdown.py +3 -3
- elasticsearch/_sync/client/simulate.py +1 -1
- elasticsearch/_sync/client/slm.py +9 -9
- elasticsearch/_sync/client/snapshot.py +19 -13
- elasticsearch/_sync/client/sql.py +6 -6
- elasticsearch/_sync/client/ssl.py +1 -1
- elasticsearch/_sync/client/synonyms.py +7 -7
- elasticsearch/_sync/client/tasks.py +3 -3
- elasticsearch/_sync/client/text_structure.py +4 -4
- elasticsearch/_sync/client/transform.py +11 -11
- elasticsearch/_sync/client/watcher.py +13 -13
- elasticsearch/_sync/client/xpack.py +2 -2
- elasticsearch/_version.py +1 -1
- elasticsearch/compat.py +5 -0
- elasticsearch/dsl/__init__.py +2 -1
- elasticsearch/dsl/_async/document.py +1 -1
- elasticsearch/dsl/_sync/document.py +1 -1
- elasticsearch/dsl/aggs.py +2 -3
- elasticsearch/dsl/document_base.py +176 -16
- elasticsearch/dsl/field.py +361 -38
- elasticsearch/dsl/query.py +55 -4
- elasticsearch/dsl/types.py +151 -22
- elasticsearch/dsl/utils.py +1 -1
- elasticsearch/esql/__init__.py +18 -0
- elasticsearch/esql/esql.py +1105 -0
- elasticsearch/esql/functions.py +1738 -0
- elasticsearch/exceptions.py +2 -0
- {elasticsearch-8.18.0.dist-info → elasticsearch-8.19.0.dist-info}/METADATA +1 -1
- elasticsearch-8.19.0.dist-info/RECORD +164 -0
- elasticsearch-8.18.0.dist-info/RECORD +0 -161
- {elasticsearch-8.18.0.dist-info → elasticsearch-8.19.0.dist-info}/WHEEL +0 -0
- {elasticsearch-8.18.0.dist-info → elasticsearch-8.19.0.dist-info}/licenses/LICENSE +0 -0
- {elasticsearch-8.18.0.dist-info → elasticsearch-8.19.0.dist-info}/licenses/NOTICE +0 -0
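Most of the modules above change only lightly — documentation links bumped from the 8.18 to the 8.19 reference guide, plus new `timeout` parameters on the inference endpoint-creation APIs — while the bulk of the new code lands in the new `elasticsearch/esql` query-builder package and in `inference.py` (+516 -110 in both client variants). The version bump itself is the one-line edit to `_version.py`; a minimal post-upgrade check, assuming nothing beyond the `__versionstr__` attribute that file defines:

```python
# Confirm which client is installed; _version.py's one-line change is
# the version string the package re-exports as __versionstr__.
import elasticsearch

print(elasticsearch.__versionstr__)  # expect "8.19.0" after the upgrade
```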
The hunks below are reconstructed as a unified diff from the flattened side-by-side rendering. The methods use plain `def` and `self.perform_request`, so this is the `_sync/client/inference.py` excerpt; the `_async` file receives the mirror-image changes. Old documentation links whose text was truncated by the renderer are left truncated.

```diff
@@ -47,7 +47,7 @@ class InferenceClient(NamespacedClient):
           <p>Perform completion inference on the service</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_
 
         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
@@ -123,7 +123,7 @@ class InferenceClient(NamespacedClient):
           <p>Delete an inference endpoint</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/delete-inference-api.html>`_
 
         :param inference_id: The inference identifier.
         :param task_type: The task type
@@ -197,7 +197,7 @@ class InferenceClient(NamespacedClient):
           <p>Get an inference endpoint</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/get-inference-api.html>`_
 
         :param task_type: The task type
         :param inference_id: The inference Id
@@ -234,6 +234,125 @@ class InferenceClient(NamespacedClient):
             path_parts=__path_parts,
         )
 
+    @_rewrite_parameters(
+        body_fields=("input", "input_type", "query", "task_settings"),
+    )
+    def inference(
+        self,
+        *,
+        inference_id: str,
+        input: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        task_type: t.Optional[
+            t.Union[
+                str,
+                t.Literal[
+                    "chat_completion",
+                    "completion",
+                    "rerank",
+                    "sparse_embedding",
+                    "text_embedding",
+                ],
+            ]
+        ] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        input_type: t.Optional[str] = None,
+        pretty: t.Optional[bool] = None,
+        query: t.Optional[str] = None,
+        task_settings: t.Optional[t.Any] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+          <p>Perform inference on the service.</p>
+          <p>This API enables you to use machine learning models to perform specific tasks on data that you provide as an input.
+          It returns a response with the results of the tasks.
+          The inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.</p>
+          <p>For details about using this API with a service, such as Amazon Bedrock, Anthropic, or HuggingFace, refer to the service-specific documentation.</p>
+          <blockquote>
+          <p>info
+          The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>
+          </blockquote>
+
+
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_
+
+        :param inference_id: The unique identifier for the inference endpoint.
+        :param input: The text on which you want to perform the inference task. It can
+            be a single string or an array. > info > Inference endpoints for the `completion`
+            task type currently only support a single string as input.
+        :param task_type: The type of inference task that the model performs.
+        :param input_type: Specifies the input data type for the text embedding model.
+            The `input_type` parameter only applies to Inference Endpoints with the `text_embedding`
+            task type. Possible values include: * `SEARCH` * `INGEST` * `CLASSIFICATION`
+            * `CLUSTERING` Not all services support all values. Unsupported values will
+            trigger a validation exception. Accepted values depend on the configured
+            inference service, refer to the relevant service-specific documentation for
+            more info. > info > The `input_type` parameter specified on the root level
+            of the request body will take precedence over the `input_type` parameter
+            specified in `task_settings`.
+        :param query: The query input, which is required only for the `rerank` task.
+            It is not required for other tasks.
+        :param task_settings: Task settings for the individual inference request. These
+            settings are specific to the task type you specified and override the task
+            settings specified when initializing the service.
+        :param timeout: The amount of time to wait for the inference request to complete.
+        """
+        if inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'inference_id'")
+        if input is None and body is None:
+            raise ValueError("Empty value passed for parameter 'input'")
+        __path_parts: t.Dict[str, str]
+        if task_type not in SKIP_IN_PATH and inference_id not in SKIP_IN_PATH:
+            __path_parts = {
+                "task_type": _quote(task_type),
+                "inference_id": _quote(inference_id),
+            }
+            __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["inference_id"]}'
+        elif inference_id not in SKIP_IN_PATH:
+            __path_parts = {"inference_id": _quote(inference_id)}
+            __path = f'/_inference/{__path_parts["inference_id"]}'
+        else:
+            raise ValueError("Couldn't find a path for the given parameters")
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if input is not None:
+                __body["input"] = input
+            if input_type is not None:
+                __body["input_type"] = input_type
+            if query is not None:
+                __body["query"] = query
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return self.perform_request(  # type: ignore[return-value]
+            "POST",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.inference",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_name="inference_config",
     )
```
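In 8.19 the client gains a generic `inference.inference()` method. As the path logic above shows, it posts to `/_inference/{inference_id}` when only the endpoint ID is given, or to `/_inference/{task_type}/{inference_id}` when `task_type` is supplied; `timeout` travels in the query string while `input`, `input_type`, `query`, and `task_settings` form the body. A minimal usage sketch against that signature — the host and endpoint IDs are placeholders:

```python
from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # placeholder host

# POST /_inference/my-embedding-endpoint (task_type omitted)
resp = client.inference.inference(
    inference_id="my-embedding-endpoint",  # hypothetical endpoint ID
    input=["first passage to embed", "second passage to embed"],
    timeout="30s",  # sent as a query parameter, per the diff above
)

# POST /_inference/rerank/my-rerank-endpoint — the rerank task also
# takes the new `query` body field.
resp = client.inference.inference(
    inference_id="my-rerank-endpoint",  # hypothetical endpoint ID
    task_type="rerank",
    query="best pizza in town",
    input=["Pizza joint review...", "Hardware store review..."],
)
print(resp)
```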
```diff
@@ -259,26 +378,45 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html
 
-          <p>Create an inference endpoint
-          When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+          <p>Create an inference endpoint.</p>
           <p>IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.
           For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.
           However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>
+          <p>The following integrations are available through the inference API. You can find the available task types next to the integration name:</p>
+          <ul>
+          <li>AlibabaCloud AI Search (<code>completion</code>, <code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code>)</li>
+          <li>Amazon Bedrock (<code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Anthropic (<code>completion</code>)</li>
+          <li>Azure AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Azure OpenAI (<code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Cohere (<code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+          <li>DeepSeek (<code>completion</code>, <code>chat_completion</code>)</li>
+          <li>Elasticsearch (<code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code> - this service is for built-in models and models uploaded through Eland)</li>
+          <li>ELSER (<code>sparse_embedding</code>)</li>
+          <li>Google AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Google Vertex AI (<code>rerank</code>, <code>text_embedding</code>)</li>
+          <li>Hugging Face (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+          <li>Mistral (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+          <li>OpenAI (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+          <li>VoyageAI (<code>text_embedding</code>, <code>rerank</code>)</li>
+          <li>Watsonx inference integration (<code>text_embedding</code>)</li>
+          <li>JinaAI (<code>text_embedding</code>, <code>rerank</code>)</li>
+          </ul>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/put-inference-api.html>`_
 
         :param inference_id: The inference Id
        :param inference_config:
-        :param task_type: The task type
+        :param task_type: The task type. Refer to the integration list in the API description
+            for the available task types.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if inference_id in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'inference_id'")
@@ -309,6 +447,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         __body = inference_config if inference_config is not None else body
         __headers = {"accept": "application/json", "content-type": "application/json"}
         return self.perform_request(  # type: ignore[return-value]
```
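The generic `put()` method keeps its `body_name="inference_config"` calling convention and now forwards the new `timeout` as a query parameter, as the hunk above shows. A sketch of creating an endpoint through it; the `service_settings` values are illustrative for the built-in `elasticsearch` service, not a prescription:

```python
# A sketch using the generic put() API; the service settings shown are
# assumed example values for the built-in `elasticsearch` service.
resp = client.inference.put(
    task_type="sparse_embedding",
    inference_id="my-elser-endpoint",  # hypothetical endpoint ID
    inference_config={
        "service": "elasticsearch",
        "service_settings": {
            "model_id": ".elser_model_2",  # assumed built-in model name
            "num_allocations": 1,
            "num_threads": 1,
        },
    },
    timeout="60s",  # new in 8.19: how long to wait for endpoint creation
)
```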
```diff
@@ -344,6 +484,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -351,14 +492,9 @@ class InferenceClient(NamespacedClient):
 
           <p>Create an AlibabaCloud AI Search inference endpoint.</p>
           <p>Create an inference endpoint to perform an inference task with the <code>alibabacloud-ai-search</code> service.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-alibabacloud-ai-search.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param alibabacloud_inference_id: The unique identifier of the inference endpoint.
@@ -369,6 +505,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -395,6 +533,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -440,25 +580,21 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html
 
           <p>Create an Amazon Bedrock inference endpoint.</p>
-          <p>
+          <p>Create an inference endpoint to perform an inference task with the <code>amazonbedrock</code> service.</p>
           <blockquote>
           <p>info
           You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.</p>
           </blockquote>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-amazon-bedrock.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param amazonbedrock_inference_id: The unique identifier of the inference endpoint.
@@ -469,6 +605,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -495,6 +633,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -540,6 +680,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -547,14 +688,9 @@ class InferenceClient(NamespacedClient):
 
           <p>Create an Anthropic inference endpoint.</p>
           <p>Create an inference endpoint to perform an inference task with the <code>anthropic</code> service.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-anthropic.html>`_
 
         :param task_type: The task type. The only valid task type for the model to perform
             is `completion`.
@@ -566,6 +702,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -592,6 +730,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -637,6 +777,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -644,14 +785,9 @@ class InferenceClient(NamespacedClient):
 
           <p>Create an Azure AI studio inference endpoint.</p>
           <p>Create an inference endpoint to perform an inference task with the <code>azureaistudio</code> service.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-azure-ai-studio.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param azureaistudio_inference_id: The unique identifier of the inference endpoint.
@@ -662,6 +798,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -688,6 +826,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -733,6 +873,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -746,14 +887,9 @@ class InferenceClient(NamespacedClient):
           <li><a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35">GPT-3.5</a></li>
           </ul>
           <p>The list of embeddings models that you can choose from in your deployment can be found in the <a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings">Azure models documentation</a>.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-azure-openai.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
             NOTE: The `chat_completion` task type only supports streaming and only through
@@ -766,6 +902,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -792,6 +930,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -837,6 +977,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -844,14 +985,9 @@ class InferenceClient(NamespacedClient):
 
           <p>Create a Cohere inference endpoint.</p>
           <p>Create an inference endpoint to perform an inference task with the <code>cohere</code> service.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-cohere.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param cohere_inference_id: The unique identifier of the inference endpoint.
@@ -862,6 +998,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -886,6 +1024,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -910,6 +1050,221 @@ class InferenceClient(NamespacedClient):
             path_parts=__path_parts,
         )
 
+    @_rewrite_parameters(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
+    )
+    def put_custom(
+        self,
+        *,
+        task_type: t.Union[
+            str, t.Literal["completion", "rerank", "sparse_embedding", "text_embedding"]
+        ],
+        custom_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["custom"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+          <p>Create a custom inference endpoint.</p>
+          <p>The custom service gives more control over how to interact with external inference services that aren't explicitly supported through dedicated integrations.
+          The custom service gives you the ability to define the headers, url, query parameters, request body, and secrets.
+          The custom service supports the template replacement functionality, which enables you to define a template that can be replaced with the value associated with that key.
+          Templates are portions of a string that start with <code>${</code> and end with <code>}</code>.
+          The parameters <code>secret_parameters</code> and <code>task_settings</code> are checked for keys for template replacement. Template replacement is supported in the <code>request</code>, <code>headers</code>, <code>url</code>, and <code>query_parameters</code>.
+          If the definition (key) is not found for a template, an error message is returned.
+          In case of an endpoint definition like the following:</p>
+          <pre><code>PUT _inference/text_embedding/test-text-embedding
+          {
+            "service": "custom",
+            "service_settings": {
+               "secret_parameters": {
+                    "api_key": "&lt;some api key&gt;"
+               },
+               "url": "...endpoints.huggingface.cloud/v1/embeddings",
+               "headers": {
+                   "Authorization": "Bearer ${api_key}",
+                   "Content-Type": "application/json"
+               },
+               "request": "{\\"input\\": ${input}}",
+               "response": {
+                   "json_parser": {
+                       "text_embeddings":"$.data[*].embedding[*]"
+                   }
+               }
+            }
+          }
+          </code></pre>
+          <p>To replace <code>${api_key}</code> the <code>secret_parameters</code> and <code>task_settings</code> are checked for a key named <code>api_key</code>.</p>
+          <blockquote>
+          <p>info
+          Templates should not be surrounded by quotes.</p>
+          </blockquote>
+          <p>Pre-defined templates:</p>
+          <ul>
+          <li><code>${input}</code> refers to the array of input strings that comes from the <code>input</code> field of the subsequent inference requests.</li>
+          <li><code>${input_type}</code> refers to the input type translation values.</li>
+          <li><code>${query}</code> refers to the query field used specifically for reranking tasks.</li>
+          <li><code>${top_n}</code> refers to the <code>top_n</code> field available when performing rerank requests.</li>
+          <li><code>${return_documents}</code> refers to the <code>return_documents</code> field available when performing rerank requests.</li>
+          </ul>
+
+
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param custom_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `custom`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `custom` service.
+        :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type you specified.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if custom_inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'custom_inference_id'")
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "custom_inference_id": _quote(custom_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["custom_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_custom",
+            path_parts=__path_parts,
+        )
+
+    @_rewrite_parameters(
+        body_fields=("service", "service_settings", "chunking_settings"),
+    )
+    def put_deepseek(
+        self,
+        *,
+        task_type: t.Union[str, t.Literal["chat_completion", "completion"]],
+        deepseek_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["deepseek"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+          <p>Create a DeepSeek inference endpoint.</p>
+          <p>Create an inference endpoint to perform an inference task with the <code>deepseek</code> service.</p>
+
+
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-deepseek.html>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param deepseek_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `deepseek`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `deepseek` service.
+        :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if deepseek_inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'deepseek_inference_id'")
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "deepseek_inference_id": _quote(deepseek_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["deepseek_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_deepseek",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_fields=(
             "service",
```
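`put_custom()` is the larger of the two new endpoint-creation methods; its docstring above carries a complete REST example, which translates almost one-to-one to the Python call below (the URL and API key are that example's placeholders). `put_deepseek()` follows the same shape with `service="deepseek"` and the new `timeout` query parameter.

```python
# Mirrors the `PUT _inference/text_embedding/test-text-embedding`
# example from the put_custom docstring above; the url and api_key
# values are that example's placeholders, not real credentials.
resp = client.inference.put_custom(
    task_type="text_embedding",
    custom_inference_id="test-text-embedding",
    service="custom",
    service_settings={
        "secret_parameters": {"api_key": "<some api key>"},
        "url": "...endpoints.huggingface.cloud/v1/embeddings",
        "headers": {
            # ${api_key} is a template, resolved from secret_parameters
            "Authorization": "Bearer ${api_key}",
            "Content-Type": "application/json",
        },
        # ${input} is the predefined template for the request's input strings
        "request": '{"input": ${input}}',
        "response": {
            "json_parser": {"text_embeddings": "$.data[*].embedding[*]"}
        },
    },
)
```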
@@ -933,6 +1288,7 @@ class InferenceClient(NamespacedClient):
|
|
|
933
1288
|
human: t.Optional[bool] = None,
|
|
934
1289
|
pretty: t.Optional[bool] = None,
|
|
935
1290
|
task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
|
|
1291
|
+
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
936
1292
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
937
1293
|
) -> ObjectApiResponse[t.Any]:
|
|
938
1294
|
"""
|
|
@@ -955,7 +1311,7 @@ class InferenceClient(NamespacedClient):
|
|
|
955
1311
|
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
|
|
956
1312
|
|
|
957
1313
|
|
|
958
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.
|
|
1314
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-elasticsearch.html>`_
|
|
959
1315
|
|
|
960
1316
|
:param task_type: The type of the inference task that the model will perform.
|
|
961
1317
|
:param elasticsearch_inference_id: The unique identifier of the inference endpoint.
|
|
@@ -967,6 +1323,8 @@ class InferenceClient(NamespacedClient):
|
|
|
967
1323
|
:param chunking_settings: The chunking configuration object.
|
|
968
1324
|
:param task_settings: Settings to configure the inference task. These settings
|
|
969
1325
|
are specific to the task type you specified.
|
|
1326
|
+
:param timeout: Specifies the amount of time to wait for the inference endpoint
|
|
1327
|
+
to be created.
|
|
970
1328
|
"""
|
|
971
1329
|
if task_type in SKIP_IN_PATH:
|
|
972
1330
|
raise ValueError("Empty value passed for parameter 'task_type'")
|
|
@@ -993,6 +1351,8 @@ class InferenceClient(NamespacedClient):
|
|
|
993
1351
|
__query["human"] = human
|
|
994
1352
|
if pretty is not None:
|
|
995
1353
|
__query["pretty"] = pretty
|
|
1354
|
+
if timeout is not None:
|
|
1355
|
+
__query["timeout"] = timeout
|
|
996
1356
|
if not __body:
|
|
997
1357
|
if service is not None:
|
|
998
1358
|
__body["service"] = service
|
|
@@ -1032,6 +1392,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1032
1392
|
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
|
|
1033
1393
|
human: t.Optional[bool] = None,
|
|
1034
1394
|
pretty: t.Optional[bool] = None,
|
|
1395
|
+
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
1035
1396
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
1036
1397
|
) -> ObjectApiResponse[t.Any]:
|
|
1037
1398
|
"""
|
|
@@ -1055,7 +1416,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1055
1416
|
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
|
|
1056
1417
|
|
|
1057
1418
|
|
|
1058
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.
|
|
1419
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-elser.html>`_
|
|
1059
1420
|
|
|
1060
1421
|
:param task_type: The type of the inference task that the model will perform.
|
|
1061
1422
|
:param elser_inference_id: The unique identifier of the inference endpoint.
|
|
@@ -1064,6 +1425,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1064
1425
|
:param service_settings: Settings used to install the inference model. These
|
|
1065
1426
|
settings are specific to the `elser` service.
|
|
1066
1427
|
:param chunking_settings: The chunking configuration object.
|
|
1428
|
+
:param timeout: Specifies the amount of time to wait for the inference endpoint
|
|
1429
|
+
to be created.
|
|
1067
1430
|
"""
|
|
1068
1431
|
if task_type in SKIP_IN_PATH:
|
|
1069
1432
|
raise ValueError("Empty value passed for parameter 'task_type'")
|
|
@@ -1088,6 +1451,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1088
1451
|
__query["human"] = human
|
|
1089
1452
|
if pretty is not None:
|
|
1090
1453
|
__query["pretty"] = pretty
|
|
1454
|
+
if timeout is not None:
|
|
1455
|
+
__query["timeout"] = timeout
|
|
1091
1456
|
if not __body:
|
|
1092
1457
|
if service is not None:
|
|
1093
1458
|
__body["service"] = service
|
|
@@ -1125,6 +1490,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1125
1490
|
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
|
|
1126
1491
|
human: t.Optional[bool] = None,
|
|
1127
1492
|
pretty: t.Optional[bool] = None,
|
|
1493
|
+
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
1128
1494
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
1129
1495
|
) -> ObjectApiResponse[t.Any]:
|
|
1130
1496
|
"""
|
|
@@ -1132,14 +1498,9 @@ class InferenceClient(NamespacedClient):
|
|
|
1132
1498
|
|
|
1133
1499
|
<p>Create an Google AI Studio inference endpoint.</p>
|
|
1134
1500
|
<p>Create an inference endpoint to perform an inference task with the <code>googleaistudio</code> service.</p>
|
|
1135
|
-
<p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
|
|
1136
|
-
After creating the endpoint, wait for the model deployment to complete before using it.
|
|
1137
|
-
To verify the deployment status, use the get trained model statistics API.
|
|
1138
|
-
Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
|
|
1139
|
-
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
|
|
1140
1501
|
|
|
1141
1502
|
|
|
1142
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.
|
|
1503
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-google-ai-studio.html>`_
|
|
1143
1504
|
|
|
1144
1505
|
:param task_type: The type of the inference task that the model will perform.
|
|
1145
1506
|
:param googleaistudio_inference_id: The unique identifier of the inference endpoint.
|
|
@@ -1148,6 +1509,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1148
1509
|
:param service_settings: Settings used to install the inference model. These
|
|
1149
1510
|
settings are specific to the `googleaistudio` service.
|
|
1150
1511
|
:param chunking_settings: The chunking configuration object.
|
|
1512
|
+
:param timeout: Specifies the amount of time to wait for the inference endpoint
|
|
1513
|
+
to be created.
|
|
1151
1514
|
"""
|
|
1152
1515
|
if task_type in SKIP_IN_PATH:
|
|
1153
1516
|
raise ValueError("Empty value passed for parameter 'task_type'")
|
|
@@ -1174,6 +1537,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1174
1537
|
__query["human"] = human
|
|
1175
1538
|
if pretty is not None:
|
|
1176
1539
|
__query["pretty"] = pretty
|
|
1540
|
+
if timeout is not None:
|
|
1541
|
+
__query["timeout"] = timeout
|
|
1177
1542
|
if not __body:
|
|
1178
1543
|
if service is not None:
|
|
1179
1544
|
__body["service"] = service
|
|
@@ -1207,7 +1572,9 @@ class InferenceClient(NamespacedClient):
|
|
|
1207
1572
|
def put_googlevertexai(
|
|
1208
1573
|
self,
|
|
1209
1574
|
*,
|
|
1210
|
-
task_type: t.Union[
|
|
1575
|
+
task_type: t.Union[
|
|
1576
|
+
str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
|
|
1577
|
+
],
|
|
1211
1578
|
googlevertexai_inference_id: str,
|
|
1212
1579
|
service: t.Optional[t.Union[str, t.Literal["googlevertexai"]]] = None,
|
|
1213
1580
|
service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
|
|
@@ -1217,6 +1584,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1217
1584
|
human: t.Optional[bool] = None,
|
|
1218
1585
|
pretty: t.Optional[bool] = None,
|
|
1219
1586
|
task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
|
|
1587
|
+
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
1220
1588
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
1221
1589
|
) -> ObjectApiResponse[t.Any]:
|
|
1222
1590
|
"""
|
|
@@ -1224,14 +1592,9 @@ class InferenceClient(NamespacedClient):
|
|
|
1224
1592
|
|
|
1225
1593
|
<p>Create a Google Vertex AI inference endpoint.</p>
|
|
1226
1594
|
<p>Create an inference endpoint to perform an inference task with the <code>googlevertexai</code> service.</p>
|
|
1227
|
-
<p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
|
|
1228
|
-
After creating the endpoint, wait for the model deployment to complete before using it.
|
|
1229
|
-
To verify the deployment status, use the get trained model statistics API.
|
|
1230
|
-
Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
|
|
1231
|
-
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
|
|
1232
1595
|
|
|
1233
1596
|
|
|
1234
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.
|
|
1597
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-google-vertex-ai.html>`_
|
|
1235
1598
|
|
|
1236
1599
|
:param task_type: The type of the inference task that the model will perform.
|
|
1237
1600
|
:param googlevertexai_inference_id: The unique identifier of the inference endpoint.
|
|
@@ -1242,6 +1605,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1242
1605
|
:param chunking_settings: The chunking configuration object.
|
|
1243
1606
|
:param task_settings: Settings to configure the inference task. These settings
|
|
1244
1607
|
are specific to the task type you specified.
|
|
1608
|
+
:param timeout: Specifies the amount of time to wait for the inference endpoint
|
|
1609
|
+
to be created.
|
|
1245
1610
|
"""
|
|
1246
1611
|
if task_type in SKIP_IN_PATH:
|
|
1247
1612
|
raise ValueError("Empty value passed for parameter 'task_type'")
|
|
@@ -1268,6 +1633,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1268
1633
|
__query["human"] = human
|
|
1269
1634
|
if pretty is not None:
|
|
1270
1635
|
__query["pretty"] = pretty
|
|
1636
|
+
if timeout is not None:
|
|
1637
|
+
__query["timeout"] = timeout
|
|
1271
1638
|
if not __body:
|
|
1272
1639
|
if service is not None:
|
|
1273
1640
|
__body["service"] = service
|
|
@@ -1293,12 +1660,19 @@ class InferenceClient(NamespacedClient):
         )

     @_rewrite_parameters(
-        body_fields=(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
     )
     def put_hugging_face(
         self,
         *,
-        task_type: t.Union[
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
+        ],
         huggingface_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["hugging_face"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1307,17 +1681,22 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html

         <p>Create a Hugging Face inference endpoint.</p>
-        <p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service
-
-
-
-        <p>
+        <p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service.
+        Supported tasks include: <code>text_embedding</code>, <code>completion</code>, and <code>chat_completion</code>.</p>
+        <p>To configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.
+        Select a model that supports the task you intend to use.</p>
+        <p>For Elastic's <code>text_embedding</code> task:
+        The selected model must support the <code>Sentence Embeddings</code> task. On the new endpoint creation page, select the <code>Sentence Embeddings</code> task under the <code>Advanced Configuration</code> section.
+        After the endpoint has initialized, copy the generated endpoint URL.
+        Recommended models for <code>text_embedding</code> task:</p>
         <ul>
         <li><code>all-MiniLM-L6-v2</code></li>
         <li><code>all-MiniLM-L12-v2</code></li>
@@ -1327,14 +1706,27 @@ class InferenceClient(NamespacedClient):
         <li><code>multilingual-e5-base</code></li>
         <li><code>multilingual-e5-small</code></li>
         </ul>
-        <p>
-
-
-
-
+        <p>For Elastic's <code>chat_completion</code> and <code>completion</code> tasks:
+        The selected model must support the <code>Text Generation</code> task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for <code>Text Generation</code>. When creating dedicated endpoint select the <code>Text Generation</code> task.
+        After the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes <code>/v1/chat/completions</code> part in URL. Then, copy the full endpoint URL for use.
+        Recommended models for <code>chat_completion</code> and <code>completion</code> tasks:</p>
+        <ul>
+        <li><code>Mistral-7B-Instruct-v0.2</code></li>
+        <li><code>QwQ-32B</code></li>
+        <li><code>Phi-3-mini-128k-instruct</code></li>
+        </ul>
+        <p>For Elastic's <code>rerank</code> task:
+        The selected model must support the <code>sentence-ranking</code> task and expose OpenAI API.
+        HuggingFace supports only dedicated (not serverless) endpoints for <code>Rerank</code> so far.
+        After the endpoint is initialized, copy the full endpoint URL for use.
+        Tested models for <code>rerank</code> task:</p>
+        <ul>
+        <li><code>bge-reranker-base</code></li>
+        <li><code>jina-reranker-v1-turbo-en-GGUF</code></li>
+        </ul>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-hugging-face.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param huggingface_inference_id: The unique identifier of the inference endpoint.
@@ -1343,6 +1735,10 @@ class InferenceClient(NamespacedClient):
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `hugging_face` service.
         :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1369,6 +1765,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1376,6 +1774,8 @@ class InferenceClient(NamespacedClient):
                 __body["service_settings"] = service_settings
             if chunking_settings is not None:
                 __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
         if not __body:
             __body = None  # type: ignore[assignment]
         __headers = {"accept": "application/json"}
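The Hugging Face changes are the largest in this group: the `@_rewrite_parameters` decorator now routes `task_settings` into the request body, the signature gains `task_settings` and `timeout`, and the docstring documents per-task setup on the Hugging Face side. A sketch of creating a `chat_completion` endpoint under those rules, reusing the `client` from the first sketch; the URL and token are placeholders (the docstring requires the URL to include the `/v1/chat/completions` path):

```python
# Placeholder endpoint URL and token; substitute a real dedicated or
# serverless Hugging Face endpoint that speaks the OpenAI API.
resp = client.inference.put_hugging_face(
    task_type="chat_completion",
    huggingface_inference_id="my-hf-chat",  # placeholder id
    service="hugging_face",
    service_settings={
        "api_key": "hf_xxx",  # placeholder token
        "url": "https://my-endpoint.endpoints.huggingface.cloud/v1/chat/completions",
    },
    timeout="30s",  # new in 8.19: wait up to 30s for endpoint creation
)
```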
@@ -1412,6 +1812,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1421,14 +1822,9 @@ class InferenceClient(NamespacedClient):
         <p>Create an inference endpoint to perform an inference task with the <code>jinaai</code> service.</p>
         <p>To review the available <code>rerank</code> models, refer to <a href="https://jina.ai/reranker">https://jina.ai/reranker</a>.
         To review the available <code>text_embedding</code> models, refer to the <a href="https://jina.ai/embeddings/">https://jina.ai/embeddings/</a>.</p>
-        <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-        After creating the endpoint, wait for the model deployment to complete before using it.
-        To verify the deployment status, use the get trained model statistics API.
-        Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-        Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-jinaai.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param jinaai_inference_id: The unique identifier of the inference endpoint.
@@ -1439,6 +1835,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1463,6 +1861,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1493,7 +1893,9 @@ class InferenceClient(NamespacedClient):
     def put_mistral(
         self,
         *,
-        task_type: t.Union[
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "text_embedding"]
+        ],
         mistral_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["mistral"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1502,30 +1904,27 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html

         <p>Create a Mistral inference endpoint.</p>
-        <p>
-        <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-        After creating the endpoint, wait for the model deployment to complete before using it.
-        To verify the deployment status, use the get trained model statistics API.
-        Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-        Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+        <p>Create an inference endpoint to perform an inference task with the <code>mistral</code> service.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-mistral.html>`_

-        :param task_type: The
-            is `text_embedding`.
+        :param task_type: The type of the inference task that the model will perform.
         :param mistral_inference_id: The unique identifier of the inference endpoint.
         :param service: The type of service supported for the specified task type. In
             this case, `mistral`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `mistral` service.
         :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1550,6 +1949,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
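For `put_mistral` the hunks also replace a docstring that restricted the task type to `text_embedding`, while the 8.19 type hint admits `chat_completion` and `completion` as well. A placeholder sketch, again reusing the earlier `client`:

```python
# Sketch: chat_completion is newly allowed by the mistral type hint in 8.19.
# The API key and model name below are placeholders.
resp = client.inference.put_mistral(
    task_type="chat_completion",
    mistral_inference_id="my-mistral-chat",  # placeholder id
    service="mistral",
    service_settings={
        "api_key": "...",                 # placeholder
        "model": "mistral-small-latest",  # placeholder model name
    },
    timeout="30s",
)
```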
@@ -1595,6 +1996,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1602,14 +2004,9 @@ class InferenceClient(NamespacedClient):

         <p>Create an OpenAI inference endpoint.</p>
         <p>Create an inference endpoint to perform an inference task with the <code>openai</code> service or <code>openai</code> compatible APIs.</p>
-        <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-        After creating the endpoint, wait for the model deployment to complete before using it.
-        To verify the deployment status, use the get trained model statistics API.
-        Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-        Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-openai.html>`_

         :param task_type: The type of the inference task that the model will perform.
             NOTE: The `chat_completion` task type only supports streaming and only through
@@ -1622,6 +2019,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1646,6 +2045,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
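The `put_openai` changes follow the same recipe: drop the model-deployment boilerplate, pin the docs link to 8.19, and add `timeout`. A placeholder sketch; the `openai_inference_id` keyword is an assumption inferred from the naming pattern of the other `put_*` methods in this diff, since that parameter is not visible in these hunks:

```python
# Sketch: create an OpenAI completion endpoint, waiting up to a minute.
# API key and model id are placeholders.
resp = client.inference.put_openai(
    task_type="completion",
    openai_inference_id="my-openai-completion",  # placeholder id
    service="openai",
    service_settings={"api_key": "sk-...", "model_id": "gpt-4o-mini"},
    timeout="60s",
)
```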
@@ -1691,6 +2092,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1701,7 +2103,7 @@ class InferenceClient(NamespacedClient):
         <p>Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-voyageai.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param voyageai_inference_id: The unique identifier of the inference endpoint.
@@ -1712,6 +2114,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1736,6 +2140,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1774,6 +2180,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1783,14 +2190,9 @@ class InferenceClient(NamespacedClient):
         <p>Create an inference endpoint to perform an inference task with the <code>watsonxai</code> service.
         You need an IBM Cloud Databases for Elasticsearch deployment to use the <code>watsonxai</code> inference service.
         You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.</p>
-        <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-        After creating the endpoint, wait for the model deployment to complete before using it.
-        To verify the deployment status, use the get trained model statistics API.
-        Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-        Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-watsonx-ai.html>`_

         :param task_type: The task type. The only valid task type for the model to perform
             is `text_embedding`.
@@ -1799,6 +2201,8 @@ class InferenceClient(NamespacedClient):
             this case, `watsonxai`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `watsonxai` service.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1823,6 +2227,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1863,10 +2269,10 @@ class InferenceClient(NamespacedClient):
         """
         .. raw:: html

-        <p>Perform
+        <p>Perform reranking inference on the service</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_

         :param inference_id: The unique identifier for the inference endpoint.
         :param input: The text on which you want to perform the inference task. It can
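With the docstring now reading "Perform reranking inference on the service", a short usage sketch of the `rerank` helper, reusing the earlier `client`; the endpoint id and documents are placeholders, and the `query` keyword is an assumption based on how the rerank inference API receives the text to score against:

```python
# Sketch: rank candidate passages against a query using a rerank-capable
# endpoint created earlier (placeholder id).
resp = client.inference.rerank(
    inference_id="my-rerank-endpoint",
    query="how do I resize an index?",
    input=[
        "The shrink API reduces the number of shards in an index.",
        "Snapshots back up a cluster to a repository.",
    ],
)
```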
@@ -1942,7 +2348,7 @@ class InferenceClient(NamespacedClient):
         <p>Perform sparse embedding inference on the service</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_

         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
@@ -2010,7 +2416,7 @@ class InferenceClient(NamespacedClient):
         <p>Perform text embedding inference on the service</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_

         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
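The sparse and text embedding hunks are documentation-only: the docs links move from truncated 8.x URLs to the 8.19 page, and the call shapes are unchanged. For completeness, a placeholder sketch of the text-embedding form, which per the docstring accepts either a string or an array of strings:

```python
# Sketch: embed two passages with the endpoint created earlier (placeholder
# id); sparse_embedding takes the same inference_id/input arguments.
resp = client.inference.text_embedding(
    inference_id="my-vertex-embeddings",
    input=["first passage to embed", "second passage to embed"],
)
```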
@@ -2092,7 +2498,7 @@ class InferenceClient(NamespacedClient):
         However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>


-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/update-inference-api.html>`_

         :param inference_id: The unique identifier of the inference endpoint.
         :param inference_config:
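The final hunk points the update API at its 8.19 documentation. Based on the two parameters visible here, `inference_id` and `inference_config`, an update call might look like the sketch below; the body keys are an assumption modeled on the `put_*` request bodies above, not taken from this diff:

```python
# Sketch: rotate the API key on an existing endpoint in place (placeholder
# id and key; `inference_config` carries the partial configuration to apply).
resp = client.inference.update(
    inference_id="my-openai-completion",
    inference_config={"service_settings": {"api_key": "sk-new-key"}},
)
```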