elasticsearch 8.18.1__py3-none-any.whl → 8.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- elasticsearch/_async/client/__init__.py +56 -76
- elasticsearch/_async/client/async_search.py +5 -9
- elasticsearch/_async/client/autoscaling.py +4 -4
- elasticsearch/_async/client/cat.py +620 -65
- elasticsearch/_async/client/ccr.py +13 -13
- elasticsearch/_async/client/cluster.py +31 -22
- elasticsearch/_async/client/connector.py +30 -30
- elasticsearch/_async/client/dangling_indices.py +3 -3
- elasticsearch/_async/client/enrich.py +5 -5
- elasticsearch/_async/client/eql.py +13 -5
- elasticsearch/_async/client/esql.py +38 -9
- elasticsearch/_async/client/features.py +2 -2
- elasticsearch/_async/client/fleet.py +13 -13
- elasticsearch/_async/client/graph.py +1 -1
- elasticsearch/_async/client/ilm.py +11 -11
- elasticsearch/_async/client/indices.py +131 -82
- elasticsearch/_async/client/inference.py +411 -112
- elasticsearch/_async/client/ingest.py +9 -16
- elasticsearch/_async/client/license.py +10 -10
- elasticsearch/_async/client/logstash.py +3 -3
- elasticsearch/_async/client/migration.py +3 -3
- elasticsearch/_async/client/ml.py +76 -88
- elasticsearch/_async/client/nodes.py +9 -8
- elasticsearch/_async/client/query_rules.py +8 -8
- elasticsearch/_async/client/rollup.py +8 -8
- elasticsearch/_async/client/search_application.py +10 -10
- elasticsearch/_async/client/searchable_snapshots.py +4 -4
- elasticsearch/_async/client/security.py +72 -80
- elasticsearch/_async/client/shutdown.py +3 -3
- elasticsearch/_async/client/simulate.py +1 -1
- elasticsearch/_async/client/slm.py +9 -9
- elasticsearch/_async/client/snapshot.py +19 -13
- elasticsearch/_async/client/sql.py +6 -6
- elasticsearch/_async/client/ssl.py +1 -1
- elasticsearch/_async/client/synonyms.py +7 -7
- elasticsearch/_async/client/tasks.py +3 -3
- elasticsearch/_async/client/text_structure.py +4 -4
- elasticsearch/_async/client/transform.py +9 -9
- elasticsearch/_async/client/xpack.py +1 -1
- elasticsearch/_sync/client/__init__.py +56 -76
- elasticsearch/_sync/client/async_search.py +5 -9
- elasticsearch/_sync/client/autoscaling.py +4 -4
- elasticsearch/_sync/client/cat.py +620 -65
- elasticsearch/_sync/client/ccr.py +13 -13
- elasticsearch/_sync/client/cluster.py +31 -22
- elasticsearch/_sync/client/connector.py +30 -30
- elasticsearch/_sync/client/dangling_indices.py +3 -3
- elasticsearch/_sync/client/enrich.py +5 -5
- elasticsearch/_sync/client/eql.py +13 -5
- elasticsearch/_sync/client/esql.py +38 -9
- elasticsearch/_sync/client/features.py +2 -2
- elasticsearch/_sync/client/fleet.py +13 -13
- elasticsearch/_sync/client/graph.py +1 -1
- elasticsearch/_sync/client/ilm.py +11 -11
- elasticsearch/_sync/client/indices.py +131 -82
- elasticsearch/_sync/client/inference.py +411 -112
- elasticsearch/_sync/client/ingest.py +9 -16
- elasticsearch/_sync/client/license.py +10 -10
- elasticsearch/_sync/client/logstash.py +3 -3
- elasticsearch/_sync/client/migration.py +3 -3
- elasticsearch/_sync/client/ml.py +76 -88
- elasticsearch/_sync/client/nodes.py +9 -8
- elasticsearch/_sync/client/query_rules.py +8 -8
- elasticsearch/_sync/client/rollup.py +8 -8
- elasticsearch/_sync/client/search_application.py +10 -10
- elasticsearch/_sync/client/searchable_snapshots.py +4 -4
- elasticsearch/_sync/client/security.py +72 -80
- elasticsearch/_sync/client/shutdown.py +3 -3
- elasticsearch/_sync/client/simulate.py +1 -1
- elasticsearch/_sync/client/slm.py +9 -9
- elasticsearch/_sync/client/snapshot.py +19 -13
- elasticsearch/_sync/client/sql.py +6 -6
- elasticsearch/_sync/client/ssl.py +1 -1
- elasticsearch/_sync/client/synonyms.py +7 -7
- elasticsearch/_sync/client/tasks.py +3 -3
- elasticsearch/_sync/client/text_structure.py +4 -4
- elasticsearch/_sync/client/transform.py +9 -9
- elasticsearch/_sync/client/xpack.py +1 -1
- elasticsearch/_version.py +1 -1
- elasticsearch/compat.py +5 -0
- elasticsearch/dsl/__init__.py +2 -1
- elasticsearch/dsl/_async/document.py +1 -1
- elasticsearch/dsl/_sync/document.py +1 -1
- elasticsearch/dsl/aggs.py +2 -3
- elasticsearch/dsl/document_base.py +176 -16
- elasticsearch/dsl/field.py +223 -38
- elasticsearch/dsl/query.py +49 -4
- elasticsearch/dsl/types.py +107 -16
- elasticsearch/dsl/utils.py +1 -1
- elasticsearch/esql/__init__.py +18 -0
- elasticsearch/esql/esql.py +1105 -0
- elasticsearch/esql/functions.py +1738 -0
- elasticsearch/exceptions.py +2 -0
- {elasticsearch-8.18.1.dist-info → elasticsearch-8.19.0.dist-info}/METADATA +1 -3
- elasticsearch-8.19.0.dist-info/RECORD +164 -0
- elasticsearch-8.18.1.dist-info/RECORD +0 -163
- elasticsearch-8.18.1.dist-info/licenses/LICENSE.txt +0 -175
- elasticsearch-8.18.1.dist-info/licenses/NOTICE.txt +0 -559
- {elasticsearch-8.18.1.dist-info → elasticsearch-8.19.0.dist-info}/WHEEL +0 -0
- {elasticsearch-8.18.1.dist-info → elasticsearch-8.19.0.dist-info}/licenses/LICENSE +0 -0
- {elasticsearch-8.18.1.dist-info → elasticsearch-8.19.0.dist-info}/licenses/NOTICE +0 -0
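
Beyond the client changes, the most visible addition in 8.19 is the new `elasticsearch.esql` package (`esql/esql.py` and `esql/functions.py` above), which ships an ES|QL query builder. The sketch below shows the general shape such a builder call might take; the `ESQL` entry point and the chained method names are assumptions inferred from the new module paths, not confirmed by this diff:

```python
# Hypothetical sketch of the new ES|QL query builder added in 8.19.
# ESQL.from_() and the chained where()/sort()/limit() calls are assumptions
# inferred from the new esql/esql.py module; check the 8.19 release notes
# for the confirmed API before relying on it.
from elasticsearch.esql import ESQL

query = (
    ESQL.from_("employees")        # FROM employees
    .where("still_hired == true")  # WHERE still_hired == true
    .sort("emp_no")                # SORT emp_no
    .limit(10)                     # LIMIT 10
)

# The builder is expected to render the composed ES|QL source string,
# e.g. 'FROM employees | WHERE still_hired == true | SORT emp_no | LIMIT 10'.
print(query)
```

The companion `esql/functions.py` module (+1738 lines) suggests ES|QL functions are exposed as Python callables as well.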
The hunks below are from `elasticsearch/_async/client/inference.py`; per the file list above, `elasticsearch/_sync/client/inference.py` receives the mirrored changes (+411 −112 in both).

```diff
--- a/elasticsearch/_async/client/inference.py
+++ b/elasticsearch/_async/client/inference.py
@@ -47,7 +47,7 @@ class InferenceClient(NamespacedClient):
           <p>Perform completion inference on the service</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_
 
         :param inference_id: The inference Id
         :param input: Inference input. Either a string or an array of strings.
@@ -123,7 +123,7 @@
           <p>Delete an inference endpoint</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/delete-inference-api.html>`_
 
         :param inference_id: The inference identifier.
         :param task_type: The task type
@@ -197,7 +197,7 @@
           <p>Get an inference endpoint</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/get-inference-api.html>`_
 
         :param task_type: The task type
         :param inference_id: The inference Id
@@ -235,7 +235,7 @@
         )
 
     @_rewrite_parameters(
-        body_fields=("input", "query", "task_settings"),
+        body_fields=("input", "input_type", "query", "task_settings"),
     )
     async def inference(
         self,
@@ -257,6 +257,7 @@
         error_trace: t.Optional[bool] = None,
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
+        input_type: t.Optional[str] = None,
         pretty: t.Optional[bool] = None,
         query: t.Optional[str] = None,
         task_settings: t.Optional[t.Any] = None,
@@ -277,13 +278,22 @@
           </blockquote>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_
 
         :param inference_id: The unique identifier for the inference endpoint.
         :param input: The text on which you want to perform the inference task. It can
             be a single string or an array. > info > Inference endpoints for the `completion`
             task type currently only support a single string as input.
         :param task_type: The type of inference task that the model performs.
+        :param input_type: Specifies the input data type for the text embedding model.
+            The `input_type` parameter only applies to Inference Endpoints with the `text_embedding`
+            task type. Possible values include: * `SEARCH` * `INGEST` * `CLASSIFICATION`
+            * `CLUSTERING` Not all services support all values. Unsupported values will
+            trigger a validation exception. Accepted values depend on the configured
+            inference service, refer to the relevant service-specific documentation for
+            more info. > info > The `input_type` parameter specified on the root level
+            of the request body will take precedence over the `input_type` parameter
+            specified in `task_settings`.
         :param query: The query input, which is required only for the `rerank` task.
             It is not required for other tasks.
         :param task_settings: Task settings for the individual inference request. These
@@ -322,6 +332,8 @@
         if not __body:
             if input is not None:
                 __body["input"] = input
+            if input_type is not None:
+                __body["input_type"] = input_type
             if query is not None:
                 __body["query"] = query
             if task_settings is not None:
```
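
Taken together, the hunks above add a top-level `input_type` parameter to `InferenceClient.inference()` and thread it into the request body. A minimal sketch of a call that uses it, assuming an existing `text_embedding` endpoint named `my-e5-endpoint` (the endpoint id and client setup are illustrative):

```python
# Sketch: using the new top-level input_type parameter (8.19+).
# "my-e5-endpoint" is a placeholder endpoint id; which input_type values
# (SEARCH, INGEST, CLASSIFICATION, CLUSTERING) are accepted depends on the
# backing inference service.
from elasticsearch import AsyncElasticsearch

async def embed_for_search(es: AsyncElasticsearch) -> None:
    resp = await es.inference.inference(
        task_type="text_embedding",
        inference_id="my-e5-endpoint",
        input=["what is an inference endpoint?"],
        input_type="SEARCH",  # takes precedence over input_type in task_settings
    )
    print(resp)
```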
```diff
@@ -366,26 +378,45 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html
 
-          <p>Create an inference endpoint
-          When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+          <p>Create an inference endpoint.</p>
           <p>IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.
           For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.
           However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>
+          <p>The following integrations are available through the inference API. You can find the available task types next to the integration name:</p>
+          <ul>
+          <li>AlibabaCloud AI Search (<code>completion</code>, <code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code>)</li>
+          <li>Amazon Bedrock (<code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Anthropic (<code>completion</code>)</li>
+          <li>Azure AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Azure OpenAI (<code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Cohere (<code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+          <li>DeepSeek (<code>completion</code>, <code>chat_completion</code>)</li>
+          <li>Elasticsearch (<code>rerank</code>, <code>sparse_embedding</code>, <code>text_embedding</code> - this service is for built-in models and models uploaded through Eland)</li>
+          <li>ELSER (<code>sparse_embedding</code>)</li>
+          <li>Google AI Studio (<code>completion</code>, <code>text_embedding</code>)</li>
+          <li>Google Vertex AI (<code>rerank</code>, <code>text_embedding</code>)</li>
+          <li>Hugging Face (<code>chat_completion</code>, <code>completion</code>, <code>rerank</code>, <code>text_embedding</code>)</li>
+          <li>Mistral (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+          <li>OpenAI (<code>chat_completion</code>, <code>completion</code>, <code>text_embedding</code>)</li>
+          <li>VoyageAI (<code>text_embedding</code>, <code>rerank</code>)</li>
+          <li>Watsonx inference integration (<code>text_embedding</code>)</li>
+          <li>JinaAI (<code>text_embedding</code>, <code>rerank</code>)</li>
+          </ul>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/put-inference-api.html>`_
 
         :param inference_id: The inference Id
         :param inference_config:
-        :param task_type: The task type
+        :param task_type: The task type. Refer to the integration list in the API description
+            for the available task types.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if inference_id in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'inference_id'")
@@ -416,6 +447,8 @@
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         __body = inference_config if inference_config is not None else body
         __headers = {"accept": "application/json", "content-type": "application/json"}
         return await self.perform_request(  # type: ignore[return-value]
```
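
The generic `put()` now also accepts a `timeout` that is forwarded as the `?timeout=` query parameter. A sketch, with the endpoint id and the ELSER-style `inference_config` payload purely illustrative:

```python
# Sketch: creating an inference endpoint and waiting up to 30s for it.
# The inference_config payload is an illustrative ELSER configuration;
# timeout is sent through as the ?timeout= query parameter.
from elasticsearch import AsyncElasticsearch

async def create_endpoint(es: AsyncElasticsearch) -> None:
    await es.inference.put(
        task_type="sparse_embedding",
        inference_id="my-elser-endpoint",
        inference_config={
            "service": "elser",
            "service_settings": {"num_allocations": 1, "num_threads": 1},
        },
        timeout="30s",
    )
```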
```diff
@@ -451,6 +484,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -458,14 +492,9 @@
 
           <p>Create an AlibabaCloud AI Search inference endpoint.</p>
           <p>Create an inference endpoint to perform an inference task with the <code>alibabacloud-ai-search</code> service.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-alibabacloud-ai-search.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param alibabacloud_inference_id: The unique identifier of the inference endpoint.
@@ -476,6 +505,8 @@
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -502,6 +533,8 @@
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -547,25 +580,21 @@
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html
 
           <p>Create an Amazon Bedrock inference endpoint.</p>
-          <p>
+          <p>Create an inference endpoint to perform an inference task with the <code>amazonbedrock</code> service.</p>
           <blockquote>
           <p>info
           You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.</p>
           </blockquote>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-amazon-bedrock.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param amazonbedrock_inference_id: The unique identifier of the inference endpoint.
@@ -576,6 +605,8 @@
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -602,6 +633,8 @@
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -647,6 +680,7 @@
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -654,14 +688,9 @@
 
           <p>Create an Anthropic inference endpoint.</p>
           <p>Create an inference endpoint to perform an inference task with the <code>anthropic</code> service.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-anthropic.html>`_
 
         :param task_type: The task type. The only valid task type for the model to perform
             is `completion`.
@@ -673,6 +702,8 @@
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -699,6 +730,8 @@
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -744,6 +777,7 @@
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -751,14 +785,9 @@
 
           <p>Create an Azure AI studio inference endpoint.</p>
           <p>Create an inference endpoint to perform an inference task with the <code>azureaistudio</code> service.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-azure-ai-studio.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param azureaistudio_inference_id: The unique identifier of the inference endpoint.
@@ -769,6 +798,8 @@
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -795,6 +826,8 @@
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -840,6 +873,7 @@
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -853,14 +887,9 @@
           <li><a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35">GPT-3.5</a></li>
           </ul>
           <p>The list of embeddings models that you can choose from in your deployment can be found in the <a href="https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings">Azure models documentation</a>.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-azure-openai.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
             NOTE: The `chat_completion` task type only supports streaming and only through
@@ -873,6 +902,8 @@
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -899,6 +930,8 @@
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -944,6 +977,7 @@
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -951,14 +985,9 @@
 
           <p>Create a Cohere inference endpoint.</p>
           <p>Create an inference endpoint to perform an inference task with the <code>cohere</code> service.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
 
 
-        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-cohere.html>`_
 
         :param task_type: The type of the inference task that the model will perform.
         :param cohere_inference_id: The unique identifier of the inference endpoint.
@@ -969,6 +998,8 @@
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -993,6 +1024,8 @@
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1017,6 +1050,221 @@
             path_parts=__path_parts,
         )
 
+    @_rewrite_parameters(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
+    )
+    async def put_custom(
+        self,
+        *,
+        task_type: t.Union[
+            str, t.Literal["completion", "rerank", "sparse_embedding", "text_embedding"]
+        ],
+        custom_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["custom"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+          <p>Create a custom inference endpoint.</p>
+          <p>The custom service gives more control over how to interact with external inference services that aren't explicitly supported through dedicated integrations.
+          The custom service gives you the ability to define the headers, url, query parameters, request body, and secrets.
+          The custom service supports the template replacement functionality, which enables you to define a template that can be replaced with the value associated with that key.
+          Templates are portions of a string that start with <code>${</code> and end with <code>}</code>.
+          The parameters <code>secret_parameters</code> and <code>task_settings</code> are checked for keys for template replacement. Template replacement is supported in the <code>request</code>, <code>headers</code>, <code>url</code>, and <code>query_parameters</code>.
+          If the definition (key) is not found for a template, an error message is returned.
+          In case of an endpoint definition like the following:</p>
+          <pre><code>PUT _inference/text_embedding/test-text-embedding
+          {
+            "service": "custom",
+            "service_settings": {
+              "secret_parameters": {
+                "api_key": "<some api key>"
+              },
+              "url": "...endpoints.huggingface.cloud/v1/embeddings",
+              "headers": {
+                "Authorization": "Bearer ${api_key}",
+                "Content-Type": "application/json"
+              },
+              "request": "{\\"input\\": ${input}}",
+              "response": {
+                "json_parser": {
+                  "text_embeddings":"$.data[*].embedding[*]"
+                }
+              }
+            }
+          }
+          </code></pre>
+          <p>To replace <code>${api_key}</code> the <code>secret_parameters</code> and <code>task_settings</code> are checked for a key named <code>api_key</code>.</p>
+          <blockquote>
+          <p>info
+          Templates should not be surrounded by quotes.</p>
+          </blockquote>
+          <p>Pre-defined templates:</p>
+          <ul>
+          <li><code>${input}</code> refers to the array of input strings that comes from the <code>input</code> field of the subsequent inference requests.</li>
+          <li><code>${input_type}</code> refers to the input type translation values.</li>
+          <li><code>${query}</code> refers to the query field used specifically for reranking tasks.</li>
+          <li><code>${top_n}</code> refers to the <code>top_n</code> field available when performing rerank requests.</li>
+          <li><code>${return_documents}</code> refers to the <code>return_documents</code> field available when performing rerank requests.</li>
+          </ul>
+
+
+        `<https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param custom_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `custom`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `custom` service.
+        :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type you specified.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if custom_inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'custom_inference_id'")
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "custom_inference_id": _quote(custom_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["custom_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_custom",
+            path_parts=__path_parts,
+        )
+
+    @_rewrite_parameters(
+        body_fields=("service", "service_settings", "chunking_settings"),
+    )
+    async def put_deepseek(
+        self,
+        *,
+        task_type: t.Union[str, t.Literal["chat_completion", "completion"]],
+        deepseek_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["deepseek"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+
+          <p>Create a DeepSeek inference endpoint.</p>
+          <p>Create an inference endpoint to perform an inference task with the <code>deepseek</code> service.</p>
+
+
+        `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-deepseek.html>`_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param deepseek_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `deepseek`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `deepseek` service.
+        :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if deepseek_inference_id in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'deepseek_inference_id'")
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "deepseek_inference_id": _quote(deepseek_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["deepseek_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_deepseek",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_fields=(
             "service",
```
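
The `PUT _inference/text_embedding/test-text-embedding` example in the `put_custom` docstring above maps one-to-one onto the new method; the sketch below simply wraps that same payload in a client call, keeping the truncated Hugging Face URL exactly as the docstring shows it. `put_deepseek` follows the same shape with `service="deepseek"`.

```python
# Sketch: the docstring's endpoint definition expressed through the new
# put_custom() method. The payload mirrors the example above verbatim;
# only the surrounding client call is added.
from elasticsearch import AsyncElasticsearch

async def create_custom_endpoint(es: AsyncElasticsearch) -> None:
    await es.inference.put_custom(
        task_type="text_embedding",
        custom_inference_id="test-text-embedding",
        service="custom",
        service_settings={
            "secret_parameters": {"api_key": "<some api key>"},
            "url": "...endpoints.huggingface.cloud/v1/embeddings",
            "headers": {
                "Authorization": "Bearer ${api_key}",  # template, filled from secret_parameters
                "Content-Type": "application/json",
            },
            "request": '{"input": ${input}}',  # ${input} expands to the input strings
            "response": {
                "json_parser": {"text_embeddings": "$.data[*].embedding[*]"}
            },
        },
    )
```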
@@ -1040,6 +1288,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1040
1288
|
human: t.Optional[bool] = None,
|
|
1041
1289
|
pretty: t.Optional[bool] = None,
|
|
1042
1290
|
task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
|
|
1291
|
+
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
1043
1292
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
1044
1293
|
) -> ObjectApiResponse[t.Any]:
|
|
1045
1294
|
"""
|
|
@@ -1062,7 +1311,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1062
1311
|
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
|
|
1063
1312
|
|
|
1064
1313
|
|
|
1065
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.
|
|
1314
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-elasticsearch.html>`_
|
|
1066
1315
|
|
|
1067
1316
|
:param task_type: The type of the inference task that the model will perform.
|
|
1068
1317
|
:param elasticsearch_inference_id: The unique identifier of the inference endpoint.
|
|
@@ -1074,6 +1323,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1074
1323
|
:param chunking_settings: The chunking configuration object.
|
|
1075
1324
|
:param task_settings: Settings to configure the inference task. These settings
|
|
1076
1325
|
are specific to the task type you specified.
|
|
1326
|
+
:param timeout: Specifies the amount of time to wait for the inference endpoint
|
|
1327
|
+
to be created.
|
|
1077
1328
|
"""
|
|
1078
1329
|
if task_type in SKIP_IN_PATH:
|
|
1079
1330
|
raise ValueError("Empty value passed for parameter 'task_type'")
|
|
@@ -1100,6 +1351,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1100
1351
|
__query["human"] = human
|
|
1101
1352
|
if pretty is not None:
|
|
1102
1353
|
__query["pretty"] = pretty
|
|
1354
|
+
if timeout is not None:
|
|
1355
|
+
__query["timeout"] = timeout
|
|
1103
1356
|
if not __body:
|
|
1104
1357
|
if service is not None:
|
|
1105
1358
|
__body["service"] = service
|
|
@@ -1139,6 +1392,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1139
1392
|
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
|
|
1140
1393
|
human: t.Optional[bool] = None,
|
|
1141
1394
|
pretty: t.Optional[bool] = None,
|
|
1395
|
+
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
1142
1396
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
1143
1397
|
) -> ObjectApiResponse[t.Any]:
|
|
1144
1398
|
"""
|
|
@@ -1162,7 +1416,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1162
1416
|
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
|
|
1163
1417
|
|
|
1164
1418
|
|
|
1165
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.
|
|
1419
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-elser.html>`_
|
|
1166
1420
|
|
|
1167
1421
|
:param task_type: The type of the inference task that the model will perform.
|
|
1168
1422
|
:param elser_inference_id: The unique identifier of the inference endpoint.
|
|
@@ -1171,6 +1425,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1171
1425
|
:param service_settings: Settings used to install the inference model. These
|
|
1172
1426
|
settings are specific to the `elser` service.
|
|
1173
1427
|
:param chunking_settings: The chunking configuration object.
|
|
1428
|
+
:param timeout: Specifies the amount of time to wait for the inference endpoint
|
|
1429
|
+
to be created.
|
|
1174
1430
|
"""
|
|
1175
1431
|
if task_type in SKIP_IN_PATH:
|
|
1176
1432
|
raise ValueError("Empty value passed for parameter 'task_type'")
|
|
@@ -1195,6 +1451,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1195
1451
|
__query["human"] = human
|
|
1196
1452
|
if pretty is not None:
|
|
1197
1453
|
__query["pretty"] = pretty
|
|
1454
|
+
if timeout is not None:
|
|
1455
|
+
__query["timeout"] = timeout
|
|
1198
1456
|
if not __body:
|
|
1199
1457
|
if service is not None:
|
|
1200
1458
|
__body["service"] = service
|
|
@@ -1232,6 +1490,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1232
1490
|
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
|
|
1233
1491
|
human: t.Optional[bool] = None,
|
|
1234
1492
|
pretty: t.Optional[bool] = None,
|
|
1493
|
+
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
1235
1494
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
1236
1495
|
) -> ObjectApiResponse[t.Any]:
|
|
1237
1496
|
"""
|
|
@@ -1239,14 +1498,9 @@ class InferenceClient(NamespacedClient):
|
|
|
1239
1498
|
|
|
1240
1499
|
<p>Create an Google AI Studio inference endpoint.</p>
|
|
1241
1500
|
<p>Create an inference endpoint to perform an inference task with the <code>googleaistudio</code> service.</p>
|
|
1242
|
-
<p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
|
|
1243
|
-
After creating the endpoint, wait for the model deployment to complete before using it.
|
|
1244
|
-
To verify the deployment status, use the get trained model statistics API.
|
|
1245
|
-
Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
|
|
1246
|
-
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
|
|
1247
1501
|
|
|
1248
1502
|
|
|
1249
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.
|
|
1503
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-google-ai-studio.html>`_
|
|
1250
1504
|
|
|
1251
1505
|
:param task_type: The type of the inference task that the model will perform.
|
|
1252
1506
|
:param googleaistudio_inference_id: The unique identifier of the inference endpoint.
|
|
@@ -1255,6 +1509,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1255
1509
|
:param service_settings: Settings used to install the inference model. These
|
|
1256
1510
|
settings are specific to the `googleaistudio` service.
|
|
1257
1511
|
:param chunking_settings: The chunking configuration object.
|
|
1512
|
+
:param timeout: Specifies the amount of time to wait for the inference endpoint
|
|
1513
|
+
to be created.
|
|
1258
1514
|
"""
|
|
1259
1515
|
if task_type in SKIP_IN_PATH:
|
|
1260
1516
|
raise ValueError("Empty value passed for parameter 'task_type'")
|
|
@@ -1281,6 +1537,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1281
1537
|
__query["human"] = human
|
|
1282
1538
|
if pretty is not None:
|
|
1283
1539
|
__query["pretty"] = pretty
|
|
1540
|
+
if timeout is not None:
|
|
1541
|
+
__query["timeout"] = timeout
|
|
1284
1542
|
if not __body:
|
|
1285
1543
|
if service is not None:
|
|
1286
1544
|
__body["service"] = service
|
|
@@ -1314,7 +1572,9 @@ class InferenceClient(NamespacedClient):
|
|
|
1314
1572
|
async def put_googlevertexai(
|
|
1315
1573
|
self,
|
|
1316
1574
|
*,
|
|
1317
|
-
task_type: t.Union[
|
|
1575
|
+
task_type: t.Union[
|
|
1576
|
+
str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
|
|
1577
|
+
],
|
|
1318
1578
|
googlevertexai_inference_id: str,
|
|
1319
1579
|
service: t.Optional[t.Union[str, t.Literal["googlevertexai"]]] = None,
|
|
1320
1580
|
service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
|
|
@@ -1324,6 +1584,7 @@ class InferenceClient(NamespacedClient):
|
|
|
1324
1584
|
human: t.Optional[bool] = None,
|
|
1325
1585
|
pretty: t.Optional[bool] = None,
|
|
1326
1586
|
task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
|
|
1587
|
+
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
1327
1588
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
1328
1589
|
) -> ObjectApiResponse[t.Any]:
|
|
1329
1590
|
"""
|
|
@@ -1331,14 +1592,9 @@ class InferenceClient(NamespacedClient):
|
|
|
1331
1592
|
|
|
1332
1593
|
<p>Create a Google Vertex AI inference endpoint.</p>
|
|
1333
1594
|
<p>Create an inference endpoint to perform an inference task with the <code>googlevertexai</code> service.</p>
|
|
1334
|
-
<p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
|
|
1335
|
-
After creating the endpoint, wait for the model deployment to complete before using it.
|
|
1336
|
-
To verify the deployment status, use the get trained model statistics API.
|
|
1337
|
-
Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
|
|
1338
|
-
Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
|
|
1339
1595
|
|
|
1340
1596
|
|
|
1341
|
-
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.
|
|
1597
|
+
`<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-google-vertex-ai.html>`_
|
|
1342
1598
|
|
|
1343
1599
|
:param task_type: The type of the inference task that the model will perform.
|
|
1344
1600
|
:param googlevertexai_inference_id: The unique identifier of the inference endpoint.
|
|
@@ -1349,6 +1605,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1349
1605
|
:param chunking_settings: The chunking configuration object.
|
|
1350
1606
|
:param task_settings: Settings to configure the inference task. These settings
|
|
1351
1607
|
are specific to the task type you specified.
|
|
1608
|
+
:param timeout: Specifies the amount of time to wait for the inference endpoint
|
|
1609
|
+
to be created.
|
|
1352
1610
|
"""
|
|
1353
1611
|
if task_type in SKIP_IN_PATH:
|
|
1354
1612
|
raise ValueError("Empty value passed for parameter 'task_type'")
|
|
@@ -1375,6 +1633,8 @@ class InferenceClient(NamespacedClient):
|
|
|
1375
1633
|
__query["human"] = human
|
|
1376
1634
|
if pretty is not None:
|
|
1377
1635
|
__query["pretty"] = pretty
|
|
1636
|
+
if timeout is not None:
|
|
1637
|
+
__query["timeout"] = timeout
|
|
1378
1638
|
if not __body:
|
|
1379
1639
|
if service is not None:
|
|
1380
1640
|
__body["service"] = service
|
|
@@ -1400,12 +1660,19 @@ class InferenceClient(NamespacedClient):
|
|
|
1400
1660
|
)
|
|
1401
1661
|
|
|
1402
1662
|
@_rewrite_parameters(
|
|
1403
|
-
body_fields=(
|
|
1663
|
+
body_fields=(
|
|
1664
|
+
"service",
|
|
1665
|
+
"service_settings",
|
|
1666
|
+
"chunking_settings",
|
|
1667
|
+
"task_settings",
|
|
1668
|
+
),
|
|
1404
1669
|
)
|
|
1405
1670
|
async def put_hugging_face(
|
|
1406
1671
|
self,
|
|
1407
1672
|
*,
|
|
1408
|
-
task_type: t.Union[
|
|
1673
|
+
task_type: t.Union[
|
|
1674
|
+
str, t.Literal["chat_completion", "completion", "rerank", "text_embedding"]
|
|
1675
|
+
],
|
|
1409
1676
|
huggingface_inference_id: str,
|
|
1410
1677
|
service: t.Optional[t.Union[str, t.Literal["hugging_face"]]] = None,
|
|
1411
1678
|
service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
|
|
@@ -1414,17 +1681,22 @@ class InferenceClient(NamespacedClient):
|
|
|
1414
1681
|
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
|
|
1415
1682
|
human: t.Optional[bool] = None,
|
|
1416
1683
|
pretty: t.Optional[bool] = None,
|
|
1684
|
+
task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
|
|
1685
|
+
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
|
|
1417
1686
|
body: t.Optional[t.Dict[str, t.Any]] = None,
|
|
1418
1687
|
) -> ObjectApiResponse[t.Any]:
|
|
1419
1688
|
"""
|
|
1420
1689
|
.. raw:: html
|
|
1421
1690
|
|
|
1422
1691
|
<p>Create a Hugging Face inference endpoint.</p>
|
|
1423
|
-
<p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
<p>
|
|
1692
|
+
<p>Create an inference endpoint to perform an inference task with the <code>hugging_face</code> service.
|
|
1693
|
+
Supported tasks include: <code>text_embedding</code>, <code>completion</code>, and <code>chat_completion</code>.</p>
|
|
1694
|
+
<p>To configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.
|
|
1695
|
+
Select a model that supports the task you intend to use.</p>
|
|
1696
|
+
<p>For Elastic's <code>text_embedding</code> task:
|
|
1697
|
+
The selected model must support the <code>Sentence Embeddings</code> task. On the new endpoint creation page, select the <code>Sentence Embeddings</code> task under the <code>Advanced Configuration</code> section.
|
|
1698
|
+
After the endpoint has initialized, copy the generated endpoint URL.
|
|
1699
|
+
Recommended models for <code>text_embedding</code> task:</p>
|
|
1428
1700
|
<ul>
|
|
1429
1701
|
<li><code>all-MiniLM-L6-v2</code></li>
|
|
1430
1702
|
<li><code>all-MiniLM-L12-v2</code></li>
|
|
@@ -1434,14 +1706,27 @@ class InferenceClient(NamespacedClient):
           <li><code>multilingual-e5-base</code></li>
           <li><code>multilingual-e5-small</code></li>
           </ul>
-          <p>
-
-
-
-
+          <p>For Elastic's <code>chat_completion</code> and <code>completion</code> tasks:
+          the selected model must support the <code>Text Generation</code> task and expose the OpenAI API. Hugging Face supports both serverless and dedicated endpoints for <code>Text Generation</code>. When creating a dedicated endpoint, select the <code>Text Generation</code> task.
+          After the endpoint is initialized (for dedicated) or ready (for serverless), ensure that it supports the OpenAI API and that its URL includes the <code>/v1/chat/completions</code> path. Then copy the full endpoint URL for use.
+          Recommended models for the <code>chat_completion</code> and <code>completion</code> tasks:</p>
+          <ul>
+          <li><code>Mistral-7B-Instruct-v0.2</code></li>
+          <li><code>QwQ-32B</code></li>
+          <li><code>Phi-3-mini-128k-instruct</code></li>
+          </ul>
+          <p>For Elastic's <code>rerank</code> task:
+          the selected model must support the <code>sentence-ranking</code> task and expose the OpenAI API.
+          Hugging Face currently supports only dedicated (not serverless) endpoints for <code>Rerank</code>.
+          After the endpoint is initialized, copy the full endpoint URL for use.
+          Tested models for the <code>rerank</code> task:</p>
+          <ul>
+          <li><code>bge-reranker-base</code></li>
+          <li><code>jina-reranker-v1-turbo-en-GGUF</code></li>
+          </ul>


-          `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+          `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-hugging-face.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param huggingface_inference_id: The unique identifier of the inference endpoint.
@@ -1450,6 +1735,10 @@ class InferenceClient(NamespacedClient):
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `hugging_face` service.
         :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
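Taken together, the new signature lets a Hugging Face `text_embedding` endpoint be created as follows. A minimal sketch, assuming a reachable cluster and a Hugging Face Inference Endpoint; the URL, token, and `service_settings` keys (`url`, `api_key`) are illustrative values based on the Elastic docs, not part of this diff:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    # Hypothetical endpoint URL and token copied from the Hugging Face
    # Inference Endpoints page, as the docstring above describes.
    client.inference.put_hugging_face(
        task_type="text_embedding",
        huggingface_inference_id="hf-embeddings",
        service="hugging_face",
        service_settings={
            "url": "https://<your-endpoint>.endpoints.huggingface.cloud",
            "api_key": "<hf-access-token>",
        },
    )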
@@ -1476,6 +1765,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1483,6 +1774,8 @@ class InferenceClient(NamespacedClient):
                 __body["service_settings"] = service_settings
             if chunking_settings is not None:
                 __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
         if not __body:
             __body = None  # type: ignore[assignment]
         __headers = {"accept": "application/json"}
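The `task_settings` and `timeout` plumbing shown above reaches the API like this. A sketch for the new `rerank` task; `return_documents` and `top_n` are assumed task settings per the Elastic Hugging Face service docs, and `timeout` takes a duration string:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    client.inference.put_hugging_face(
        task_type="rerank",
        huggingface_inference_id="hf-reranker",
        service="hugging_face",
        service_settings={
            # Dedicated (not serverless) endpoint URL, per the docstring above.
            "url": "https://<your-endpoint>.endpoints.huggingface.cloud",
            "api_key": "<hf-access-token>",
        },
        task_settings={"return_documents": True, "top_n": 3},  # assumed keys
        timeout="30s",  # sent as the ?timeout query parameter
    )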
@@ -1519,6 +1812,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1528,14 +1822,9 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>jinaai</code> service.</p>
           <p>To review the available <code>rerank</code> models, refer to <a href="https://jina.ai/reranker">https://jina.ai/reranker</a>.
           To review the available <code>text_embedding</code> models, refer to <a href="https://jina.ai/embeddings/">https://jina.ai/embeddings/</a>.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-          `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+          `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-jinaai.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param jinaai_inference_id: The unique identifier of the inference endpoint.
@@ -1546,6 +1835,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1570,6 +1861,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
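The `jinaai` service gains the same `timeout` parameter. A minimal sketch; `api_key` and `model_id` follow the documented `jinaai` service settings, and the model name is only an example:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    client.inference.put_jinaai(
        task_type="text_embedding",
        jinaai_inference_id="jina-embeddings",
        service="jinaai",
        service_settings={
            "api_key": "<jina-api-key>",       # placeholder
            "model_id": "jina-embeddings-v3",  # example model name
        },
        timeout="30s",
    )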
@@ -1600,7 +1893,9 @@ class InferenceClient(NamespacedClient):
     async def put_mistral(
         self,
         *,
-        task_type: t.Union[
+        task_type: t.Union[
+            str, t.Literal["chat_completion", "completion", "text_embedding"]
+        ],
         mistral_inference_id: str,
         service: t.Optional[t.Union[str, t.Literal["mistral"]]] = None,
         service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
@@ -1609,30 +1904,27 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
         .. raw:: html

           <p>Create a Mistral inference endpoint.</p>
-          <p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>
+          <p>Create an inference endpoint to perform an inference task with the <code>mistral</code> service.</p>


-          `<https://www.elastic.co/guide/en/elasticsearch/reference/
+          `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-mistral.html>`_

-        :param task_type: The
-            is `text_embedding`.
+        :param task_type: The type of the inference task that the model will perform.
         :param mistral_inference_id: The unique identifier of the inference endpoint.
         :param service: The type of service supported for the specified task type. In
             this case, `mistral`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `mistral` service.
         :param chunking_settings: The chunking configuration object.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
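With `chat_completion` and `completion` added to the allowed Mistral task types, usage might look like this. A sketch; the `model` service setting is an assumption based on the Elastic `mistral` service docs, and the model name is a placeholder:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    client.inference.put_mistral(
        task_type="chat_completion",  # newly allowed alongside "completion"
        mistral_inference_id="mistral-chat",
        service="mistral",
        service_settings={
            "api_key": "<mistral-api-key>",   # placeholder
            "model": "mistral-small-latest",  # assumed setting key and model
        },
        timeout="30s",
    )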
@@ -1657,6 +1949,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
@@ -1702,6 +1996,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1709,14 +2004,9 @@ class InferenceClient(NamespacedClient):

           <p>Create an OpenAI inference endpoint.</p>
           <p>Create an inference endpoint to perform an inference task with the <code>openai</code> service or <code>openai</code> compatible APIs.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-          `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+          `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-openai.html>`_

         :param task_type: The type of the inference task that the model will perform.
             NOTE: The `chat_completion` task type only supports streaming and only through
@@ -1729,6 +2019,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1753,6 +2045,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
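For the `openai` service, the new `timeout` combines with the existing support for OpenAI-compatible APIs. A sketch; the `url` and `model_id` service settings follow the Elastic `openai` service docs, and all values are placeholders:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    client.inference.put_openai(
        task_type="completion",
        openai_inference_id="openai-compatible-completion",
        service="openai",
        service_settings={
            "api_key": "<api-key>",      # placeholder
            "model_id": "<model-name>",  # placeholder
            # Point at any OpenAI-compatible server instead of api.openai.com:
            "url": "http://localhost:8080/v1/chat/completions",
        },
        timeout="30s",
    )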
@@ -1798,6 +2092,7 @@ class InferenceClient(NamespacedClient):
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
         task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1808,7 +2103,7 @@ class InferenceClient(NamespacedClient):
           <p>Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-          `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+          `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-voyageai.html>`_

         :param task_type: The type of the inference task that the model will perform.
         :param voyageai_inference_id: The unique identifier of the inference endpoint.
@@ -1819,6 +2114,8 @@ class InferenceClient(NamespacedClient):
         :param chunking_settings: The chunking configuration object.
         :param task_settings: Settings to configure the inference task. These settings
             are specific to the task type you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1843,6 +2140,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
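The `voyageai` service follows the same pattern. A sketch; `api_key` and `model_id` are assumed per the Elastic VoyageAI service docs, with placeholder values:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    client.inference.put_voyageai(
        task_type="text_embedding",
        voyageai_inference_id="voyage-embeddings",
        service="voyageai",
        service_settings={
            "api_key": "<voyage-api-key>",  # placeholder
            "model_id": "voyage-3-large",   # example model name
        },
        timeout="30s",
    )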
@@ -1881,6 +2180,7 @@ class InferenceClient(NamespacedClient):
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
         pretty: t.Optional[bool] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         body: t.Optional[t.Dict[str, t.Any]] = None,
     ) -> ObjectApiResponse[t.Any]:
         """
@@ -1890,14 +2190,9 @@ class InferenceClient(NamespacedClient):
           <p>Create an inference endpoint to perform an inference task with the <code>watsonxai</code> service.
           You need an IBM Cloud Databases for Elasticsearch deployment to use the <code>watsonxai</code> inference service.
           You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.</p>
-          <p>When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
-          After creating the endpoint, wait for the model deployment to complete before using it.
-          To verify the deployment status, use the get trained model statistics API.
-          Look for <code>"state": "fully_allocated"</code> in the response and ensure that the <code>"allocation_count"</code> matches the <code>"target_allocation_count"</code>.
-          Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.</p>


-          `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+          `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-watsonx-ai.html>`_

         :param task_type: The task type. The only valid task type for the model to perform
             is `text_embedding`.
@@ -1906,6 +2201,8 @@ class InferenceClient(NamespacedClient):
             this case, `watsonxai`.
         :param service_settings: Settings used to install the inference model. These
             settings are specific to the `watsonxai` service.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
         """
         if task_type in SKIP_IN_PATH:
             raise ValueError("Empty value passed for parameter 'task_type'")
@@ -1930,6 +2227,8 @@ class InferenceClient(NamespacedClient):
             __query["human"] = human
         if pretty is not None:
             __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
         if not __body:
             if service is not None:
                 __body["service"] = service
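And for `watsonxai`. A sketch; the service settings listed (`url`, `api_key`, `model_id`, `project_id`, `api_version`) are assumptions based on the Elastic Watsonx service docs, with placeholder values:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    client.inference.put_watsonx(
        task_type="text_embedding",  # the only valid task type, per the docstring
        watsonx_inference_id="watsonx-embeddings",
        service="watsonxai",
        service_settings={
            "api_key": "<ibm-cloud-api-key>",           # placeholder
            "url": "<watsonx-deployment-url>",          # placeholder
            "model_id": "ibm/slate-30m-english-rtrvr",  # example model name
            "project_id": "<watsonx-project-id>",       # placeholder
            "api_version": "<YYYY-MM-DD>",              # placeholder
        },
        timeout="30s",
    )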
@@ -1970,10 +2269,10 @@ class InferenceClient(NamespacedClient):
         """
         .. raw:: html

-          <p>Perform
+          <p>Perform reranking inference on the service</p>


-          `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+          `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_

         :param inference_id: The unique identifier for the inference endpoint.
         :param input: The text on which you want to perform the inference task. It can
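Calling the rerank API against such an endpoint. A sketch; the endpoint id is the hypothetical one created earlier, and `query` is the rerank-specific argument supplied alongside `input`:

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    resp = client.inference.rerank(
        inference_id="hf-reranker",  # hypothetical endpoint from earlier
        query="What is Elasticsearch?",
        input=[
            "Elasticsearch is a distributed search and analytics engine.",
            "The Louvre is a museum in Paris.",
        ],
    )
    # The response ranks each input by relevance to the query.
    print(resp)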
@@ -2049,7 +2348,7 @@ class InferenceClient(NamespacedClient):
           <p>Perform sparse embedding inference on the service</p>


-          `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+          `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_

         :param inference_id: The inference ID
         :param input: Inference input. Either a string or an array of strings.
@@ -2117,7 +2416,7 @@ class InferenceClient(NamespacedClient):
           <p>Perform text embedding inference on the service</p>


-          `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+          `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/post-inference-api.html>`_

         :param inference_id: The inference ID
         :param input: Inference input. Either a string or an array of strings.
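The sparse and dense embedding calls share the same shape. A sketch; both inference ids are hypothetical (`.elser-2-elasticsearch` assumes the preconfigured ELSER endpoint):

    from elasticsearch import Elasticsearch

    client = Elasticsearch("http://localhost:9200")

    dense = client.inference.text_embedding(
        inference_id="hf-embeddings",  # hypothetical dense endpoint from earlier
        input="The quick brown fox",
    )
    sparse = client.inference.sparse_embedding(
        inference_id=".elser-2-elasticsearch",  # assumed built-in ELSER endpoint
        input=["The quick brown fox", "jumps over the lazy dog"],
    )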
@@ -2199,7 +2498,7 @@ class InferenceClient(NamespacedClient):
           However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.</p>


-          `<https://www.elastic.co/guide/en/elasticsearch/reference/8.
+          `<https://www.elastic.co/guide/en/elasticsearch/reference/8.19/update-inference-api.html>`_

         :param inference_id: The unique identifier of the inference endpoint.
         :param inference_config: